refactor: enhance document processing UI and functionality with new components and translations

This commit is contained in:
twwu 2025-05-22 23:05:58 +08:00
parent fe435c23c3
commit 21c24977d8
14 changed files with 363 additions and 35 deletions

View File

@ -15,11 +15,11 @@ export const useAddDocumentsSteps = () => {
value: AddDocumentsStep.dataSource, value: AddDocumentsStep.dataSource,
}, },
{ {
label: t('datasetPipeline.addDocuments.steps.ProcessDocuments'), label: t('datasetPipeline.addDocuments.steps.processDocuments'),
value: AddDocumentsStep.processDocuments, value: AddDocumentsStep.processDocuments,
}, },
{ {
label: t('datasetPipeline.addDocuments.steps.ProcessingDocuments'), label: t('datasetPipeline.addDocuments.steps.processingDocuments'),
value: AddDocumentsStep.processingDocuments, value: AddDocumentsStep.processingDocuments,
}, },
] ]

View File

@ -25,10 +25,12 @@ import FilePreview from './preview/file-preview'
import NotionPagePreview from './preview/notion-page-preview' import NotionPagePreview from './preview/notion-page-preview'
import WebsitePreview from './preview/web-preview' import WebsitePreview from './preview/web-preview'
import ProcessDocuments from './process-documents' import ProcessDocuments from './process-documents'
import ChunkPreview from './preview/chunk-preview'
import Processing from './processing'
const TestRunPanel = () => { const TestRunPanel = () => {
const { t } = useTranslation() const { t } = useTranslation()
const [currentStep, setCurrentStep] = useState(1) const [currentStep, setCurrentStep] = useState(3)
const [datasource, setDatasource] = useState<Datasource>() const [datasource, setDatasource] = useState<Datasource>()
const [fileList, setFiles] = useState<FileItem[]>([]) const [fileList, setFiles] = useState<FileItem[]>([])
const [notionPages, setNotionPages] = useState<NotionPage[]>([]) const [notionPages, setNotionPages] = useState<NotionPage[]>([])
@ -40,7 +42,10 @@ const TestRunPanel = () => {
const plan = useProviderContextSelector(state => state.plan) const plan = useProviderContextSelector(state => state.plan)
const enableBilling = useProviderContextSelector(state => state.enableBilling) const enableBilling = useProviderContextSelector(state => state.enableBilling)
const datasetId = useDatasetDetailContextWithSelector(s => s.dataset?.id)
const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id) const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id)
const indexingType = useDatasetDetailContextWithSelector(s => s.dataset?.indexing_technique)
const retrievalMethod = useDatasetDetailContextWithSelector(s => s.dataset?.retrieval_model_dict.search_method)
const { data: pipelineInfo, isFetching: isFetchingPipelineInfo } = usePublishedPipelineInfo(pipelineId || '') const { data: pipelineInfo, isFetching: isFetchingPipelineInfo } = usePublishedPipelineInfo(pipelineId || '')
@ -120,6 +125,10 @@ const TestRunPanel = () => {
setCurrentStep(preStep => preStep - 1) setCurrentStep(preStep => preStep - 1)
}, []) }, [])
const handlePreviewChunks = useCallback((data: Record<string, any>) => {
console.log(data)
}, [])
const handleProcess = useCallback((data: Record<string, any>) => { const handleProcess = useCallback((data: Record<string, any>) => {
if (!datasource) if (!datasource)
return return
@ -146,7 +155,8 @@ const TestRunPanel = () => {
} }
// todo: Run Pipeline // todo: Run Pipeline
console.log('datasource_type', datasource_type) console.log('datasource_type', datasource_type)
}, [datasource, fileList, notionPages, websiteCrawlJobId, websitePages]) handleNextStep()
}, [datasource, fileList, handleNextStep, notionPages, websiteCrawlJobId, websitePages])
if (isFetchingPipelineInfo) { if (isFetchingPipelineInfo) {
return ( return (
@ -232,24 +242,47 @@ const TestRunPanel = () => {
<ProcessDocuments <ProcessDocuments
dataSourceNodeId={datasource?.nodeId || ''} dataSourceNodeId={datasource?.nodeId || ''}
onProcess={handleProcess} onProcess={handleProcess}
onPreview={handlePreviewChunks}
onBack={handleBackStep} onBack={handleBackStep}
/> />
) )
} }
{
currentStep === 3 && (
<Processing
datasetId={datasetId!}
batchId={''}
documents={[]}
indexingType={indexingType!}
retrievalMethod={retrievalMethod!}
/>
)
}
</div> </div>
</div> </div>
{/* Preview */} {/* Preview */}
<div className='flex h-full flex-1 shrink-0 flex-col pl-2 pt-2'> {
{ currentStep === 1 && (
currentStep === 1 && ( <div className='flex h-full flex-1 shrink-0 flex-col pl-2 pt-2'>
<> {currentFile && <FilePreview file={currentFile} hidePreview={hideFilePreview} />}
{currentFile && <FilePreview file={currentFile} hidePreview={hideFilePreview} />} {currentNotionPage && <NotionPagePreview currentPage={currentNotionPage} hidePreview={hideNotionPagePreview} />}
{currentNotionPage && <NotionPagePreview currentPage={currentNotionPage} hidePreview={hideNotionPagePreview} />} {currentWebsite && <WebsitePreview payload={currentWebsite} hidePreview={hideWebsitePreview} />}
{currentWebsite && <WebsitePreview payload={currentWebsite} hidePreview={hideWebsitePreview} />} </div>
</> )
) }
} {
</div> currentStep === 2 && (
<ChunkPreview
datasource={datasource!}
files={fileList.map(file => file.file)}
notionPages={notionPages}
websitePages={websitePages}
isIdle={true}
isPending={true}
estimateData={undefined}
/>
)
}
</div> </div>
) )
} }

View File

@ -30,14 +30,16 @@ const LeftHeader = ({
<div className='system-md-semibold text-text-primary'> <div className='system-md-semibold text-text-primary'>
{steps[currentStep - 1]?.label} {steps[currentStep - 1]?.label}
</div> </div>
<a {currentStep !== steps.length && (
className='absolute -left-11 top-3.5' <a
href={`/datasets/${datasetId}/documents`} className='absolute -left-11 top-3.5'
> href={`/datasets/${datasetId}/documents`}
<Button variant='secondary-accent' className='size-9 rounded-full p-0'> >
<RiArrowLeftLine className='size-5 ' /> <Button variant='secondary-accent' className='size-9 rounded-full p-0'>
</Button> <RiArrowLeftLine className='size-5 ' />
</a> </Button>
</a>
)}
<Effect className='left-8 top-[-34px] opacity-20' /> <Effect className='left-8 top-[-34px] opacity-20' />
</div> </div>
) )

View File

@ -0,0 +1,204 @@
import React, { useMemo, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { PreviewContainer } from '../../../preview/container'
import { PreviewHeader } from '../../../preview/header'
import type { Datasource } from '@/app/components/rag-pipeline/components/panel/test-run/types'
import type { CrawlResultItem, CustomFile, DocumentItem, FileIndexingEstimateResponse } from '@/models/datasets'
import { ChunkingMode, DataSourceType } from '@/models/datasets'
import type { NotionPage } from '@/models/common'
import { DataSourceProvider } from '@/models/common'
import PreviewDocumentPicker from '../../../common/document-picker/preview-document-picker'
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
import { ChunkContainer, QAPreview } from '../../../chunk'
import { FormattedText } from '../../../formatted-text/formatted'
import { PreviewSlice } from '../../../formatted-text/flavours/preview-slice'
import { SkeletonContainer, SkeletonPoint, SkeletonRectangle, SkeletonRow } from '@/app/components/base/skeleton'
import { RiSearchEyeLine } from '@remixicon/react'
import Badge from '@/app/components/base/badge'
/**
 * Props accepted by the ChunkPreview component.
 */
type ChunkPreviewProps = {
  /** Selected datasource; its `type` decides which document picker is rendered. */
  datasource: Datasource
  /** Candidate local files offered in the file picker. */
  files: CustomFile[]
  /** Candidate Notion pages offered in the Notion picker. */
  notionPages: NotionPage[]
  /** Candidate crawled pages offered in the website picker. */
  websitePages: CrawlResultItem[]
  /** Chunk estimate to render (`total_segments`, `preview`, `qa_preview` are read); may be undefined. */
  estimateData: FileIndexingEstimateResponse | undefined
  /** Idle flag supplied by the caller; gates the empty-state tip inside the component. */
  isIdle: boolean
  /** Pending flag supplied by the caller; gates the loading skeleton inside the component. */
  isPending: boolean
}
/**
 * Preview pane that lists the estimated chunks for the current datasource.
 *
 * The header shows a document picker matching the datasource type (file,
 * Notion page or crawled website page) and, outside QA mode, a badge with the
 * estimated segment count. The body renders the chunks from `estimateData`
 * according to the dataset's `doc_form`, an empty-state tip while idle, and a
 * skeleton while pending.
 *
 * @param datasource - Selected datasource; only `datasource.type` is read.
 * @param files - Files offered in the file picker.
 * @param notionPages - Notion pages offered in the Notion picker.
 * @param websitePages - Crawled pages offered in the website picker.
 * @param isIdle - When true, the "click preview" tip is displayed.
 * @param isPending - When true, the loading skeleton is displayed.
 * @param estimateData - Chunk estimate to render, or undefined if none is available.
 */
const ChunkPreview = ({
  datasource,
  files,
  notionPages,
  websitePages,
  isIdle,
  isPending,
  estimateData,
}: ChunkPreviewProps) => {
  const { t } = useTranslation()
  const currentDocForm = useDatasetDetailContextWithSelector(s => s.dataset?.doc_form)

  // Each picker starts on the first entry of its list (undefined at runtime when the list is empty).
  const [previewFile, setPreviewFile] = useState<DocumentItem>(files[0] as DocumentItem)
  const [previewNotionPage, setPreviewNotionPage] = useState<NotionPage>(notionPages[0])
  const [previewWebsitePage, setPreviewWebsitePage] = useState<CrawlResultItem>(websitePages[0])

  // All crawler providers are collapsed into the single WEB datasource type.
  const dataSourceType = useMemo(() => {
    const type = datasource.type
    if (type === DataSourceProvider.fireCrawl || type === DataSourceProvider.jinaReader || type === DataSourceProvider.waterCrawl)
      return DataSourceType.WEB
    return type
  }, [datasource.type])

  return (
    <PreviewContainer
      header={<PreviewHeader
        title={t('datasetCreation.stepTwo.preview')}
      >
        <div className='flex items-center gap-1'>
          {dataSourceType === DataSourceType.FILE
            && <PreviewDocumentPicker
              files={files as Array<Required<CustomFile>>}
              onChange={(selected) => {
                setPreviewFile(selected)
              }}
              value={previewFile}
            />
          }
          {dataSourceType === DataSourceType.NOTION
            && <PreviewDocumentPicker
              files={
                notionPages.map(page => ({
                  id: page.page_id,
                  name: page.page_name,
                  extension: 'md',
                }))
              }
              onChange={(selected) => {
                const selectedPage = notionPages.find(page => page.page_id === selected.id)
                // Keep the current selection when the picked id is not in the list,
                // instead of storing undefined behind a non-null assertion.
                if (selectedPage)
                  setPreviewNotionPage(selectedPage)
              }}
              value={{
                id: previewNotionPage?.page_id || '',
                name: previewNotionPage?.page_name || '',
                extension: 'md',
              }}
            />
          }
          {dataSourceType === DataSourceType.WEB
            && <PreviewDocumentPicker
              files={
                websitePages.map(page => ({
                  id: page.source_url,
                  name: page.title,
                  extension: 'md',
                }))
              }
              onChange={(selected) => {
                const selectedPage = websitePages.find(page => page.source_url === selected.id)
                // Same guard as the Notion picker: ignore ids that are not in the list.
                if (selectedPage)
                  setPreviewWebsitePage(selectedPage)
              }}
              value={
                {
                  id: previewWebsitePage?.source_url || '',
                  name: previewWebsitePage?.title || '',
                  extension: 'md',
                }
              }
            />
          }
          {
            currentDocForm !== ChunkingMode.qa
            && <Badge text={t('datasetCreation.stepTwo.previewChunkCount', {
              count: estimateData?.total_segments || 0,
            }) as string}
            />
          }
        </div>
      </PreviewHeader>}
      className='relative flex h-full w-1/2 shrink-0 p-4 pr-0'
      mainClassName='space-y-6'
    >
      {/* Keys combine position and content so chunks with identical text do not collide. */}
      {currentDocForm === ChunkingMode.qa && estimateData?.qa_preview && (
        estimateData.qa_preview.map((item, index) => (
          <ChunkContainer
            key={`${index}-${item.question}`}
            label={`Chunk-${index + 1}`}
            characterCount={item.question.length + item.answer.length}
          >
            <QAPreview qa={item} />
          </ChunkContainer>
        ))
      )}
      {currentDocForm === ChunkingMode.text && estimateData?.preview && (
        estimateData.preview.map((item, index) => (
          <ChunkContainer
            key={`${index}-${item.content}`}
            label={`Chunk-${index + 1}`}
            characterCount={item.content.length}
          >
            {item.content}
          </ChunkContainer>
        ))
      )}
      {currentDocForm === ChunkingMode.parentChild && estimateData?.preview && (
        estimateData.preview.map((item, parentIndex) => {
          const parentLabelIndex = parentIndex + 1
          // TODO: an earlier draft capped the child chunks for the 'full-doc' context mode;
          // that cap is not applied here, so every child chunk is rendered.
          return (
            <ChunkContainer
              key={`${parentIndex}-${item.content}`}
              label={`Chunk-${parentLabelIndex}`}
              characterCount={item.content.length}
            >
              <FormattedText>
                {item.child_chunks.map((child, childIndex) => {
                  const childLabelIndex = childIndex + 1
                  return (
                    <PreviewSlice
                      key={`${childIndex}-${child}`}
                      label={`C-${childLabelIndex}`}
                      text={child}
                      tooltip={`Child-chunk-${childLabelIndex} · ${child.length} Characters`}
                      labelInnerClassName='text-[10px] font-semibold align-bottom leading-7'
                      dividerClassName='leading-7'
                    />
                  )
                })}
              </FormattedText>
            </ChunkContainer>
          )
        })
      )}
      {/* Empty state: prompt the user to request a preview while nothing has been requested yet. */}
      {isIdle && (
        <div className='flex h-full w-full items-center justify-center'>
          <div className='flex flex-col items-center justify-center gap-3'>
            <RiSearchEyeLine className='size-10 text-text-empty-state-icon' />
            <p className='text-sm text-text-tertiary'>
              {t('datasetCreation.stepTwo.previewChunkTip')}
            </p>
          </div>
        </div>
      )}
      {/* Loading state: placeholder skeleton rows while the estimate is pending. */}
      {isPending && (
        <div className='space-y-6'>
          {Array.from({ length: 10 }, (_, i) => (
            <SkeletonContainer key={i}>
              <SkeletonRow>
                <SkeletonRectangle className='w-20' />
                <SkeletonPoint />
                <SkeletonRectangle className='w-24' />
              </SkeletonRow>
              <SkeletonRectangle className='w-full' />
              <SkeletonRectangle className='w-full' />
              <SkeletonRectangle className='w-[422px]' />
            </SkeletonContainer>
          ))}
        </div>
      )}
    </PreviewContainer>
  )
}

export default React.memo(ChunkPreview)

View File

@ -1,6 +1,7 @@
import React from 'react' import React from 'react'
import Button from '@/app/components/base/button' import Button from '@/app/components/base/button'
import { useTranslation } from 'react-i18next' import { useTranslation } from 'react-i18next'
import { RiArrowLeftLine } from '@remixicon/react'
type ActionsProps = { type ActionsProps = {
onBack: () => void onBack: () => void
@ -18,8 +19,10 @@ const Actions = ({
<Button <Button
variant='secondary' variant='secondary'
onClick={onBack} onClick={onBack}
className='gap-x-0.5'
> >
{t('datasetPipeline.operations.dataSource')} <RiArrowLeftLine className='size-4' />
<span className='px-0.5'>{t('datasetPipeline.operations.dataSource')}</span>
</Button> </Button>
<Button <Button
variant='primary' variant='primary'

View File

@ -1,6 +1,7 @@
import React from 'react' import React from 'react'
import Button from '@/app/components/base/button' import Button from '@/app/components/base/button'
import { useTranslation } from 'react-i18next' import { useTranslation } from 'react-i18next'
import { RiSearchEyeLine } from '@remixicon/react'
type HeaderProps = { type HeaderProps = {
onReset: () => void onReset: () => void
@ -21,10 +22,15 @@ const Header = ({
{t('datasetPipeline.addDocuments.stepTwo.chunkSettings')} {t('datasetPipeline.addDocuments.stepTwo.chunkSettings')}
</div> </div>
<Button variant='ghost' disabled={disableReset} onClick={onReset}> <Button variant='ghost' disabled={disableReset} onClick={onReset}>
{t('common.operations.reset')} {t('common.operation.reset')}
</Button> </Button>
<Button variant='primary' onClick={onPreview}> <Button
{t('common.operations.reset')} variant='secondary-accent'
onClick={onPreview}
className='gap-x-0.5'
>
<RiSearchEyeLine className='size-4' />
<span className='px-0.5'>{t('datasetPipeline.addDocuments.stepTwo.previewChunks')}</span>
</Button> </Button>
</div> </div>
) )

View File

@ -1,8 +1,8 @@
import { useMemo } from 'react' import { useMemo } from 'react'
import { BaseFieldType } from '@/app/components/base/form/form-scenarios/base/types' import { BaseFieldType } from '@/app/components/base/form/form-scenarios/base/types'
import { useStore } from '@/app/components/workflow/store'
import { usePublishedPipelineProcessingParams } from '@/service/use-pipeline' import { usePublishedPipelineProcessingParams } from '@/service/use-pipeline'
import { PipelineInputVarType } from '@/models/pipeline' import { PipelineInputVarType } from '@/models/pipeline'
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
type PartialInputVarType = PipelineInputVarType.textInput | PipelineInputVarType.number | PipelineInputVarType.select | PipelineInputVarType.checkbox type PartialInputVarType = PipelineInputVarType.textInput | PipelineInputVarType.number | PipelineInputVarType.select | PipelineInputVarType.checkbox
@ -14,7 +14,7 @@ const VAR_TYPE_MAP: Record<PartialInputVarType, BaseFieldType> = {
} }
export const useConfigurations = (datasourceNodeId: string) => { export const useConfigurations = (datasourceNodeId: string) => {
const pipelineId = useStore(state => state.pipelineId) const pipelineId = useDatasetDetailContextWithSelector(state => state.dataset?.pipeline_id)
const { data: paramsConfig } = usePublishedPipelineProcessingParams({ const { data: paramsConfig } = usePublishedPipelineProcessingParams({
pipeline_id: pipelineId!, pipeline_id: pipelineId!,
node_id: datasourceNodeId, node_id: datasourceNodeId,

View File

@ -8,36 +8,45 @@ import Header from './header'
type ProcessDocumentsProps = { type ProcessDocumentsProps = {
dataSourceNodeId: string dataSourceNodeId: string
onProcess: (data: Record<string, any>) => void onProcess: (data: Record<string, any>) => void
onPreview: (data: Record<string, any>) => void
onBack: () => void onBack: () => void
} }
const ProcessDocuments = ({ const ProcessDocuments = ({
dataSourceNodeId, dataSourceNodeId,
onProcess, onProcess,
onPreview,
onBack, onBack,
}: ProcessDocumentsProps) => { }: ProcessDocumentsProps) => {
const formRef = useRef<any>(null) const formRef = useRef<any>(null)
const isPreview = useRef(false)
const { initialData, configurations } = useConfigurations(dataSourceNodeId) const { initialData, configurations } = useConfigurations(dataSourceNodeId)
const schema = generateZodSchema(configurations) const schema = generateZodSchema(configurations)
const handleProcess = useCallback(() => { const handleProcess = useCallback(() => {
isPreview.current = false
formRef.current?.submit() formRef.current?.submit()
}, []) }, [])
const handlePreview = useCallback(() => { const handlePreview = useCallback(() => {
isPreview.current = true
formRef.current?.submit() formRef.current?.submit()
}, []) }, [])
const handleSubmit = useCallback((data: Record<string, any>) => {
isPreview.current ? onPreview(data) : onProcess(data)
}, [onPreview, onProcess])
const handleReset = useCallback(() => { const handleReset = useCallback(() => {
formRef.current?.reset() formRef.current?.reset()
}, []) }, [])
return ( return (
<div className='flex flex-col gap-y-4 pt-4'> <div className='flex flex-col gap-y-4 pt-4'>
<div className='flex flex-col rounded-lg border-components-panel-border bg-components-panel-bg'> <div className='flex flex-col rounded-lg border border-components-panel-border bg-components-panel-bg'>
<Header <Header
onReset={handleReset} onReset={handleReset}
disableReset={formRef.current.isDirty()} disableReset={!formRef.current?.isDirty()}
onPreview={handlePreview} onPreview={handlePreview}
/> />
<Options <Options
@ -45,7 +54,7 @@ const ProcessDocuments = ({
initialData={initialData} initialData={initialData}
configurations={configurations} configurations={configurations}
schema={schema} schema={schema}
onSubmit={onProcess} onSubmit={handleSubmit}
/> />
</div> </div>
<Actions onBack={onBack} onProcess={handleProcess} /> <Actions onBack={onBack} onProcess={handleProcess} />

View File

@ -66,7 +66,7 @@ const Options = ({
form.handleSubmit() form.handleSubmit()
}} }}
> >
<div className='flex flex-col gap-3 px-4 py-3'> <div className='flex flex-col gap-3 border-t border-divider-subtle px-4 py-3'>
{configurations.map((config, index) => { {configurations.map((config, index) => {
const FieldComponent = BaseField({ const FieldComponent = BaseField({
initialData, initialData,

View File

@ -0,0 +1,63 @@
'use client'
import React from 'react'
import { useTranslation } from 'react-i18next'
import { RiBookOpenLine } from '@remixicon/react'
import type { FullDocumentDetail, InitialDocumentDetail } from '@/models/datasets'
import EmbeddingProcess from '../../../create/embedding-process'
import { useGetDocLanguage } from '@/context/i18n'
/**
 * Props accepted by the Processing component; all of them are forwarded to
 * EmbeddingProcess.
 */
type ProcessingProps = {
  /** Id of the dataset being processed. */
  datasetId: string
  /** Batch identifier forwarded to EmbeddingProcess. */
  batchId: string
  /** Documents forwarded to EmbeddingProcess. */
  documents: InitialDocumentDetail[]
  /** Indexing type forwarded to EmbeddingProcess. */
  indexingType: string
  /** Retrieval method forwarded to EmbeddingProcess. */
  retrievalMethod: string
}
/**
 * Processing view: renders EmbeddingProcess for the given dataset/batch in the
 * left column and a "what's next" tip card with a documentation link in the
 * right column.
 */
const Processing = (props: ProcessingProps) => {
  const { datasetId, batchId, documents, indexingType, retrievalMethod } = props
  const { t } = useTranslation()
  const docLanguage = useGetDocLanguage()

  // Documentation page opened by the "learn more" link, localized by doc language.
  const learnMoreHref = `https://docs.dify.ai/${docLanguage}/guides/knowledge-base/integrate-knowledge-within-application`

  // Left column: embedding progress for the submitted documents.
  const embeddingColumn = (
    <div className='h-full w-3/5 overflow-y-auto pb-8 pt-10'>
      <div className='max-w-[640px]'>
        <EmbeddingProcess
          datasetId={datasetId}
          batchId={batchId || ''}
          documents={documents as FullDocumentDetail[]}
          indexingType={indexingType}
          retrievalMethod={retrievalMethod}
        />
      </div>
    </div>
  )

  // Right column: tip card with title, description and documentation link.
  const tipColumn = (
    <div className='w-2/5 pr-8 pt-[88px]'>
      <div className='flex w-[328px] flex-col gap-3 rounded-xl bg-background-section p-6'>
        <div className='flex size-10 items-center justify-center rounded-[10px] bg-components-card-bg shadow-lg shadow-shadow-shadow-5'>
          <RiBookOpenLine className='size-5 text-text-accent' />
        </div>
        <div className='flex flex-col gap-y-2'>
          <div className='system-xl-semibold text-text-secondary'>{t('datasetCreation.stepThree.sideTipTitle')}</div>
          <div className='system-sm-regular text-text-tertiary'>{t('datasetCreation.stepThree.sideTipContent')}</div>
          <a
            href={learnMoreHref}
            target='_blank'
            rel='noreferrer noopener'
            className='system-sm-regular text-text-accent'
          >
            {t('datasetPipeline.addDocuments.stepThree.learnMore')}
          </a>
        </div>
      </div>
    </div>
  )

  return (
    <div className='flex h-full w-full justify-center overflow-hidden'>
      {embeddingColumn}
      {tipColumn}
    </div>
  )
}

export default Processing

View File

@ -201,7 +201,7 @@ const translation = {
resume: 'Resume processing', resume: 'Resume processing',
navTo: 'Go to document', navTo: 'Go to document',
sideTipTitle: 'What\'s next', sideTipTitle: 'What\'s next',
sideTipContent: 'After the document finishes indexing, the Knowledge can be integrated into the application as context, you can find the context setting in the prompt orchestration page. You can also create it as an independent ChatGPT indexing plugin for release.', sideTipContent: 'After finishing document indexing, you can manage and edit documents, run retrieval tests, and modify knowledge settings. Knowledge can then be integrated into your application as context, so make sure to adjust the Retrieval Setting to ensure optimal performance.',
modelTitle: 'Are you sure to stop embedding?', modelTitle: 'Are you sure to stop embedding?',
modelContent: 'If you need to resume processing later, you will continue from where you left off.', modelContent: 'If you need to resume processing later, you will continue from where you left off.',
modelButtonConfirm: 'Confirm', modelButtonConfirm: 'Confirm',

View File

@ -85,6 +85,10 @@ const translation = {
}, },
stepTwo: { stepTwo: {
chunkSettings: 'Chunk Settings', chunkSettings: 'Chunk Settings',
previewChunks: 'Preview Chunks',
},
stepThree: {
learnMore: 'Learn more',
}, },
characters: 'characters', characters: 'characters',
}, },

View File

@ -201,7 +201,7 @@ const translation = {
resume: '恢复处理', resume: '恢复处理',
navTo: '前往文档', navTo: '前往文档',
sideTipTitle: '接下来做什么', sideTipTitle: '接下来做什么',
sideTipContent: '当文档完成索引处理后,知识库即可集成至应用内作为上下文使用,你可以在提示词编排页找到上下文设置。你也可以创建成可独立使用的 ChatGPT 索引插件发布。', sideTipContent: '当文档完成索引后,您可以管理和编辑文档、运行检索测试以及修改知识库设置。知识库即可集成到应用程序内作为上下文使用,因此请调整检索设置以确保最佳性能。',
modelTitle: '确认停止索引过程吗?', modelTitle: '确认停止索引过程吗?',
modelContent: '如果您需要稍后恢复处理,则从停止处继续。', modelContent: '如果您需要稍后恢复处理,则从停止处继续。',
modelButtonConfirm: '确认停止', modelButtonConfirm: '确认停止',

View File

@ -85,6 +85,10 @@ const translation = {
}, },
stepTwo: { stepTwo: {
chunkSettings: '分段设置', chunkSettings: '分段设置',
previewChunks: '预览分段',
},
stepThree: {
learnMore: '了解更多',
}, },
characters: '字符', characters: '字符',
}, },