diff --git a/web/app/components/datasets/documents/create-from-pipeline/hooks.ts b/web/app/components/datasets/documents/create-from-pipeline/hooks.ts index 429a6eb880..3b93537ddc 100644 --- a/web/app/components/datasets/documents/create-from-pipeline/hooks.ts +++ b/web/app/components/datasets/documents/create-from-pipeline/hooks.ts @@ -15,11 +15,11 @@ export const useAddDocumentsSteps = () => { value: AddDocumentsStep.dataSource, }, { - label: t('datasetPipeline.addDocuments.steps.ProcessDocuments'), + label: t('datasetPipeline.addDocuments.steps.processDocuments'), value: AddDocumentsStep.processDocuments, }, { - label: t('datasetPipeline.addDocuments.steps.ProcessingDocuments'), + label: t('datasetPipeline.addDocuments.steps.processingDocuments'), value: AddDocumentsStep.processingDocuments, }, ] diff --git a/web/app/components/datasets/documents/create-from-pipeline/index.tsx b/web/app/components/datasets/documents/create-from-pipeline/index.tsx index 93288b6f50..874f7d3beb 100644 --- a/web/app/components/datasets/documents/create-from-pipeline/index.tsx +++ b/web/app/components/datasets/documents/create-from-pipeline/index.tsx @@ -25,10 +25,12 @@ import FilePreview from './preview/file-preview' import NotionPagePreview from './preview/notion-page-preview' import WebsitePreview from './preview/web-preview' import ProcessDocuments from './process-documents' +import ChunkPreview from './preview/chunk-preview' +import Processing from './processing' const TestRunPanel = () => { const { t } = useTranslation() - const [currentStep, setCurrentStep] = useState(1) + const [currentStep, setCurrentStep] = useState(3) const [datasource, setDatasource] = useState() const [fileList, setFiles] = useState([]) const [notionPages, setNotionPages] = useState([]) @@ -40,7 +42,10 @@ const TestRunPanel = () => { const plan = useProviderContextSelector(state => state.plan) const enableBilling = useProviderContextSelector(state => state.enableBilling) + const datasetId = useDatasetDetailContextWithSelector(s => s.dataset?.id) const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id) + const indexingType = useDatasetDetailContextWithSelector(s => s.dataset?.indexing_technique) + const retrievalMethod = useDatasetDetailContextWithSelector(s => s.dataset?.retrieval_model_dict.search_method) const { data: pipelineInfo, isFetching: isFetchingPipelineInfo } = usePublishedPipelineInfo(pipelineId || '') @@ -120,6 +125,10 @@ const TestRunPanel = () => { setCurrentStep(preStep => preStep - 1) }, []) + const handlePreviewChunks = useCallback((data: Record) => { + console.log(data) + }, []) + const handleProcess = useCallback((data: Record) => { if (!datasource) return @@ -146,7 +155,8 @@ const TestRunPanel = () => { } // todo: Run Pipeline console.log('datasource_type', datasource_type) - }, [datasource, fileList, notionPages, websiteCrawlJobId, websitePages]) + handleNextStep() + }, [datasource, fileList, handleNextStep, notionPages, websiteCrawlJobId, websitePages]) if (isFetchingPipelineInfo) { return ( @@ -232,24 +242,47 @@ const TestRunPanel = () => { ) } + { + currentStep === 3 && ( + + ) + } {/* Preview */} -
- { - currentStep === 1 && ( - <> - {currentFile && } - {currentNotionPage && } - {currentWebsite && } - - ) - } -
+ { + currentStep === 1 && ( +
+ {currentFile && } + {currentNotionPage && } + {currentWebsite && } +
+ ) + } + { + currentStep === 2 && ( + file.file)} + notionPages={notionPages} + websitePages={websitePages} + isIdle={true} + isPending={true} + estimateData={undefined} + /> + ) + } ) } diff --git a/web/app/components/datasets/documents/create-from-pipeline/left-header.tsx b/web/app/components/datasets/documents/create-from-pipeline/left-header.tsx index 77bb00fc5a..eb03b6ef23 100644 --- a/web/app/components/datasets/documents/create-from-pipeline/left-header.tsx +++ b/web/app/components/datasets/documents/create-from-pipeline/left-header.tsx @@ -30,14 +30,16 @@ const LeftHeader = ({
{steps[currentStep - 1]?.label}
- - - + {currentStep !== steps.length && ( + + + + )} ) diff --git a/web/app/components/datasets/documents/create-from-pipeline/preview/chunk-preview.tsx b/web/app/components/datasets/documents/create-from-pipeline/preview/chunk-preview.tsx new file mode 100644 index 0000000000..29064b4295 --- /dev/null +++ b/web/app/components/datasets/documents/create-from-pipeline/preview/chunk-preview.tsx @@ -0,0 +1,204 @@ +import React, { useMemo, useState } from 'react' +import { useTranslation } from 'react-i18next' +import { PreviewContainer } from '../../../preview/container' +import { PreviewHeader } from '../../../preview/header' +import type { Datasource } from '@/app/components/rag-pipeline/components/panel/test-run/types' +import type { CrawlResultItem, CustomFile, DocumentItem, FileIndexingEstimateResponse } from '@/models/datasets' +import { ChunkingMode, DataSourceType } from '@/models/datasets' +import type { NotionPage } from '@/models/common' +import { DataSourceProvider } from '@/models/common' +import PreviewDocumentPicker from '../../../common/document-picker/preview-document-picker' +import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail' +import { ChunkContainer, QAPreview } from '../../../chunk' +import { FormattedText } from '../../../formatted-text/formatted' +import { PreviewSlice } from '../../../formatted-text/flavours/preview-slice' +import { SkeletonContainer, SkeletonPoint, SkeletonRectangle, SkeletonRow } from '@/app/components/base/skeleton' +import { RiSearchEyeLine } from '@remixicon/react' +import Badge from '@/app/components/base/badge' + +type ChunkPreviewProps = { + datasource: Datasource + files: CustomFile[] + notionPages: NotionPage[] + websitePages: CrawlResultItem[] + isIdle: boolean + isPending: boolean + estimateData: FileIndexingEstimateResponse | undefined +} + +const ChunkPreview = ({ + datasource, + files, + notionPages, + websitePages, + isIdle, + isPending, + estimateData, +}: ChunkPreviewProps) => { + const { t } = useTranslation() + const currentDocForm = useDatasetDetailContextWithSelector(s => s.dataset?.doc_form) + + const [previewFile, setPreviewFile] = useState(files[0] as DocumentItem) + const [previewNotionPage, setPreviewNotionPage] = useState(notionPages[0]) + const [previewWebsitePage, setPreviewWebsitePage] = useState(websitePages[0]) + + const dataSourceType = useMemo(() => { + const type = datasource.type + if (type === DataSourceProvider.fireCrawl || type === DataSourceProvider.jinaReader || type === DataSourceProvider.waterCrawl) + return DataSourceType.WEB + return type + }, [datasource.type]) + + return ( + +
+ {dataSourceType === DataSourceType.FILE + && >} + onChange={(selected) => { + setPreviewFile(selected) + }} + value={previewFile} + /> + } + {dataSourceType === DataSourceType.NOTION + && ({ + id: page.page_id, + name: page.page_name, + extension: 'md', + })) + } + onChange={(selected) => { + const selectedPage = notionPages.find(page => page.page_id === selected.id) + setPreviewNotionPage(selectedPage!) + }} + value={{ + id: previewNotionPage?.page_id || '', + name: previewNotionPage?.page_name || '', + extension: 'md', + }} + /> + } + {dataSourceType === DataSourceType.WEB + && ({ + id: page.source_url, + name: page.title, + extension: 'md', + })) + } + onChange={(selected) => { + const selectedPage = websitePages.find(page => page.source_url === selected.id) + setPreviewWebsitePage(selectedPage!) + }} + value={ + { + id: previewWebsitePage?.source_url || '', + name: previewWebsitePage?.title || '', + extension: 'md', + } + } + /> + } + { + currentDocForm !== ChunkingMode.qa + && + } +
+ } + className='relative flex h-full w-1/2 shrink-0 p-4 pr-0' + mainClassName='space-y-6' + > + {currentDocForm === ChunkingMode.qa && estimateData?.qa_preview && ( + estimateData?.qa_preview.map((item, index) => ( + + + + )) + )} + {currentDocForm === ChunkingMode.text && estimateData?.preview && ( + estimateData?.preview.map((item, index) => ( + + {item.content} + + )) + )} + {currentDocForm === ChunkingMode.parentChild && estimateData?.preview && ( + estimateData?.preview?.map((item, index) => { + const indexForLabel = index + 1 + // const childChunks = parentChildConfig.chunkForContext === 'full-doc' + // ? item.child_chunks.slice(0, FULL_DOC_PREVIEW_LENGTH) + // : item.child_chunks + return ( + + + {item.child_chunks.map((child, index) => { + const indexForLabel = index + 1 + return ( + + ) + })} + + + ) + }) + )} + {!isIdle && ( +
+
+ +

+ {t('datasetCreation.stepTwo.previewChunkTip')} +

+
+
+ )} + {isPending && ( +
+ {Array.from({ length: 10 }, (_, i) => ( + + + + + + + + + + + ))} +
+ )} +
+ ) +} + +export default React.memo(ChunkPreview) diff --git a/web/app/components/datasets/documents/create-from-pipeline/process-documents/actions.tsx b/web/app/components/datasets/documents/create-from-pipeline/process-documents/actions.tsx index 571b6c8bbd..072ab61a98 100644 --- a/web/app/components/datasets/documents/create-from-pipeline/process-documents/actions.tsx +++ b/web/app/components/datasets/documents/create-from-pipeline/process-documents/actions.tsx @@ -1,6 +1,7 @@ import React from 'react' import Button from '@/app/components/base/button' import { useTranslation } from 'react-i18next' +import { RiArrowLeftLine } from '@remixicon/react' type ActionsProps = { onBack: () => void @@ -18,8 +19,10 @@ const Actions = ({ - ) diff --git a/web/app/components/datasets/documents/create-from-pipeline/process-documents/hooks.ts b/web/app/components/datasets/documents/create-from-pipeline/process-documents/hooks.ts index 186d477d4c..7701d9b04e 100644 --- a/web/app/components/datasets/documents/create-from-pipeline/process-documents/hooks.ts +++ b/web/app/components/datasets/documents/create-from-pipeline/process-documents/hooks.ts @@ -1,8 +1,8 @@ import { useMemo } from 'react' import { BaseFieldType } from '@/app/components/base/form/form-scenarios/base/types' -import { useStore } from '@/app/components/workflow/store' import { usePublishedPipelineProcessingParams } from '@/service/use-pipeline' import { PipelineInputVarType } from '@/models/pipeline' +import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail' type PartialInputVarType = PipelineInputVarType.textInput | PipelineInputVarType.number | PipelineInputVarType.select | PipelineInputVarType.checkbox @@ -14,7 +14,7 @@ const VAR_TYPE_MAP: Record = { } export const useConfigurations = (datasourceNodeId: string) => { - const pipelineId = useStore(state => state.pipelineId) + const pipelineId = useDatasetDetailContextWithSelector(state => state.dataset?.pipeline_id) const { data: paramsConfig } = usePublishedPipelineProcessingParams({ pipeline_id: pipelineId!, node_id: datasourceNodeId, diff --git a/web/app/components/datasets/documents/create-from-pipeline/process-documents/index.tsx b/web/app/components/datasets/documents/create-from-pipeline/process-documents/index.tsx index 02fa29f395..d522099925 100644 --- a/web/app/components/datasets/documents/create-from-pipeline/process-documents/index.tsx +++ b/web/app/components/datasets/documents/create-from-pipeline/process-documents/index.tsx @@ -8,36 +8,45 @@ import Header from './header' type ProcessDocumentsProps = { dataSourceNodeId: string onProcess: (data: Record) => void + onPreview: (data: Record) => void onBack: () => void } const ProcessDocuments = ({ dataSourceNodeId, onProcess, + onPreview, onBack, }: ProcessDocumentsProps) => { const formRef = useRef(null) + const isPreview = useRef(false) const { initialData, configurations } = useConfigurations(dataSourceNodeId) const schema = generateZodSchema(configurations) const handleProcess = useCallback(() => { + isPreview.current = false formRef.current?.submit() }, []) const handlePreview = useCallback(() => { + isPreview.current = true formRef.current?.submit() }, []) + const handleSubmit = useCallback((data: Record) => { + isPreview.current ? onPreview(data) : onProcess(data) + }, [onPreview, onProcess]) + const handleReset = useCallback(() => { formRef.current?.reset() }, []) return (
-
+
diff --git a/web/app/components/datasets/documents/create-from-pipeline/process-documents/options.tsx b/web/app/components/datasets/documents/create-from-pipeline/process-documents/options.tsx index c8b4d5eb41..d4400dff77 100644 --- a/web/app/components/datasets/documents/create-from-pipeline/process-documents/options.tsx +++ b/web/app/components/datasets/documents/create-from-pipeline/process-documents/options.tsx @@ -66,7 +66,7 @@ const Options = ({ form.handleSubmit() }} > -
+
{configurations.map((config, index) => { const FieldComponent = BaseField({ initialData, diff --git a/web/app/components/datasets/documents/create-from-pipeline/processing/index.tsx b/web/app/components/datasets/documents/create-from-pipeline/processing/index.tsx new file mode 100644 index 0000000000..8095122339 --- /dev/null +++ b/web/app/components/datasets/documents/create-from-pipeline/processing/index.tsx @@ -0,0 +1,63 @@ +'use client' +import React from 'react' +import { useTranslation } from 'react-i18next' +import { RiBookOpenLine } from '@remixicon/react' +import type { FullDocumentDetail, InitialDocumentDetail } from '@/models/datasets' +import EmbeddingProcess from '../../../create/embedding-process' +import { useGetDocLanguage } from '@/context/i18n' + +type ProcessingProps = { + datasetId: string + indexingType: string + retrievalMethod: string + batchId: string + documents: InitialDocumentDetail[] +} + +const Processing = ({ + datasetId, + batchId, + documents, + indexingType, + retrievalMethod, +}: ProcessingProps) => { + const { t } = useTranslation() + const docLanguage = useGetDocLanguage() + + return ( +
+
+
+ +
+
+
+
+
+ +
+
+
{t('datasetCreation.stepThree.sideTipTitle')}
+
{t('datasetCreation.stepThree.sideTipContent')}
+ + {t('datasetPipeline.addDocuments.stepThree.learnMore')} + +
+
+
+
+ ) +} + +export default Processing diff --git a/web/i18n/en-US/dataset-creation.ts b/web/i18n/en-US/dataset-creation.ts index 8629413311..96640cf8a5 100644 --- a/web/i18n/en-US/dataset-creation.ts +++ b/web/i18n/en-US/dataset-creation.ts @@ -201,7 +201,7 @@ const translation = { resume: 'Resume processing', navTo: 'Go to document', sideTipTitle: 'What\'s next', - sideTipContent: 'After the document finishes indexing, the Knowledge can be integrated into the application as context, you can find the context setting in the prompt orchestration page. You can also create it as an independent ChatGPT indexing plugin for release.', + sideTipContent: 'After finishing document indexing, you can manage and edit documents, run retrieval tests, and modify knowledge settings. Knowledge can then be integrated into your application as context, so make sure to adjust the Retrieval Setting to ensure optimal performance.', modelTitle: 'Are you sure to stop embedding?', modelContent: 'If you need to resume processing later, you will continue from where you left off.', modelButtonConfirm: 'Confirm', diff --git a/web/i18n/en-US/dataset-pipeline.ts b/web/i18n/en-US/dataset-pipeline.ts index f1bf5592e4..ab53bb5f97 100644 --- a/web/i18n/en-US/dataset-pipeline.ts +++ b/web/i18n/en-US/dataset-pipeline.ts @@ -85,6 +85,10 @@ const translation = { }, stepTwo: { chunkSettings: 'Chunk Settings', + previewChunks: 'Preview Chunks', + }, + stepThree: { + learnMore: 'Learn more', }, characters: 'characters', }, diff --git a/web/i18n/zh-Hans/dataset-creation.ts b/web/i18n/zh-Hans/dataset-creation.ts index ec3ce5fc62..ecf46c9d73 100644 --- a/web/i18n/zh-Hans/dataset-creation.ts +++ b/web/i18n/zh-Hans/dataset-creation.ts @@ -201,7 +201,7 @@ const translation = { resume: '恢复处理', navTo: '前往文档', sideTipTitle: '接下来做什么', - sideTipContent: '当文档完成索引处理后,知识库即可集成至应用内作为上下文使用,你可以在提示词编排页找到上下文设置。你也可以创建成可独立使用的 ChatGPT 索引插件发布。', + sideTipContent: '当文档完成索引后,您可以管理和编辑文档、运行检索测试以及修改知识库设置。知识库即可集成到应用程序内作为上下文使用,因此请调整检索设置以确保最佳性能。', modelTitle: '确认停止索引过程吗?', modelContent: '如果您需要稍后恢复处理,则从停止处继续。', modelButtonConfirm: '确认停止', diff --git a/web/i18n/zh-Hans/dataset-pipeline.ts b/web/i18n/zh-Hans/dataset-pipeline.ts index 777acee90b..8bca2839d5 100644 --- a/web/i18n/zh-Hans/dataset-pipeline.ts +++ b/web/i18n/zh-Hans/dataset-pipeline.ts @@ -85,6 +85,10 @@ const translation = { }, stepTwo: { chunkSettings: '分段设置', + previewChunks: '预览分段', + }, + stepThree: { + learnMore: '了解更多', }, characters: '字符', },