From b36f36d242a5731c629211ca257fecf350cb9017 Mon Sep 17 00:00:00 2001 From: twwu Date: Tue, 8 Jul 2025 14:14:50 +0800 Subject: [PATCH] feat: Enhance CreateFormPipeline with file selection and validation for online documents and drives --- .../create-from-pipeline/actions/index.tsx | 83 ++++++++++++---- .../online-drive/file-list/list/index.tsx | 5 +- .../documents/create-from-pipeline/hooks.ts | 21 ++++ .../documents/create-from-pipeline/index.tsx | 99 ++++++++++++++++++- web/i18n/en-US/dataset-pipeline.ts | 2 + web/i18n/zh-Hans/dataset-pipeline.ts | 2 + web/types/pipeline.tsx | 2 +- 7 files changed, 189 insertions(+), 25 deletions(-) diff --git a/web/app/components/datasets/documents/create-from-pipeline/actions/index.tsx b/web/app/components/datasets/documents/create-from-pipeline/actions/index.tsx index 490f994a0d..3693dee42f 100644 --- a/web/app/components/datasets/documents/create-from-pipeline/actions/index.tsx +++ b/web/app/components/datasets/documents/create-from-pipeline/actions/index.tsx @@ -1,45 +1,88 @@ -import React from 'react' +import React, { useMemo } from 'react' import Button from '@/app/components/base/button' import { useTranslation } from 'react-i18next' import { useParams } from 'next/navigation' import { RiArrowRightLine } from '@remixicon/react' import Link from 'next/link' +import Checkbox from '@/app/components/base/checkbox' type ActionsProps = { disabled?: boolean handleNextStep: () => void showSelect?: boolean + totalOptions?: number + selectedOptions?: number + onSelectAll?: () => void + tip?: string } const Actions = ({ disabled, handleNextStep, + showSelect = false, + totalOptions, + selectedOptions, + onSelectAll, + tip = '', }: ActionsProps) => { const { t } = useTranslation() const { datasetId } = useParams() + const indeterminate = useMemo(() => { + if (!showSelect) return false + if (selectedOptions === undefined || totalOptions === undefined) return false + return selectedOptions > 0 && selectedOptions < totalOptions + }, [showSelect, selectedOptions, totalOptions]) + + const checked = useMemo(() => { + if (!showSelect) return false + if (selectedOptions === undefined || totalOptions === undefined) return false + return selectedOptions > 0 && selectedOptions === totalOptions + }, [showSelect, selectedOptions, totalOptions]) + return ( -
- - + + - - +
) } diff --git a/web/app/components/datasets/documents/create-from-pipeline/data-source/online-drive/file-list/list/index.tsx b/web/app/components/datasets/documents/create-from-pipeline/data-source/online-drive/file-list/list/index.tsx index b4a9534367..3e290999b6 100644 --- a/web/app/components/datasets/documents/create-from-pipeline/data-source/online-drive/file-list/list/index.tsx +++ b/web/app/components/datasets/documents/create-from-pipeline/data-source/online-drive/file-list/list/index.tsx @@ -1,12 +1,11 @@ import React, { useEffect, useMemo, useRef } from 'react' -import type { OnlineDriveFile } from '@/models/pipeline' +import { type OnlineDriveFile, OnlineDriveFileType } from '@/models/pipeline' import Item from './item' import EmptyFolder from './empty-folder' import EmptySearchResult from './empty-search-result' import Loading from '@/app/components/base/loading' import { RiLoader2Line } from '@remixicon/react' import { useFileSupportTypes } from '@/service/use-common' -import { isFile } from '../../utils' import { getFileExtension } from './utils' import { useDataSourceStore } from '../../../store' @@ -84,7 +83,7 @@ const List = ({ fileList.map((file) => { const isSelected = selectedFileList.includes(file.key) const extension = getFileExtension(file.key) - const disabled = isFile(file.key) && !supportedFileTypes.includes(extension) + const disabled = file.type === OnlineDriveFileType.file && !supportedFileTypes.includes(extension) return ( { const { t } = useTranslation() @@ -83,19 +84,39 @@ export const useLocalFile = () => { } export const useOnlineDocuments = () => { + const documentsData = useDataSourceStoreWithSelector(state => state.documentsData) + const currentWorkspaceId = useDataSourceStoreWithSelector(state => state.currentWorkspaceId) const onlineDocuments = useDataSourceStoreWithSelector(state => state.onlineDocuments) const previewOnlineDocumentRef = useDataSourceStoreWithSelector(state => state.previewOnlineDocumentRef) const currentDocument = useDataSourceStoreWithSelector(state => state.currentDocument) const dataSourceStore = useDataSourceStore() + const currentWorkspace = documentsData.find(workspace => workspace.workspace_id === currentWorkspaceId) + + const PagesMapAndSelectedPagesId: DataSourceNotionPageMap = useMemo(() => { + const pagesMap = (documentsData || []).reduce((prev: DataSourceNotionPageMap, next: DataSourceNotionWorkspace) => { + next.pages.forEach((page) => { + prev[page.page_id] = { + ...page, + workspace_id: next.workspace_id, + } + }) + + return prev + }, {}) + return pagesMap + }, [documentsData]) + const hidePreviewOnlineDocument = useCallback(() => { const { setCurrentDocument } = dataSourceStore.getState() setCurrentDocument(undefined) }, [dataSourceStore]) return { + currentWorkspace, onlineDocuments, currentDocument, + PagesMapAndSelectedPagesId, previewOnlineDocumentRef, hidePreviewOnlineDocument, } diff --git a/web/app/components/datasets/documents/create-from-pipeline/index.tsx b/web/app/components/datasets/documents/create-from-pipeline/index.tsx index 8db4014475..539224883c 100644 --- a/web/app/components/datasets/documents/create-from-pipeline/index.tsx +++ b/web/app/components/datasets/documents/create-from-pipeline/index.tsx @@ -30,6 +30,8 @@ import { TransferMethod } from '@/types/app' import { useAddDocumentsSteps, useLocalFile, useOnlineDocuments, useOnlineDrive, useWebsiteCrawl } from './hooks' import DataSourceProvider from './data-source/store/provider' import { useDataSourceStore } from './data-source/store' +import { useFileSupportTypes, useFileUploadConfig } from '@/service/use-common' +import { getFileExtension } from './data-source/online-drive/file-list/list/utils' const CreateFormPipeline = () => { const { t } = useTranslation() @@ -46,6 +48,8 @@ const CreateFormPipeline = () => { const formRef = useRef(null) const { data: pipelineInfo, isFetching: isFetchingPipelineInfo } = usePublishedPipelineInfo(pipelineId || '') + const { data: fileUploadConfigResponse } = useFileUploadConfig() + const { data: supportFileTypesRes } = useFileSupportTypes() const { steps, @@ -61,8 +65,10 @@ const CreateFormPipeline = () => { hidePreviewLocalFile, } = useLocalFile() const { + currentWorkspace, onlineDocuments, currentDocument, + PagesMapAndSelectedPagesId, previewOnlineDocumentRef, hidePreviewOnlineDocument, } = useOnlineDocuments() @@ -107,6 +113,60 @@ const CreateFormPipeline = () => { return false }, [datasource, datasourceType, isShowVectorSpaceFull, fileList.length, allFileLoaded, onlineDocuments.length, websitePages.length, selectedFileList.length]) + const showSelect = useMemo(() => { + if (datasourceType === DatasourceType.onlineDocument) { + const pagesCount = currentWorkspace?.pages.length ?? 0 + return pagesCount > 0 + } + if (datasourceType === DatasourceType.onlineDrive) { + const isBucketList = onlineDriveFileList.some(file => file.type === 'bucket') + return !isBucketList && onlineDriveFileList.length > 0 + } + }, [currentWorkspace?.pages.length, datasourceType, onlineDriveFileList]) + + const supportedFileTypes = useMemo(() => { + if (!supportFileTypesRes) return [] + return Array.from(new Set(supportFileTypesRes.allowed_extensions.map(item => item.toLowerCase()))) + }, [supportFileTypesRes]) + + const fileUploadConfig = useMemo(() => fileUploadConfigResponse ?? { + file_size_limit: 15, + batch_count_limit: 5, + }, [fileUploadConfigResponse]) + + const totalOptions = useMemo(() => { + if (datasourceType === DatasourceType.onlineDocument) + return currentWorkspace?.pages.length + if (datasourceType === DatasourceType.onlineDrive) { + return onlineDriveFileList.filter((item) => { + if (item.type === 'bucket') return false + if (item.type === 'folder') return true + if (item.type === 'file') + return supportedFileTypes.includes(getFileExtension(item.key)) + return false + }).length + } + }, [currentWorkspace?.pages.length, datasourceType, onlineDriveFileList, supportedFileTypes]) + + const selectedOptions = useMemo(() => { + if (datasourceType === DatasourceType.onlineDocument) + return onlineDocuments.length + if (datasourceType === DatasourceType.onlineDrive) + return selectedFileList.length + }, [datasourceType, onlineDocuments.length, selectedFileList.length]) + + const tip = useMemo(() => { + if (datasourceType === DatasourceType.onlineDocument) + return t('datasetPipeline.addDocuments.selectOnlineDocumentTip', { count: 50 }) + if (datasourceType === DatasourceType.onlineDrive) { + return t('datasetPipeline.addDocuments.selectOnlineDriveTip', { + count: fileUploadConfig.batch_count_limit, + fileSize: fileUploadConfig.file_size_limit, + }) + } + return '' + }, [datasourceType, fileUploadConfig.batch_count_limit, fileUploadConfig.file_size_limit, t]) + const { mutateAsync: runPublishedPipeline, isIdle, isPending } = useRunPublishedPipeline() const handlePreviewChunks = useCallback(async (data: Record) => { @@ -242,6 +302,35 @@ const CreateFormPipeline = () => { onClickPreview() }, [onClickPreview, previewWebsitePageRef]) + const handleSelectAll = useCallback(() => { + const { setOnlineDocuments, setSelectedFileList, setSelectedPagesId } = dataSourceStore.getState() + if (datasourceType === DatasourceType.onlineDocument) { + const allIds = currentWorkspace?.pages.map(page => page.page_id) || [] + if (onlineDocuments.length < allIds.length) { + const selectedPages = Array.from(allIds).map(pageId => PagesMapAndSelectedPagesId[pageId]) + setOnlineDocuments(selectedPages) + setSelectedPagesId(new Set(allIds)) + } + else { + setOnlineDocuments([]) + setSelectedPagesId(new Set()) + } + } + if (datasourceType === DatasourceType.onlineDrive) { + const allKeys = onlineDriveFileList.filter((item) => { + if (item.type === 'bucket') return false + if (item.type === 'folder') return true + if (item.type === 'file') + return supportedFileTypes.includes(getFileExtension(item.key)) + return false + }).map(file => file.key) + if (selectedFileList.length < allKeys.length) + setSelectedFileList(allKeys) + else + setSelectedFileList([]) + } + }, [PagesMapAndSelectedPagesId, currentWorkspace?.pages, dataSourceStore, datasourceType, onlineDocuments.length, onlineDriveFileList, selectedFileList.length, supportedFileTypes]) + if (isFetchingPipelineInfo) { return ( @@ -295,7 +384,15 @@ const CreateFormPipeline = () => { {isShowVectorSpaceFull && ( )} - + ) } diff --git a/web/i18n/en-US/dataset-pipeline.ts b/web/i18n/en-US/dataset-pipeline.ts index d35d67c618..af0510ebf9 100644 --- a/web/i18n/en-US/dataset-pipeline.ts +++ b/web/i18n/en-US/dataset-pipeline.ts @@ -106,6 +106,8 @@ const translation = { learnMore: 'Learn more', }, characters: 'characters', + selectOnlineDocumentTip: 'Process up to {{count}} pages', + selectOnlineDriveTip: 'Process up to {{count}} files, maximum {{fileSize}} MB each', }, documentSettings: { title: 'Document Settings', diff --git a/web/i18n/zh-Hans/dataset-pipeline.ts b/web/i18n/zh-Hans/dataset-pipeline.ts index 0c2c18dd42..cd1b3a5a89 100644 --- a/web/i18n/zh-Hans/dataset-pipeline.ts +++ b/web/i18n/zh-Hans/dataset-pipeline.ts @@ -106,6 +106,8 @@ const translation = { learnMore: '了解更多', }, characters: '字符', + selectOnlineDocumentTip: '最多处理 {{count}} 页', + selectOnlineDriveTip: '最多处理 {{count}} 个文件,每个文件最大 {{fileSize}} MB', }, documentSettings: { title: '文档设置', diff --git a/web/types/pipeline.tsx b/web/types/pipeline.tsx index 080c98b47e..a4321c6707 100644 --- a/web/types/pipeline.tsx +++ b/web/types/pipeline.tsx @@ -23,7 +23,7 @@ export type OnlineDriveData = { export type DataSourceNodeCompletedResponse = { event: 'datasource_completed' - data: OnlineDriveData[] + data: any time_consuming: number }