diff --git a/web/app/components/datasets/create/step-two/index.tsx b/web/app/components/datasets/create/step-two/index.tsx index 9ecd885c51..95e2e766df 100644 --- a/web/app/components/datasets/create/step-two/index.tsx +++ b/web/app/components/datasets/create/step-two/index.tsx @@ -20,6 +20,8 @@ import { PreviewContainer } from '../../preview/container' import { ChunkContainer, QAPreview } from '../../chunk' import { PreviewHeader } from '../../preview/header' import DocumentPicker from '../../common/document-picker' +import { FormattedText } from '../../formatted-text/formatted' +import { PreviewSlice } from '../../formatted-text/flavours/preview-slice' import s from './index.module.css' import unescape from './unescape' import escape from './escape' @@ -27,7 +29,7 @@ import { OptionCard } from './option-card' import LanguageSelect from './language-select' import { DelimiterInput, MaxLengthInput, OverlapInput } from './inputs' import cn from '@/utils/classnames' -import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, FullDocumentDetail, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets' +import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, FullDocumentDetail, ParentMode, PreProcessingRule, ProcessRule, Rules, createDocumentResponse } from '@/models/datasets' import Button from '@/app/components/base/button' import FloatRightContainer from '@/app/components/base/float-right-container' @@ -38,7 +40,7 @@ import { ensureRerankModelSelected, isReRankModelSelected } from '@/app/componen import Toast from '@/app/components/base/toast' import type { NotionPage } from '@/models/common' import { DataSourceProvider } from '@/models/common' -import { DataSourceType, DocForm } from '@/models/datasets' +import { ChuckingMode, DataSourceType } from '@/models/datasets' import { useDatasetDetailContext } from '@/context/dataset-detail' import I18n from '@/context/i18n' import { RETRIEVE_METHOD } from 
'@/types/app' @@ -96,7 +98,7 @@ export enum IndexingType { const DEFAULT_SEGMENT_IDENTIFIER = '\\n\\n' type ParentChildConfig = { - chunkForContext: 'paragraph' | 'full_doc' + chunkForContext: ParentMode parent: { delimiter: string maxLength: number @@ -111,11 +113,11 @@ const defaultParentChildConfig: ParentChildConfig = { chunkForContext: 'paragraph', parent: { delimiter: '\\n\\n', - maxLength: 4000, + maxLength: 500, }, child: { delimiter: '\\n\\n', - maxLength: 4000, + maxLength: 200, }, } @@ -148,7 +150,7 @@ const StepTwo = ({ const { dataset: currentDataset, mutateDatasetRes } = useDatasetDetailContext() const isInCreatePage = !datasetId || (datasetId && !currentDataset?.data_source_type) const dataSourceType = isInCreatePage ? inCreatePageDataSourceType : currentDataset?.data_source_type - const [segmentationType, setSegmentationType] = useState(SegmentType.AUTO) + const [segmentationType, setSegmentationType] = useState(SegmentType.CUSTOM) const [segmentIdentifier, doSetSegmentIdentifier] = useState(DEFAULT_SEGMENT_IDENTIFIER) const setSegmentIdentifier = useCallback((value: string) => { doSetSegmentIdentifier(value ? escape(value) : DEFAULT_SEGMENT_IDENTIFIER) @@ -168,9 +170,14 @@ const StepTwo = ({ // QA Related const [isLanguageSelectDisabled, setIsLanguageSelectDisabled] = useState(false) - const [docForm, setDocForm] = useState( - (datasetId && documentDetail) ? documentDetail.doc_form : DocForm.TEXT, + const [docForm, setDocForm] = useState( + (datasetId && documentDetail) ? documentDetail.doc_form as ChuckingMode : ChuckingMode.text, ) + const handleChangeDocform = (value: ChuckingMode) => { + setDocForm(value) + // eslint-disable-next-line @typescript-eslint/no-use-before-define + currentEstimateMutation.reset() + } const [docLanguage, setDocLanguage] = useState( (datasetId && documentDetail) ? documentDetail.doc_language : (locale !== LanguagesSupported[1] ? 
'English' : 'Chinese'), @@ -180,28 +187,42 @@ const StepTwo = ({ const getIndexing_technique = () => indexingType || indexType - const getProcessRule = () => { - const processRule: ProcessRule = { - rules: {} as any, // api will check this. It will be removed after api refactored. - mode: segmentationType, + const getProcessRule = (): ProcessRule => { + if (docForm === ChuckingMode.parentChild) { + return { + rules: { + pre_processing_rules: rules, + segmentation: { + separator: unescape( + parentChildConfig.parent.delimiter, + ), + max_tokens: parentChildConfig.parent.maxLength, + chunk_overlap: overlap, + }, + parent_mode: parentChildConfig.chunkForContext, + subchunk_segmentation: { + separator: parentChildConfig.child.delimiter, + max_tokens: parentChildConfig.child.maxLength, + }, + }, // api will check this. It will be removed after api refactored. + mode: 'hierarchical', + } as ProcessRule } - if (segmentationType === SegmentType.CUSTOM) { - const ruleObj = { + return { + rules: { pre_processing_rules: rules, segmentation: { separator: unescape(segmentIdentifier), max_tokens: maxChunkLength, chunk_overlap: overlap, }, - } - // @ts-expect-error will be removed after api refactored. - processRule.rules = ruleObj - } - return processRule + }, // api will check this. It will be removed after api refactored. 
+ mode: segmentationType, + } as ProcessRule } const fileIndexingEstimateQuery = useFetchFileIndexingEstimateForFile({ - docForm: docForm as DocForm, + docForm, docLanguage, dataSourceType: DataSourceType.FILE, files, @@ -210,7 +231,7 @@ const StepTwo = ({ dataset_id: datasetId!, }) const notionIndexingEstimateQuery = useFetchFileIndexingEstimateForNotion({ - docForm: docForm as DocForm, + docForm, docLanguage, dataSourceType: DataSourceType.NOTION, notionPages, @@ -220,7 +241,7 @@ const StepTwo = ({ }) const websiteIndexingEstimateQuery = useFetchFileIndexingEstimateForWeb({ - docForm: docForm as DocForm, + docForm, docLanguage, dataSourceType: DataSourceType.WEB, websitePages, @@ -481,29 +502,11 @@ const StepTwo = ({ isSetting && onSave && onSave() } - const handleDocformSwitch = (isQAMode: boolean) => { - if (isQAMode) - setDocForm(DocForm.QA) - else - setDocForm(DocForm.TEXT) - } - - const previewSwitch = () => { - setIsLanguageSelectDisabled(true) - fetchEstimate() - } - - const handleSelect = (language: string) => { - setDocLanguage(language) - // Switch language, re-cutter - if (docForm === DocForm.QA) - previewSwitch() - } - const changeToEconomicalType = () => { if (!hasSetIndexType) { setIndexType(IndexingType.ECONOMICAL) - setDocForm(DocForm.TEXT) + if (docForm === ChuckingMode.qa) + handleChangeDocform(ChuckingMode.text) } } @@ -519,11 +522,6 @@ const StepTwo = ({ // eslint-disable-next-line react-hooks/exhaustive-deps }, []) - useEffect(() => { - if (indexingType === IndexingType.ECONOMICAL && docForm === DocForm.QA) - setDocForm(DocForm.TEXT) - }, [indexingType, docForm]) - useEffect(() => { // get indexing type by props if (indexingType) @@ -557,8 +555,8 @@ const StepTwo = ({ icon={{t('datasetCreation.stepTwo.general')}} activeHeaderClassName='bg-gradient-to-r from-[#EFF0F9] to-[#F9FAFB]' description={t('datasetCreation.stepTwo.generalTip')} - isActive={SegmentType.AUTO === segmentationType} - onClick={() => setSegmentationType(SegmentType.AUTO)} + 
isActive={docForm === ChuckingMode.qa || docForm === ChuckingMode.text} + onSelect={() => handleChangeDocform(ChuckingMode.text)} actions={ <>
+ {onArchive && ( +
+ + +
+ )}
-
+ + { + isShowDeleteConfirm && ( + + ) + } ) } diff --git a/web/app/components/datasets/documents/list.tsx b/web/app/components/datasets/documents/list.tsx index cd7162603b..ce35e78a8a 100644 --- a/web/app/components/datasets/documents/list.tsx +++ b/web/app/components/datasets/documents/list.tsx @@ -21,27 +21,28 @@ import { Globe01 } from '../../base/icons/src/vender/line/mapsAndTravel' import ChunkingModeLabel from '../common/chunking-mode-label' import s from './style.module.css' import RenameModal from './rename-modal' +import BatchAction from './detail/completed/batch-action' import cn from '@/utils/classnames' import Switch from '@/app/components/base/switch' import Divider from '@/app/components/base/divider' import Popover from '@/app/components/base/popover' import Confirm from '@/app/components/base/confirm' import Tooltip from '@/app/components/base/tooltip' -import { ToastContext } from '@/app/components/base/toast' +import Toast, { ToastContext } from '@/app/components/base/toast' import type { ColorMap, IndicatorProps } from '@/app/components/header/indicator' import Indicator from '@/app/components/header/indicator' import { asyncRunSafe } from '@/utils' import { formatNumber } from '@/utils/format' -import { archiveDocument, deleteDocument, disableDocument, enableDocument, syncDocument, syncWebsite, unArchiveDocument } from '@/service/datasets' import NotionIcon from '@/app/components/base/notion-icon' import ProgressBar from '@/app/components/base/progress-bar' -import { ChuckingMode, DataSourceType, type DocumentDisplayStatus, type SimpleDocumentDetail } from '@/models/datasets' +import { ChuckingMode, DataSourceType, DocumentActionType, type DocumentDisplayStatus, type SimpleDocumentDetail } from '@/models/datasets' import type { CommonResponse } from '@/models/common' import useTimestamp from '@/hooks/use-timestamp' import { useDatasetDetailContextWithSelector as useDatasetDetailContext } from '@/context/dataset-detail' import type { Props as 
PaginationProps } from '@/app/components/base/pagination' import Pagination from '@/app/components/base/pagination' import Checkbox from '@/app/components/base/checkbox' +import { useDocumentArchive, useDocumentDelete, useDocumentDisable, useDocumentEnable, useDocumentUnArchive, useSyncDocument, useSyncWebsite } from '@/service/knowledge/use-document' export const useIndexStatus = () => { const { t } = useTranslation() @@ -87,6 +88,9 @@ export const StatusItem: FC<{ const { enabled = false, archived = false, id = '' } = detail || {} const { notify } = useContext(ToastContext) const { t } = useTranslation() + const { mutateAsync: enableDocument } = useDocumentEnable() + const { mutateAsync: disableDocument } = useDocumentDisable() + const { mutateAsync: deleteDocument } = useDocumentDelete() const onOperate = async (operationName: OperationName) => { let opApi = deleteDocument @@ -99,11 +103,11 @@ export const StatusItem: FC<{ break } const [e] = await asyncRunSafe(opApi({ datasetId, documentId: id }) as Promise) - if (!e) + if (!e) { notify({ type: 'success', message: t('common.actionMsg.modifiedSuccessfully') }) - else - notify({ type: 'error', message: t('common.actionMsg.modifiedUnsuccessfully') }) - onUpdate?.(operationName) + onUpdate?.(operationName) + } + else { notify({ type: 'error', message: t('common.actionMsg.modifiedUnsuccessfully') }) } } const { run: handleSwitch } = useDebounceFn((operationName: OperationName) => { @@ -179,7 +183,13 @@ export const OperationAction: FC<{ const { notify } = useContext(ToastContext) const { t } = useTranslation() const router = useRouter() - + const { mutateAsync: archiveDocument } = useDocumentArchive() + const { mutateAsync: unArchiveDocument } = useDocumentUnArchive() + const { mutateAsync: enableDocument } = useDocumentEnable() + const { mutateAsync: disableDocument } = useDocumentDisable() + const { mutateAsync: deleteDocument } = useDocumentDelete() + const { mutateAsync: syncDocument } = useSyncDocument() + 
const { mutateAsync: syncWebsite } = useSyncWebsite() const isListScene = scene === 'list' const onOperate = async (operationName: OperationName) => { @@ -200,10 +210,8 @@ export const OperationAction: FC<{ case 'sync': if (data_source_type === 'notion_import') opApi = syncDocument - else opApi = syncWebsite - break default: opApi = deleteDocument @@ -211,13 +219,13 @@ export const OperationAction: FC<{ break } const [e] = await asyncRunSafe(opApi({ datasetId, documentId: id }) as Promise) - if (!e) + if (!e) { notify({ type: 'success', message: t('common.actionMsg.modifiedSuccessfully') }) - else - notify({ type: 'error', message: t('common.actionMsg.modifiedUnsuccessfully') }) + onUpdate(operationName) + } + else { notify({ type: 'error', message: t('common.actionMsg.modifiedUnsuccessfully') }) } if (operationName === 'delete') setDeleting(false) - onUpdate(operationName) } const { run: handleSwitch } = useDebounceFn((operationName: OperationName) => { @@ -454,6 +462,37 @@ const DocumentList: FC = ({ else onSelectedIdChange(uniq([...selectedIds, ...localDocs.map(doc => doc.id)])) }, [isAllSelected, localDocs, onSelectedIdChange, selectedIds]) + const { mutateAsync: archiveDocument } = useDocumentArchive() + const { mutateAsync: enableDocument } = useDocumentEnable() + const { mutateAsync: disableDocument } = useDocumentDisable() + const { mutateAsync: deleteDocument } = useDocumentDelete() + + const handleAction = (actionName: DocumentActionType) => { + return async () => { + let opApi = deleteDocument + switch (actionName) { + case DocumentActionType.archive: + opApi = archiveDocument + break + case DocumentActionType.enable: + opApi = enableDocument + break + case DocumentActionType.disable: + opApi = disableDocument + break + default: + opApi = deleteDocument + break + } + const [e] = await asyncRunSafe(opApi({ datasetId, documentIds: selectedIds }) as Promise) + + if (!e) { + Toast.notify({ type: 'success', message: t('common.actionMsg.modifiedSuccessfully') 
}) + onUpdate() + } + else { Toast.notify({ type: 'error', message: t('common.actionMsg.modifiedUnsuccessfully') }) } + } + } return (
@@ -576,6 +615,19 @@ const DocumentList: FC = ({ })} + {(selectedIds.length > 0) && ( + { + onSelectedIdChange([]) + }} + /> + )} {/* Show Pagination only if the total is more than the limit */} {pagination.total && pagination.total > (pagination.limit || 10) && ( & { header: ReactNode + mainClassName?: string } export const PreviewContainer: FC = forwardRef((props, ref) => { - const { children, className, header, ...rest } = props + const { children, className, header, mainClassName, ...rest } = props return
= forwardRef((props, re
{header}
-
+
{children}
diff --git a/web/i18n/en-US/dataset.ts b/web/i18n/en-US/dataset.ts index 59f105adca..75eda76a24 100644 --- a/web/i18n/en-US/dataset.ts +++ b/web/i18n/en-US/dataset.ts @@ -150,6 +150,14 @@ const translation = { nTo1RetrievalLegacy: 'N-to-1 retrieval will be officially deprecated from September. It is recommended to use the latest Multi-path retrieval to obtain better results. ', nTo1RetrievalLegacyLink: 'Learn more', nTo1RetrievalLegacyLinkText: ' N-to-1 retrieval will be officially deprecated in September.', + batchAction: { + selected: 'Selected', + enable: 'Enable', + disable: 'Disable', + archive: 'Archive', + delete: 'Delete', + cancel: 'Cancel', + }, } export default translation diff --git a/web/i18n/zh-Hans/dataset.ts b/web/i18n/zh-Hans/dataset.ts index 801a974cbc..1d4897a69f 100644 --- a/web/i18n/zh-Hans/dataset.ts +++ b/web/i18n/zh-Hans/dataset.ts @@ -150,6 +150,14 @@ const translation = { nTo1RetrievalLegacy: '9 月 1 日起我们将不再提供此能力,推荐使用最新的多路召回获得更好的检索效果。', nTo1RetrievalLegacyLink: '了解更多', nTo1RetrievalLegacyLinkText: '9 月 1 日起我们将不再提供此能力。', + batchAction: { + selected: '已选择', + enable: '启用', + disable: '禁用', + archive: '归档', + delete: '删除', + cancel: '取消', + }, } export default translation diff --git a/web/models/datasets.ts b/web/models/datasets.ts index 6ec8284b6e..1c9999008e 100644 --- a/web/models/datasets.ts +++ b/web/models/datasets.ts @@ -151,7 +151,7 @@ export type IndexingEstimateResponse = { total_price: number currency: string total_segments: number - preview: string[] + preview: Array<{ content: string; child_chunks: string[] }> qa_preview?: QA[] } @@ -304,7 +304,7 @@ export type DocumentListResponse = { export type DocumentReq = { original_document_id?: string indexing_technique?: string - doc_form: 'text_model' | 'qa_model' + doc_form: ChuckingMode doc_language: string process_rule: ProcessRule } @@ -346,7 +346,7 @@ export type NotionPage = { } export type ProcessRule = { - mode: string + mode: ChildChunkType | 'hierarchical' rules: Rules } @@ 
-623,3 +623,23 @@ export type ChildSegmentResponse = { page: number limit: number } + +export type UpdateDocumentParams = { + datasetId: string + documentId: string +} + +// Used in api url +export enum DocumentActionType { + enable = 'enable', + disable = 'disable', + archive = 'archive', + unArchive = 'un_archive', + delete = 'delete', +} + +export type UpdateDocumentBatchParams = { + datasetId: string + documentId?: string + documentIds?: string[] | string +} diff --git a/web/service/datasets.ts b/web/service/datasets.ts index 90411efd4e..c19eaf2a5b 100644 --- a/web/service/datasets.ts +++ b/web/service/datasets.ts @@ -171,34 +171,6 @@ export const resumeDocIndexing: Fetcher = ({ datas return patch(`/datasets/${datasetId}/documents/${documentId}/processing/resume`) } -export const deleteDocument: Fetcher = ({ datasetId, documentId }) => { - return del(`/datasets/${datasetId}/documents/${documentId}`) -} - -export const archiveDocument: Fetcher = ({ datasetId, documentId }) => { - return patch(`/datasets/${datasetId}/documents/${documentId}/status/archive`) -} - -export const unArchiveDocument: Fetcher = ({ datasetId, documentId }) => { - return patch(`/datasets/${datasetId}/documents/${documentId}/status/un_archive`) -} - -export const enableDocument: Fetcher = ({ datasetId, documentId }) => { - return patch(`/datasets/${datasetId}/documents/${documentId}/status/enable`) -} - -export const disableDocument: Fetcher = ({ datasetId, documentId }) => { - return patch(`/datasets/${datasetId}/documents/${documentId}/status/disable`) -} - -export const syncDocument: Fetcher = ({ datasetId, documentId }) => { - return get(`/datasets/${datasetId}/documents/${documentId}/notion/sync`) -} - -export const syncWebsite: Fetcher = ({ datasetId, documentId }) => { - return get(`/datasets/${datasetId}/documents/${documentId}/website-sync`) -} - export const preImportNotionPages: Fetcher<{ notion_info: DataSourceNotionWorkspace[] }, { url: string; datasetId?: string }> = ({ url, 
datasetId }) => { return get<{ notion_info: DataSourceNotionWorkspace[] }>(url, { params: { dataset_id: datasetId } }) } diff --git a/web/service/knowledge/use-create-dataset.ts b/web/service/knowledge/use-create-dataset.ts index d6aa97fbb9..410e20e7a7 100644 --- a/web/service/knowledge/use-create-dataset.ts +++ b/web/service/knowledge/use-create-dataset.ts @@ -3,7 +3,7 @@ import type { MutationOptions } from '@tanstack/react-query' import { useMutation } from '@tanstack/react-query' import { createDocument, createFirstDocument, fetchDefaultProcessRule, fetchFileIndexingEstimate } from '../datasets' import { type IndexingType } from '@/app/components/datasets/create/step-two' -import type { CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, DataSourceType, DocForm, FileIndexingEstimateResponse, IndexingEstimateParams, NotionInfo, ProcessRule, ProcessRuleResponse, createDocumentResponse } from '@/models/datasets' +import type { ChuckingMode, CrawlOptions, CrawlResultItem, CreateDocumentReq, CustomFile, DataSourceType, FileIndexingEstimateResponse, IndexingEstimateParams, NotionInfo, ProcessRule, ProcessRuleResponse, createDocumentResponse } from '@/models/datasets' import type { DataSourceProvider, NotionPage } from '@/models/common' export const getNotionInfo = ( @@ -50,7 +50,7 @@ export const getWebsiteInfo = ( } type GetFileIndexingEstimateParamsOptionBase = { - docForm: DocForm + docForm: ChuckingMode docLanguage: string indexingTechnique: IndexingType processRule: ProcessRule diff --git a/web/service/knowledge/use-document.ts b/web/service/knowledge/use-document.ts index 909f33ac67..5b200d899d 100644 --- a/web/service/knowledge/use-document.ts +++ b/web/service/knowledge/use-document.ts @@ -1,8 +1,11 @@ import { + useMutation, useQuery, } from '@tanstack/react-query' -import { get } from '../base' -import type { SimpleDocumentDetail } from '@/models/datasets' +import { del, get, patch } from '../base' +import type { SimpleDocumentDetail, 
// Relies on names imported at the top of use-document.ts: `useMutation`,
// `del` / `get` / `patch`, `DocumentActionType` and the
// `UpdateDocumentBatchParams` type.

// NOTE(review): presumably the query-key namespace for the query hooks in this
// module; its usage lies outside the visible part of the file — confirm.
const NAME_SPACE = 'knowledge/document'

/**
 * Builds the repeated `document_id=<id>` query string used by the batch
 * document endpoints.
 *
 * Each id is percent-encoded so that an id containing `&`, `=`, `#` or
 * whitespace cannot corrupt the query string or inject extra parameters.
 *
 * @param documentIds - A single id or a list of ids. When omitted, an empty
 *   string is returned instead of the literal `document_id=undefined`.
 * @returns The ids joined as `document_id=a&document_id=b`.
 */
const toBatchDocumentsIdParams = (documentIds: string[] | string = []) => {
  const ids = Array.isArray(documentIds) ? documentIds : [documentIds]
  return ids.map(id => `document_id=${encodeURIComponent(id)}`).join('&')
}

/**
 * Mutation hook that PATCHes `/datasets/{datasetId}/documents/status/{action}`
 * for one or many documents.
 *
 * @param action - Status action; its enum value is used verbatim as a URL segment.
 * @returns The `useMutation` result. The mutation variables accept either
 *   `documentId` (single) or `documentIds` (batch); a non-empty `documentId`
 *   takes precedence.
 */
export const useDocumentBatchAction = (action: DocumentActionType) => {
  return useMutation({
    mutationFn: ({ datasetId, documentIds, documentId }: UpdateDocumentBatchParams) => {
      const query = toBatchDocumentsIdParams(documentId || documentIds)
      return patch(`/datasets/${datasetId}/documents/status/${action}?${query}`)
    },
  })
}

/** Mutation hook for the `enable` status action. */
export const useDocumentEnable = () => {
  return useDocumentBatchAction(DocumentActionType.enable)
}

/** Mutation hook for the `disable` status action. */
export const useDocumentDisable = () => {
  return useDocumentBatchAction(DocumentActionType.disable)
}

/** Mutation hook for the `archive` status action. */
export const useDocumentArchive = () => {
  return useDocumentBatchAction(DocumentActionType.archive)
}

/** Mutation hook for the `un_archive` status action. */
export const useDocumentUnArchive = () => {
  return useDocumentBatchAction(DocumentActionType.unArchive)
}

/**
 * Mutation hook that DELETEs one or many documents via
 * `/datasets/{datasetId}/documents?document_id=...`.
 *
 * Accepts the same variables as {@link useDocumentBatchAction}.
 */
export const useDocumentDelete = () => {
  return useMutation({
    mutationFn: ({ datasetId, documentIds, documentId }: UpdateDocumentBatchParams) => {
      const query = toBatchDocumentsIdParams(documentId || documentIds)
      return del(`/datasets/${datasetId}/documents?${query}`)
    },
  })
}

/**
 * Mutation hook that triggers a Notion sync for a single document.
 *
 * Only `documentId` is read; `documentIds` is ignored. The parameter type
 * marks `documentId` as optional, but it is interpolated into the URL path,
 * so callers must always provide it.
 */
export const useSyncDocument = () => {
  return useMutation({
    mutationFn: ({ datasetId, documentId }: UpdateDocumentBatchParams) => {
      return get(`/datasets/${datasetId}/documents/${documentId}/notion/sync`)
    },
  })
}

/**
 * Mutation hook that triggers a website sync for a single document.
 *
 * Only `documentId` is read; `documentIds` is ignored. The parameter type
 * marks `documentId` as optional, but it is interpolated into the URL path,
 * so callers must always provide it.
 */
export const useSyncWebsite = () => {
  return useMutation({
    mutationFn: ({ datasetId, documentId }: UpdateDocumentBatchParams) => {
      return get(`/datasets/${datasetId}/documents/${documentId}/website-sync`)
    },
  })
}