diff --git a/web/app/components/base/badge.tsx b/web/app/components/base/badge.tsx index 722fde3237..787b005a8f 100644 --- a/web/app/components/base/badge.tsx +++ b/web/app/components/base/badge.tsx @@ -3,13 +3,15 @@ import cn from '@/utils/classnames' type BadgeProps = { className?: string - text: string + text?: string + children?: React.ReactNode uppercase?: boolean } const Badge = ({ className, text, + children, uppercase = true, }: BadgeProps) => { return ( @@ -20,7 +22,7 @@ const Badge = ({ className, )} > - {text} + {children || text} ) } diff --git a/web/app/components/base/pagination/index.tsx b/web/app/components/base/pagination/index.tsx index b64c712425..c0cc9f86ec 100644 --- a/web/app/components/base/pagination/index.tsx +++ b/web/app/components/base/pagination/index.tsx @@ -8,7 +8,7 @@ import Button from '@/app/components/base/button' import Input from '@/app/components/base/input' import cn from '@/utils/classnames' -type Props = { +export type Props = { className?: string current: number onChange: (cur: number) => void diff --git a/web/app/components/datasets/common/chunking-mode-label.tsx b/web/app/components/datasets/common/chunking-mode-label.tsx new file mode 100644 index 0000000000..7c6e924009 --- /dev/null +++ b/web/app/components/datasets/common/chunking-mode-label.tsx @@ -0,0 +1,29 @@ +'use client' +import type { FC } from 'react' +import React from 'react' +import { useTranslation } from 'react-i18next' +import Badge from '@/app/components/base/badge' +import { GeneralType, ParentChildType } from '@/app/components/base/icons/src/public/knowledge' + +type Props = { + isGeneralMode: boolean + isQAMode: boolean +} + +const ChunkingModeLabel: FC = ({ + isGeneralMode, + isQAMode, +}) => { + const { t } = useTranslation() + const TypeIcon = isGeneralMode ? GeneralType : ParentChildType + + return ( + +
+ + {isGeneralMode ? `${t('dataset.chunkingMode.general')}${isQAMode ? ' · QA' : ''}` : t('dataset.chunkingMode.parentChild')} +
+
+ ) +} +export default React.memo(ChunkingModeLabel) diff --git a/web/app/components/datasets/common/document-picker/index.tsx b/web/app/components/datasets/common/document-picker/index.tsx index a78a004f3b..0f7f0aa69a 100644 --- a/web/app/components/datasets/common/document-picker/index.tsx +++ b/web/app/components/datasets/common/document-picker/index.tsx @@ -3,8 +3,10 @@ import type { FC } from 'react' import React, { useState } from 'react' import { useBoolean } from 'ahooks' import { RiArrowDownSLine, RiArrowUpSLine } from '@remixicon/react' +import { useTranslation } from 'react-i18next' import FileIcon from '../document-file-icon' -import type { ParentMode, ProcessMode, SimpleDocumentDetail } from '@/models/datasets' +import type { ParentMode, SimpleDocumentDetail } from '@/models/datasets' +import { ProcessMode } from '@/models/datasets' import { PortalToFollowElem, PortalToFollowElemContent, @@ -32,6 +34,7 @@ const DocumentPicker: FC = ({ value, onChange, }) => { + const { t } = useTranslation() const { name, extension, @@ -49,7 +52,7 @@ const DocumentPicker: FC = ({ }, }) const documentsList = data?.data - const isParentChild = processMode === 'hierarchical' + const isParentChild = processMode === ProcessMode.parentChild const TypeIcon = isParentChild ? ParentChildType : GeneralType const [open, { @@ -75,7 +78,7 @@ const DocumentPicker: FC = ({
- {isParentChild ? 'Parent-Child' : 'General'} + {isParentChild ? t('dataset.chunkingMode.parentChild') : t('dataset.chunkingMode.general')} {isParentChild && ` · ${parentMode || '--'}`}
diff --git a/web/app/components/datasets/create/step-two/index.tsx b/web/app/components/datasets/create/step-two/index.tsx index b525fac1e2..9ecd885c51 100644 --- a/web/app/components/datasets/create/step-two/index.tsx +++ b/web/app/components/datasets/create/step-two/index.tsx @@ -4,8 +4,8 @@ import React, { useCallback, useEffect, useState } from 'react' import { useTranslation } from 'react-i18next' import { useContext } from 'use-context-selector' import { + RiAlertFill, RiArrowLeftLine, - RiCloseLine, RiSearchEyeLine, } from '@remixicon/react' import Link from 'next/link' @@ -50,12 +50,9 @@ import type { DefaultModel } from '@/app/components/header/account-setting/model import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations' import Checkbox from '@/app/components/base/checkbox' import RadioCard from '@/app/components/base/radio-card' -import { MessageChatSquare } from '@/app/components/base/icons/src/public/common' import { IS_CE_EDITION } from '@/config' -import Switch from '@/app/components/base/switch' import Divider from '@/app/components/base/divider' import { getNotionInfo, getWebsiteInfo, useCreateDocument, useCreateFirstDocument, useFetchDefaultProcessRule, useFetchFileIndexingEstimateForFile, useFetchFileIndexingEstimateForNotion, useFetchFileIndexingEstimateForWeb } from '@/service/knowledge/use-create-dataset' -import Loading from '@/app/components/base/loading' import Badge from '@/app/components/base/badge' import { SkeletonContanier, SkeletonPoint, SkeletonRectangle, SkeletonRow } from '@/app/components/base/skeleton' import Tooltip from '@/app/components/base/tooltip' @@ -178,7 +175,6 @@ const StepTwo = ({ const [docLanguage, setDocLanguage] = useState( (datasetId && documentDetail) ? documentDetail.doc_language : (locale !== LanguagesSupported[1] ? 'English' : 'Chinese'), ) - const [QATipHide, setQATipHide] = useState(false) const [parentChildConfig, setParentChildConfig] = useState(defaultParentChildConfig) @@ -608,31 +604,46 @@ const StepTwo = ({ - {IS_CE_EDITION &&
- { - if (docForm === DocForm.QA) - setDocForm(DocForm.TEXT) - else - setDocForm(DocForm.QA) - }} - className='mr-2' - /> -
- - Chunk using Q&A format in - -
- + {IS_CE_EDITION && <> +
+ { + if (docForm === DocForm.QA) + setDocForm(DocForm.TEXT) + else + setDocForm(DocForm.QA) + }} + className='mr-2' + /> +
+ + {t('datasetCreation.stepTwo.QALanguage')} + +
+ +
+
-
-
} + {docForm === DocForm.QA && ( +
+ + + {t('datasetCreation.stepTwo.QATip')} + +
+ )} + }
{t('datasetCreation.stepTwo.datasetSettingLink')}
)} - {IS_CE_EDITION && indexType === IndexingType.QUALIFIED && ( -
-
-
- -
-
-
{t('datasetCreation.stepTwo.QATitle')}
-
- {t('datasetCreation.stepTwo.QALanguage')} - -
-
- -
- {docForm === DocForm.QA && !QATipHide && ( -
- {t('datasetCreation.stepTwo.QATip')} - setQATipHide(true)} /> -
- )} -
- )} {/* Embedding model */} {indexType === IndexingType.QUALIFIED && (
@@ -958,11 +942,6 @@ const StepTwo = ({ )) )} - {docForm === DocForm.QA && !estimate?.qa_preview && ( -
- -
- )} {currentEstimateMutation.isIdle && (
diff --git a/web/app/components/datasets/create/step-two/option-card.tsx b/web/app/components/datasets/create/step-two/option-card.tsx index da0486d204..40be777b1c 100644 --- a/web/app/components/datasets/create/step-two/option-card.tsx +++ b/web/app/components/datasets/create/step-two/option-card.tsx @@ -22,7 +22,7 @@ type OptionCardHeaderProps = { export const OptionCardHeader: FC = (props) => { const { icon, title, description, isActive, activeClassName, effectImg } = props return
@@ -56,7 +56,7 @@ export const OptionCard: FC = (props) => { const { icon, className, title, description, isActive, children, actions, activeHeaderClassName, style, effectImg, ...rest } = props return
) => { return @@ -75,12 +72,14 @@ type IDocumentsProps = { } export const fetcher = (url: string) => get(url, {}, {}) +const DEFAULT_LIMIT = 15 const Documents: FC = ({ datasetId }) => { const { t } = useTranslation() const [inputValue, setInputValue] = useState('') // the input value const [searchValue, setSearchValue] = useState('') const [currPage, setCurrPage] = React.useState(0) + const [limit, setLimit] = useState(DEFAULT_LIMIT) const router = useRouter() const { dataset } = useDatasetDetailContext() const [notionPageSelectorModalVisible, setNotionPageSelectorModalVisible] = useState(false) @@ -94,7 +93,7 @@ const Documents: FC = ({ datasetId }) => { const query = useMemo(() => { return { page: currPage + 1, limit, keyword: debouncedSearchValue, fetch: isDataSourceNotion ? true : '' } - }, [currPage, debouncedSearchValue, isDataSourceNotion]) + }, [currPage, debouncedSearchValue, isDataSourceNotion, limit]) const { data: documentsRes, error, mutate } = useSWR( { @@ -196,7 +195,7 @@ const Documents: FC = ({ datasetId }) => { } const documentsList = isDataSourceNotion ? documentsWithProgress?.data : documentsRes?.data - + const [selectedIds, setSelectedIds] = useState([]) const { run: handleSearch } = useDebounceFn(() => { setSearchValue(inputValue) }, { wait: 500 }) @@ -246,13 +245,22 @@ const Documents: FC = ({ datasetId }) => { {isLoading ? : total > 0 - ? + ? : } - {/* Show Pagination only if the total is more than the limit */} - {(total && total > limit) - ? - : null} setNotionPageSelectorModalVisible(false)} diff --git a/web/app/components/datasets/documents/list.tsx b/web/app/components/datasets/documents/list.tsx index 4321a610cb..cd7162603b 100644 --- a/web/app/components/datasets/documents/list.tsx +++ b/web/app/components/datasets/documents/list.tsx @@ -1,9 +1,9 @@ 'use client' import type { FC } from 'react' -import React, { useCallback, useEffect, useState } from 'react' +import React, { useCallback, useEffect, useMemo, useState } from 'react' import { useBoolean, useDebounceFn } from 'ahooks' import { ArrowDownIcon } from '@heroicons/react/24/outline' -import { pick } from 'lodash-es' +import { pick, uniq } from 'lodash-es' import { RiArchive2Line, RiDeleteBinLine, @@ -18,6 +18,7 @@ import { useTranslation } from 'react-i18next' import dayjs from 'dayjs' import { Edit03 } from '../../base/icons/src/vender/solid/general' import { Globe01 } from '../../base/icons/src/vender/line/mapsAndTravel' +import ChunkingModeLabel from '../common/chunking-mode-label' import s from './style.module.css' import RenameModal from './rename-modal' import cn from '@/utils/classnames' @@ -34,9 +35,13 @@ import { formatNumber } from '@/utils/format' import { archiveDocument, deleteDocument, disableDocument, enableDocument, syncDocument, syncWebsite, unArchiveDocument } from '@/service/datasets' import NotionIcon from '@/app/components/base/notion-icon' import ProgressBar from '@/app/components/base/progress-bar' -import { DataSourceType, type DocumentDisplayStatus, type SimpleDocumentDetail } from '@/models/datasets' +import { ChuckingMode, DataSourceType, type DocumentDisplayStatus, type SimpleDocumentDetail } from '@/models/datasets' import type { CommonResponse } from '@/models/common' import useTimestamp from '@/hooks/use-timestamp' +import { useDatasetDetailContextWithSelector as useDatasetDetailContext } from '@/context/dataset-detail' +import type { Props as PaginationProps } from '@/app/components/base/pagination' +import Pagination from '@/app/components/base/pagination' +import Checkbox from '@/app/components/base/checkbox' export const useIndexStatus = () => { const { t } = useTranslation() @@ -378,17 +383,32 @@ type LocalDoc = SimpleDocumentDetail & { percent?: number } type IDocumentListProps = { embeddingAvailable: boolean documents: LocalDoc[] + selectedIds: string[] + onSelectedIdChange: (selectedIds: string[]) => void datasetId: string + pagination: PaginationProps onUpdate: () => void } /** * Document list component including basic information */ -const DocumentList: FC = ({ embeddingAvailable, documents = [], datasetId, onUpdate }) => { +const DocumentList: FC = ({ + embeddingAvailable, + documents = [], + selectedIds, + onSelectedIdChange, + datasetId, + pagination, + onUpdate, +}) => { const { t } = useTranslation() const { formatTime } = useTimestamp() const router = useRouter() + const [datasetConfig] = useDatasetDetailContext(s => [s.dataset]) + const chunkingMode = datasetConfig?.doc_form + const isGeneralMode = chunkingMode !== ChuckingMode.parentChild + const isQAMode = chunkingMode === ChuckingMode.qa const [localDocs, setLocalDocs] = useState(documents) const [enableSort, setEnableSort] = useState(false) @@ -420,17 +440,43 @@ const DocumentList: FC = ({ embeddingAvailable, documents = onUpdate() }, [onUpdate]) + const isAllSelected = useMemo(() => { + return localDocs.length > 0 && localDocs.every(doc => selectedIds.includes(doc.id)) + }, [localDocs, selectedIds]) + + const isSomeSelected = useMemo(() => { + return localDocs.some(doc => selectedIds.includes(doc.id)) + }, [localDocs, selectedIds]) + + const onSelectedAll = useCallback(() => { + if (isAllSelected) + onSelectedIdChange([]) + else + onSelectedIdChange(uniq([...selectedIds, ...localDocs.map(doc => doc.id)])) + }, [isAllSelected, localDocs, onSelectedIdChange, selectedIds]) + return ( -
+
- + + - {localDocs.map((doc) => { + {localDocs.map((doc, index) => { const isFile = doc.data_source_type === DataSourceType.FILE const fileType = isFile ? doc.data_source_detail_dict?.upload_file?.extension : '' return = ({ embeddingAvailable, documents = onClick={() => { router.push(`/datasets/${datasetId}/documents/${doc.id}`) }}> - + + -
# +
e.stopPropagation()}> + + # +
+
{t('datasetDocuments.list.table.header.fileName')}
{t('datasetDocuments.list.table.header.chunkingMode')} {t('datasetDocuments.list.table.header.words')} {t('datasetDocuments.list.table.header.hitCount')} @@ -444,7 +490,7 @@ const DocumentList: FC = ({ embeddingAvailable, documents =
{doc.position} +
e.stopPropagation()}> + + { + onSelectedIdChange( + selectedIds.includes(doc.id) + ? selectedIds.filter(id => id !== doc.id) + : [...selectedIds, doc.id], + ) + }} + /> + {/* {doc.position} */} + {index + 1} +
+
@@ -482,11 +545,16 @@ const DocumentList: FC = ({ embeddingAvailable, documents =
- +
+ {renderCount(doc.word_count)} {renderCount(doc.hit_count)} + {formatTime(doc.created_at, t('datasetHitTesting.dateTimeFormat') as string)} @@ -508,6 +576,13 @@ const DocumentList: FC = ({ embeddingAvailable, documents = })}
+ {/* Show Pagination only if the total is more than the limit */} + {pagination.total && pagination.total > (pagination.limit || 10) && ( + + )} {isShowRenameModal && currDocument && ( void }>({}) +type DatasetDetailContextValue = { + indexingTechnique?: string + dataset?: DataSet + mutateDatasetRes?: () => void +} +const DatasetDetailContext = createContext({}) export const useDatasetDetailContext = () => useContext(DatasetDetailContext) +export const useDatasetDetailContextWithSelector = (selector: (value: DatasetDetailContextValue) => any) => { + return useContextSelector(DatasetDetailContext, selector) +} export default DatasetDetailContext diff --git a/web/i18n/en-US/dataset-documents.ts b/web/i18n/en-US/dataset-documents.ts index ce58883141..266c7fb2e7 100644 --- a/web/i18n/en-US/dataset-documents.ts +++ b/web/i18n/en-US/dataset-documents.ts @@ -8,7 +8,8 @@ const translation = { addUrl: 'Add URL', table: { header: { - fileName: 'FILE NAME', + fileName: 'NAME', + chunkingMode: 'CHUNKING MODE', words: 'WORDS', hitCount: 'RETRIEVAL COUNT', uploadTime: 'UPLOAD TIME', diff --git a/web/i18n/en-US/dataset.ts b/web/i18n/en-US/dataset.ts index e89ea47c26..59f105adca 100644 --- a/web/i18n/en-US/dataset.ts +++ b/web/i18n/en-US/dataset.ts @@ -1,5 +1,9 @@ const translation = { knowledge: 'Knowledge', + chunkingMode: { + general: 'General', + parentChild: 'Parent-child', + }, externalTag: 'External', externalAPI: 'External API', externalAPIPanelTitle: 'External Knowledge API', diff --git a/web/i18n/zh-Hans/dataset-documents.ts b/web/i18n/zh-Hans/dataset-documents.ts index d49bd6587e..35288c04d6 100644 --- a/web/i18n/zh-Hans/dataset-documents.ts +++ b/web/i18n/zh-Hans/dataset-documents.ts @@ -7,7 +7,8 @@ const translation = { addUrl: '添加 URL', table: { header: { - fileName: '文件名', + fileName: '名称', + chunkingMode: '分段模式', words: '字符数', hitCount: '召回次数', uploadTime: '上传时间', diff --git a/web/i18n/zh-Hans/dataset.ts b/web/i18n/zh-Hans/dataset.ts index d057af0c16..801a974cbc 100644 --- a/web/i18n/zh-Hans/dataset.ts +++ b/web/i18n/zh-Hans/dataset.ts @@ -1,5 +1,9 @@ const translation = { knowledge: '知识库', + chunkingMode: { + general: '通用', + parentChild: '父子', + }, externalTag: '外部', externalAPI: '外部 API', externalAPIPanelTitle: '外部知识库 API', diff --git a/web/models/datasets.ts b/web/models/datasets.ts index 9c3460fea2..900783e78f 100644 --- a/web/models/datasets.ts +++ b/web/models/datasets.ts @@ -10,6 +10,12 @@ export enum DataSourceType { export type DatasetPermission = 'only_me' | 'all_team_members' | 'partial_members' +export enum ChuckingMode { + 'text' = 'text_model', // General text + 'qa' = 'qa_model', // General QA + 'parentChild' = 'hierarchical_model', // Parent-Child +} + export type DataSet = { id: string name: string @@ -23,6 +29,7 @@ export type DataSet = { updated_by: string updated_at: number app_count: number + doc_form: ChuckingMode document_count: number word_count: number provider: string @@ -170,7 +177,10 @@ export type IndexingStatusBatchResponse = { data: IndexingStatusResponse[] } -export type ProcessMode = 'custom' | 'hierarchical' +export enum ProcessMode { + general = 'custom', + parentChild = 'hierarchical', +} export type ParentMode = 'full-doc' | 'paragraph' @@ -269,6 +279,7 @@ export type InitialDocumentDetail = { export type SimpleDocumentDetail = InitialDocumentDetail & { enabled: boolean word_count: number + is_qa: boolean // TODO waiting for backend to add this field error?: string | null archived: boolean updated_at: number