diff --git a/web/app/components/datasets/documents/list.tsx b/web/app/components/datasets/documents/list.tsx index 4321a610cb..83c260c4eb 100644 --- a/web/app/components/datasets/documents/list.tsx +++ b/web/app/components/datasets/documents/list.tsx @@ -34,9 +34,10 @@ import { formatNumber } from '@/utils/format' import { archiveDocument, deleteDocument, disableDocument, enableDocument, syncDocument, syncWebsite, unArchiveDocument } from '@/service/datasets' import NotionIcon from '@/app/components/base/notion-icon' import ProgressBar from '@/app/components/base/progress-bar' -import { DataSourceType, type DocumentDisplayStatus, type SimpleDocumentDetail } from '@/models/datasets' +import { ChuckingMode, DataSourceType, type DocumentDisplayStatus, type SimpleDocumentDetail } from '@/models/datasets' import type { CommonResponse } from '@/models/common' import useTimestamp from '@/hooks/use-timestamp' +import { useDatasetDetailContextWithSelector as useDatasetDetailContext } from '@/context/dataset-detail' export const useIndexStatus = () => { const { t } = useTranslation() @@ -389,6 +390,10 @@ const DocumentList: FC = ({ embeddingAvailable, documents = const { t } = useTranslation() const { formatTime } = useTimestamp() const router = useRouter() + const [datasetConfig] = useDatasetDetailContext(s => [s.dataset]) + const chunkingMode = datasetConfig?.doc_form + const isGeneralMode = chunkingMode !== ChuckingMode.parentChild + const isQAMode = chunkingMode === ChuckingMode.qa const [localDocs, setLocalDocs] = useState(documents) const [enableSort, setEnableSort] = useState(false) @@ -431,6 +436,7 @@ const DocumentList: FC = ({ embeddingAvailable, documents = {t('datasetDocuments.list.table.header.fileName')} + {t('datasetDocuments.list.table.header.chunkingMode')} {t('datasetDocuments.list.table.header.words')} {t('datasetDocuments.list.table.header.hitCount')} @@ -453,7 +459,7 @@ const DocumentList: FC = ({ embeddingAvailable, documents = onClick={() => { router.push(`/datasets/${datasetId}/documents/${doc.id}`) }}> - {doc.position} + {doc.position}
@@ -482,11 +488,11 @@ const DocumentList: FC = ({ embeddingAvailable, documents =
- + {isGeneralMode ? `general ${isQAMode ? '. QA' : ''}` : 'ParentChilde'} {renderCount(doc.word_count)} {renderCount(doc.hit_count)} - + {formatTime(doc.created_at, t('datasetHitTesting.dateTimeFormat') as string)} diff --git a/web/context/dataset-detail.ts b/web/context/dataset-detail.ts index de046ce7a0..b880c97dc1 100644 --- a/web/context/dataset-detail.ts +++ b/web/context/dataset-detail.ts @@ -1,8 +1,15 @@ -import { createContext, useContext } from 'use-context-selector' +import { createContext, useContext, useContextSelector } from 'use-context-selector' import type { DataSet } from '@/models/datasets' - -const DatasetDetailContext = createContext<{ indexingTechnique?: string; dataset?: DataSet; mutateDatasetRes?: () => void }>({}) +type DatasetDetailContextValue = { + indexingTechnique?: string + dataset?: DataSet + mutateDatasetRes?: () => void +} +const DatasetDetailContext = createContext({}) export const useDatasetDetailContext = () => useContext(DatasetDetailContext) +export const useDatasetDetailContextWithSelector = (selector: (value: DatasetDetailContextValue) => any) => { + return useContextSelector(DatasetDetailContext, selector) +} export default DatasetDetailContext diff --git a/web/i18n/en-US/dataset-documents.ts b/web/i18n/en-US/dataset-documents.ts index ce58883141..266c7fb2e7 100644 --- a/web/i18n/en-US/dataset-documents.ts +++ b/web/i18n/en-US/dataset-documents.ts @@ -8,7 +8,8 @@ const translation = { addUrl: 'Add URL', table: { header: { - fileName: 'FILE NAME', + fileName: 'NAME', + chunkingMode: 'CHUNKING MODE', words: 'WORDS', hitCount: 'RETRIEVAL COUNT', uploadTime: 'UPLOAD TIME', diff --git a/web/i18n/zh-Hans/dataset-documents.ts b/web/i18n/zh-Hans/dataset-documents.ts index d49bd6587e..35288c04d6 100644 --- a/web/i18n/zh-Hans/dataset-documents.ts +++ b/web/i18n/zh-Hans/dataset-documents.ts @@ -7,7 +7,8 @@ const translation = { addUrl: '添加 URL', table: { header: { - fileName: '文件名', + fileName: '名称', + chunkingMode: '分段模式', words: '字符数', hitCount: '召回次数', uploadTime: '上传时间', diff --git a/web/models/datasets.ts b/web/models/datasets.ts index 9c3460fea2..900783e78f 100644 --- a/web/models/datasets.ts +++ b/web/models/datasets.ts @@ -10,6 +10,12 @@ export enum DataSourceType { export type DatasetPermission = 'only_me' | 'all_team_members' | 'partial_members' +export enum ChuckingMode { + 'text' = 'text_model', // General text + 'qa' = 'qa_model', // General QA + 'parentChild' = 'hierarchical_model', // Parent-Child +} + export type DataSet = { id: string name: string @@ -23,6 +29,7 @@ export type DataSet = { updated_by: string updated_at: number app_count: number + doc_form: ChuckingMode document_count: number word_count: number provider: string @@ -170,7 +177,10 @@ export type IndexingStatusBatchResponse = { data: IndexingStatusResponse[] } -export type ProcessMode = 'custom' | 'hierarchical' +export enum ProcessMode { + general = 'custom', + parentChild = 'hierarchical', +} export type ParentMode = 'full-doc' | 'paragraph' @@ -269,6 +279,7 @@ export type InitialDocumentDetail = { export type SimpleDocumentDetail = InitialDocumentDetail & { enabled: boolean word_count: number + is_qa: boolean // TODO waiting for backend to add this field error?: string | null archived: boolean updated_at: number