feat: add chunking mode

This commit is contained in:
Joel 2024-12-05 11:40:46 +08:00
parent 78fff31e61
commit 1578dc50ef
5 changed files with 36 additions and 10 deletions

View File

@ -34,9 +34,10 @@ import { formatNumber } from '@/utils/format'
import { archiveDocument, deleteDocument, disableDocument, enableDocument, syncDocument, syncWebsite, unArchiveDocument } from '@/service/datasets'
import NotionIcon from '@/app/components/base/notion-icon'
import ProgressBar from '@/app/components/base/progress-bar'
import { DataSourceType, type DocumentDisplayStatus, type SimpleDocumentDetail } from '@/models/datasets'
import { ChuckingMode, DataSourceType, type DocumentDisplayStatus, type SimpleDocumentDetail } from '@/models/datasets'
import type { CommonResponse } from '@/models/common'
import useTimestamp from '@/hooks/use-timestamp'
import { useDatasetDetailContextWithSelector as useDatasetDetailContext } from '@/context/dataset-detail'
export const useIndexStatus = () => {
const { t } = useTranslation()
@ -389,6 +390,10 @@ const DocumentList: FC<IDocumentListProps> = ({ embeddingAvailable, documents =
const { t } = useTranslation()
const { formatTime } = useTimestamp()
const router = useRouter()
const [datasetConfig] = useDatasetDetailContext(s => [s.dataset])
const chunkingMode = datasetConfig?.doc_form
const isGeneralMode = chunkingMode !== ChuckingMode.parentChild
const isQAMode = chunkingMode === ChuckingMode.qa
const [localDocs, setLocalDocs] = useState<LocalDoc[]>(documents)
const [enableSort, setEnableSort] = useState(false)
@ -431,6 +436,7 @@ const DocumentList: FC<IDocumentListProps> = ({ embeddingAvailable, documents =
{t('datasetDocuments.list.table.header.fileName')}
</div>
</td>
<td className='w-[120px]'>{t('datasetDocuments.list.table.header.chunkingMode')}</td>
<td className='w-24'>{t('datasetDocuments.list.table.header.words')}</td>
<td className='w-44'>{t('datasetDocuments.list.table.header.hitCount')}</td>
<td className='w-44'>
@ -453,7 +459,7 @@ const DocumentList: FC<IDocumentListProps> = ({ embeddingAvailable, documents =
onClick={() => {
router.push(`/datasets/${datasetId}/documents/${doc.id}`)
}}>
<td className='text-left align-middle text-gray-500 text-xs'>{doc.position}</td>
<td className='text-left align-middle text-text-tertiary text-xs'>{doc.position}</td>
<td>
<div className='group flex items-center justify-between'>
<span className={s.tdValue}>
@ -482,11 +488,11 @@ const DocumentList: FC<IDocumentListProps> = ({ embeddingAvailable, documents =
</Tooltip>
</div>
</div>
</td>
<td>{isGeneralMode ? `general ${isQAMode ? '. QA' : ''}` : 'ParentChilde'}</td>
<td>{renderCount(doc.word_count)}</td>
<td>{renderCount(doc.hit_count)}</td>
<td className='text-gray-500 text-[13px]'>
<td className='text-text-secondary text-[13px]'>
{formatTime(doc.created_at, t('datasetHitTesting.dateTimeFormat') as string)}
</td>
<td>

View File

@ -1,8 +1,15 @@
import { createContext, useContext } from 'use-context-selector'
import { createContext, useContext, useContextSelector } from 'use-context-selector'
import type { DataSet } from '@/models/datasets'
const DatasetDetailContext = createContext<{ indexingTechnique?: string; dataset?: DataSet; mutateDatasetRes?: () => void }>({})
/**
 * Value carried by the dataset-detail context.
 *
 * Every field is optional, so an empty object is a valid value.
 */
type DatasetDetailContextValue = {
  /** Indexing technique identifier, kept as a plain string. */
  indexingTechnique?: string
  /** The dataset currently in scope, when one is available. */
  dataset?: DataSet
  /** Zero-argument callback; presumably re-fetches the dataset response — confirm with the provider. */
  mutateDatasetRes?: () => void
}
// The default value is an empty object, which is valid because every field
// of DatasetDetailContextValue is optional.
const DatasetDetailContext = createContext<DatasetDetailContextValue>({})

/** Reads the whole dataset-detail context value. */
export const useDatasetDetailContext = () => {
  return useContext(DatasetDetailContext)
}
/**
 * Reads a slice of the dataset-detail context through `useContextSelector`.
 *
 * The hook is generic over the selector's return type, so callers get the
 * selected slice back fully typed instead of `any`.
 *
 * NOTE(review): per the use-context-selector docs, a re-render is triggered
 * only when the selected value changes referentially. A selector that builds
 * a new array or object on every call (e.g. `s => [s.dataset]`) therefore
 * never compares equal; prefer returning the stored value directly.
 *
 * @param selector - Maps the context value to the slice the caller needs.
 * @returns The value produced by `selector` for the current context value.
 */
// `<T,>` (with the trailing comma) parses as a type parameter list in both .ts and .tsx files.
export const useDatasetDetailContextWithSelector = <T,>(selector: (value: DatasetDetailContextValue) => T): T => {
  return useContextSelector(DatasetDetailContext, selector)
}
export default DatasetDetailContext

View File

@ -8,7 +8,8 @@ const translation = {
addUrl: 'Add URL',
table: {
header: {
fileName: 'FILE NAME',
fileName: 'NAME',
chunkingMode: 'CHUNKING MODE',
words: 'WORDS',
hitCount: 'RETRIEVAL COUNT',
uploadTime: 'UPLOAD TIME',

View File

@ -7,7 +7,8 @@ const translation = {
addUrl: '添加 URL',
table: {
header: {
fileName: '文件名',
fileName: '名称',
chunkingMode: '分段模式',
words: '字符数',
hitCount: '召回次数',
uploadTime: '上传时间',

View File

@ -10,6 +10,12 @@ export enum DataSourceType {
export type DatasetPermission = 'only_me' | 'all_team_members' | 'partial_members'
/**
 * Chunking mode of a dataset; this is the type of `DataSet.doc_form`.
 *
 * NOTE(review): the identifier is spelled "Chucking" (sic). It is imported
 * under this name by other modules, so it is kept as-is here.
 */
export enum ChuckingMode {
  /** General text */
  text = 'text_model',
  /** General QA */
  qa = 'qa_model',
  /** Parent-Child */
  parentChild = 'hierarchical_model',
}
export type DataSet = {
id: string
name: string
@ -23,6 +29,7 @@ export type DataSet = {
updated_by: string
updated_at: number
app_count: number
doc_form: ChuckingMode
document_count: number
word_count: number
provider: string
@ -170,7 +177,10 @@ export type IndexingStatusBatchResponse = {
data: IndexingStatusResponse[]
}
export type ProcessMode = 'custom' | 'hierarchical'
/**
 * Document processing mode.
 *
 * Member names differ from their string values: `general` is `'custom'`
 * and `parentChild` is `'hierarchical'`.
 */
export enum ProcessMode {
  /** General mode; string value `'custom'`. */
  general = 'custom',
  /** Parent-child mode; string value `'hierarchical'`. */
  parentChild = 'hierarchical',
}
export type ParentMode = 'full-doc' | 'paragraph'
@ -269,6 +279,7 @@ export type InitialDocumentDetail = {
export type SimpleDocumentDetail = InitialDocumentDetail & {
enabled: boolean
word_count: number
is_qa: boolean // TODO waiting for backend to add this field
error?: string | null
archived: boolean
updated_at: number