mirror of https://github.com/langgenius/dify.git
feat: add chunking mode
This commit is contained in:
parent
78fff31e61
commit
1578dc50ef
|
|
@ -34,9 +34,10 @@ import { formatNumber } from '@/utils/format'
|
|||
import { archiveDocument, deleteDocument, disableDocument, enableDocument, syncDocument, syncWebsite, unArchiveDocument } from '@/service/datasets'
|
||||
import NotionIcon from '@/app/components/base/notion-icon'
|
||||
import ProgressBar from '@/app/components/base/progress-bar'
|
||||
import { DataSourceType, type DocumentDisplayStatus, type SimpleDocumentDetail } from '@/models/datasets'
|
||||
import { ChuckingMode, DataSourceType, type DocumentDisplayStatus, type SimpleDocumentDetail } from '@/models/datasets'
|
||||
import type { CommonResponse } from '@/models/common'
|
||||
import useTimestamp from '@/hooks/use-timestamp'
|
||||
import { useDatasetDetailContextWithSelector as useDatasetDetailContext } from '@/context/dataset-detail'
|
||||
|
||||
export const useIndexStatus = () => {
|
||||
const { t } = useTranslation()
|
||||
|
|
@ -389,6 +390,10 @@ const DocumentList: FC<IDocumentListProps> = ({ embeddingAvailable, documents =
|
|||
const { t } = useTranslation()
|
||||
const { formatTime } = useTimestamp()
|
||||
const router = useRouter()
|
||||
const [datasetConfig] = useDatasetDetailContext(s => [s.dataset])
|
||||
const chunkingMode = datasetConfig?.doc_form
|
||||
const isGeneralMode = chunkingMode !== ChuckingMode.parentChild
|
||||
const isQAMode = chunkingMode === ChuckingMode.qa
|
||||
const [localDocs, setLocalDocs] = useState<LocalDoc[]>(documents)
|
||||
const [enableSort, setEnableSort] = useState(false)
|
||||
|
||||
|
|
@ -431,6 +436,7 @@ const DocumentList: FC<IDocumentListProps> = ({ embeddingAvailable, documents =
|
|||
{t('datasetDocuments.list.table.header.fileName')}
|
||||
</div>
|
||||
</td>
|
||||
<td className='w-[120px]'>{t('datasetDocuments.list.table.header.chunkingMode')}</td>
|
||||
<td className='w-24'>{t('datasetDocuments.list.table.header.words')}</td>
|
||||
<td className='w-44'>{t('datasetDocuments.list.table.header.hitCount')}</td>
|
||||
<td className='w-44'>
|
||||
|
|
@ -453,7 +459,7 @@ const DocumentList: FC<IDocumentListProps> = ({ embeddingAvailable, documents =
|
|||
onClick={() => {
|
||||
router.push(`/datasets/${datasetId}/documents/${doc.id}`)
|
||||
}}>
|
||||
<td className='text-left align-middle text-gray-500 text-xs'>{doc.position}</td>
|
||||
<td className='text-left align-middle text-text-tertiary text-xs'>{doc.position}</td>
|
||||
<td>
|
||||
<div className='group flex items-center justify-between'>
|
||||
<span className={s.tdValue}>
|
||||
|
|
@ -482,11 +488,11 @@ const DocumentList: FC<IDocumentListProps> = ({ embeddingAvailable, documents =
|
|||
</Tooltip>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</td>
|
||||
<td>{isGeneralMode ? `general ${isQAMode ? '. QA' : ''}` : 'ParentChilde'}</td>
|
||||
<td>{renderCount(doc.word_count)}</td>
|
||||
<td>{renderCount(doc.hit_count)}</td>
|
||||
<td className='text-gray-500 text-[13px]'>
|
||||
<td className='text-text-secondary text-[13px]'>
|
||||
{formatTime(doc.created_at, t('datasetHitTesting.dateTimeFormat') as string)}
|
||||
</td>
|
||||
<td>
|
||||
|
|
|
|||
|
|
@ -1,8 +1,15 @@
|
|||
import { createContext, useContext } from 'use-context-selector'
|
||||
import { createContext, useContext, useContextSelector } from 'use-context-selector'
|
||||
import type { DataSet } from '@/models/datasets'
|
||||
|
||||
const DatasetDetailContext = createContext<{ indexingTechnique?: string; dataset?: DataSet; mutateDatasetRes?: () => void }>({})
|
||||
// Shape of the value carried by DatasetDetailContext.
// Every field is optional, so an empty object is a valid context value.
type DatasetDetailContextValue = {
|
||||
// Optional string naming the indexing technique.
indexingTechnique?: string
|
||||
// Optional DataSet record (type imported from '@/models/datasets').
dataset?: DataSet
|
||||
// Optional zero-argument callback returning nothing.
// NOTE(review): presumably re-fetches the dataset response — confirm against the provider.
mutateDatasetRes?: () => void
|
||||
}
|
||||
// Context object typed as DatasetDetailContextValue, built with createContext from
// 'use-context-selector'; its default value is an empty object.
const DatasetDetailContext = createContext<DatasetDetailContextValue>({})
|
||||
|
||||
/** Hook that returns the whole current value of DatasetDetailContext. */
export const useDatasetDetailContext = () => {
  return useContext(DatasetDetailContext)
}
|
||||
|
||||
/**
 * Hook that reads a caller-chosen slice of DatasetDetailContext.
 *
 * Delegates to `useContextSelector` from 'use-context-selector' and returns
 * whatever `selector` returns. The hook is generic over the selected type, so
 * callers get the selector's real return type back instead of `any`.
 *
 * @param selector - Maps the full context value to the piece the caller needs.
 * @returns The value produced by `selector`.
 */
// `<T,>` (with the trailing comma) keeps the generic arrow unambiguous if this file is ever compiled as TSX.
export const useDatasetDetailContextWithSelector = <T,>(selector: (value: DatasetDetailContextValue) => T): T => {
  return useContextSelector(DatasetDetailContext, selector)
}
|
||||
export default DatasetDetailContext
|
||||
|
|
|
|||
|
|
@ -8,7 +8,8 @@ const translation = {
|
|||
addUrl: 'Add URL',
|
||||
table: {
|
||||
header: {
|
||||
fileName: 'FILE NAME',
|
||||
fileName: 'NAME',
|
||||
chunkingMode: 'CHUNKING MODE',
|
||||
words: 'WORDS',
|
||||
hitCount: 'RETRIEVAL COUNT',
|
||||
uploadTime: 'UPLOAD TIME',
|
||||
|
|
|
|||
|
|
@ -7,7 +7,8 @@ const translation = {
|
|||
addUrl: '添加 URL',
|
||||
table: {
|
||||
header: {
|
||||
fileName: '文件名',
|
||||
fileName: '名称',
|
||||
chunkingMode: '分段模式',
|
||||
words: '字符数',
|
||||
hitCount: '召回次数',
|
||||
uploadTime: '上传时间',
|
||||
|
|
|
|||
|
|
@ -10,6 +10,12 @@ export enum DataSourceType {
|
|||
|
||||
export type DatasetPermission = 'only_me' | 'all_team_members' | 'partial_members'
|
||||
|
||||
// String enum of the chunking modes; it is the type of the `doc_form` field on DataSet.
// NOTE(review): the name looks like a typo for "ChunkingMode". Renaming it would change
// the exported interface, so every importer would have to be updated together.
export enum ChuckingMode {
|
||||
'text' = 'text_model', // General text
|
||||
'qa' = 'qa_model', // General QA
|
||||
'parentChild' = 'hierarchical_model', // Parent-Child
|
||||
}
|
||||
|
||||
export type DataSet = {
|
||||
id: string
|
||||
name: string
|
||||
|
|
@ -23,6 +29,7 @@ export type DataSet = {
|
|||
updated_by: string
|
||||
updated_at: number
|
||||
app_count: number
|
||||
doc_form: ChuckingMode
|
||||
document_count: number
|
||||
word_count: number
|
||||
provider: string
|
||||
|
|
@ -170,7 +177,10 @@ export type IndexingStatusBatchResponse = {
|
|||
data: IndexingStatusResponse[]
|
||||
}
|
||||
|
||||
export type ProcessMode = 'custom' | 'hierarchical'
|
||||
// String enum of the process modes.
export enum ProcessMode {
|
||||
// `general` carries the string value 'custom'.
general = 'custom',
|
||||
// `parentChild` carries the string value 'hierarchical'.
parentChild = 'hierarchical',
|
||||
}
|
||||
|
||||
export type ParentMode = 'full-doc' | 'paragraph'
|
||||
|
||||
|
|
@ -269,6 +279,7 @@ export type InitialDocumentDetail = {
|
|||
export type SimpleDocumentDetail = InitialDocumentDetail & {
|
||||
enabled: boolean
|
||||
word_count: number
|
||||
is_qa: boolean // TODO waiting for backend to add this field
|
||||
error?: string | null
|
||||
archived: boolean
|
||||
updated_at: number
|
||||
|
|
|
|||
Loading…
Reference in New Issue