fix: chunks not displayed in indexed documents (#33942)

This commit is contained in:
wangxiaolei 2026-03-24 10:36:48 +08:00 committed by GitHub
parent 075b8bf1ae
commit fbd558762d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 92 additions and 5 deletions

View File

@ -224,6 +224,20 @@ describe('DocumentSettings', () => {
// Data source types // Data source types
describe('Data Source Types', () => { describe('Data Source Types', () => {
it('should handle upload_file_id data source format', () => {
  // The document detail carries only an upload_file_id inside data_source_info.
  const uploadFileId = '4a807f05-45d6-4fc4-b7a8-b009a4568b36'
  mockDocumentDetail = {
    name: 'test-document',
    data_source_type: 'upload_file',
    data_source_info: { upload_file_id: uploadFileId },
  }

  render(<DocumentSettings {...defaultProps} />)

  // Exactly one file is expected to be reported for this payload.
  const filesCount = screen.getByTestId('files-count')
  expect(filesCount).toHaveTextContent('1')
})
it('should handle legacy upload_file data source', () => { it('should handle legacy upload_file data source', () => {
mockDocumentDetail = { mockDocumentDetail = {
name: 'test-document', name: 'test-document',
@ -307,6 +321,18 @@ describe('DocumentSettings', () => {
expect(screen.getByTestId('files-count')).toHaveTextContent('0') expect(screen.getByTestId('files-count')).toHaveTextContent('0')
}) })
it('should handle empty data_source_info object', () => {
  // data_source_info is present but has no keys at all.
  mockDocumentDetail = {
    name: 'test-document',
    data_source_type: 'upload_file',
    data_source_info: {},
  }

  render(<DocumentSettings {...defaultProps} />)

  // No file should be reported for an empty data_source_info.
  const filesCount = screen.getByTestId('files-count')
  expect(filesCount).toHaveTextContent('0')
})
it('should maintain structure when rerendered', () => { it('should maintain structure when rerendered', () => {
const { rerender } = render( const { rerender } = render(
<DocumentSettings datasetId="dataset-1" documentId="doc-1" />, <DocumentSettings datasetId="dataset-1" documentId="doc-1" />,
@ -317,4 +343,37 @@ describe('DocumentSettings', () => {
expect(screen.getByTestId('step-two')).toBeInTheDocument() expect(screen.getByTestId('step-two')).toBeInTheDocument()
}) })
}) })
describe('Files Extraction Regression Tests', () => {
  // Assigns mockDocumentDetail an upload_file_id payload with the given
  // document name, then renders DocumentSettings with the default props.
  const renderUploadFileIdDocument = (name: string, uploadFileId: string) => {
    mockDocumentDetail = {
      name,
      data_source_type: 'upload_file',
      data_source_info: {
        upload_file_id: uploadFileId,
      },
    }
    return render(<DocumentSettings {...defaultProps} />)
  }

  it('should correctly extract file ID from upload_file_id format', () => {
    renderUploadFileIdDocument('test-document.pdf', '4a807f05-45d6-4fc4-b7a8-b009a4568b36')

    // NOTE(review): only the number of files is asserted; the id value
    // itself is not inspected by this test.
    expect(screen.getByTestId('files-count')).toHaveTextContent('1')
  })

  it('should preserve document name when using upload_file_id format', () => {
    renderUploadFileIdDocument('my-uploaded-document.txt', 'some-file-id')

    // NOTE(review): only the number of files is asserted; the document
    // name is not inspected by this test.
    expect(screen.getByTestId('files-count')).toHaveTextContent('1')
  })
})
}) })

View File

@ -8,6 +8,7 @@ import type {
LegacyDataSourceInfo, LegacyDataSourceInfo,
LocalFileInfo, LocalFileInfo,
OnlineDocumentInfo, OnlineDocumentInfo,
UploadFileIdInfo,
WebsiteCrawlInfo, WebsiteCrawlInfo,
} from '@/models/datasets' } from '@/models/datasets'
import { useBoolean } from 'ahooks' import { useBoolean } from 'ahooks'
@ -61,6 +62,7 @@ const DocumentSettings = ({ datasetId, documentId }: DocumentSettingsProps) => {
const dataSourceInfo = documentDetail?.data_source_info const dataSourceInfo = documentDetail?.data_source_info
// Type guards for DataSourceInfo union
const isLegacyDataSourceInfo = (info: DataSourceInfo | undefined): info is LegacyDataSourceInfo => { const isLegacyDataSourceInfo = (info: DataSourceInfo | undefined): info is LegacyDataSourceInfo => {
return !!info && 'upload_file' in info return !!info && 'upload_file' in info
} }
@ -73,10 +75,15 @@ const DocumentSettings = ({ datasetId, documentId }: DocumentSettingsProps) => {
const isLocalFileInfo = (info: DataSourceInfo | undefined): info is LocalFileInfo => { const isLocalFileInfo = (info: DataSourceInfo | undefined): info is LocalFileInfo => {
return !!info && 'related_id' in info && 'transfer_method' in info return !!info && 'related_id' in info && 'transfer_method' in info
} }
/**
 * Type guard for the `upload_file_id` flavour of `DataSourceInfo`.
 *
 * Returns true only when `info` is defined, has an `upload_file_id` key, and
 * that value is a non-empty string. Checking key presence alone would let a
 * `null`, non-string or empty id satisfy the predicate even though
 * `UploadFileIdInfo` declares the field as `string`.
 *
 * @param info - The document's data source info, possibly undefined.
 * @returns Whether `info` can be treated as `UploadFileIdInfo`.
 */
const isUploadFileIdInfo = (info: DataSourceInfo | undefined): info is UploadFileIdInfo => {
  if (!info || !('upload_file_id' in info))
    return false
  // The payload comes from outside the type system, so validate the value as well as the key.
  const candidateId: unknown = info.upload_file_id
  return typeof candidateId === 'string' && candidateId.length > 0
}
const legacyInfo = isLegacyDataSourceInfo(dataSourceInfo) ? dataSourceInfo : undefined const legacyInfo = isLegacyDataSourceInfo(dataSourceInfo) ? dataSourceInfo : undefined
const websiteInfo = isWebsiteCrawlInfo(dataSourceInfo) ? dataSourceInfo : undefined const websiteInfo = isWebsiteCrawlInfo(dataSourceInfo) ? dataSourceInfo : undefined
const onlineDocumentInfo = isOnlineDocumentInfo(dataSourceInfo) ? dataSourceInfo : undefined const onlineDocumentInfo = isOnlineDocumentInfo(dataSourceInfo) ? dataSourceInfo : undefined
const localFileInfo = isLocalFileInfo(dataSourceInfo) ? dataSourceInfo : undefined const localFileInfo = isLocalFileInfo(dataSourceInfo) ? dataSourceInfo : undefined
const uploadFileIdInfo = isUploadFileIdInfo(dataSourceInfo) ? dataSourceInfo : undefined
const currentPage = useMemo(() => { const currentPage = useMemo(() => {
if (legacyInfo) { if (legacyInfo) {
@ -101,8 +108,20 @@ const DocumentSettings = ({ datasetId, documentId }: DocumentSettingsProps) => {
}, [documentDetail?.data_source_type, documentDetail?.name, legacyInfo, onlineDocumentInfo]) }, [documentDetail?.data_source_type, documentDetail?.name, legacyInfo, onlineDocumentInfo])
const files = useMemo<CustomFile[]>(() => { const files = useMemo<CustomFile[]>(() => {
if (legacyInfo?.upload_file) // Handle upload_file_id format
return [legacyInfo.upload_file as CustomFile] if (uploadFileIdInfo) {
return [{
id: uploadFileIdInfo.upload_file_id,
name: documentDetail?.name || '',
} as unknown as CustomFile]
}
// Handle legacy upload_file format
if (legacyInfo?.upload_file) {
return [legacyInfo.upload_file as unknown as CustomFile]
}
// Handle local file info format
if (localFileInfo) { if (localFileInfo) {
const { related_id, name, extension } = localFileInfo const { related_id, name, extension } = localFileInfo
return [{ return [{
@ -111,8 +130,9 @@ const DocumentSettings = ({ datasetId, documentId }: DocumentSettingsProps) => {
extension, extension,
} as unknown as CustomFile] } as unknown as CustomFile]
} }
return [] return []
}, [legacyInfo?.upload_file, localFileInfo]) }, [uploadFileIdInfo, legacyInfo?.upload_file, localFileInfo, documentDetail?.name])
const websitePages = useMemo(() => { const websitePages = useMemo(() => {
if (!websiteInfo) if (!websiteInfo)

View File

@ -381,7 +381,11 @@ export type OnlineDriveInfo = {
type: 'file' | 'folder' type: 'file' | 'folder'
} }
export type DataSourceInfo = LegacyDataSourceInfo | LocalFileInfo | OnlineDocumentInfo | WebsiteCrawlInfo export type UploadFileIdInfo = {
upload_file_id: string
}
export type DataSourceInfo = LegacyDataSourceInfo | LocalFileInfo | OnlineDocumentInfo | WebsiteCrawlInfo | UploadFileIdInfo
export type InitialDocumentDetail = { export type InitialDocumentDetail = {
id: string id: string

View File

@ -91,11 +91,15 @@ const getFileIndexingEstimateParamsForFile = ({
processRule, processRule,
dataset_id, dataset_id,
}: GetFileIndexingEstimateParamsOptionFile): IndexingEstimateParams => { }: GetFileIndexingEstimateParamsOptionFile): IndexingEstimateParams => {
const fileIds = files
.map(file => file.id)
.filter((id): id is string => Boolean(id))
return { return {
info_list: { info_list: {
data_source_type: dataSourceType, data_source_type: dataSourceType,
file_info_list: { file_info_list: {
file_ids: files.map(file => file.id) as string[], file_ids: fileIds,
}, },
}, },
indexing_technique: indexingTechnique, indexing_technique: indexingTechnique,