mirror of
https://github.com/langgenius/dify.git
synced 2026-04-27 11:06:46 +08:00
fix: fix chunk not display in indexed document (#33942)
This commit is contained in:
parent
075b8bf1ae
commit
fbd558762d
@ -224,6 +224,20 @@ describe('DocumentSettings', () => {
|
|||||||
|
|
||||||
// Data source types
|
// Data source types
|
||||||
describe('Data Source Types', () => {
|
describe('Data Source Types', () => {
|
||||||
|
it('should handle upload_file_id data source format', () => {
|
||||||
|
mockDocumentDetail = {
|
||||||
|
name: 'test-document',
|
||||||
|
data_source_type: 'upload_file',
|
||||||
|
data_source_info: {
|
||||||
|
upload_file_id: '4a807f05-45d6-4fc4-b7a8-b009a4568b36',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
render(<DocumentSettings {...defaultProps} />)
|
||||||
|
|
||||||
|
expect(screen.getByTestId('files-count')).toHaveTextContent('1')
|
||||||
|
})
|
||||||
|
|
||||||
it('should handle legacy upload_file data source', () => {
|
it('should handle legacy upload_file data source', () => {
|
||||||
mockDocumentDetail = {
|
mockDocumentDetail = {
|
||||||
name: 'test-document',
|
name: 'test-document',
|
||||||
@ -307,6 +321,18 @@ describe('DocumentSettings', () => {
|
|||||||
expect(screen.getByTestId('files-count')).toHaveTextContent('0')
|
expect(screen.getByTestId('files-count')).toHaveTextContent('0')
|
||||||
})
|
})
|
||||||
|
|
||||||
|
it('should handle empty data_source_info object', () => {
|
||||||
|
mockDocumentDetail = {
|
||||||
|
name: 'test-document',
|
||||||
|
data_source_type: 'upload_file',
|
||||||
|
data_source_info: {},
|
||||||
|
}
|
||||||
|
|
||||||
|
render(<DocumentSettings {...defaultProps} />)
|
||||||
|
|
||||||
|
expect(screen.getByTestId('files-count')).toHaveTextContent('0')
|
||||||
|
})
|
||||||
|
|
||||||
it('should maintain structure when rerendered', () => {
|
it('should maintain structure when rerendered', () => {
|
||||||
const { rerender } = render(
|
const { rerender } = render(
|
||||||
<DocumentSettings datasetId="dataset-1" documentId="doc-1" />,
|
<DocumentSettings datasetId="dataset-1" documentId="doc-1" />,
|
||||||
@ -317,4 +343,37 @@ describe('DocumentSettings', () => {
|
|||||||
expect(screen.getByTestId('step-two')).toBeInTheDocument()
|
expect(screen.getByTestId('step-two')).toBeInTheDocument()
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
describe('Files Extraction Regression Tests', () => {
|
||||||
|
it('should correctly extract file ID from upload_file_id format', () => {
|
||||||
|
const fileId = '4a807f05-45d6-4fc4-b7a8-b009a4568b36'
|
||||||
|
mockDocumentDetail = {
|
||||||
|
name: 'test-document.pdf',
|
||||||
|
data_source_type: 'upload_file',
|
||||||
|
data_source_info: {
|
||||||
|
upload_file_id: fileId,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
render(<DocumentSettings {...defaultProps} />)
|
||||||
|
|
||||||
|
// Verify files array is populated with correct file ID
|
||||||
|
expect(screen.getByTestId('files-count')).toHaveTextContent('1')
|
||||||
|
})
|
||||||
|
|
||||||
|
it('should preserve document name when using upload_file_id format', () => {
|
||||||
|
const documentName = 'my-uploaded-document.txt'
|
||||||
|
mockDocumentDetail = {
|
||||||
|
name: documentName,
|
||||||
|
data_source_type: 'upload_file',
|
||||||
|
data_source_info: {
|
||||||
|
upload_file_id: 'some-file-id',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
render(<DocumentSettings {...defaultProps} />)
|
||||||
|
|
||||||
|
expect(screen.getByTestId('files-count')).toHaveTextContent('1')
|
||||||
|
})
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|||||||
@ -8,6 +8,7 @@ import type {
|
|||||||
LegacyDataSourceInfo,
|
LegacyDataSourceInfo,
|
||||||
LocalFileInfo,
|
LocalFileInfo,
|
||||||
OnlineDocumentInfo,
|
OnlineDocumentInfo,
|
||||||
|
UploadFileIdInfo,
|
||||||
WebsiteCrawlInfo,
|
WebsiteCrawlInfo,
|
||||||
} from '@/models/datasets'
|
} from '@/models/datasets'
|
||||||
import { useBoolean } from 'ahooks'
|
import { useBoolean } from 'ahooks'
|
||||||
@ -61,6 +62,7 @@ const DocumentSettings = ({ datasetId, documentId }: DocumentSettingsProps) => {
|
|||||||
|
|
||||||
const dataSourceInfo = documentDetail?.data_source_info
|
const dataSourceInfo = documentDetail?.data_source_info
|
||||||
|
|
||||||
|
// Type guards for DataSourceInfo union
|
||||||
const isLegacyDataSourceInfo = (info: DataSourceInfo | undefined): info is LegacyDataSourceInfo => {
|
const isLegacyDataSourceInfo = (info: DataSourceInfo | undefined): info is LegacyDataSourceInfo => {
|
||||||
return !!info && 'upload_file' in info
|
return !!info && 'upload_file' in info
|
||||||
}
|
}
|
||||||
@ -73,10 +75,15 @@ const DocumentSettings = ({ datasetId, documentId }: DocumentSettingsProps) => {
|
|||||||
const isLocalFileInfo = (info: DataSourceInfo | undefined): info is LocalFileInfo => {
|
const isLocalFileInfo = (info: DataSourceInfo | undefined): info is LocalFileInfo => {
|
||||||
return !!info && 'related_id' in info && 'transfer_method' in info
|
return !!info && 'related_id' in info && 'transfer_method' in info
|
||||||
}
|
}
|
||||||
|
const isUploadFileIdInfo = (info: DataSourceInfo | undefined): info is UploadFileIdInfo => {
|
||||||
|
return !!info && 'upload_file_id' in info
|
||||||
|
}
|
||||||
|
|
||||||
const legacyInfo = isLegacyDataSourceInfo(dataSourceInfo) ? dataSourceInfo : undefined
|
const legacyInfo = isLegacyDataSourceInfo(dataSourceInfo) ? dataSourceInfo : undefined
|
||||||
const websiteInfo = isWebsiteCrawlInfo(dataSourceInfo) ? dataSourceInfo : undefined
|
const websiteInfo = isWebsiteCrawlInfo(dataSourceInfo) ? dataSourceInfo : undefined
|
||||||
const onlineDocumentInfo = isOnlineDocumentInfo(dataSourceInfo) ? dataSourceInfo : undefined
|
const onlineDocumentInfo = isOnlineDocumentInfo(dataSourceInfo) ? dataSourceInfo : undefined
|
||||||
const localFileInfo = isLocalFileInfo(dataSourceInfo) ? dataSourceInfo : undefined
|
const localFileInfo = isLocalFileInfo(dataSourceInfo) ? dataSourceInfo : undefined
|
||||||
|
const uploadFileIdInfo = isUploadFileIdInfo(dataSourceInfo) ? dataSourceInfo : undefined
|
||||||
|
|
||||||
const currentPage = useMemo(() => {
|
const currentPage = useMemo(() => {
|
||||||
if (legacyInfo) {
|
if (legacyInfo) {
|
||||||
@ -101,8 +108,20 @@ const DocumentSettings = ({ datasetId, documentId }: DocumentSettingsProps) => {
|
|||||||
}, [documentDetail?.data_source_type, documentDetail?.name, legacyInfo, onlineDocumentInfo])
|
}, [documentDetail?.data_source_type, documentDetail?.name, legacyInfo, onlineDocumentInfo])
|
||||||
|
|
||||||
const files = useMemo<CustomFile[]>(() => {
|
const files = useMemo<CustomFile[]>(() => {
|
||||||
if (legacyInfo?.upload_file)
|
// Handle upload_file_id format
|
||||||
return [legacyInfo.upload_file as CustomFile]
|
if (uploadFileIdInfo) {
|
||||||
|
return [{
|
||||||
|
id: uploadFileIdInfo.upload_file_id,
|
||||||
|
name: documentDetail?.name || '',
|
||||||
|
} as unknown as CustomFile]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle legacy upload_file format
|
||||||
|
if (legacyInfo?.upload_file) {
|
||||||
|
return [legacyInfo.upload_file as unknown as CustomFile]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle local file info format
|
||||||
if (localFileInfo) {
|
if (localFileInfo) {
|
||||||
const { related_id, name, extension } = localFileInfo
|
const { related_id, name, extension } = localFileInfo
|
||||||
return [{
|
return [{
|
||||||
@ -111,8 +130,9 @@ const DocumentSettings = ({ datasetId, documentId }: DocumentSettingsProps) => {
|
|||||||
extension,
|
extension,
|
||||||
} as unknown as CustomFile]
|
} as unknown as CustomFile]
|
||||||
}
|
}
|
||||||
|
|
||||||
return []
|
return []
|
||||||
}, [legacyInfo?.upload_file, localFileInfo])
|
}, [uploadFileIdInfo, legacyInfo?.upload_file, localFileInfo, documentDetail?.name])
|
||||||
|
|
||||||
const websitePages = useMemo(() => {
|
const websitePages = useMemo(() => {
|
||||||
if (!websiteInfo)
|
if (!websiteInfo)
|
||||||
|
|||||||
@ -381,7 +381,11 @@ export type OnlineDriveInfo = {
|
|||||||
type: 'file' | 'folder'
|
type: 'file' | 'folder'
|
||||||
}
|
}
|
||||||
|
|
||||||
export type DataSourceInfo = LegacyDataSourceInfo | LocalFileInfo | OnlineDocumentInfo | WebsiteCrawlInfo
|
export type UploadFileIdInfo = {
|
||||||
|
upload_file_id: string
|
||||||
|
}
|
||||||
|
|
||||||
|
export type DataSourceInfo = LegacyDataSourceInfo | LocalFileInfo | OnlineDocumentInfo | WebsiteCrawlInfo | UploadFileIdInfo
|
||||||
|
|
||||||
export type InitialDocumentDetail = {
|
export type InitialDocumentDetail = {
|
||||||
id: string
|
id: string
|
||||||
|
|||||||
@ -91,11 +91,15 @@ const getFileIndexingEstimateParamsForFile = ({
|
|||||||
processRule,
|
processRule,
|
||||||
dataset_id,
|
dataset_id,
|
||||||
}: GetFileIndexingEstimateParamsOptionFile): IndexingEstimateParams => {
|
}: GetFileIndexingEstimateParamsOptionFile): IndexingEstimateParams => {
|
||||||
|
const fileIds = files
|
||||||
|
.map(file => file.id)
|
||||||
|
.filter((id): id is string => Boolean(id))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
info_list: {
|
info_list: {
|
||||||
data_source_type: dataSourceType,
|
data_source_type: dataSourceType,
|
||||||
file_info_list: {
|
file_info_list: {
|
||||||
file_ids: files.map(file => file.id) as string[],
|
file_ids: fileIds,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
indexing_technique: indexingTechnique,
|
indexing_technique: indexingTechnique,
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user