Merge branch 'feat/parent-child-retrieval' of https://github.com/langgenius/dify into feat/parent-child-retrieval

This commit is contained in:
twwu 2024-12-05 15:44:58 +08:00
commit 734659c873
14 changed files with 218 additions and 94 deletions

View File

@ -3,13 +3,15 @@ import cn from '@/utils/classnames'
type BadgeProps = {
className?: string
text: string
text?: string
children?: React.ReactNode
uppercase?: boolean
}
const Badge = ({
className,
text,
children,
uppercase = true,
}: BadgeProps) => {
return (
@ -20,7 +22,7 @@ const Badge = ({
className,
)}
>
{text}
{children || text}
</div>
)
}

View File

@ -8,7 +8,7 @@ import Button from '@/app/components/base/button'
import Input from '@/app/components/base/input'
import cn from '@/utils/classnames'
type Props = {
export type Props = {
className?: string
current: number
onChange: (cur: number) => void

View File

@ -0,0 +1,29 @@
'use client'
import type { FC } from 'react'
import React from 'react'
import { useTranslation } from 'react-i18next'
import Badge from '@/app/components/base/badge'
import { GeneralType, ParentChildType } from '@/app/components/base/icons/src/public/knowledge'
type Props = {
isGeneralMode: boolean
isQAMode: boolean
}
/**
 * Badge that shows a chunking mode as an icon followed by a text label.
 *
 * - When `isGeneralMode` is true: the general icon and the translated
 *   `dataset.chunkingMode.general` label, with ' · QA' appended if
 *   `isQAMode` is also true.
 * - Otherwise: the parent-child icon and the translated
 *   `dataset.chunkingMode.parentChild` label (`isQAMode` is not consulted).
 */
const ChunkingModeLabel: FC<Props> = ({
  isGeneralMode,
  isQAMode,
}) => {
  const { t } = useTranslation()
  const ModeIcon = isGeneralMode ? GeneralType : ParentChildType

  // Resolve the label text up front so the JSX below stays flat.
  const labelText = (() => {
    if (!isGeneralMode)
      return t('dataset.chunkingMode.parentChild')
    const qaSuffix = isQAMode ? ' · QA' : ''
    return `${t('dataset.chunkingMode.general')}${qaSuffix}`
  })()

  return (
    <Badge>
      <div className='flex items-center h-full space-x-0.5 text-text-tertiary'>
        <ModeIcon className='w-3 h-3' />
        <span className='system-2xs-medium-uppercase'>{labelText}</span>
      </div>
    </Badge>
  )
}
export default React.memo(ChunkingModeLabel)

View File

@ -3,8 +3,10 @@ import type { FC } from 'react'
import React, { useState } from 'react'
import { useBoolean } from 'ahooks'
import { RiArrowDownSLine, RiArrowUpSLine } from '@remixicon/react'
import { useTranslation } from 'react-i18next'
import FileIcon from '../document-file-icon'
import type { ParentMode, ProcessMode, SimpleDocumentDetail } from '@/models/datasets'
import type { ParentMode, SimpleDocumentDetail } from '@/models/datasets'
import { ProcessMode } from '@/models/datasets'
import {
PortalToFollowElem,
PortalToFollowElemContent,
@ -32,6 +34,7 @@ const DocumentPicker: FC<Props> = ({
value,
onChange,
}) => {
const { t } = useTranslation()
const {
name,
extension,
@ -49,7 +52,7 @@ const DocumentPicker: FC<Props> = ({
},
})
const documentsList = data?.data
const isParentChild = processMode === 'hierarchical'
const isParentChild = processMode === ProcessMode.parentChild
const TypeIcon = isParentChild ? ParentChildType : GeneralType
const [open, {
@ -75,7 +78,7 @@ const DocumentPicker: FC<Props> = ({
<div className='flex items-center h-3 text-text-tertiary space-x-0.5'>
<TypeIcon className='w-3 h-3' />
<span className={cn('system-2xs-medium-uppercase', isParentChild && 'mt-0.5' /* nudge the text down: the parent-child icon is not vertically aligned with it */)}>
{isParentChild ? 'Parent-Child' : 'General'}
{isParentChild ? t('dataset.chunkingMode.parentChild') : t('dataset.chunkingMode.general')}
{isParentChild && ` · ${parentMode || '--'}`}
</span>
</div>

View File

@ -4,8 +4,8 @@ import React, { useCallback, useEffect, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { useContext } from 'use-context-selector'
import {
RiAlertFill,
RiArrowLeftLine,
RiCloseLine,
RiSearchEyeLine,
} from '@remixicon/react'
import Link from 'next/link'
@ -50,12 +50,9 @@ import type { DefaultModel } from '@/app/components/header/account-setting/model
import { ModelTypeEnum } from '@/app/components/header/account-setting/model-provider-page/declarations'
import Checkbox from '@/app/components/base/checkbox'
import RadioCard from '@/app/components/base/radio-card'
import { MessageChatSquare } from '@/app/components/base/icons/src/public/common'
import { IS_CE_EDITION } from '@/config'
import Switch from '@/app/components/base/switch'
import Divider from '@/app/components/base/divider'
import { getNotionInfo, getWebsiteInfo, useCreateDocument, useCreateFirstDocument, useFetchDefaultProcessRule, useFetchFileIndexingEstimateForFile, useFetchFileIndexingEstimateForNotion, useFetchFileIndexingEstimateForWeb } from '@/service/knowledge/use-create-dataset'
import Loading from '@/app/components/base/loading'
import Badge from '@/app/components/base/badge'
import { SkeletonContanier, SkeletonPoint, SkeletonRectangle, SkeletonRow } from '@/app/components/base/skeleton'
import Tooltip from '@/app/components/base/tooltip'
@ -178,7 +175,6 @@ const StepTwo = ({
const [docLanguage, setDocLanguage] = useState<string>(
(datasetId && documentDetail) ? documentDetail.doc_language : (locale !== LanguagesSupported[1] ? 'English' : 'Chinese'),
)
const [QATipHide, setQATipHide] = useState(false)
const [parentChildConfig, setParentChildConfig] = useState<ParentChildConfig>(defaultParentChildConfig)
@ -608,31 +604,46 @@ const StepTwo = ({
</div>
</div>
</div>
{IS_CE_EDITION && <div className='flex items-center'>
<Checkbox
checked={docForm === DocForm.QA}
onCheck={() => {
if (docForm === DocForm.QA)
setDocForm(DocForm.TEXT)
else
setDocForm(DocForm.QA)
}}
className='mr-2'
/>
<div className='flex items-center gap-1'>
<TextLabel>
Chunk using Q&A format in
</TextLabel>
<div className='z-50 relative'>
<LanguageSelect
currentLanguage={docLanguage || locale}
onSelect={setDocLanguage}
disabled={isLanguageSelectDisabled}
/>
{IS_CE_EDITION && <>
<div className='flex items-center'>
<Checkbox
checked={docForm === DocForm.QA}
onCheck={() => {
if (docForm === DocForm.QA)
setDocForm(DocForm.TEXT)
else
setDocForm(DocForm.QA)
}}
className='mr-2'
/>
<div className='flex items-center gap-1'>
<TextLabel>
{t('datasetCreation.stepTwo.QALanguage')}
</TextLabel>
<div className='z-50 relative'>
<LanguageSelect
currentLanguage={docLanguage || locale}
onSelect={setDocLanguage}
disabled={isLanguageSelectDisabled}
/>
</div>
<Tooltip popupContent={t('datasetCreation.stepTwo.QATip')} />
</div>
<Tooltip popupContent={t('datasetCreation.stepTwo.qaTip')} />
</div>
</div>}
{docForm === DocForm.QA && (
<div
style={{
background: 'linear-gradient(92deg, rgba(247, 144, 9, 0.1) 0%, rgba(255, 255, 255, 0.00) 100%)',
}}
className='h-10 flex items-center gap-2 rounded-xl border-components-panel-border border shadow-shadow-shadow-3 px-3 text-xs'
>
<RiAlertFill className='size-4 text-text-warning-secondary' />
<span className='text-sm font-medium text-text-primary'>
{t('datasetCreation.stepTwo.QATip')}
</span>
</div>
)}
</>}
</div>
</OptionCard>
<OptionCard
@ -826,33 +837,6 @@ const StepTwo = ({
<Link className='text-text-accent' href={`/datasets/${datasetId}/settings`}>{t('datasetCreation.stepTwo.datasetSettingLink')}</Link>
</div>
)}
{IS_CE_EDITION && indexType === IndexingType.QUALIFIED && (
<div className='mt-2 rounded-xl bg-gray-50 border border-gray-100'>
<div className='flex justify-between items-center px-5 py-4'>
<div className='flex justify-center items-center w-8 h-8 rounded-lg bg-indigo-50'>
<MessageChatSquare className='w-4 h-4' />
</div>
<div className='grow mx-3'>
<div className='mb-0.5 text-md font-medium text-gray-900'>{t('datasetCreation.stepTwo.QATitle')}</div>
<div className='inline-flex items-center text-[13px] leading-[18px] text-gray-500'>
<span className='pr-1'>{t('datasetCreation.stepTwo.QALanguage')}</span>
<LanguageSelect currentLanguage={docLanguage} onSelect={handleSelect} disabled={isLanguageSelectDisabled} />
</div>
</div>
<Switch
defaultValue={docForm === DocForm.QA}
onChange={handleDocformSwitch}
size='md'
/>
</div>
{docForm === DocForm.QA && !QATipHide && (
<div className='flex justify-between items-center px-5 py-2 bg-orange-50 border-t border-amber-100 rounded-b-xl text-[13px] leading-[18px] text-medium text-amber-500'>
{t('datasetCreation.stepTwo.QATip')}
<RiCloseLine className='w-4 h-4 text-gray-500 cursor-pointer' onClick={() => setQATipHide(true)} />
</div>
)}
</div>
)}
{/* Embedding model */}
{indexType === IndexingType.QUALIFIED && (
<div className='mt-6 my-2'>
@ -958,11 +942,6 @@ const StepTwo = ({
</ChunkContainer>
))
)}
{docForm === DocForm.QA && !estimate?.qa_preview && (
<div className='flex items-center justify-center h-[200px]'>
<Loading type='area' />
</div>
)}
{currentEstimateMutation.isIdle && (
<div className='h-full w-full flex items-center justify-center'>
<div className='flex flex-col items-center justify-center gap-3'>

View File

@ -22,7 +22,7 @@ type OptionCardHeaderProps = {
export const OptionCardHeader: FC<OptionCardHeaderProps> = (props) => {
const { icon, title, description, isActive, activeClassName, effectImg } = props
return <div className={classNames(
'flex h-full overflow-hidden relative',
'flex h-full overflow-hidden rounded-xl relative',
isActive && activeClassName,
)}>
<div className='size-14 flex items-center justify-center relative overflow-hidden'>
@ -56,7 +56,7 @@ export const OptionCard: FC<OptionCardProps> = (props) => {
const { icon, className, title, description, isActive, children, actions, activeHeaderClassName, style, effectImg, ...rest } = props
return <div
className={classNames(
'rounded-xl overflow-hidden',
'rounded-xl',
isActive ? 'border-components-option-card-option-selected-border bg-components-panel-bg' : 'border-components-option-card-option-border bg-components-option-card-option-bg',
className,
)}

View File

@ -13,7 +13,6 @@ import s from './style.module.css'
import Loading from '@/app/components/base/loading'
import Button from '@/app/components/base/button'
import Input from '@/app/components/base/input'
import Pagination from '@/app/components/base/pagination'
import { get } from '@/service/base'
import { createDocument, fetchDocuments } from '@/service/datasets'
import { useDatasetDetailContext } from '@/context/dataset-detail'
@ -22,8 +21,6 @@ import type { NotionPage } from '@/models/common'
import type { CreateDocumentReq } from '@/models/datasets'
import { DataSourceType } from '@/models/datasets'
import RetryButton from '@/app/components/base/retry-button'
// Custom page count is not currently supported.
const limit = 15
const FolderPlusIcon = ({ className }: React.SVGProps<SVGElement>) => {
return <svg width="20" height="20" viewBox="0 0 20 20" fill="none" xmlns="http://www.w3.org/2000/svg" className={className ?? ''}>
@ -75,12 +72,14 @@ type IDocumentsProps = {
}
export const fetcher = (url: string) => get(url, {}, {})
const DEFAULT_LIMIT = 15
const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
const { t } = useTranslation()
const [inputValue, setInputValue] = useState<string>('') // the input value
const [searchValue, setSearchValue] = useState<string>('')
const [currPage, setCurrPage] = React.useState<number>(0)
const [limit, setLimit] = useState<number>(DEFAULT_LIMIT)
const router = useRouter()
const { dataset } = useDatasetDetailContext()
const [notionPageSelectorModalVisible, setNotionPageSelectorModalVisible] = useState(false)
@ -94,7 +93,7 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
const query = useMemo(() => {
return { page: currPage + 1, limit, keyword: debouncedSearchValue, fetch: isDataSourceNotion ? true : '' }
}, [currPage, debouncedSearchValue, isDataSourceNotion])
}, [currPage, debouncedSearchValue, isDataSourceNotion, limit])
const { data: documentsRes, error, mutate } = useSWR(
{
@ -196,7 +195,7 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
}
const documentsList = isDataSourceNotion ? documentsWithProgress?.data : documentsRes?.data
const [selectedIds, setSelectedIds] = useState<string[]>([])
const { run: handleSearch } = useDebounceFn(() => {
setSearchValue(inputValue)
}, { wait: 500 })
@ -246,13 +245,22 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
{isLoading
? <Loading type='app' />
: total > 0
? <List embeddingAvailable={embeddingAvailable} documents={documentsList || []} datasetId={datasetId} onUpdate={mutate} />
? <List
embeddingAvailable={embeddingAvailable}
documents={documentsList || []}
datasetId={datasetId} onUpdate={mutate}
selectedIds={selectedIds}
onSelectedIdChange={setSelectedIds}
pagination={{
total,
limit,
onLimitChange: setLimit,
current: currPage,
onChange: setCurrPage,
}}
/>
: <EmptyElement canAdd={embeddingAvailable} onClick={routeToDocCreate} type={isDataSourceNotion ? 'sync' : 'upload'} />
}
{/* Show Pagination only if the total is more than the limit */}
{(total && total > limit)
? <Pagination current={currPage} onChange={setCurrPage} total={total} limit={limit} />
: null}
<NotionPageSelectorModal
isShow={notionPageSelectorModalVisible}
onClose={() => setNotionPageSelectorModalVisible(false)}

View File

@ -1,9 +1,9 @@
'use client'
import type { FC } from 'react'
import React, { useCallback, useEffect, useState } from 'react'
import React, { useCallback, useEffect, useMemo, useState } from 'react'
import { useBoolean, useDebounceFn } from 'ahooks'
import { ArrowDownIcon } from '@heroicons/react/24/outline'
import { pick } from 'lodash-es'
import { pick, uniq } from 'lodash-es'
import {
RiArchive2Line,
RiDeleteBinLine,
@ -18,6 +18,7 @@ import { useTranslation } from 'react-i18next'
import dayjs from 'dayjs'
import { Edit03 } from '../../base/icons/src/vender/solid/general'
import { Globe01 } from '../../base/icons/src/vender/line/mapsAndTravel'
import ChunkingModeLabel from '../common/chunking-mode-label'
import s from './style.module.css'
import RenameModal from './rename-modal'
import cn from '@/utils/classnames'
@ -34,9 +35,13 @@ import { formatNumber } from '@/utils/format'
import { archiveDocument, deleteDocument, disableDocument, enableDocument, syncDocument, syncWebsite, unArchiveDocument } from '@/service/datasets'
import NotionIcon from '@/app/components/base/notion-icon'
import ProgressBar from '@/app/components/base/progress-bar'
import { DataSourceType, type DocumentDisplayStatus, type SimpleDocumentDetail } from '@/models/datasets'
import { ChuckingMode, DataSourceType, type DocumentDisplayStatus, type SimpleDocumentDetail } from '@/models/datasets'
import type { CommonResponse } from '@/models/common'
import useTimestamp from '@/hooks/use-timestamp'
import { useDatasetDetailContextWithSelector as useDatasetDetailContext } from '@/context/dataset-detail'
import type { Props as PaginationProps } from '@/app/components/base/pagination'
import Pagination from '@/app/components/base/pagination'
import Checkbox from '@/app/components/base/checkbox'
export const useIndexStatus = () => {
const { t } = useTranslation()
@ -378,17 +383,32 @@ type LocalDoc = SimpleDocumentDetail & { percent?: number }
/**
 * Props accepted by the DocumentList component.
 */
type IDocumentListProps = {
// NOTE(review): presumably gates actions that need an embedding model — its use is not visible here, confirm
embeddingAvailable: boolean
// Documents to display; used as the initial value of the component's local document state
documents: LocalDoc[]
// Ids of the currently selected documents (a row's checkbox is checked when its id is in this list)
selectedIds: string[]
// Called with the complete next list of selected ids whenever a checkbox is toggled
onSelectedIdChange: (selectedIds: string[]) => void
// Id of the dataset; used to build the document detail route
datasetId: string
// Props forwarded to the Pagination component rendered with the table
pagination: PaginationProps
// Called with no arguments to notify the parent that document data changed
onUpdate: () => void
}
/**
* Document list component including basic information
*/
const DocumentList: FC<IDocumentListProps> = ({ embeddingAvailable, documents = [], datasetId, onUpdate }) => {
const DocumentList: FC<IDocumentListProps> = ({
embeddingAvailable,
documents = [],
selectedIds,
onSelectedIdChange,
datasetId,
pagination,
onUpdate,
}) => {
const { t } = useTranslation()
const { formatTime } = useTimestamp()
const router = useRouter()
const [datasetConfig] = useDatasetDetailContext(s => [s.dataset])
const chunkingMode = datasetConfig?.doc_form
const isGeneralMode = chunkingMode !== ChuckingMode.parentChild
const isQAMode = chunkingMode === ChuckingMode.qa
const [localDocs, setLocalDocs] = useState<LocalDoc[]>(documents)
const [enableSort, setEnableSort] = useState(false)
@ -420,17 +440,43 @@ const DocumentList: FC<IDocumentListProps> = ({ embeddingAvailable, documents =
onUpdate()
}, [onUpdate])
const isAllSelected = useMemo(() => {
return localDocs.length > 0 && localDocs.every(doc => selectedIds.includes(doc.id))
}, [localDocs, selectedIds])
const isSomeSelected = useMemo(() => {
return localDocs.some(doc => selectedIds.includes(doc.id))
}, [localDocs, selectedIds])
const onSelectedAll = useCallback(() => {
if (isAllSelected)
onSelectedIdChange([])
else
onSelectedIdChange(uniq([...selectedIds, ...localDocs.map(doc => doc.id)]))
}, [isAllSelected, localDocs, onSelectedIdChange, selectedIds])
return (
<div className='w-full h-full overflow-x-auto'>
<div className='relative w-full h-full overflow-x-auto'>
<table className={`min-w-[700px] max-w-full w-full border-collapse border-0 text-sm mt-3 ${s.documentTable}`}>
<thead className="h-8 leading-8 border-b border-gray-200 text-gray-500 font-medium text-xs uppercase">
<tr>
<td className='w-12'>#</td>
<td className='w-12'>
<div className='flex items-center' onClick={e => e.stopPropagation()}>
<Checkbox
className='shrink-0 mr-2'
checked={isAllSelected}
mixed={!isAllSelected && isSomeSelected}
onCheck={onSelectedAll}
/>
#
</div>
</td>
<td>
<div className='flex'>
{t('datasetDocuments.list.table.header.fileName')}
</div>
</td>
<td className='w-[130px]'>{t('datasetDocuments.list.table.header.chunkingMode')}</td>
<td className='w-24'>{t('datasetDocuments.list.table.header.words')}</td>
<td className='w-44'>{t('datasetDocuments.list.table.header.hitCount')}</td>
<td className='w-44'>
@ -444,7 +490,7 @@ const DocumentList: FC<IDocumentListProps> = ({ embeddingAvailable, documents =
</tr>
</thead>
<tbody className="text-gray-700">
{localDocs.map((doc) => {
{localDocs.map((doc, index) => {
const isFile = doc.data_source_type === DataSourceType.FILE
const fileType = isFile ? doc.data_source_detail_dict?.upload_file?.extension : ''
return <tr
@ -453,7 +499,24 @@ const DocumentList: FC<IDocumentListProps> = ({ embeddingAvailable, documents =
onClick={() => {
router.push(`/datasets/${datasetId}/documents/${doc.id}`)
}}>
<td className='text-left align-middle text-gray-500 text-xs'>{doc.position}</td>
<td className='text-left align-middle text-text-tertiary text-xs'>
<div className='flex items-center' onClick={e => e.stopPropagation()}>
<Checkbox
className='shrink-0 mr-2'
checked={selectedIds.includes(doc.id)}
onCheck={() => {
onSelectedIdChange(
selectedIds.includes(doc.id)
? selectedIds.filter(id => id !== doc.id)
: [...selectedIds, doc.id],
)
}}
/>
{/* {doc.position} */}
{index + 1}
</div>
</td>
<td>
<div className='group flex items-center justify-between'>
<span className={s.tdValue}>
@ -482,11 +545,16 @@ const DocumentList: FC<IDocumentListProps> = ({ embeddingAvailable, documents =
</Tooltip>
</div>
</div>
</td>
<td>
<ChunkingModeLabel
isGeneralMode={isGeneralMode}
isQAMode={isQAMode}
/>
</td>
<td>{renderCount(doc.word_count)}</td>
<td>{renderCount(doc.hit_count)}</td>
<td className='text-gray-500 text-[13px]'>
<td className='text-text-secondary text-[13px]'>
{formatTime(doc.created_at, t('datasetHitTesting.dateTimeFormat') as string)}
</td>
<td>
@ -508,6 +576,13 @@ const DocumentList: FC<IDocumentListProps> = ({ embeddingAvailable, documents =
})}
</tbody>
</table>
{/* Show Pagination only if the total is more than the limit */}
{pagination.total && pagination.total > (pagination.limit || 10) && (
<Pagination
{...pagination}
className='absolute bottom-0 left-0 w-full px-0 pb-0'
/>
)}
{isShowRenameModal && currDocument && (
<RenameModal

View File

@ -1,8 +1,15 @@
import { createContext, useContext } from 'use-context-selector'
import { createContext, useContext, useContextSelector } from 'use-context-selector'
import type { DataSet } from '@/models/datasets'
const DatasetDetailContext = createContext<{ indexingTechnique?: string; dataset?: DataSet; mutateDatasetRes?: () => void }>({})
/**
 * Shape of the value carried by DatasetDetailContext.
 * Every field is optional, so an empty object is a valid value.
 */
type DatasetDetailContextValue = {
// NOTE(review): presumably the dataset's indexing technique identifier — confirm against the provider
indexingTechnique?: string
// The dataset record, when one is available
dataset?: DataSet
// NOTE(review): presumably re-fetches the dataset response — confirm against the provider
mutateDatasetRes?: () => void
}
const DatasetDetailContext = createContext<DatasetDetailContextValue>({})
export const useDatasetDetailContext = () => useContext(DatasetDetailContext)
/**
 * Reads a slice of DatasetDetailContext chosen by `selector`.
 *
 * The hook is generic over the selector's return type, so callers get the
 * selected value with its real type instead of `any`.
 *
 * Note: use-context-selector decides whether to re-render by comparing the
 * selected value, so prefer selecting a stable value (e.g. `s => s.dataset`)
 * over building a new array or object inside the selector.
 *
 * @param selector Maps the full context value to the part the caller needs.
 * @returns Whatever `selector` returns for the current context value.
 */
export const useDatasetDetailContextWithSelector = <T>(selector: (value: DatasetDetailContextValue) => T): T => {
  return useContextSelector(DatasetDetailContext, selector)
}
export default DatasetDetailContext

View File

@ -8,7 +8,8 @@ const translation = {
addUrl: 'Add URL',
table: {
header: {
fileName: 'FILE NAME',
fileName: 'NAME',
chunkingMode: 'CHUNKING MODE',
words: 'WORDS',
hitCount: 'RETRIEVAL COUNT',
uploadTime: 'UPLOAD TIME',

View File

@ -1,5 +1,9 @@
const translation = {
knowledge: 'Knowledge',
chunkingMode: {
general: 'General',
parentChild: 'Parent-child',
},
externalTag: 'External',
externalAPI: 'External API',
externalAPIPanelTitle: 'External Knowledge API',

View File

@ -7,7 +7,8 @@ const translation = {
addUrl: '添加 URL',
table: {
header: {
fileName: '文件名',
fileName: '名称',
chunkingMode: '分段模式',
words: '字符数',
hitCount: '召回次数',
uploadTime: '上传时间',

View File

@ -1,5 +1,9 @@
const translation = {
knowledge: '知识库',
chunkingMode: {
general: '通用',
parentChild: '父子',
},
externalTag: '外部',
externalAPI: '外部 API',
externalAPIPanelTitle: '外部知识库 API',

View File

@ -10,6 +10,12 @@ export enum DataSourceType {
export type DatasetPermission = 'only_me' | 'all_team_members' | 'partial_members'
/**
 * Chunking modes a dataset can use; the member values are the strings
 * carried by a dataset's `doc_form` field.
 *
 * NOTE(review): the identifier looks like a misspelling of "ChunkingMode";
 * it is left as-is because the name is exported and imported elsewhere.
 */
export enum ChuckingMode {
  /** General text */
  text = 'text_model',
  /** General QA */
  qa = 'qa_model',
  /** Parent-Child */
  parentChild = 'hierarchical_model',
}
export type DataSet = {
id: string
name: string
@ -23,6 +29,7 @@ export type DataSet = {
updated_by: string
updated_at: number
app_count: number
doc_form: ChuckingMode
document_count: number
word_count: number
provider: string
@ -170,7 +177,10 @@ export type IndexingStatusBatchResponse = {
data: IndexingStatusResponse[]
}
export type ProcessMode = 'custom' | 'hierarchical'
/**
 * Document process modes, keyed by a readable name and valued by the raw
 * mode string.
 */
export enum ProcessMode {
  /** General mode; its raw value is 'custom' */
  general = 'custom',
  /** Parent-child mode; its raw value is 'hierarchical' */
  parentChild = 'hierarchical',
}
export type ParentMode = 'full-doc' | 'paragraph'
@ -269,6 +279,7 @@ export type InitialDocumentDetail = {
export type SimpleDocumentDetail = InitialDocumentDetail & {
enabled: boolean
word_count: number
is_qa: boolean // TODO waiting for backend to add this field
error?: string | null
archived: boolean
updated_at: number