Merge branch 'feat/parent-child-retrieval' of https://github.com/langgenius/dify into feat/parent-child-retrieval

This commit is contained in:
AkaraChen 2024-12-13 14:57:18 +08:00
commit b2322aca27
12 changed files with 155 additions and 39 deletions

View File

@ -1,4 +1,4 @@
import React, { type FC, useState } from 'react'
import React, { type FC, useMemo, useState } from 'react'
import { useTranslation } from 'react-i18next'
import {
RiCloseLine,
@ -56,18 +56,33 @@ const ChildSegmentDetail: FC<IChildSegmentDetailProps> = ({
onUpdate(chunkId, childChunkInfo?.id || '', content)
}
// Header label: the formatted length of `content` followed by the count-aware "characters" unit.
const wordCountText = useMemo(() => {
  const count = content.length
  return `${formatNumber(count)} ${t('datasetDocuments.segment.characters', { count })}`
  // `t` is listed as a dependency so the label is rebuilt whenever the translation function changes.
}, [content.length, t])
// Header label: the translated "edited at" prefix followed by the child chunk's formatted `updated_at`.
const EditTimeText = useMemo(() => {
  const timeText = formatTime({
    // A missing `updated_at` falls back to 0; the value is scaled by 1000 before formatting
    // (presumably seconds -> milliseconds; confirm against the API contract).
    date: (childChunkInfo?.updated_at ?? 0) * 1000,
    dateFormat: 'MM/DD/YYYY h:mm:ss',
  })
  return `${t('datasetDocuments.segment.editedAt')} ${timeText}`
  // `t` is listed as a dependency so the label is rebuilt whenever the translation function changes.
}, [childChunkInfo?.updated_at, t])
return (
<div className={'flex flex-col h-full'}>
<div className={classNames('flex items-center justify-between', fullScreen ? 'py-3 pr-4 pl-6 border border-divider-subtle' : 'pt-3 pr-3 pl-4')}>
<div className='flex flex-col'>
<div className='text-text-primary system-xl-semibold'>{'Edit Child Chunk'}</div>
<div className='text-text-primary system-xl-semibold'>{t('datasetDocuments.segment.editChildChunk')}</div>
<div className='flex items-center gap-x-2'>
<SegmentIndexTag positionId={childChunkInfo?.position || ''} labelPrefix='Child-Chunk' />
<SegmentIndexTag positionId={childChunkInfo?.position || ''} labelPrefix={t('datasetDocuments.segment.childChunk') as string} />
<Dot />
<span className='text-text-tertiary system-xs-medium'>{formatNumber(content.length)} {t('datasetDocuments.segment.characters')}</span>
<span className='text-text-tertiary system-xs-medium'>{wordCountText}</span>
<Dot />
<span className='text-text-tertiary system-xs-medium'>
{`Edited at ${formatTime({ date: (childChunkInfo?.created_at ?? 0) * 1000, dateFormat: 'MM/DD/YYYY h:mm:ss' })}`}
{EditTimeText}
</span>
</div>
</div>

View File

@ -1,11 +1,13 @@
import { type FC, useMemo, useState } from 'react'
import { RiArrowDownSLine, RiArrowRightSLine } from '@remixicon/react'
import { useTranslation } from 'react-i18next'
import { EditSlice } from '../../../formatted-text/flavours/edit-slice'
import { useDocumentContext } from '../index'
import type { ChildChunkDetail } from '@/models/datasets'
import Input from '@/app/components/base/input'
import classNames from '@/utils/classnames'
import Divider from '@/app/components/base/divider'
import { formatNumber } from '@/utils/format'
type IChildSegmentCardProps = {
childChunks: ChildChunkDetail[]
@ -30,6 +32,7 @@ const ChildSegmentList: FC<IChildSegmentCardProps> = ({
total,
inputValue,
}) => {
const { t } = useTranslation()
const parentMode = useDocumentContext(s => s.parentMode)
const [collapsed, setCollapsed] = useState(true)
@ -50,10 +53,25 @@ const ChildSegmentList: FC<IChildSegmentCardProps> = ({
return enabled ? '' : 'opacity-50 group-hover/card:opacity-100'
}, [enabled])
// Heading text for the child chunk list: "<amount> <pluralized 'child chunks' label>".
// - Full-doc mode uses `total`; when it is missing or 0 the amount is rendered as '--'.
// - Otherwise the amount is the number of child chunks currently in `childChunks`.
const totalText = useMemo(() => {
  // Numeric count drives pluralization; a missing total counts as 0.
  const count = isFullDocMode ? (total || 0) : childChunks.length
  // Displayed amount: the formatted number, or the '--' placeholder when full-doc mode has no total.
  const text = (isFullDocMode && !total) ? '--' : formatNumber(count)
  return `${text} ${t('datasetDocuments.segment.childChunks', { count })}`
  // `t` is listed as a dependency so the heading is rebuilt whenever the translation function changes.
}, [isFullDocMode, total, childChunks.length, t])
return (
<div className={classNames('flex flex-col', contentOpacity, isParagraphMode ? 'p-1 pb-2' : 'px-3 grow overflow-y-hidden')}>
<div className={classNames('flex flex-col', contentOpacity, isParagraphMode ? 'p-1 pb-2' : 'px-3 grow')}>
{isFullDocMode ? <Divider type='horizontal' className='h-[1px] bg-divider-subtle my-1' /> : null}
<div className={classNames('flex items-center justify-between', isFullDocMode ? 'pt-2 pb-3' : '')}>
<div className={classNames('flex items-center justify-between', isFullDocMode ? 'pt-2 pb-3 sticky top-0 left-0 bg-components-panel-bg' : '')}>
<div className={classNames('h-7 flex items-center pl-1 pr-3 rounded-lg', (isParagraphMode && collapsed) ? 'bg-dataset-child-chunk-expand-btn-bg' : '')} onClick={(event) => {
event.stopPropagation()
toggleCollapse()
@ -67,16 +85,16 @@ const ChildSegmentList: FC<IChildSegmentCardProps> = ({
: (<RiArrowDownSLine className='w-4 h-4 text-text-secondary mr-0.5' />)
: null
}
<span className='text-text-secondary system-sm-semibold-uppercase'>{`${total} CHILD CHUNKS`}</span>
<span className='text-text-secondary system-sm-semibold-uppercase'>{totalText}</span>
<span className={classNames('text-text-quaternary text-xs font-medium pl-1.5', isParagraphMode ? 'hidden group-hover/card:inline-block' : '')}>·</span>
<button
className={classNames('px-1.5 py-1 text-components-button-secondary-accent-text system-xs-semibold', isParagraphMode ? 'hidden group-hover/card:inline-block' : '')}
className={classNames('px-1.5 py-1 text-components-button-secondary-accent-text system-xs-semibold-uppercase', isParagraphMode ? 'hidden group-hover/card:inline-block' : '')}
onClick={(event) => {
event.stopPropagation()
handleAddNewChildChunk?.(parentChunkId)
}}
>
ADD
{t('common.operation.add')}
</button>
</div>
{isFullDocMode
@ -91,14 +109,14 @@ const ChildSegmentList: FC<IChildSegmentCardProps> = ({
: null}
</div>
{(isFullDocMode || !collapsed)
? <div className={classNames('flex gap-x-0.5', isFullDocMode ? 'grow overflow-y-auto' : '')}>
? <div className={classNames('flex gap-x-0.5', isFullDocMode ? 'grow' : '')}>
{isParagraphMode && <Divider type='vertical' className='h-auto w-[2px] mx-[7px] bg-text-accent-secondary' />}
<div className={classNames('w-full !leading-5 flex flex-col', isParagraphMode ? 'gap-y-2' : 'gap-y-3')}>
{childChunks.map((childChunk) => {
const edited = childChunk.updated_at !== childChunk.created_at
return <EditSlice
key={childChunk.id}
label={`C-${childChunk.position}${edited ? ' · EDITED' : ''}`}
label={`C-${childChunk.position}${edited ? ` · ${t('datasetDocuments.segment.edited')}` : ''}`}
text={childChunk.content}
onDelete={() => onDelete?.(childChunk.segment_id, childChunk.id)}
className='line-clamp-3'

View File

@ -1,4 +1,5 @@
import React, { type FC } from 'react'
import { useTranslation } from 'react-i18next'
import { RiLineHeight } from '@remixicon/react'
import Tooltip from '@/app/components/base/tooltip'
import { Collapse } from '@/app/components/base/icons/src/public/knowledge'
@ -12,9 +13,11 @@ const DisplayToggle: FC<DisplayToggleProps> = ({
isCollapsed,
toggleCollapsed,
}) => {
const { t } = useTranslation()
return (
<Tooltip
popupContent={isCollapsed ? 'Expand chunks' : 'Collapse chunks'}
popupContent={isCollapsed ? t('datasetDocuments.segment.expandChunks') : t('datasetDocuments.segment.collapseChunks')}
popupClassName='text-text-secondary system-xs-medium border-[0.5px] border-components-panel-border'
>
<button

View File

@ -328,8 +328,14 @@ const Completed: FC<ICompletedProps> = ({
}, [segments, isAllSelected, selectedSegmentIds])
const totalText = useMemo(() => {
return segmentListData?.total ? formatNumber(segmentListData.total) : '--'
}, [segmentListData?.total])
const total = segmentListData?.total ? formatNumber(segmentListData.total) : '--'
const count = total === '--' ? 0 : segmentListData!.total
const translationKey = (mode === 'hierarchical' && parentMode === 'paragraph')
? 'datasetDocuments.segment.parentChunks'
: 'datasetDocuments.segment.chunks'
return `${total} ${t(translationKey, { count })}`
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [segmentListData?.total, mode, parentMode])
const toggleFullScreen = useCallback(() => {
setFullScreen(!fullScreen)
@ -480,7 +486,7 @@ const Completed: FC<ICompletedProps> = ({
mixed={!isAllSelected && isSomeSelected}
onCheck={onSelectedAll}
/>
<div className={cn('system-sm-semibold-uppercase pl-5', s.totalText)}>{totalText} {t('datasetDocuments.segment.chunks')}</div>
<div className={cn('system-sm-semibold-uppercase pl-5', s.totalText)}>{totalText}</div>
<SimpleSelect
onSelect={onChangeStatus}
items={[
@ -505,7 +511,7 @@ const Completed: FC<ICompletedProps> = ({
{/* Segment list */}
{
isFullDocMode
? <>
? <div className='grow relative overflow-x-hidden overflow-y-auto'>
<SegmentCard
detail={segments[0]}
onClick={() => onClickCard(segments[0])}
@ -522,7 +528,7 @@ const Completed: FC<ICompletedProps> = ({
total={childChunkListData?.total || 0}
inputValue={inputValue}
/>
</>
</div>
: <SegmentList
ref={segmentListRef}
embeddingAvailable={embeddingAvailable}

View File

@ -102,17 +102,21 @@ const NewChildSegmentModal: FC<NewChildSegmentModalProps> = ({
}
}
// Header label: the formatted length of `content` followed by the count-aware "characters" unit.
const wordCountText = useMemo(() => {
  const count = content.length
  return `${formatNumber(count)} ${t('datasetDocuments.segment.characters', { count })}`
  // `t` is listed as a dependency so the label is rebuilt whenever the translation function changes.
}, [content.length, t])
return (
<div className={'flex flex-col h-full'}>
<div className={classNames('flex items-center justify-between', fullScreen ? 'py-3 pr-4 pl-6 border border-divider-subtle' : 'pt-3 pr-3 pl-4')}>
<div className='flex flex-col'>
<div className='text-text-primary system-xl-semibold'>{
t('datasetDocuments.segment.addChildChunk')
}</div>
<div className='text-text-primary system-xl-semibold'>{t('datasetDocuments.segment.addChildChunk')}</div>
<div className='flex items-center gap-x-2'>
<SegmentIndexTag label={'New Child Chunk'} />
<SegmentIndexTag label={t('datasetDocuments.segment.newChildChunk') as string} />
<Dot />
<span className='text-text-tertiary system-xs-medium'>{formatNumber(content.length)} {t('datasetDocuments.segment.characters')}</span>
<span className='text-text-tertiary system-xs-medium'>{wordCountText}</span>
</div>
</div>
<div className='flex items-center'>

View File

@ -111,6 +111,17 @@ const SegmentCard: FC<ISegmentCardProps> = ({
return content
}
// Card label: the formatted `word_count` followed by the count-aware "characters" unit.
const wordCountText = useMemo(() => {
  const total = formatNumber(word_count)
  return `${total} ${t('datasetDocuments.segment.characters', { count: word_count })}`
  // `t` is listed as a dependency so the label is rebuilt whenever the translation function changes.
}, [word_count, t])
// Prefix for the segment index tag: the "parent chunk" label in parent-child mode, otherwise "chunk".
const labelPrefix = useMemo(() => {
  return isParentChildMode ? t('datasetDocuments.segment.parentChunk') : t('datasetDocuments.segment.chunk')
  // `t` is listed as a dependency so the prefix is rebuilt whenever the translation function changes.
}, [isParentChildMode, t])
return (
<div
className={cn('w-full px-3 rounded-xl group/card', isFullDocMode ? '' : 'pt-2.5 pb-2 hover:bg-dataset-chunk-detail-card-hover-bg', className)}
@ -119,15 +130,15 @@ const SegmentCard: FC<ISegmentCardProps> = ({
<div className='h-5 relative flex items-center justify-between'>
<>
<div className='flex items-center gap-x-2'>
<SegmentIndexTag positionId={position} className={textOpacity} labelPrefix={`${isParentChildMode ? 'Parent-' : ''}Chunk`} />
<SegmentIndexTag positionId={position} className={textOpacity} labelPrefix={labelPrefix} />
<Dot />
<div className={cn('text-text-tertiary system-xs-medium', textOpacity)}>{`${formatNumber(word_count)} Characters`}</div>
<div className={cn('text-text-tertiary system-xs-medium', textOpacity)}>{wordCountText}</div>
<Dot />
<div className={cn('text-text-tertiary system-xs-medium', textOpacity)}>{`${formatNumber(hit_count)} Retrieval Count`}</div>
<div className={cn('text-text-tertiary system-xs-medium', textOpacity)}>{`${formatNumber(hit_count)} ${t('datasetDocuments.segment.hitCount')}`}</div>
{chunkEdited && (
<>
<Dot />
<Badge text='edited' uppercase className={textOpacity} />
<Badge text={t('datasetDocuments.segment.edited') as string} uppercase className={textOpacity} />
</>
)}
</div>

View File

@ -80,15 +80,32 @@ const SegmentDetail: FC<ISegmentDetailProps> = ({
return mode === 'hierarchical'
}, [mode])
// Panel title: the "edit chunk" label while editing, otherwise the "chunk detail" label.
const titleText = useMemo(() => {
  return isEditMode ? t('datasetDocuments.segment.editChunk') : t('datasetDocuments.segment.chunkDetail')
  // `t` is listed as a dependency so the title is rebuilt whenever the translation function changes.
}, [isEditMode, t])
// Header label: "<formatted count> <count-aware 'characters' unit>".
// While editing, the count is the current length of `question`; otherwise it is the stored
// `word_count` of the segment.
const wordCountText = useMemo(() => {
  // Optional access instead of `segInfo!`: a missing segment yields 0 rather than a runtime TypeError.
  const count = isEditMode ? question.length : (segInfo?.word_count ?? 0)
  return `${formatNumber(count)} ${t('datasetDocuments.segment.characters', { count })}`
  // `t` is listed as a dependency so the label is rebuilt whenever the translation function changes.
}, [isEditMode, question.length, segInfo?.word_count, t])
// Prefix for the segment index tag: the "parent chunk" label in parent-child mode, otherwise "chunk".
const labelPrefix = useMemo(() => {
  return isParentChildMode ? t('datasetDocuments.segment.parentChunk') : t('datasetDocuments.segment.chunk')
  // `t` is listed as a dependency so the prefix is rebuilt whenever the translation function changes.
}, [isParentChildMode, t])
return (
<div className={'flex flex-col h-full'}>
<div className={classNames('flex items-center justify-between', fullScreen ? 'py-3 pr-4 pl-6 border border-divider-subtle' : 'pt-3 pr-3 pl-4')}>
<div className='flex flex-col'>
<div className='text-text-primary system-xl-semibold'>{isEditMode ? 'Edit Chunk' : 'Chunk Detail'}</div>
<div className='text-text-primary system-xl-semibold'>{titleText}</div>
<div className='flex items-center gap-x-2'>
<SegmentIndexTag positionId={segInfo?.position || ''} labelPrefix={`${isParentChildMode ? 'Parent-' : ''}Chunk`} />
<SegmentIndexTag positionId={segInfo?.position || ''} labelPrefix={labelPrefix} />
<Dot />
<span className='text-text-tertiary system-xs-medium'>{formatNumber(isEditMode ? question.length : segInfo?.word_count as number)} {t('datasetDocuments.segment.characters')}</span>
<span className='text-text-tertiary system-xs-medium'>{wordCountText}</span>
</div>
</div>
<div className='flex items-center'>

View File

@ -1,4 +1,4 @@
import { memo, useRef, useState } from 'react'
import { memo, useMemo, useRef, useState } from 'react'
import type { FC } from 'react'
import { useTranslation } from 'react-i18next'
import { useContext } from 'use-context-selector'
@ -108,6 +108,12 @@ const NewSegmentModal: FC<NewSegmentModalProps> = ({
}
}
// Header label: the formatted length of `question` followed by the count-aware "characters" unit.
const wordCountText = useMemo(() => {
  const count = question.length
  return `${formatNumber(count)} ${t('datasetDocuments.segment.characters', { count })}`
  // `t` is listed as a dependency so the label is rebuilt whenever the translation function changes.
}, [question.length, t])
return (
<div className={'flex flex-col h-full'}>
<div className={classNames('flex items-center justify-between', fullScreen ? 'py-3 pr-4 pl-6 border border-divider-subtle' : 'pt-3 pr-3 pl-4')}>
@ -120,7 +126,7 @@ const NewSegmentModal: FC<NewSegmentModalProps> = ({
<div className='flex items-center gap-x-2'>
<SegmentIndexTag label={'New Chunk'} />
<Dot />
<span className='text-text-tertiary system-xs-medium'>{formatNumber(question.length)} {t('datasetDocuments.segment.characters')}</span>
<span className='text-text-tertiary system-xs-medium'>{wordCountText}</span>
</div>
</div>
<div className='flex items-center'>

View File

@ -49,7 +49,7 @@ const ResultItem: FC<Props> = ({
className={cn('w-fit group-hover:opacity-100')}
/>
<Dot />
<div className='system-xs-medium text-text-tertiary'>{word_count} {t('datasetDocuments.segment.characters')}</div>
<div className='system-xs-medium text-text-tertiary'>{word_count} {t('datasetDocuments.segment.characters', { count: word_count })}</div>
</div>
<Score value={score} />
</div>

View File

@ -42,7 +42,7 @@ const HitDetail: FC<IHitDetailProps> = ({ segInfo }) => {
/>
<div className={cn(s.commonIcon, s.typeSquareIcon)} />
<span className={cn('mr-6', s.numberInfo)}>
{segInfo?.word_count} {t('datasetDocuments.segment.characters')}
{segInfo?.word_count} {t('datasetDocuments.segment.characters', { count: segInfo?.word_count || 0 })}
</span>
<div className={cn(s.commonIcon, s.targetIcon)} />
<span className={s.numberInfo}>

View File

@ -333,11 +333,21 @@ const translation = {
},
segment: {
paragraphs: 'Paragraphs',
chunks: 'CHUNKS',
chunks_one: 'CHUNK',
chunks_other: 'CHUNKS',
parentChunks_one: 'PARENT CHUNK',
parentChunks_other: 'PARENT CHUNKS',
childChunks_one: 'CHILD CHUNK',
childChunks_other: 'CHILD CHUNKS',
chunk: 'Chunk',
parentChunk: 'Parent-Chunk',
childChunk: 'Child-Chunk',
newChildChunk: 'New Child Chunk',
keywords: 'KEYWORDS',
addKeyWord: 'Add keyword',
keywordError: 'The maximum length of keyword is 20',
characters: 'characters',
characters_one: 'character',
characters_other: 'characters',
hitCount: 'Retrieval count',
vectorHash: 'Vector hash: ',
questionPlaceholder: 'Add question here',
@ -354,12 +364,20 @@ const translation = {
delete: 'Delete this chunk ?',
chunkAdded: '1 chunk added',
childChunkAdded: '1 child chunk added',
editChunk: 'Edit Chunk',
editParentChunk: 'Edit Parent Chunk',
editChildChunk: 'Edit Child Chunk',
chunkDetail: 'Chunk Detail',
regenerationConfirmTitle: 'Do you want to regenerate child chunks?',
regenerationConfirmMessage: 'Regenerating child chunks will overwrite the current child chunks, including edited chunks and newly added chunks. The regeneration cannot be undone.',
regeneratingTitle: 'Regenerating child chunks',
regeneratingMessage: 'This may take a moment, please wait...',
regenerationSuccessTitle: 'Regeneration completed',
regenerationSuccessMessage: 'You can close this window.',
edited: 'EDITED',
editedAt: 'Edited at',
expandChunks: 'Expand chunks',
collapseChunks: 'Collapse chunks',
},
}

View File

@ -331,11 +331,21 @@ const translation = {
},
segment: {
paragraphs: '段落',
chunks: '段落',
chunks_one: '分段',
chunks_other: '分段',
parentChunks_one: '父分段',
parentChunks_other: '父分段',
childChunks_one: '子分段',
childChunks_other: '子分段',
chunk: '分段',
parentChunk: '父分段',
childChunk: '子分段',
newChildChunk: '新子分段',
keywords: '关键词',
addKeyWord: '添加关键词',
keywordError: '关键词最大长度为 20',
characters: '字符',
characters_one: '字符',
characters_other: '字符',
hitCount: '召回次数',
vectorHash: '向量哈希:',
questionPlaceholder: '在这里添加问题',
@ -348,16 +358,24 @@ const translation = {
newQaSegment: '新问答分段',
addChunk: '新增分段',
addChildChunk: '新增子分段',
addAnother: '续新增',
addAnother: '续新增',
delete: '删除这个分段?',
chunkAdded: '新增一个分段',
childChunkAdded: '新增一个子分段',
editChunk: '编辑分段',
editParentChunk: '编辑父分段',
editChildChunk: '编辑子分段',
chunkDetail: '分段详情',
regenerationConfirmTitle: '是否需要重新生成子分段?',
regenerationConfirmMessage: '重新生成的子分段将会覆盖当前的子分段,包括编辑过的分段和新添加的分段。重新生成操作无法撤销。',
regeneratingTitle: '正在生成子分段',
regeneratingMessage: '生成子分段需要一些时间,请耐心等待...',
regenerationSuccessTitle: '子分段已重新生成',
regenerationSuccessMessage: '可以关闭窗口',
edited: '已编辑',
editedAt: '编辑于',
expandChunks: '展开分段',
collapseChunks: '折叠分段',
},
}