feat: implement chunk structure card and related hooks for dataset creation; update translations and refactor pipeline template fetching

This commit is contained in:
twwu 2025-06-11 16:38:42 +08:00
parent 45c76c1d68
commit f995436eec
15 changed files with 190 additions and 45 deletions

View File

@ -8,19 +8,21 @@ const Header = () => {
const { t } = useTranslation()
return (
<Link
className='system-md-semibold relative flex px-16 pb-2 pt-5 text-text-primary'
href={'/datasets'}
replace
>
<div className='system-md-semibold relative flex px-16 pb-2 pt-5 text-text-primary'>
<span>{t('datasetPipeline.creation.title')}</span>
<Button
variant='secondary-accent'
className='absolute bottom-0 left-5 size-9 rounded-full p-0'
<Link
className='absolute bottom-0 left-5'
href={'/datasets'}
replace
>
<RiArrowLeftLine className='size-5 ' />
</Button>
</Link>
<Button
variant='secondary-accent'
className='size-9 rounded-full p-0'
>
<RiArrowLeftLine className='size-5 ' />
</Button>
</Link>
</div>
)
}

View File

@ -0,0 +1,66 @@
import React from 'react'
import cn from '@/utils/classnames'
import type { Option } from './types'
import { EffectColor } from './types'
/**
 * Tailwind classes (background colour + opacity) applied to the blurred
 * decorative circle in the card's top-left corner, keyed by effect colour.
 * Note that `EffectColor.green` maps to the teal palette, and
 * `EffectColor.none` contributes no classes.
 */
const HEADER_EFFECT_MAP: Record<EffectColor, string> = {
[EffectColor.indigo]: 'bg-util-colors-indigo-indigo-600 opacity-80',
[EffectColor.blueLight]: 'bg-util-colors-blue-light-blue-light-500 opacity-80',
[EffectColor.green]: 'bg-util-colors-teal-teal-600 opacity-80',
[EffectColor.none]: '',
}
/**
 * Tailwind background class for the small icon tile, keyed by effect colour.
 * `EffectColor.green` maps to the teal icon background, and
 * `EffectColor.none` contributes no class.
 */
const IconBackgroundColorMap: Record<EffectColor, string> = {
[EffectColor.indigo]: 'bg-components-icon-bg-indigo-solid',
[EffectColor.blueLight]: 'bg-components-icon-bg-blue-light-solid',
[EffectColor.green]: 'bg-components-icon-bg-teal-solid',
[EffectColor.none]: '',
}
/**
 * Props for `ChunkStructureCard`: every field of `Option` (icon, title,
 * optional description, effect colour) plus an optional `className` that is
 * merged onto the card's root element.
 */
type ChunkStructureCardProps = {
className?: string
} & Option
/**
 * Presentational card describing a chunk structure.
 *
 * Renders a coloured, blurred accent in the top-left corner, an icon tile
 * tinted according to `effectColor`, the title, and the description when one
 * is provided. The caller's `className` is merged onto the root element.
 */
const ChunkStructureCard = (props: ChunkStructureCardProps) => {
  const { className, icon, title, description, effectColor } = props

  // Colour-dependent classes are resolved once up front; the fixed layout
  // classes stay next to the elements they style.
  const accentColorClass = `${HEADER_EFFECT_MAP[effectColor]}`
  const iconTileColorClass = `${IconBackgroundColorMap[effectColor]}`

  // The description row is only rendered for a truthy description.
  const descriptionRow = description && (
    <div className='system-xs-regular text-text-tertiary'>
      {description}
    </div>
  )

  return (
    <div
      className={cn(
        'relative flex overflow-hidden rounded-xl border-[0.5px] border-components-panel-border-subtle bg-components-panel-bg p-2 shadow-xs shadow-shadow-shadow-3',
        className,
      )}
    >
      {/* Decorative blurred accent */}
      <div
        className={cn(
          'absolute -left-1 -top-1 size-14 rounded-full blur-[80px]',
          accentColorClass,
        )}
      />
      {/* Icon tile */}
      <div className='p-1'>
        <div
          className={cn(
            'flex size-6 shrink-0 items-center justify-center rounded-lg border-[0.5px] border-divider-subtle text-text-primary-on-surface shadow-md shadow-shadow-shadow-5',
            iconTileColorClass,
          )}
        >
          {icon}
        </div>
      </div>
      {/* Title and optional description */}
      <div className='flex grow flex-col gap-y-0.5 py-px'>
        <div className='flex items-center gap-x-1'>
          <span className='system-sm-medium text-text-secondary'>
            {title}
          </span>
        </div>
        {descriptionRow}
      </div>
    </div>
  )
}

// Memoized export, cast back to the component's own type.
export default React.memo(ChunkStructureCard) as typeof ChunkStructureCard

View File

@ -0,0 +1,36 @@
import { GeneralChunk, ParentChildChunk, QuestionAndAnswer } from '@/app/components/base/icons/src/vender/knowledge'
import { useTranslation } from 'react-i18next'
import { EffectColor, type Option } from './types'
import { ChunkingMode } from '@/models/datasets'
/**
 * Hook returning the display configuration (icon, title, translated
 * description and effect colour) for each `ChunkingMode`, ready to be spread
 * into a card component.
 *
 * Titles are fixed English labels; descriptions come from the
 * `datasetCreation.stepTwo.*Tip` translation keys.
 */
export const useChunkStructureConfig = () => {
  const { t } = useTranslation()

  // One entry per chunking mode, declared inline in the lookup table.
  const configByMode: Record<ChunkingMode, Option> = {
    [ChunkingMode.text]: {
      icon: <GeneralChunk className='size-4' />,
      title: 'General',
      description: t('datasetCreation.stepTwo.generalTip'),
      effectColor: EffectColor.indigo,
    },
    [ChunkingMode.parentChild]: {
      icon: <ParentChildChunk className='size-4' />,
      title: 'Parent-Child',
      description: t('datasetCreation.stepTwo.parentChildTip'),
      effectColor: EffectColor.blueLight,
    },
    [ChunkingMode.qa]: {
      icon: <QuestionAndAnswer className='size-4' />,
      title: 'Q&A',
      description: t('datasetCreation.stepTwo.qaTip'),
      effectColor: EffectColor.green,
    },
  }

  return configByMode
}

View File

@ -1,4 +1,4 @@
import React from 'react'
import React, { useMemo } from 'react'
import AppIcon from '@/app/components/base/app-icon'
import { usePipelineTemplateById } from '@/service/use-pipeline'
import type { AppIconType } from '@/types/app'
@ -7,6 +7,8 @@ import Button from '@/app/components/base/button'
import { useTranslation } from 'react-i18next'
import Tooltip from '@/app/components/base/tooltip'
import Loading from '@/app/components/base/loading'
import { useChunkStructureConfig } from './hooks'
import ChunkStructureCard from './chunk-structure-card'
import WorkflowPreview from '@/app/components/workflow/workflow-preview'
type DetailsProps = {
@ -23,16 +25,22 @@ const Details = ({
onClose,
}: DetailsProps) => {
const { t } = useTranslation()
const { data: pipelineTemplateInfo } = usePipelineTemplateById(id, type, true)
const appIcon = React.useMemo(() => {
const { data: pipelineTemplateInfo } = usePipelineTemplateById({
template_id: id,
type,
}, true)
const appIcon = useMemo(() => {
if (!pipelineTemplateInfo)
return { type: 'emoji', icon: '📙', background: '#FFF4ED' }
const iconInfo = pipelineTemplateInfo.icon
const iconInfo = pipelineTemplateInfo.icon_info
return iconInfo.icon_type === 'image'
? { type: 'image', url: iconInfo.icon_url || '', fileId: iconInfo.icon || '' }
: { type: 'icon', icon: iconInfo.icon || '', background: iconInfo.icon_background || '' }
}, [pipelineTemplateInfo])
const chunkStructureConfig = useChunkStructureConfig()
if (!pipelineTemplateInfo) {
return (
<Loading type='app' />
@ -42,9 +50,7 @@ const Details = ({
return (
<div className='flex h-full'>
<div className='flex grow items-center justify-center p-3 pr-0'>
<WorkflowPreview
{...pipelineTemplateInfo.export_data.workflow.graph}
/>
<WorkflowPreview {...pipelineTemplateInfo.graph} />
</div>
<div className='relative flex w-[360px] shrink-0 flex-col'>
<button
@ -68,7 +74,9 @@ const Details = ({
{pipelineTemplateInfo.name}
</div>
<div className='system-2xs-medium-uppercase text-text-tertiary'>
{`By ${pipelineTemplateInfo.author}`}
{t('datasetPipeline.details.createdBy', {
author: pipelineTemplateInfo.created_by,
})}
</div>
</div>
</div>
@ -86,14 +94,16 @@ const Details = ({
</Button>
</div>
<div className='flex flex-col gap-y-1 px-4 py-2'>
<div className='flex items-center gap-x-0.5'>
<div className='flex h-6 items-center gap-x-0.5'>
<span className='system-sm-semibold-uppercase text-text-secondary'>
{t('datasetPipeline.details.structure')}
</span>
<Tooltip
popupClassName='max-w-[240px]'
popupContent={t('datasetPipeline.details.structureTooltip')}
/>
</div>
<ChunkStructureCard {...chunkStructureConfig[pipelineTemplateInfo.chunk_structure]} />
</div>
</div>
</div>

View File

@ -0,0 +1,15 @@
import type { ReactNode } from 'react'
/**
 * Accent colour choices for an option card. `none` stands for
 * "no accent colour".
 */
export enum EffectColor {
indigo = 'indigo',
blueLight = 'blue-light',
green = 'green',
none = 'none',
}
/** Display data for a single chunk-structure option. */
export type Option = {
// Any renderable React node used as the option's icon.
icon: ReactNode
// Short label for the option.
title: string
// Optional longer explanatory text.
description?: string
// Accent colour associated with the option.
effectColor: EffectColor
}

View File

@ -40,7 +40,10 @@ const TemplateCard = ({
const [showDetailModal, setShowDetailModal] = useState(false)
const [showCreateModal, setShowCreateModal] = useState(false)
const { refetch: getPipelineTemplateInfo } = usePipelineTemplateById(pipeline.id, type, false)
const { refetch: getPipelineTemplateInfo } = usePipelineTemplateById({
template_id: pipeline.id,
type,
}, false)
const { mutateAsync: createEmptyDataset } = useCreatePipelineDataset()
const { handleCheckPluginDependencies } = usePluginDependencies()

View File

@ -5,14 +5,17 @@ import {
} from '@/app/components/base/icons/src/vender/knowledge'
import { EffectColor, type Option } from './types'
import { ChunkingMode } from '@/models/datasets'
import { useTranslation } from 'react-i18next'
export const useChunkStructure = () => {
const { t } = useTranslation()
const GeneralOption: Option = {
id: ChunkingMode.text,
icon: <GeneralChunk className='size-[18px]' />,
iconActiveColor: 'text-util-colors-indigo-indigo-600',
title: 'General',
description: 'General text chunking mode, the chunks retrieved and recalled are the same.',
description: t('datasetCreation.stepTwo.generalTip'),
effectColor: EffectColor.indigo,
showEffectColor: true,
}
@ -21,7 +24,7 @@ export const useChunkStructure = () => {
icon: <ParentChildChunk className='size-[18px]' />,
iconActiveColor: 'text-util-colors-blue-light-blue-light-500',
title: 'Parent-Child',
description: 'When using the parent-child mode, the child-chunk is used for retrieval and the parent-chunk is used for recall as context.',
description: t('datasetCreation.stepTwo.parentChildTip'),
effectColor: EffectColor.blueLight,
showEffectColor: true,
}
@ -29,7 +32,7 @@ export const useChunkStructure = () => {
id: ChunkingMode.qa,
icon: <QuestionAndAnswer className='size-[18px]' />,
title: 'Q&A',
description: 'When using structured Q&A data, you can create documents that pair questions with answers. These documents are indexed based on the question portion, allowing the system to retrieve relevant answers based on query similarity',
description: t('datasetCreation.stepTwo.qaTip'),
}
const options = [

View File

@ -41,7 +41,7 @@ export const useChunkStructure = () => {
id: ChunkStructureEnum.question_answer,
icon: <QuestionAndAnswer className='h-[18px] w-[18px] text-text-tertiary' />,
title: 'Question-Answer',
description: 'Question-answer text chunking mode, the chunks retrieved and recalled are different.',
description: t('datasetCreation.stepTwo.qaTip'),
}
const optionMap: Record<ChunkStructureEnum, Option> = {

View File

@ -123,6 +123,7 @@ const translation = {
paragraphTip: 'This mode splits the text in to paragraphs based on delimiters and the maximum chunk length, using the split text as the parent chunk for retrieval.',
fullDoc: 'Full Doc',
fullDocTip: 'The entire document is used as the parent chunk and retrieved directly. Please note that for performance reasons, text exceeding 10000 tokens will be automatically truncated.',
qaTip: 'When using structured Q&A data, you can create documents that pair questions with answers. These documents are indexed based on the question portion, allowing the system to retrieve relevant answers based on query similarity.',
separator: 'Delimiter',
separatorTip: 'A delimiter is the character used to separate text. \\n\\n and \\n are commonly used delimiters for separating paragraphs and lines. Combined with commas (\\n\\n,\\n), paragraphs will be segmented by lines when exceeding the maximum chunk length. You can also use special delimiters defined by yourself (e.g. ***).',
separatorPlaceholder: '\\n\\n for paragraphs; \\n for lines',

View File

@ -45,6 +45,7 @@ const translation = {
errorTip: 'Failed to export pipeline DSL',
},
details: {
createdBy: 'By {{author}}',
structure: 'Structure',
structureTooltip: 'Chunk Structure determines how documents are split and indexed—offering General, Parent-Child, and Q&A modes—and is unique to each knowledge base.',
},

View File

@ -123,6 +123,7 @@ const translation = {
paragraphTip: '此模式根据分隔符和最大块长度将文本拆分为段落,使用拆分文本作为检索的父块',
fullDoc: '全文',
fullDocTip: '整个文档用作父块并直接检索。请注意,出于性能原因,超过 10000 个标记的文本将被自动截断。',
qaTip: '使用 Q&A 模式时,块将被拆分为问题和答案对。检索时将使用问题部分进行检索,答案部分将作为上下文返回。',
separator: '分段标识符',
separatorTip: '分隔符是用于分隔文本的字符。\\n\\n 和 \\n 是常用于分隔段落和行的分隔符。用逗号连接分隔符(\\n\\n,\\n当段落超过最大块长度时会按行进行分割。你也可以使用自定义的特殊分隔符例如 ***)。',
separatorPlaceholder: '\\n\\n 用于分段;\\n 用于分行',

View File

@ -45,6 +45,7 @@ const translation = {
errorTip: '导出流水线 DSL 失败',
},
details: {
createdBy: '由 {{author}} 创建',
structure: '文档结构',
structureTooltip: '文档结构决定了文档的拆分和索引方式Dify 提供了通用、父子和问答模式,每个知识库的文档结构是唯一的。',
},

View File

@ -4,7 +4,7 @@ import type { Tag } from '@/app/components/base/tag-management/constant'
import type { IndexingType } from '@/app/components/datasets/create/step-two'
import type { MetadataFilteringVariableType } from '@/app/components/workflow/nodes/knowledge-retrieval/types'
import type { MetadataItemWithValue } from '@/app/components/datasets/metadata/types'
import { ExternalKnowledgeBase, General, Graph, ParentChild, Qa } from '@/app/components/base/icons/src/public/knowledge/dataset-card'
import { ExternalKnowledgeBase, General, ParentChild, Qa } from '@/app/components/base/icons/src/public/knowledge/dataset-card'
import { GeneralChunk, ParentChildChunk, QuestionAndAnswer } from '@/app/components/base/icons/src/vender/knowledge'
export enum DataSourceType {
@ -23,7 +23,7 @@ export enum ChunkingMode {
text = 'text_model', // General text
qa = 'qa_model', // General QA
parentChild = 'hierarchical_model', // Parent-Child
graph = 'graph', // Graph
// graph = 'graph', // todo: Graph RAG
}
export type MetadataInDoc = {
@ -720,7 +720,7 @@ export const DOC_FORM_ICON_WITH_BG: Record<ChunkingMode | 'external', React.Comp
[ChunkingMode.text]: General,
[ChunkingMode.qa]: Qa,
[ChunkingMode.parentChild]: ParentChild,
[ChunkingMode.graph]: Graph,
// [ChunkingMode.graph]: Graph, // todo: Graph RAG
external: ExternalKnowledgeBase,
}
@ -734,7 +734,7 @@ export const DOC_FORM_TEXT: Record<ChunkingMode, string> = {
[ChunkingMode.text]: 'general',
[ChunkingMode.qa]: 'qa',
[ChunkingMode.parentChild]: 'parentChild',
[ChunkingMode.graph]: 'graph',
// [ChunkingMode.graph]: 'graph', // todo: Graph RAG
}
export type CreateDatasetReq = {

View File

@ -29,24 +29,24 @@ export type PipelineTemplateListResponse = {
pipeline_templates: PipelineTemplate[]
}
export type PipelineTemplateByIdRequest = {
template_id: string
type: 'built-in' | 'customized'
}
export type PipelineTemplateByIdResponse = {
id: string
name: string
icon: IconInfo
icon_info: IconInfo
description: string
author: string // todo: TBD
structure: string // todo: TBD
export_data: {
workflow: {
graph: {
nodes: Node[]
edges: Edge[]
viewport: Viewport
}
environment_variables?: EnvironmentVariable[]
rag_pipeline_variables?: RAGPipelineVariables
}
chunk_structure: ChunkingMode
export_data: string // DSL content
graph: {
nodes: Node[]
edges: Edge[]
viewport: Viewport
}
created_by: string
}
export type CreateFormData = {

View File

@ -16,6 +16,7 @@ import type {
PipelinePreProcessingParamsResponse,
PipelineProcessingParamsRequest,
PipelineProcessingParamsResponse,
PipelineTemplateByIdRequest,
PipelineTemplateByIdResponse,
PipelineTemplateListParams,
PipelineTemplateListResponse,
@ -42,11 +43,16 @@ export const usePipelineTemplateList = (params: PipelineTemplateListParams) => {
})
}
export const usePipelineTemplateById = (templateId: string, type: string, enabled: boolean) => {
export const usePipelineTemplateById = (params: PipelineTemplateByIdRequest, enabled: boolean) => {
const { template_id, type } = params
return useQuery<PipelineTemplateByIdResponse>({
queryKey: [NAME_SPACE, 'template', templateId],
queryKey: [NAME_SPACE, 'template', template_id],
queryFn: () => {
return get<PipelineTemplateByIdResponse>(`/rag/pipeline/templates/${templateId}?type=${type}`)
return get<PipelineTemplateByIdResponse>(`/rag/pipeline/templates/${template_id}`, {
params: {
type,
},
})
},
enabled,
})