From f995436eec6ae16afebeeb46a3fb17770fb16301 Mon Sep 17 00:00:00 2001 From: twwu Date: Wed, 11 Jun 2025 16:38:42 +0800 Subject: [PATCH] feat: implement chunk structure card and related hooks for dataset creation; update translations and refactor pipeline template fetching --- .../datasets/create-from-pipeline/header.tsx | 24 +++---- .../details/chunk-structure-card.tsx | 66 +++++++++++++++++++ .../list/template-card/details/hooks.tsx | 36 ++++++++++ .../{details.tsx => details/index.tsx} | 28 +++++--- .../list/template-card/details/types.ts | 15 +++++ .../list/template-card/index.tsx | 5 +- .../settings/chunk-structure/hooks.tsx | 9 ++- .../components/chunk-structure/hooks.tsx | 2 +- web/i18n/en-US/dataset-creation.ts | 1 + web/i18n/en-US/dataset-pipeline.ts | 1 + web/i18n/zh-Hans/dataset-creation.ts | 1 + web/i18n/zh-Hans/dataset-pipeline.ts | 1 + web/models/datasets.ts | 8 +-- web/models/pipeline.ts | 26 ++++---- web/service/use-pipeline.ts | 12 +++- 15 files changed, 190 insertions(+), 45 deletions(-) create mode 100644 web/app/components/datasets/create-from-pipeline/list/template-card/details/chunk-structure-card.tsx create mode 100644 web/app/components/datasets/create-from-pipeline/list/template-card/details/hooks.tsx rename web/app/components/datasets/create-from-pipeline/list/template-card/{details.tsx => details/index.tsx} (82%) create mode 100644 web/app/components/datasets/create-from-pipeline/list/template-card/details/types.ts diff --git a/web/app/components/datasets/create-from-pipeline/header.tsx b/web/app/components/datasets/create-from-pipeline/header.tsx index 278b7c76e6..aded84e836 100644 --- a/web/app/components/datasets/create-from-pipeline/header.tsx +++ b/web/app/components/datasets/create-from-pipeline/header.tsx @@ -8,19 +8,21 @@ const Header = () => { const { t } = useTranslation() return ( - +
{t('datasetPipeline.creation.title')} - - + + +
) } diff --git a/web/app/components/datasets/create-from-pipeline/list/template-card/details/chunk-structure-card.tsx b/web/app/components/datasets/create-from-pipeline/list/template-card/details/chunk-structure-card.tsx new file mode 100644 index 0000000000..f3ed24c8f3 --- /dev/null +++ b/web/app/components/datasets/create-from-pipeline/list/template-card/details/chunk-structure-card.tsx @@ -0,0 +1,66 @@ +import React from 'react' +import cn from '@/utils/classnames' +import type { Option } from './types' +import { EffectColor } from './types' + +const HEADER_EFFECT_MAP: Record = { + [EffectColor.indigo]: 'bg-util-colors-indigo-indigo-600 opacity-80', + [EffectColor.blueLight]: 'bg-util-colors-blue-light-blue-light-500 opacity-80', + [EffectColor.green]: 'bg-util-colors-teal-teal-600 opacity-80', + [EffectColor.none]: '', +} + +const IconBackgroundColorMap: Record = { + [EffectColor.indigo]: 'bg-components-icon-bg-indigo-solid', + [EffectColor.blueLight]: 'bg-components-icon-bg-blue-light-solid', + [EffectColor.green]: 'bg-components-icon-bg-teal-solid', + [EffectColor.none]: '', +} + +type ChunkStructureCardProps = { + className?: string +} & Option + +const ChunkStructureCard = ({ + className, + icon, + title, + description, + effectColor, +}: ChunkStructureCardProps) => { + return ( +
+
+
+
+ {icon} +
+
+
+
+ + {title} + +
+ { + description && ( +
+ {description} +
+ ) + } +
+
+ ) +} + +export default React.memo(ChunkStructureCard) as typeof ChunkStructureCard diff --git a/web/app/components/datasets/create-from-pipeline/list/template-card/details/hooks.tsx b/web/app/components/datasets/create-from-pipeline/list/template-card/details/hooks.tsx new file mode 100644 index 0000000000..8b8e2fd121 --- /dev/null +++ b/web/app/components/datasets/create-from-pipeline/list/template-card/details/hooks.tsx @@ -0,0 +1,36 @@ +import { GeneralChunk, ParentChildChunk, QuestionAndAnswer } from '@/app/components/base/icons/src/vender/knowledge' +import { useTranslation } from 'react-i18next' +import { EffectColor, type Option } from './types' +import { ChunkingMode } from '@/models/datasets' + +export const useChunkStructureConfig = () => { + const { t } = useTranslation() + + const GeneralOption: Option = { + icon: , + title: 'General', + description: t('datasetCreation.stepTwo.generalTip'), + effectColor: EffectColor.indigo, + } + const ParentChildOption: Option = { + icon: , + title: 'Parent-Child', + description: t('datasetCreation.stepTwo.parentChildTip'), + effectColor: EffectColor.blueLight, + } + const QuestionAnswerOption: Option = { + icon: , + title: 'Q&A', + description: t('datasetCreation.stepTwo.qaTip'), + effectColor: EffectColor.green, + + } + + const chunkStructureConfig: Record = { + [ChunkingMode.text]: GeneralOption, + [ChunkingMode.parentChild]: ParentChildOption, + [ChunkingMode.qa]: QuestionAnswerOption, + } + + return chunkStructureConfig +} diff --git a/web/app/components/datasets/create-from-pipeline/list/template-card/details.tsx b/web/app/components/datasets/create-from-pipeline/list/template-card/details/index.tsx similarity index 82% rename from web/app/components/datasets/create-from-pipeline/list/template-card/details.tsx rename to web/app/components/datasets/create-from-pipeline/list/template-card/details/index.tsx index edc3bc9f0e..8e1de45073 100644 --- a/web/app/components/datasets/create-from-pipeline/list/template-card/details.tsx +++ b/web/app/components/datasets/create-from-pipeline/list/template-card/details/index.tsx @@ -1,4 +1,4 @@ -import React from 'react' +import React, { useMemo } from 'react' import AppIcon from '@/app/components/base/app-icon' import { usePipelineTemplateById } from '@/service/use-pipeline' import type { AppIconType } from '@/types/app' @@ -7,6 +7,8 @@ import Button from '@/app/components/base/button' import { useTranslation } from 'react-i18next' import Tooltip from '@/app/components/base/tooltip' import Loading from '@/app/components/base/loading' +import { useChunkStructureConfig } from './hooks' +import ChunkStructureCard from './chunk-structure-card' import WorkflowPreview from '@/app/components/workflow/workflow-preview' type DetailsProps = { @@ -23,16 +25,22 @@ const Details = ({ onClose, }: DetailsProps) => { const { t } = useTranslation() - const { data: pipelineTemplateInfo } = usePipelineTemplateById(id, type, true) - const appIcon = React.useMemo(() => { + const { data: pipelineTemplateInfo } = usePipelineTemplateById({ + template_id: id, + type, + }, true) + + const appIcon = useMemo(() => { if (!pipelineTemplateInfo) return { type: 'emoji', icon: '📙', background: '#FFF4ED' } - const iconInfo = pipelineTemplateInfo.icon + const iconInfo = pipelineTemplateInfo.icon_info return iconInfo.icon_type === 'image' ? { type: 'image', url: iconInfo.icon_url || '', fileId: iconInfo.icon || '' } : { type: 'icon', icon: iconInfo.icon || '', background: iconInfo.icon_background || '' } }, [pipelineTemplateInfo]) + const chunkStructureConfig = useChunkStructureConfig() + if (!pipelineTemplateInfo) { return ( @@ -42,9 +50,7 @@ const Details = ({ return (
- +
@@ -86,14 +94,16 @@ const Details = ({
-
+
{t('datasetPipeline.details.structure')}
+
diff --git a/web/app/components/datasets/create-from-pipeline/list/template-card/details/types.ts b/web/app/components/datasets/create-from-pipeline/list/template-card/details/types.ts new file mode 100644 index 0000000000..ade8068510 --- /dev/null +++ b/web/app/components/datasets/create-from-pipeline/list/template-card/details/types.ts @@ -0,0 +1,15 @@ +import type { ReactNode } from 'react' + +export enum EffectColor { + indigo = 'indigo', + blueLight = 'blue-light', + green = 'green', + none = 'none', +} + +export type Option = { + icon: ReactNode + title: string + description?: string + effectColor: EffectColor +} diff --git a/web/app/components/datasets/create-from-pipeline/list/template-card/index.tsx b/web/app/components/datasets/create-from-pipeline/list/template-card/index.tsx index c5d88237c8..74bbdded9d 100644 --- a/web/app/components/datasets/create-from-pipeline/list/template-card/index.tsx +++ b/web/app/components/datasets/create-from-pipeline/list/template-card/index.tsx @@ -40,7 +40,10 @@ const TemplateCard = ({ const [showDetailModal, setShowDetailModal] = useState(false) const [showCreateModal, setShowCreateModal] = useState(false) - const { refetch: getPipelineTemplateInfo } = usePipelineTemplateById(pipeline.id, type, false) + const { refetch: getPipelineTemplateInfo } = usePipelineTemplateById({ + template_id: pipeline.id, + type, + }, false) const { mutateAsync: createEmptyDataset } = useCreatePipelineDataset() const { handleCheckPluginDependencies } = usePluginDependencies() diff --git a/web/app/components/datasets/settings/chunk-structure/hooks.tsx b/web/app/components/datasets/settings/chunk-structure/hooks.tsx index 6ac75caee3..8be66c7f98 100644 --- a/web/app/components/datasets/settings/chunk-structure/hooks.tsx +++ b/web/app/components/datasets/settings/chunk-structure/hooks.tsx @@ -5,14 +5,17 @@ import { } from '@/app/components/base/icons/src/vender/knowledge' import { EffectColor, type Option } from './types' import { ChunkingMode } from '@/models/datasets' +import { useTranslation } from 'react-i18next' export const useChunkStructure = () => { + const { t } = useTranslation() + const GeneralOption: Option = { id: ChunkingMode.text, icon: , iconActiveColor: 'text-util-colors-indigo-indigo-600', title: 'General', - description: 'General text chunking mode, the chunks retrieved and recalled are the same.', + description: t('datasetCreation.stepTwo.generalTip'), effectColor: EffectColor.indigo, showEffectColor: true, } @@ -21,7 +24,7 @@ export const useChunkStructure = () => { icon: , iconActiveColor: 'text-util-colors-blue-light-blue-light-500', title: 'Parent-Child', - description: 'When using the parent-child mode, the child-chunk is used for retrieval and the parent-chunk is used for recall as context.', + description: t('datasetCreation.stepTwo.parentChildTip'), effectColor: EffectColor.blueLight, showEffectColor: true, } @@ -29,7 +32,7 @@ export const useChunkStructure = () => { id: ChunkingMode.qa, icon: , title: 'Q&A', - description: 'When using structured Q&A data, you can create documents that pair questions with answers. These documents are indexed based on the question portion, allowing the system to retrieve relevant answers based on query similarity', + description: t('datasetCreation.stepTwo.qaTip'), } const options = [ diff --git a/web/app/components/workflow/nodes/knowledge-base/components/chunk-structure/hooks.tsx b/web/app/components/workflow/nodes/knowledge-base/components/chunk-structure/hooks.tsx index cfb8210a86..ab6e586ad4 100644 --- a/web/app/components/workflow/nodes/knowledge-base/components/chunk-structure/hooks.tsx +++ b/web/app/components/workflow/nodes/knowledge-base/components/chunk-structure/hooks.tsx @@ -41,7 +41,7 @@ export const useChunkStructure = () => { id: ChunkStructureEnum.question_answer, icon: , title: 'Question-Answer', - description: 'Question-answer text chunking mode, the chunks retrieved and recalled are different.', + description: t('datasetCreation.stepTwo.qaTip'), } const optionMap: Record = { diff --git a/web/i18n/en-US/dataset-creation.ts b/web/i18n/en-US/dataset-creation.ts index dd1c6657dc..7cc1bff8f3 100644 --- a/web/i18n/en-US/dataset-creation.ts +++ b/web/i18n/en-US/dataset-creation.ts @@ -123,6 +123,7 @@ const translation = { paragraphTip: 'This mode splits the text in to paragraphs based on delimiters and the maximum chunk length, using the split text as the parent chunk for retrieval.', fullDoc: 'Full Doc', fullDocTip: 'The entire document is used as the parent chunk and retrieved directly. Please note that for performance reasons, text exceeding 10000 tokens will be automatically truncated.', + qaTip: 'When using structured Q&A data, you can create documents that pair questions with answers. These documents are indexed based on the question portion, allowing the system to retrieve relevant answers based on query similarity.', separator: 'Delimiter', separatorTip: 'A delimiter is the character used to separate text. \\n\\n and \\n are commonly used delimiters for separating paragraphs and lines. Combined with commas (\\n\\n,\\n), paragraphs will be segmented by lines when exceeding the maximum chunk length. You can also use special delimiters defined by yourself (e.g. ***).', separatorPlaceholder: '\\n\\n for paragraphs; \\n for lines', diff --git a/web/i18n/en-US/dataset-pipeline.ts b/web/i18n/en-US/dataset-pipeline.ts index 9e1aafb45e..01fe05e007 100644 --- a/web/i18n/en-US/dataset-pipeline.ts +++ b/web/i18n/en-US/dataset-pipeline.ts @@ -45,6 +45,7 @@ const translation = { errorTip: 'Failed to export pipeline DSL', }, details: { + createdBy: 'By {{author}}', structure: 'Structure', structureTooltip: 'Chunk Structure determines how documents are split and indexed—offering General, Parent-Child, and Q&A modes—and is unique to each knowledge base.', }, diff --git a/web/i18n/zh-Hans/dataset-creation.ts b/web/i18n/zh-Hans/dataset-creation.ts index 8ea3163217..1bbfb64c2e 100644 --- a/web/i18n/zh-Hans/dataset-creation.ts +++ b/web/i18n/zh-Hans/dataset-creation.ts @@ -123,6 +123,7 @@ const translation = { paragraphTip: '此模式根据分隔符和最大块长度将文本拆分为段落,使用拆分文本作为检索的父块', fullDoc: '全文', fullDocTip: '整个文档用作父块并直接检索。请注意,出于性能原因,超过 10000 个标记的文本将被自动截断。', + qaTip: '使用 Q&A 模式时,块将被拆分为问题和答案对。检索时将使用问题部分进行检索,答案部分将作为上下文返回。', separator: '分段标识符', separatorTip: '分隔符是用于分隔文本的字符。\\n\\n 和 \\n 是常用于分隔段落和行的分隔符。用逗号连接分隔符(\\n\\n,\\n),当段落超过最大块长度时,会按行进行分割。你也可以使用自定义的特殊分隔符(例如 ***)。', separatorPlaceholder: '\\n\\n 用于分段;\\n 用于分行', diff --git a/web/i18n/zh-Hans/dataset-pipeline.ts b/web/i18n/zh-Hans/dataset-pipeline.ts index 912edbe5df..12181bed30 100644 --- a/web/i18n/zh-Hans/dataset-pipeline.ts +++ b/web/i18n/zh-Hans/dataset-pipeline.ts @@ -45,6 +45,7 @@ const translation = { errorTip: '导出流水线 DSL 失败', }, details: { + createdBy: '由 {{author}} 创建', structure: '文档结构', structureTooltip: '文档结构决定了文档的拆分和索引方式,Dify 提供了通用、父子和问答模式,每个知识库的文档结构是唯一的。', }, diff --git a/web/models/datasets.ts b/web/models/datasets.ts index 965eb8a709..9102a73a5d 100644 --- a/web/models/datasets.ts +++ b/web/models/datasets.ts @@ -4,7 +4,7 @@ import type { Tag } from '@/app/components/base/tag-management/constant' import type { IndexingType } from '@/app/components/datasets/create/step-two' import type { MetadataFilteringVariableType } from '@/app/components/workflow/nodes/knowledge-retrieval/types' import type { MetadataItemWithValue } from '@/app/components/datasets/metadata/types' -import { ExternalKnowledgeBase, General, Graph, ParentChild, Qa } from '@/app/components/base/icons/src/public/knowledge/dataset-card' +import { ExternalKnowledgeBase, General, ParentChild, Qa } from '@/app/components/base/icons/src/public/knowledge/dataset-card' import { GeneralChunk, ParentChildChunk, QuestionAndAnswer } from '@/app/components/base/icons/src/vender/knowledge' export enum DataSourceType { @@ -23,7 +23,7 @@ export enum ChunkingMode { text = 'text_model', // General text qa = 'qa_model', // General QA parentChild = 'hierarchical_model', // Parent-Child - graph = 'graph', // Graph + // graph = 'graph', // todo: Graph RAG } export type MetadataInDoc = { @@ -720,7 +720,7 @@ export const DOC_FORM_ICON_WITH_BG: Record = { [ChunkingMode.text]: 'general', [ChunkingMode.qa]: 'qa', [ChunkingMode.parentChild]: 'parentChild', - [ChunkingMode.graph]: 'graph', + // [ChunkingMode.graph]: 'graph', // todo: Graph RAG } export type CreateDatasetReq = { diff --git a/web/models/pipeline.ts b/web/models/pipeline.ts index 1eb8d060e4..1525150548 100644 --- a/web/models/pipeline.ts +++ b/web/models/pipeline.ts @@ -29,24 +29,24 @@ export type PipelineTemplateListResponse = { pipeline_templates: PipelineTemplate[] } +export type PipelineTemplateByIdRequest = { + template_id: string + type: 'built-in' | 'customized' +} + export type PipelineTemplateByIdResponse = { id: string name: string - icon: IconInfo + icon_info: IconInfo description: string - author: string // todo: TBD - structure: string // todo: TBD - export_data: { - workflow: { - graph: { - nodes: Node[] - edges: Edge[] - viewport: Viewport - } - environment_variables?: EnvironmentVariable[] - rag_pipeline_variables?: RAGPipelineVariables - } + chunk_structure: ChunkingMode + export_data: string // DSL content + graph: { + nodes: Node[] + edges: Edge[] + viewport: Viewport } + created_by: string } export type CreateFormData = { diff --git a/web/service/use-pipeline.ts b/web/service/use-pipeline.ts index 324662cb00..a141a04ca9 100644 --- a/web/service/use-pipeline.ts +++ b/web/service/use-pipeline.ts @@ -16,6 +16,7 @@ import type { PipelinePreProcessingParamsResponse, PipelineProcessingParamsRequest, PipelineProcessingParamsResponse, + PipelineTemplateByIdRequest, PipelineTemplateByIdResponse, PipelineTemplateListParams, PipelineTemplateListResponse, @@ -42,11 +43,16 @@ export const usePipelineTemplateList = (params: PipelineTemplateListParams) => { }) } -export const usePipelineTemplateById = (templateId: string, type: string, enabled: boolean) => { +export const usePipelineTemplateById = (params: PipelineTemplateByIdRequest, enabled: boolean) => { + const { template_id, type } = params return useQuery({ - queryKey: [NAME_SPACE, 'template', templateId], + queryKey: [NAME_SPACE, 'template', template_id], queryFn: () => { - return get(`/rag/pipeline/templates/${templateId}?type=${type}`) + return get(`/rag/pipeline/templates/${template_id}`, { + params: { + type, + }, + }) }, enabled, })