dify/api/services/rag_pipeline/transform/file_parent_child.json
2025-07-16 01:50:37 +08:00

1 line
17 KiB
JSON

{"nodes": [{"data": {"chunk_structure": "hierarchical_model", "embedding_model": "text-embedding-ada-002", "embedding_model_provider": "langgenius/openai/openai", "index_chunk_variable_selector": ["1752575473519", "result"], "indexing_technique": "high_quality", "keyword_number": 10, "retrieval_model": {"score_threshold": 0.5, "score_threshold_enabled": false, "search_method": "semantic_search", "top_k": 3, "vector_setting": {"embedding_model_name": "text-embedding-ada-002", "embedding_provider_name": "langgenius/openai/openai"}}, "selected": false, "title": "\u77e5\u8bc6\u5e93", "type": "knowledge-index"}, "height": 114, "id": "1752477924228", "position": {"x": 994.3774545394483, "y": 281.3910724383104}, "positionAbsolute": {"x": 994.3774545394483, "y": 281.3910724383104}, "selected": false, "sourcePosition": "right", "targetPosition": "left", "type": "custom", "width": 242}, {"data": {"datasource_configurations": {}, "datasource_label": "File", "datasource_name": "upload-file", "datasource_parameters": {}, "fileExtensions": ["txt", "markdown", "mdx", "pdf", "html", "xlsx", "xls", "vtt", "properties", "doc", "docx", "csv", "eml", "msg", "pptx", "xml", "epub", "ppt", "md"], "plugin_id": "langgenius/file", "provider_name": "file", "provider_type": "local_file", "selected": false, "title": "File", "type": "datasource"}, "height": 52, "id": "1752479895761", "position": {"x": -839.8603427660498, "y": 251.3910724383104}, "positionAbsolute": {"x": -839.8603427660498, "y": 251.3910724383104}, "selected": false, "sourcePosition": "right", "targetPosition": "left", "type": "custom", "width": 242}, {"data": {"is_team_authorization": true, "output_schema": {"properties": {"documents": {"description": "the documents extracted from the file", "items": {"type": "object"}, "type": "array"}, "images": {"description": "The images extracted from the file", "items": {"type": "object"}, "type": "array"}}, "type": "object"}, "paramSchemas": [{"auto_generate": null, "default": null, "form": "llm", "human_description": {"en_US": "the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", "ja_JP": "the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", "pt_BR": "o arquivo a ser analisado (suporta pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", "zh_Hans": "\u7528\u4e8e\u89e3\u6790\u7684\u6587\u4ef6(\u652f\u6301 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)"}, "label": {"en_US": "file", "ja_JP": "file", "pt_BR": "file", "zh_Hans": "file"}, "llm_description": "the file to be parsed (support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", "max": null, "min": null, "name": "file", "options": [], "placeholder": null, "precision": null, "required": true, "scope": null, "template": null, "type": "file"}], "params": {"file": ""}, "provider_id": "langgenius/dify_extractor/dify_extractor", "provider_name": "langgenius/dify_extractor/dify_extractor", "provider_type": "builtin", "selected": false, "title": "Dify\u6587\u672c\u63d0\u53d6\u5668", "tool_configurations": {}, "tool_description": "Dify Extractor", "tool_label": "Dify\u6587\u672c\u63d0\u53d6\u5668", "tool_name": "dify_extractor", "tool_parameters": {"file": {"type": "variable", "value": ["1752479895761", "file"]}}, "type": "tool"}, "height": 52, "id": "1752480460682", "position": {"x": -108.28652292656551, "y": 281.3910724383104}, "positionAbsolute": {"x": -108.28652292656551, "y": 281.3910724383104}, "selected": false, "sourcePosition": "right", "targetPosition": "left", "type": "custom", "width": 242}, {"data": {"is_array_file": false, "selected": false, "title": "\u6587\u6863\u63d0\u53d6\u5668", "type": "document-extractor", "variable_selector": ["1752479895761", "file"]}, "height": 90, "id": "1752481112180", "position": {"x": -108.28652292656551, "y": 390.6576481692478}, "positionAbsolute": {"x": -108.28652292656551, "y": 390.6576481692478}, "selected": false, "sourcePosition": "right", "targetPosition": "left", "type": "custom", "width": 242}, {"data": {"cases": [{"id": "24e47cad-f1e2-4f74-9884-3f49d5bb37b7", "case_id": "24e47cad-f1e2-4f74-9884-3f49d5bb37b7", "logical_operator": "or", "conditions": [{"comparison_operator": "is", "id": "9da88d93-3ff6-463f-abfd-6bcafbf2554d", "value": ".xlsx", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"]}, {"comparison_operator": "is", "id": "d0e88f5e-dfe3-4bae-af0c-dbec267500de", "value": ".xls", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"]}, {"comparison_operator": "is", "id": "a957e91e-1ed7-4c6b-9c80-2f0948858f1d", "value": ".md", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"]}, {"comparison_operator": "is", "id": "870c3c39-8d3f-474a-ab8b-9c0ccf53db73", "value": ".markdown", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"]}, {"comparison_operator": "is", "id": "f9541513-1e71-4dc1-9db5-35dc84a39e3c", "value": ".mdx", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"]}, {"comparison_operator": "is", "id": "4c7f455b-ac20-40ca-9495-6cc44ffcb35d", "value": ".html", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"]}, {"comparison_operator": "is", "id": "2e12d9c7-8057-4a09-8851-f9fd1d0718d1", "value": ".htm", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"]}, {"comparison_operator": "is", "id": "73a995a9-d8b9-4aef-89f7-306e2ddcbce2", "value": ".docx", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"]}, {"comparison_operator": "is", "id": "8a2e8772-0426-458b-a1f9-9eaaec0f27c8", "value": ".csv", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"]}, {"comparison_operator": "is", "id": "aa2cb6b6-a2fc-462a-a9f5-c9c3f33a1602", "value": ".txt", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"]}]}], "selected": false, "title": "\u6761\u4ef6\u5206\u652f", "type": "if-else"}, "height": 358, "id": "1752481129417", "position": {"x": -512.2335487893622, "y": 251.3910724383104}, "positionAbsolute": {"x": -512.2335487893622, "y": 251.3910724383104}, "selected": false, "sourcePosition": "right", "targetPosition": "left", "type": "custom", "width": 242}, {"data": {"advanced_settings": {"group_enabled": false, "groups": [{"groupId": "f4cf07b4-914d-4544-8ef8-0c5d9e4f21a7", "group_name": "Group1", "output_type": "string", "variables": [["1752481112180", "text"], ["1752480460682", "text"]]}]}, "output_type": "string", "selected": false, "title": "\u53d8\u91cf\u805a\u5408\u5668", "type": "variable-aggregator", "variables": [["1752481112180", "text"], ["1752480460682", "text"]]}, "height": 129, "id": "1752482022496", "position": {"x": 319.441649575055, "y": 281.3910724383104}, "positionAbsolute": {"x": 319.441649575055, "y": 281.3910724383104}, "selected": false, "sourcePosition": "right", "targetPosition": "left", "type": "custom", "width": 242}, {"id": "1752575473519", "type": "custom", "data": {"tool_parameters": {"input_text": {"type": "mixed", "value": "{{#1752482022496.output#}}"}, "max_length": {"type": "variable", "value": ["rag", "shared", "max_chunk_length"]}, "separator": {"type": "mixed", "value": "{{#rag.shared.delimiter#}}"}, "subchunk_max_length": {"type": "variable", "value": ["rag", "shared", "child_max_chunk_length"]}, "subchunk_separator": {"type": "mixed", "value": "{{#rag.shared.child_delimiter#}}"}, "parent_mode": {"type": "variable", "value": ["rag", "shared", "parent_mode"]}, "remove_extra_spaces": {"type": "mixed", "value": "{{#rag.shared.replace_consecutive_spaces#}}"}, "remove_urls_emails": {"type": "mixed", "value": "{{#rag.shared.delete_urls_email#}}"}}, "tool_configurations": {}, "type": "tool", "title": "\u7236\u5b50\u5206\u5757\u5904\u7406\u5668", "provider_id": "langgenius/parent_child_chunk/parent_child_chunk", "provider_type": "builtin", "provider_name": "langgenius/parent_child_chunk/parent_child_chunk", "tool_name": "parent_child_chunk", "tool_label": "\u7236\u5b50\u5206\u5757\u5904\u7406\u5668", "tool_description": "\u5c06\u6587\u6863\u5904\u7406\u4e3a\u7236\u5b50\u5206\u5757\u7ed3\u6784", "is_team_authorization": true, "output_schema": {"properties": {"result": {"description": "Parent child chunks result", "items": {"type": "object"}, "type": "array"}}, "type": "object"}, "paramSchemas": [{"name": "input_text", "label": {"en_US": "Input text", "zh_Hans": "\u8f93\u5165\u6587\u672c", "pt_BR": "Input text", "ja_JP": "Input text"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": true, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "string", "human_description": {"en_US": "The text you want to chunk.", "zh_Hans": "\u4f60\u60f3\u8981\u5206\u5757\u7684\u6587\u672c\u3002", "pt_BR": "The text you want to chunk.", "ja_JP": "The text you want to chunk."}, "form": "llm", "llm_description": "The text you want to chunk."}, {"name": "max_length", "label": {"en_US": "Maximum Length", "zh_Hans": "\u6700\u5927\u957f\u5ea6", "pt_BR": "Comprimento M\u00e1ximo", "ja_JP": "Maximum Length"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": false, "default": 1024, "min": null, "max": null, "precision": null, "options": [], "type": "number", "human_description": {"en_US": "Maximum length for chunking", "zh_Hans": "\u7528\u4e8e\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6", "pt_BR": "Comprimento m\u00e1ximo para divis\u00e3o", "ja_JP": "Maximum length for chunking"}, "form": "llm", "llm_description": "Maximum length allowed per chunk"}, {"name": "separator", "label": {"en_US": "Chunk Separator", "zh_Hans": "\u5206\u5757\u5206\u9694\u7b26", "pt_BR": "Separador de Divis\u00e3o", "ja_JP": "Chunk Separator"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": false, "default": "\n\n", "min": null, "max": null, "precision": null, "options": [], "type": "string", "human_description": {"en_US": "Separator used for chunking", "zh_Hans": "\u7528\u4e8e\u5206\u5757\u7684\u5206\u9694\u7b26", "pt_BR": "Separador usado para divis\u00e3o", "ja_JP": "Separator used for chunking"}, "form": "llm", "llm_description": "The separator used to split chunks"}, {"name": "subchunk_max_length", "label": {"en_US": "Subchunk Maximum Length", "zh_Hans": "\u5b50\u5206\u5757\u6700\u5927\u957f\u5ea6", "pt_BR": "Comprimento M\u00e1ximo de Subdivis\u00e3o", "ja_JP": "Subchunk Maximum Length"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": false, "default": 512, "min": null, "max": null, "precision": null, "options": [], "type": "number", "human_description": {"en_US": "Maximum length for subchunking", "zh_Hans": "\u7528\u4e8e\u5b50\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6", "pt_BR": "Comprimento m\u00e1ximo para subdivis\u00e3o", "ja_JP": "Maximum length for subchunking"}, "form": "llm", "llm_description": "Maximum length allowed per subchunk"}, {"name": "subchunk_separator", "label": {"en_US": "Subchunk Separator", "zh_Hans": "\u5b50\u5206\u5757\u5206\u9694\u7b26", "pt_BR": "Separador de Subdivis\u00e3o", "ja_JP": "Subchunk Separator"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": false, "default": ". ", "min": null, "max": null, "precision": null, "options": [], "type": "string", "human_description": {"en_US": "Separator used for subchunking", "zh_Hans": "\u7528\u4e8e\u5b50\u5206\u5757\u7684\u5206\u9694\u7b26", "pt_BR": "Separador usado para subdivis\u00e3o", "ja_JP": "Separator used for subchunking"}, "form": "llm", "llm_description": "The separator used to split subchunks"}, {"name": "parent_mode", "label": {"en_US": "Parent Mode", "zh_Hans": "\u7236\u5757\u6a21\u5f0f", "pt_BR": "Modo Pai", "ja_JP": "Parent Mode"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": true, "default": "paragraph", "min": null, "max": null, "precision": null, "options": [{"value": "paragraph", "label": {"en_US": "Paragraph", "zh_Hans": "\u6bb5\u843d", "pt_BR": "Par\u00e1grafo", "ja_JP": "Paragraph"}, "icon": ""}, {"value": "full_doc", "label": {"en_US": "Full Document", "zh_Hans": "\u5168\u6587", "pt_BR": "Documento Completo", "ja_JP": "Full Document"}, "icon": ""}], "type": "select", "human_description": {"en_US": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.", "zh_Hans": "\u6839\u636e\u5206\u9694\u7b26\u548c\u6700\u5927\u5757\u957f\u5ea6\u5c06\u6587\u672c\u62c6\u5206\u4e3a\u6bb5\u843d\uff0c\u4f7f\u7528\u62c6\u5206\u6587\u672c\u4f5c\u4e3a\u68c0\u7d22\u7684\u7236\u5757\u6216\u6574\u4e2a\u6587\u6863\u7528\u4f5c\u7236\u5757\u5e76\u76f4\u63a5\u68c0\u7d22\u3002", "pt_BR": "Dividir texto em par\u00e1grafos com base no separador e no comprimento m\u00e1ximo do bloco, usando o texto dividido como bloco pai ou documento completo como bloco pai e diretamente recuper\u00e1-lo.", "ja_JP": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve."}, "form": "llm", "llm_description": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve."}, {"name": "remove_extra_spaces", "label": {"en_US": "Remove Extra Spaces", "zh_Hans": "\u79fb\u9664\u591a\u4f59\u7a7a\u683c", "pt_BR": "Remover Espa\u00e7os Extras", "ja_JP": "Remove Extra Spaces"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": false, "default": 0, "min": null, "max": null, "precision": null, "options": [], "type": "boolean", "human_description": {"en_US": "Whether to remove extra spaces in the text", "zh_Hans": "\u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684\u591a\u4f59\u7a7a\u683c", "pt_BR": "Se deve remover espa\u00e7os extras no texto", "ja_JP": "Whether to remove extra spaces in the text"}, "form": "llm", "llm_description": "Whether to remove extra spaces in the text"}, {"name": "remove_urls_emails", "label": {"en_US": "Remove URLs and Emails", "zh_Hans": "\u79fb\u9664URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740", "pt_BR": "Remover URLs e E-mails", "ja_JP": "Remove URLs and Emails"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": false, "default": 0, "min": null, "max": null, "precision": null, "options": [], "type": "boolean", "human_description": {"en_US": "Whether to remove URLs and emails in the text", "zh_Hans": "\u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740", "pt_BR": "Se deve remover URLs e e-mails no texto", "ja_JP": "Whether to remove URLs and emails in the text"}, "form": "llm", "llm_description": "Whether to remove URLs and emails in the text"}], "params": {"input_text": "", "max_length": "", "separator": "", "subchunk_max_length": "", "subchunk_separator": "", "parent_mode": "", "remove_extra_spaces": "", "remove_urls_emails": ""}, "selected": false}, "position": {"x": 637.9241611063885, "y": 281.3910724383104}, "targetPosition": "left", "sourcePosition": "right", "positionAbsolute": {"x": 637.9241611063885, "y": 281.3910724383104}, "width": 242, "height": 52, "selected": true}], "edges": [{"data": {"isInIteration": false, "isInLoop": false, "sourceType": "datasource", "targetType": "if-else"}, "id": "1752479895761-source-1752481129417-target", "source": "1752479895761", "sourceHandle": "source", "target": "1752481129417", "targetHandle": "target", "type": "custom", "zIndex": 0}, {"data": {"isInLoop": false, "sourceType": "if-else", "targetType": "tool"}, "id": "1752481129417-24e47cad-f1e2-4f74-9884-3f49d5bb37b7-1752480460682-target", "source": "1752481129417", "sourceHandle": "24e47cad-f1e2-4f74-9884-3f49d5bb37b7", "target": "1752480460682", "targetHandle": "target", "type": "custom", "zIndex": 0}, {"data": {"isInLoop": false, "sourceType": "if-else", "targetType": "document-extractor"}, "id": "1752481129417-false-1752481112180-target", "source": "1752481129417", "sourceHandle": "false", "target": "1752481112180", "targetHandle": "target", "type": "custom", "zIndex": 0}, {"data": {"isInIteration": false, "isInLoop": false, "sourceType": "tool", "targetType": "variable-aggregator"}, "id": "1752480460682-source-1752482022496-target", "source": "1752480460682", "sourceHandle": "source", "target": "1752482022496", "targetHandle": "target", "type": "custom", "zIndex": 0}, {"data": {"isInLoop": false, "sourceType": "document-extractor", "targetType": "variable-aggregator"}, "id": "1752481112180-source-1752482022496-target", "source": "1752481112180", "sourceHandle": "source", "target": "1752482022496", "targetHandle": "target", "type": "custom", "zIndex": 0}, {"id": "1752482022496-source-1752575473519-target", "type": "custom", "source": "1752482022496", "sourceHandle": "source", "target": "1752575473519", "targetHandle": "target", "data": {"sourceType": "variable-aggregator", "targetType": "tool", "isInIteration": false, "isInLoop": false}, "zIndex": 0}, {"id": "1752575473519-source-1752477924228-target", "type": "custom", "source": "1752575473519", "target": "1752477924228", "sourceHandle": "source", "targetHandle": "target", "data": {"sourceType": "tool", "targetType": "knowledge-index", "isInLoop": false}, "zIndex": 0}], "viewport": {"x": 948.6766333808323, "y": -102.06757184183238, "zoom": 0.8375774577380971}}