From aaa5b0e2957126c1e3f8abd387ca3c21fcf4c751 Mon Sep 17 00:00:00 2001 From: jyong <718720800@qq.com> Date: Wed, 16 Jul 2025 18:05:40 +0800 Subject: [PATCH] r2 transform --- .../rag_pipeline/rag_pipeline_dsl_service.py | 24 +++++++++ .../rag_pipeline_transform_service.py | 54 +++++++++++++++++++ .../transform/file-general-economy.yml | 8 +-- .../transform/file-general-high-quality.yml | 8 +-- .../transform/file-parentchild.yml | 8 +-- .../transform/file_general_economy.json | 1 - .../transform/file_general_high_quality.json | 1 - .../transform/file_parent_child.json | 1 - .../transform/notion-general-economy.yml | 8 ++- .../transform/notion-general-high-quality.yml | 8 ++- .../transform/notion-parentchild.yml | 8 ++- .../transform/notion_general_economy.json | 1 - .../notion_general_high_quality.json | 1 - .../transform/notion_parent_child.json | 1 - .../transform/web_crawl_general_economy.json | 1 - .../web_crawl_general_high_quality.json | 1 - .../transform/web_crawl_parent_child.json | 1 - .../website-crawl-general-economy.yml | 12 ++++- .../website-crawl-general-high-quality.yml | 12 ++++- .../transform/website-crawl-parentchild.yml | 12 ++++- 20 files changed, 138 insertions(+), 33 deletions(-) delete mode 100644 api/services/rag_pipeline/transform/file_general_economy.json delete mode 100644 api/services/rag_pipeline/transform/file_general_high_quality.json delete mode 100644 api/services/rag_pipeline/transform/file_parent_child.json delete mode 100644 api/services/rag_pipeline/transform/notion_general_economy.json delete mode 100644 api/services/rag_pipeline/transform/notion_general_high_quality.json delete mode 100644 api/services/rag_pipeline/transform/notion_parent_child.json delete mode 100644 api/services/rag_pipeline/transform/web_crawl_general_economy.json delete mode 100644 api/services/rag_pipeline/transform/web_crawl_general_high_quality.json delete mode 100644 api/services/rag_pipeline/transform/web_crawl_parent_child.json diff --git a/api/services/rag_pipeline/rag_pipeline_dsl_service.py b/api/services/rag_pipeline/rag_pipeline_dsl_service.py index c130799a3d..d1b76f14fe 100644 --- a/api/services/rag_pipeline/rag_pipeline_dsl_service.py +++ b/api/services/rag_pipeline/rag_pipeline_dsl_service.py @@ -22,7 +22,9 @@ from sqlalchemy.orm import Session from core.helper import ssrf_proxy from core.model_runtime.utils.encoders import jsonable_encoder from core.plugin.entities.plugin import PluginDependency +from core.workflow.nodes.datasource.entities import DatasourceNodeData from core.workflow.nodes.enums import NodeType +from core.workflow.nodes.knowledge_index.entities import KnowledgeIndexNodeData from core.workflow.nodes.knowledge_retrieval.entities import KnowledgeRetrievalNodeData from core.workflow.nodes.llm.entities import LLMNodeData from core.workflow.nodes.parameter_extractor.entities import ParameterExtractorNodeData @@ -725,6 +727,10 @@ class RagPipelineDslService: dependencies.append( DependenciesAnalysisService.analyze_tool_dependency(tool_entity.provider_id), ) + case NodeType.DATASOURCE.value: + datasource_entity = DatasourceNodeData(**node["data"]) + if datasource_entity.provider_type != "local_file": + dependencies.append(datasource_entity.plugin_id) case NodeType.LLM.value: llm_entity = LLMNodeData(**node["data"]) dependencies.append( @@ -744,6 +750,24 @@ class RagPipelineDslService: parameter_extractor_entity.model.provider ), ) + case NodeType.KNOWLEDGE_INDEX.value: + knowledge_index_entity = KnowledgeConfiguration(**node["data"]) + if knowledge_index_entity.indexing_technique == "high_quality": + if knowledge_index_entity.embedding_model_provider: + dependencies.append( + DependenciesAnalysisService.analyze_model_provider_dependency( + knowledge_index_entity.embedding_model_provider + ), + ) + if knowledge_index_entity.retrieval_model.reranking_mode == "reranking_model": + if knowledge_index_entity.retrieval_model.reranking_enable: + if knowledge_index_entity.retrieval_model.reranking_model and knowledge_index_entity.retrieval_model.reranking_mode == "reranking_model": + if knowledge_index_entity.retrieval_model.reranking_model.reranking_provider_name: + dependencies.append( + DependenciesAnalysisService.analyze_model_provider_dependency( + knowledge_index_entity.retrieval_model.reranking_model.reranking_provider_name + ), + ) case NodeType.KNOWLEDGE_RETRIEVAL.value: knowledge_retrieval_entity = KnowledgeRetrievalNodeData(**node["data"]) if knowledge_retrieval_entity.retrieval_mode == "multiple": diff --git a/api/services/rag_pipeline/rag_pipeline_transform_service.py b/api/services/rag_pipeline/rag_pipeline_transform_service.py index 2b0221539f..40077ab0ef 100644 --- a/api/services/rag_pipeline/rag_pipeline_transform_service.py +++ b/api/services/rag_pipeline/rag_pipeline_transform_service.py @@ -7,6 +7,10 @@ import yaml from flask_login import current_user from constants import DOCUMENT_EXTENSIONS +from core.plugin.entities.plugin import PluginInstallationSource +from core.plugin.impl.datasource import PluginDatasourceManager +from core.plugin.impl.plugin import PluginInstaller +from core.tools.tool_manager import ToolManager from extensions.ext_database import db from factories import variable_factory from models.dataset import Dataset, Pipeline @@ -33,6 +37,8 @@ class RagPipelineTransformService: return retrieval_model = dataset.retrieval_model pipeline_yaml = self._get_transform_yaml(doc_form, datasource_type, indexing_technique) + # deal dependencies + self._deal_dependencies(pipeline_yaml, dataset.tenant_id) # Extract app data workflow_data = pipeline_yaml.get("workflow") graph = workflow_data.get("graph", {}) @@ -221,3 +227,51 @@ class RagPipelineTransformService: pipeline.workflow_id = published_workflow.id db.session.add(pipeline) return pipeline + + def _deal_dependencies(self, pipeline_yaml: dict, tenant_id: str): + installer_manager = PluginInstaller() + installed_plugins = installer_manager.list_plugins(tenant_id) + + datasource_manager = PluginDatasourceManager() + + tool_manager = ToolManager() + + installed_plugins_ids = [plugin.plugin_id for plugin in installed_plugins] + dependencies = pipeline_yaml.get("dependencies", []) + need_install_plugin_unique_identifiers = [] + for dependency in dependencies: + if dependency.get("type") == "marketplace": + plugin_unique_identifier = dependency.get("value", {}).get("plugin_unique_identifier") + plugin_id = plugin_unique_identifier.split(":")[0] + if plugin_id not in installed_plugins_ids: + if plugin_id == "langgenius/notion_datasource": + datasource = datasource_manager.fetch_datasource_provider(tenant_id, f"{plugin_id}/notion") + need_install_plugin_unique_identifiers.append(datasource.plugin_unique_identifier) + elif plugin_id == "langgenius/firecrawl_datasource": + datasource = datasource_manager.fetch_datasource_provider(tenant_id, f"{plugin_id}/firecrawl") + need_install_plugin_unique_identifiers.append(datasource.plugin_unique_identifier) + elif plugin_id == "langgenius/jina_datasource": + datasource = datasource_manager.fetch_datasource_provider(tenant_id, f"{plugin_id}/jina") + need_install_plugin_unique_identifiers.append(datasource.plugin_unique_identifier) + elif plugin_id == "langgenius/dify_extractor": + tool = tool_manager.get_plugin_provider(f"{plugin_id}/dify_extractor", tenant_id) + need_install_plugin_unique_identifiers.append(tool.plugin_unique_identifier) + elif plugin_id == "langgenius/general_chunk": + tool = tool_manager.get_plugin_provider(f"{plugin_id}/general_chunk", tenant_id) + need_install_plugin_unique_identifiers.append(tool.plugin_unique_identifier) + elif plugin_id == "langgenius/parent_child_chunk": + tool = tool_manager.get_plugin_provider(f"{plugin_id}/parent_child_chunk", tenant_id) + need_install_plugin_unique_identifiers.append(tool.plugin_unique_identifier) + dependency["value"]["current_identifier"] = plugin_unique_identifier + if need_install_plugin_unique_identifiers: + installer_manager.install_from_identifiers( + tenant_id, + need_install_plugin_unique_identifiers, + PluginInstallationSource.Marketplace, + metas=[ + { + "plugin_unique_identifier": identifier, + } + for identifier in need_install_plugin_unique_identifiers + ], + ) \ No newline at end of file diff --git a/api/services/rag_pipeline/transform/file-general-economy.yml b/api/services/rag_pipeline/transform/file-general-economy.yml index f88c0f3cdc..3cb455624c 100644 --- a/api/services/rag_pipeline/transform/file-general-economy.yml +++ b/api/services/rag_pipeline/transform/file-general-economy.yml @@ -1,12 +1,12 @@ dependencies: - current_identifier: null - type: package + type: marketplace value: - plugin_unique_identifier: langgenius/general_chunk:0.0.2@0856fa64f7b0dc937c982f12d45b3a1ad91ba8aacc0d28a1b436e6c94a77e298 + plugin_unique_identifier: langgenius/general_chunker:0.0.1@e3da408b7277866404c3f884d599261f9d0b9003ea4ef7eb3b64489bdf39d18b - current_identifier: null - type: package + type: marketplace value: - plugin_unique_identifier: langgenius/dify_extractor:0.0.4@0cb3f06230a377c4c037fa7b5e21f4f4e362e5f24a59ed7bf4950ff75e6f1e61 + plugin_unique_identifier: langgenius/dify_extractor:0.0.1@50103421d4e002f059b662d21ad2d7a1cf34869abdbe320299d7e382516ebb1c kind: rag_pipeline rag_pipeline: description: '' diff --git a/api/services/rag_pipeline/transform/file-general-high-quality.yml b/api/services/rag_pipeline/transform/file-general-high-quality.yml index 42174d1986..7ab98fec65 100644 --- a/api/services/rag_pipeline/transform/file-general-high-quality.yml +++ b/api/services/rag_pipeline/transform/file-general-high-quality.yml @@ -1,12 +1,12 @@ dependencies: - current_identifier: null - type: package + type: marketplace value: - plugin_unique_identifier: langgenius/general_chunk:0.0.2@0856fa64f7b0dc937c982f12d45b3a1ad91ba8aacc0d28a1b436e6c94a77e298 + plugin_unique_identifier: langgenius/qa_chunk:0.0.1@ef14ad7edce1d293ef52f14429a9acb39fa146a7b91d63a31cda905539908453 - current_identifier: null - type: package + type: marketplace value: - plugin_unique_identifier: langgenius/dify_extractor:0.0.4@0cb3f06230a377c4c037fa7b5e21f4f4e362e5f24a59ed7bf4950ff75e6f1e61 + plugin_unique_identifier: langgenius/dify_extractor:0.0.1@50103421d4e002f059b662d21ad2d7a1cf34869abdbe320299d7e382516ebb1c kind: rag_pipeline rag_pipeline: description: '' diff --git a/api/services/rag_pipeline/transform/file-parentchild.yml b/api/services/rag_pipeline/transform/file-parentchild.yml index 4135ab3aa4..414b8deca9 100644 --- a/api/services/rag_pipeline/transform/file-parentchild.yml +++ b/api/services/rag_pipeline/transform/file-parentchild.yml @@ -1,12 +1,12 @@ dependencies: - current_identifier: null - type: package + type: marketplace value: - plugin_unique_identifier: langgenius/parent_child_chunk:0.0.1@f8f9ba1f3bcda159ebc0168baa755c2181b923da8157ebb439b8046019f5b510 + plugin_unique_identifier: langgenius/parentchild_chunker:0.0.1@b1a28a27e33fec442ce494da2a7814edd7eb9d646c81f38bccfcf1133d486e40 - current_identifier: null - type: package + type: marketplace value: - plugin_unique_identifier: langgenius/dify_extractor:0.0.4@0cb3f06230a377c4c037fa7b5e21f4f4e362e5f24a59ed7bf4950ff75e6f1e61 + plugin_unique_identifier: langgenius/dify_extractor:0.0.1@50103421d4e002f059b662d21ad2d7a1cf34869abdbe320299d7e382516ebb1c kind: rag_pipeline rag_pipeline: description: '' diff --git a/api/services/rag_pipeline/transform/file_general_economy.json b/api/services/rag_pipeline/transform/file_general_economy.json deleted file mode 100644 index 4d26d2be9c..0000000000 --- a/api/services/rag_pipeline/transform/file_general_economy.json +++ /dev/null @@ -1 +0,0 @@ -{"nodes": [{"id": "1752477924228", "type": "custom", "data": {"index_chunk_variable_selector": ["1752482151668", "result"], "keyword_number": 10, "retrieval_model": {"top_k": 3, "score_threshold_enabled": false, "score_threshold": 0.5, "search_method": "keyword_search", "vector_setting": {"embedding_provider_name": "langgenius/openai/openai", "embedding_model_name": "text-embedding-ada-002"}}, "type": "knowledge-index", "title": "\u77e5\u8bc6\u5e93", "selected": false, "chunk_structure": "text_model", "indexing_technique": "economy", "embedding_model": "text-embedding-ada-002", "embedding_model_provider": "langgenius/openai/openai"}, "position": {"x": 1076.4656678451215, "y": 281.3910724383104}, "targetPosition": "left", "sourcePosition": "right", "positionAbsolute": {"x": 1076.4656678451215, "y": 281.3910724383104}, "width": 242, "height": 114, "selected": true}, {"id": "1752479895761", "type": "custom", "data": {"datasource_parameters": {}, "datasource_configurations": {}, "type": "datasource", "title": "File", "plugin_id": "langgenius/file", "provider_type": "local_file", "provider_name": "file", "datasource_name": "upload-file", "datasource_label": "File", "selected": false, "fileExtensions": ["txt", "markdown", "mdx", "pdf", "html", "xlsx", "xls", "vtt", "properties", "doc", "docx", "csv", "eml", "msg", "pptx", "xml", "epub", "ppt", "md"]}, "position": {"x": -839.8603427660498, "y": 251.3910724383104}, "targetPosition": "left", "sourcePosition": "right", "positionAbsolute": {"x": -839.8603427660498, "y": 251.3910724383104}, "width": 242, "height": 52, "selected": false}, {"id": "1752480460682", "type": "custom", "data": {"tool_parameters": {"file": {"type": "variable", "value": ["1752479895761", "file"]}}, "tool_configurations": {}, "type": "tool", "title": "Dify\u6587\u672c\u63d0\u53d6\u5668", "provider_id": "langgenius/dify_extractor/dify_extractor", "provider_type": "builtin", "provider_name": "langgenius/dify_extractor/dify_extractor", "tool_name": "dify_extractor", "tool_label": "Dify\u6587\u672c\u63d0\u53d6\u5668", "tool_description": "Dify Extractor", "is_team_authorization": true, "output_schema": {"properties": {"documents": {"description": "the documents extracted from the file", "items": {"type": "object"}, "type": "array"}, "images": {"description": "The images extracted from the file", "items": {"type": "object"}, "type": "array"}}, "type": "object"}, "paramSchemas": [{"name": "file", "label": {"en_US": "file", "zh_Hans": "file", "pt_BR": "file", "ja_JP": "file"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": true, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "file", "human_description": {"en_US": "the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", "zh_Hans": "\u7528\u4e8e\u89e3\u6790\u7684\u6587\u4ef6(\u652f\u6301 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", "pt_BR": "o arquivo a ser analisado (suporta pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", "ja_JP": "the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)"}, "form": "llm", "llm_description": "the file to be parsed (support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)"}], "params": {"file": ""}, "selected": false}, "position": {"x": -108.28652292656551, "y": 281.3910724383104}, "targetPosition": "left", "sourcePosition": "right", "positionAbsolute": {"x": -108.28652292656551, "y": 281.3910724383104}, "width": 242, "height": 52, "selected": false}, {"id": "1752481112180", "type": "custom", "data": {"variable_selector": ["1752479895761", "file"], "is_array_file": false, "type": "document-extractor", "title": "\u6587\u6863\u63d0\u53d6\u5668", "selected": false}, "position": {"x": -108.28652292656551, "y": 390.6576481692478}, "targetPosition": "left", "sourcePosition": "right", "positionAbsolute": {"x": -108.28652292656551, "y": 390.6576481692478}, "width": 242, "height": 90, "selected": false}, {"id": "1752481129417", "type": "custom", "data": {"cases": [{"id": "24e47cad-f1e2-4f74-9884-3f49d5bb37b7", "case_id": "24e47cad-f1e2-4f74-9884-3f49d5bb37b7", "logical_operator": "or", "conditions": [{"id": "9da88d93-3ff6-463f-abfd-6bcafbf2554d", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"], "comparison_operator": "is", "value": ".xlsx"}, {"id": "d0e88f5e-dfe3-4bae-af0c-dbec267500de", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"], "comparison_operator": "is", "value": ".xls"}, {"id": "a957e91e-1ed7-4c6b-9c80-2f0948858f1d", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"], "comparison_operator": "is", "value": ".md"}, {"id": "870c3c39-8d3f-474a-ab8b-9c0ccf53db73", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"], "comparison_operator": "is", "value": ".markdown"}, {"id": "f9541513-1e71-4dc1-9db5-35dc84a39e3c", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"], "comparison_operator": "is", "value": ".mdx"}, {"id": "4c7f455b-ac20-40ca-9495-6cc44ffcb35d", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"], "comparison_operator": "is", "value": ".html"}, {"id": "2e12d9c7-8057-4a09-8851-f9fd1d0718d1", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"], "comparison_operator": "is", "value": ".htm"}, {"id": "73a995a9-d8b9-4aef-89f7-306e2ddcbce2", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"], "comparison_operator": "is", "value": ".docx"}, {"id": "8a2e8772-0426-458b-a1f9-9eaaec0f27c8", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"], "comparison_operator": "is", "value": ".csv"}, {"id": "aa2cb6b6-a2fc-462a-a9f5-c9c3f33a1602", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"], "comparison_operator": "is", "value": ".txt"}]}], "type": "if-else", "title": "\u6761\u4ef6\u5206\u652f", "selected": false}, "position": {"x": -489.57009543377865, "y": 251.3910724383104}, "targetPosition": "left", "sourcePosition": "right", "positionAbsolute": {"x": -489.57009543377865, "y": 251.3910724383104}, "width": 242, "height": 358, "selected": false}, {"id": "1752482022496", "type": "custom", "data": {"output_type": "string", "variables": [["1752481112180", "text"], ["1752480460682", "text"]], "type": "variable-aggregator", "title": "\u53d8\u91cf\u805a\u5408\u5668", "selected": false, "advanced_settings": {"group_enabled": false, "groups": [{"output_type": "string", "variables": [["1752481112180", "text"], ["1752480460682", "text"]], "group_name": "Group1", "groupId": "f4cf07b4-914d-4544-8ef8-0c5d9e4f21a7"}]}}, "position": {"x": 319.441649575055, "y": 281.3910724383104}, "targetPosition": "left", "sourcePosition": "right", "positionAbsolute": {"x": 319.441649575055, "y": 281.3910724383104}, "width": 242, "height": 129, "selected": false}, {"id": "1752482151668", "type": "custom", "data": {"tool_parameters": {"input_variable": {"type": "mixed", "value": "{{#1752482022496.output#}}"}, "delimiter": {"type": "mixed", "value": "{{#rag.shared.delimiter#}}"}, "max_chunk_length": {"type": "variable", "value": ["rag", "shared", "max_chunk_length"]}, "chunk_overlap_length": {"type": "variable", "value": ["rag", "shared", "chunk_overlap"]}, "replace_consecutive_spaces_newlines_tabs": {"type": "mixed", "value": "{{#rag.shared.replace_consecutive_spaces#}}"}, "delete_all_urls_and_email_addresses": {"type": "mixed", "value": "{{#rag.shared.delete_urls_email#}}"}}, "tool_configurations": {}, "type": "tool", "title": "\u901a\u7528\u6587\u672c\u5206\u5757", "provider_id": "langgenius/general_chunk/general_chunk", "provider_type": "builtin", "provider_name": "langgenius/general_chunk/general_chunk", "tool_name": "general_chunk", "tool_label": "\u901a\u7528\u6587\u672c\u5206\u5757", "tool_description": "\u4e00\u4e2a\u7528\u4e8e\u901a\u7528\u6587\u672c\u5206\u5757\u6a21\u5f0f\u7684\u5de5\u5177\uff0c\u68c0\u7d22\u548c\u53ec\u56de\u7684\u5757\u662f\u76f8\u540c\u7684\u3002", "is_team_authorization": true, "output_schema": {"properties": {"result": {"description": "The result of the general chunk tool.", "properties": {"general_chunks": {"items": {"description": "The chunk of the text.", "type": "string"}, "type": "array"}}, "type": "object"}}, "type": "object"}, "paramSchemas": [{"name": "input_variable", "label": {"en_US": "Input Variable", "zh_Hans": "\u8f93\u5165\u53d8\u91cf", "pt_BR": "Input Variable", "ja_JP": "Input Variable"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": true, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "string", "human_description": {"en_US": "The text you want to chunk.", "zh_Hans": "\u4f60\u60f3\u8981\u5206\u5757\u7684\u6587\u672c\u3002", "pt_BR": "The text you want to chunk.", "ja_JP": "The text you want to chunk."}, "form": "llm", "llm_description": "The text you want to chunk."}, {"name": "delimiter", "label": {"en_US": "Delimiter", "zh_Hans": "\u5206\u9694\u7b26", "pt_BR": "Delimiter", "ja_JP": "Delimiter"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": true, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "string", "human_description": {"en_US": "The delimiter of the chunks.", "zh_Hans": "\u5757\u7684\u5206\u9694\u7b26\u3002", "pt_BR": "The delimiter of the chunks.", "ja_JP": "The delimiter of the chunks."}, "form": "llm", "llm_description": "The delimiter of the chunks, the format of the delimiter must be a string."}, {"name": "max_chunk_length", "label": {"en_US": "Maximum Chunk Length", "zh_Hans": "\u6700\u5927\u5757\u7684\u957f\u5ea6", "pt_BR": "Maximum Chunk Length", "ja_JP": "Maximum Chunk Length"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": true, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "number", "human_description": {"en_US": "The maximum chunk length.", "zh_Hans": "\u6700\u5927\u5757\u7684\u957f\u5ea6\u3002", "pt_BR": "The maximum chunk length.", "ja_JP": "The maximum chunk length."}, "form": "llm", "llm_description": "The maximum chunk length, the format of the chunk size must be an integer."}, {"name": "chunk_overlap_length", "label": {"en_US": "Chunk Overlap Length", "zh_Hans": "\u5757\u7684\u91cd\u53e0\u957f\u5ea6", "pt_BR": "Chunk Overlap Length", "ja_JP": "Chunk Overlap Length"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": false, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "number", "human_description": {"en_US": "The chunk overlap length.", "zh_Hans": "\u5757\u7684\u91cd\u53e0\u957f\u5ea6\u3002", "pt_BR": "The chunk overlap length.", "ja_JP": "The chunk overlap length."}, "form": "llm", "llm_description": "The chunk overlap length, the format of the chunk overlap length must be an integer."}, {"name": "replace_consecutive_spaces_newlines_tabs", "label": {"en_US": "Replace Consecutive Spaces, Newlines and Tabs", "zh_Hans": "\u66ff\u6362\u8fde\u7eed\u7684\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26", "pt_BR": "Replace Consecutive Spaces, Newlines and Tabs", "ja_JP": "Replace Consecutive Spaces, Newlines and Tabs"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": false, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "boolean", "human_description": {"en_US": "Replace consecutive spaces, newlines and tabs", "zh_Hans": "\u66ff\u6362\u8fde\u7eed\u7684\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26", "pt_BR": "Replace consecutive spaces, newlines and tabs", "ja_JP": "Replace consecutive spaces, newlines and tabs"}, "form": "llm", "llm_description": "Replace consecutive spaces, newlines and tabs, the format of the replace must be a boolean."}, {"name": "delete_all_urls_and_email_addresses", "label": {"en_US": "Delete All URLs and Email Addresses", "zh_Hans": "\u5220\u9664\u6240\u6709URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740", "pt_BR": "Delete All URLs and Email Addresses", "ja_JP": "Delete All URLs and Email Addresses"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": false, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "boolean", "human_description": {"en_US": "Delete all URLs and email addresses", "zh_Hans": "\u5220\u9664\u6240\u6709URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740", "pt_BR": "Delete all URLs and email addresses", "ja_JP": "Delete all URLs and email addresses"}, "form": "llm", "llm_description": "Delete all URLs and email addresses, the format of the delete must be a boolean."}], "params": {"input_variable": "", "delimiter": "", "max_chunk_length": "", "chunk_overlap_length": "", "replace_consecutive_spaces_newlines_tabs": "", "delete_all_urls_and_email_addresses": ""}, "selected": false}, "position": {"x": 693.5300771507484, "y": 281.3910724383104}, "targetPosition": "left", "sourcePosition": "right", "positionAbsolute": {"x": 693.5300771507484, "y": 281.3910724383104}, "width": 242, "height": 52, "selected": false}], "edges": [{"id": "1752479895761-source-1752481129417-target", "type": "custom", "source": "1752479895761", "sourceHandle": "source", "target": "1752481129417", "targetHandle": "target", "data": {"sourceType": "datasource", "targetType": "if-else", "isInIteration": false, "isInLoop": false}, "zIndex": 0}, {"id": "1752481129417-24e47cad-f1e2-4f74-9884-3f49d5bb37b7-1752480460682-target", "type": "custom", "source": "1752481129417", "target": "1752480460682", "sourceHandle": "24e47cad-f1e2-4f74-9884-3f49d5bb37b7", "targetHandle": "target", "data": {"sourceType": "if-else", "targetType": "tool", "isInLoop": false}, "zIndex": 0}, {"id": "1752481129417-false-1752481112180-target", "type": "custom", "source": "1752481129417", "target": "1752481112180", "sourceHandle": "false", "targetHandle": "target", "data": {"sourceType": "if-else", "targetType": "document-extractor", "isInLoop": false}, "zIndex": 0}, {"id": "1752480460682-source-1752482022496-target", "type": "custom", "source": "1752480460682", "sourceHandle": "source", "target": "1752482022496", "targetHandle": "target", "data": {"sourceType": "tool", "targetType": "variable-aggregator", "isInIteration": false, "isInLoop": false}, "zIndex": 0}, {"id": "1752481112180-source-1752482022496-target", "type": "custom", "source": "1752481112180", "target": "1752482022496", "sourceHandle": "source", "targetHandle": "target", "data": {"sourceType": "document-extractor", "targetType": "variable-aggregator", "isInLoop": false}, "zIndex": 0}, {"id": "1752482022496-source-1752482151668-target", "type": "custom", "source": "1752482022496", "sourceHandle": "source", "target": "1752482151668", "targetHandle": "target", "data": {"sourceType": "variable-aggregator", "targetType": "tool", "isInIteration": false, "isInLoop": false}, "zIndex": 0}, {"id": "1752482151668-source-1752477924228-target", "type": "custom", "source": "1752482151668", "sourceHandle": "source", "target": "1752477924228", "targetHandle": "target", "data": {"sourceType": "tool", "targetType": "knowledge-index", "isInIteration": false, "isInLoop": false}, "zIndex": 0}], "viewport": {"x": 701.4999626224237, "y": 128.33739021504016, "zoom": 0.48941689643726966}} \ No newline at end of file diff --git a/api/services/rag_pipeline/transform/file_general_high_quality.json b/api/services/rag_pipeline/transform/file_general_high_quality.json deleted file mode 100644 index 82ac85ff41..0000000000 --- a/api/services/rag_pipeline/transform/file_general_high_quality.json +++ /dev/null @@ -1 +0,0 @@ -{"nodes": [{"id": "1752477924228", "type": "custom", "data": {"index_chunk_variable_selector": ["1752482151668", "result"], "keyword_number": 10, "retrieval_model": {"top_k": 3, "score_threshold_enabled": false, "score_threshold": 0.5, "search_method": "semantic_search", "vector_setting": {"embedding_provider_name": "langgenius/openai/openai", "embedding_model_name": "text-embedding-ada-002"}}, "type": "knowledge-index", "title": "\u77e5\u8bc6\u5e93", "selected": false, "chunk_structure": "text_model", "indexing_technique": "high_quality", "embedding_model": "text-embedding-ada-002", "embedding_model_provider": "langgenius/openai/openai"}, "position": {"x": 1076.4656678451215, "y": 281.3910724383104}, "targetPosition": "left", "sourcePosition": "right", "positionAbsolute": {"x": 1076.4656678451215, "y": 281.3910724383104}, "width": 242, "height": 114, "selected": false}, {"id": "1752479895761", "type": "custom", "data": {"datasource_parameters": {}, "datasource_configurations": {}, "type": "datasource", "title": "File", "plugin_id": "langgenius/file", "provider_type": "local_file", "provider_name": "file", "datasource_name": "upload-file", "datasource_label": "File", "selected": false, "fileExtensions": ["txt", "markdown", "mdx", "pdf", "html", "xlsx", "xls", "vtt", "properties", "doc", "docx", "csv", "eml", "msg", "pptx", "xml", "epub", "ppt", "md"]}, "position": {"x": -839.8603427660498, "y": 251.3910724383104}, "targetPosition": "left", "sourcePosition": "right", "positionAbsolute": {"x": -839.8603427660498, "y": 251.3910724383104}, "width": 242, "height": 52, "selected": false}, {"id": "1752480460682", "type": "custom", "data": {"tool_parameters": {"file": {"type": "variable", "value": ["1752479895761", "file"]}}, "tool_configurations": {}, "type": "tool", "title": "Dify\u6587\u672c\u63d0\u53d6\u5668", "provider_id": "langgenius/dify_extractor/dify_extractor", "provider_type": "builtin", "provider_name": "langgenius/dify_extractor/dify_extractor", "tool_name": "dify_extractor", "tool_label": "Dify\u6587\u672c\u63d0\u53d6\u5668", "tool_description": "Dify Extractor", "is_team_authorization": true, "output_schema": {"properties": {"documents": {"description": "the documents extracted from the file", "items": {"type": "object"}, "type": "array"}, "images": {"description": "The images extracted from the file", "items": {"type": "object"}, "type": "array"}}, "type": "object"}, "paramSchemas": [{"name": "file", "label": {"en_US": "file", "zh_Hans": "file", "pt_BR": "file", "ja_JP": "file"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": true, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "file", "human_description": {"en_US": "the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", "zh_Hans": "\u7528\u4e8e\u89e3\u6790\u7684\u6587\u4ef6(\u652f\u6301 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", "pt_BR": "o arquivo a ser analisado (suporta pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", "ja_JP": "the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)"}, "form": "llm", "llm_description": "the file to be parsed (support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)"}], "params": {"file": ""}, "selected": false}, "position": {"x": -108.28652292656551, "y": 281.3910724383104}, "targetPosition": "left", "sourcePosition": "right", "positionAbsolute": {"x": -108.28652292656551, "y": 281.3910724383104}, "width": 242, "height": 52, "selected": false}, {"id": "1752481112180", "type": "custom", "data": {"variable_selector": ["1752479895761", "file"], "is_array_file": false, "type": "document-extractor", "title": "\u6587\u6863\u63d0\u53d6\u5668", "selected": false}, "position": {"x": -108.28652292656551, "y": 390.6576481692478}, "targetPosition": "left", "sourcePosition": "right", "positionAbsolute": {"x": -108.28652292656551, "y": 390.6576481692478}, "width": 242, "height": 90, "selected": false}, {"id": "1752481129417", "type": "custom", "data": {"cases": [{"id": "24e47cad-f1e2-4f74-9884-3f49d5bb37b7", "case_id": "24e47cad-f1e2-4f74-9884-3f49d5bb37b7", "logical_operator": "or", "conditions": [{"id": "9da88d93-3ff6-463f-abfd-6bcafbf2554d", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"], "comparison_operator": "is", "value": ".xlsx"}, {"id": "d0e88f5e-dfe3-4bae-af0c-dbec267500de", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"], "comparison_operator": "is", "value": ".xls"}, {"id": "a957e91e-1ed7-4c6b-9c80-2f0948858f1d", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"], "comparison_operator": "is", "value": ".md"}, {"id": "870c3c39-8d3f-474a-ab8b-9c0ccf53db73", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"], "comparison_operator": "is", "value": ".markdown"}, {"id": "f9541513-1e71-4dc1-9db5-35dc84a39e3c", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"], "comparison_operator": "is", "value": ".mdx"}, {"id": "4c7f455b-ac20-40ca-9495-6cc44ffcb35d", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"], "comparison_operator": "is", "value": ".html"}, {"id": "2e12d9c7-8057-4a09-8851-f9fd1d0718d1", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"], "comparison_operator": "is", "value": ".htm"}, {"id": "73a995a9-d8b9-4aef-89f7-306e2ddcbce2", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"], "comparison_operator": "is", "value": ".docx"}, {"id": "8a2e8772-0426-458b-a1f9-9eaaec0f27c8", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"], "comparison_operator": "is", "value": ".csv"}, {"id": "aa2cb6b6-a2fc-462a-a9f5-c9c3f33a1602", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"], "comparison_operator": "is", "value": ".txt"}]}], "type": "if-else", "title": "\u6761\u4ef6\u5206\u652f", "selected": false}, "position": {"x": -489.57009543377865, "y": 251.3910724383104}, "targetPosition": "left", "sourcePosition": "right", "positionAbsolute": {"x": -489.57009543377865, "y": 251.3910724383104}, "width": 242, "height": 358, "selected": true}, {"id": "1752482022496", "type": "custom", "data": {"output_type": "string", "variables": [["1752481112180", "text"], ["1752480460682", "text"]], "type": "variable-aggregator", "title": "\u53d8\u91cf\u805a\u5408\u5668", "selected": false, "advanced_settings": {"group_enabled": false, "groups": [{"output_type": "string", "variables": [["1752481112180", "text"], ["1752480460682", "text"]], "group_name": "Group1", "groupId": "f4cf07b4-914d-4544-8ef8-0c5d9e4f21a7"}]}}, "position": {"x": 319.441649575055, "y": 281.3910724383104}, "targetPosition": "left", "sourcePosition": "right", "positionAbsolute": {"x": 319.441649575055, "y": 281.3910724383104}, "width": 242, "height": 129, "selected": false}, {"id": "1752482151668", "type": "custom", "data": {"tool_parameters": {"input_variable": {"type": "mixed", "value": "{{#1752482022496.output#}}"}, "delimiter": {"type": "mixed", "value": "{{#rag.shared.delimiter#}}"}, "max_chunk_length": {"type": "variable", "value": ["rag", "shared", "max_chunk_length"]}, "chunk_overlap_length": {"type": "variable", "value": ["rag", "shared", "chunk_overlap"]}, "replace_consecutive_spaces_newlines_tabs": {"type": "mixed", "value": "{{#rag.shared.replace_consecutive_spaces#}}"}, "delete_all_urls_and_email_addresses": {"type": "mixed", "value": "{{#rag.shared.delete_urls_email#}}"}}, "tool_configurations": {}, "type": "tool", "title": "\u901a\u7528\u6587\u672c\u5206\u5757", "provider_id": "langgenius/general_chunk/general_chunk", "provider_type": "builtin", "provider_name": "langgenius/general_chunk/general_chunk", "tool_name": "general_chunk", "tool_label": "\u901a\u7528\u6587\u672c\u5206\u5757", "tool_description": "\u4e00\u4e2a\u7528\u4e8e\u901a\u7528\u6587\u672c\u5206\u5757\u6a21\u5f0f\u7684\u5de5\u5177\uff0c\u68c0\u7d22\u548c\u53ec\u56de\u7684\u5757\u662f\u76f8\u540c\u7684\u3002", "is_team_authorization": true, "output_schema": {"properties": {"result": {"description": "The result of the general chunk tool.", "properties": {"general_chunks": {"items": {"description": "The chunk of the text.", "type": "string"}, "type": "array"}}, "type": "object"}}, "type": "object"}, "paramSchemas": [{"name": "input_variable", "label": {"en_US": "Input Variable", "zh_Hans": "\u8f93\u5165\u53d8\u91cf", "pt_BR": "Input Variable", "ja_JP": "Input Variable"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": true, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "string", "human_description": {"en_US": "The text you want to chunk.", "zh_Hans": "\u4f60\u60f3\u8981\u5206\u5757\u7684\u6587\u672c\u3002", "pt_BR": "The text you want to chunk.", "ja_JP": "The text you want to chunk."}, "form": "llm", "llm_description": "The text you want to chunk."}, {"name": "delimiter", "label": {"en_US": "Delimiter", "zh_Hans": "\u5206\u9694\u7b26", "pt_BR": "Delimiter", "ja_JP": "Delimiter"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": true, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "string", "human_description": {"en_US": "The delimiter of the chunks.", "zh_Hans": "\u5757\u7684\u5206\u9694\u7b26\u3002", "pt_BR": "The delimiter of the chunks.", "ja_JP": "The delimiter of the chunks."}, "form": "llm", "llm_description": "The delimiter of the chunks, the format of the delimiter must be a string."}, {"name": "max_chunk_length", "label": {"en_US": "Maximum Chunk Length", "zh_Hans": "\u6700\u5927\u5757\u7684\u957f\u5ea6", "pt_BR": "Maximum Chunk Length", "ja_JP": "Maximum Chunk Length"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": true, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "number", "human_description": {"en_US": "The maximum chunk length.", "zh_Hans": "\u6700\u5927\u5757\u7684\u957f\u5ea6\u3002", "pt_BR": "The maximum chunk length.", "ja_JP": "The maximum chunk length."}, "form": "llm", "llm_description": "The maximum chunk length, the format of the chunk size must be an integer."}, {"name": "chunk_overlap_length", "label": {"en_US": "Chunk Overlap Length", "zh_Hans": "\u5757\u7684\u91cd\u53e0\u957f\u5ea6", "pt_BR": "Chunk Overlap Length", "ja_JP": "Chunk Overlap Length"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": false, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "number", "human_description": {"en_US": "The chunk overlap length.", "zh_Hans": "\u5757\u7684\u91cd\u53e0\u957f\u5ea6\u3002", "pt_BR": "The chunk overlap length.", "ja_JP": "The chunk overlap length."}, "form": "llm", "llm_description": "The chunk overlap length, the format of the chunk overlap length must be an integer."}, {"name": "replace_consecutive_spaces_newlines_tabs", "label": {"en_US": "Replace Consecutive Spaces, Newlines and Tabs", "zh_Hans": "\u66ff\u6362\u8fde\u7eed\u7684\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26", "pt_BR": "Replace Consecutive Spaces, Newlines and Tabs", "ja_JP": "Replace Consecutive Spaces, Newlines and Tabs"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": false, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "boolean", "human_description": {"en_US": "Replace consecutive spaces, newlines and tabs", "zh_Hans": "\u66ff\u6362\u8fde\u7eed\u7684\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26", "pt_BR": "Replace consecutive spaces, newlines and tabs", "ja_JP": "Replace consecutive spaces, newlines and tabs"}, "form": "llm", "llm_description": "Replace consecutive spaces, newlines and tabs, the format of the replace must be a boolean."}, {"name": "delete_all_urls_and_email_addresses", "label": {"en_US": "Delete All URLs and Email Addresses", "zh_Hans": "\u5220\u9664\u6240\u6709URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740", "pt_BR": "Delete All URLs and Email Addresses", "ja_JP": "Delete All URLs and Email Addresses"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": false, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "boolean", "human_description": {"en_US": "Delete all URLs and email addresses", "zh_Hans": "\u5220\u9664\u6240\u6709URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740", "pt_BR": "Delete all URLs and email addresses", "ja_JP": "Delete all URLs and email addresses"}, "form": "llm", "llm_description": "Delete all URLs and email addresses, the format of the delete must be a boolean."}], "params": {"input_variable": "", "delimiter": "", "max_chunk_length": "", "chunk_overlap_length": "", "replace_consecutive_spaces_newlines_tabs": "", "delete_all_urls_and_email_addresses": ""}, "selected": false}, "position": {"x": 693.5300771507484, "y": 281.3910724383104}, "targetPosition": "left", "sourcePosition": "right", "positionAbsolute": {"x": 693.5300771507484, "y": 281.3910724383104}, "width": 242, "height": 52, "selected": false}], "edges": [{"id": "1752479895761-source-1752481129417-target", "type": "custom", "source": "1752479895761", "sourceHandle": "source", "target": "1752481129417", "targetHandle": "target", "data": {"sourceType": "datasource", "targetType": "if-else", "isInIteration": false, "isInLoop": false}, "zIndex": 0}, {"id": "1752481129417-24e47cad-f1e2-4f74-9884-3f49d5bb37b7-1752480460682-target", "type": "custom", "source": "1752481129417", "target": "1752480460682", "sourceHandle": "24e47cad-f1e2-4f74-9884-3f49d5bb37b7", "targetHandle": "target", "data": {"sourceType": "if-else", "targetType": "tool", "isInLoop": false}, "zIndex": 0}, {"id": "1752481129417-false-1752481112180-target", "type": "custom", "source": "1752481129417", "target": "1752481112180", "sourceHandle": "false", "targetHandle": "target", "data": {"sourceType": "if-else", "targetType": "document-extractor", "isInLoop": false}, "zIndex": 0}, {"id": "1752480460682-source-1752482022496-target", "type": "custom", "source": "1752480460682", "sourceHandle": "source", "target": "1752482022496", "targetHandle": "target", "data": {"sourceType": "tool", "targetType": "variable-aggregator", "isInIteration": false, "isInLoop": false}, "zIndex": 0}, {"id": "1752481112180-source-1752482022496-target", "type": "custom", "source": "1752481112180", "target": "1752482022496", "sourceHandle": "source", "targetHandle": "target", "data": {"sourceType": "document-extractor", "targetType": "variable-aggregator", "isInLoop": false}, "zIndex": 0}, {"id": "1752482022496-source-1752482151668-target", "type": "custom", "source": "1752482022496", "sourceHandle": "source", "target": "1752482151668", "targetHandle": "target", "data": {"sourceType": "variable-aggregator", "targetType": "tool", "isInIteration": false, "isInLoop": false}, "zIndex": 0}, {"id": "1752482151668-source-1752477924228-target", "type": "custom", "source": "1752482151668", "sourceHandle": "source", "target": "1752477924228", "targetHandle": "target", "data": {"sourceType": "tool", "targetType": "knowledge-index", "isInIteration": false, "isInLoop": false}, "zIndex": 0}], "viewport": {"x": 701.4999626224237, "y": 128.33739021504016, "zoom": 0.48941689643726966}} \ No newline at end of file diff --git a/api/services/rag_pipeline/transform/file_parent_child.json b/api/services/rag_pipeline/transform/file_parent_child.json deleted file mode 100644 index 84710a842c..0000000000 --- a/api/services/rag_pipeline/transform/file_parent_child.json +++ /dev/null @@ -1 +0,0 @@ -{"nodes": [{"data": {"chunk_structure": "hierarchical_model", "embedding_model": "text-embedding-ada-002", "embedding_model_provider": "langgenius/openai/openai", "index_chunk_variable_selector": ["1752575473519", "result"], "indexing_technique": "high_quality", "keyword_number": 10, "retrieval_model": {"score_threshold": 0.5, "score_threshold_enabled": false, "search_method": "semantic_search", "top_k": 3, "vector_setting": {"embedding_model_name": "text-embedding-ada-002", "embedding_provider_name": "langgenius/openai/openai"}}, "selected": false, "title": "\u77e5\u8bc6\u5e93", "type": "knowledge-index"}, "height": 114, "id": "1752477924228", "position": {"x": 994.3774545394483, "y": 281.3910724383104}, "positionAbsolute": {"x": 994.3774545394483, "y": 281.3910724383104}, "selected": false, "sourcePosition": "right", "targetPosition": "left", "type": "custom", "width": 242}, {"data": {"datasource_configurations": {}, "datasource_label": "File", "datasource_name": "upload-file", "datasource_parameters": {}, "fileExtensions": ["txt", "markdown", "mdx", "pdf", "html", "xlsx", "xls", "vtt", "properties", "doc", "docx", "csv", "eml", "msg", "pptx", "xml", "epub", "ppt", "md"], "plugin_id": "langgenius/file", "provider_name": "file", "provider_type": "local_file", "selected": false, "title": "File", "type": "datasource"}, "height": 52, "id": "1752479895761", "position": {"x": -839.8603427660498, "y": 251.3910724383104}, "positionAbsolute": {"x": -839.8603427660498, "y": 251.3910724383104}, "selected": false, "sourcePosition": "right", "targetPosition": "left", "type": "custom", "width": 242}, {"data": {"is_team_authorization": true, "output_schema": {"properties": {"documents": {"description": "the documents extracted from the file", "items": {"type": "object"}, "type": "array"}, "images": {"description": "The images extracted from the file", "items": {"type": "object"}, "type": "array"}}, "type": "object"}, "paramSchemas": [{"auto_generate": null, "default": null, "form": "llm", "human_description": {"en_US": "the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", "ja_JP": "the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", "pt_BR": "o arquivo a ser analisado (suporta pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", "zh_Hans": "\u7528\u4e8e\u89e3\u6790\u7684\u6587\u4ef6(\u652f\u6301 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)"}, "label": {"en_US": "file", "ja_JP": "file", "pt_BR": "file", "zh_Hans": "file"}, "llm_description": "the file to be parsed (support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", "max": null, "min": null, "name": "file", "options": [], "placeholder": null, "precision": null, "required": true, "scope": null, "template": null, "type": "file"}], "params": {"file": ""}, "provider_id": "langgenius/dify_extractor/dify_extractor", "provider_name": "langgenius/dify_extractor/dify_extractor", "provider_type": "builtin", "selected": false, "title": "Dify\u6587\u672c\u63d0\u53d6\u5668", "tool_configurations": {}, "tool_description": "Dify Extractor", "tool_label": "Dify\u6587\u672c\u63d0\u53d6\u5668", "tool_name": "dify_extractor", "tool_parameters": {"file": {"type": "variable", "value": ["1752479895761", "file"]}}, "type": "tool"}, "height": 52, "id": "1752480460682", "position": {"x": -108.28652292656551, "y": 281.3910724383104}, "positionAbsolute": {"x": -108.28652292656551, "y": 281.3910724383104}, "selected": false, "sourcePosition": "right", "targetPosition": "left", "type": "custom", "width": 242}, {"data": {"is_array_file": false, "selected": false, "title": "\u6587\u6863\u63d0\u53d6\u5668", "type": "document-extractor", "variable_selector": ["1752479895761", "file"]}, "height": 90, "id": "1752481112180", "position": {"x": -108.28652292656551, "y": 390.6576481692478}, "positionAbsolute": {"x": -108.28652292656551, "y": 390.6576481692478}, "selected": false, "sourcePosition": "right", "targetPosition": "left", "type": "custom", "width": 242}, {"data": {"cases": [{"id": "24e47cad-f1e2-4f74-9884-3f49d5bb37b7", "case_id": "24e47cad-f1e2-4f74-9884-3f49d5bb37b7", "logical_operator": "or", "conditions": [{"comparison_operator": "is", "id": "9da88d93-3ff6-463f-abfd-6bcafbf2554d", "value": ".xlsx", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"]}, {"comparison_operator": "is", "id": "d0e88f5e-dfe3-4bae-af0c-dbec267500de", "value": ".xls", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"]}, {"comparison_operator": "is", "id": "a957e91e-1ed7-4c6b-9c80-2f0948858f1d", "value": ".md", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"]}, {"comparison_operator": "is", "id": "870c3c39-8d3f-474a-ab8b-9c0ccf53db73", "value": ".markdown", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"]}, {"comparison_operator": "is", "id": "f9541513-1e71-4dc1-9db5-35dc84a39e3c", "value": ".mdx", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"]}, {"comparison_operator": "is", "id": "4c7f455b-ac20-40ca-9495-6cc44ffcb35d", "value": ".html", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"]}, {"comparison_operator": "is", "id": "2e12d9c7-8057-4a09-8851-f9fd1d0718d1", "value": ".htm", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"]}, {"comparison_operator": "is", "id": "73a995a9-d8b9-4aef-89f7-306e2ddcbce2", "value": ".docx", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"]}, {"comparison_operator": "is", "id": "8a2e8772-0426-458b-a1f9-9eaaec0f27c8", "value": ".csv", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"]}, {"comparison_operator": "is", "id": "aa2cb6b6-a2fc-462a-a9f5-c9c3f33a1602", "value": ".txt", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"]}]}], "selected": false, "title": "\u6761\u4ef6\u5206\u652f", "type": "if-else"}, "height": 358, "id": "1752481129417", "position": {"x": -512.2335487893622, "y": 251.3910724383104}, "positionAbsolute": {"x": -512.2335487893622, "y": 251.3910724383104}, "selected": false, "sourcePosition": "right", "targetPosition": "left", "type": "custom", "width": 242}, {"data": {"advanced_settings": {"group_enabled": false, "groups": [{"groupId": "f4cf07b4-914d-4544-8ef8-0c5d9e4f21a7", "group_name": "Group1", "output_type": "string", "variables": [["1752481112180", "text"], ["1752480460682", "text"]]}]}, "output_type": "string", "selected": false, "title": "\u53d8\u91cf\u805a\u5408\u5668", "type": "variable-aggregator", "variables": [["1752481112180", "text"], ["1752480460682", "text"]]}, "height": 129, "id": "1752482022496", "position": {"x": 319.441649575055, "y": 281.3910724383104}, "positionAbsolute": {"x": 319.441649575055, "y": 281.3910724383104}, "selected": false, "sourcePosition": "right", "targetPosition": "left", "type": "custom", "width": 242}, {"id": "1752575473519", "type": "custom", "data": {"tool_parameters": {"input_text": {"type": "mixed", "value": "{{#1752482022496.output#}}"}, "max_length": {"type": "variable", "value": ["rag", "shared", "max_chunk_length"]}, "separator": {"type": "mixed", "value": "{{#rag.shared.delimiter#}}"}, "subchunk_max_length": {"type": "variable", "value": ["rag", "shared", "child_max_chunk_length"]}, "subchunk_separator": {"type": "mixed", "value": "{{#rag.shared.child_delimiter#}}"}, "parent_mode": {"type": "variable", "value": ["rag", "shared", "parent_mode"]}, "remove_extra_spaces": {"type": "mixed", "value": "{{#rag.shared.replace_consecutive_spaces#}}"}, "remove_urls_emails": {"type": "mixed", "value": "{{#rag.shared.delete_urls_email#}}"}}, "tool_configurations": {}, "type": "tool", "title": "\u7236\u5b50\u5206\u5757\u5904\u7406\u5668", "provider_id": "langgenius/parent_child_chunk/parent_child_chunk", "provider_type": "builtin", "provider_name": "langgenius/parent_child_chunk/parent_child_chunk", "tool_name": "parent_child_chunk", "tool_label": "\u7236\u5b50\u5206\u5757\u5904\u7406\u5668", "tool_description": "\u5c06\u6587\u6863\u5904\u7406\u4e3a\u7236\u5b50\u5206\u5757\u7ed3\u6784", "is_team_authorization": true, "output_schema": {"properties": {"result": {"description": "Parent child chunks result", "items": {"type": "object"}, "type": "array"}}, "type": "object"}, "paramSchemas": [{"name": "input_text", "label": {"en_US": "Input text", "zh_Hans": "\u8f93\u5165\u6587\u672c", "pt_BR": "Input text", "ja_JP": "Input text"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": true, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "string", "human_description": {"en_US": "The text you want to chunk.", "zh_Hans": "\u4f60\u60f3\u8981\u5206\u5757\u7684\u6587\u672c\u3002", "pt_BR": "The text you want to chunk.", "ja_JP": "The text you want to chunk."}, "form": "llm", "llm_description": "The text you want to chunk."}, {"name": "max_length", "label": {"en_US": "Maximum Length", "zh_Hans": "\u6700\u5927\u957f\u5ea6", "pt_BR": "Comprimento M\u00e1ximo", "ja_JP": "Maximum Length"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": false, "default": 1024, "min": null, "max": null, "precision": null, "options": [], "type": "number", "human_description": {"en_US": "Maximum length for chunking", "zh_Hans": "\u7528\u4e8e\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6", "pt_BR": "Comprimento m\u00e1ximo para divis\u00e3o", "ja_JP": "Maximum length for chunking"}, "form": "llm", "llm_description": "Maximum length allowed per chunk"}, {"name": "separator", "label": {"en_US": "Chunk Separator", "zh_Hans": "\u5206\u5757\u5206\u9694\u7b26", "pt_BR": "Separador de Divis\u00e3o", "ja_JP": "Chunk Separator"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": false, "default": "\n\n", "min": null, "max": null, "precision": null, "options": [], "type": "string", "human_description": {"en_US": "Separator used for chunking", "zh_Hans": "\u7528\u4e8e\u5206\u5757\u7684\u5206\u9694\u7b26", "pt_BR": "Separador usado para divis\u00e3o", "ja_JP": "Separator used for chunking"}, "form": "llm", "llm_description": "The separator used to split chunks"}, {"name": "subchunk_max_length", "label": {"en_US": "Subchunk Maximum Length", "zh_Hans": "\u5b50\u5206\u5757\u6700\u5927\u957f\u5ea6", "pt_BR": "Comprimento M\u00e1ximo de Subdivis\u00e3o", "ja_JP": "Subchunk Maximum Length"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": false, "default": 512, "min": null, "max": null, "precision": null, "options": [], "type": "number", "human_description": {"en_US": "Maximum length for subchunking", "zh_Hans": "\u7528\u4e8e\u5b50\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6", "pt_BR": "Comprimento m\u00e1ximo para subdivis\u00e3o", "ja_JP": "Maximum length for subchunking"}, "form": "llm", "llm_description": "Maximum length allowed per subchunk"}, {"name": "subchunk_separator", "label": {"en_US": "Subchunk Separator", "zh_Hans": "\u5b50\u5206\u5757\u5206\u9694\u7b26", "pt_BR": "Separador de Subdivis\u00e3o", "ja_JP": "Subchunk Separator"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": false, "default": ". ", "min": null, "max": null, "precision": null, "options": [], "type": "string", "human_description": {"en_US": "Separator used for subchunking", "zh_Hans": "\u7528\u4e8e\u5b50\u5206\u5757\u7684\u5206\u9694\u7b26", "pt_BR": "Separador usado para subdivis\u00e3o", "ja_JP": "Separator used for subchunking"}, "form": "llm", "llm_description": "The separator used to split subchunks"}, {"name": "parent_mode", "label": {"en_US": "Parent Mode", "zh_Hans": "\u7236\u5757\u6a21\u5f0f", "pt_BR": "Modo Pai", "ja_JP": "Parent Mode"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": true, "default": "paragraph", "min": null, "max": null, "precision": null, "options": [{"value": "paragraph", "label": {"en_US": "Paragraph", "zh_Hans": "\u6bb5\u843d", "pt_BR": "Par\u00e1grafo", "ja_JP": "Paragraph"}, "icon": ""}, {"value": "full_doc", "label": {"en_US": "Full Document", "zh_Hans": "\u5168\u6587", "pt_BR": "Documento Completo", "ja_JP": "Full Document"}, "icon": ""}], "type": "select", "human_description": {"en_US": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.", "zh_Hans": "\u6839\u636e\u5206\u9694\u7b26\u548c\u6700\u5927\u5757\u957f\u5ea6\u5c06\u6587\u672c\u62c6\u5206\u4e3a\u6bb5\u843d\uff0c\u4f7f\u7528\u62c6\u5206\u6587\u672c\u4f5c\u4e3a\u68c0\u7d22\u7684\u7236\u5757\u6216\u6574\u4e2a\u6587\u6863\u7528\u4f5c\u7236\u5757\u5e76\u76f4\u63a5\u68c0\u7d22\u3002", "pt_BR": "Dividir texto em par\u00e1grafos com base no separador e no comprimento m\u00e1ximo do bloco, usando o texto dividido como bloco pai ou documento completo como bloco pai e diretamente recuper\u00e1-lo.", "ja_JP": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve."}, "form": "llm", "llm_description": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve."}, {"name": "remove_extra_spaces", "label": {"en_US": "Remove Extra Spaces", "zh_Hans": "\u79fb\u9664\u591a\u4f59\u7a7a\u683c", "pt_BR": "Remover Espa\u00e7os Extras", "ja_JP": "Remove Extra Spaces"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": false, "default": 0, "min": null, "max": null, "precision": null, "options": [], "type": "boolean", "human_description": {"en_US": "Whether to remove extra spaces in the text", "zh_Hans": "\u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684\u591a\u4f59\u7a7a\u683c", "pt_BR": "Se deve remover espa\u00e7os extras no texto", "ja_JP": "Whether to remove extra spaces in the text"}, "form": "llm", "llm_description": "Whether to remove extra spaces in the text"}, {"name": "remove_urls_emails", "label": {"en_US": "Remove URLs and Emails", "zh_Hans": "\u79fb\u9664URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740", "pt_BR": "Remover URLs e E-mails", "ja_JP": "Remove URLs and Emails"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": false, "default": 0, "min": null, "max": null, "precision": null, "options": [], "type": "boolean", "human_description": {"en_US": "Whether to remove URLs and emails in the text", "zh_Hans": "\u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740", "pt_BR": "Se deve remover URLs e e-mails no texto", "ja_JP": "Whether to remove URLs and emails in the text"}, "form": "llm", "llm_description": "Whether to remove URLs and emails in the text"}], "params": {"input_text": "", "max_length": "", "separator": "", "subchunk_max_length": "", "subchunk_separator": "", "parent_mode": "", "remove_extra_spaces": "", "remove_urls_emails": ""}, "selected": false}, "position": {"x": 637.9241611063885, "y": 281.3910724383104}, "targetPosition": "left", "sourcePosition": "right", "positionAbsolute": {"x": 637.9241611063885, "y": 281.3910724383104}, "width": 242, "height": 52, "selected": true}], "edges": [{"data": {"isInIteration": false, "isInLoop": false, "sourceType": "datasource", "targetType": "if-else"}, "id": "1752479895761-source-1752481129417-target", "source": "1752479895761", "sourceHandle": "source", "target": "1752481129417", "targetHandle": "target", "type": "custom", "zIndex": 0}, {"data": {"isInLoop": false, "sourceType": "if-else", "targetType": "tool"}, "id": "1752481129417-24e47cad-f1e2-4f74-9884-3f49d5bb37b7-1752480460682-target", "source": "1752481129417", "sourceHandle": "24e47cad-f1e2-4f74-9884-3f49d5bb37b7", "target": "1752480460682", "targetHandle": "target", "type": "custom", "zIndex": 0}, {"data": {"isInLoop": false, "sourceType": "if-else", "targetType": "document-extractor"}, "id": "1752481129417-false-1752481112180-target", "source": "1752481129417", "sourceHandle": "false", "target": "1752481112180", "targetHandle": "target", "type": "custom", "zIndex": 0}, {"data": {"isInIteration": false, "isInLoop": false, "sourceType": "tool", "targetType": "variable-aggregator"}, "id": "1752480460682-source-1752482022496-target", "source": "1752480460682", "sourceHandle": "source", "target": "1752482022496", "targetHandle": "target", "type": "custom", "zIndex": 0}, {"data": {"isInLoop": false, "sourceType": "document-extractor", "targetType": "variable-aggregator"}, "id": "1752481112180-source-1752482022496-target", "source": "1752481112180", "sourceHandle": "source", "target": "1752482022496", "targetHandle": "target", "type": "custom", "zIndex": 0}, {"id": "1752482022496-source-1752575473519-target", "type": "custom", "source": "1752482022496", "sourceHandle": "source", "target": "1752575473519", "targetHandle": "target", "data": {"sourceType": "variable-aggregator", "targetType": "tool", "isInIteration": false, "isInLoop": false}, "zIndex": 0}, {"id": "1752575473519-source-1752477924228-target", "type": "custom", "source": "1752575473519", "target": "1752477924228", "sourceHandle": "source", "targetHandle": "target", "data": {"sourceType": "tool", "targetType": "knowledge-index", "isInLoop": false}, "zIndex": 0}], "viewport": {"x": 948.6766333808323, "y": -102.06757184183238, "zoom": 0.8375774577380971}} \ No newline at end of file diff --git a/api/services/rag_pipeline/transform/notion-general-economy.yml b/api/services/rag_pipeline/transform/notion-general-economy.yml index 3f103e7e2b..5fce71c7b9 100644 --- a/api/services/rag_pipeline/transform/notion-general-economy.yml +++ b/api/services/rag_pipeline/transform/notion-general-economy.yml @@ -1,8 +1,12 @@ dependencies: - current_identifier: null - type: package + type: marketplace value: - plugin_unique_identifier: langgenius/general_chunk:0.0.2@0856fa64f7b0dc937c982f12d45b3a1ad91ba8aacc0d28a1b436e6c94a77e298 + plugin_unique_identifier: langgenius/general_chunker:0.0.1@e3da408b7277866404c3f884d599261f9d0b9003ea4ef7eb3b64489bdf39d18b +- current_identifier: null + type: marketplace + value: + plugin_unique_identifier: langgenius/notion_datasource:0.0.1@2dd49c2c3ffff976be8d22efb1ac0f63522a8d0f24ef8c44729d0a50a94ec039 kind: rag_pipeline rag_pipeline: description: '' diff --git a/api/services/rag_pipeline/transform/notion-general-high-quality.yml b/api/services/rag_pipeline/transform/notion-general-high-quality.yml index 371623a7fe..7634a33102 100644 --- a/api/services/rag_pipeline/transform/notion-general-high-quality.yml +++ b/api/services/rag_pipeline/transform/notion-general-high-quality.yml @@ -1,8 +1,12 @@ dependencies: - current_identifier: null - type: package + type: marketplace value: - plugin_unique_identifier: langgenius/general_chunk:0.0.2@0856fa64f7b0dc937c982f12d45b3a1ad91ba8aacc0d28a1b436e6c94a77e298 + plugin_unique_identifier: langgenius/general_chunker:0.0.1@e3da408b7277866404c3f884d599261f9d0b9003ea4ef7eb3b64489bdf39d18b +- current_identifier: null + type: marketplace + value: + plugin_unique_identifier: langgenius/notion_datasource:0.0.1@2dd49c2c3ffff976be8d22efb1ac0f63522a8d0f24ef8c44729d0a50a94ec039 kind: rag_pipeline rag_pipeline: description: '' diff --git a/api/services/rag_pipeline/transform/notion-parentchild.yml b/api/services/rag_pipeline/transform/notion-parentchild.yml index b793c621d6..c3bb51c597 100644 --- a/api/services/rag_pipeline/transform/notion-parentchild.yml +++ b/api/services/rag_pipeline/transform/notion-parentchild.yml @@ -1,8 +1,12 @@ dependencies: - current_identifier: null - type: package + type: marketplace value: - plugin_unique_identifier: langgenius/parent_child_chunk:0.0.1@f8f9ba1f3bcda159ebc0168baa755c2181b923da8157ebb439b8046019f5b510 + plugin_unique_identifier: langgenius/parentchild_chunker:0.0.1@b1a28a27e33fec442ce494da2a7814edd7eb9d646c81f38bccfcf1133d486e40 +- current_identifier: null + type: marketplace + value: + plugin_unique_identifier: langgenius/notion_datasource:0.0.1@2dd49c2c3ffff976be8d22efb1ac0f63522a8d0f24ef8c44729d0a50a94ec039 kind: rag_pipeline rag_pipeline: description: '' diff --git a/api/services/rag_pipeline/transform/notion_general_economy.json b/api/services/rag_pipeline/transform/notion_general_economy.json deleted file mode 100644 index ed5071fc36..0000000000 --- a/api/services/rag_pipeline/transform/notion_general_economy.json +++ /dev/null @@ -1 +0,0 @@ -{"nodes": [{"data": {"chunk_structure": "text_model", "embedding_model": "text-embedding-ada-002", "embedding_model_provider": "langgenius/openai/openai", "index_chunk_variable_selector": ["1752482151668", "result"], "indexing_technique": "economy", "keyword_number": 10, "retrieval_model": {"score_threshold": 0.5, "score_threshold_enabled": false, "search_method": "keyword_search", "top_k": 3, "vector_setting": {"embedding_model_name": "text-embedding-ada-002", "embedding_provider_name": "langgenius/openai/openai"}}, "selected": false, "title": "\u77e5\u8bc6\u5e93", "type": "knowledge-index"}, "height": 114, "id": "1752477924228", "position": {"x": 1444.5503479271906, "y": 281.3910724383104}, "positionAbsolute": {"x": 1444.5503479271906, "y": 281.3910724383104}, "selected": true, "sourcePosition": "right", "targetPosition": "left", "type": "custom", "width": 242}, {"data": {"is_team_authorization": true, "output_schema": {"properties": {"result": {"description": "The result of the general chunk tool.", "properties": {"general_chunks": {"items": {"description": "The chunk of the text.", "type": "string"}, "type": "array"}}, "type": "object"}}, "type": "object"}, "paramSchemas": [{"auto_generate": null, "default": null, "form": "llm", "human_description": {"en_US": "The text you want to chunk.", "ja_JP": "The text you want to chunk.", "pt_BR": "The text you want to chunk.", "zh_Hans": "\u4f60\u60f3\u8981\u5206\u5757\u7684\u6587\u672c\u3002"}, "label": {"en_US": "Input Variable", "ja_JP": "Input Variable", "pt_BR": "Input Variable", "zh_Hans": "\u8f93\u5165\u53d8\u91cf"}, "llm_description": "The text you want to chunk.", "max": null, "min": null, "name": "input_variable", "options": [], "placeholder": null, "precision": null, "required": true, "scope": null, "template": null, "type": "string"}, {"auto_generate": null, "default": null, "form": "llm", "human_description": {"en_US": "The delimiter of the chunks.", "ja_JP": "The delimiter of the chunks.", "pt_BR": "The delimiter of the chunks.", "zh_Hans": "\u5757\u7684\u5206\u9694\u7b26\u3002"}, "label": {"en_US": "Delimiter", "ja_JP": "Delimiter", "pt_BR": "Delimiter", "zh_Hans": "\u5206\u9694\u7b26"}, "llm_description": "The delimiter of the chunks, the format of the delimiter must be a string.", "max": null, "min": null, "name": "delimiter", "options": [], "placeholder": null, "precision": null, "required": true, "scope": null, "template": null, "type": "string"}, {"auto_generate": null, "default": null, "form": "llm", "human_description": {"en_US": "The maximum chunk length.", "ja_JP": "The maximum chunk length.", "pt_BR": "The maximum chunk length.", "zh_Hans": "\u6700\u5927\u5757\u7684\u957f\u5ea6\u3002"}, "label": {"en_US": "Maximum Chunk Length", "ja_JP": "Maximum Chunk Length", "pt_BR": "Maximum Chunk Length", "zh_Hans": "\u6700\u5927\u5757\u7684\u957f\u5ea6"}, "llm_description": "The maximum chunk length, the format of the chunk size must be an integer.", "max": null, "min": null, "name": "max_chunk_length", "options": [], "placeholder": null, "precision": null, "required": true, "scope": null, "template": null, "type": "number"}, {"auto_generate": null, "default": null, "form": "llm", "human_description": {"en_US": "The chunk overlap length.", "ja_JP": "The chunk overlap length.", "pt_BR": "The chunk overlap length.", "zh_Hans": "\u5757\u7684\u91cd\u53e0\u957f\u5ea6\u3002"}, "label": {"en_US": "Chunk Overlap Length", "ja_JP": "Chunk Overlap Length", "pt_BR": "Chunk Overlap Length", "zh_Hans": "\u5757\u7684\u91cd\u53e0\u957f\u5ea6"}, "llm_description": "The chunk overlap length, the format of the chunk overlap length must be an integer.", "max": null, "min": null, "name": "chunk_overlap_length", "options": [], "placeholder": null, "precision": null, "required": false, "scope": null, "template": null, "type": "number"}, {"auto_generate": null, "default": null, "form": "llm", "human_description": {"en_US": "Replace consecutive spaces, newlines and tabs", "ja_JP": "Replace consecutive spaces, newlines and tabs", "pt_BR": "Replace consecutive spaces, newlines and tabs", "zh_Hans": "\u66ff\u6362\u8fde\u7eed\u7684\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26"}, "label": {"en_US": "Replace Consecutive Spaces, Newlines and Tabs", "ja_JP": "Replace Consecutive Spaces, Newlines and Tabs", "pt_BR": "Replace Consecutive Spaces, Newlines and Tabs", "zh_Hans": "\u66ff\u6362\u8fde\u7eed\u7684\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26"}, "llm_description": "Replace consecutive spaces, newlines and tabs, the format of the replace must be a boolean.", "max": null, "min": null, "name": "replace_consecutive_spaces_newlines_tabs", "options": [], "placeholder": null, "precision": null, "required": false, "scope": null, "template": null, "type": "boolean"}, {"auto_generate": null, "default": null, "form": "llm", "human_description": {"en_US": "Delete all URLs and email addresses", "ja_JP": "Delete all URLs and email addresses", "pt_BR": "Delete all URLs and email addresses", "zh_Hans": "\u5220\u9664\u6240\u6709URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740"}, "label": {"en_US": "Delete All URLs and Email Addresses", "ja_JP": "Delete All URLs and Email Addresses", "pt_BR": "Delete All URLs and Email Addresses", "zh_Hans": "\u5220\u9664\u6240\u6709URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740"}, "llm_description": "Delete all URLs and email addresses, the format of the delete must be a boolean.", "max": null, "min": null, "name": "delete_all_urls_and_email_addresses", "options": [], "placeholder": null, "precision": null, "required": false, "scope": null, "template": null, "type": "boolean"}], "params": {"chunk_overlap_length": "", "delete_all_urls_and_email_addresses": "", "delimiter": "", "input_variable": "", "max_chunk_length": "", "replace_consecutive_spaces_newlines_tabs": ""}, "provider_id": "langgenius/general_chunk/general_chunk", "provider_name": "langgenius/general_chunk/general_chunk", "provider_type": "builtin", "selected": false, "title": "\u901a\u7528\u6587\u672c\u5206\u5757", "tool_configurations": {}, "tool_description": "\u4e00\u4e2a\u7528\u4e8e\u901a\u7528\u6587\u672c\u5206\u5757\u6a21\u5f0f\u7684\u5de5\u5177\uff0c\u68c0\u7d22\u548c\u53ec\u56de\u7684\u5757\u662f\u76f8\u540c\u7684\u3002", "tool_label": "\u901a\u7528\u6587\u672c\u5206\u5757", "tool_name": "general_chunk", "tool_parameters": {"chunk_overlap_length": {"type": "variable", "value": ["rag", "shared", "chunk_overlap"]}, "delimiter": {"type": "mixed", "value": "{{#rag.shared.delimiter#}}"}, "input_variable": {"type": "mixed", "value": "{{#1752489759475.content#}}"}, "max_chunk_length": {"type": "variable", "value": ["rag", "shared", "max_chunk_length"]}, "replace_consecutive_spaces_newlines_tabs": {"type": "mixed", "value": "{{#rag.shared.replace_consecutive_spaces#}}"}, "delete_all_urls_and_email_addresses": {"type": "mixed", "value": "{{#rag.shared.delete_urls_email#}}"}}, "type": "tool"}, "height": 52, "id": "1752482151668", "position": {"x": 1063.6922916384628, "y": 281.3910724383104}, "positionAbsolute": {"x": 1063.6922916384628, "y": 281.3910724383104}, "selected": false, "sourcePosition": "right", "targetPosition": "left", "type": "custom", "width": 242}, {"id": "1752489759475", "type": "custom", "data": {"datasource_parameters": {}, "datasource_configurations": {}, "type": "datasource", "title": "Notion\u6570\u636e\u6e90", "plugin_id": "langgenius/notion_datasource", "provider_type": "online_document", "provider_name": "notion", "datasource_name": "notion_datasource", "datasource_label": "Notion\u6570\u636e\u6e90", "selected": false}, "position": {"x": 736.9082104000458, "y": 281.3910724383104}, "targetPosition": "left", "sourcePosition": "right", "positionAbsolute": {"x": 736.9082104000458, "y": 281.3910724383104}, "width": 242, "height": 52}], "edges": [{"data": {"isInIteration": false, "isInLoop": false, "sourceType": "tool", "targetType": "knowledge-index"}, "id": "1752482151668-source-1752477924228-target", "source": "1752482151668", "sourceHandle": "source", "target": "1752477924228", "targetHandle": "target", "type": "custom", "zIndex": 0}, {"id": "1752489759475-source-1752482151668-target", "type": "custom", "source": "1752489759475", "sourceHandle": "source", "target": "1752482151668", "targetHandle": "target", "data": {"sourceType": "datasource", "targetType": "tool", "isInIteration": false, "isInLoop": false}, "zIndex": 0}], "viewport": {"x": -838.5696493231662, "y": -168.94656489167426, "zoom": 1.286925643857699}} \ No newline at end of file diff --git a/api/services/rag_pipeline/transform/notion_general_high_quality.json b/api/services/rag_pipeline/transform/notion_general_high_quality.json deleted file mode 100644 index d62674a0bb..0000000000 --- a/api/services/rag_pipeline/transform/notion_general_high_quality.json +++ /dev/null @@ -1 +0,0 @@ -{"nodes": [{"data": {"chunk_structure": "hierarchical_model", "embedding_model": "text-embedding-ada-002", "embedding_model_provider": "langgenius/openai/openai", "index_chunk_variable_selector": ["1752490343805", "result"], "indexing_technique": "high_quality", "keyword_number": 10, "retrieval_model": {"score_threshold": 0.5, "score_threshold_enabled": false, "search_method": "semantic_search", "top_k": 3, "vector_setting": {"embedding_model_name": "text-embedding-ada-002", "embedding_provider_name": "langgenius/openai/openai"}}, "selected": false, "title": "\u77e5\u8bc6\u5e93", "type": "knowledge-index"}, "height": 114, "id": "1752477924228", "position": {"x": 1486.2052698032674, "y": 281.3910724383104}, "positionAbsolute": {"x": 1486.2052698032674, "y": 281.3910724383104}, "selected": false, "sourcePosition": "right", "targetPosition": "left", "type": "custom", "width": 242}, {"data": {"datasource_configurations": {}, "datasource_label": "Notion\u6570\u636e\u6e90", "datasource_name": "notion_datasource", "datasource_parameters": {}, "plugin_id": "langgenius/notion_datasource", "provider_name": "notion", "provider_type": "online_document", "selected": false, "title": "Notion\u6570\u636e\u6e90", "type": "datasource"}, "height": 52, "id": "1752489759475", "position": {"x": 736.9082104000458, "y": 281.3910724383104}, "positionAbsolute": {"x": 736.9082104000458, "y": 281.3910724383104}, "sourcePosition": "right", "targetPosition": "left", "type": "custom", "width": 242, "selected": false}, {"id": "1752490343805", "type": "custom", "data": {"tool_parameters": {"input_text": {"type": "mixed", "value": "{{#1752489759475.content#}}"}, "max_length": {"type": "variable", "value": ["rag", "shared", "max_chunk_length"]}, "separator": {"type": "mixed", "value": "{{#rag.shared.delimiter#}}"}, "subchunk_max_length": {"type": "variable", "value": ["rag", "shared", "child_max_chunk_length"]}, "subchunk_separator": {"type": "mixed", "value": "{{#rag.shared.child_delimiter#}}"}, "parent_mode": {"type": "variable", "value": ["rag", "shared", "parent_mode"]}, "remove_extra_spaces": {"type": "mixed", "value": "{{#rag.shared.replace_consecutive_spaces#}}"}, "remove_urls_emails": {"type": "mixed", "value": "{{#rag.shared.delete_urls_email#}}"}}, "tool_configurations": {}, "type": "tool", "title": "\u7236\u5b50\u5206\u5757\u5904\u7406\u5668", "provider_id": "langgenius/parent_child_chunk/parent_child_chunk", "provider_type": "builtin", "provider_name": "langgenius/parent_child_chunk/parent_child_chunk", "tool_name": "parent_child_chunk", "tool_label": "\u7236\u5b50\u5206\u5757\u5904\u7406\u5668", "tool_description": "\u5c06\u6587\u6863\u5904\u7406\u4e3a\u7236\u5b50\u5206\u5757\u7ed3\u6784", "is_team_authorization": true, "output_schema": {"properties": {"result": {"description": "Parent child chunks result", "items": {"type": "object"}, "type": "array"}}, "type": "object"}, "paramSchemas": [{"name": "input_text", "label": {"en_US": "Input text", "zh_Hans": "\u8f93\u5165\u6587\u672c", "pt_BR": "Input text", "ja_JP": "Input text"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": true, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "string", "human_description": {"en_US": "The text you want to chunk.", "zh_Hans": "\u4f60\u60f3\u8981\u5206\u5757\u7684\u6587\u672c\u3002", "pt_BR": "The text you want to chunk.", "ja_JP": "The text you want to chunk."}, "form": "llm", "llm_description": "The text you want to chunk."}, {"name": "max_length", "label": {"en_US": "Maximum Length", "zh_Hans": "\u6700\u5927\u957f\u5ea6", "pt_BR": "Comprimento M\u00e1ximo", "ja_JP": "Maximum Length"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": false, "default": 1024, "min": null, "max": null, "precision": null, "options": [], "type": "number", "human_description": {"en_US": "Maximum length for chunking", "zh_Hans": "\u7528\u4e8e\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6", "pt_BR": "Comprimento m\u00e1ximo para divis\u00e3o", "ja_JP": "Maximum length for chunking"}, "form": "llm", "llm_description": "Maximum length allowed per chunk"}, {"name": "separator", "label": {"en_US": "Chunk Separator", "zh_Hans": "\u5206\u5757\u5206\u9694\u7b26", "pt_BR": "Separador de Divis\u00e3o", "ja_JP": "Chunk Separator"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": false, "default": "\n\n", "min": null, "max": null, "precision": null, "options": [], "type": "string", "human_description": {"en_US": "Separator used for chunking", "zh_Hans": "\u7528\u4e8e\u5206\u5757\u7684\u5206\u9694\u7b26", "pt_BR": "Separador usado para divis\u00e3o", "ja_JP": "Separator used for chunking"}, "form": "llm", "llm_description": "The separator used to split chunks"}, {"name": "subchunk_max_length", "label": {"en_US": "Subchunk Maximum Length", "zh_Hans": "\u5b50\u5206\u5757\u6700\u5927\u957f\u5ea6", "pt_BR": "Comprimento M\u00e1ximo de Subdivis\u00e3o", "ja_JP": "Subchunk Maximum Length"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": false, "default": 512, "min": null, "max": null, "precision": null, "options": [], "type": "number", "human_description": {"en_US": "Maximum length for subchunking", "zh_Hans": "\u7528\u4e8e\u5b50\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6", "pt_BR": "Comprimento m\u00e1ximo para subdivis\u00e3o", "ja_JP": "Maximum length for subchunking"}, "form": "llm", "llm_description": "Maximum length allowed per subchunk"}, {"name": "subchunk_separator", "label": {"en_US": "Subchunk Separator", "zh_Hans": "\u5b50\u5206\u5757\u5206\u9694\u7b26", "pt_BR": "Separador de Subdivis\u00e3o", "ja_JP": "Subchunk Separator"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": false, "default": ". ", "min": null, "max": null, "precision": null, "options": [], "type": "string", "human_description": {"en_US": "Separator used for subchunking", "zh_Hans": "\u7528\u4e8e\u5b50\u5206\u5757\u7684\u5206\u9694\u7b26", "pt_BR": "Separador usado para subdivis\u00e3o", "ja_JP": "Separator used for subchunking"}, "form": "llm", "llm_description": "The separator used to split subchunks"}, {"name": "parent_mode", "label": {"en_US": "Parent Mode", "zh_Hans": "\u7236\u5757\u6a21\u5f0f", "pt_BR": "Modo Pai", "ja_JP": "Parent Mode"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": true, "default": "paragraph", "min": null, "max": null, "precision": null, "options": [{"value": "paragraph", "label": {"en_US": "Paragraph", "zh_Hans": "\u6bb5\u843d", "pt_BR": "Par\u00e1grafo", "ja_JP": "Paragraph"}, "icon": ""}, {"value": "full_doc", "label": {"en_US": "Full Document", "zh_Hans": "\u5168\u6587", "pt_BR": "Documento Completo", "ja_JP": "Full Document"}, "icon": ""}], "type": "select", "human_description": {"en_US": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.", "zh_Hans": "\u6839\u636e\u5206\u9694\u7b26\u548c\u6700\u5927\u5757\u957f\u5ea6\u5c06\u6587\u672c\u62c6\u5206\u4e3a\u6bb5\u843d\uff0c\u4f7f\u7528\u62c6\u5206\u6587\u672c\u4f5c\u4e3a\u68c0\u7d22\u7684\u7236\u5757\u6216\u6574\u4e2a\u6587\u6863\u7528\u4f5c\u7236\u5757\u5e76\u76f4\u63a5\u68c0\u7d22\u3002", "pt_BR": "Dividir texto em par\u00e1grafos com base no separador e no comprimento m\u00e1ximo do bloco, usando o texto dividido como bloco pai ou documento completo como bloco pai e diretamente recuper\u00e1-lo.", "ja_JP": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve."}, "form": "llm", "llm_description": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve."}, {"name": "remove_extra_spaces", "label": {"en_US": "Remove Extra Spaces", "zh_Hans": "\u79fb\u9664\u591a\u4f59\u7a7a\u683c", "pt_BR": "Remover Espa\u00e7os Extras", "ja_JP": "Remove Extra Spaces"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": false, "default": 0, "min": null, "max": null, "precision": null, "options": [], "type": "boolean", "human_description": {"en_US": "Whether to remove extra spaces in the text", "zh_Hans": "\u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684\u591a\u4f59\u7a7a\u683c", "pt_BR": "Se deve remover espa\u00e7os extras no texto", "ja_JP": "Whether to remove extra spaces in the text"}, "form": "llm", "llm_description": "Whether to remove extra spaces in the text"}, {"name": "remove_urls_emails", "label": {"en_US": "Remove URLs and Emails", "zh_Hans": "\u79fb\u9664URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740", "pt_BR": "Remover URLs e E-mails", "ja_JP": "Remove URLs and Emails"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": false, "default": 0, "min": null, "max": null, "precision": null, "options": [], "type": "boolean", "human_description": {"en_US": "Whether to remove URLs and emails in the text", "zh_Hans": "\u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740", "pt_BR": "Se deve remover URLs e e-mails no texto", "ja_JP": "Whether to remove URLs and emails in the text"}, "form": "llm", "llm_description": "Whether to remove URLs and emails in the text"}], "params": {"input_text": "", "max_length": "", "separator": "", "subchunk_max_length": "", "subchunk_separator": "", "parent_mode": "", "remove_extra_spaces": "", "remove_urls_emails": ""}, "selected": true}, "position": {"x": 1077.0240183162543, "y": 281.3910724383104}, "targetPosition": "left", "sourcePosition": "right", "positionAbsolute": {"x": 1077.0240183162543, "y": 281.3910724383104}, "width": 242, "height": 52, "selected": true}], "edges": [{"id": "1752489759475-source-1752490343805-target", "type": "custom", "source": "1752489759475", "sourceHandle": "source", "target": "1752490343805", "targetHandle": "target", "data": {"sourceType": "datasource", "targetType": "tool", "isInIteration": false, "isInLoop": false}, "zIndex": 0}, {"id": "1752490343805-source-1752477924228-target", "type": "custom", "source": "1752490343805", "target": "1752477924228", "sourceHandle": "source", "targetHandle": "target", "data": {"sourceType": "tool", "targetType": "knowledge-index", "isInLoop": false}, "zIndex": 0}], "viewport": {"x": -487.2912544090391, "y": -54.7029301848807, "zoom": 0.9994011715768695}} \ No newline at end of file diff --git a/api/services/rag_pipeline/transform/notion_parent_child.json b/api/services/rag_pipeline/transform/notion_parent_child.json deleted file mode 100644 index 82ac85ff41..0000000000 --- a/api/services/rag_pipeline/transform/notion_parent_child.json +++ /dev/null @@ -1 +0,0 @@ -{"nodes": [{"id": "1752477924228", "type": "custom", "data": {"index_chunk_variable_selector": ["1752482151668", "result"], "keyword_number": 10, "retrieval_model": {"top_k": 3, "score_threshold_enabled": false, "score_threshold": 0.5, "search_method": "semantic_search", "vector_setting": {"embedding_provider_name": "langgenius/openai/openai", "embedding_model_name": "text-embedding-ada-002"}}, "type": "knowledge-index", "title": "\u77e5\u8bc6\u5e93", "selected": false, "chunk_structure": "text_model", "indexing_technique": "high_quality", "embedding_model": "text-embedding-ada-002", "embedding_model_provider": "langgenius/openai/openai"}, "position": {"x": 1076.4656678451215, "y": 281.3910724383104}, "targetPosition": "left", "sourcePosition": "right", "positionAbsolute": {"x": 1076.4656678451215, "y": 281.3910724383104}, "width": 242, "height": 114, "selected": false}, {"id": "1752479895761", "type": "custom", "data": {"datasource_parameters": {}, "datasource_configurations": {}, "type": "datasource", "title": "File", "plugin_id": "langgenius/file", "provider_type": "local_file", "provider_name": "file", "datasource_name": "upload-file", "datasource_label": "File", "selected": false, "fileExtensions": ["txt", "markdown", "mdx", "pdf", "html", "xlsx", "xls", "vtt", "properties", "doc", "docx", "csv", "eml", "msg", "pptx", "xml", "epub", "ppt", "md"]}, "position": {"x": -839.8603427660498, "y": 251.3910724383104}, "targetPosition": "left", "sourcePosition": "right", "positionAbsolute": {"x": -839.8603427660498, "y": 251.3910724383104}, "width": 242, "height": 52, "selected": false}, {"id": "1752480460682", "type": "custom", "data": {"tool_parameters": {"file": {"type": "variable", "value": ["1752479895761", "file"]}}, "tool_configurations": {}, "type": "tool", "title": "Dify\u6587\u672c\u63d0\u53d6\u5668", "provider_id": "langgenius/dify_extractor/dify_extractor", "provider_type": "builtin", "provider_name": "langgenius/dify_extractor/dify_extractor", "tool_name": "dify_extractor", "tool_label": "Dify\u6587\u672c\u63d0\u53d6\u5668", "tool_description": "Dify Extractor", "is_team_authorization": true, "output_schema": {"properties": {"documents": {"description": "the documents extracted from the file", "items": {"type": "object"}, "type": "array"}, "images": {"description": "The images extracted from the file", "items": {"type": "object"}, "type": "array"}}, "type": "object"}, "paramSchemas": [{"name": "file", "label": {"en_US": "file", "zh_Hans": "file", "pt_BR": "file", "ja_JP": "file"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": true, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "file", "human_description": {"en_US": "the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", "zh_Hans": "\u7528\u4e8e\u89e3\u6790\u7684\u6587\u4ef6(\u652f\u6301 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", "pt_BR": "o arquivo a ser analisado (suporta pdf, ppt, pptx, doc, docx, png, jpg, jpeg)", "ja_JP": "the file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)"}, "form": "llm", "llm_description": "the file to be parsed (support pdf, ppt, pptx, doc, docx, png, jpg, jpeg)"}], "params": {"file": ""}, "selected": false}, "position": {"x": -108.28652292656551, "y": 281.3910724383104}, "targetPosition": "left", "sourcePosition": "right", "positionAbsolute": {"x": -108.28652292656551, "y": 281.3910724383104}, "width": 242, "height": 52, "selected": false}, {"id": "1752481112180", "type": "custom", "data": {"variable_selector": ["1752479895761", "file"], "is_array_file": false, "type": "document-extractor", "title": "\u6587\u6863\u63d0\u53d6\u5668", "selected": false}, "position": {"x": -108.28652292656551, "y": 390.6576481692478}, "targetPosition": "left", "sourcePosition": "right", "positionAbsolute": {"x": -108.28652292656551, "y": 390.6576481692478}, "width": 242, "height": 90, "selected": false}, {"id": "1752481129417", "type": "custom", "data": {"cases": [{"id": "24e47cad-f1e2-4f74-9884-3f49d5bb37b7", "case_id": "24e47cad-f1e2-4f74-9884-3f49d5bb37b7", "logical_operator": "or", "conditions": [{"id": "9da88d93-3ff6-463f-abfd-6bcafbf2554d", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"], "comparison_operator": "is", "value": ".xlsx"}, {"id": "d0e88f5e-dfe3-4bae-af0c-dbec267500de", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"], "comparison_operator": "is", "value": ".xls"}, {"id": "a957e91e-1ed7-4c6b-9c80-2f0948858f1d", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"], "comparison_operator": "is", "value": ".md"}, {"id": "870c3c39-8d3f-474a-ab8b-9c0ccf53db73", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"], "comparison_operator": "is", "value": ".markdown"}, {"id": "f9541513-1e71-4dc1-9db5-35dc84a39e3c", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"], "comparison_operator": "is", "value": ".mdx"}, {"id": "4c7f455b-ac20-40ca-9495-6cc44ffcb35d", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"], "comparison_operator": "is", "value": ".html"}, {"id": "2e12d9c7-8057-4a09-8851-f9fd1d0718d1", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"], "comparison_operator": "is", "value": ".htm"}, {"id": "73a995a9-d8b9-4aef-89f7-306e2ddcbce2", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"], "comparison_operator": "is", "value": ".docx"}, {"id": "8a2e8772-0426-458b-a1f9-9eaaec0f27c8", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"], "comparison_operator": "is", "value": ".csv"}, {"id": "aa2cb6b6-a2fc-462a-a9f5-c9c3f33a1602", "varType": "file", "variable_selector": ["1752479895761", "file", "extension"], "comparison_operator": "is", "value": ".txt"}]}], "type": "if-else", "title": "\u6761\u4ef6\u5206\u652f", "selected": false}, "position": {"x": -489.57009543377865, "y": 251.3910724383104}, "targetPosition": "left", "sourcePosition": "right", "positionAbsolute": {"x": -489.57009543377865, "y": 251.3910724383104}, "width": 242, "height": 358, "selected": true}, {"id": "1752482022496", "type": "custom", "data": {"output_type": "string", "variables": [["1752481112180", "text"], ["1752480460682", "text"]], "type": "variable-aggregator", "title": "\u53d8\u91cf\u805a\u5408\u5668", "selected": false, "advanced_settings": {"group_enabled": false, "groups": [{"output_type": "string", "variables": [["1752481112180", "text"], ["1752480460682", "text"]], "group_name": "Group1", "groupId": "f4cf07b4-914d-4544-8ef8-0c5d9e4f21a7"}]}}, "position": {"x": 319.441649575055, "y": 281.3910724383104}, "targetPosition": "left", "sourcePosition": "right", "positionAbsolute": {"x": 319.441649575055, "y": 281.3910724383104}, "width": 242, "height": 129, "selected": false}, {"id": "1752482151668", "type": "custom", "data": {"tool_parameters": {"input_variable": {"type": "mixed", "value": "{{#1752482022496.output#}}"}, "delimiter": {"type": "mixed", "value": "{{#rag.shared.delimiter#}}"}, "max_chunk_length": {"type": "variable", "value": ["rag", "shared", "max_chunk_length"]}, "chunk_overlap_length": {"type": "variable", "value": ["rag", "shared", "chunk_overlap"]}, "replace_consecutive_spaces_newlines_tabs": {"type": "mixed", "value": "{{#rag.shared.replace_consecutive_spaces#}}"}, "delete_all_urls_and_email_addresses": {"type": "mixed", "value": "{{#rag.shared.delete_urls_email#}}"}}, "tool_configurations": {}, "type": "tool", "title": "\u901a\u7528\u6587\u672c\u5206\u5757", "provider_id": "langgenius/general_chunk/general_chunk", "provider_type": "builtin", "provider_name": "langgenius/general_chunk/general_chunk", "tool_name": "general_chunk", "tool_label": "\u901a\u7528\u6587\u672c\u5206\u5757", "tool_description": "\u4e00\u4e2a\u7528\u4e8e\u901a\u7528\u6587\u672c\u5206\u5757\u6a21\u5f0f\u7684\u5de5\u5177\uff0c\u68c0\u7d22\u548c\u53ec\u56de\u7684\u5757\u662f\u76f8\u540c\u7684\u3002", "is_team_authorization": true, "output_schema": {"properties": {"result": {"description": "The result of the general chunk tool.", "properties": {"general_chunks": {"items": {"description": "The chunk of the text.", "type": "string"}, "type": "array"}}, "type": "object"}}, "type": "object"}, "paramSchemas": [{"name": "input_variable", "label": {"en_US": "Input Variable", "zh_Hans": "\u8f93\u5165\u53d8\u91cf", "pt_BR": "Input Variable", "ja_JP": "Input Variable"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": true, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "string", "human_description": {"en_US": "The text you want to chunk.", "zh_Hans": "\u4f60\u60f3\u8981\u5206\u5757\u7684\u6587\u672c\u3002", "pt_BR": "The text you want to chunk.", "ja_JP": "The text you want to chunk."}, "form": "llm", "llm_description": "The text you want to chunk."}, {"name": "delimiter", "label": {"en_US": "Delimiter", "zh_Hans": "\u5206\u9694\u7b26", "pt_BR": "Delimiter", "ja_JP": "Delimiter"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": true, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "string", "human_description": {"en_US": "The delimiter of the chunks.", "zh_Hans": "\u5757\u7684\u5206\u9694\u7b26\u3002", "pt_BR": "The delimiter of the chunks.", "ja_JP": "The delimiter of the chunks."}, "form": "llm", "llm_description": "The delimiter of the chunks, the format of the delimiter must be a string."}, {"name": "max_chunk_length", "label": {"en_US": "Maximum Chunk Length", "zh_Hans": "\u6700\u5927\u5757\u7684\u957f\u5ea6", "pt_BR": "Maximum Chunk Length", "ja_JP": "Maximum Chunk Length"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": true, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "number", "human_description": {"en_US": "The maximum chunk length.", "zh_Hans": "\u6700\u5927\u5757\u7684\u957f\u5ea6\u3002", "pt_BR": "The maximum chunk length.", "ja_JP": "The maximum chunk length."}, "form": "llm", "llm_description": "The maximum chunk length, the format of the chunk size must be an integer."}, {"name": "chunk_overlap_length", "label": {"en_US": "Chunk Overlap Length", "zh_Hans": "\u5757\u7684\u91cd\u53e0\u957f\u5ea6", "pt_BR": "Chunk Overlap Length", "ja_JP": "Chunk Overlap Length"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": false, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "number", "human_description": {"en_US": "The chunk overlap length.", "zh_Hans": "\u5757\u7684\u91cd\u53e0\u957f\u5ea6\u3002", "pt_BR": "The chunk overlap length.", "ja_JP": "The chunk overlap length."}, "form": "llm", "llm_description": "The chunk overlap length, the format of the chunk overlap length must be an integer."}, {"name": "replace_consecutive_spaces_newlines_tabs", "label": {"en_US": "Replace Consecutive Spaces, Newlines and Tabs", "zh_Hans": "\u66ff\u6362\u8fde\u7eed\u7684\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26", "pt_BR": "Replace Consecutive Spaces, Newlines and Tabs", "ja_JP": "Replace Consecutive Spaces, Newlines and Tabs"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": false, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "boolean", "human_description": {"en_US": "Replace consecutive spaces, newlines and tabs", "zh_Hans": "\u66ff\u6362\u8fde\u7eed\u7684\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26", "pt_BR": "Replace consecutive spaces, newlines and tabs", "ja_JP": "Replace consecutive spaces, newlines and tabs"}, "form": "llm", "llm_description": "Replace consecutive spaces, newlines and tabs, the format of the replace must be a boolean."}, {"name": "delete_all_urls_and_email_addresses", "label": {"en_US": "Delete All URLs and Email Addresses", "zh_Hans": "\u5220\u9664\u6240\u6709URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740", "pt_BR": "Delete All URLs and Email Addresses", "ja_JP": "Delete All URLs and Email Addresses"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": false, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "boolean", "human_description": {"en_US": "Delete all URLs and email addresses", "zh_Hans": "\u5220\u9664\u6240\u6709URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740", "pt_BR": "Delete all URLs and email addresses", "ja_JP": "Delete all URLs and email addresses"}, "form": "llm", "llm_description": "Delete all URLs and email addresses, the format of the delete must be a boolean."}], "params": {"input_variable": "", "delimiter": "", "max_chunk_length": "", "chunk_overlap_length": "", "replace_consecutive_spaces_newlines_tabs": "", "delete_all_urls_and_email_addresses": ""}, "selected": false}, "position": {"x": 693.5300771507484, "y": 281.3910724383104}, "targetPosition": "left", "sourcePosition": "right", "positionAbsolute": {"x": 693.5300771507484, "y": 281.3910724383104}, "width": 242, "height": 52, "selected": false}], "edges": [{"id": "1752479895761-source-1752481129417-target", "type": "custom", "source": "1752479895761", "sourceHandle": "source", "target": "1752481129417", "targetHandle": "target", "data": {"sourceType": "datasource", "targetType": "if-else", "isInIteration": false, "isInLoop": false}, "zIndex": 0}, {"id": "1752481129417-24e47cad-f1e2-4f74-9884-3f49d5bb37b7-1752480460682-target", "type": "custom", "source": "1752481129417", "target": "1752480460682", "sourceHandle": "24e47cad-f1e2-4f74-9884-3f49d5bb37b7", "targetHandle": "target", "data": {"sourceType": "if-else", "targetType": "tool", "isInLoop": false}, "zIndex": 0}, {"id": "1752481129417-false-1752481112180-target", "type": "custom", "source": "1752481129417", "target": "1752481112180", "sourceHandle": "false", "targetHandle": "target", "data": {"sourceType": "if-else", "targetType": "document-extractor", "isInLoop": false}, "zIndex": 0}, {"id": "1752480460682-source-1752482022496-target", "type": "custom", "source": "1752480460682", "sourceHandle": "source", "target": "1752482022496", "targetHandle": "target", "data": {"sourceType": "tool", "targetType": "variable-aggregator", "isInIteration": false, "isInLoop": false}, "zIndex": 0}, {"id": "1752481112180-source-1752482022496-target", "type": "custom", "source": "1752481112180", "target": "1752482022496", "sourceHandle": "source", "targetHandle": "target", "data": {"sourceType": "document-extractor", "targetType": "variable-aggregator", "isInLoop": false}, "zIndex": 0}, {"id": "1752482022496-source-1752482151668-target", "type": "custom", "source": "1752482022496", "sourceHandle": "source", "target": "1752482151668", "targetHandle": "target", "data": {"sourceType": "variable-aggregator", "targetType": "tool", "isInIteration": false, "isInLoop": false}, "zIndex": 0}, {"id": "1752482151668-source-1752477924228-target", "type": "custom", "source": "1752482151668", "sourceHandle": "source", "target": "1752477924228", "targetHandle": "target", "data": {"sourceType": "tool", "targetType": "knowledge-index", "isInIteration": false, "isInLoop": false}, "zIndex": 0}], "viewport": {"x": 701.4999626224237, "y": 128.33739021504016, "zoom": 0.48941689643726966}} \ No newline at end of file diff --git a/api/services/rag_pipeline/transform/web_crawl_general_economy.json b/api/services/rag_pipeline/transform/web_crawl_general_economy.json deleted file mode 100644 index 7fa9ae95bd..0000000000 --- a/api/services/rag_pipeline/transform/web_crawl_general_economy.json +++ /dev/null @@ -1 +0,0 @@ -{"nodes": [{"data": {"chunk_structure": "text_model", "embedding_model": "text-embedding-ada-002", "embedding_model_provider": "langgenius/openai/openai", "index_chunk_variable_selector": ["1752569675978", "result"], "indexing_technique": "economy", "keyword_number": 10, "retrieval_model": {"score_threshold": 0.5, "score_threshold_enabled": false, "search_method": "keyword_search", "top_k": 3, "vector_setting": {"embedding_model_name": "text-embedding-ada-002", "embedding_provider_name": "langgenius/openai/openai"}}, "selected": false, "title": "\u77e5\u8bc6\u5e93", "type": "knowledge-index"}, "height": 114, "id": "1752477924228", "position": {"x": 2140.4053851189346, "y": 281.3910724383104}, "positionAbsolute": {"x": 2140.4053851189346, "y": 281.3910724383104}, "selected": true, "sourcePosition": "right", "targetPosition": "left", "type": "custom", "width": 242}, {"data": {"datasource_configurations": {}, "datasource_label": "Jina Reader", "datasource_name": "jina_reader", "datasource_parameters": {"crawl_sub_pages": {"type": "mixed", "value": "{{#rag.1752491761974.jina_crawl_sub_pages#}}"}, "limit": {"type": "variable", "value": ["rag", "1752491761974", "jina_limit"]}, "url": {"type": "mixed", "value": "{{#rag.1752491761974.jina_url#}}"}, "use_sitemap": {"type": "mixed", "value": "{{#rag.1752491761974.jina_use_sitemap#}}"}}, "plugin_id": "langgenius/jina_datasource", "provider_name": "jina", "provider_type": "website_crawl", "selected": false, "title": "Jina Reader", "type": "datasource"}, "height": 52, "id": "1752491761974", "position": {"x": 1067.7526055798794, "y": 281.3910724383104}, "positionAbsolute": {"x": 1067.7526055798794, "y": 281.3910724383104}, "selected": false, "sourcePosition": "right", "targetPosition": "left", "type": "custom", "width": 242}, {"data": {"datasource_configurations": {}, "datasource_label": "Firecrawl", "datasource_name": "crawl", "datasource_parameters": {"crawl_subpages": {"type": "mixed", "value": "{{#rag.1752565402678.firecrawl_crawl_sub_pages#}}"}, "exclude_paths": {"type": "mixed", "value": "{{#rag.1752565402678.firecrawl_exclude_paths#}}"}, "include_paths": {"type": "mixed", "value": "{{#rag.1752565402678.firecrawl_include_only_paths#}}"}, "limit": {"type": "variable", "value": ["rag", "1752565402678", "firecrawl_limit"]}, "max_depth": {"type": "variable", "value": ["rag", "1752565402678", "firecrawl_max_depth"]}, "only_main_content": {"type": "mixed", "value": "{{#rag.1752565402678.firecrawl_extract_main_content#}}"}, "url": {"type": "mixed", "value": "{{#rag.1752565402678.firecrawl_url#}}"}}, "plugin_id": "langgenius/firecrawl_datasource", "provider_name": "firecrawl", "provider_type": "website_crawl", "selected": false, "title": "Firecrawl", "type": "datasource"}, "height": 52, "id": "1752565402678", "position": {"x": 1067.7526055798794, "y": 417.32608398342404}, "positionAbsolute": {"x": 1067.7526055798794, "y": 417.32608398342404}, "selected": false, "sourcePosition": "right", "targetPosition": "left", "type": "custom", "width": 242}, {"data": {"output_type": "string", "selected": false, "title": "\u53d8\u91cf\u805a\u5408\u5668", "type": "variable-aggregator", "variables": [["1752491761974", "content"], ["1752565402678", "content"]]}, "height": 129, "id": "1752565435219", "position": {"x": 1505.4306671642219, "y": 281.3910724383104}, "positionAbsolute": {"x": 1505.4306671642219, "y": 281.3910724383104}, "selected": false, "sourcePosition": "right", "targetPosition": "left", "type": "custom", "width": 242}, {"id": "1752569675978", "type": "custom", "data": {"tool_parameters": {"input_variable": {"type": "mixed", "value": "{{#1752565435219.output#}}"}, "delimiter": {"type": "mixed", "value": "{{#rag.shared.delimiter#}}"}, "max_chunk_length": {"type": "variable", "value": ["rag", "shared", "max_chunk_length"]}, "chunk_overlap_length": {"type": "variable", "value": ["rag", "shared", "chunk_overlap"]}, "replace_consecutive_spaces_newlines_tabs": {"type": "mixed", "value": "{{#rag.shared.replace_consecutive_spaces#}}"}, "delete_all_urls_and_email_addresses": {"type": "mixed", "value": "{{#rag.shared.delete_urls_email#}}"}}, "tool_configurations": {}, "type": "tool", "title": "\u901a\u7528\u6587\u672c\u5206\u5757", "provider_id": "langgenius/general_chunk/general_chunk", "provider_type": "builtin", "provider_name": "langgenius/general_chunk/general_chunk", "tool_name": "general_chunk", "tool_label": "\u901a\u7528\u6587\u672c\u5206\u5757", "tool_description": "\u4e00\u4e2a\u7528\u4e8e\u901a\u7528\u6587\u672c\u5206\u5757\u6a21\u5f0f\u7684\u5de5\u5177\uff0c\u68c0\u7d22\u548c\u53ec\u56de\u7684\u5757\u662f\u76f8\u540c\u7684\u3002", "is_team_authorization": true, "output_schema": {"properties": {"result": {"description": "The result of the general chunk tool.", "properties": {"general_chunks": {"items": {"description": "The chunk of the text.", "type": "string"}, "type": "array"}}, "type": "object"}}, "type": "object"}, "paramSchemas": [{"name": "input_variable", "label": {"en_US": "Input Variable", "zh_Hans": "\u8f93\u5165\u53d8\u91cf", "pt_BR": "Input Variable", "ja_JP": "Input Variable"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": true, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "string", "human_description": {"en_US": "The text you want to chunk.", "zh_Hans": "\u4f60\u60f3\u8981\u5206\u5757\u7684\u6587\u672c\u3002", "pt_BR": "The text you want to chunk.", "ja_JP": "The text you want to chunk."}, "form": "llm", "llm_description": "The text you want to chunk."}, {"name": "delimiter", "label": {"en_US": "Delimiter", "zh_Hans": "\u5206\u9694\u7b26", "pt_BR": "Delimiter", "ja_JP": "Delimiter"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": true, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "string", "human_description": {"en_US": "The delimiter of the chunks.", "zh_Hans": "\u5757\u7684\u5206\u9694\u7b26\u3002", "pt_BR": "The delimiter of the chunks.", "ja_JP": "The delimiter of the chunks."}, "form": "llm", "llm_description": "The delimiter of the chunks, the format of the delimiter must be a string."}, {"name": "max_chunk_length", "label": {"en_US": "Maximum Chunk Length", "zh_Hans": "\u6700\u5927\u5757\u7684\u957f\u5ea6", "pt_BR": "Maximum Chunk Length", "ja_JP": "Maximum Chunk Length"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": true, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "number", "human_description": {"en_US": "The maximum chunk length.", "zh_Hans": "\u6700\u5927\u5757\u7684\u957f\u5ea6\u3002", "pt_BR": "The maximum chunk length.", "ja_JP": "The maximum chunk length."}, "form": "llm", "llm_description": "The maximum chunk length, the format of the chunk size must be an integer."}, {"name": "chunk_overlap_length", "label": {"en_US": "Chunk Overlap Length", "zh_Hans": "\u5757\u7684\u91cd\u53e0\u957f\u5ea6", "pt_BR": "Chunk Overlap Length", "ja_JP": "Chunk Overlap Length"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": false, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "number", "human_description": {"en_US": "The chunk overlap length.", "zh_Hans": "\u5757\u7684\u91cd\u53e0\u957f\u5ea6\u3002", "pt_BR": "The chunk overlap length.", "ja_JP": "The chunk overlap length."}, "form": "llm", "llm_description": "The chunk overlap length, the format of the chunk overlap length must be an integer."}, {"name": "replace_consecutive_spaces_newlines_tabs", "label": {"en_US": "Replace Consecutive Spaces, Newlines and Tabs", "zh_Hans": "\u66ff\u6362\u8fde\u7eed\u7684\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26", "pt_BR": "Replace Consecutive Spaces, Newlines and Tabs", "ja_JP": "Replace Consecutive Spaces, Newlines and Tabs"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": false, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "boolean", "human_description": {"en_US": "Replace consecutive spaces, newlines and tabs", "zh_Hans": "\u66ff\u6362\u8fde\u7eed\u7684\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26", "pt_BR": "Replace consecutive spaces, newlines and tabs", "ja_JP": "Replace consecutive spaces, newlines and tabs"}, "form": "llm", "llm_description": "Replace consecutive spaces, newlines and tabs, the format of the replace must be a boolean."}, {"name": "delete_all_urls_and_email_addresses", "label": {"en_US": "Delete All URLs and Email Addresses", "zh_Hans": "\u5220\u9664\u6240\u6709URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740", "pt_BR": "Delete All URLs and Email Addresses", "ja_JP": "Delete All URLs and Email Addresses"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": false, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "boolean", "human_description": {"en_US": "Delete all URLs and email addresses", "zh_Hans": "\u5220\u9664\u6240\u6709URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740", "pt_BR": "Delete all URLs and email addresses", "ja_JP": "Delete all URLs and email addresses"}, "form": "llm", "llm_description": "Delete all URLs and email addresses, the format of the delete must be a boolean."}], "params": {"input_variable": "", "delimiter": "", "max_chunk_length": "", "chunk_overlap_length": "", "replace_consecutive_spaces_newlines_tabs": "", "delete_all_urls_and_email_addresses": ""}, "selected": false}, "position": {"x": 1807.4306671642219, "y": 281.3910724383104}, "targetPosition": "left", "sourcePosition": "right", "positionAbsolute": {"x": 1807.4306671642219, "y": 281.3910724383104}, "width": 242, "height": 52}], "edges": [{"data": {"isInIteration": false, "isInLoop": false, "sourceType": "datasource", "targetType": "variable-aggregator"}, "id": "1752491761974-source-1752565435219-target", "source": "1752491761974", "sourceHandle": "source", "target": "1752565435219", "targetHandle": "target", "type": "custom", "zIndex": 0}, {"data": {"isInLoop": false, "sourceType": "datasource", "targetType": "variable-aggregator"}, "id": "1752565402678-source-1752565435219-target", "source": "1752565402678", "sourceHandle": "source", "target": "1752565435219", "targetHandle": "target", "type": "custom", "zIndex": 0}, {"id": "1752565435219-source-1752569675978-target", "type": "custom", "source": "1752565435219", "sourceHandle": "source", "target": "1752569675978", "targetHandle": "target", "data": {"sourceType": "variable-aggregator", "targetType": "tool", "isInIteration": false, "isInLoop": false}, "zIndex": 0}, {"id": "1752569675978-source-1752477924228-target", "type": "custom", "source": "1752569675978", "target": "1752477924228", "sourceHandle": "source", "targetHandle": "target", "data": {"sourceType": "tool", "targetType": "knowledge-index", "isInLoop": false}, "zIndex": 0}], "viewport": {"x": -707.721097109337, "y": -93.07807382100896, "zoom": 0.9350632198875476}} \ No newline at end of file diff --git a/api/services/rag_pipeline/transform/web_crawl_general_high_quality.json b/api/services/rag_pipeline/transform/web_crawl_general_high_quality.json deleted file mode 100644 index 25dfad4873..0000000000 --- a/api/services/rag_pipeline/transform/web_crawl_general_high_quality.json +++ /dev/null @@ -1 +0,0 @@ -{"nodes": [{"data": {"chunk_structure": "text_model", "embedding_model": "text-embedding-ada-002", "embedding_model_provider": "langgenius/openai/openai", "index_chunk_variable_selector": ["1752569675978", "result"], "indexing_technique": "high_quality", "keyword_number": 10, "retrieval_model": {"score_threshold": 0.5, "score_threshold_enabled": false, "search_method": "semantic_search", "top_k": 3, "vector_setting": {"embedding_model_name": "text-embedding-ada-002", "embedding_provider_name": "langgenius/openai/openai"}}, "selected": true, "title": "\u77e5\u8bc6\u5e93", "type": "knowledge-index"}, "height": 114, "id": "1752477924228", "position": {"x": 2140.4053851189346, "y": 281.3910724383104}, "positionAbsolute": {"x": 2140.4053851189346, "y": 281.3910724383104}, "selected": true, "sourcePosition": "right", "targetPosition": "left", "type": "custom", "width": 242}, {"data": {"datasource_configurations": {}, "datasource_label": "Jina Reader", "datasource_name": "jina_reader", "datasource_parameters": {"crawl_sub_pages": {"type": "mixed", "value": "{{#rag.1752491761974.jina_crawl_sub_pages#}}"}, "limit": {"type": "variable", "value": ["rag", "1752491761974", "jina_limit"]}, "url": {"type": "mixed", "value": "{{#rag.1752491761974.jina_url#}}"}, "use_sitemap": {"type": "mixed", "value": "{{#rag.1752491761974.jina_use_sitemap#}}"}}, "plugin_id": "langgenius/jina_datasource", "provider_name": "jina", "provider_type": "website_crawl", "selected": false, "title": "Jina Reader", "type": "datasource"}, "height": 52, "id": "1752491761974", "position": {"x": 1067.7526055798794, "y": 281.3910724383104}, "positionAbsolute": {"x": 1067.7526055798794, "y": 281.3910724383104}, "selected": false, "sourcePosition": "right", "targetPosition": "left", "type": "custom", "width": 242}, {"data": {"datasource_configurations": {}, "datasource_label": "Firecrawl", "datasource_name": "crawl", "datasource_parameters": {"crawl_subpages": {"type": "mixed", "value": "{{#rag.1752565402678.firecrawl_crawl_sub_pages#}}"}, "exclude_paths": {"type": "mixed", "value": "{{#rag.1752565402678.firecrawl_exclude_paths#}}"}, "include_paths": {"type": "mixed", "value": "{{#rag.1752565402678.firecrawl_include_only_paths#}}"}, "limit": {"type": "variable", "value": ["rag", "1752565402678", "firecrawl_limit"]}, "max_depth": {"type": "variable", "value": ["rag", "1752565402678", "firecrawl_max_depth"]}, "only_main_content": {"type": "mixed", "value": "{{#rag.1752565402678.firecrawl_extract_main_content#}}"}, "url": {"type": "mixed", "value": "{{#rag.1752565402678.firecrawl_url#}}"}}, "plugin_id": "langgenius/firecrawl_datasource", "provider_name": "firecrawl", "provider_type": "website_crawl", "selected": false, "title": "Firecrawl", "type": "datasource"}, "height": 52, "id": "1752565402678", "position": {"x": 1067.7526055798794, "y": 417.32608398342404}, "positionAbsolute": {"x": 1067.7526055798794, "y": 417.32608398342404}, "selected": false, "sourcePosition": "right", "targetPosition": "left", "type": "custom", "width": 242}, {"data": {"output_type": "string", "selected": false, "title": "\u53d8\u91cf\u805a\u5408\u5668", "type": "variable-aggregator", "variables": [["1752491761974", "content"], ["1752565402678", "content"]]}, "height": 129, "id": "1752565435219", "position": {"x": 1505.4306671642219, "y": 281.3910724383104}, "positionAbsolute": {"x": 1505.4306671642219, "y": 281.3910724383104}, "selected": false, "sourcePosition": "right", "targetPosition": "left", "type": "custom", "width": 242}, {"id": "1752569675978", "type": "custom", "data": {"tool_parameters": {"input_variable": {"type": "mixed", "value": "{{#1752565435219.output#}}"}, "delimiter": {"type": "mixed", "value": "{{#rag.shared.delimiter#}}"}, "max_chunk_length": {"type": "variable", "value": ["rag", "shared", "max_chunk_length"]}, "chunk_overlap_length": {"type": "variable", "value": ["rag", "shared", "chunk_overlap"]}, "replace_consecutive_spaces_newlines_tabs": {"type": "mixed", "value": "{{#rag.shared.replace_consecutive_spaces#}}"}, "delete_all_urls_and_email_addresses": {"type": "mixed", "value": "{{#rag.shared.delete_urls_email#}}"}}, "tool_configurations": {}, "type": "tool", "title": "\u901a\u7528\u6587\u672c\u5206\u5757", "provider_id": "langgenius/general_chunk/general_chunk", "provider_type": "builtin", "provider_name": "langgenius/general_chunk/general_chunk", "tool_name": "general_chunk", "tool_label": "\u901a\u7528\u6587\u672c\u5206\u5757", "tool_description": "\u4e00\u4e2a\u7528\u4e8e\u901a\u7528\u6587\u672c\u5206\u5757\u6a21\u5f0f\u7684\u5de5\u5177\uff0c\u68c0\u7d22\u548c\u53ec\u56de\u7684\u5757\u662f\u76f8\u540c\u7684\u3002", "is_team_authorization": true, "output_schema": {"properties": {"result": {"description": "The result of the general chunk tool.", "properties": {"general_chunks": {"items": {"description": "The chunk of the text.", "type": "string"}, "type": "array"}}, "type": "object"}}, "type": "object"}, "paramSchemas": [{"name": "input_variable", "label": {"en_US": "Input Variable", "zh_Hans": "\u8f93\u5165\u53d8\u91cf", "pt_BR": "Input Variable", "ja_JP": "Input Variable"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": true, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "string", "human_description": {"en_US": "The text you want to chunk.", "zh_Hans": "\u4f60\u60f3\u8981\u5206\u5757\u7684\u6587\u672c\u3002", "pt_BR": "The text you want to chunk.", "ja_JP": "The text you want to chunk."}, "form": "llm", "llm_description": "The text you want to chunk."}, {"name": "delimiter", "label": {"en_US": "Delimiter", "zh_Hans": "\u5206\u9694\u7b26", "pt_BR": "Delimiter", "ja_JP": "Delimiter"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": true, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "string", "human_description": {"en_US": "The delimiter of the chunks.", "zh_Hans": "\u5757\u7684\u5206\u9694\u7b26\u3002", "pt_BR": "The delimiter of the chunks.", "ja_JP": "The delimiter of the chunks."}, "form": "llm", "llm_description": "The delimiter of the chunks, the format of the delimiter must be a string."}, {"name": "max_chunk_length", "label": {"en_US": "Maximum Chunk Length", "zh_Hans": "\u6700\u5927\u5757\u7684\u957f\u5ea6", "pt_BR": "Maximum Chunk Length", "ja_JP": "Maximum Chunk Length"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": true, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "number", "human_description": {"en_US": "The maximum chunk length.", "zh_Hans": "\u6700\u5927\u5757\u7684\u957f\u5ea6\u3002", "pt_BR": "The maximum chunk length.", "ja_JP": "The maximum chunk length."}, "form": "llm", "llm_description": "The maximum chunk length, the format of the chunk size must be an integer."}, {"name": "chunk_overlap_length", "label": {"en_US": "Chunk Overlap Length", "zh_Hans": "\u5757\u7684\u91cd\u53e0\u957f\u5ea6", "pt_BR": "Chunk Overlap Length", "ja_JP": "Chunk Overlap Length"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": false, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "number", "human_description": {"en_US": "The chunk overlap length.", "zh_Hans": "\u5757\u7684\u91cd\u53e0\u957f\u5ea6\u3002", "pt_BR": "The chunk overlap length.", "ja_JP": "The chunk overlap length."}, "form": "llm", "llm_description": "The chunk overlap length, the format of the chunk overlap length must be an integer."}, {"name": "replace_consecutive_spaces_newlines_tabs", "label": {"en_US": "Replace Consecutive Spaces, Newlines and Tabs", "zh_Hans": "\u66ff\u6362\u8fde\u7eed\u7684\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26", "pt_BR": "Replace Consecutive Spaces, Newlines and Tabs", "ja_JP": "Replace Consecutive Spaces, Newlines and Tabs"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": false, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "boolean", "human_description": {"en_US": "Replace consecutive spaces, newlines and tabs", "zh_Hans": "\u66ff\u6362\u8fde\u7eed\u7684\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26", "pt_BR": "Replace consecutive spaces, newlines and tabs", "ja_JP": "Replace consecutive spaces, newlines and tabs"}, "form": "llm", "llm_description": "Replace consecutive spaces, newlines and tabs, the format of the replace must be a boolean."}, {"name": "delete_all_urls_and_email_addresses", "label": {"en_US": "Delete All URLs and Email Addresses", "zh_Hans": "\u5220\u9664\u6240\u6709URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740", "pt_BR": "Delete All URLs and Email Addresses", "ja_JP": "Delete All URLs and Email Addresses"}, "placeholder": null, "scope": null, "auto_generate": null, "template": null, "required": false, "default": null, "min": null, "max": null, "precision": null, "options": [], "type": "boolean", "human_description": {"en_US": "Delete all URLs and email addresses", "zh_Hans": "\u5220\u9664\u6240\u6709URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740", "pt_BR": "Delete all URLs and email addresses", "ja_JP": "Delete all URLs and email addresses"}, "form": "llm", "llm_description": "Delete all URLs and email addresses, the format of the delete must be a boolean."}], "params": {"input_variable": "", "delimiter": "", "max_chunk_length": "", "chunk_overlap_length": "", "replace_consecutive_spaces_newlines_tabs": "", "delete_all_urls_and_email_addresses": ""}, "selected": false}, "position": {"x": 1807.4306671642219, "y": 281.3910724383104}, "targetPosition": "left", "sourcePosition": "right", "positionAbsolute": {"x": 1807.4306671642219, "y": 281.3910724383104}, "width": 242, "height": 52}], "edges": [{"data": {"isInIteration": false, "isInLoop": false, "sourceType": "datasource", "targetType": "variable-aggregator"}, "id": "1752491761974-source-1752565435219-target", "source": "1752491761974", "sourceHandle": "source", "target": "1752565435219", "targetHandle": "target", "type": "custom", "zIndex": 0}, {"data": {"isInLoop": false, "sourceType": "datasource", "targetType": "variable-aggregator"}, "id": "1752565402678-source-1752565435219-target", "source": "1752565402678", "sourceHandle": "source", "target": "1752565435219", "targetHandle": "target", "type": "custom", "zIndex": 0}, {"id": "1752565435219-source-1752569675978-target", "type": "custom", "source": "1752565435219", "sourceHandle": "source", "target": "1752569675978", "targetHandle": "target", "data": {"sourceType": "variable-aggregator", "targetType": "tool", "isInIteration": false, "isInLoop": false}, "zIndex": 0}, {"id": "1752569675978-source-1752477924228-target", "type": "custom", "source": "1752569675978", "target": "1752477924228", "sourceHandle": "source", "targetHandle": "target", "data": {"sourceType": "tool", "targetType": "knowledge-index", "isInLoop": false}, "zIndex": 0}], "viewport": {"x": -707.721097109337, "y": -93.07807382100896, "zoom": 0.9350632198875476}} \ No newline at end of file diff --git a/api/services/rag_pipeline/transform/web_crawl_parent_child.json b/api/services/rag_pipeline/transform/web_crawl_parent_child.json deleted file mode 100644 index 11a5a7cb48..0000000000 --- a/api/services/rag_pipeline/transform/web_crawl_parent_child.json +++ /dev/null @@ -1 +0,0 @@ -{"nodes": [{"data": {"chunk_structure": "hierarchical_model", "embedding_model": "text-embedding-ada-002", "embedding_model_provider": "langgenius/openai/openai", "index_chunk_variable_selector": ["1752490343805", "result"], "indexing_technique": "high_quality", "keyword_number": 10, "retrieval_model": {"score_threshold": 0.5, "score_threshold_enabled": false, "search_method": "semantic_search", "top_k": 3, "vector_setting": {"embedding_model_name": "text-embedding-ada-002", "embedding_provider_name": "langgenius/openai/openai"}}, "selected": false, "title": "\u77e5\u8bc6\u5e93", "type": "knowledge-index"}, "height": 114, "id": "1752477924228", "position": {"x": 2215.5544306817387, "y": 281.3910724383104}, "positionAbsolute": {"x": 2215.5544306817387, "y": 281.3910724383104}, "selected": false, "sourcePosition": "right", "targetPosition": "left", "type": "custom", "width": 242}, {"data": {"is_team_authorization": true, "output_schema": {"properties": {"result": {"description": "Parent child chunks result", "items": {"type": "object"}, "type": "array"}}, "type": "object"}, "paramSchemas": [{"auto_generate": null, "default": null, "form": "llm", "human_description": {"en_US": "The text you want to chunk.", "ja_JP": "The text you want to chunk.", "pt_BR": "The text you want to chunk.", "zh_Hans": "\u4f60\u60f3\u8981\u5206\u5757\u7684\u6587\u672c\u3002"}, "label": {"en_US": "Input text", "ja_JP": "Input text", "pt_BR": "Input text", "zh_Hans": "\u8f93\u5165\u6587\u672c"}, "llm_description": "The text you want to chunk.", "max": null, "min": null, "name": "input_text", "options": [], "placeholder": null, "precision": null, "required": true, "scope": null, "template": null, "type": "string"}, {"auto_generate": null, "default": 1024, "form": "llm", "human_description": {"en_US": "Maximum length for chunking", "ja_JP": "Maximum length for chunking", "pt_BR": "Comprimento m\u00e1ximo para divis\u00e3o", "zh_Hans": "\u7528\u4e8e\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6"}, "label": {"en_US": "Maximum Length", "ja_JP": "Maximum Length", "pt_BR": "Comprimento M\u00e1ximo", "zh_Hans": "\u6700\u5927\u957f\u5ea6"}, "llm_description": "Maximum length allowed per chunk", "max": null, "min": null, "name": "max_length", "options": [], "placeholder": null, "precision": null, "required": false, "scope": null, "template": null, "type": "number"}, {"auto_generate": null, "default": "\n\n", "form": "llm", "human_description": {"en_US": "Separator used for chunking", "ja_JP": "Separator used for chunking", "pt_BR": "Separador usado para divis\u00e3o", "zh_Hans": "\u7528\u4e8e\u5206\u5757\u7684\u5206\u9694\u7b26"}, "label": {"en_US": "Chunk Separator", "ja_JP": "Chunk Separator", "pt_BR": "Separador de Divis\u00e3o", "zh_Hans": "\u5206\u5757\u5206\u9694\u7b26"}, "llm_description": "The separator used to split chunks", "max": null, "min": null, "name": "separator", "options": [], "placeholder": null, "precision": null, "required": false, "scope": null, "template": null, "type": "string"}, {"auto_generate": null, "default": 512, "form": "llm", "human_description": {"en_US": "Maximum length for subchunking", "ja_JP": "Maximum length for subchunking", "pt_BR": "Comprimento m\u00e1ximo para subdivis\u00e3o", "zh_Hans": "\u7528\u4e8e\u5b50\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6"}, "label": {"en_US": "Subchunk Maximum Length", "ja_JP": "Subchunk Maximum Length", "pt_BR": "Comprimento M\u00e1ximo de Subdivis\u00e3o", "zh_Hans": "\u5b50\u5206\u5757\u6700\u5927\u957f\u5ea6"}, "llm_description": "Maximum length allowed per subchunk", "max": null, "min": null, "name": "subchunk_max_length", "options": [], "placeholder": null, "precision": null, "required": false, "scope": null, "template": null, "type": "number"}, {"auto_generate": null, "default": ". ", "form": "llm", "human_description": {"en_US": "Separator used for subchunking", "ja_JP": "Separator used for subchunking", "pt_BR": "Separador usado para subdivis\u00e3o", "zh_Hans": "\u7528\u4e8e\u5b50\u5206\u5757\u7684\u5206\u9694\u7b26"}, "label": {"en_US": "Subchunk Separator", "ja_JP": "Subchunk Separator", "pt_BR": "Separador de Subdivis\u00e3o", "zh_Hans": "\u5b50\u5206\u5757\u5206\u9694\u7b26"}, "llm_description": "The separator used to split subchunks", "max": null, "min": null, "name": "subchunk_separator", "options": [], "placeholder": null, "precision": null, "required": false, "scope": null, "template": null, "type": "string"}, {"auto_generate": null, "default": "paragraph", "form": "llm", "human_description": {"en_US": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.", "ja_JP": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.", "pt_BR": "Dividir texto em par\u00e1grafos com base no separador e no comprimento m\u00e1ximo do bloco, usando o texto dividido como bloco pai ou documento completo como bloco pai e diretamente recuper\u00e1-lo.", "zh_Hans": "\u6839\u636e\u5206\u9694\u7b26\u548c\u6700\u5927\u5757\u957f\u5ea6\u5c06\u6587\u672c\u62c6\u5206\u4e3a\u6bb5\u843d\uff0c\u4f7f\u7528\u62c6\u5206\u6587\u672c\u4f5c\u4e3a\u68c0\u7d22\u7684\u7236\u5757\u6216\u6574\u4e2a\u6587\u6863\u7528\u4f5c\u7236\u5757\u5e76\u76f4\u63a5\u68c0\u7d22\u3002"}, "label": {"en_US": "Parent Mode", "ja_JP": "Parent Mode", "pt_BR": "Modo Pai", "zh_Hans": "\u7236\u5757\u6a21\u5f0f"}, "llm_description": "Split text into paragraphs based on separator and maximum chunk length, using split text as parent block or entire document as parent block and directly retrieve.", "max": null, "min": null, "name": "parent_mode", "options": [{"icon": "", "label": {"en_US": "Paragraph", "ja_JP": "Paragraph", "pt_BR": "Par\u00e1grafo", "zh_Hans": "\u6bb5\u843d"}, "value": "paragraph"}, {"icon": "", "label": {"en_US": "Full Document", "ja_JP": "Full Document", "pt_BR": "Documento Completo", "zh_Hans": "\u5168\u6587"}, "value": "full_doc"}], "placeholder": null, "precision": null, "required": true, "scope": null, "template": null, "type": "select"}, {"auto_generate": null, "default": 0, "form": "llm", "human_description": {"en_US": "Whether to remove extra spaces in the text", "ja_JP": "Whether to remove extra spaces in the text", "pt_BR": "Se deve remover espa\u00e7os extras no texto", "zh_Hans": "\u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684\u591a\u4f59\u7a7a\u683c"}, "label": {"en_US": "Remove Extra Spaces", "ja_JP": "Remove Extra Spaces", "pt_BR": "Remover Espa\u00e7os Extras", "zh_Hans": "\u79fb\u9664\u591a\u4f59\u7a7a\u683c"}, "llm_description": "Whether to remove extra spaces in the text", "max": null, "min": null, "name": "remove_extra_spaces", "options": [], "placeholder": null, "precision": null, "required": false, "scope": null, "template": null, "type": "boolean"}, {"auto_generate": null, "default": 0, "form": "llm", "human_description": {"en_US": "Whether to remove URLs and emails in the text", "ja_JP": "Whether to remove URLs and emails in the text", "pt_BR": "Se deve remover URLs e e-mails no texto", "zh_Hans": "\u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740"}, "label": {"en_US": "Remove URLs and Emails", "ja_JP": "Remove URLs and Emails", "pt_BR": "Remover URLs e E-mails", "zh_Hans": "\u79fb\u9664URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740"}, "llm_description": "Whether to remove URLs and emails in the text", "max": null, "min": null, "name": "remove_urls_emails", "options": [], "placeholder": null, "precision": null, "required": false, "scope": null, "template": null, "type": "boolean"}], "params": {"input_text": "", "max_length": "", "parent_mode": "", "remove_extra_spaces": "", "remove_urls_emails": "", "separator": "", "subchunk_max_length": "", "subchunk_separator": ""}, "provider_id": "langgenius/parent_child_chunk/parent_child_chunk", "provider_name": "langgenius/parent_child_chunk/parent_child_chunk", "provider_type": "builtin", "selected": true, "title": "\u7236\u5b50\u5206\u5757\u5904\u7406\u5668", "tool_configurations": {}, "tool_description": "\u5c06\u6587\u6863\u5904\u7406\u4e3a\u7236\u5b50\u5206\u5757\u7ed3\u6784", "tool_label": "\u7236\u5b50\u5206\u5757\u5904\u7406\u5668", "tool_name": "parent_child_chunk", "tool_parameters": {"input_text": {"type": "mixed", "value": "{{#1752565435219.output#}}"}, "max_length": {"type": "variable", "value": ["rag", "shared", "max_chunk_length"]}, "parent_mode": {"type": "variable", "value": ["rag", "shared", "parent_mode"]}, "separator": {"type": "mixed", "value": "{{#rag.shared.delimiter#}}"}, "subchunk_max_length": {"type": "variable", "value": ["rag", "shared", "child_max_chunk_length"]}, "subchunk_separator": {"type": "mixed", "value": "{{#rag.shared.child_delimiter#}}"}, "remove_extra_spaces": {"type": "mixed", "value": "{{#rag.shared.replace_consecutive_spaces#}}"}, "remove_urls_emails": {"type": "mixed", "value": "{{#rag.shared.delete_urls_email#}}"}}, "type": "tool"}, "height": 52, "id": "1752490343805", "position": {"x": 1853.5260563244174, "y": 281.3910724383104}, "positionAbsolute": {"x": 1853.5260563244174, "y": 281.3910724383104}, "selected": true, "sourcePosition": "right", "targetPosition": "left", "type": "custom", "width": 242}, {"id": "1752491761974", "type": "custom", "data": {"datasource_parameters": {"url": {"type": "mixed", "value": "{{#rag.1752491761974.jina_url#}}"}, "crawl_sub_pages": {"type": "mixed", "value": "{{#rag.1752491761974.jina_crawl_sub_pages#}}"}, "limit": {"type": "variable", "value": ["rag", "1752491761974", "jina_limit"]}, "use_sitemap": {"type": "mixed", "value": "{{#rag.1752491761974.jina_use_sitemap#}}"}}, "datasource_configurations": {}, "type": "datasource", "title": "Jina Reader", "plugin_id": "langgenius/jina_datasource", "provider_type": "website_crawl", "provider_name": "jina", "datasource_name": "jina_reader", "datasource_label": "Jina Reader", "selected": false}, "position": {"x": 1067.7526055798794, "y": 281.3910724383104}, "targetPosition": "left", "sourcePosition": "right", "positionAbsolute": {"x": 1067.7526055798794, "y": 281.3910724383104}, "width": 242, "height": 52, "selected": false}, {"id": "1752565402678", "type": "custom", "data": {"datasource_parameters": {"url": {"type": "mixed", "value": "{{#rag.1752565402678.firecrawl_url#}}"}, "crawl_subpages": {"type": "mixed", "value": "{{#rag.1752565402678.firecrawl_crawl_sub_pages#}}"}, "exclude_paths": {"type": "mixed", "value": "{{#rag.1752565402678.firecrawl_exclude_paths#}}"}, "include_paths": {"type": "mixed", "value": "{{#rag.1752565402678.firecrawl_include_only_paths#}}"}, "max_depth": {"type": "variable", "value": ["rag", "1752565402678", "firecrawl_max_depth"]}, "limit": {"type": "variable", "value": ["rag", "1752565402678", "firecrawl_limit"]}, "only_main_content": {"type": "mixed", "value": "{{#rag.1752565402678.firecrawl_extract_main_content#}}"}}, "datasource_configurations": {}, "type": "datasource", "title": "Firecrawl", "plugin_id": "langgenius/firecrawl_datasource", "provider_type": "website_crawl", "provider_name": "firecrawl", "datasource_name": "crawl", "datasource_label": "Firecrawl", "selected": false}, "position": {"x": 1067.7526055798794, "y": 417.32608398342404}, "targetPosition": "left", "sourcePosition": "right", "positionAbsolute": {"x": 1067.7526055798794, "y": 417.32608398342404}, "width": 242, "height": 52, "selected": false}, {"id": "1752565435219", "type": "custom", "data": {"output_type": "string", "variables": [["1752491761974", "content"], ["1752565402678", "content"]], "type": "variable-aggregator", "title": "\u53d8\u91cf\u805a\u5408\u5668", "selected": false}, "position": {"x": 1505.4306671642219, "y": 281.3910724383104}, "targetPosition": "left", "sourcePosition": "right", "positionAbsolute": {"x": 1505.4306671642219, "y": 281.3910724383104}, "width": 242, "height": 129, "selected": false}], "edges": [{"data": {"isInLoop": false, "sourceType": "tool", "targetType": "knowledge-index"}, "id": "1752490343805-source-1752477924228-target", "source": "1752490343805", "sourceHandle": "source", "target": "1752477924228", "targetHandle": "target", "type": "custom", "zIndex": 0}, {"id": "1752491761974-source-1752565435219-target", "type": "custom", "source": "1752491761974", "sourceHandle": "source", "target": "1752565435219", "targetHandle": "target", "data": {"sourceType": "datasource", "targetType": "variable-aggregator", "isInIteration": false, "isInLoop": false}, "zIndex": 0}, {"id": "1752565435219-source-1752490343805-target", "type": "custom", "source": "1752565435219", "sourceHandle": "source", "target": "1752490343805", "targetHandle": "target", "data": {"sourceType": "variable-aggregator", "targetType": "tool", "isInIteration": false, "isInLoop": false}, "zIndex": 0}, {"id": "1752565402678-source-1752565435219-target", "type": "custom", "source": "1752565402678", "target": "1752565435219", "sourceHandle": "source", "targetHandle": "target", "data": {"sourceType": "datasource", "targetType": "variable-aggregator", "isInLoop": false}, "zIndex": 0}], "viewport": {"x": -826.1791044466438, "y": -71.91725474841303, "zoom": 0.9980166672552107}} \ No newline at end of file diff --git a/api/services/rag_pipeline/transform/website-crawl-general-economy.yml b/api/services/rag_pipeline/transform/website-crawl-general-economy.yml index acdc0fe52a..0181f6a6d5 100644 --- a/api/services/rag_pipeline/transform/website-crawl-general-economy.yml +++ b/api/services/rag_pipeline/transform/website-crawl-general-economy.yml @@ -1,8 +1,16 @@ dependencies: - current_identifier: null - type: package + type: marketplace value: - plugin_unique_identifier: langgenius/general_chunk:0.0.2@0856fa64f7b0dc937c982f12d45b3a1ad91ba8aacc0d28a1b436e6c94a77e298 + plugin_unique_identifier: langgenius/general_chunker:0.0.1@e3da408b7277866404c3f884d599261f9d0b9003ea4ef7eb3b64489bdf39d18b +- current_identifier: null + type: marketplace + value: + plugin_unique_identifier: langgenius/firecrawl_datasource:0.0.1@f7aed0a26df0e5f4b9555371b5c9fa6db3c7dcf6a46dd1583245697bd90a539a +- current_identifier: null + type: marketplace + value: + plugin_unique_identifier: langgenius/jina_datasource:0.0.1@cf23afb2c3eeccc5a187763a1947f583f0bb10aa56461e512ac4141bf930d608 kind: rag_pipeline rag_pipeline: description: '' diff --git a/api/services/rag_pipeline/transform/website-crawl-general-high-quality.yml b/api/services/rag_pipeline/transform/website-crawl-general-high-quality.yml index 35e6fa5e8f..045098bcbf 100644 --- a/api/services/rag_pipeline/transform/website-crawl-general-high-quality.yml +++ b/api/services/rag_pipeline/transform/website-crawl-general-high-quality.yml @@ -1,8 +1,16 @@ dependencies: - current_identifier: null - type: package + type: marketplace value: - plugin_unique_identifier: langgenius/general_chunk:0.0.2@0856fa64f7b0dc937c982f12d45b3a1ad91ba8aacc0d28a1b436e6c94a77e298 + plugin_unique_identifier: langgenius/general_chunker:0.0.1@e3da408b7277866404c3f884d599261f9d0b9003ea4ef7eb3b64489bdf39d18b +- current_identifier: null + type: marketplace + value: + plugin_unique_identifier: langgenius/firecrawl_datasource:0.0.1@f7aed0a26df0e5f4b9555371b5c9fa6db3c7dcf6a46dd1583245697bd90a539a +- current_identifier: null + type: marketplace + value: + plugin_unique_identifier: langgenius/jina_datasource:0.0.1@cf23afb2c3eeccc5a187763a1947f583f0bb10aa56461e512ac4141bf930d608 kind: rag_pipeline rag_pipeline: description: '' diff --git a/api/services/rag_pipeline/transform/website-crawl-parentchild.yml b/api/services/rag_pipeline/transform/website-crawl-parentchild.yml index 521bb67c38..92bc953f3f 100644 --- a/api/services/rag_pipeline/transform/website-crawl-parentchild.yml +++ b/api/services/rag_pipeline/transform/website-crawl-parentchild.yml @@ -1,8 +1,16 @@ dependencies: - current_identifier: null - type: package + type: marketplace value: - plugin_unique_identifier: langgenius/parent_child_chunk:0.0.1@f8f9ba1f3bcda159ebc0168baa755c2181b923da8157ebb439b8046019f5b510 + plugin_unique_identifier: langgenius/parentchild_chunker:0.0.1@b1a28a27e33fec442ce494da2a7814edd7eb9d646c81f38bccfcf1133d486e40 +- current_identifier: null + type: marketplace + value: + plugin_unique_identifier: langgenius/firecrawl_datasource:0.0.1@f7aed0a26df0e5f4b9555371b5c9fa6db3c7dcf6a46dd1583245697bd90a539a +- current_identifier: null + type: marketplace + value: + plugin_unique_identifier: langgenius/jina_datasource:0.0.1@cf23afb2c3eeccc5a187763a1947f583f0bb10aa56461e512ac4141bf930d608 kind: rag_pipeline rag_pipeline: description: ''