From 271da87c847d4f819dcd64772ff4a5b9a89e94cf Mon Sep 17 00:00:00 2001
From: jyong <718720800@qq.com>
Date: Tue, 16 Sep 2025 16:27:19 +0800
Subject: [PATCH 1/8] dev/reformat

---
 api/models/dataset.py                     |  2 +-
 .../priority_rag_pipeline_run_task.py     | 44 +++++++++----------
 2 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/api/models/dataset.py b/api/models/dataset.py
index 248c436dfa..89a736745f 100644
--- a/api/models/dataset.py
+++ b/api/models/dataset.py
@@ -10,7 +10,7 @@ import re
 import time
 from datetime import datetime
 from json import JSONDecodeError
-from typing import Any, cast
+from typing import Any, cast, Optional
 
 import sqlalchemy as sa
 from sqlalchemy import DateTime, String, func, select
diff --git a/api/tasks/rag_pipeline/priority_rag_pipeline_run_task.py b/api/tasks/rag_pipeline/priority_rag_pipeline_run_task.py
index 7021ddab38..b810507387 100644
--- a/api/tasks/rag_pipeline/priority_rag_pipeline_run_task.py
+++ b/api/tasks/rag_pipeline/priority_rag_pipeline_run_task.py
@@ -103,7 +103,7 @@ def run_single_rag_pipeline_task(rag_pipeline_invoke_entity: Mapping[str, Any],
         workflow_thread_pool_id = rag_pipeline_invoke_entity_model.workflow_thread_pool_id
         application_generate_entity = rag_pipeline_invoke_entity_model.application_generate_entity
 
-        with Session(db.engine) as session:
+        with Session(db.engine, expire_on_commit=False) as session:
             # Load required entities
             account = session.query(Account).filter(Account.id == user_id).first()
             if not account:
@@ -144,30 +144,30 @@ def run_single_rag_pipeline_task(rag_pipeline_invoke_entity: Mapping[str, Any],
                 triggered_from=WorkflowNodeExecutionTriggeredFrom.RAG_PIPELINE_RUN,
             )
 
-            # Set the user directly in g for preserve_flask_contexts
-            g._login_user = account
+        # Set the user directly in g for preserve_flask_contexts
+        g._login_user = account
 
-            # Copy context for passing to pipeline generator
-            context = contextvars.copy_context()
+        # Copy context for passing to pipeline generator
+        context = contextvars.copy_context()
 
-            # Direct execution without creating another thread
-            # Since we're already in a thread pool, no need for nested threading
-            from core.app.apps.pipeline.pipeline_generator import PipelineGenerator
+        # Direct execution without creating another thread
+        # Since we're already in a thread pool, no need for nested threading
+        from core.app.apps.pipeline.pipeline_generator import PipelineGenerator
 
-            pipeline_generator = PipelineGenerator()
-            pipeline_generator._generate(
-                flask_app=flask_app,
-                context=context,
-                pipeline=pipeline,
-                workflow_id=workflow_id,
-                user=account,
-                application_generate_entity=entity,
-                invoke_from=InvokeFrom.PUBLISHED,
-                workflow_execution_repository=workflow_execution_repository,
-                workflow_node_execution_repository=workflow_node_execution_repository,
-                streaming=streaming,
-                workflow_thread_pool_id=workflow_thread_pool_id,
-            )
+        pipeline_generator = PipelineGenerator()
+        pipeline_generator._generate(
+            flask_app=flask_app,
+            context=context,
+            pipeline=pipeline,
+            workflow_id=workflow_id,
+            user=account,
+            application_generate_entity=entity,
+            invoke_from=InvokeFrom.PUBLISHED,
+            workflow_execution_repository=workflow_execution_repository,
+            workflow_node_execution_repository=workflow_node_execution_repository,
+            streaming=streaming,
+            workflow_thread_pool_id=workflow_thread_pool_id,
+        )
     except Exception:
         logging.exception("Error in priority pipeline task")
         raise
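Patch 1 pairs `expire_on_commit=False` with moving the generator call out of the `with Session(...)` block. By default SQLAlchemy expires every ORM instance at commit, so objects like `account` and `pipeline` would raise `DetachedInstanceError` when touched after their session has closed. A minimal standalone sketch of the difference, using a toy model rather than Dify's:

```python
# Why expire_on_commit=False matters when ORM objects outlive their session.
# Toy model for illustration only, not Dify's actual code.
from sqlalchemy import String, create_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column


class Base(DeclarativeBase):
    pass


class User(Base):
    __tablename__ = "users"
    id: Mapped[int] = mapped_column(primary_key=True)
    name: Mapped[str] = mapped_column(String(50))


engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:  # default: expire_on_commit=True
    user = User(name="alice")
    session.add(user)
    session.commit()  # expires `user`; attributes must be re-fetched on next access
# `user` is now detached with expired attributes: reading user.name here
# would raise DetachedInstanceError.

with Session(engine, expire_on_commit=False) as session:
    user = User(name="bob")
    session.add(user)
    session.commit()  # attributes stay loaded on the instance
print(user.name)  # fine: "bob" survives outside the session
```

That is what lets the dedented `pipeline_generator._generate(...)` call keep using `account` and `pipeline` after the session context has exited.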
From 1c6e57d3dff35bfd1556a8a9d6b2d134f2bb61fd Mon Sep 17 00:00:00 2001
From: jyong <718720800@qq.com>
Date: Tue, 16 Sep 2025 16:41:50 +0800
Subject: [PATCH 2/8] dev/reformat

---
 api/controllers/console/datasets/datasets.py         | 2 +-
 api/core/app/app_config/entities.py                  | 2 +-
 api/core/rag/extractor/entity/extract_setting.py     | 2 ++
 api/core/rag/index_processor/index_processor_base.py | 4 ++--
 api/models/workflow.py                               | 2 +-
 api/services/dataset_service.py                      | 2 +-
 6 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/api/controllers/console/datasets/datasets.py b/api/controllers/console/datasets/datasets.py
index 3a13bb6b67..a1ae941d4b 100644
--- a/api/controllers/console/datasets/datasets.py
+++ b/api/controllers/console/datasets/datasets.py
@@ -741,7 +741,7 @@ class DatasetApiDeleteApi(Resource):
         return {"result": "success"}, 204
 
 
-@console_ns.route("/datasets//api-keys/")
+@console_ns.route("/datasets//api-keys/")
 class DatasetEnableApiApi(Resource):
     @setup_required
     @login_required
diff --git a/api/core/app/app_config/entities.py b/api/core/app/app_config/entities.py
index 2ad81fe005..3dbc8706d3 100644
--- a/api/core/app/app_config/entities.py
+++ b/api/core/app/app_config/entities.py
@@ -1,6 +1,6 @@
 from collections.abc import Sequence
 from enum import StrEnum, auto
-from typing import Any, Literal
+from typing import Any, Literal, Optional
 
 from pydantic import BaseModel, Field, field_validator
 
diff --git a/api/core/rag/extractor/entity/extract_setting.py b/api/core/rag/extractor/entity/extract_setting.py
index 0a57c792f1..c0e79b02c4 100644
--- a/api/core/rag/extractor/entity/extract_setting.py
+++ b/api/core/rag/extractor/entity/extract_setting.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 from pydantic import BaseModel, ConfigDict
 
 from models.dataset import Document
diff --git a/api/core/rag/index_processor/index_processor_base.py b/api/core/rag/index_processor/index_processor_base.py
index b3fc4ac221..05cffb5a55 100644
--- a/api/core/rag/index_processor/index_processor_base.py
+++ b/api/core/rag/index_processor/index_processor_base.py
@@ -2,7 +2,7 @@
 
 from abc import ABC, abstractmethod
 from collections.abc import Mapping
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, Optional
 
 from configs import dify_config
 from core.rag.extractor.entity.extract_setting import ExtractSetting
@@ -64,7 +64,7 @@ class BaseIndexProcessor(ABC):
         max_tokens: int,
         chunk_overlap: int,
         separator: str,
-        embedding_model_instance: ModelInstance | None,
+        embedding_model_instance: Optional["ModelInstance"],
     ) -> TextSplitter:
         """
         Get the NodeParser object according to the processing rule.
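The `ModelInstance | None` to `Optional["ModelInstance"]` change in `index_processor_base.py` is more than cosmetic when `ModelInstance` is imported only under `TYPE_CHECKING`: an unquoted `X | None` in a signature is evaluated at definition time and raises `NameError` because the name is absent at runtime, while a quoted name stays a string until a type checker reads it. A sketch of the failure mode, with `Decimal` standing in for `ModelInstance`:

```python
# TYPE_CHECKING pitfall that the quoted annotation avoids. Toy module;
# the names are illustrative only.
from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING:
    from decimal import Decimal  # visible to type checkers, absent at runtime


# This would raise NameError("Decimal") the moment the module is imported,
# because the annotation is evaluated when the function is defined:
#
#     def scale(value: Decimal | None) -> str: ...
#
# Quoting defers evaluation to the type checker, so this imports cleanly:
def scale(value: Optional["Decimal"]) -> str:
    return "none" if value is None else str(value)


print(scale(None))  # "none"
```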
diff --git a/api/models/workflow.py b/api/models/workflow.py
index a25d65669a..97e1790e19 100644
--- a/api/models/workflow.py
+++ b/api/models/workflow.py
@@ -3,7 +3,7 @@ import logging
 from collections.abc import Mapping, Sequence
 from datetime import datetime
 from enum import StrEnum
-from typing import TYPE_CHECKING, Any, Union, cast
+from typing import TYPE_CHECKING, Any, Union, cast, Optional
 from uuid import uuid4
 
 import sqlalchemy as sa
diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py
index ed2301e172..400b00ef83 100644
--- a/api/services/dataset_service.py
+++ b/api/services/dataset_service.py
@@ -7,7 +7,7 @@ import time
 import uuid
 from collections import Counter
 from collections.abc import Sequence
-from typing import Any, Literal
+from typing import Any, Literal, Optional
 
 import sqlalchemy as sa
 from sqlalchemy import exists, func, select

From 7b9326a411b95218bcf7f7dcfc4390d1da0cfa7d Mon Sep 17 00:00:00 2001
From: jyong <718720800@qq.com>
Date: Tue, 16 Sep 2025 17:08:39 +0800
Subject: [PATCH 3/8] dev/reformat

---
 api/controllers/console/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/api/controllers/console/__init__.py b/api/controllers/console/__init__.py
index 4e54fa9220..4339501f73 100644
--- a/api/controllers/console/__init__.py
+++ b/api/controllers/console/__init__.py
@@ -61,6 +61,7 @@ from . import (
     init_validate,
     ping,
     setup,
+    spec,
     version,
 )

From 98db7d365c4d7fff6ba5057cd9538e324b5d1e1b Mon Sep 17 00:00:00 2001
From: jyong <718720800@qq.com>
Date: Tue, 16 Sep 2025 17:33:56 +0800
Subject: [PATCH 4/8] dev/reformat

---
 .../rag_pipeline/rag_pipeline_workflow.py     |  2 +-
 .../rag_pipeline/rag_pipeline_workflow.py     |  2 +-
 .../knowledge_index/knowledge_index_node.py   |  2 +-
 .../rag_pipeline/pipeline_generate_service.py | 18 ++++++++++++++++-
 api/services/rag_pipeline/rag_pipeline.py     | 20 ++++++------------
 api/tasks/retry_document_indexing_task.py     |  1 -
 6 files changed, 26 insertions(+), 19 deletions(-)

diff --git a/api/controllers/console/datasets/rag_pipeline/rag_pipeline_workflow.py b/api/controllers/console/datasets/rag_pipeline/rag_pipeline_workflow.py
index d00be3a573..898ed416c0 100644
--- a/api/controllers/console/datasets/rag_pipeline/rag_pipeline_workflow.py
+++ b/api/controllers/console/datasets/rag_pipeline/rag_pipeline_workflow.py
@@ -39,7 +39,7 @@ from libs import helper
 from libs.helper import TimestampField, uuid_value
 from libs.login import current_user, login_required
 from models.account import Account
-from models.dataset import Pipeline
+from models.dataset import Document, Pipeline
 from models.model import EndUser
 from services.errors.app import WorkflowHashNotEqualError
 from services.errors.llm import InvokeRateLimitError
diff --git a/api/controllers/service_api/dataset/rag_pipeline/rag_pipeline_workflow.py b/api/controllers/service_api/dataset/rag_pipeline/rag_pipeline_workflow.py
index 55bfdde009..c386e8f41e 100644
--- a/api/controllers/service_api/dataset/rag_pipeline/rag_pipeline_workflow.py
+++ b/api/controllers/service_api/dataset/rag_pipeline/rag_pipeline_workflow.py
@@ -67,7 +67,7 @@ class DatasourceNodeRunApi(DatasetApiResource):
     """Resource for datasource node run."""
 
     @service_api_ns.doc(shortcut="pipeline_datasource_node_run")
-    @service_api_ns.doc(description="Run a datasource node for a rag pipeline")
+    @service_api_ns.doc(description="Run a atasource node for a rag pipeline")
     @service_api_ns.doc(
         path={
             "dataset_id": "Dataset ID",
diff --git a/api/core/workflow/nodes/knowledge_index/knowledge_index_node.py b/api/core/workflow/nodes/knowledge_index/knowledge_index_node.py
index d970d7480c..d7641bc123 100644
--- a/api/core/workflow/nodes/knowledge_index/knowledge_index_node.py
+++ b/api/core/workflow/nodes/knowledge_index/knowledge_index_node.py
@@ -141,7 +141,7 @@ class KnowledgeIndexNode(Node):
         index_processor = IndexProcessorFactory(dataset.chunk_structure).init_index_processor()
         if original_document_id:
             segments = db.session.scalars(
-                select(DocumentSegment).where(DocumentSegment.document_id == document_id)
+                select(DocumentSegment).where(DocumentSegment.document_id == original_document_id.value)
             ).all()
             if segments:
                 index_node_ids = [segment.index_node_id for segment in segments]
diff --git a/api/services/rag_pipeline/pipeline_generate_service.py b/api/services/rag_pipeline/pipeline_generate_service.py
index da67801877..796d3ee3ae 100644
--- a/api/services/rag_pipeline/pipeline_generate_service.py
+++ b/api/services/rag_pipeline/pipeline_generate_service.py
@@ -4,7 +4,8 @@ from typing import Any, Union
 
 from configs import dify_config
 from core.app.apps.pipeline.pipeline_generator import PipelineGenerator
 from core.app.entities.app_invoke_entities import InvokeFrom
-from models.dataset import Pipeline
+from extensions.ext_database import db
+from models.dataset import Document, Pipeline
 from models.model import Account, App, EndUser
 from models.workflow import Workflow
 from services.rag_pipeline.rag_pipeline import RagPipelineService
@@ -31,6 +32,9 @@ class PipelineGenerateService:
         """
         try:
             workflow = cls._get_workflow(pipeline, invoke_from)
+            if args.get("original_document_id"):
+                # update document status to waiting
+                cls.update_document_status(args.get("original_document_id", ""))
             return PipelineGenerator.convert_to_event_stream(
                 PipelineGenerator().generate(
                     pipeline=pipeline,
@@ -97,3 +101,15 @@ class PipelineGenerateService:
             raise ValueError("Workflow not published")
 
         return workflow
+
+    @classmethod
+    def update_document_status(cls, document_id: str):
+        """
+        Update document status to waiting
+        :param document_id: document id
+        """
+        document = db.session.query(Document).filter(Document.id == document_id).first()
+        if document:
+            document.indexing_status = "waiting"
+            db.session.add(document)
+            db.session.commit()
\ No newline at end of file
diff --git a/api/services/rag_pipeline/rag_pipeline.py b/api/services/rag_pipeline/rag_pipeline.py
index 4f97e0f9bc..2da61c828c 100644
--- a/api/services/rag_pipeline/rag_pipeline.py
+++ b/api/services/rag_pipeline/rag_pipeline.py
@@ -7,6 +7,7 @@ from collections.abc import Callable, Generator, Mapping, Sequence
 from datetime import UTC, datetime
 from typing import Any, Optional, Union, cast
 from uuid import uuid4
+import uuid
 
 from flask_login import current_user
 from sqlalchemy import func, or_, select
@@ -14,6 +15,7 @@ from sqlalchemy.orm import Session, sessionmaker
 
 import contexts
 from configs import dify_config
+from core.app.apps.pipeline.pipeline_config_manager import PipelineConfigManager
 from core.app.apps.pipeline.pipeline_generator import PipelineGenerator
 from core.app.entities.app_invoke_entities import InvokeFrom
 from core.datasource.entities.datasource_entities import (
@@ -55,14 +57,7 @@ from core.workflow.workflow_entry import WorkflowEntry
 from extensions.ext_database import db
 from libs.infinite_scroll_pagination import InfiniteScrollPagination
 from models.account import Account
-from models.dataset import (  # type: ignore
-    Dataset,
-    Document,
-    DocumentPipelineExecutionLog,
-    Pipeline,
-    PipelineCustomizedTemplate,
-    PipelineRecommendedPlugin,
-)
+from models.dataset import Dataset, Document, DocumentPipelineExecutionLog, Pipeline, PipelineCustomizedTemplate, PipelineRecommendedPlugin  # type: ignore
 from models.enums import WorkflowRunTriggeredFrom
 from models.model import EndUser
 from models.workflow import (
@@ -1325,11 +1320,8 @@ class RagPipelineService:
         """
         Retry error document
         """
-        document_pipeline_excution_log = (
-            db.session.query(DocumentPipelineExecutionLog)
-            .filter(DocumentPipelineExecutionLog.document_id == document.id)
-            .first()
-        )
+        document_pipeline_excution_log = db.session.query(DocumentPipelineExecutionLog).filter(
+            DocumentPipelineExecutionLog.document_id == document.id).first()
         if not document_pipeline_excution_log:
             raise ValueError("Document pipeline execution log not found")
         pipeline = db.session.query(Pipeline).filter(Pipeline.id == document_pipeline_excution_log.pipeline_id).first()
@@ -1348,11 +1340,11 @@ class RagPipelineService:
                 "start_node_id": document_pipeline_excution_log.datasource_node_id,
                 "datasource_type": document_pipeline_excution_log.datasource_type,
                 "datasource_info_list": [json.loads(document_pipeline_excution_log.datasource_info)],
+                "original_document_id": document.id,
             },
             invoke_from=InvokeFrom.PUBLISHED,
             streaming=False,
             call_depth=0,
             workflow_thread_pool_id=None,
             is_retry=True,
-            documents=[document],
         )
diff --git a/api/tasks/retry_document_indexing_task.py b/api/tasks/retry_document_indexing_task.py
index ff7848eea6..f4e9b52778 100644
--- a/api/tasks/retry_document_indexing_task.py
+++ b/api/tasks/retry_document_indexing_task.py
@@ -29,7 +29,6 @@ def retry_document_indexing_task(dataset_id: str, document_ids: list[str], user_
     Usage: retry_document_indexing_task.delay(dataset_id, document_ids, user_id)
     """
     start_at = time.perf_counter()
-    print("sadaddadadaaaadadadadsdsadasdadasdasda")
    try:
        dataset = db.session.query(Dataset).where(Dataset.id == dataset_id).first()
        if not dataset:
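Patch 4's retry path has three cooperating pieces: the service resets the document's `indexing_status` to "waiting", the generate args now carry `original_document_id` instead of a `documents` list, and the knowledge-index node looks up stale segments by that original id (the old code queried by `document_id`, the wrong variable). A pure-Python sketch of that flow with stand-in types — none of these helpers exist in Dify; they only mirror the shape of the change:

```python
# Reduced model of the retry flow wired up in patch 4. Illustrative only.
from dataclasses import dataclass, field


@dataclass
class Document:
    id: str
    indexing_status: str = "error"


@dataclass
class SegmentStore:
    # document_id -> index node ids, standing in for DocumentSegment rows
    by_document: dict[str, list[str]] = field(default_factory=dict)

    def clean(self, document_id: str) -> list[str]:
        # Mirrors the knowledge-index node fix: segments are found by the
        # *original* document id before re-indexing, not some other variable.
        return self.by_document.pop(document_id, [])


def retry_document(document: Document, store: SegmentStore) -> None:
    document.indexing_status = "waiting"      # what update_document_status() does
    removed = store.clean(document.id)        # what passing original_document_id enables
    print(f"reset {document.id}, removed {len(removed)} stale index nodes")


store = SegmentStore(by_document={"doc-1": ["n1", "n2"]})
retry_document(Document(id="doc-1"), store)
```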
From 9ed6679966e2aef5bf826bb4257b8dfcbf197fab Mon Sep 17 00:00:00 2001
From: jyong <718720800@qq.com>
Date: Tue, 16 Sep 2025 17:34:22 +0800
Subject: [PATCH 5/8] dev/reformat

---
 .../rag_pipeline/rag_pipeline_workflow.py     |  2 +-
 api/models/dataset.py                         |  2 +-
 api/models/workflow.py                        |  2 +-
 .../rag_pipeline/pipeline_generate_service.py |  2 +-
 api/services/rag_pipeline/rag_pipeline.py     | 18 +++++++++++++-----
 5 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/api/controllers/console/datasets/rag_pipeline/rag_pipeline_workflow.py b/api/controllers/console/datasets/rag_pipeline/rag_pipeline_workflow.py
index 898ed416c0..d00be3a573 100644
--- a/api/controllers/console/datasets/rag_pipeline/rag_pipeline_workflow.py
+++ b/api/controllers/console/datasets/rag_pipeline/rag_pipeline_workflow.py
@@ -39,7 +39,7 @@ from libs import helper
 from libs.helper import TimestampField, uuid_value
 from libs.login import current_user, login_required
 from models.account import Account
-from models.dataset import Document, Pipeline
+from models.dataset import Pipeline
 from models.model import EndUser
 from services.errors.app import WorkflowHashNotEqualError
 from services.errors.llm import InvokeRateLimitError
diff --git a/api/models/dataset.py b/api/models/dataset.py
index 89a736745f..2c03a0c30c 100644
--- a/api/models/dataset.py
+++ b/api/models/dataset.py
@@ -10,7 +10,7 @@ import re
 import time
 from datetime import datetime
 from json import JSONDecodeError
-from typing import Any, cast, Optional
+from typing import Any, Optional, cast
 
 import sqlalchemy as sa
 from sqlalchemy import DateTime, String, func, select
diff --git a/api/models/workflow.py b/api/models/workflow.py
index 97e1790e19..bb7ea2c074 100644
--- a/api/models/workflow.py
+++ b/api/models/workflow.py
@@ -3,7 +3,7 @@ import logging
 from collections.abc import Mapping, Sequence
 from datetime import datetime
 from enum import StrEnum
-from typing import TYPE_CHECKING, Any, Union, cast, Optional
+from typing import TYPE_CHECKING, Any, Optional, Union, cast
 from uuid import uuid4
 
 import sqlalchemy as sa
diff --git a/api/services/rag_pipeline/pipeline_generate_service.py b/api/services/rag_pipeline/pipeline_generate_service.py
index 796d3ee3ae..51e03d11c7 100644
--- a/api/services/rag_pipeline/pipeline_generate_service.py
+++ b/api/services/rag_pipeline/pipeline_generate_service.py
@@ -112,4 +112,4 @@ class PipelineGenerateService:
         if document:
             document.indexing_status = "waiting"
             db.session.add(document)
-            db.session.commit()
\ No newline at end of file
+            db.session.commit()
diff --git a/api/services/rag_pipeline/rag_pipeline.py b/api/services/rag_pipeline/rag_pipeline.py
index 2da61c828c..99c192d709 100644
--- a/api/services/rag_pipeline/rag_pipeline.py
+++ b/api/services/rag_pipeline/rag_pipeline.py
@@ -7,7 +7,6 @@ from collections.abc import Callable, Generator, Mapping, Sequence
 from datetime import UTC, datetime
 from typing import Any, Optional, Union, cast
 from uuid import uuid4
-import uuid
 
 from flask_login import current_user
 from sqlalchemy import func, or_, select
@@ -15,7 +14,6 @@ from sqlalchemy.orm import Session, sessionmaker
 
 import contexts
 from configs import dify_config
-from core.app.apps.pipeline.pipeline_config_manager import PipelineConfigManager
 from core.app.apps.pipeline.pipeline_generator import PipelineGenerator
 from core.app.entities.app_invoke_entities import InvokeFrom
 from core.datasource.entities.datasource_entities import (
@@ -57,7 +55,14 @@ from core.workflow.workflow_entry import WorkflowEntry
 from extensions.ext_database import db
 from libs.infinite_scroll_pagination import InfiniteScrollPagination
 from models.account import Account
-from models.dataset import Dataset, Document, DocumentPipelineExecutionLog, Pipeline, PipelineCustomizedTemplate, PipelineRecommendedPlugin  # type: ignore
+from models.dataset import (  # type: ignore
+    Dataset,
+    Document,
+    DocumentPipelineExecutionLog,
+    Pipeline,
+    PipelineCustomizedTemplate,
+    PipelineRecommendedPlugin,
+)
 from models.enums import WorkflowRunTriggeredFrom
 from models.model import EndUser
 from models.workflow import (
@@ -1320,8 +1325,11 @@ class RagPipelineService:
         """
         Retry error document
         """
-        document_pipeline_excution_log = db.session.query(DocumentPipelineExecutionLog).filter(
-            DocumentPipelineExecutionLog.document_id == document.id).first()
+        document_pipeline_excution_log = (
+            db.session.query(DocumentPipelineExecutionLog)
+            .filter(DocumentPipelineExecutionLog.document_id == document.id)
+            .first()
+        )
         if not document_pipeline_excution_log:
             raise ValueError("Document pipeline execution log not found")
         pipeline = db.session.query(Pipeline).filter(Pipeline.id == document_pipeline_excution_log.pipeline_id).first()
From 6be5772a5174d1ad911238570519897f3ad2d022 Mon Sep 17 00:00:00 2001
From: jyong <718720800@qq.com>
Date: Tue, 16 Sep 2025 17:45:21 +0800
Subject: [PATCH 6/8] dev/reformat

---
 .../transform/file-general-economy.yml        | 20 +++++++++----------
 .../transform/file-general-high-quality.yml   | 20 +++++++++----------
 .../transform/file-parentchild.yml            | 20 +++++++++----------
 3 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/api/services/rag_pipeline/transform/file-general-economy.yml b/api/services/rag_pipeline/transform/file-general-economy.yml
index 147920a878..daad0ea166 100644
--- a/api/services/rag_pipeline/transform/file-general-economy.yml
+++ b/api/services/rag_pipeline/transform/file-general-economy.yml
@@ -282,7 +282,7 @@ workflow:
         conditions:
         - comparison_operator: is
           id: 9da88d93-3ff6-463f-abfd-6bcafbf2554d
-          value: xlsx
+          value: .xlsx
           varType: file
           variable_selector:
           - '1752479895761'
@@ -290,7 +290,7 @@ workflow:
           - extension
         - comparison_operator: is
           id: d0e88f5e-dfe3-4bae-af0c-dbec267500de
-          value: xls
+          value: .xls
           varType: file
           variable_selector:
           - '1752479895761'
@@ -298,7 +298,7 @@ workflow:
          - extension
        - comparison_operator: is
          id: a957e91e-1ed7-4c6b-9c80-2f0948858f1d
-          value: md
+          value: .md
          varType: file
          variable_selector:
          - '1752479895761'
@@ -306,7 +306,7 @@ workflow:
          - extension
        - comparison_operator: is
          id: 870c3c39-8d3f-474a-ab8b-9c0ccf53db73
-          value: markdown
+          value: .markdown
          varType: file
          variable_selector:
          - '1752479895761'
@@ -314,7 +314,7 @@ workflow:
          - extension
        - comparison_operator: is
          id: f9541513-1e71-4dc1-9db5-35dc84a39e3c
-          value: mdx
+          value: .mdx
          varType: file
          variable_selector:
          - '1752479895761'
@@ -322,7 +322,7 @@ workflow:
          - extension
        - comparison_operator: is
          id: 4c7f455b-ac20-40ca-9495-6cc44ffcb35d
-          value: html
+          value: .html
          varType: file
          variable_selector:
          - '1752479895761'
@@ -330,7 +330,7 @@ workflow:
          - extension
        - comparison_operator: is
          id: 2e12d9c7-8057-4a09-8851-f9fd1d0718d1
-          value: htm
+          value: .htm
          varType: file
          variable_selector:
          - '1752479895761'
@@ -338,7 +338,7 @@ workflow:
          - extension
        - comparison_operator: is
          id: 73a995a9-d8b9-4aef-89f7-306e2ddcbce2
-          value: docx
+          value: .docx
          varType: file
          variable_selector:
          - '1752479895761'
@@ -346,7 +346,7 @@ workflow:
          - extension
        - comparison_operator: is
          id: 8a2e8772-0426-458b-a1f9-9eaaec0f27c8
-          value: csv
+          value: .csv
          varType: file
          variable_selector:
          - '1752479895761'
@@ -354,7 +354,7 @@ workflow:
          - extension
        - comparison_operator: is
          id: aa2cb6b6-a2fc-462a-a9f5-c9c3f33a1602
-          value: txt
+          value: .txt
          varType: file
          variable_selector:
          - '1752479895761'
diff --git a/api/services/rag_pipeline/transform/file-general-high-quality.yml b/api/services/rag_pipeline/transform/file-general-high-quality.yml
index 5d0e7817a0..fd414741d2 100644
--- a/api/services/rag_pipeline/transform/file-general-high-quality.yml
+++ b/api/services/rag_pipeline/transform/file-general-high-quality.yml
@@ -282,7 +282,7 @@ workflow:
        conditions:
        - comparison_operator: is
          id: 9da88d93-3ff6-463f-abfd-6bcafbf2554d
-          value: xlsx
+          value: .xlsx
          varType: file
          variable_selector:
          - '1752479895761'
@@ -290,7 +290,7 @@ workflow:
          - extension
        - comparison_operator: is
          id: d0e88f5e-dfe3-4bae-af0c-dbec267500de
-          value: xls
+          value: .xls
          varType: file
          variable_selector:
          - '1752479895761'
@@ -298,7 +298,7 @@ workflow:
          - extension
        - comparison_operator: is
          id: a957e91e-1ed7-4c6b-9c80-2f0948858f1d
-          value: md
+          value: .md
          varType: file
          variable_selector:
          - '1752479895761'
@@ -306,7 +306,7 @@ workflow:
          - extension
        - comparison_operator: is
          id: 870c3c39-8d3f-474a-ab8b-9c0ccf53db73
-          value: markdown
+          value: .markdown
          varType: file
          variable_selector:
          - '1752479895761'
@@ -314,7 +314,7 @@ workflow:
          - extension
        - comparison_operator: is
          id: f9541513-1e71-4dc1-9db5-35dc84a39e3c
-          value: mdx
+          value: .mdx
          varType: file
          variable_selector:
          - '1752479895761'
@@ -322,7 +322,7 @@ workflow:
          - extension
        - comparison_operator: is
          id: 4c7f455b-ac20-40ca-9495-6cc44ffcb35d
-          value: html
+          value: .html
          varType: file
          variable_selector:
          - '1752479895761'
@@ -330,7 +330,7 @@ workflow:
          - extension
        - comparison_operator: is
          id: 2e12d9c7-8057-4a09-8851-f9fd1d0718d1
-          value: htm
+          value: .htm
          varType: file
          variable_selector:
          - '1752479895761'
@@ -338,7 +338,7 @@ workflow:
          - extension
        - comparison_operator: is
          id: 73a995a9-d8b9-4aef-89f7-306e2ddcbce2
-          value: docx
+          value: .docx
          varType: file
          variable_selector:
          - '1752479895761'
@@ -346,7 +346,7 @@ workflow:
          - extension
        - comparison_operator: is
          id: 8a2e8772-0426-458b-a1f9-9eaaec0f27c8
-          value: csv
+          value: .csv
          varType: file
          variable_selector:
          - '1752479895761'
@@ -354,7 +354,7 @@ workflow:
          - extension
        - comparison_operator: is
          id: aa2cb6b6-a2fc-462a-a9f5-c9c3f33a1602
-          value: txt
+          value: .txt
          varType: file
          variable_selector:
          - '1752479895761'
diff --git a/api/services/rag_pipeline/transform/file-parentchild.yml b/api/services/rag_pipeline/transform/file-parentchild.yml
index 97c108d221..af945e7d36 100644
--- a/api/services/rag_pipeline/transform/file-parentchild.yml
+++ b/api/services/rag_pipeline/transform/file-parentchild.yml
@@ -281,7 +281,7 @@ workflow:
        conditions:
        - comparison_operator: is
          id: 9da88d93-3ff6-463f-abfd-6bcafbf2554d
-          value: xlsx
+          value: .xlsx
          varType: file
          variable_selector:
          - '1752479895761'
@@ -289,7 +289,7 @@ workflow:
          - extension
        - comparison_operator: is
          id: d0e88f5e-dfe3-4bae-af0c-dbec267500de
-          value: xls
+          value: .xls
          varType: file
          variable_selector:
          - '1752479895761'
@@ -297,7 +297,7 @@ workflow:
          - extension
        - comparison_operator: is
          id: a957e91e-1ed7-4c6b-9c80-2f0948858f1d
-          value: md
+          value: .md
          varType: file
          variable_selector:
          - '1752479895761'
@@ -305,7 +305,7 @@ workflow:
          - extension
        - comparison_operator: is
          id: 870c3c39-8d3f-474a-ab8b-9c0ccf53db73
-          value: markdown
+          value: .markdown
          varType: file
          variable_selector:
          - '1752479895761'
@@ -313,7 +313,7 @@ workflow:
          - extension
        - comparison_operator: is
          id: f9541513-1e71-4dc1-9db5-35dc84a39e3c
-          value: mdx
+          value: .mdx
          varType: file
          variable_selector:
          - '1752479895761'
@@ -321,7 +321,7 @@ workflow:
          - extension
        - comparison_operator: is
          id: 4c7f455b-ac20-40ca-9495-6cc44ffcb35d
-          value: html
+          value: .html
          varType: file
          variable_selector:
          - '1752479895761'
@@ -329,7 +329,7 @@ workflow:
          - extension
        - comparison_operator: is
          id: 2e12d9c7-8057-4a09-8851-f9fd1d0718d1
-          value: htm
+          value: .htm
          varType: file
          variable_selector:
          - '1752479895761'
@@ -337,7 +337,7 @@ workflow:
          - extension
        - comparison_operator: is
          id: 73a995a9-d8b9-4aef-89f7-306e2ddcbce2
-          value: docx
+          value: .docx
          varType: file
          variable_selector:
          - '1752479895761'
@@ -345,7 +345,7 @@ workflow:
          - extension
        - comparison_operator: is
          id: 8a2e8772-0426-458b-a1f9-9eaaec0f27c8
-          value: csv
+          value: .csv
          varType: file
          variable_selector:
          - '1752479895761'
@@ -353,7 +353,7 @@ workflow:
          - extension
        - comparison_operator: is
          id: aa2cb6b6-a2fc-462a-a9f5-c9c3f33a1602
-          value: txt
+          value: .txt
          varType: file
          variable_selector:
          - '1752479895761'
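Patch 6 adds a leading dot to every extension condition in the three transform templates. If the file `extension` variable is populated the way Python's `pathlib` computes suffixes (an assumption here, but consistent with the fix), the dot is part of the value, so the old dotless comparisons could never match:

```python
# Why the dotless `is xlsx` conditions never matched, assuming the
# extension value carries a leading dot as pathlib's suffix does.
from pathlib import Path

extension = Path("report.xlsx").suffix  # ".xlsx", dot included

print(extension == "xlsx")   # False -- the old condition values
print(extension == ".xlsx")  # True  -- the patched values
```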
From 5133623d4245aa48526804b06bdab79fee91f67d Mon Sep 17 00:00:00 2001
From: jyong <718720800@qq.com>
Date: Tue, 16 Sep 2025 19:06:42 +0800
Subject: [PATCH 7/8] dev/reformat

---
 api/core/app/apps/pipeline/pipeline_generator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/api/core/app/apps/pipeline/pipeline_generator.py b/api/core/app/apps/pipeline/pipeline_generator.py
index 574d9c71bf..9be8ec82ce 100644
--- a/api/core/app/apps/pipeline/pipeline_generator.py
+++ b/api/core/app/apps/pipeline/pipeline_generator.py
@@ -119,7 +119,7 @@ class PipelineGenerator(BaseAppGenerator):
     ) -> Union[Mapping[str, Any], Generator[Mapping | str, None, None], None]:
         # Add null check for dataset
-        with Session(db.engine) as session:
+        with Session(db.engine, expire_on_commit=False) as session:
             dataset = pipeline.retrieve_dataset(session)
             if not dataset:
                 raise ValueError("Pipeline dataset is required")

From 8cbfaa2c03dfc891cd7effae6eca606bbcfc6fbc Mon Sep 17 00:00:00 2001
From: jyong <718720800@qq.com>
Date: Tue, 16 Sep 2025 19:13:22 +0800
Subject: [PATCH 8/8] dev/reformat

---
 .../dataset/rag_pipeline/rag_pipeline_workflow.py      | 2 +-
 api/services/rag_pipeline/pipeline_generate_service.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/api/controllers/service_api/dataset/rag_pipeline/rag_pipeline_workflow.py b/api/controllers/service_api/dataset/rag_pipeline/rag_pipeline_workflow.py
index c386e8f41e..55bfdde009 100644
--- a/api/controllers/service_api/dataset/rag_pipeline/rag_pipeline_workflow.py
+++ b/api/controllers/service_api/dataset/rag_pipeline/rag_pipeline_workflow.py
@@ -67,7 +67,7 @@ class DatasourceNodeRunApi(DatasetApiResource):
     """Resource for datasource node run."""
 
     @service_api_ns.doc(shortcut="pipeline_datasource_node_run")
-    @service_api_ns.doc(description="Run a atasource node for a rag pipeline")
+    @service_api_ns.doc(description="Run a datasource node for a rag pipeline")
     @service_api_ns.doc(
         path={
             "dataset_id": "Dataset ID",
diff --git a/api/services/rag_pipeline/pipeline_generate_service.py b/api/services/rag_pipeline/pipeline_generate_service.py
index 51e03d11c7..563174c528 100644
--- a/api/services/rag_pipeline/pipeline_generate_service.py
+++ b/api/services/rag_pipeline/pipeline_generate_service.py
@@ -32,9 +32,9 @@ class PipelineGenerateService:
         """
         try:
             workflow = cls._get_workflow(pipeline, invoke_from)
-            if args.get("original_document_id"):
+            if original_document_id := args.get("original_document_id"):
                 # update document status to waiting
-                cls.update_document_status(args.get("original_document_id", ""))
+                cls.update_document_status(original_document_id)
             return PipelineGenerator.convert_to_event_stream(
                 PipelineGenerator().generate(
                     pipeline=pipeline,
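Patch 8 reworks the patch-4 lookup into a named expression: the argument is read once and the placeholder `""` default disappears. A small illustration, with an example dict standing in for `args`:

```python
# Named-expression refactor from patch 8. `args` here is just an example dict.
args = {"original_document_id": "doc-42"}

# Before: two lookups, plus a "" fallback that the truthiness
# check had already ruled out.
if args.get("original_document_id"):
    doc_id = args.get("original_document_id", "")

# After: bind and test in one step (Python 3.8+ walrus operator).
if original_document_id := args.get("original_document_id"):
    print(f"resetting {original_document_id}")
```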