diff --git a/api/controllers/console/__init__.py b/api/controllers/console/__init__.py
index 4e54fa9220..4339501f73 100644
--- a/api/controllers/console/__init__.py
+++ b/api/controllers/console/__init__.py
@@ -61,6 +61,7 @@ from . import (
     init_validate,
     ping,
     setup,
+    spec,
     version,
 )
diff --git a/api/controllers/console/datasets/datasets.py b/api/controllers/console/datasets/datasets.py
index 3a13bb6b67..a1ae941d4b 100644
--- a/api/controllers/console/datasets/datasets.py
+++ b/api/controllers/console/datasets/datasets.py
@@ -741,7 +741,7 @@ class DatasetApiDeleteApi(Resource):
         return {"result": "success"}, 204
 
 
-@console_ns.route("/datasets//api-keys/")
+@console_ns.route("/datasets//api-keys/")
 class DatasetEnableApiApi(Resource):
     @setup_required
     @login_required
diff --git a/api/core/app/app_config/entities.py b/api/core/app/app_config/entities.py
index 2ad81fe005..3dbc8706d3 100644
--- a/api/core/app/app_config/entities.py
+++ b/api/core/app/app_config/entities.py
@@ -1,6 +1,6 @@
 from collections.abc import Sequence
 from enum import StrEnum, auto
-from typing import Any, Literal
+from typing import Any, Literal, Optional
 
 from pydantic import BaseModel, Field, field_validator
diff --git a/api/core/app/apps/pipeline/pipeline_generator.py b/api/core/app/apps/pipeline/pipeline_generator.py
index 574d9c71bf..9be8ec82ce 100644
--- a/api/core/app/apps/pipeline/pipeline_generator.py
+++ b/api/core/app/apps/pipeline/pipeline_generator.py
@@ -119,7 +119,7 @@ class PipelineGenerator(BaseAppGenerator):
     ) -> Union[Mapping[str, Any], Generator[Mapping | str, None, None], None]:
         # Add null check for dataset
-        with Session(db.engine) as session:
+        with Session(db.engine, expire_on_commit=False) as session:
             dataset = pipeline.retrieve_dataset(session)
             if not dataset:
                 raise ValueError("Pipeline dataset is required")
diff --git a/api/core/rag/extractor/entity/extract_setting.py b/api/core/rag/extractor/entity/extract_setting.py
index 0a57c792f1..c0e79b02c4 100644
--- a/api/core/rag/extractor/entity/extract_setting.py
+++ b/api/core/rag/extractor/entity/extract_setting.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 from pydantic import BaseModel, ConfigDict
 
 from models.dataset import Document
diff --git a/api/core/rag/index_processor/index_processor_base.py b/api/core/rag/index_processor/index_processor_base.py
index b3fc4ac221..05cffb5a55 100644
--- a/api/core/rag/index_processor/index_processor_base.py
+++ b/api/core/rag/index_processor/index_processor_base.py
@@ -2,7 +2,7 @@
 
 from abc import ABC, abstractmethod
 from collections.abc import Mapping
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, Optional
 
 from configs import dify_config
 from core.rag.extractor.entity.extract_setting import ExtractSetting
@@ -64,7 +64,7 @@ class BaseIndexProcessor(ABC):
         max_tokens: int,
         chunk_overlap: int,
         separator: str,
-        embedding_model_instance: ModelInstance | None,
+        embedding_model_instance: Optional["ModelInstance"],
     ) -> TextSplitter:
         """
         Get the NodeParser object according to the processing rule.
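A note on the `Optional` imports that recur through this patch: `Optional["ModelInstance"]` works where `ModelInstance | None` does not when `ModelInstance` is imported only under `TYPE_CHECKING` — the bare union is evaluated at function-definition time and raises `NameError`, while the quoted form stays a lazy forward reference. A minimal sketch of the pattern (the function name and the `core.model_manager` import path are illustrative assumptions, not taken from this diff):

from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING:
    # Imported only for type checkers; skipped at runtime, which avoids
    # a runtime import cycle through the model-manager module.
    from core.model_manager import ModelInstance

def splitter_for(embedding_model_instance: Optional["ModelInstance"] = None):
    # The quoted annotation wraps a forward reference, so this function
    # defines cleanly even though ModelInstance is not a runtime name here;
    # the bare union `ModelInstance | None` would raise NameError on import.
    ...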
diff --git a/api/core/workflow/nodes/knowledge_index/knowledge_index_node.py b/api/core/workflow/nodes/knowledge_index/knowledge_index_node.py
index d970d7480c..d7641bc123 100644
--- a/api/core/workflow/nodes/knowledge_index/knowledge_index_node.py
+++ b/api/core/workflow/nodes/knowledge_index/knowledge_index_node.py
@@ -141,7 +141,7 @@ class KnowledgeIndexNode(Node):
         index_processor = IndexProcessorFactory(dataset.chunk_structure).init_index_processor()
         if original_document_id:
             segments = db.session.scalars(
-                select(DocumentSegment).where(DocumentSegment.document_id == document_id)
+                select(DocumentSegment).where(DocumentSegment.document_id == original_document_id.value)
             ).all()
             if segments:
                 index_node_ids = [segment.index_node_id for segment in segments]
diff --git a/api/models/dataset.py b/api/models/dataset.py
index 248c436dfa..2c03a0c30c 100644
--- a/api/models/dataset.py
+++ b/api/models/dataset.py
@@ -10,7 +10,7 @@ import re
 import time
 from datetime import datetime
 from json import JSONDecodeError
-from typing import Any, cast
+from typing import Any, Optional, cast
 
 import sqlalchemy as sa
 from sqlalchemy import DateTime, String, func, select
diff --git a/api/models/workflow.py b/api/models/workflow.py
index a25d65669a..bb7ea2c074 100644
--- a/api/models/workflow.py
+++ b/api/models/workflow.py
@@ -3,7 +3,7 @@ import logging
 from collections.abc import Mapping, Sequence
 from datetime import datetime
 from enum import StrEnum
-from typing import TYPE_CHECKING, Any, Union, cast
+from typing import TYPE_CHECKING, Any, Optional, Union, cast
 from uuid import uuid4
 
 import sqlalchemy as sa
diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py
index ed2301e172..400b00ef83 100644
--- a/api/services/dataset_service.py
+++ b/api/services/dataset_service.py
@@ -7,7 +7,7 @@ import time
 import uuid
 from collections import Counter
 from collections.abc import Sequence
-from typing import Any, Literal
+from typing import Any, Literal, Optional
 
 import sqlalchemy as sa
 from sqlalchemy import exists, func, select
diff --git a/api/services/rag_pipeline/pipeline_generate_service.py b/api/services/rag_pipeline/pipeline_generate_service.py
index da67801877..563174c528 100644
--- a/api/services/rag_pipeline/pipeline_generate_service.py
+++ b/api/services/rag_pipeline/pipeline_generate_service.py
@@ -4,7 +4,8 @@ from typing import Any, Union
 
 from configs import dify_config
 from core.app.apps.pipeline.pipeline_generator import PipelineGenerator
 from core.app.entities.app_invoke_entities import InvokeFrom
-from models.dataset import Pipeline
+from extensions.ext_database import db
+from models.dataset import Document, Pipeline
 from models.model import Account, App, EndUser
 from models.workflow import Workflow
 from services.rag_pipeline.rag_pipeline import RagPipelineService
@@ -31,6 +32,9 @@ class PipelineGenerateService:
         """
         try:
             workflow = cls._get_workflow(pipeline, invoke_from)
+            if original_document_id := args.get("original_document_id"):
+                # update document status to waiting
+                cls.update_document_status(original_document_id)
             return PipelineGenerator.convert_to_event_stream(
                 PipelineGenerator().generate(
                     pipeline=pipeline,
@@ -97,3 +101,15 @@ class PipelineGenerateService:
             raise ValueError("Workflow not published")
 
         return workflow
+
+    @classmethod
+    def update_document_status(cls, document_id: str):
+        """
+        Update document status to waiting
+        :param document_id: document id
+        """
+        document = db.session.query(Document).filter(Document.id == document_id).first()
+        if document:
+            document.indexing_status = "waiting"
+            db.session.add(document)
+            db.session.commit()
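With `update_document_status` in place, a retry no longer hands the ORM `Document` through the generator — the `documents=[document]` argument is dropped in the next hunk in favor of an `original_document_id` entry in `args` — and the service resets the document's `indexing_status` before the run is dispatched. The same reset in the 2.0-style `select()` API used elsewhere in this patch would look roughly like this (a sketch, not part of the diff; the helper name is invented):

from sqlalchemy import select

from extensions.ext_database import db
from models.dataset import Document

def reset_document_to_waiting(document_id: str) -> None:
    # Re-running a pipeline over an existing document restarts its indexing
    # lifecycle, so the status goes back to "waiting" before the run is queued.
    document = db.session.scalars(select(Document).where(Document.id == document_id)).first()
    if document:
        document.indexing_status = "waiting"
        db.session.commit()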
document.indexing_status = "waiting" + db.session.add(document) + db.session.commit() diff --git a/api/services/rag_pipeline/rag_pipeline.py b/api/services/rag_pipeline/rag_pipeline.py index 4f97e0f9bc..99c192d709 100644 --- a/api/services/rag_pipeline/rag_pipeline.py +++ b/api/services/rag_pipeline/rag_pipeline.py @@ -1348,11 +1348,11 @@ class RagPipelineService: "start_node_id": document_pipeline_excution_log.datasource_node_id, "datasource_type": document_pipeline_excution_log.datasource_type, "datasource_info_list": [json.loads(document_pipeline_excution_log.datasource_info)], + "original_document_id": document.id, }, invoke_from=InvokeFrom.PUBLISHED, streaming=False, call_depth=0, workflow_thread_pool_id=None, is_retry=True, - documents=[document], ) diff --git a/api/services/rag_pipeline/transform/file-general-economy.yml b/api/services/rag_pipeline/transform/file-general-economy.yml index 147920a878..daad0ea166 100644 --- a/api/services/rag_pipeline/transform/file-general-economy.yml +++ b/api/services/rag_pipeline/transform/file-general-economy.yml @@ -282,7 +282,7 @@ workflow: conditions: - comparison_operator: is id: 9da88d93-3ff6-463f-abfd-6bcafbf2554d - value: xlsx + value: .xlsx varType: file variable_selector: - '1752479895761' @@ -290,7 +290,7 @@ workflow: - extension - comparison_operator: is id: d0e88f5e-dfe3-4bae-af0c-dbec267500de - value: xls + value: .xls varType: file variable_selector: - '1752479895761' @@ -298,7 +298,7 @@ workflow: - extension - comparison_operator: is id: a957e91e-1ed7-4c6b-9c80-2f0948858f1d - value: md + value: .md varType: file variable_selector: - '1752479895761' @@ -306,7 +306,7 @@ workflow: - extension - comparison_operator: is id: 870c3c39-8d3f-474a-ab8b-9c0ccf53db73 - value: markdown + value: .markdown varType: file variable_selector: - '1752479895761' @@ -314,7 +314,7 @@ workflow: - extension - comparison_operator: is id: f9541513-1e71-4dc1-9db5-35dc84a39e3c - value: mdx + value: .mdx varType: file variable_selector: - '1752479895761' @@ -322,7 +322,7 @@ workflow: - extension - comparison_operator: is id: 4c7f455b-ac20-40ca-9495-6cc44ffcb35d - value: html + value: .html varType: file variable_selector: - '1752479895761' @@ -330,7 +330,7 @@ workflow: - extension - comparison_operator: is id: 2e12d9c7-8057-4a09-8851-f9fd1d0718d1 - value: htm + value: .htm varType: file variable_selector: - '1752479895761' @@ -338,7 +338,7 @@ workflow: - extension - comparison_operator: is id: 73a995a9-d8b9-4aef-89f7-306e2ddcbce2 - value: docx + value: .docx varType: file variable_selector: - '1752479895761' @@ -346,7 +346,7 @@ workflow: - extension - comparison_operator: is id: 8a2e8772-0426-458b-a1f9-9eaaec0f27c8 - value: csv + value: .csv varType: file variable_selector: - '1752479895761' @@ -354,7 +354,7 @@ workflow: - extension - comparison_operator: is id: aa2cb6b6-a2fc-462a-a9f5-c9c3f33a1602 - value: txt + value: .txt varType: file variable_selector: - '1752479895761' diff --git a/api/services/rag_pipeline/transform/file-general-high-quality.yml b/api/services/rag_pipeline/transform/file-general-high-quality.yml index 5d0e7817a0..fd414741d2 100644 --- a/api/services/rag_pipeline/transform/file-general-high-quality.yml +++ b/api/services/rag_pipeline/transform/file-general-high-quality.yml @@ -282,7 +282,7 @@ workflow: conditions: - comparison_operator: is id: 9da88d93-3ff6-463f-abfd-6bcafbf2554d - value: xlsx + value: .xlsx varType: file variable_selector: - '1752479895761' @@ -290,7 +290,7 @@ workflow: - extension - comparison_operator: is id: 
diff --git a/api/services/rag_pipeline/transform/file-general-high-quality.yml b/api/services/rag_pipeline/transform/file-general-high-quality.yml
index 5d0e7817a0..fd414741d2 100644
--- a/api/services/rag_pipeline/transform/file-general-high-quality.yml
+++ b/api/services/rag_pipeline/transform/file-general-high-quality.yml
@@ -282,7 +282,7 @@ workflow:
       conditions:
       - comparison_operator: is
         id: 9da88d93-3ff6-463f-abfd-6bcafbf2554d
-        value: xlsx
+        value: .xlsx
         varType: file
         variable_selector:
         - '1752479895761'
@@ -290,7 +290,7 @@ workflow:
         - extension
       - comparison_operator: is
         id: d0e88f5e-dfe3-4bae-af0c-dbec267500de
-        value: xls
+        value: .xls
         varType: file
         variable_selector:
         - '1752479895761'
@@ -298,7 +298,7 @@ workflow:
         - extension
       - comparison_operator: is
         id: a957e91e-1ed7-4c6b-9c80-2f0948858f1d
-        value: md
+        value: .md
         varType: file
         variable_selector:
         - '1752479895761'
@@ -306,7 +306,7 @@ workflow:
         - extension
       - comparison_operator: is
         id: 870c3c39-8d3f-474a-ab8b-9c0ccf53db73
-        value: markdown
+        value: .markdown
         varType: file
         variable_selector:
         - '1752479895761'
@@ -314,7 +314,7 @@ workflow:
         - extension
       - comparison_operator: is
         id: f9541513-1e71-4dc1-9db5-35dc84a39e3c
-        value: mdx
+        value: .mdx
         varType: file
         variable_selector:
         - '1752479895761'
@@ -322,7 +322,7 @@ workflow:
         - extension
       - comparison_operator: is
         id: 4c7f455b-ac20-40ca-9495-6cc44ffcb35d
-        value: html
+        value: .html
         varType: file
         variable_selector:
         - '1752479895761'
@@ -330,7 +330,7 @@ workflow:
         - extension
       - comparison_operator: is
         id: 2e12d9c7-8057-4a09-8851-f9fd1d0718d1
-        value: htm
+        value: .htm
         varType: file
         variable_selector:
         - '1752479895761'
@@ -338,7 +338,7 @@ workflow:
         - extension
       - comparison_operator: is
         id: 73a995a9-d8b9-4aef-89f7-306e2ddcbce2
-        value: docx
+        value: .docx
         varType: file
         variable_selector:
         - '1752479895761'
@@ -346,7 +346,7 @@ workflow:
         - extension
       - comparison_operator: is
         id: 8a2e8772-0426-458b-a1f9-9eaaec0f27c8
-        value: csv
+        value: .csv
         varType: file
         variable_selector:
         - '1752479895761'
@@ -354,7 +354,7 @@ workflow:
         - extension
       - comparison_operator: is
         id: aa2cb6b6-a2fc-462a-a9f5-c9c3f33a1602
-        value: txt
+        value: .txt
         varType: file
         variable_selector:
         - '1752479895761'
diff --git a/api/services/rag_pipeline/transform/file-parentchild.yml b/api/services/rag_pipeline/transform/file-parentchild.yml
index 97c108d221..af945e7d36 100644
--- a/api/services/rag_pipeline/transform/file-parentchild.yml
+++ b/api/services/rag_pipeline/transform/file-parentchild.yml
@@ -281,7 +281,7 @@ workflow:
       conditions:
       - comparison_operator: is
         id: 9da88d93-3ff6-463f-abfd-6bcafbf2554d
-        value: xlsx
+        value: .xlsx
         varType: file
         variable_selector:
         - '1752479895761'
@@ -289,7 +289,7 @@ workflow:
         - extension
       - comparison_operator: is
         id: d0e88f5e-dfe3-4bae-af0c-dbec267500de
-        value: xls
+        value: .xls
         varType: file
         variable_selector:
         - '1752479895761'
@@ -297,7 +297,7 @@ workflow:
         - extension
       - comparison_operator: is
         id: a957e91e-1ed7-4c6b-9c80-2f0948858f1d
-        value: md
+        value: .md
         varType: file
         variable_selector:
         - '1752479895761'
@@ -305,7 +305,7 @@ workflow:
         - extension
       - comparison_operator: is
         id: 870c3c39-8d3f-474a-ab8b-9c0ccf53db73
-        value: markdown
+        value: .markdown
         varType: file
         variable_selector:
         - '1752479895761'
@@ -313,7 +313,7 @@ workflow:
         - extension
       - comparison_operator: is
         id: f9541513-1e71-4dc1-9db5-35dc84a39e3c
-        value: mdx
+        value: .mdx
         varType: file
         variable_selector:
         - '1752479895761'
@@ -321,7 +321,7 @@ workflow:
         - extension
       - comparison_operator: is
         id: 4c7f455b-ac20-40ca-9495-6cc44ffcb35d
-        value: html
+        value: .html
         varType: file
         variable_selector:
         - '1752479895761'
@@ -329,7 +329,7 @@ workflow:
         - extension
       - comparison_operator: is
         id: 2e12d9c7-8057-4a09-8851-f9fd1d0718d1
-        value: htm
+        value: .htm
         varType: file
         variable_selector:
         - '1752479895761'
@@ -337,7 +337,7 @@ workflow:
         - extension
       - comparison_operator: is
         id: 73a995a9-d8b9-4aef-89f7-306e2ddcbce2
-        value: docx
+        value: .docx
         varType: file
         variable_selector:
         - '1752479895761'
@@ -345,7 +345,7 @@ workflow:
         - extension
       - comparison_operator: is
         id: 8a2e8772-0426-458b-a1f9-9eaaec0f27c8
-        value: csv
+        value: .csv
         varType: file
         variable_selector:
         - '1752479895761'
@@ -353,7 +353,7 @@ workflow:
         - extension
       - comparison_operator: is
         id: aa2cb6b6-a2fc-462a-a9f5-c9c3f33a1602
-        value: txt
+        value: .txt
         varType: file
         variable_selector:
         - '1752479895761'
diff --git a/api/tasks/rag_pipeline/priority_rag_pipeline_run_task.py b/api/tasks/rag_pipeline/priority_rag_pipeline_run_task.py
index 7021ddab38..b810507387 100644
--- a/api/tasks/rag_pipeline/priority_rag_pipeline_run_task.py
+++ b/api/tasks/rag_pipeline/priority_rag_pipeline_run_task.py
@@ -103,7 +103,7 @@ def run_single_rag_pipeline_task(rag_pipeline_invoke_entity: Mapping[str, Any],
         workflow_thread_pool_id = rag_pipeline_invoke_entity_model.workflow_thread_pool_id
         application_generate_entity = rag_pipeline_invoke_entity_model.application_generate_entity
 
-        with Session(db.engine) as session:
+        with Session(db.engine, expire_on_commit=False) as session:
             # Load required entities
             account = session.query(Account).filter(Account.id == user_id).first()
             if not account:
@@ -144,30 +144,30 @@ def run_single_rag_pipeline_task(rag_pipeline_invoke_entity: Mapping[str, Any],
                 triggered_from=WorkflowNodeExecutionTriggeredFrom.RAG_PIPELINE_RUN,
             )
 
-            # Set the user directly in g for preserve_flask_contexts
-            g._login_user = account
+        # Set the user directly in g for preserve_flask_contexts
+        g._login_user = account
 
-            # Copy context for passing to pipeline generator
-            context = contextvars.copy_context()
+        # Copy context for passing to pipeline generator
+        context = contextvars.copy_context()
 
-            # Direct execution without creating another thread
-            # Since we're already in a thread pool, no need for nested threading
-            from core.app.apps.pipeline.pipeline_generator import PipelineGenerator
+        # Direct execution without creating another thread
+        # Since we're already in a thread pool, no need for nested threading
+        from core.app.apps.pipeline.pipeline_generator import PipelineGenerator
 
-            pipeline_generator = PipelineGenerator()
-            pipeline_generator._generate(
-                flask_app=flask_app,
-                context=context,
-                pipeline=pipeline,
-                workflow_id=workflow_id,
-                user=account,
-                application_generate_entity=entity,
-                invoke_from=InvokeFrom.PUBLISHED,
-                workflow_execution_repository=workflow_execution_repository,
-                workflow_node_execution_repository=workflow_node_execution_repository,
-                streaming=streaming,
-                workflow_thread_pool_id=workflow_thread_pool_id,
-            )
+        pipeline_generator = PipelineGenerator()
+        pipeline_generator._generate(
+            flask_app=flask_app,
+            context=context,
+            pipeline=pipeline,
+            workflow_id=workflow_id,
+            user=account,
+            application_generate_entity=entity,
+            invoke_from=InvokeFrom.PUBLISHED,
+            workflow_execution_repository=workflow_execution_repository,
+            workflow_node_execution_repository=workflow_node_execution_repository,
+            streaming=streaming,
+            workflow_thread_pool_id=workflow_thread_pool_id,
+        )
     except Exception:
         logging.exception("Error in priority pipeline task")
         raise
diff --git a/api/tasks/retry_document_indexing_task.py b/api/tasks/retry_document_indexing_task.py
index ff7848eea6..f4e9b52778 100644
--- a/api/tasks/retry_document_indexing_task.py
+++ b/api/tasks/retry_document_indexing_task.py
@@ -29,7 +29,6 @@ def retry_document_indexing_task(dataset_id: str, document_ids: list[str], user_
     Usage: retry_document_indexing_task.delay(dataset_id, document_ids, user_id)
     """
     start_at = time.perf_counter()
-    print("sadaddadadaaaadadadadsdsadasdadasdasda")
     try:
         dataset = db.session.query(Dataset).where(Dataset.id == dataset_id).first()
         if not dataset:
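Two changes in priority_rag_pipeline_run_task.py work as a pair: the session is opened with `expire_on_commit=False`, and the pipeline invocation is dedented out of the `with Session(...)` block. The pairing is what makes the dedent safe — by default SQLAlchemy expires every ORM instance on commit, so touching `account` or other entities loaded inside the block after the session closes would raise `DetachedInstanceError`. A self-contained sketch of that behavior (toy model, not Dify code):

from sqlalchemy import create_engine, select
from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column

class Base(DeclarativeBase):
    pass

class Account(Base):
    __tablename__ = "accounts"
    id: Mapped[int] = mapped_column(primary_key=True)
    name: Mapped[str]

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add(Account(name="alice"))
    session.commit()

# expire_on_commit=False keeps attributes loaded after commit, so the
# instance survives the end of the with-block in a usable, detached state.
with Session(engine, expire_on_commit=False) as session:
    account = session.scalars(select(Account)).one()
    session.commit()

print(account.name)  # "alice"; with the default, this raises DetachedInstanceError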