Fix/merge fix (#25781)

Jyong 2025-09-16 19:14:33 +08:00 committed by GitHub
commit b37bef44f6
17 changed files with 82 additions and 64 deletions


@@ -61,6 +61,7 @@ from . import (
     init_validate,
     ping,
     setup,
+    spec,
     version,
 )


@@ -741,7 +741,7 @@ class DatasetApiDeleteApi(Resource):
         return {"result": "success"}, 204
 
-@console_ns.route("/datasets/<uuid:dataset_id>/api-keys/<str:status>")
+@console_ns.route("/datasets/<uuid:dataset_id>/api-keys/<string:status>")
 class DatasetEnableApiApi(Resource):
     @setup_required
     @login_required
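The swap of `str` for `string` above is a real fix, not style: Werkzeug only ships converters named `string`, `int`, `float`, `path`, `any`, and `uuid`, so a `<str:...>` rule fails as soon as it is registered. A minimal sketch with plain Flask (not Dify's `console_ns`, and a hypothetical handler):

```python
from flask import Flask

app = Flask(__name__)

# "string" is Werkzeug's built-in text converter; there is no "str" converter.
@app.route("/datasets/<uuid:dataset_id>/api-keys/<string:status>")
def dataset_api_status(dataset_id, status):
    return {"dataset_id": str(dataset_id), "status": status}

# Registering ".../<str:status>" instead raises LookupError
# ("the converter 'str' does not exist") when the URL rule is added.
```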


@@ -1,6 +1,6 @@
 from collections.abc import Sequence
 from enum import StrEnum, auto
-from typing import Any, Literal
+from typing import Any, Literal, Optional
 
 from pydantic import BaseModel, Field, field_validator


@@ -119,7 +119,7 @@ class PipelineGenerator(BaseAppGenerator):
     ) -> Union[Mapping[str, Any], Generator[Mapping | str, None, None], None]:
         # Add null check for dataset
-        with Session(db.engine) as session:
+        with Session(db.engine, expire_on_commit=False) as session:
             dataset = pipeline.retrieve_dataset(session)
             if not dataset:
                 raise ValueError("Pipeline dataset is required")


@@ -1,3 +1,5 @@
+from typing import Optional
+
 from pydantic import BaseModel, ConfigDict
 
 from models.dataset import Document


@@ -2,7 +2,7 @@
 from abc import ABC, abstractmethod
 from collections.abc import Mapping
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, Optional
 
 from configs import dify_config
 from core.rag.extractor.entity.extract_setting import ExtractSetting
@@ -64,7 +64,7 @@ class BaseIndexProcessor(ABC):
         max_tokens: int,
         chunk_overlap: int,
         separator: str,
-        embedding_model_instance: ModelInstance | None,
+        embedding_model_instance: Optional["ModelInstance"],
     ) -> TextSplitter:
         """
         Get the NodeParser object according to the processing rule.
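The `Optional["ModelInstance"]` change is likewise not cosmetic: `ModelInstance` is presumably only imported under `TYPE_CHECKING` here, and without `from __future__ import annotations` the annotation `ModelInstance | None` is evaluated eagerly at import time and raises `NameError`. The quoted form stays a lazy `ForwardRef`. A standalone sketch of the distinction (import path is hypothetical):

```python
from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING:
    # Only visible to type checkers; the name does not exist at runtime.
    from core.model_manager import ModelInstance  # hypothetical import path

def get_splitter(embedding_model_instance: Optional["ModelInstance"]) -> None:
    """Fine: the string is stored as a ForwardRef and never evaluated at import."""

# def get_splitter(embedding_model_instance: ModelInstance | None) -> None: ...
# would raise NameError when this module is imported, because the `|` union
# is built eagerly and ModelInstance is undefined at runtime.
```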


@@ -141,7 +141,7 @@ class KnowledgeIndexNode(Node):
         index_processor = IndexProcessorFactory(dataset.chunk_structure).init_index_processor()
         if original_document_id:
             segments = db.session.scalars(
-                select(DocumentSegment).where(DocumentSegment.document_id == document_id)
+                select(DocumentSegment).where(DocumentSegment.document_id == original_document_id.value)
             ).all()
             if segments:
                 index_node_ids = [segment.index_node_id for segment in segments]
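Read with the retry changes below, this hunk appears to fix stale-index cleanup: the segment lookup had been keyed on the current run's `document_id`, so when a document was re-indexed the original document's segments were never collected for removal; it now uses `original_document_id.value`, the payload of the workflow variable carried in the run's inputs.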


@@ -10,7 +10,7 @@ import re
 import time
 from datetime import datetime
 from json import JSONDecodeError
-from typing import Any, cast
+from typing import Any, Optional, cast
 
 import sqlalchemy as sa
 from sqlalchemy import DateTime, String, func, select


@@ -3,7 +3,7 @@ import logging
 from collections.abc import Mapping, Sequence
 from datetime import datetime
 from enum import StrEnum
-from typing import TYPE_CHECKING, Any, Union, cast
+from typing import TYPE_CHECKING, Any, Optional, Union, cast
 from uuid import uuid4
 
 import sqlalchemy as sa


@@ -7,7 +7,7 @@ import time
 import uuid
 from collections import Counter
 from collections.abc import Sequence
-from typing import Any, Literal
+from typing import Any, Literal, Optional
 
 import sqlalchemy as sa
 from sqlalchemy import exists, func, select


@@ -4,7 +4,8 @@ from typing import Any, Union
 from configs import dify_config
 from core.app.apps.pipeline.pipeline_generator import PipelineGenerator
 from core.app.entities.app_invoke_entities import InvokeFrom
-from models.dataset import Pipeline
+from extensions.ext_database import db
+from models.dataset import Document, Pipeline
 from models.model import Account, App, EndUser
 from models.workflow import Workflow
 from services.rag_pipeline.rag_pipeline import RagPipelineService
@@ -31,6 +32,9 @@ class PipelineGenerateService:
         """
         try:
             workflow = cls._get_workflow(pipeline, invoke_from)
+            if original_document_id := args.get("original_document_id"):
+                # update document status to waiting
+                cls.update_document_status(original_document_id)
             return PipelineGenerator.convert_to_event_stream(
                 PipelineGenerator().generate(
                     pipeline=pipeline,
@@ -97,3 +101,15 @@ class PipelineGenerateService:
             raise ValueError("Workflow not published")
         return workflow
+
+    @classmethod
+    def update_document_status(cls, document_id: str):
+        """
+        Update document status to waiting
+        :param document_id: document id
+        """
+        document = db.session.query(Document).filter(Document.id == document_id).first()
+        if document:
+            document.indexing_status = "waiting"
+            db.session.add(document)
+            db.session.commit()


@@ -1348,11 +1348,11 @@ class RagPipelineService:
                 "start_node_id": document_pipeline_excution_log.datasource_node_id,
                 "datasource_type": document_pipeline_excution_log.datasource_type,
                 "datasource_info_list": [json.loads(document_pipeline_excution_log.datasource_info)],
+                "original_document_id": document.id,
             },
             invoke_from=InvokeFrom.PUBLISHED,
             streaming=False,
             call_depth=0,
             workflow_thread_pool_id=None,
-            is_retry=True,
-            documents=[document],
         )
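Together, these service-layer hunks replace the removed `is_retry`/`documents` parameters: the retry path now puts `original_document_id` into the run's inputs, `PipelineGenerateService` resets that document's `indexing_status` to `"waiting"` before streaming the run, and `KnowledgeIndexNode` (above) uses the same id to find the stale segments to purge before re-indexing.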


@@ -282,7 +282,7 @@ workflow:
       conditions:
       - comparison_operator: is
        id: 9da88d93-3ff6-463f-abfd-6bcafbf2554d
-       value: xlsx
+       value: .xlsx
        varType: file
        variable_selector:
        - '1752479895761'
@@ -290,7 +290,7 @@ workflow:
        - extension
      - comparison_operator: is
        id: d0e88f5e-dfe3-4bae-af0c-dbec267500de
-       value: xls
+       value: .xls
        varType: file
        variable_selector:
        - '1752479895761'
@@ -298,7 +298,7 @@ workflow:
        - extension
      - comparison_operator: is
        id: a957e91e-1ed7-4c6b-9c80-2f0948858f1d
-       value: md
+       value: .md
        varType: file
        variable_selector:
        - '1752479895761'
@@ -306,7 +306,7 @@ workflow:
        - extension
      - comparison_operator: is
        id: 870c3c39-8d3f-474a-ab8b-9c0ccf53db73
-       value: markdown
+       value: .markdown
        varType: file
        variable_selector:
        - '1752479895761'
@@ -314,7 +314,7 @@ workflow:
        - extension
      - comparison_operator: is
        id: f9541513-1e71-4dc1-9db5-35dc84a39e3c
-       value: mdx
+       value: .mdx
        varType: file
        variable_selector:
        - '1752479895761'
@@ -322,7 +322,7 @@ workflow:
        - extension
      - comparison_operator: is
        id: 4c7f455b-ac20-40ca-9495-6cc44ffcb35d
-       value: html
+       value: .html
        varType: file
        variable_selector:
        - '1752479895761'
@@ -330,7 +330,7 @@ workflow:
        - extension
      - comparison_operator: is
        id: 2e12d9c7-8057-4a09-8851-f9fd1d0718d1
-       value: htm
+       value: .htm
        varType: file
        variable_selector:
        - '1752479895761'
@@ -338,7 +338,7 @@ workflow:
        - extension
      - comparison_operator: is
        id: 73a995a9-d8b9-4aef-89f7-306e2ddcbce2
-       value: docx
+       value: .docx
        varType: file
        variable_selector:
        - '1752479895761'
@@ -346,7 +346,7 @@ workflow:
        - extension
      - comparison_operator: is
        id: 8a2e8772-0426-458b-a1f9-9eaaec0f27c8
-       value: csv
+       value: .csv
        varType: file
        variable_selector:
        - '1752479895761'
@@ -354,7 +354,7 @@ workflow:
        - extension
      - comparison_operator: is
        id: aa2cb6b6-a2fc-462a-a9f5-c9c3f33a1602
-       value: txt
+       value: .txt
        varType: file
        variable_selector:
        - '1752479895761'
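All ten conditions in this template (and in the two identical template diffs below) gain a leading dot, which implies the file `extension` variable carries the dot, in the style of what `os.path.splitext` produces. A quick illustration of why the old exact-match (`is`) conditions could never fire:

```python
import os

# splitext-style extensions keep the leading dot, so an exact comparison
# against "xlsx" always fails.
name, ext = os.path.splitext("report.xlsx")
print(ext)             # ".xlsx"
print(ext == "xlsx")   # False -- the old condition value
print(ext == ".xlsx")  # True  -- the corrected value
```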


@@ -282,7 +282,7 @@ workflow:
      conditions:
      - comparison_operator: is
        id: 9da88d93-3ff6-463f-abfd-6bcafbf2554d
-       value: xlsx
+       value: .xlsx
        varType: file
        variable_selector:
        - '1752479895761'
@@ -290,7 +290,7 @@ workflow:
        - extension
      - comparison_operator: is
        id: d0e88f5e-dfe3-4bae-af0c-dbec267500de
-       value: xls
+       value: .xls
        varType: file
        variable_selector:
        - '1752479895761'
@@ -298,7 +298,7 @@ workflow:
        - extension
      - comparison_operator: is
        id: a957e91e-1ed7-4c6b-9c80-2f0948858f1d
-       value: md
+       value: .md
        varType: file
        variable_selector:
        - '1752479895761'
@@ -306,7 +306,7 @@ workflow:
        - extension
      - comparison_operator: is
        id: 870c3c39-8d3f-474a-ab8b-9c0ccf53db73
-       value: markdown
+       value: .markdown
        varType: file
        variable_selector:
        - '1752479895761'
@@ -314,7 +314,7 @@ workflow:
        - extension
      - comparison_operator: is
        id: f9541513-1e71-4dc1-9db5-35dc84a39e3c
-       value: mdx
+       value: .mdx
        varType: file
        variable_selector:
        - '1752479895761'
@@ -322,7 +322,7 @@ workflow:
        - extension
      - comparison_operator: is
        id: 4c7f455b-ac20-40ca-9495-6cc44ffcb35d
-       value: html
+       value: .html
        varType: file
        variable_selector:
        - '1752479895761'
@@ -330,7 +330,7 @@ workflow:
        - extension
      - comparison_operator: is
        id: 2e12d9c7-8057-4a09-8851-f9fd1d0718d1
-       value: htm
+       value: .htm
        varType: file
        variable_selector:
        - '1752479895761'
@@ -338,7 +338,7 @@ workflow:
        - extension
      - comparison_operator: is
        id: 73a995a9-d8b9-4aef-89f7-306e2ddcbce2
-       value: docx
+       value: .docx
        varType: file
        variable_selector:
        - '1752479895761'
@@ -346,7 +346,7 @@ workflow:
        - extension
      - comparison_operator: is
        id: 8a2e8772-0426-458b-a1f9-9eaaec0f27c8
-       value: csv
+       value: .csv
        varType: file
        variable_selector:
        - '1752479895761'
@@ -354,7 +354,7 @@ workflow:
        - extension
      - comparison_operator: is
        id: aa2cb6b6-a2fc-462a-a9f5-c9c3f33a1602
-       value: txt
+       value: .txt
        varType: file
        variable_selector:
        - '1752479895761'


@@ -281,7 +281,7 @@ workflow:
      conditions:
      - comparison_operator: is
        id: 9da88d93-3ff6-463f-abfd-6bcafbf2554d
-       value: xlsx
+       value: .xlsx
        varType: file
        variable_selector:
        - '1752479895761'
@@ -289,7 +289,7 @@ workflow:
        - extension
      - comparison_operator: is
        id: d0e88f5e-dfe3-4bae-af0c-dbec267500de
-       value: xls
+       value: .xls
        varType: file
        variable_selector:
        - '1752479895761'
@@ -297,7 +297,7 @@ workflow:
        - extension
      - comparison_operator: is
        id: a957e91e-1ed7-4c6b-9c80-2f0948858f1d
-       value: md
+       value: .md
        varType: file
        variable_selector:
        - '1752479895761'
@@ -305,7 +305,7 @@ workflow:
        - extension
      - comparison_operator: is
        id: 870c3c39-8d3f-474a-ab8b-9c0ccf53db73
-       value: markdown
+       value: .markdown
        varType: file
        variable_selector:
        - '1752479895761'
@@ -313,7 +313,7 @@ workflow:
        - extension
      - comparison_operator: is
        id: f9541513-1e71-4dc1-9db5-35dc84a39e3c
-       value: mdx
+       value: .mdx
        varType: file
        variable_selector:
        - '1752479895761'
@@ -321,7 +321,7 @@ workflow:
        - extension
      - comparison_operator: is
        id: 4c7f455b-ac20-40ca-9495-6cc44ffcb35d
-       value: html
+       value: .html
        varType: file
        variable_selector:
        - '1752479895761'
@@ -329,7 +329,7 @@ workflow:
        - extension
      - comparison_operator: is
        id: 2e12d9c7-8057-4a09-8851-f9fd1d0718d1
-       value: htm
+       value: .htm
        varType: file
        variable_selector:
        - '1752479895761'
@@ -337,7 +337,7 @@ workflow:
        - extension
      - comparison_operator: is
        id: 73a995a9-d8b9-4aef-89f7-306e2ddcbce2
-       value: docx
+       value: .docx
        varType: file
        variable_selector:
        - '1752479895761'
@@ -345,7 +345,7 @@ workflow:
        - extension
      - comparison_operator: is
        id: 8a2e8772-0426-458b-a1f9-9eaaec0f27c8
-       value: csv
+       value: .csv
        varType: file
        variable_selector:
        - '1752479895761'
@@ -353,7 +353,7 @@ workflow:
        - extension
      - comparison_operator: is
        id: aa2cb6b6-a2fc-462a-a9f5-c9c3f33a1602
-       value: txt
+       value: .txt
        varType: file
        variable_selector:
        - '1752479895761'


@@ -103,7 +103,7 @@ def run_single_rag_pipeline_task(rag_pipeline_invoke_entity: Mapping[str, Any],
         workflow_thread_pool_id = rag_pipeline_invoke_entity_model.workflow_thread_pool_id
         application_generate_entity = rag_pipeline_invoke_entity_model.application_generate_entity
 
-        with Session(db.engine) as session:
+        with Session(db.engine, expire_on_commit=False) as session:
             # Load required entities
             account = session.query(Account).filter(Account.id == user_id).first()
             if not account:
@@ -144,30 +144,30 @@ def run_single_rag_pipeline_task(rag_pipeline_invoke_entity: Mapping[str, Any],
                 triggered_from=WorkflowNodeExecutionTriggeredFrom.RAG_PIPELINE_RUN,
             )
 
-            # Set the user directly in g for preserve_flask_contexts
-            g._login_user = account
+        # Set the user directly in g for preserve_flask_contexts
+        g._login_user = account
 
-            # Copy context for passing to pipeline generator
-            context = contextvars.copy_context()
+        # Copy context for passing to pipeline generator
+        context = contextvars.copy_context()
 
-            # Direct execution without creating another thread
-            # Since we're already in a thread pool, no need for nested threading
-            from core.app.apps.pipeline.pipeline_generator import PipelineGenerator
+        # Direct execution without creating another thread
+        # Since we're already in a thread pool, no need for nested threading
+        from core.app.apps.pipeline.pipeline_generator import PipelineGenerator
 
-            pipeline_generator = PipelineGenerator()
-            pipeline_generator._generate(
-                flask_app=flask_app,
-                context=context,
-                pipeline=pipeline,
-                workflow_id=workflow_id,
-                user=account,
-                application_generate_entity=entity,
-                invoke_from=InvokeFrom.PUBLISHED,
-                workflow_execution_repository=workflow_execution_repository,
-                workflow_node_execution_repository=workflow_node_execution_repository,
-                streaming=streaming,
-                workflow_thread_pool_id=workflow_thread_pool_id,
-            )
+        pipeline_generator = PipelineGenerator()
+        pipeline_generator._generate(
+            flask_app=flask_app,
+            context=context,
+            pipeline=pipeline,
+            workflow_id=workflow_id,
+            user=account,
+            application_generate_entity=entity,
+            invoke_from=InvokeFrom.PUBLISHED,
+            workflow_execution_repository=workflow_execution_repository,
+            workflow_node_execution_repository=workflow_node_execution_repository,
+            streaming=streaming,
+            workflow_thread_pool_id=workflow_thread_pool_id,
+        )
     except Exception:
         logging.exception("Error in priority pipeline task")
         raise


@@ -29,7 +29,6 @@ def retry_document_indexing_task(dataset_id: str, document_ids: list[str], user_
     Usage: retry_document_indexing_task.delay(dataset_id, document_ids, user_id)
     """
     start_at = time.perf_counter()
-    print("sadaddadadaaaadadadadsdsadasdadasdasda")
     try:
         dataset = db.session.query(Dataset).where(Dataset.id == dataset_id).first()
         if not dataset: