feat(datasource): change datasource result type to event-stream

This commit is contained in:
Dongyu Li 2025-06-18 16:04:40 +08:00
parent e51d308312
commit 224111081b
4 changed files with 45 additions and 42 deletions

View File

@ -414,16 +414,19 @@ class RagPipelinePublishedDatasourceNodeRunApi(Resource):
raise ValueError("missing datasource_type")
rag_pipeline_service = RagPipelineService()
result = rag_pipeline_service.run_datasource_workflow_node(
pipeline=pipeline,
node_id=node_id,
user_inputs=inputs,
account=current_user,
datasource_type=datasource_type,
is_published=True,
return helper.compact_generate_response(
PipelineGenerator.convert_to_event_stream(
rag_pipeline_service.run_datasource_workflow_node(
pipeline=pipeline,
node_id=node_id,
user_inputs=inputs,
account=current_user,
datasource_type=datasource_type,
is_published=False,
)
)
)
return result
class RagPipelineDraftDatasourceNodeRunApi(Resource):
@ -455,21 +458,18 @@ class RagPipelineDraftDatasourceNodeRunApi(Resource):
raise ValueError("missing datasource_type")
rag_pipeline_service = RagPipelineService()
try:
return helper.compact_generate_response(
PipelineGenerator.convert_to_event_stream(
rag_pipeline_service.run_datasource_workflow_node(
pipeline=pipeline,
node_id=node_id,
user_inputs=inputs,
account=current_user,
datasource_type=datasource_type,
is_published=False,
)
return helper.compact_generate_response(
PipelineGenerator.convert_to_event_stream(
rag_pipeline_service.run_datasource_workflow_node(
pipeline=pipeline,
node_id=node_id,
user_inputs=inputs,
account=current_user,
datasource_type=datasource_type,
is_published=False,
)
)
except Exception as e:
print(e)
)
class RagPipelinePublishedNodeRunApi(Resource):

View File

@ -2,14 +2,14 @@ import contextvars
import datetime
import json
import logging
import random
import secrets
import threading
import time
import uuid
from collections.abc import Generator, Mapping
from typing import Any, Literal, Optional, Union, overload
from flask import Flask, copy_current_request_context, current_app, has_request_context
from flask import Flask, current_app
from pydantic import ValidationError
from sqlalchemy.orm import sessionmaker
@ -110,7 +110,7 @@ class PipelineGenerator(BaseAppGenerator):
start_node_id: str = args["start_node_id"]
datasource_type: str = args["datasource_type"]
datasource_info_list: list[Mapping[str, Any]] = args["datasource_info_list"]
batch = time.strftime("%Y%m%d%H%M%S") + str(random.randint(100000, 999999))
batch = time.strftime("%Y%m%d%H%M%S") + str(secrets.randbelow(900000) + 100000)
documents = []
if invoke_from == InvokeFrom.PUBLISHED:
for datasource_info in datasource_info_list:

View File

@ -19,9 +19,9 @@ class BaseDatasourceEvent(BaseModel):
class DatasourceCompletedEvent(BaseDatasourceEvent):
event: str = DatasourceStreamEvent.COMPLETED.value
data: Mapping[str,Any] | list = Field(..., description="result")
total: Optional[int] = Field(..., description="total")
completed: Optional[int] = Field(..., description="completed")
time_consuming: Optional[float] = Field(..., description="time consuming")
total: Optional[int] = Field(default=0, description="total")
completed: Optional[int] = Field(default=0, description="completed")
time_consuming: Optional[float] = Field(default=0.0, description="time consuming")
class DatasourceProcessingEvent(BaseDatasourceEvent):
event: str = DatasourceStreamEvent.PROCESSING.value

View File

@ -558,21 +558,24 @@ class RagPipelineService:
provider_type=datasource_runtime.datasource_provider_type(),
)
start_time = time.time()
for message in website_crawl_result:
end_time = time.time()
if message.result.status == "completed":
crawl_event = DatasourceCompletedEvent(
data=message.result.web_info_list,
total=message.result.total,
completed=message.result.completed,
time_consuming=round(end_time - start_time, 2)
)
else:
crawl_event = DatasourceProcessingEvent(
total=message.result.total,
completed=message.result.completed,
)
yield crawl_event.model_dump()
try:
for message in website_crawl_result:
end_time = time.time()
if message.result.status == "completed":
crawl_event = DatasourceCompletedEvent(
data=message.result.web_info_list,
total=message.result.total,
completed=message.result.completed,
time_consuming=round(end_time - start_time, 2)
)
else:
crawl_event = DatasourceProcessingEvent(
total=message.result.total,
completed=message.result.completed,
)
yield crawl_event.model_dump()
except Exception as e:
print(str(e))
case _:
raise ValueError(f"Unsupported datasource provider: {datasource_runtime.datasource_provider_type}")