mirror of https://github.com/langgenius/dify.git
refactor(graph_engine): Merge worker management into one WorkerPool
Signed-off-by: -LAN- <laipz8200@outlook.com>
This commit is contained in:
parent
202fdfcb81
commit
64c1234724
|
|
@ -77,16 +77,15 @@ forbidden_modules =
|
|||
core.workflow.graph_engine.layers
|
||||
core.workflow.graph_engine.protocols
|
||||
|
||||
[importlinter:contract:worker-management-layers]
|
||||
name = Worker Management Layers
|
||||
type = layers
|
||||
layers =
|
||||
worker_pool
|
||||
worker_factory
|
||||
dynamic_scaler
|
||||
activity_tracker
|
||||
containers =
|
||||
[importlinter:contract:worker-management]
|
||||
name = Worker Management
|
||||
type = forbidden
|
||||
source_modules =
|
||||
core.workflow.graph_engine.worker_management
|
||||
forbidden_modules =
|
||||
core.workflow.graph_engine.orchestration
|
||||
core.workflow.graph_engine.command_processing
|
||||
core.workflow.graph_engine.event_management
|
||||
|
||||
[importlinter:contract:error-handling-strategies]
|
||||
name = Error Handling Strategies
|
||||
|
|
|
|||
|
|
@ -13,7 +13,6 @@ from typing import final
|
|||
|
||||
from flask import Flask, current_app
|
||||
|
||||
from configs import dify_config
|
||||
from core.app.entities.app_invoke_entities import InvokeFrom
|
||||
from core.workflow.entities import GraphRuntimeState
|
||||
from core.workflow.enums import NodeExecutionType
|
||||
|
|
@ -40,7 +39,7 @@ from .output_registry import OutputRegistry
|
|||
from .protocols.command_channel import CommandChannel
|
||||
from .response_coordinator import ResponseStreamCoordinator
|
||||
from .state_management import UnifiedStateManager
|
||||
from .worker_management import ActivityTracker, DynamicScaler, WorkerFactory, WorkerPool
|
||||
from .worker_management import SimpleWorkerPool
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -215,31 +214,17 @@ class GraphEngine:
|
|||
|
||||
context_vars = contextvars.copy_context()
|
||||
|
||||
# Create worker management components
|
||||
self._activity_tracker = ActivityTracker()
|
||||
self._dynamic_scaler = DynamicScaler(
|
||||
min_workers=(self._min_workers if self._min_workers is not None else dify_config.GRAPH_ENGINE_MIN_WORKERS),
|
||||
max_workers=(self._max_workers if self._max_workers is not None else dify_config.GRAPH_ENGINE_MAX_WORKERS),
|
||||
scale_up_threshold=(
|
||||
self._scale_up_threshold
|
||||
if self._scale_up_threshold is not None
|
||||
else dify_config.GRAPH_ENGINE_SCALE_UP_THRESHOLD
|
||||
),
|
||||
scale_down_idle_time=(
|
||||
self._scale_down_idle_time
|
||||
if self._scale_down_idle_time is not None
|
||||
else dify_config.GRAPH_ENGINE_SCALE_DOWN_IDLE_TIME
|
||||
),
|
||||
)
|
||||
self._worker_factory = WorkerFactory(flask_app, context_vars)
|
||||
|
||||
self._worker_pool = WorkerPool(
|
||||
# Create simple worker pool
|
||||
self._worker_pool = SimpleWorkerPool(
|
||||
ready_queue=self.ready_queue,
|
||||
event_queue=self.event_queue,
|
||||
graph=self.graph,
|
||||
worker_factory=self._worker_factory,
|
||||
dynamic_scaler=self._dynamic_scaler,
|
||||
activity_tracker=self._activity_tracker,
|
||||
flask_app=flask_app,
|
||||
context_vars=context_vars,
|
||||
min_workers=self._min_workers,
|
||||
max_workers=self._max_workers,
|
||||
scale_up_threshold=self._scale_up_threshold,
|
||||
scale_down_idle_time=self._scale_down_idle_time,
|
||||
)
|
||||
|
||||
def _validate_graph_state_consistency(self) -> None:
|
||||
|
|
@ -316,11 +301,8 @@ class GraphEngine:
|
|||
|
||||
def _start_execution(self) -> None:
|
||||
"""Start execution subsystems."""
|
||||
# Calculate initial worker count
|
||||
initial_workers = self._dynamic_scaler.calculate_initial_workers(self.graph)
|
||||
|
||||
# Start worker pool
|
||||
self._worker_pool.start(initial_workers)
|
||||
# Start worker pool (it calculates initial workers internally)
|
||||
self._worker_pool.start()
|
||||
|
||||
# Register response nodes
|
||||
for node in self.graph.nodes.values():
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ from ..command_processing import CommandProcessor
|
|||
from ..domain import GraphExecution
|
||||
from ..event_management import EventCollector
|
||||
from ..state_management import UnifiedStateManager
|
||||
from ..worker_management import WorkerPool
|
||||
from ..worker_management import SimpleWorkerPool
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from ..event_management import EventHandlerRegistry
|
||||
|
|
@ -30,7 +30,7 @@ class ExecutionCoordinator:
|
|||
event_handler: "EventHandlerRegistry",
|
||||
event_collector: EventCollector,
|
||||
command_processor: CommandProcessor,
|
||||
worker_pool: WorkerPool,
|
||||
worker_pool: SimpleWorkerPool,
|
||||
) -> None:
|
||||
"""
|
||||
Initialize the execution coordinator.
|
||||
|
|
@ -56,9 +56,7 @@ class ExecutionCoordinator:
|
|||
|
||||
def check_scaling(self) -> None:
|
||||
"""Check and perform worker scaling if needed."""
|
||||
queue_depth = self.state_manager.ready_queue.qsize()
|
||||
executing_count = self.state_manager.get_executing_count()
|
||||
self.worker_pool.check_scaling(queue_depth, executing_count)
|
||||
self.worker_pool.check_and_scale()
|
||||
|
||||
def is_execution_complete(self) -> bool:
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -5,14 +5,8 @@ This package manages the worker pool, including creation,
|
|||
scaling, and activity tracking.
|
||||
"""
|
||||
|
||||
from .activity_tracker import ActivityTracker
|
||||
from .dynamic_scaler import DynamicScaler
|
||||
from .worker_factory import WorkerFactory
|
||||
from .worker_pool import WorkerPool
|
||||
from .simple_worker_pool import SimpleWorkerPool
|
||||
|
||||
__all__ = [
|
||||
"ActivityTracker",
|
||||
"DynamicScaler",
|
||||
"WorkerFactory",
|
||||
"WorkerPool",
|
||||
"SimpleWorkerPool",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -1,76 +0,0 @@
|
|||
"""
|
||||
Activity tracker for monitoring worker activity.
|
||||
"""
|
||||
|
||||
import threading
|
||||
import time
|
||||
from typing import final
|
||||
|
||||
|
||||
@final
|
||||
class ActivityTracker:
|
||||
"""
|
||||
Tracks worker activity for scaling decisions.
|
||||
|
||||
This monitors which workers are active or idle to support
|
||||
dynamic scaling decisions.
|
||||
"""
|
||||
|
||||
def __init__(self, idle_threshold: float = 30.0) -> None:
|
||||
"""
|
||||
Initialize the activity tracker.
|
||||
|
||||
Args:
|
||||
idle_threshold: Seconds before a worker is considered idle
|
||||
"""
|
||||
self.idle_threshold = idle_threshold
|
||||
self._worker_activity: dict[int, tuple[bool, float]] = {}
|
||||
self._lock = threading.RLock()
|
||||
|
||||
def track_activity(self, worker_id: int, is_active: bool) -> None:
|
||||
"""
|
||||
Track worker activity state.
|
||||
|
||||
Args:
|
||||
worker_id: ID of the worker
|
||||
is_active: Whether the worker is active
|
||||
"""
|
||||
with self._lock:
|
||||
self._worker_activity[worker_id] = (is_active, time.time())
|
||||
|
||||
def get_idle_workers(self) -> list[int]:
|
||||
"""
|
||||
Get list of workers that have been idle too long.
|
||||
|
||||
Returns:
|
||||
List of idle worker IDs
|
||||
"""
|
||||
current_time = time.time()
|
||||
idle_workers: list[int] = []
|
||||
|
||||
with self._lock:
|
||||
for worker_id, (is_active, last_change) in self._worker_activity.items():
|
||||
if not is_active and (current_time - last_change) > self.idle_threshold:
|
||||
idle_workers.append(worker_id)
|
||||
|
||||
return idle_workers
|
||||
|
||||
def remove_worker(self, worker_id: int) -> None:
|
||||
"""
|
||||
Remove a worker from tracking.
|
||||
|
||||
Args:
|
||||
worker_id: ID of the worker to remove
|
||||
"""
|
||||
with self._lock:
|
||||
self._worker_activity.pop(worker_id, None)
|
||||
|
||||
def get_active_count(self) -> int:
|
||||
"""
|
||||
Get count of currently active workers.
|
||||
|
||||
Returns:
|
||||
Number of active workers
|
||||
"""
|
||||
with self._lock:
|
||||
return sum(1 for is_active, _ in self._worker_activity.values() if is_active)
|
||||
|
|
@ -1,101 +0,0 @@
|
|||
"""
|
||||
Dynamic scaler for worker pool sizing.
|
||||
"""
|
||||
|
||||
from typing import final
|
||||
|
||||
from core.workflow.graph import Graph
|
||||
|
||||
|
||||
@final
|
||||
class DynamicScaler:
|
||||
"""
|
||||
Manages dynamic scaling decisions for the worker pool.
|
||||
|
||||
This encapsulates the logic for when to scale up or down
|
||||
based on workload and configuration.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
min_workers: int = 2,
|
||||
max_workers: int = 10,
|
||||
scale_up_threshold: int = 5,
|
||||
scale_down_idle_time: float = 30.0,
|
||||
) -> None:
|
||||
"""
|
||||
Initialize the dynamic scaler.
|
||||
|
||||
Args:
|
||||
min_workers: Minimum number of workers
|
||||
max_workers: Maximum number of workers
|
||||
scale_up_threshold: Queue depth to trigger scale up
|
||||
scale_down_idle_time: Idle time before scaling down
|
||||
"""
|
||||
self.min_workers = min_workers
|
||||
self.max_workers = max_workers
|
||||
self.scale_up_threshold = scale_up_threshold
|
||||
self.scale_down_idle_time = scale_down_idle_time
|
||||
|
||||
def calculate_initial_workers(self, graph: Graph) -> int:
|
||||
"""
|
||||
Calculate initial worker count based on graph complexity.
|
||||
|
||||
Args:
|
||||
graph: The workflow graph
|
||||
|
||||
Returns:
|
||||
Initial number of workers to create
|
||||
"""
|
||||
node_count = len(graph.nodes)
|
||||
|
||||
# Simple heuristic: more nodes = more workers
|
||||
if node_count < 10:
|
||||
initial = self.min_workers
|
||||
elif node_count < 50:
|
||||
initial = min(4, self.max_workers)
|
||||
elif node_count < 100:
|
||||
initial = min(6, self.max_workers)
|
||||
else:
|
||||
initial = min(8, self.max_workers)
|
||||
|
||||
return max(self.min_workers, initial)
|
||||
|
||||
def should_scale_up(self, current_workers: int, queue_depth: int, executing_count: int) -> bool:
|
||||
"""
|
||||
Determine if scaling up is needed.
|
||||
|
||||
Args:
|
||||
current_workers: Current number of workers
|
||||
queue_depth: Number of nodes waiting
|
||||
executing_count: Number of nodes executing
|
||||
|
||||
Returns:
|
||||
True if should scale up
|
||||
"""
|
||||
if current_workers >= self.max_workers:
|
||||
return False
|
||||
|
||||
# Scale up if queue is deep and workers are busy
|
||||
if queue_depth > self.scale_up_threshold:
|
||||
if executing_count >= current_workers * 0.8:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def should_scale_down(self, current_workers: int, idle_workers: list[int]) -> bool:
|
||||
"""
|
||||
Determine if scaling down is appropriate.
|
||||
|
||||
Args:
|
||||
current_workers: Current number of workers
|
||||
idle_workers: List of idle worker IDs
|
||||
|
||||
Returns:
|
||||
True if should scale down
|
||||
"""
|
||||
if current_workers <= self.min_workers:
|
||||
return False
|
||||
|
||||
# Scale down if we have idle workers
|
||||
return len(idle_workers) > 0
|
||||
|
|
@ -1,332 +0,0 @@
|
|||
"""
|
||||
Enhanced worker pool with integrated activity tracking and dynamic scaling.
|
||||
|
||||
This is a proposed simplification that merges WorkerPool, ActivityTracker,
|
||||
and DynamicScaler into a single cohesive class.
|
||||
"""
|
||||
|
||||
import queue
|
||||
import threading
|
||||
import time
|
||||
from typing import TYPE_CHECKING, final
|
||||
|
||||
from configs import dify_config
|
||||
from core.workflow.graph import Graph
|
||||
from core.workflow.graph_events import GraphNodeEventBase
|
||||
|
||||
from ..worker import Worker
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from contextvars import Context
|
||||
|
||||
from flask import Flask
|
||||
|
||||
|
||||
@final
|
||||
class EnhancedWorkerPool:
|
||||
"""
|
||||
Enhanced worker pool with integrated features.
|
||||
|
||||
This class combines the responsibilities of:
|
||||
- WorkerPool: Managing worker threads
|
||||
- ActivityTracker: Tracking worker activity
|
||||
- DynamicScaler: Making scaling decisions
|
||||
|
||||
Benefits:
|
||||
- Simplified interface with fewer classes
|
||||
- Direct integration of related features
|
||||
- Reduced inter-class communication overhead
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
ready_queue: queue.Queue[str],
|
||||
event_queue: queue.Queue[GraphNodeEventBase],
|
||||
graph: Graph,
|
||||
flask_app: "Flask | None" = None,
|
||||
context_vars: "Context | None" = None,
|
||||
min_workers: int | None = None,
|
||||
max_workers: int | None = None,
|
||||
scale_up_threshold: int | None = None,
|
||||
scale_down_idle_time: float | None = None,
|
||||
) -> None:
|
||||
"""
|
||||
Initialize the enhanced worker pool.
|
||||
|
||||
Args:
|
||||
ready_queue: Queue of nodes ready for execution
|
||||
event_queue: Queue for worker events
|
||||
graph: The workflow graph
|
||||
flask_app: Optional Flask app for context preservation
|
||||
context_vars: Optional context variables
|
||||
min_workers: Minimum number of workers
|
||||
max_workers: Maximum number of workers
|
||||
scale_up_threshold: Queue depth to trigger scale up
|
||||
scale_down_idle_time: Seconds before scaling down idle workers
|
||||
"""
|
||||
self.ready_queue = ready_queue
|
||||
self.event_queue = event_queue
|
||||
self.graph = graph
|
||||
self.flask_app = flask_app
|
||||
self.context_vars = context_vars
|
||||
|
||||
# Scaling parameters
|
||||
self.min_workers = min_workers or dify_config.GRAPH_ENGINE_MIN_WORKERS
|
||||
self.max_workers = max_workers or dify_config.GRAPH_ENGINE_MAX_WORKERS
|
||||
self.scale_up_threshold = scale_up_threshold or dify_config.GRAPH_ENGINE_SCALE_UP_THRESHOLD
|
||||
self.scale_down_idle_time = scale_down_idle_time or dify_config.GRAPH_ENGINE_SCALE_DOWN_IDLE_TIME
|
||||
|
||||
# Worker management
|
||||
self.workers: list[Worker] = []
|
||||
self._worker_counter = 0
|
||||
self._lock = threading.RLock()
|
||||
self._running = False
|
||||
|
||||
# Activity tracking (integrated)
|
||||
self._worker_activity: dict[int, tuple[bool, float]] = {}
|
||||
|
||||
# Scaling control
|
||||
self._last_scale_check = time.time()
|
||||
self._scale_check_interval = 1.0 # Check scaling every second
|
||||
|
||||
def start(self, initial_count: int | None = None) -> None:
|
||||
"""
|
||||
Start the worker pool with initial workers.
|
||||
|
||||
Args:
|
||||
initial_count: Number of workers to start with (auto-calculated if None)
|
||||
"""
|
||||
with self._lock:
|
||||
if self._running:
|
||||
return
|
||||
|
||||
self._running = True
|
||||
|
||||
# Calculate initial worker count if not specified
|
||||
if initial_count is None:
|
||||
initial_count = self._calculate_initial_workers()
|
||||
|
||||
# Create initial workers
|
||||
for _ in range(initial_count):
|
||||
self._add_worker()
|
||||
|
||||
def stop(self) -> None:
|
||||
"""Stop all workers in the pool."""
|
||||
with self._lock:
|
||||
self._running = False
|
||||
|
||||
# Stop all workers
|
||||
for worker in self.workers:
|
||||
worker.stop()
|
||||
|
||||
# Wait for workers to finish
|
||||
for worker in self.workers:
|
||||
if worker.is_alive():
|
||||
worker.join(timeout=10.0)
|
||||
|
||||
self.workers.clear()
|
||||
self._worker_activity.clear()
|
||||
|
||||
def check_and_scale(self) -> None:
|
||||
"""
|
||||
Check and perform scaling if needed.
|
||||
|
||||
This method should be called periodically to adjust pool size.
|
||||
"""
|
||||
current_time = time.time()
|
||||
|
||||
# Rate limit scaling checks
|
||||
if current_time - self._last_scale_check < self._scale_check_interval:
|
||||
return
|
||||
|
||||
self._last_scale_check = current_time
|
||||
|
||||
with self._lock:
|
||||
if not self._running:
|
||||
return
|
||||
|
||||
current_count = len(self.workers)
|
||||
queue_depth = self.ready_queue.qsize()
|
||||
|
||||
# Check for scale up
|
||||
if self._should_scale_up(current_count, queue_depth):
|
||||
self._add_worker()
|
||||
|
||||
# Check for scale down
|
||||
idle_workers = self._get_idle_workers(current_time)
|
||||
if idle_workers and self._should_scale_down(current_count):
|
||||
# Remove the most idle worker
|
||||
self._remove_worker(idle_workers[0])
|
||||
|
||||
# ============= Private Methods =============
|
||||
|
||||
def _calculate_initial_workers(self) -> int:
|
||||
"""
|
||||
Calculate initial number of workers based on graph complexity.
|
||||
|
||||
Returns:
|
||||
Initial worker count
|
||||
"""
|
||||
# Simple heuristic: start with min_workers, scale based on graph size
|
||||
node_count = len(self.graph.nodes)
|
||||
|
||||
if node_count < 10:
|
||||
return self.min_workers
|
||||
elif node_count < 50:
|
||||
return min(self.min_workers + 1, self.max_workers)
|
||||
else:
|
||||
return min(self.min_workers + 2, self.max_workers)
|
||||
|
||||
def _should_scale_up(self, current_count: int, queue_depth: int) -> bool:
|
||||
"""
|
||||
Determine if pool should scale up.
|
||||
|
||||
Args:
|
||||
current_count: Current number of workers
|
||||
queue_depth: Current queue depth
|
||||
|
||||
Returns:
|
||||
True if should scale up
|
||||
"""
|
||||
if current_count >= self.max_workers:
|
||||
return False
|
||||
|
||||
# Scale up if queue is deep
|
||||
if queue_depth > self.scale_up_threshold:
|
||||
return True
|
||||
|
||||
# Scale up if all workers are busy and queue is not empty
|
||||
active_count = self._get_active_count()
|
||||
if active_count == current_count and queue_depth > 0:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def _should_scale_down(self, current_count: int) -> bool:
|
||||
"""
|
||||
Determine if pool should scale down.
|
||||
|
||||
Args:
|
||||
current_count: Current number of workers
|
||||
|
||||
Returns:
|
||||
True if should scale down
|
||||
"""
|
||||
return current_count > self.min_workers
|
||||
|
||||
def _add_worker(self) -> None:
|
||||
"""Add a new worker to the pool."""
|
||||
worker_id = self._worker_counter
|
||||
self._worker_counter += 1
|
||||
|
||||
# Create worker with activity callbacks
|
||||
worker = Worker(
|
||||
ready_queue=self.ready_queue,
|
||||
event_queue=self.event_queue,
|
||||
graph=self.graph,
|
||||
worker_id=worker_id,
|
||||
flask_app=self.flask_app,
|
||||
context_vars=self.context_vars,
|
||||
on_idle_callback=self._on_worker_idle,
|
||||
on_active_callback=self._on_worker_active,
|
||||
)
|
||||
|
||||
worker.start()
|
||||
self.workers.append(worker)
|
||||
self._worker_activity[worker_id] = (False, time.time())
|
||||
|
||||
def _remove_worker(self, worker_id: int) -> None:
|
||||
"""
|
||||
Remove a specific worker from the pool.
|
||||
|
||||
Args:
|
||||
worker_id: ID of worker to remove
|
||||
"""
|
||||
worker_to_remove = None
|
||||
for worker in self.workers:
|
||||
if worker.worker_id == worker_id:
|
||||
worker_to_remove = worker
|
||||
break
|
||||
|
||||
if worker_to_remove:
|
||||
worker_to_remove.stop()
|
||||
self.workers.remove(worker_to_remove)
|
||||
self._worker_activity.pop(worker_id, None)
|
||||
|
||||
if worker_to_remove.is_alive():
|
||||
worker_to_remove.join(timeout=1.0)
|
||||
|
||||
def _on_worker_idle(self, worker_id: int) -> None:
|
||||
"""
|
||||
Callback when worker becomes idle.
|
||||
|
||||
Args:
|
||||
worker_id: ID of the idle worker
|
||||
"""
|
||||
with self._lock:
|
||||
self._worker_activity[worker_id] = (False, time.time())
|
||||
|
||||
def _on_worker_active(self, worker_id: int) -> None:
|
||||
"""
|
||||
Callback when worker becomes active.
|
||||
|
||||
Args:
|
||||
worker_id: ID of the active worker
|
||||
"""
|
||||
with self._lock:
|
||||
self._worker_activity[worker_id] = (True, time.time())
|
||||
|
||||
def _get_idle_workers(self, current_time: float) -> list[int]:
|
||||
"""
|
||||
Get list of workers that have been idle too long.
|
||||
|
||||
Args:
|
||||
current_time: Current timestamp
|
||||
|
||||
Returns:
|
||||
List of idle worker IDs sorted by idle time (longest first)
|
||||
"""
|
||||
idle_workers: list[tuple[int, float]] = []
|
||||
|
||||
for worker_id, (is_active, last_change) in self._worker_activity.items():
|
||||
if not is_active:
|
||||
idle_time = current_time - last_change
|
||||
if idle_time > self.scale_down_idle_time:
|
||||
idle_workers.append((worker_id, idle_time))
|
||||
|
||||
# Sort by idle time (longest first)
|
||||
idle_workers.sort(key=lambda x: x[1], reverse=True)
|
||||
return [worker_id for worker_id, _ in idle_workers]
|
||||
|
||||
def _get_active_count(self) -> int:
|
||||
"""
|
||||
Get count of currently active workers.
|
||||
|
||||
Returns:
|
||||
Number of active workers
|
||||
"""
|
||||
return sum(1 for is_active, _ in self._worker_activity.values() if is_active)
|
||||
|
||||
# ============= Public Status Methods =============
|
||||
|
||||
def get_worker_count(self) -> int:
|
||||
"""Get current number of workers."""
|
||||
with self._lock:
|
||||
return len(self.workers)
|
||||
|
||||
def get_status(self) -> dict[str, int]:
|
||||
"""
|
||||
Get pool status information.
|
||||
|
||||
Returns:
|
||||
Dictionary with status information
|
||||
"""
|
||||
with self._lock:
|
||||
return {
|
||||
"total_workers": len(self.workers),
|
||||
"active_workers": self._get_active_count(),
|
||||
"idle_workers": len(self.workers) - self._get_active_count(),
|
||||
"queue_depth": self.ready_queue.qsize(),
|
||||
"min_workers": self.min_workers,
|
||||
"max_workers": self.max_workers,
|
||||
}
|
||||
|
|
@ -0,0 +1,168 @@
|
|||
"""
|
||||
Simple worker pool that consolidates functionality.
|
||||
|
||||
This is a simpler implementation that merges WorkerPool, ActivityTracker,
|
||||
DynamicScaler, and WorkerFactory into a single class.
|
||||
"""
|
||||
|
||||
import queue
|
||||
import threading
|
||||
from typing import TYPE_CHECKING, final
|
||||
|
||||
from configs import dify_config
|
||||
from core.workflow.graph import Graph
|
||||
from core.workflow.graph_events import GraphNodeEventBase
|
||||
|
||||
from ..worker import Worker
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from contextvars import Context
|
||||
|
||||
from flask import Flask
|
||||
|
||||
|
||||
@final
|
||||
class SimpleWorkerPool:
|
||||
"""
|
||||
Simple worker pool with integrated management.
|
||||
|
||||
This class consolidates all worker management functionality into
|
||||
a single, simpler implementation without excessive abstraction.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
ready_queue: queue.Queue[str],
|
||||
event_queue: queue.Queue[GraphNodeEventBase],
|
||||
graph: Graph,
|
||||
flask_app: "Flask | None" = None,
|
||||
context_vars: "Context | None" = None,
|
||||
min_workers: int | None = None,
|
||||
max_workers: int | None = None,
|
||||
scale_up_threshold: int | None = None,
|
||||
scale_down_idle_time: float | None = None,
|
||||
) -> None:
|
||||
"""
|
||||
Initialize the simple worker pool.
|
||||
|
||||
Args:
|
||||
ready_queue: Queue of nodes ready for execution
|
||||
event_queue: Queue for worker events
|
||||
graph: The workflow graph
|
||||
flask_app: Optional Flask app for context preservation
|
||||
context_vars: Optional context variables
|
||||
min_workers: Minimum number of workers
|
||||
max_workers: Maximum number of workers
|
||||
scale_up_threshold: Queue depth to trigger scale up
|
||||
scale_down_idle_time: Seconds before scaling down idle workers
|
||||
"""
|
||||
self.ready_queue = ready_queue
|
||||
self.event_queue = event_queue
|
||||
self.graph = graph
|
||||
self.flask_app = flask_app
|
||||
self.context_vars = context_vars
|
||||
|
||||
# Scaling parameters with defaults
|
||||
self.min_workers = min_workers or dify_config.GRAPH_ENGINE_MIN_WORKERS
|
||||
self.max_workers = max_workers or dify_config.GRAPH_ENGINE_MAX_WORKERS
|
||||
self.scale_up_threshold = scale_up_threshold or dify_config.GRAPH_ENGINE_SCALE_UP_THRESHOLD
|
||||
self.scale_down_idle_time = scale_down_idle_time or dify_config.GRAPH_ENGINE_SCALE_DOWN_IDLE_TIME
|
||||
|
||||
# Worker management
|
||||
self.workers: list[Worker] = []
|
||||
self._worker_counter = 0
|
||||
self._lock = threading.RLock()
|
||||
self._running = False
|
||||
|
||||
def start(self, initial_count: int | None = None) -> None:
|
||||
"""
|
||||
Start the worker pool.
|
||||
|
||||
Args:
|
||||
initial_count: Number of workers to start with (auto-calculated if None)
|
||||
"""
|
||||
with self._lock:
|
||||
if self._running:
|
||||
return
|
||||
|
||||
self._running = True
|
||||
|
||||
# Calculate initial worker count
|
||||
if initial_count is None:
|
||||
node_count = len(self.graph.nodes)
|
||||
if node_count < 10:
|
||||
initial_count = self.min_workers
|
||||
elif node_count < 50:
|
||||
initial_count = min(self.min_workers + 1, self.max_workers)
|
||||
else:
|
||||
initial_count = min(self.min_workers + 2, self.max_workers)
|
||||
|
||||
# Create initial workers
|
||||
for _ in range(initial_count):
|
||||
self._create_worker()
|
||||
|
||||
def stop(self) -> None:
|
||||
"""Stop all workers in the pool."""
|
||||
with self._lock:
|
||||
self._running = False
|
||||
|
||||
# Stop all workers
|
||||
for worker in self.workers:
|
||||
worker.stop()
|
||||
|
||||
# Wait for workers to finish
|
||||
for worker in self.workers:
|
||||
if worker.is_alive():
|
||||
worker.join(timeout=10.0)
|
||||
|
||||
self.workers.clear()
|
||||
|
||||
def _create_worker(self) -> None:
|
||||
"""Create and start a new worker."""
|
||||
worker_id = self._worker_counter
|
||||
self._worker_counter += 1
|
||||
|
||||
worker = Worker(
|
||||
ready_queue=self.ready_queue,
|
||||
event_queue=self.event_queue,
|
||||
graph=self.graph,
|
||||
worker_id=worker_id,
|
||||
flask_app=self.flask_app,
|
||||
context_vars=self.context_vars,
|
||||
)
|
||||
|
||||
worker.start()
|
||||
self.workers.append(worker)
|
||||
|
||||
def check_and_scale(self) -> None:
|
||||
"""Check and perform scaling if needed."""
|
||||
with self._lock:
|
||||
if not self._running:
|
||||
return
|
||||
|
||||
current_count = len(self.workers)
|
||||
queue_depth = self.ready_queue.qsize()
|
||||
|
||||
# Simple scaling logic
|
||||
if queue_depth > self.scale_up_threshold and current_count < self.max_workers:
|
||||
self._create_worker()
|
||||
|
||||
def get_worker_count(self) -> int:
|
||||
"""Get current number of workers."""
|
||||
with self._lock:
|
||||
return len(self.workers)
|
||||
|
||||
def get_status(self) -> dict[str, int]:
|
||||
"""
|
||||
Get pool status information.
|
||||
|
||||
Returns:
|
||||
Dictionary with status information
|
||||
"""
|
||||
with self._lock:
|
||||
return {
|
||||
"total_workers": len(self.workers),
|
||||
"queue_depth": self.ready_queue.qsize(),
|
||||
"min_workers": self.min_workers,
|
||||
"max_workers": self.max_workers,
|
||||
}
|
||||
|
|
@ -1,76 +0,0 @@
|
|||
"""
|
||||
Factory for creating worker instances.
|
||||
"""
|
||||
|
||||
import contextvars
|
||||
import queue
|
||||
from collections.abc import Callable
|
||||
from typing import final
|
||||
|
||||
from flask import Flask
|
||||
|
||||
from core.workflow.graph import Graph
|
||||
from core.workflow.graph_events import GraphNodeEventBase
|
||||
|
||||
from ..worker import Worker
|
||||
|
||||
|
||||
@final
|
||||
class WorkerFactory:
|
||||
"""
|
||||
Factory for creating worker instances with proper context.
|
||||
|
||||
This encapsulates worker creation logic and ensures all workers
|
||||
are created with the necessary Flask and context variable setup.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
flask_app: Flask | None,
|
||||
context_vars: contextvars.Context,
|
||||
) -> None:
|
||||
"""
|
||||
Initialize the worker factory.
|
||||
|
||||
Args:
|
||||
flask_app: Flask application context
|
||||
context_vars: Context variables to propagate
|
||||
"""
|
||||
self.flask_app = flask_app
|
||||
self.context_vars = context_vars
|
||||
self._next_worker_id = 0
|
||||
|
||||
def create_worker(
|
||||
self,
|
||||
ready_queue: queue.Queue[str],
|
||||
event_queue: queue.Queue[GraphNodeEventBase],
|
||||
graph: Graph,
|
||||
on_idle_callback: Callable[[int], None] | None = None,
|
||||
on_active_callback: Callable[[int], None] | None = None,
|
||||
) -> Worker:
|
||||
"""
|
||||
Create a new worker instance.
|
||||
|
||||
Args:
|
||||
ready_queue: Queue of nodes ready for execution
|
||||
event_queue: Queue for worker events
|
||||
graph: The workflow graph
|
||||
on_idle_callback: Callback when worker becomes idle
|
||||
on_active_callback: Callback when worker becomes active
|
||||
|
||||
Returns:
|
||||
Configured worker instance
|
||||
"""
|
||||
worker_id = self._next_worker_id
|
||||
self._next_worker_id += 1
|
||||
|
||||
return Worker(
|
||||
ready_queue=ready_queue,
|
||||
event_queue=event_queue,
|
||||
graph=graph,
|
||||
worker_id=worker_id,
|
||||
flask_app=self.flask_app,
|
||||
context_vars=self.context_vars,
|
||||
on_idle_callback=on_idle_callback,
|
||||
on_active_callback=on_active_callback,
|
||||
)
|
||||
|
|
@ -1,148 +0,0 @@
|
|||
"""
|
||||
Worker pool management.
|
||||
"""
|
||||
|
||||
import queue
|
||||
import threading
|
||||
from typing import final
|
||||
|
||||
from core.workflow.graph import Graph
|
||||
from core.workflow.graph_events import GraphNodeEventBase
|
||||
|
||||
from ..worker import Worker
|
||||
from .activity_tracker import ActivityTracker
|
||||
from .dynamic_scaler import DynamicScaler
|
||||
from .worker_factory import WorkerFactory
|
||||
|
||||
|
||||
@final
|
||||
class WorkerPool:
|
||||
"""
|
||||
Manages a pool of worker threads for executing nodes.
|
||||
|
||||
This provides dynamic scaling, activity tracking, and lifecycle
|
||||
management for worker threads.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
ready_queue: queue.Queue[str],
|
||||
event_queue: queue.Queue[GraphNodeEventBase],
|
||||
graph: Graph,
|
||||
worker_factory: WorkerFactory,
|
||||
dynamic_scaler: DynamicScaler,
|
||||
activity_tracker: ActivityTracker,
|
||||
) -> None:
|
||||
"""
|
||||
Initialize the worker pool.
|
||||
|
||||
Args:
|
||||
ready_queue: Queue of nodes ready for execution
|
||||
event_queue: Queue for worker events
|
||||
graph: The workflow graph
|
||||
worker_factory: Factory for creating workers
|
||||
dynamic_scaler: Scaler for dynamic sizing
|
||||
activity_tracker: Tracker for worker activity
|
||||
"""
|
||||
self.ready_queue = ready_queue
|
||||
self.event_queue = event_queue
|
||||
self.graph = graph
|
||||
self.worker_factory = worker_factory
|
||||
self.dynamic_scaler = dynamic_scaler
|
||||
self.activity_tracker = activity_tracker
|
||||
|
||||
self.workers: list[Worker] = []
|
||||
self._lock = threading.RLock()
|
||||
self._running = False
|
||||
|
||||
def start(self, initial_count: int) -> None:
|
||||
"""
|
||||
Start the worker pool with initial workers.
|
||||
|
||||
Args:
|
||||
initial_count: Number of workers to start with
|
||||
"""
|
||||
with self._lock:
|
||||
if self._running:
|
||||
return
|
||||
|
||||
self._running = True
|
||||
|
||||
# Create initial workers
|
||||
for _ in range(initial_count):
|
||||
worker = self.worker_factory.create_worker(self.ready_queue, self.event_queue, self.graph)
|
||||
worker.start()
|
||||
self.workers.append(worker)
|
||||
|
||||
def stop(self) -> None:
|
||||
"""Stop all workers in the pool."""
|
||||
with self._lock:
|
||||
self._running = False
|
||||
|
||||
# Stop all workers
|
||||
for worker in self.workers:
|
||||
worker.stop()
|
||||
|
||||
# Wait for workers to finish
|
||||
for worker in self.workers:
|
||||
if worker.is_alive():
|
||||
worker.join(timeout=10.0)
|
||||
|
||||
self.workers.clear()
|
||||
|
||||
def scale_up(self) -> None:
|
||||
"""Add a worker to the pool if allowed."""
|
||||
with self._lock:
|
||||
if not self._running:
|
||||
return
|
||||
|
||||
if len(self.workers) >= self.dynamic_scaler.max_workers:
|
||||
return
|
||||
|
||||
worker = self.worker_factory.create_worker(self.ready_queue, self.event_queue, self.graph)
|
||||
worker.start()
|
||||
self.workers.append(worker)
|
||||
|
||||
def scale_down(self, worker_ids: list[int]) -> None:
|
||||
"""
|
||||
Remove specific workers from the pool.
|
||||
|
||||
Args:
|
||||
worker_ids: IDs of workers to remove
|
||||
"""
|
||||
with self._lock:
|
||||
if not self._running:
|
||||
return
|
||||
|
||||
if len(self.workers) <= self.dynamic_scaler.min_workers:
|
||||
return
|
||||
|
||||
workers_to_remove = [w for w in self.workers if w.worker_id in worker_ids]
|
||||
|
||||
for worker in workers_to_remove:
|
||||
worker.stop()
|
||||
self.workers.remove(worker)
|
||||
if worker.is_alive():
|
||||
worker.join(timeout=1.0)
|
||||
|
||||
def get_worker_count(self) -> int:
|
||||
"""Get current number of workers."""
|
||||
with self._lock:
|
||||
return len(self.workers)
|
||||
|
||||
def check_scaling(self, queue_depth: int, executing_count: int) -> None:
|
||||
"""
|
||||
Check and perform scaling if needed.
|
||||
|
||||
Args:
|
||||
queue_depth: Current queue depth
|
||||
executing_count: Number of executing nodes
|
||||
"""
|
||||
current_count = self.get_worker_count()
|
||||
|
||||
if self.dynamic_scaler.should_scale_up(current_count, queue_depth, executing_count):
|
||||
self.scale_up()
|
||||
|
||||
idle_workers = self.activity_tracker.get_idle_workers()
|
||||
if idle_workers:
|
||||
self.scale_down(idle_workers)
|
||||
Loading…
Reference in New Issue