diff --git a/api/core/model_manager.py b/api/core/model_manager.py
index 0261a79749..e373f489d0 100644
--- a/api/core/model_manager.py
+++ b/api/core/model_manager.py
@@ -21,6 +21,7 @@ from core.model_runtime.model_providers.__base.speech2text_model import Speech2T
 from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
 from core.model_runtime.model_providers.__base.tts_model import TTSModel
 from core.provider_manager import ProviderManager
+from core.workflow.utils.generator_timeout import with_first_token_timeout
 from extensions.ext_redis import redis_client
 from models.provider import ProviderType
 from services.enterprise.plugin_manager_service import PluginCredentialType
@@ -180,8 +181,6 @@ class ModelInstance:
 
         # Apply first token timeout wrapper for streaming responses
         if stream and first_token_timeout and first_token_timeout > 0 and isinstance(result, Generator):
-            from core.workflow.utils.generator_timeout import with_first_token_timeout
-
             result = with_first_token_timeout(result, first_token_timeout)
 
         return cast(Union[LLMResult, Generator], result)
diff --git a/api/core/workflow/nodes/llm/exc.py b/api/core/workflow/nodes/llm/exc.py
index ff2e8ad90f..4d16095296 100644
--- a/api/core/workflow/nodes/llm/exc.py
+++ b/api/core/workflow/nodes/llm/exc.py
@@ -43,11 +43,3 @@ class FileTypeNotSupportError(LLMNodeError):
 class UnsupportedPromptContentTypeError(LLMNodeError):
     def __init__(self, *, type_name: str):
         super().__init__(f"Prompt content type {type_name} is not supported.")
-
-
-class LLMFirstTokenTimeoutError(LLMNodeError):
-    """Raised when LLM request fails to receive first token within configured timeout."""
-
-    def __init__(self, timeout_ms: int):
-        self.timeout_ms = timeout_ms
-        super().__init__(f"LLM request timed out after {timeout_ms}ms without receiving first token")
diff --git a/api/core/workflow/utils/generator_timeout.py b/api/core/workflow/utils/generator_timeout.py
index 307ab70439..df4f380277 100644
--- a/api/core/workflow/utils/generator_timeout.py
+++ b/api/core/workflow/utils/generator_timeout.py
@@ -42,13 +42,15 @@ def with_first_token_timeout(
         FirstTokenTimeoutError: If first item doesn't arrive within timeout
     """
     start_time = time.monotonic()
-    first_token_received = False
 
-    for item in generator:
-        if not first_token_received:
-            current_time = time.monotonic()
-            if current_time - start_time > timeout_seconds:
-                raise FirstTokenTimeoutError(int(timeout_seconds * 1000))
-            first_token_received = True
+    # Handle first item separately to check timeout only once
+    try:
+        first_item = next(generator)
+        if time.monotonic() - start_time > timeout_seconds:
+            raise FirstTokenTimeoutError(int(timeout_seconds * 1000))
+        yield first_item
+    except StopIteration:
+        return
 
-        yield item
+    # Yield remaining items without timeout checks
+    yield from generator
diff --git a/api/tests/unit_tests/core/workflow/nodes/llm/test_first_token_timeout.py b/api/tests/unit_tests/core/workflow/nodes/llm/test_first_token_timeout.py
index 58a86f0335..09a6bb6984 100644
--- a/api/tests/unit_tests/core/workflow/nodes/llm/test_first_token_timeout.py
+++ b/api/tests/unit_tests/core/workflow/nodes/llm/test_first_token_timeout.py
@@ -9,7 +9,6 @@
 import pytest
 
 from core.model_runtime.entities.llm_entities import LLMResultChunk, LLMResultChunkDelta
 from core.model_runtime.entities.message_entities import AssistantPromptMessage
 from core.workflow.nodes.base.entities import RetryConfig
-from core.workflow.nodes.llm.exc import LLMFirstTokenTimeoutError
 from core.workflow.utils.generator_timeout import FirstTokenTimeoutError, with_first_token_timeout
@@ -87,26 +86,6 @@ class TestRetryConfigFirstTokenTimeout:
         assert restored_config.has_first_token_timeout is True
 
 
-class TestLLMFirstTokenTimeoutError:
-    """Test cases for LLMFirstTokenTimeoutError exception."""
-
-    def test_error_message_format(self):
-        """Test that error message contains timeout value in milliseconds."""
-        error = LLMFirstTokenTimeoutError(timeout_ms=3000)
-
-        assert "3000ms" in str(error)
-        assert "first token" in str(error).lower()
-
-    def test_inherits_from_llm_node_error(self):
-        """Test that LLMFirstTokenTimeoutError inherits from LLMNodeError."""
-        from core.workflow.nodes.llm.exc import LLMNodeError
-
-        error = LLMFirstTokenTimeoutError(timeout_ms=3000)
-
-        assert isinstance(error, LLMNodeError)
-        assert isinstance(error, ValueError)
-
-
 class TestWithFirstTokenTimeout:
     """Test cases for with_first_token_timeout function."""
 