diff --git a/api/core/model_manager.py b/api/core/model_manager.py
index 0261a79749..e373f489d0 100644
--- a/api/core/model_manager.py
+++ b/api/core/model_manager.py
@@ -21,6 +21,7 @@ from core.model_runtime.model_providers.__base.speech2text_model import Speech2T
 from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
 from core.model_runtime.model_providers.__base.tts_model import TTSModel
 from core.provider_manager import ProviderManager
+from core.workflow.utils.generator_timeout import with_first_token_timeout
 from extensions.ext_redis import redis_client
 from models.provider import ProviderType
 from services.enterprise.plugin_manager_service import PluginCredentialType
@@ -180,8 +181,6 @@ class ModelInstance:
 
         # Apply first token timeout wrapper for streaming responses
         if stream and first_token_timeout and first_token_timeout > 0 and isinstance(result, Generator):
-            from core.workflow.utils.generator_timeout import with_first_token_timeout
-
             result = with_first_token_timeout(result, first_token_timeout)
 
         return cast(Union[LLMResult, Generator], result)
diff --git a/api/core/workflow/nodes/llm/exc.py b/api/core/workflow/nodes/llm/exc.py
index ff2e8ad90f..4d16095296 100644
--- a/api/core/workflow/nodes/llm/exc.py
+++ b/api/core/workflow/nodes/llm/exc.py
@@ -43,11 +43,3 @@ class FileTypeNotSupportError(LLMNodeError):
 class UnsupportedPromptContentTypeError(LLMNodeError):
     def __init__(self, *, type_name: str):
         super().__init__(f"Prompt content type {type_name} is not supported.")
-
-
-class LLMFirstTokenTimeoutError(LLMNodeError):
-    """Raised when LLM request fails to receive first token within configured timeout."""
-
-    def __init__(self, timeout_ms: int):
-        self.timeout_ms = timeout_ms
-        super().__init__(f"LLM request timed out after {timeout_ms}ms without receiving first token")
diff --git a/api/core/workflow/utils/generator_timeout.py b/api/core/workflow/utils/generator_timeout.py
index 307ab70439..df4f380277 100644
--- a/api/core/workflow/utils/generator_timeout.py
+++ b/api/core/workflow/utils/generator_timeout.py
@@ -42,13 +42,15 @@ def with_first_token_timeout(
         FirstTokenTimeoutError: If first item doesn't arrive within timeout
     """
     start_time = time.monotonic()
-    first_token_received = False
 
-    for item in generator:
-        if not first_token_received:
-            current_time = time.monotonic()
-            if current_time - start_time > timeout_seconds:
-                raise FirstTokenTimeoutError(int(timeout_seconds * 1000))
-            first_token_received = True
+    # Handle first item separately to check timeout only once
+    try:
+        first_item = next(generator)
+        if time.monotonic() - start_time > timeout_seconds:
+            raise FirstTokenTimeoutError(int(timeout_seconds * 1000))
+        yield first_item
+    except StopIteration:
+        return
 
-        yield item
+    # Yield remaining items without timeout checks
+    yield from generator
diff --git a/api/tests/unit_tests/core/workflow/nodes/llm/test_first_token_timeout.py b/api/tests/unit_tests/core/workflow/nodes/llm/test_first_token_timeout.py
index 58a86f0335..09a6bb6984 100644
--- a/api/tests/unit_tests/core/workflow/nodes/llm/test_first_token_timeout.py
+++ b/api/tests/unit_tests/core/workflow/nodes/llm/test_first_token_timeout.py
@@ -9,7 +9,6 @@
 import pytest
 
 from core.model_runtime.entities.llm_entities import LLMResultChunk, LLMResultChunkDelta
 from core.model_runtime.entities.message_entities import AssistantPromptMessage
 from core.workflow.nodes.base.entities import RetryConfig
-from core.workflow.nodes.llm.exc import LLMFirstTokenTimeoutError
 from core.workflow.utils.generator_timeout import FirstTokenTimeoutError, with_first_token_timeout
@@ -87,26 +86,6 @@ class TestRetryConfigFirstTokenTimeout:
         assert restored_config.has_first_token_timeout is True
 
 
-class TestLLMFirstTokenTimeoutError:
-    """Test cases for LLMFirstTokenTimeoutError exception."""
-
-    def test_error_message_format(self):
-        """Test that error message contains timeout value in milliseconds."""
-        error = LLMFirstTokenTimeoutError(timeout_ms=3000)
-
-        assert "3000ms" in str(error)
-        assert "first token" in str(error).lower()
-
-    def test_inherits_from_llm_node_error(self):
-        """Test that LLMFirstTokenTimeoutError inherits from LLMNodeError."""
-        from core.workflow.nodes.llm.exc import LLMNodeError
-
-        error = LLMFirstTokenTimeoutError(timeout_ms=3000)
-
-        assert isinstance(error, LLMNodeError)
-        assert isinstance(error, ValueError)
-
-
 class TestWithFirstTokenTimeout:
     """Test cases for with_first_token_timeout function."""
 