diff --git a/api/core/rag/extractor/watercrawl/client.py b/api/core/rag/extractor/watercrawl/client.py index 1f4adc0d418..b37bd38ec3e 100644 --- a/api/core/rag/extractor/watercrawl/client.py +++ b/api/core/rag/extractor/watercrawl/client.py @@ -12,6 +12,8 @@ from core.rag.extractor.watercrawl.exceptions import ( WaterCrawlPermissionError, ) +WATERCRAWL_REQUEST_TIMEOUT: httpx.Timeout = httpx.Timeout(30.0, connect=5.0) + class SpiderOptions(TypedDict): max_depth: int @@ -48,7 +50,7 @@ class BaseAPIClient: "User-Agent": "WaterCrawl-Plugin", "Accept-Language": "en-US", } - return httpx.Client(headers=headers, timeout=None) + return httpx.Client(headers=headers, timeout=WATERCRAWL_REQUEST_TIMEOUT) def _request( self, diff --git a/api/tests/unit_tests/core/rag/extractor/watercrawl/test_watercrawl.py b/api/tests/unit_tests/core/rag/extractor/watercrawl/test_watercrawl.py index 35e581ccc15..05985d30985 100644 --- a/api/tests/unit_tests/core/rag/extractor/watercrawl/test_watercrawl.py +++ b/api/tests/unit_tests/core/rag/extractor/watercrawl/test_watercrawl.py @@ -73,6 +73,9 @@ class TestBaseAPIClient: assert client.session == "session" assert captured["headers"]["X-API-Key"] == "k" assert captured["headers"]["User-Agent"] == "WaterCrawl-Plugin" + assert captured["timeout"] is not None + assert captured["timeout"].connect is not None + assert captured["timeout"].read is not None def test_request_stream_and_non_stream_paths(self, monkeypatch: pytest.MonkeyPatch): class FakeSession: