From fe64c5d4a8a4ef0fbe183aea66142f08c1e60646 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=90=BD=E5=B0=98?= Date: Tue, 16 Jun 2026 16:16:16 +0800 Subject: [PATCH] fix(watercrawl): bound result download timeout (#37495) --- api/core/rag/extractor/watercrawl/client.py | 2 +- .../core/rag/extractor/watercrawl/test_watercrawl.py | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/api/core/rag/extractor/watercrawl/client.py b/api/core/rag/extractor/watercrawl/client.py index d1ce142dbd..2cd2440253 100644 --- a/api/core/rag/extractor/watercrawl/client.py +++ b/api/core/rag/extractor/watercrawl/client.py @@ -217,7 +217,7 @@ class WaterCrawlAPIClient(BaseAPIClient): return event_data["data"] def download_result(self, result_object: dict[str, Any]): - response = httpx.get(result_object["result"], timeout=None) + response = httpx.get(result_object["result"], timeout=30) try: response.raise_for_status() result_object["result"] = response.json() diff --git a/api/tests/unit_tests/core/rag/extractor/watercrawl/test_watercrawl.py b/api/tests/unit_tests/core/rag/extractor/watercrawl/test_watercrawl.py index 95878fc688..bf5913faa3 100644 --- a/api/tests/unit_tests/core/rag/extractor/watercrawl/test_watercrawl.py +++ b/api/tests/unit_tests/core/rag/extractor/watercrawl/test_watercrawl.py @@ -242,11 +242,18 @@ class TestWaterCrawlAPIClient: client = WaterCrawlAPIClient(api_key="k") response = _response(200, {"markdown": "body"}) - monkeypatch.setattr(client_module.httpx, "get", lambda *args, **kwargs: response) + captured = {} + + def fake_get(*args, **kwargs): + captured.update(kwargs) + return response + + monkeypatch.setattr(client_module.httpx, "get", fake_get) result = client.download_result({"result": "https://example.com/result.json"}) assert result["result"] == {"markdown": "body"} + assert captured["timeout"] is not None response.close.assert_called_once()