From f0ba6c0f21c67a37e6127a72b805505bc5d93ccb Mon Sep 17 00:00:00 2001 From: Yufeng He <40085740+he-yufeng@users.noreply.github.com> Date: Wed, 17 Jun 2026 17:16:12 +0800 Subject: [PATCH] fix(notion): add bounded timeouts to Notion API requests NotionExtractor's five httpx calls (database query, block children, block read, table rows, last-edited-time) were made without a timeout, so a slow or unresponsive Notion endpoint could hang document import/sync indefinitely and tie up a worker. Add a module-level NOTION_REQUEST_TIMEOUT and pass it to each call, matching the bounded-timeout pattern recently applied to the Nacos and Marketplace HTTP clients. Signed-off-by: Yufeng He <40085740+he-yufeng@users.noreply.github.com> --- api/core/rag/extractor/notion_extractor.py | 9 +++++ .../rag/extractor/test_notion_extractor.py | 40 +++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/api/core/rag/extractor/notion_extractor.py b/api/core/rag/extractor/notion_extractor.py index 568ccb1912a..499700e2427 100644 --- a/api/core/rag/extractor/notion_extractor.py +++ b/api/core/rag/extractor/notion_extractor.py @@ -21,6 +21,10 @@ SEARCH_URL = "https://api.notion.com/v1/search" RETRIEVE_PAGE_URL_TMPL = "https://api.notion.com/v1/pages/{page_id}" RETRIEVE_DATABASE_URL_TMPL = "https://api.notion.com/v1/databases/{database_id}" + +# Bound how long Notion API requests wait so a slow or unresponsive Notion +# endpoint cannot stall document import or sync indefinitely. +NOTION_REQUEST_TIMEOUT = httpx.Timeout(30.0, connect=10.0) # if user want split by headings, use the corresponding splitter HEADING_SPLITTER = { "heading_1": "# ", @@ -110,6 +114,7 @@ class NotionExtractor(BaseExtractor): "Notion-Version": "2022-06-28", }, json=current_query, + timeout=NOTION_REQUEST_TIMEOUT, ) response_data = res.json() @@ -179,6 +184,7 @@ class NotionExtractor(BaseExtractor): "Notion-Version": "2022-06-28", }, params=query_dict, + timeout=NOTION_REQUEST_TIMEOUT, ) if res.status_code != 200: raise ValueError(f"Error fetching Notion block data: {res.text}") @@ -241,6 +247,7 @@ class NotionExtractor(BaseExtractor): "Notion-Version": "2022-06-28", }, params=query_dict, + timeout=NOTION_REQUEST_TIMEOUT, ) data = res.json() if "results" not in data or data["results"] is None: @@ -301,6 +308,7 @@ class NotionExtractor(BaseExtractor): "Notion-Version": "2022-06-28", }, params=query_dict, + timeout=NOTION_REQUEST_TIMEOUT, ) data = res.json() # get table headers text @@ -375,6 +383,7 @@ class NotionExtractor(BaseExtractor): "Notion-Version": "2022-06-28", }, json=query_dict, + timeout=NOTION_REQUEST_TIMEOUT, ) data = res.json() diff --git a/api/tests/unit_tests/core/rag/extractor/test_notion_extractor.py b/api/tests/unit_tests/core/rag/extractor/test_notion_extractor.py index 49f7b592dcc..bd4104d0f09 100644 --- a/api/tests/unit_tests/core/rag/extractor/test_notion_extractor.py +++ b/api/tests/unit_tests/core/rag/extractor/test_notion_extractor.py @@ -498,3 +498,43 @@ class TestNotionMetadataAndCredentialMethods: monkeypatch.setattr(notion_extractor, "DatasourceProviderService", FakeProviderServiceFound) assert notion_extractor.NotionExtractor._get_access_token("tenant", "cred") == "token-from-credential" + + +class TestNotionRequestTimeouts: + """Notion API calls must carry a bounded timeout so a slow or unresponsive + endpoint cannot hang document import/sync indefinitely.""" + + def _extractor(self, page_type: str = "page"): + return notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="obj", + notion_page_type=page_type, + tenant_id="tenant", + notion_access_token="token", + ) + + def test_database_query_passes_bounded_timeout(self, mocker: MockerFixture): + extractor = self._extractor(page_type="database") + mock_post = mocker.patch( + "httpx.post", + return_value=_mock_response({"results": [], "has_more": False, "next_cursor": None}), + ) + + extractor._get_notion_database_data("db-1") + + timeout = mock_post.call_args.kwargs["timeout"] + assert timeout is notion_extractor.NOTION_REQUEST_TIMEOUT + assert isinstance(timeout, httpx.Timeout) + + def test_last_edited_time_passes_bounded_timeout(self, mocker: MockerFixture): + extractor = self._extractor(page_type="page") + mock_request = mocker.patch( + "httpx.request", + return_value=_mock_response({"last_edited_time": "2024-01-01T00:00:00.000Z"}), + ) + + extractor.get_notion_last_edited_time() + + timeout = mock_request.call_args.kwargs["timeout"] + assert timeout is notion_extractor.NOTION_REQUEST_TIMEOUT + assert isinstance(timeout, httpx.Timeout)