fix(notion): add bounded timeouts to Notion API requests

NotionExtractor's five httpx calls (database query, block children, block
read, table rows, last-edited-time) were made without an explicit timeout, so
they relied on httpx's implicit default (5 seconds per operation) instead of
a limit chosen and documented for Notion. Add a module-level
NOTION_REQUEST_TIMEOUT (10s to connect, 30s for each read/write/pool
operation) and pass it to each call, so how long a slow or unresponsive
Notion endpoint can hold up document import/sync and tie up a worker is
explicit, matching the bounded-timeout pattern recently applied to the Nacos
and Marketplace HTTP clients.

Signed-off-by: Yufeng He <40085740+he-yufeng@users.noreply.github.com>
This commit is contained in:
Yufeng He 2026-06-17 17:16:12 +08:00
parent 8782da42c8
commit f0ba6c0f21
2 changed files with 49 additions and 0 deletions

View File

@ -21,6 +21,10 @@ SEARCH_URL = "https://api.notion.com/v1/search"
RETRIEVE_PAGE_URL_TMPL = "https://api.notion.com/v1/pages/{page_id}"
RETRIEVE_DATABASE_URL_TMPL = "https://api.notion.com/v1/databases/{database_id}"
# Bound how long Notion API requests wait so a slow or unresponsive Notion
# endpoint cannot stall document import or sync indefinitely.
# The positional 30.0 is the limit (in seconds) applied to every phase that is
# not overridden (read, write, pool); connect=10.0 caps connection setup.
# These are per-phase limits, not a cap on the total duration of a request.
NOTION_REQUEST_TIMEOUT = httpx.Timeout(30.0, connect=10.0)
# If the user wants to split by headings, use the corresponding splitter.
HEADING_SPLITTER = {
"heading_1": "# ",
@ -110,6 +114,7 @@ class NotionExtractor(BaseExtractor):
"Notion-Version": "2022-06-28",
},
json=current_query,
timeout=NOTION_REQUEST_TIMEOUT,
)
response_data = res.json()
@ -179,6 +184,7 @@ class NotionExtractor(BaseExtractor):
"Notion-Version": "2022-06-28",
},
params=query_dict,
timeout=NOTION_REQUEST_TIMEOUT,
)
if res.status_code != 200:
raise ValueError(f"Error fetching Notion block data: {res.text}")
@ -241,6 +247,7 @@ class NotionExtractor(BaseExtractor):
"Notion-Version": "2022-06-28",
},
params=query_dict,
timeout=NOTION_REQUEST_TIMEOUT,
)
data = res.json()
if "results" not in data or data["results"] is None:
@ -301,6 +308,7 @@ class NotionExtractor(BaseExtractor):
"Notion-Version": "2022-06-28",
},
params=query_dict,
timeout=NOTION_REQUEST_TIMEOUT,
)
data = res.json()
# get table headers text
@ -375,6 +383,7 @@ class NotionExtractor(BaseExtractor):
"Notion-Version": "2022-06-28",
},
json=query_dict,
timeout=NOTION_REQUEST_TIMEOUT,
)
data = res.json()

View File

@ -498,3 +498,43 @@ class TestNotionMetadataAndCredentialMethods:
monkeypatch.setattr(notion_extractor, "DatasourceProviderService", FakeProviderServiceFound)
assert notion_extractor.NotionExtractor._get_access_token("tenant", "cred") == "token-from-credential"
class TestNotionRequestTimeouts:
    """Verify that Notion API calls are issued with the shared bounded timeout.

    Each test patches a module-level httpx entry point, invokes one extractor
    method, and inspects the ``timeout`` keyword the patched call received.
    """

    def _extractor(self, page_type: str = "page"):
        """Build a NotionExtractor with placeholder IDs and a fixed access token.

        Args:
            page_type: Value passed as ``notion_page_type``.
        """
        return notion_extractor.NotionExtractor(
            notion_workspace_id="ws",
            notion_obj_id="obj",
            notion_page_type=page_type,
            tenant_id="tenant",
            notion_access_token="token",
        )

    @staticmethod
    def _assert_bounded_timeout(mock_call) -> None:
        """Assert the patched httpx call received a fully bounded shared timeout.

        Args:
            mock_call: The mock that replaced the httpx function; its most
                recent call must include a ``timeout`` keyword argument.
        """
        timeout = mock_call.call_args.kwargs["timeout"]
        assert timeout is notion_extractor.NOTION_REQUEST_TIMEOUT
        assert isinstance(timeout, httpx.Timeout)
        # httpx treats ``None`` as "no limit" for a phase, so being a Timeout
        # instance is not enough: every phase needs a concrete value for the
        # request to actually be bounded.
        for phase in ("connect", "read", "write", "pool"):
            assert getattr(timeout, phase) is not None, f"{phase} timeout is unbounded"

    def test_database_query_passes_bounded_timeout(self, mocker: MockerFixture):
        """The database query POST must carry the shared bounded timeout."""
        extractor = self._extractor(page_type="database")
        mock_post = mocker.patch(
            "httpx.post",
            return_value=_mock_response({"results": [], "has_more": False, "next_cursor": None}),
        )

        extractor._get_notion_database_data("db-1")

        self._assert_bounded_timeout(mock_post)

    def test_last_edited_time_passes_bounded_timeout(self, mocker: MockerFixture):
        """The last-edited-time request must carry the shared bounded timeout."""
        extractor = self._extractor(page_type="page")
        mock_request = mocker.patch(
            "httpx.request",
            return_value=_mock_response({"last_edited_time": "2024-01-01T00:00:00.000Z"}),
        )

        extractor.get_notion_last_edited_time()

        self._assert_bounded_timeout(mock_request)