mirror of
https://github.com/langgenius/dify.git
synced 2026-06-26 06:41:10 +08:00
fix(notion): add bounded timeouts to Notion API requests
NotionExtractor's five httpx calls (database query, block children, block read, table rows, last-edited-time) were made without a timeout, so a slow or unresponsive Notion endpoint could hang document import/sync indefinitely and tie up a worker. Add a module-level NOTION_REQUEST_TIMEOUT and pass it to each call, matching the bounded-timeout pattern recently applied to the Nacos and Marketplace HTTP clients. Signed-off-by: Yufeng He <40085740+he-yufeng@users.noreply.github.com>
This commit is contained in:
parent
8782da42c8
commit
f0ba6c0f21
@ -21,6 +21,10 @@ SEARCH_URL = "https://api.notion.com/v1/search"
|
||||
|
||||
RETRIEVE_PAGE_URL_TMPL = "https://api.notion.com/v1/pages/{page_id}"
|
||||
RETRIEVE_DATABASE_URL_TMPL = "https://api.notion.com/v1/databases/{database_id}"
|
||||
|
||||
# Bound how long Notion API requests wait so a slow or unresponsive Notion
|
||||
# endpoint cannot stall document import or sync indefinitely.
|
||||
NOTION_REQUEST_TIMEOUT = httpx.Timeout(30.0, connect=10.0)
|
||||
# If the user wants to split by headings, use the corresponding splitter
|
||||
HEADING_SPLITTER = {
|
||||
"heading_1": "# ",
|
||||
@ -110,6 +114,7 @@ class NotionExtractor(BaseExtractor):
|
||||
"Notion-Version": "2022-06-28",
|
||||
},
|
||||
json=current_query,
|
||||
timeout=NOTION_REQUEST_TIMEOUT,
|
||||
)
|
||||
|
||||
response_data = res.json()
|
||||
@ -179,6 +184,7 @@ class NotionExtractor(BaseExtractor):
|
||||
"Notion-Version": "2022-06-28",
|
||||
},
|
||||
params=query_dict,
|
||||
timeout=NOTION_REQUEST_TIMEOUT,
|
||||
)
|
||||
if res.status_code != 200:
|
||||
raise ValueError(f"Error fetching Notion block data: {res.text}")
|
||||
@ -241,6 +247,7 @@ class NotionExtractor(BaseExtractor):
|
||||
"Notion-Version": "2022-06-28",
|
||||
},
|
||||
params=query_dict,
|
||||
timeout=NOTION_REQUEST_TIMEOUT,
|
||||
)
|
||||
data = res.json()
|
||||
if "results" not in data or data["results"] is None:
|
||||
@ -301,6 +308,7 @@ class NotionExtractor(BaseExtractor):
|
||||
"Notion-Version": "2022-06-28",
|
||||
},
|
||||
params=query_dict,
|
||||
timeout=NOTION_REQUEST_TIMEOUT,
|
||||
)
|
||||
data = res.json()
|
||||
# get table headers text
|
||||
@ -375,6 +383,7 @@ class NotionExtractor(BaseExtractor):
|
||||
"Notion-Version": "2022-06-28",
|
||||
},
|
||||
json=query_dict,
|
||||
timeout=NOTION_REQUEST_TIMEOUT,
|
||||
)
|
||||
|
||||
data = res.json()
|
||||
|
||||
@ -498,3 +498,43 @@ class TestNotionMetadataAndCredentialMethods:
|
||||
monkeypatch.setattr(notion_extractor, "DatasourceProviderService", FakeProviderServiceFound)
|
||||
|
||||
assert notion_extractor.NotionExtractor._get_access_token("tenant", "cred") == "token-from-credential"
|
||||
|
||||
|
||||
class TestNotionRequestTimeouts:
    """Check that Notion API requests are sent with the shared bounded timeout,
    so a slow or unresponsive endpoint cannot hang document import/sync
    indefinitely."""

    @staticmethod
    def _assert_bounded_timeout(patched_call) -> None:
        """Assert the most recent call on *patched_call* received the module timeout.

        The ``timeout`` keyword must be the very same object as
        ``notion_extractor.NOTION_REQUEST_TIMEOUT`` and an ``httpx.Timeout``.
        """
        passed_timeout = patched_call.call_args.kwargs["timeout"]
        assert passed_timeout is notion_extractor.NOTION_REQUEST_TIMEOUT
        assert isinstance(passed_timeout, httpx.Timeout)

    def _extractor(self, page_type: str = "page"):
        """Build a NotionExtractor with placeholder identifiers for *page_type*."""
        init_kwargs = {
            "notion_workspace_id": "ws",
            "notion_obj_id": "obj",
            "notion_page_type": page_type,
            "tenant_id": "tenant",
            "notion_access_token": "token",
        }
        return notion_extractor.NotionExtractor(**init_kwargs)

    def test_database_query_passes_bounded_timeout(self, mocker: MockerFixture):
        """The database query issued through ``httpx.post`` carries the timeout."""
        database_extractor = self._extractor(page_type="database")
        # A single empty page of results, so the query is not repeated.
        empty_page = _mock_response({"results": [], "has_more": False, "next_cursor": None})
        patched_post = mocker.patch("httpx.post", return_value=empty_page)

        database_extractor._get_notion_database_data("db-1")

        self._assert_bounded_timeout(patched_post)

    def test_last_edited_time_passes_bounded_timeout(self, mocker: MockerFixture):
        """The last-edited-time lookup issued through ``httpx.request`` carries the timeout."""
        page_extractor = self._extractor(page_type="page")
        edited_payload = _mock_response({"last_edited_time": "2024-01-01T00:00:00.000Z"})
        patched_request = mocker.patch("httpx.request", return_value=edited_payload)

        page_extractor.get_notion_last_edited_time()

        self._assert_bounded_timeout(patched_request)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user