fix: prevent recursion error when SharePoint folder is empty (#36372)

This commit is contained in:
EvanYao 2026-05-20 11:56:49 +08:00 committed by GitHub
parent 718ab8433e
commit 7bc5c89e3c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 158 additions and 2 deletions

View File

@ -791,10 +791,25 @@ class PipelineGenerator(BaseAppGenerator):
all_files: list,
datasource_info: Mapping[str, Any],
next_page_parameters: dict[str, Any] | None = None,
_visited_folder_ids: set[str] | None = None,
):
"""
Get files in a folder.
Recursively lists all files inside the given folder prefix.
``_visited_folder_ids`` tracks folders already expanded so that a
self-referencing folder (where the API returns the folder as its own
child) cannot cause infinite recursion.
"""
if _visited_folder_ids is None:
_visited_folder_ids = set()
# Guard: skip folders we have already expanded to prevent infinite
# recursion from self-referencing folder entries in the API response.
if prefix in _visited_folder_ids:
return
_visited_folder_ids.add(prefix)
result_generator = datasource_runtime.online_drive_browse_files(
user_id=user_id,
request=OnlineDriveBrowseFilesRequest(
@ -806,10 +821,14 @@ class PipelineGenerator(BaseAppGenerator):
provider_type=datasource_runtime.datasource_provider_type(),
)
is_truncated = False
has_files = False
for result in result_generator:
for files in result.result:
for file in files.files:
has_files = True
if file.type == "folder":
if file.id in _visited_folder_ids:
continue
self._get_files_in_folder(
datasource_runtime,
file.id,
@ -818,6 +837,7 @@ class PipelineGenerator(BaseAppGenerator):
all_files,
datasource_info,
None,
_visited_folder_ids,
)
else:
all_files.append(
@ -830,7 +850,17 @@ class PipelineGenerator(BaseAppGenerator):
is_truncated = files.is_truncated
next_page_parameters = files.next_page_parameters
if is_truncated:
# Guard: only follow pagination when the API actually returned files.
# An empty folder that incorrectly reports ``is_truncated=True`` would
# otherwise recurse forever on the same empty page.
if is_truncated and has_files:
self._get_files_in_folder(
datasource_runtime, prefix, bucket, user_id, all_files, datasource_info, next_page_parameters
datasource_runtime,
prefix,
bucket,
user_id,
all_files,
datasource_info,
next_page_parameters,
_visited_folder_ids,
)

View File

@ -717,3 +717,129 @@ def test_get_files_in_folder_recurses_and_collects(generator):
)
assert {f["id"] for f in all_files} == {"f1", "f2"}
def test_get_files_in_folder_handles_empty_folder(generator):
    """Listing a folder that contains nothing collects no files and terminates."""

    class _Page:
        """A single browse page: its entries plus the pagination markers."""

        def __init__(self, files, is_truncated=False, next_page_parameters=None):
            self.files = files
            self.is_truncated = is_truncated
            self.next_page_parameters = next_page_parameters

    class _Envelope:
        """Wrapper whose ``result`` attribute holds the list of pages."""

        def __init__(self, result):
            self.result = result

    class _EmptyDrive:
        """Datasource runtime stub that always reports an empty folder."""

        def datasource_provider_type(self):
            return DatasourceProviderType.ONLINE_DRIVE

        def online_drive_browse_files(self, user_id, request, provider_type):
            # One page, zero entries, and no claim that more pages follow.
            empty_page = _Page(files=[], is_truncated=False, next_page_parameters=None)
            return iter([_Envelope([empty_page])])

    collected: list = []
    generator._get_files_in_folder(
        datasource_runtime=_EmptyDrive(),
        prefix="empty-folder",
        bucket="b",
        user_id="user",
        all_files=collected,
        datasource_info={},
    )

    assert collected == []
def test_get_files_in_folder_handles_empty_folder_with_false_truncation(generator):
    """An empty page flagged ``is_truncated=True`` must not trigger endless paging."""

    class _Page:
        """A single browse page: its entries plus the pagination markers."""

        def __init__(self, files, is_truncated=False, next_page_parameters=None):
            self.files = files
            self.is_truncated = is_truncated
            self.next_page_parameters = next_page_parameters

    class _Envelope:
        """Wrapper whose ``result`` attribute holds the list of pages."""

        def __init__(self, result):
            self.result = result

    # Every browse request is recorded here so the test can count API calls.
    browse_calls: list = []

    class _MisreportingDrive:
        """Runtime stub: no entries, yet the page claims more data is available."""

        def datasource_provider_type(self):
            return DatasourceProviderType.ONLINE_DRIVE

        def online_drive_browse_files(self, user_id, request, provider_type):
            browse_calls.append(request)
            bogus_page = _Page(files=[], is_truncated=True, next_page_parameters={"page": 2})
            return iter([_Envelope([bogus_page])])

    collected: list = []
    generator._get_files_in_folder(
        datasource_runtime=_MisreportingDrive(),
        prefix="buggy-folder",
        bucket="b",
        user_id="user",
        all_files=collected,
        datasource_info={},
    )

    assert collected == []
    # Exactly one browse request: the bogus truncation flag must not be followed.
    assert len(browse_calls) == 1
def test_get_files_in_folder_handles_self_referencing_folder(generator):
    """A folder whose listing contains itself must be expanded only once."""

    class _Entry:
        """Drive entry exposing ``id``, ``name`` and ``type`` attributes."""

        def __init__(self, entry_id, name, entry_type):
            self.id = entry_id
            self.name = name
            self.type = entry_type

    class _Page:
        """A single browse page: its entries plus the pagination markers."""

        def __init__(self, files, is_truncated=False, next_page_parameters=None):
            self.files = files
            self.is_truncated = is_truncated
            self.next_page_parameters = next_page_parameters

    class _Envelope:
        """Wrapper whose ``result`` attribute holds the list of pages."""

        def __init__(self, result):
            self.result = result

    # Every browse request is recorded here so the test can count API calls.
    browse_calls: list = []

    class _LoopingDrive:
        """Runtime stub whose only listed child is the folder being browsed."""

        def datasource_provider_type(self):
            return DatasourceProviderType.ONLINE_DRIVE

        def online_drive_browse_files(self, user_id, request, provider_type):
            browse_calls.append(request)
            # The child's id equals the prefix passed to the call under test.
            itself = _Entry("self-ref", "myfolder", "folder")
            page = _Page(files=[itself], is_truncated=False, next_page_parameters=None)
            return iter([_Envelope([page])])

    collected: list = []
    generator._get_files_in_folder(
        datasource_runtime=_LoopingDrive(),
        prefix="self-ref",
        bucket="b",
        user_id="user",
        all_files=collected,
        datasource_info={},
    )

    assert collected == []
    # Exactly one browse request: the already-visited folder is not re-entered.
    assert len(browse_calls) == 1