mirror of
https://github.com/langgenius/dify.git
synced 2026-06-07 16:32:01 +08:00
fix: prevent recursion error when SharePoint folder is empty (#36372)
This commit is contained in:
parent
718ab8433e
commit
7bc5c89e3c
@ -791,10 +791,25 @@ class PipelineGenerator(BaseAppGenerator):
|
||||
all_files: list,
|
||||
datasource_info: Mapping[str, Any],
|
||||
next_page_parameters: dict[str, Any] | None = None,
|
||||
_visited_folder_ids: set[str] | None = None,
|
||||
):
|
||||
"""
|
||||
Get files in a folder.
|
||||
|
||||
Recursively lists all files inside the given folder prefix.
|
||||
``_visited_folder_ids`` tracks folders already expanded so that a
|
||||
self-referencing folder (where the API returns the folder as its own
|
||||
child) cannot cause infinite recursion.
|
||||
"""
|
||||
if _visited_folder_ids is None:
|
||||
_visited_folder_ids = set()
|
||||
|
||||
# Guard: skip folders we have already expanded to prevent infinite
|
||||
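# recursion from self-referencing folder entries in the API response.
# NOTE(review): the pagination call at the end of this method passes the same
# ``prefix`` together with this same ``_visited_folder_ids`` set, so the guard
# below also appears to return early for page 2+ of a truncated folder --
# confirm that later pages are still fetched (e.g. only apply the guard when
# ``next_page_parameters`` is None).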
|
||||
if prefix in _visited_folder_ids:
|
||||
return
|
||||
_visited_folder_ids.add(prefix)
|
||||
|
||||
result_generator = datasource_runtime.online_drive_browse_files(
|
||||
user_id=user_id,
|
||||
request=OnlineDriveBrowseFilesRequest(
|
||||
@ -806,10 +821,14 @@ class PipelineGenerator(BaseAppGenerator):
|
||||
provider_type=datasource_runtime.datasource_provider_type(),
|
||||
)
|
||||
is_truncated = False
|
||||
has_files = False
|
||||
for result in result_generator:
|
||||
for files in result.result:
|
||||
for file in files.files:
|
||||
has_files = True
|
||||
if file.type == "folder":
|
||||
if file.id in _visited_folder_ids:
|
||||
continue
|
||||
self._get_files_in_folder(
|
||||
datasource_runtime,
|
||||
file.id,
|
||||
@ -818,6 +837,7 @@ class PipelineGenerator(BaseAppGenerator):
|
||||
all_files,
|
||||
datasource_info,
|
||||
None,
|
||||
_visited_folder_ids,
|
||||
)
|
||||
else:
|
||||
all_files.append(
|
||||
@ -830,7 +850,17 @@ class PipelineGenerator(BaseAppGenerator):
|
||||
is_truncated = files.is_truncated
|
||||
next_page_parameters = files.next_page_parameters
|
||||
|
||||
if is_truncated:
|
||||
# Guard: only follow pagination when the API actually returned files.
|
||||
# An empty folder that incorrectly reports ``is_truncated=True`` would
|
||||
# otherwise recurse forever on the same empty page.
|
||||
if is_truncated and has_files:
|
||||
self._get_files_in_folder(
|
||||
datasource_runtime, prefix, bucket, user_id, all_files, datasource_info, next_page_parameters
|
||||
datasource_runtime,
|
||||
prefix,
|
||||
bucket,
|
||||
user_id,
|
||||
all_files,
|
||||
datasource_info,
|
||||
next_page_parameters,
|
||||
_visited_folder_ids,
|
||||
)
|
||||
|
||||
@ -717,3 +717,129 @@ def test_get_files_in_folder_recurses_and_collects(generator):
|
||||
)
|
||||
|
||||
assert {f["id"] for f in all_files} == {"f1", "f2"}
|
||||
|
||||
|
||||
def test_get_files_in_folder_handles_empty_folder(generator):
    """Listing a folder with no entries yields no files and no recursion error."""

    class Result:
        # Stand-in for one streamed browse response: a list of pages.
        def __init__(self, result):
            self.result = result

    class FilesPage:
        # Stand-in for a single listing page of a folder.
        def __init__(self, files, is_truncated=False, next_page_parameters=None):
            self.next_page_parameters = next_page_parameters
            self.is_truncated = is_truncated
            self.files = files

    class Runtime:
        def online_drive_browse_files(self, user_id, request, provider_type):
            # One page, zero entries, and no further pages advertised.
            empty_page = FilesPage(files=[], is_truncated=False, next_page_parameters=None)
            return iter([Result(result=[empty_page])])

        def datasource_provider_type(self):
            return DatasourceProviderType.ONLINE_DRIVE

    collected: list = []

    generator._get_files_in_folder(
        datasource_runtime=Runtime(),
        prefix="empty-folder",
        bucket="b",
        user_id="user",
        all_files=collected,
        datasource_info={},
    )

    # Nothing was listed, so nothing may have been collected.
    assert collected == []
|
||||
|
||||
|
||||
def test_get_files_in_folder_handles_empty_folder_with_false_truncation(generator):
    """An entry-less page that still claims truncation must not cause endless paging."""

    browse_calls = 0

    class Result:
        # Stand-in for one streamed browse response: a list of pages.
        def __init__(self, result):
            self.result = result

    class FilesPage:
        # Stand-in for a single listing page of a folder.
        def __init__(self, files, is_truncated=False, next_page_parameters=None):
            self.next_page_parameters = next_page_parameters
            self.is_truncated = is_truncated
            self.files = files

    class Runtime:
        def online_drive_browse_files(self, user_id, request, provider_type):
            # Count every browse request so the test can detect repeated paging.
            nonlocal browse_calls
            browse_calls += 1
            # No entries, yet the page insists there is more to fetch.
            bogus_page = FilesPage(files=[], is_truncated=True, next_page_parameters={"page": 2})
            return iter([Result(result=[bogus_page])])

        def datasource_provider_type(self):
            return DatasourceProviderType.ONLINE_DRIVE

    collected: list = []

    generator._get_files_in_folder(
        datasource_runtime=Runtime(),
        prefix="buggy-folder",
        bucket="b",
        user_id="user",
        all_files=collected,
        datasource_info={},
    )

    assert collected == []
    # Exactly one browse request: the bogus truncation flag must not be followed.
    assert browse_calls == 1
|
||||
|
||||
|
||||
def test_get_files_in_folder_handles_self_referencing_folder(generator):
    """A folder that lists itself as a child must not recurse infinitely.

    The stub runtime answers every browse request with a single folder entry
    whose id equals the prefix being listed ("self-ref"), so only the first
    browse request should ever be issued and no files should be collected.
    """

    class File:
        # Parameter names avoid shadowing the ``id`` / ``type`` builtins; the
        # attribute names are unchanged.
        def __init__(self, file_id, name, file_type):
            self.id = file_id
            self.name = name
            self.type = file_type

    class FilesPage:
        # Stand-in for a single listing page of a folder.
        def __init__(self, files, is_truncated=False, next_page_parameters=None):
            self.files = files
            self.is_truncated = is_truncated
            self.next_page_parameters = next_page_parameters

    class Result:
        # Stand-in for one streamed browse response: a list of pages.
        def __init__(self, result):
            self.result = result

    call_count = 0

    class Runtime:
        def datasource_provider_type(self):
            return DatasourceProviderType.ONLINE_DRIVE

        def online_drive_browse_files(self, user_id, request, provider_type):
            # Count every browse request so re-entry into the folder is detectable.
            nonlocal call_count
            call_count += 1
            # The folder returns itself as a child (self-reference).
            return iter([Result([FilesPage([File("self-ref", "myfolder", "folder")], False, None)])])

    runtime = Runtime()
    all_files: list = []

    generator._get_files_in_folder(
        datasource_runtime=runtime,
        prefix="self-ref",
        bucket="b",
        user_id="user",
        all_files=all_files,
        datasource_info={},
    )

    # The only entry is a folder, so no regular files are collected.
    assert all_files == []
    # Should only be called once -- the visited-set guard prevents re-entry.
    assert call_count == 1
|
||||
|
||||
Loading…
Reference in New Issue
Block a user