diff --git a/api/models/dataset.py b/api/models/dataset.py index 89da8ac8ce..e00b5ad00d 100644 --- a/api/models/dataset.py +++ b/api/models/dataset.py @@ -820,7 +820,8 @@ class DocumentSegment(Base): signed_urls.append((match.start(), match.end(), signed_url)) # For tools directory - direct file formats (e.g., .png, .jpg, etc.) - pattern = r"/files/tools/([a-f0-9\-]+)\.([a-zA-Z0-9]+)(?:\?.*?)?" + # Match URL including any query parameters up to common URL boundaries (space, parenthesis, quotes) + pattern = r"/files/tools/([a-f0-9\-]+)\.([a-zA-Z0-9]+)(?:\?[^\s\)\"\']*)?" matches = re.finditer(pattern, text) for match in matches: upload_file_id = match.group(1)