From 92181dbe0970ad22892954610033626e0c2e2ada Mon Sep 17 00:00:00 2001 From: Bodhiswattwa Chakraborty <84554427+lord-Rheagar@users.noreply.github.com> Date: Fri, 22 May 2026 07:15:20 +0530 Subject: [PATCH] fix(api): preserve remote file URL query params (#36478) --- api/controllers/common/helpers.py | 18 ++++++++++ api/controllers/console/remote_files.py | 5 ++- api/controllers/web/remote_files.py | 5 ++- .../controllers/console/test_remote_files.py | 22 ++++++++++++ .../controllers/web/test_remote_files.py | 34 +++++++++++++++++++ 5 files changed, 78 insertions(+), 6 deletions(-) diff --git a/api/controllers/common/helpers.py b/api/controllers/common/helpers.py index 84903733b5..56a07a8b4a 100644 --- a/api/controllers/common/helpers.py +++ b/api/controllers/common/helpers.py @@ -36,6 +36,24 @@ class FileInfo(BaseModel): size: int +def decode_remote_url(url: str, query_string: bytes | str = b"") -> str: + decoded_url = urllib.parse.unquote(url) + if isinstance(query_string, bytes): + raw_query = query_string.decode() + else: + raw_query = query_string + if not raw_query: + return decoded_url + + if decoded_url.endswith(("?", "&")): + separator = "" + elif urllib.parse.urlsplit(decoded_url).query: + separator = "&" + else: + separator = "?" + return f"{decoded_url}{separator}{raw_query}" + + def guess_file_info_from_response(response: httpx.Response): url = str(response.url) # Try to extract filename from URL diff --git a/api/controllers/console/remote_files.py b/api/controllers/console/remote_files.py index fd3ed78986..19f1fd8aab 100644 --- a/api/controllers/console/remote_files.py +++ b/api/controllers/console/remote_files.py @@ -1,6 +1,5 @@ -import urllib.parse - import httpx +from flask import request from flask_restx import Resource from pydantic import BaseModel, Field @@ -34,7 +33,7 @@ class GetRemoteFileInfo(Resource): @console_ns.response(200, "Success", console_ns.models[RemoteFileInfo.__name__]) @login_required def get(self, url: str): - decoded_url = urllib.parse.unquote(url) + decoded_url = helpers.decode_remote_url(url, request.query_string) resp = ssrf_proxy.head(decoded_url) if resp.status_code != httpx.codes.OK: resp = ssrf_proxy.get(decoded_url, timeout=3) diff --git a/api/controllers/web/remote_files.py b/api/controllers/web/remote_files.py index ff583acd5c..e9f727097b 100644 --- a/api/controllers/web/remote_files.py +++ b/api/controllers/web/remote_files.py @@ -1,6 +1,5 @@ -import urllib.parse - import httpx +from flask import request from pydantic import BaseModel, Field, HttpUrl import services @@ -59,7 +58,7 @@ class RemoteFileInfoApi(WebApiResource): Raises: HTTPException: If the remote file cannot be accessed """ - decoded_url = urllib.parse.unquote(url) + decoded_url = helpers.decode_remote_url(url, request.query_string) resp = ssrf_proxy.head(decoded_url) if resp.status_code != httpx.codes.OK: # failed back to get method diff --git a/api/tests/unit_tests/controllers/console/test_remote_files.py b/api/tests/unit_tests/controllers/console/test_remote_files.py index 1be402c8ab..8e86709b66 100644 --- a/api/tests/unit_tests/controllers/console/test_remote_files.py +++ b/api/tests/unit_tests/controllers/console/test_remote_files.py @@ -98,6 +98,28 @@ def test_get_remote_file_info_uses_head_when_successful(app, monkeypatch: pytest get_mock.assert_not_called() +def test_get_remote_file_info_preserves_unencoded_target_query(app, monkeypatch: pytest.MonkeyPatch) -> None: + api = remote_files_module.GetRemoteFileInfo() + handler = _unwrap(api.get) + target_url = "http://example.com/api/aiagent/httpview/txt" + query = "fileNameKey=cankao1_ce4305bc-be20-4c5d-8732-de1741d28e27" + + head_resp = _FakeResponse( + status_code=200, + headers={"Content-Type": "text/plain", "Content-Length": "128"}, + method="HEAD", + ) + head_mock = MagicMock(return_value=head_resp) + monkeypatch.setattr(remote_files_module.ssrf_proxy, "head", head_mock) + monkeypatch.setattr(remote_files_module.ssrf_proxy, "get", MagicMock()) + + with app.test_request_context(f"/remote-files/{target_url}?{query}", method="GET"): + payload = handler(api, url=target_url) + + assert payload == {"file_type": "text/plain", "file_length": 128} + head_mock.assert_called_once_with(f"{target_url}?{query}") + + def test_get_remote_file_info_falls_back_to_get_and_uses_default_headers(app, monkeypatch: pytest.MonkeyPatch) -> None: api = remote_files_module.GetRemoteFileInfo() handler = _unwrap(api.get) diff --git a/api/tests/unit_tests/controllers/web/test_remote_files.py b/api/tests/unit_tests/controllers/web/test_remote_files.py index 8554f440b7..93f0ca9944 100644 --- a/api/tests/unit_tests/controllers/web/test_remote_files.py +++ b/api/tests/unit_tests/controllers/web/test_remote_files.py @@ -2,6 +2,7 @@ from __future__ import annotations +import urllib.parse from types import SimpleNamespace from unittest.mock import MagicMock, patch @@ -36,6 +37,39 @@ class TestRemoteFileInfoApi: assert result["file_type"] == "application/pdf" assert result["file_length"] == 1024 + mock_proxy.head.assert_called_once_with("https://example.com/file.pdf") + + @patch("controllers.web.remote_files.ssrf_proxy") + def test_preserves_unencoded_target_query(self, mock_proxy: MagicMock, app: Flask) -> None: + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.headers = {"Content-Type": "text/plain", "Content-Length": "128"} + mock_proxy.head.return_value = mock_resp + + target_url = "http://example.com/api/aiagent/httpview/txt" + query = "fileNameKey=cankao1_ce4305bc-be20-4c5d-8732-de1741d28e27" + + with app.test_request_context(f"/remote-files/{target_url}?{query}"): + result = RemoteFileInfoApi().get(_app_model(), _end_user(), target_url) + + assert result["file_type"] == "text/plain" + mock_proxy.head.assert_called_once_with(f"{target_url}?{query}") + + @patch("controllers.web.remote_files.ssrf_proxy") + def test_preserves_encoded_target_query(self, mock_proxy: MagicMock, app: Flask) -> None: + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.headers = {"Content-Type": "text/plain", "Content-Length": "128"} + mock_proxy.head.return_value = mock_resp + + target_url = "http://example.com/api/aiagent/httpview/txt?fileNameKey=cankao1" + encoded_url = urllib.parse.quote(target_url, safe="") + + with app.test_request_context(f"/remote-files/{encoded_url}"): + result = RemoteFileInfoApi().get(_app_model(), _end_user(), encoded_url) + + assert result["file_type"] == "text/plain" + mock_proxy.head.assert_called_once_with(target_url) @patch("controllers.web.remote_files.ssrf_proxy") def test_fallback_to_get(self, mock_proxy: MagicMock, app: Flask) -> None: