fix(api): preserve remote file URL query params (#36478)

This commit is contained in:
Bodhiswattwa Chakraborty 2026-05-22 07:15:20 +05:30 committed by GitHub
parent 30deef45d9
commit 92181dbe09
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 78 additions and 6 deletions

View File

@ -36,6 +36,24 @@ class FileInfo(BaseModel):
size: int
def decode_remote_url(url: str, query_string: bytes | str = b"") -> str:
decoded_url = urllib.parse.unquote(url)
if isinstance(query_string, bytes):
raw_query = query_string.decode()
else:
raw_query = query_string
if not raw_query:
return decoded_url
if decoded_url.endswith(("?", "&")):
separator = ""
elif urllib.parse.urlsplit(decoded_url).query:
separator = "&"
else:
separator = "?"
return f"{decoded_url}{separator}{raw_query}"
def guess_file_info_from_response(response: httpx.Response):
url = str(response.url)
# Try to extract filename from URL

View File

@ -1,6 +1,5 @@
import urllib.parse
import httpx
from flask import request
from flask_restx import Resource
from pydantic import BaseModel, Field
@ -34,7 +33,7 @@ class GetRemoteFileInfo(Resource):
@console_ns.response(200, "Success", console_ns.models[RemoteFileInfo.__name__])
@login_required
def get(self, url: str):
decoded_url = urllib.parse.unquote(url)
decoded_url = helpers.decode_remote_url(url, request.query_string)
resp = ssrf_proxy.head(decoded_url)
if resp.status_code != httpx.codes.OK:
resp = ssrf_proxy.get(decoded_url, timeout=3)

View File

@ -1,6 +1,5 @@
import urllib.parse
import httpx
from flask import request
from pydantic import BaseModel, Field, HttpUrl
import services
@ -59,7 +58,7 @@ class RemoteFileInfoApi(WebApiResource):
Raises:
HTTPException: If the remote file cannot be accessed
"""
decoded_url = urllib.parse.unquote(url)
decoded_url = helpers.decode_remote_url(url, request.query_string)
resp = ssrf_proxy.head(decoded_url)
if resp.status_code != httpx.codes.OK:
# fall back to the GET method

View File

@ -98,6 +98,28 @@ def test_get_remote_file_info_uses_head_when_successful(app, monkeypatch: pytest
get_mock.assert_not_called()
def test_get_remote_file_info_preserves_unencoded_target_query(app, monkeypatch: pytest.MonkeyPatch) -> None:
    """The request's raw query string must be forwarded to the HEAD probe."""
    remote_base = "http://example.com/api/aiagent/httpview/txt"
    remote_query = "fileNameKey=cankao1_ce4305bc-be20-4c5d-8732-de1741d28e27"
    expected_remote_url = f"{remote_base}?{remote_query}"

    resource = remote_files_module.GetRemoteFileInfo()
    view = _unwrap(resource.get)

    # HEAD answers 200; GET is replaced with a bare mock as well.
    fake_head = MagicMock(
        return_value=_FakeResponse(
            status_code=200,
            headers={"Content-Type": "text/plain", "Content-Length": "128"},
            method="HEAD",
        )
    )
    proxy = remote_files_module.ssrf_proxy
    monkeypatch.setattr(proxy, "head", fake_head)
    monkeypatch.setattr(proxy, "get", MagicMock())

    with app.test_request_context(f"/remote-files/{expected_remote_url}", method="GET"):
        result = view(resource, url=remote_base)

    assert result == {"file_type": "text/plain", "file_length": 128}
    fake_head.assert_called_once_with(expected_remote_url)
def test_get_remote_file_info_falls_back_to_get_and_uses_default_headers(app, monkeypatch: pytest.MonkeyPatch) -> None:
api = remote_files_module.GetRemoteFileInfo()
handler = _unwrap(api.get)

View File

@ -2,6 +2,7 @@
from __future__ import annotations
import urllib.parse
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
@ -36,6 +37,39 @@ class TestRemoteFileInfoApi:
assert result["file_type"] == "application/pdf"
assert result["file_length"] == 1024
mock_proxy.head.assert_called_once_with("https://example.com/file.pdf")
@patch("controllers.web.remote_files.ssrf_proxy")
def test_preserves_unencoded_target_query(self, mock_proxy: MagicMock, app: Flask) -> None:
    """A query string sent alongside an unencoded target URL reaches the HEAD call."""
    remote_base = "http://example.com/api/aiagent/httpview/txt"
    remote_query = "fileNameKey=cankao1_ce4305bc-be20-4c5d-8732-de1741d28e27"
    expected_remote_url = f"{remote_base}?{remote_query}"

    # Successful HEAD response carrying the headers the handler reports on.
    mock_proxy.head.return_value = MagicMock(
        status_code=200,
        headers={"Content-Type": "text/plain", "Content-Length": "128"},
    )

    with app.test_request_context(f"/remote-files/{expected_remote_url}"):
        response = RemoteFileInfoApi().get(_app_model(), _end_user(), remote_base)

    assert response["file_type"] == "text/plain"
    mock_proxy.head.assert_called_once_with(expected_remote_url)
@patch("controllers.web.remote_files.ssrf_proxy")
def test_preserves_encoded_target_query(self, mock_proxy: MagicMock, app: Flask) -> None:
    """A fully percent-encoded target URL is decoded, query included, before the HEAD call."""
    remote_url = "http://example.com/api/aiagent/httpview/txt?fileNameKey=cankao1"
    # safe="" also encodes "/", ":" and "?", so the whole target is one path segment.
    quoted_url = urllib.parse.quote(remote_url, safe="")

    # Successful HEAD response carrying the headers the handler reports on.
    mock_proxy.head.return_value = MagicMock(
        status_code=200,
        headers={"Content-Type": "text/plain", "Content-Length": "128"},
    )

    with app.test_request_context(f"/remote-files/{quoted_url}"):
        response = RemoteFileInfoApi().get(_app_model(), _end_user(), quoted_url)

    assert response["file_type"] == "text/plain"
    mock_proxy.head.assert_called_once_with(remote_url)
@patch("controllers.web.remote_files.ssrf_proxy")
def test_fallback_to_get(self, mock_proxy: MagicMock, app: Flask) -> None: