mirror of
https://github.com/langgenius/dify.git
synced 2026-06-07 16:32:01 +08:00
fix(api): preserve remote file URL query params (#36478)
This commit is contained in:
parent
30deef45d9
commit
92181dbe09
@ -36,6 +36,24 @@ class FileInfo(BaseModel):
|
||||
size: int
|
||||
|
||||
|
||||
def decode_remote_url(url: str, query_string: bytes | str = b"") -> str:
|
||||
decoded_url = urllib.parse.unquote(url)
|
||||
if isinstance(query_string, bytes):
|
||||
raw_query = query_string.decode()
|
||||
else:
|
||||
raw_query = query_string
|
||||
if not raw_query:
|
||||
return decoded_url
|
||||
|
||||
if decoded_url.endswith(("?", "&")):
|
||||
separator = ""
|
||||
elif urllib.parse.urlsplit(decoded_url).query:
|
||||
separator = "&"
|
||||
else:
|
||||
separator = "?"
|
||||
return f"{decoded_url}{separator}{raw_query}"
|
||||
|
||||
|
||||
def guess_file_info_from_response(response: httpx.Response):
|
||||
url = str(response.url)
|
||||
# Try to extract filename from URL
|
||||
|
||||
@ -1,6 +1,5 @@
|
||||
import urllib.parse
|
||||
|
||||
import httpx
|
||||
from flask import request
|
||||
from flask_restx import Resource
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
@ -34,7 +33,7 @@ class GetRemoteFileInfo(Resource):
|
||||
@console_ns.response(200, "Success", console_ns.models[RemoteFileInfo.__name__])
|
||||
@login_required
|
||||
def get(self, url: str):
|
||||
decoded_url = urllib.parse.unquote(url)
|
||||
decoded_url = helpers.decode_remote_url(url, request.query_string)
|
||||
resp = ssrf_proxy.head(decoded_url)
|
||||
if resp.status_code != httpx.codes.OK:
|
||||
resp = ssrf_proxy.get(decoded_url, timeout=3)
|
||||
|
||||
@ -1,6 +1,5 @@
|
||||
import urllib.parse
|
||||
|
||||
import httpx
|
||||
from flask import request
|
||||
from pydantic import BaseModel, Field, HttpUrl
|
||||
|
||||
import services
|
||||
@ -59,7 +58,7 @@ class RemoteFileInfoApi(WebApiResource):
|
||||
Raises:
|
||||
HTTPException: If the remote file cannot be accessed
|
||||
"""
|
||||
decoded_url = urllib.parse.unquote(url)
|
||||
decoded_url = helpers.decode_remote_url(url, request.query_string)
|
||||
resp = ssrf_proxy.head(decoded_url)
|
||||
if resp.status_code != httpx.codes.OK:
|
||||
# fall back to the GET method
|
||||
|
||||
@ -98,6 +98,28 @@ def test_get_remote_file_info_uses_head_when_successful(app, monkeypatch: pytest
|
||||
get_mock.assert_not_called()
|
||||
|
||||
|
||||
def test_get_remote_file_info_preserves_unencoded_target_query(app, monkeypatch: pytest.MonkeyPatch) -> None:
    """An unencoded ``?query`` after the target URL must reach the upstream HEAD call."""
    resource = remote_files_module.GetRemoteFileInfo()
    undecorated_get = _unwrap(resource.get)
    remote_url = "http://example.com/api/aiagent/httpview/txt"
    remote_query = "fileNameKey=cankao1_ce4305bc-be20-4c5d-8732-de1741d28e27"

    # HEAD answers 200, so the GET stub exists only to keep the proxy fully patched.
    fake_head = MagicMock(
        return_value=_FakeResponse(
            status_code=200,
            headers={"Content-Type": "text/plain", "Content-Length": "128"},
            method="HEAD",
        )
    )
    monkeypatch.setattr(remote_files_module.ssrf_proxy, "head", fake_head)
    monkeypatch.setattr(remote_files_module.ssrf_proxy, "get", MagicMock())

    # The target's query is passed as the query string of the incoming request,
    # while the handler itself only receives the bare URL as its path argument.
    with app.test_request_context(f"/remote-files/{remote_url}?{remote_query}", method="GET"):
        result = undecorated_get(resource, url=remote_url)

    assert result == {"file_type": "text/plain", "file_length": 128}
    fake_head.assert_called_once_with(f"{remote_url}?{remote_query}")
|
||||
|
||||
|
||||
def test_get_remote_file_info_falls_back_to_get_and_uses_default_headers(app, monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
api = remote_files_module.GetRemoteFileInfo()
|
||||
handler = _unwrap(api.get)
|
||||
|
||||
@ -2,6 +2,7 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import urllib.parse
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
@ -36,6 +37,39 @@ class TestRemoteFileInfoApi:
|
||||
|
||||
assert result["file_type"] == "application/pdf"
|
||||
assert result["file_length"] == 1024
|
||||
mock_proxy.head.assert_called_once_with("https://example.com/file.pdf")
|
||||
|
||||
@patch("controllers.web.remote_files.ssrf_proxy")
def test_preserves_unencoded_target_query(self, mock_proxy: MagicMock, app: Flask) -> None:
    """An unencoded ``?query`` after the target URL must reach the upstream HEAD call."""
    remote_url = "http://example.com/api/aiagent/httpview/txt"
    remote_query = "fileNameKey=cankao1_ce4305bc-be20-4c5d-8732-de1741d28e27"

    # Successful HEAD response returned by the patched proxy.
    mock_proxy.head.return_value = MagicMock(
        status_code=200,
        headers={"Content-Type": "text/plain", "Content-Length": "128"},
    )

    # The target's query is passed as the query string of the incoming request,
    # while the handler itself only receives the bare URL as its path argument.
    with app.test_request_context(f"/remote-files/{remote_url}?{remote_query}"):
        api = RemoteFileInfoApi()
        result = api.get(_app_model(), _end_user(), remote_url)

    assert result["file_type"] == "text/plain"
    mock_proxy.head.assert_called_once_with(f"{remote_url}?{remote_query}")
|
||||
|
||||
@patch("controllers.web.remote_files.ssrf_proxy")
def test_preserves_encoded_target_query(self, mock_proxy: MagicMock, app: Flask) -> None:
    """A fully percent-encoded target URL is decoded back, query included, before the HEAD call."""
    expected_url = "http://example.com/api/aiagent/httpview/txt?fileNameKey=cankao1"
    # safe="" also escapes "/", so the whole target URL is percent-encoded.
    quoted_url = urllib.parse.quote(expected_url, safe="")

    # Successful HEAD response returned by the patched proxy.
    mock_proxy.head.return_value = MagicMock(
        status_code=200,
        headers={"Content-Type": "text/plain", "Content-Length": "128"},
    )

    with app.test_request_context(f"/remote-files/{quoted_url}"):
        api = RemoteFileInfoApi()
        result = api.get(_app_model(), _end_user(), quoted_url)

    assert result["file_type"] == "text/plain"
    mock_proxy.head.assert_called_once_with(expected_url)
|
||||
|
||||
@patch("controllers.web.remote_files.ssrf_proxy")
|
||||
def test_fallback_to_get(self, mock_proxy: MagicMock, app: Flask) -> None:
|
||||
|
||||
Loading…
Reference in New Issue
Block a user