From 5bc6e8a433003177916f86b4790cfc52128b3cd4 Mon Sep 17 00:00:00 2001 From: Yongtao Huang Date: Mon, 22 Sep 2025 10:58:29 +0800 Subject: [PATCH] Fix: correct regex for file-preview URL re-signing (#25620) Fixes #25619 The regex patterns for file-preview and image-preview contained an unescaped `?`, which caused incorrect matches such as `file-previe` or `image-previw`. This led to malformed URLs being incorrectly re-signed. Changes: - Escape `?` in both file-preview and image-preview regex patterns. - Ensure only valid URLs are re-signed. Added unit tests to cover: - Valid file-preview and image-preview URLs (correctly re-signed). - Misspelled file/image preview URLs (no longer incorrectly matched). Other: - Fix a deprecated function `datetime.utcnow()` Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Asuka Minato Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> --- api/models/model.py | 4 +- .../tasks/test_batch_clean_document_task.py | 4 +- ...rkflow_node_execution_conflict_handling.py | 10 +-- api/tests/unit_tests/models/test_model.py | 83 +++++++++++++++++++ 4 files changed, 92 insertions(+), 9 deletions(-) create mode 100644 api/tests/unit_tests/models/test_model.py diff --git a/api/models/model.py b/api/models/model.py index 4342095802..9bcb81b41b 100644 --- a/api/models/model.py +++ b/api/models/model.py @@ -1044,7 +1044,7 @@ class Message(Base): sign_url = sign_tool_file(tool_file_id=tool_file_id, extension=extension) elif "file-preview" in url: # get upload file id - upload_file_id_pattern = r"\/files\/([\w-]+)\/file-preview?\?timestamp=" + upload_file_id_pattern = r"\/files\/([\w-]+)\/file-preview\?timestamp=" result = re.search(upload_file_id_pattern, url) if not result: continue @@ -1055,7 +1055,7 @@ class Message(Base): sign_url = file_helpers.get_signed_file_url(upload_file_id) elif "image-preview" in url: # image-preview is deprecated, use file-preview instead - upload_file_id_pattern = r"\/files\/([\w-]+)\/image-preview?\?timestamp=" + upload_file_id_pattern = r"\/files\/([\w-]+)\/image-preview\?timestamp=" result = re.search(upload_file_id_pattern, url) if not result: continue diff --git a/api/tests/test_containers_integration_tests/tasks/test_batch_clean_document_task.py b/api/tests/test_containers_integration_tests/tasks/test_batch_clean_document_task.py index 03b1539399..3d17a8ac9d 100644 --- a/api/tests/test_containers_integration_tests/tasks/test_batch_clean_document_task.py +++ b/api/tests/test_containers_integration_tests/tasks/test_batch_clean_document_task.py @@ -13,6 +13,7 @@ import pytest from faker import Faker from extensions.ext_database import db +from libs.datetime_utils import naive_utc_now from models.account import Account, Tenant, TenantAccountJoin, TenantAccountRole from models.dataset import Dataset, Document, DocumentSegment from models.model import UploadFile @@ -202,7 +203,6 @@ class TestBatchCleanDocumentTask: UploadFile: Created upload file instance """ fake = Faker() - from datetime import datetime from models.enums import CreatorUserRole @@ -216,7 +216,7 @@ class TestBatchCleanDocumentTask: mime_type="text/plain", created_by_role=CreatorUserRole.ACCOUNT, created_by=account.id, - created_at=datetime.utcnow(), + created_at=naive_utc_now(), used=False, ) diff --git a/api/tests/unit_tests/core/repositories/test_workflow_node_execution_conflict_handling.py b/api/tests/unit_tests/core/repositories/test_workflow_node_execution_conflict_handling.py index e4fe991561..07f28f162a 100644 --- a/api/tests/unit_tests/core/repositories/test_workflow_node_execution_conflict_handling.py +++ b/api/tests/unit_tests/core/repositories/test_workflow_node_execution_conflict_handling.py @@ -1,6 +1,5 @@ """Unit tests for workflow node execution conflict handling.""" -from datetime import datetime from unittest.mock import MagicMock, Mock import psycopg2.errors @@ -16,6 +15,7 @@ from core.workflow.entities.workflow_node_execution import ( WorkflowNodeExecutionStatus, ) from core.workflow.enums import NodeType +from libs.datetime_utils import naive_utc_now from models import Account, WorkflowNodeExecutionTriggeredFrom @@ -74,7 +74,7 @@ class TestWorkflowNodeExecutionConflictHandling: title="Test Node", index=1, status=WorkflowNodeExecutionStatus.RUNNING, - created_at=datetime.utcnow(), + created_at=naive_utc_now(), ) original_id = execution.id @@ -112,7 +112,7 @@ class TestWorkflowNodeExecutionConflictHandling: title="Test Node", index=1, status=WorkflowNodeExecutionStatus.SUCCEEDED, - created_at=datetime.utcnow(), + created_at=naive_utc_now(), ) # Save should update existing record @@ -157,7 +157,7 @@ class TestWorkflowNodeExecutionConflictHandling: title="Test Node", index=1, status=WorkflowNodeExecutionStatus.RUNNING, - created_at=datetime.utcnow(), + created_at=naive_utc_now(), ) # Save should raise IntegrityError after max retries @@ -199,7 +199,7 @@ class TestWorkflowNodeExecutionConflictHandling: title="Test Node", index=1, status=WorkflowNodeExecutionStatus.RUNNING, - created_at=datetime.utcnow(), + created_at=naive_utc_now(), ) # Save should raise error immediately diff --git a/api/tests/unit_tests/models/test_model.py b/api/tests/unit_tests/models/test_model.py new file mode 100644 index 0000000000..1a2003a9cf --- /dev/null +++ b/api/tests/unit_tests/models/test_model.py @@ -0,0 +1,83 @@ +import importlib +import types + +import pytest + +from models.model import Message + + +@pytest.fixture(autouse=True) +def patch_file_helpers(monkeypatch: pytest.MonkeyPatch): + """ + Patch file_helpers.get_signed_file_url to a deterministic stub. + """ + model_module = importlib.import_module("models.model") + dummy = types.SimpleNamespace(get_signed_file_url=lambda fid: f"https://signed.example/{fid}") + # Inject/override file_helpers on models.model + monkeypatch.setattr(model_module, "file_helpers", dummy, raising=False) + + +def _wrap_md(url: str) -> str: + """ + Wrap a raw URL into the markdown that re_sign_file_url_answer expects: + [link]() + """ + return f"please click [file]({url}) to download." + + +def test_file_preview_valid_replaced(): + """ + Valid file-preview URL must be re-signed: + - Extract upload_file_id correctly + - Replace the original URL with the signed URL + """ + upload_id = "abc-123" + url = f"/files/{upload_id}/file-preview?timestamp=111&nonce=222&sign=333" + msg = Message(answer=_wrap_md(url)) + + out = msg.re_sign_file_url_answer + assert f"https://signed.example/{upload_id}" in out + assert url not in out + + +def test_file_preview_misspelled_not_replaced(): + """ + Misspelled endpoint 'file-previe?timestamp=' should NOT be rewritten. + """ + upload_id = "zzz-001" + # path deliberately misspelled: file-previe? (missing 'w') + # and we append ¬e=file-preview to trick the old `"file-preview" in url` check. + url = f"/files/{upload_id}/file-previe?timestamp=111&nonce=222&sign=333¬e=file-preview" + original = _wrap_md(url) + msg = Message(answer=original) + + out = msg.re_sign_file_url_answer + # Expect NO replacement, should not rewrite misspelled file-previe URL + assert out == original + + +def test_image_preview_valid_replaced(): + """ + Valid image-preview URL must be re-signed. + """ + upload_id = "img-789" + url = f"/files/{upload_id}/image-preview?timestamp=123&nonce=456&sign=789" + msg = Message(answer=_wrap_md(url)) + + out = msg.re_sign_file_url_answer + assert f"https://signed.example/{upload_id}" in out + assert url not in out + + +def test_image_preview_misspelled_not_replaced(): + """ + Misspelled endpoint 'image-previe?timestamp=' should NOT be rewritten. + """ + upload_id = "img-err-42" + url = f"/files/{upload_id}/image-previe?timestamp=1&nonce=2&sign=3¬e=image-preview" + original = _wrap_md(url) + msg = Message(answer=original) + + out = msg.re_sign_file_url_answer + # Expect NO replacement, should not rewrite misspelled image-previe URL + assert out == original