From 46a3a2ae0967f20222cbb4e4e9a648c85644d4f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yanli=20=E7=9B=90=E7=B2=92?= Date: Sun, 1 Mar 2026 17:38:45 +0800 Subject: [PATCH] fix: sort signed URL replacements by position --- api/models/dataset.py | 2 + .../unit_tests/models/test_dataset_models.py | 37 +++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/api/models/dataset.py b/api/models/dataset.py index 73202d579d..c0f3b3a4e0 100644 --- a/api/models/dataset.py +++ b/api/models/dataset.py @@ -8,6 +8,7 @@ import os import pickle import re import time +from operator import itemgetter from datetime import datetime from json import JSONDecodeError from typing import Any, cast @@ -867,6 +868,7 @@ class DocumentSegment(Base): signed_urls.append((match.start(), match.end(), signed_url)) # Reconstruct the text with signed URLs + signed_urls.sort(key=itemgetter(0)) offset = 0 for start, end, signed_url in signed_urls: text = text[: start + offset] + signed_url + text[end + offset :] diff --git a/api/tests/unit_tests/models/test_dataset_models.py b/api/tests/unit_tests/models/test_dataset_models.py index 283066cd3d..e4585f0a2a 100644 --- a/api/tests/unit_tests/models/test_dataset_models.py +++ b/api/tests/unit_tests/models/test_dataset_models.py @@ -653,6 +653,43 @@ class TestDocumentSegmentIndexing: assert f"/files/{denied_upload_file_id}/file-preview?timestamp=" not in signed assert f"/files/{denied_upload_file_id}/file-preview)" in signed + def test_document_segment_sign_content_handles_mixed_preview_order(self): + """Test that sign_content preserves content when file-preview appears before image-preview.""" + # Arrange + file_preview_id = "1602650a-4fe4-423c-85a2-af76c083e3c4" + image_preview_id = "e2a4f7b1-1234-5678-9abc-def012345678" + segment = DocumentSegment( + tenant_id=str(uuid4()), + dataset_id=str(uuid4()), + document_id=str(uuid4()), + position=1, + content=( + f"file-first: ![file](/files/{file_preview_id}/file-preview) " + f"then-image: ![image](/files/{image_preview_id}/image-preview)" + ), + word_count=1, + tokens=1, + created_by=str(uuid4()), + ) + mock_scalars_result = self._mock_scalars_result([file_preview_id, image_preview_id]) + + # Act + with ( + patch.object(dataset_module.dify_config, "SECRET_KEY", "secret", create=True), + patch("models.dataset.db.session.scalars", return_value=mock_scalars_result), + patch("models.dataset.time.time", return_value=1700000000), + patch("models.dataset.os.urandom", return_value=b"\x00" * 16), + ): + signed = segment.get_sign_content() + + # Assert + file_signed = f"/files/{file_preview_id}/file-preview?timestamp=" + image_signed = f"/files/{image_preview_id}/image-preview?timestamp=" + assert file_signed in signed + assert image_signed in signed + assert signed.index(file_signed) < signed.index(image_signed) + assert signed.count("&sign=") == 2 + def test_document_segment_with_answer_field(self): """Test creating a document segment with answer field for QA model.""" # Arrange