fix: sort signed URL replacements by position

This commit is contained in:
Yanli 盐粒 2026-03-01 17:38:45 +08:00
parent 30af50cb47
commit 46a3a2ae09
2 changed files with 39 additions and 0 deletions

View File

@ -8,6 +8,7 @@ import os
import pickle
import re
import time
from operator import itemgetter
from datetime import datetime
from json import JSONDecodeError
from typing import Any, cast
@ -867,6 +868,7 @@ class DocumentSegment(Base):
signed_urls.append((match.start(), match.end(), signed_url))
# Reconstruct the text with signed URLs
signed_urls.sort(key=itemgetter(0))
offset = 0
for start, end, signed_url in signed_urls:
text = text[: start + offset] + signed_url + text[end + offset :]

View File

@ -653,6 +653,43 @@ class TestDocumentSegmentIndexing:
assert f"/files/{denied_upload_file_id}/file-preview?timestamp=" not in signed
assert f"/files/{denied_upload_file_id}/file-preview)" in signed
def test_document_segment_sign_content_handles_mixed_preview_order(self):
"""Test that sign_content preserves content when file-preview appears before image-preview."""
# Arrange
file_preview_id = "1602650a-4fe4-423c-85a2-af76c083e3c4"
image_preview_id = "e2a4f7b1-1234-5678-9abc-def012345678"
segment = DocumentSegment(
tenant_id=str(uuid4()),
dataset_id=str(uuid4()),
document_id=str(uuid4()),
position=1,
content=(
f"file-first: ![file](/files/{file_preview_id}/file-preview) "
f"then-image: ![image](/files/{image_preview_id}/image-preview)"
),
word_count=1,
tokens=1,
created_by=str(uuid4()),
)
mock_scalars_result = self._mock_scalars_result([file_preview_id, image_preview_id])
# Act
with (
patch.object(dataset_module.dify_config, "SECRET_KEY", "secret", create=True),
patch("models.dataset.db.session.scalars", return_value=mock_scalars_result),
patch("models.dataset.time.time", return_value=1700000000),
patch("models.dataset.os.urandom", return_value=b"\x00" * 16),
):
signed = segment.get_sign_content()
# Assert
file_signed = f"/files/{file_preview_id}/file-preview?timestamp="
image_signed = f"/files/{image_preview_id}/image-preview?timestamp="
assert file_signed in signed
assert image_signed in signed
assert signed.index(file_signed) < signed.index(image_signed)
assert signed.count("&sign=") == 2
def test_document_segment_with_answer_field(self):
"""Test creating a document segment with answer field for QA model."""
# Arrange