From f720a3bed2b23da49cca81e05473090ab2b69692 Mon Sep 17 00:00:00 2001 From: FFXN <31929997+FFXN@users.noreply.github.com> Date: Sat, 9 May 2026 18:06:01 +0800 Subject: [PATCH] fix: Image rendering in the knowledge base failed. (#35914) Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- api/core/rag/datasource/retrieval_service.py | 6 +-- api/core/rag/extractor/pdf_extractor.py | 2 +- api/core/rag/extractor/word_extractor.py | 2 +- api/core/rag/retrieval/dataset_retrieval.py | 4 +- api/core/tools/signature.py | 10 ++-- api/models/dataset.py | 6 +-- .../datasource/test_datasource_retrieval.py | 4 +- .../rag/retrieval/test_dataset_retrieval.py | 2 +- .../unit_tests/core/tools/test_signature.py | 14 +++--- .../unit_tests/models/test_dataset_models.py | 47 +++++++++++++++++++ 10 files changed, 73 insertions(+), 24 deletions(-) diff --git a/api/core/rag/datasource/retrieval_service.py b/api/core/rag/datasource/retrieval_service.py index b985ebbe1d..7769878e70 100644 --- a/api/core/rag/datasource/retrieval_service.py +++ b/api/core/rag/datasource/retrieval_service.py @@ -21,7 +21,7 @@ from core.rag.index_processor.constant.query_type import QueryType from core.rag.models.document import Document from core.rag.rerank.rerank_type import RerankMode from core.rag.retrieval.retrieval_methods import RetrievalMethod -from core.tools.signature import sign_upload_file +from core.tools.signature import sign_upload_file_preview_url from extensions.ext_database import db from graphon.model_runtime.entities.model_entities import ModelType from models.dataset import ( @@ -893,7 +893,7 @@ class RetrievalService: "name": upload_file.name, "extension": "." + upload_file.extension, "mime_type": upload_file.mime_type, - "source_url": sign_upload_file(upload_file.id, upload_file.extension), + "source_url": sign_upload_file_preview_url(upload_file.id, upload_file.extension), "size": upload_file.size, } return {"attachment_info": attachment_info, "segment_id": attachment_binding.segment_id} @@ -920,7 +920,7 @@ class RetrievalService: "name": upload_file.name, "extension": "." + upload_file.extension, "mime_type": upload_file.mime_type, - "source_url": sign_upload_file(upload_file.id, upload_file.extension), + "source_url": sign_upload_file_preview_url(upload_file.id, upload_file.extension), "size": upload_file.size, } if attachment_binding: diff --git a/api/core/rag/extractor/pdf_extractor.py b/api/core/rag/extractor/pdf_extractor.py index 02f0efc908..25f6fe3e2a 100644 --- a/api/core/rag/extractor/pdf_extractor.py +++ b/api/core/rag/extractor/pdf_extractor.py @@ -115,7 +115,7 @@ class PdfExtractor(BaseExtractor): """ image_content = [] upload_files = [] - base_url = dify_config.INTERNAL_FILES_URL or dify_config.FILES_URL + base_url = dify_config.FILES_URL try: image_objects = page.get_objects(filter=(pdfium_c.FPDF_PAGEOBJ_IMAGE,)) diff --git a/api/core/rag/extractor/word_extractor.py b/api/core/rag/extractor/word_extractor.py index 0330a43b28..60f8906181 100644 --- a/api/core/rag/extractor/word_extractor.py +++ b/api/core/rag/extractor/word_extractor.py @@ -110,7 +110,7 @@ class WordExtractor(BaseExtractor): def _extract_images_from_docx(self, doc): image_count = 0 image_map = {} - base_url = dify_config.INTERNAL_FILES_URL or dify_config.FILES_URL + base_url = dify_config.FILES_URL for r_id, rel in doc.part.rels.items(): if "image" in rel.target_ref: diff --git a/api/core/rag/retrieval/dataset_retrieval.py b/api/core/rag/retrieval/dataset_retrieval.py index 5631b3a921..010566d203 100644 --- a/api/core/rag/retrieval/dataset_retrieval.py +++ b/api/core/rag/retrieval/dataset_retrieval.py @@ -52,7 +52,7 @@ from core.rag.retrieval.template_prompts import ( METADATA_FILTER_USER_PROMPT_2, METADATA_FILTER_USER_PROMPT_3, ) -from core.tools.signature import sign_upload_file +from core.tools.signature import sign_upload_file_preview_url from core.tools.utils.dataset_retriever.dataset_retriever_base_tool import DatasetRetrieverBaseTool from core.workflow.file_reference import build_file_reference from core.workflow.nodes.knowledge_retrieval import exc @@ -529,7 +529,7 @@ class DatasetRetrieval: ), size=upload_file.size, storage_key=upload_file.key, - url=sign_upload_file(upload_file.id, upload_file.extension), + url=sign_upload_file_preview_url(upload_file.id, upload_file.extension), ) context_files.append(attachment_info) if show_retrieve_source: diff --git a/api/core/tools/signature.py b/api/core/tools/signature.py index 1807226924..3c7b523ff1 100644 --- a/api/core/tools/signature.py +++ b/api/core/tools/signature.py @@ -26,12 +26,14 @@ def sign_tool_file(tool_file_id: str, extension: str, for_external: bool = True) return f"{file_preview_url}?timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}" -def sign_upload_file(upload_file_id: str, extension: str) -> str: +def sign_upload_file_preview_url(upload_file_id: str, extension: str) -> str: """ - sign file to get a temporary url for plugin access + Sign an upload file to get a temporary image preview URL. + + The URL generated by this function is only for external preview and download, + not for internal communication. """ - # Use internal URL for plugin/tool file access in Docker environments - base_url = dify_config.INTERNAL_FILES_URL or dify_config.FILES_URL + base_url = dify_config.FILES_URL file_preview_url = f"{base_url}/files/{upload_file_id}/image-preview" timestamp = str(int(time.time())) diff --git a/api/models/dataset.py b/api/models/dataset.py index a00e9f7640..ed7727e0f1 100644 --- a/api/models/dataset.py +++ b/api/models/dataset.py @@ -24,7 +24,7 @@ from core.rag.index_processor.constant.built_in_field import BuiltInField, Metad from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType from core.rag.index_processor.constant.query_type import QueryType from core.rag.retrieval.retrieval_methods import RetrievalMethod -from core.tools.signature import sign_upload_file +from core.tools.signature import sign_upload_file_preview_url from extensions.ext_storage import storage from libs.uuid_utils import uuidv7 @@ -1020,7 +1020,7 @@ class DocumentSegment(Base): encoded_sign = base64.urlsafe_b64encode(sign).decode() params = f"timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}" - reference_url = dify_config.CONSOLE_API_URL or "" + reference_url = dify_config.FILES_URL or dify_config.CONSOLE_API_URL or "" base_url = f"{reference_url}/files/{upload_file_id}/image-preview" source_url = f"{base_url}?{params}" attachment_list.append( @@ -1162,7 +1162,7 @@ class DatasetQuery(TypeBase): "size": file_info.size, "extension": file_info.extension, "mime_type": file_info.mime_type, - "source_url": sign_upload_file(file_info.id, file_info.extension), + "source_url": sign_upload_file_preview_url(file_info.id, file_info.extension), } else: query["file_info"] = None diff --git a/api/tests/unit_tests/core/rag/datasource/test_datasource_retrieval.py b/api/tests/unit_tests/core/rag/datasource/test_datasource_retrieval.py index d38213dd89..f72351ffa2 100644 --- a/api/tests/unit_tests/core/rag/datasource/test_datasource_retrieval.py +++ b/api/tests/unit_tests/core/rag/datasource/test_datasource_retrieval.py @@ -1038,7 +1038,7 @@ class TestRetrievalServiceInternals: assert any(doc.metadata["doc_id"] == "processed-doc" for doc in all_documents) processor_instance.invoke.assert_called_once() - @patch("core.rag.datasource.retrieval_service.sign_upload_file", return_value="signed://file") + @patch("core.rag.datasource.retrieval_service.sign_upload_file_preview_url", return_value="signed://file") def test_get_segment_attachment_info_success(self, mock_sign): upload_file = SimpleNamespace( id="upload-1", @@ -1118,7 +1118,7 @@ class TestRetrievalServiceInternals: assert result == [] - @patch("core.rag.datasource.retrieval_service.sign_upload_file", return_value="signed://file") + @patch("core.rag.datasource.retrieval_service.sign_upload_file_preview_url", return_value="signed://file") def test_get_segment_attachment_infos_success(self, mock_sign): upload_file_1 = SimpleNamespace( id="upload-1", diff --git a/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval.py b/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval.py index b556ddf528..9334ad9b2f 100644 --- a/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval.py +++ b/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval.py @@ -4562,7 +4562,7 @@ class TestRetrieveCoverage: "core.rag.retrieval.dataset_retrieval.RetrievalService.format_retrieval_documents", return_value=[record], ), - patch("core.rag.retrieval.dataset_retrieval.sign_upload_file", return_value="https://signed"), + patch("core.rag.retrieval.dataset_retrieval.sign_upload_file_preview_url", return_value="https://signed"), patch("core.rag.retrieval.dataset_retrieval.db.session.execute") as mock_execute, ): bound_model_instance = Mock() diff --git a/api/tests/unit_tests/core/tools/test_signature.py b/api/tests/unit_tests/core/tools/test_signature.py index 353988d7a6..a75fdee908 100644 --- a/api/tests/unit_tests/core/tools/test_signature.py +++ b/api/tests/unit_tests/core/tools/test_signature.py @@ -9,7 +9,7 @@ import pytest from core.tools.signature import ( get_signed_file_url_for_plugin, sign_tool_file, - sign_upload_file, + sign_upload_file_preview_url, verify_plugin_file_signature, verify_tool_file_signature, ) @@ -89,32 +89,32 @@ def test_verify_tool_file_signature_rejects_expired_signature(monkeypatch: pytes assert verify_tool_file_signature("tool-file-id", timestamp, nonce, sign) is False -def test_sign_upload_file_prefers_internal_url(monkeypatch: pytest.MonkeyPatch) -> None: +def test_sign_upload_file_preview_url_uses_files_url(monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setattr("core.tools.signature.time.time", lambda: 1700000000) monkeypatch.setattr("core.tools.signature.os.urandom", lambda _: b"\x03" * 16) monkeypatch.setattr("core.tools.signature.dify_config.SECRET_KEY", "unit-secret") monkeypatch.setattr("core.tools.signature.dify_config.FILES_URL", "https://files.example.com") monkeypatch.setattr("core.tools.signature.dify_config.INTERNAL_FILES_URL", "https://internal.example.com") - url = sign_upload_file("upload-id", ".png") + url = sign_upload_file_preview_url("upload-id", ".png") parsed = urlparse(url) query = parse_qs(parsed.query) - assert parsed.netloc == "internal.example.com" + assert parsed.netloc == "files.example.com" assert parsed.path == "/files/upload-id/image-preview" assert query["timestamp"][0] assert query["nonce"][0] assert query["sign"][0] -def test_sign_upload_file_uses_files_url_fallback(monkeypatch: pytest.MonkeyPatch) -> None: +def test_sign_upload_file_preview_url_ignores_internal_files_url(monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.setattr("core.tools.signature.time.time", lambda: 1700000000) monkeypatch.setattr("core.tools.signature.os.urandom", lambda _: b"\x05" * 16) monkeypatch.setattr("core.tools.signature.dify_config.SECRET_KEY", "unit-secret") monkeypatch.setattr("core.tools.signature.dify_config.FILES_URL", "https://files.example.com") - monkeypatch.setattr("core.tools.signature.dify_config.INTERNAL_FILES_URL", "") + monkeypatch.setattr("core.tools.signature.dify_config.INTERNAL_FILES_URL", "https://internal.example.com") - url = sign_upload_file("upload-id", ".png") + url = sign_upload_file_preview_url("upload-id", ".png") parsed = urlparse(url) query = parse_qs(parsed.query) diff --git a/api/tests/unit_tests/models/test_dataset_models.py b/api/tests/unit_tests/models/test_dataset_models.py index 51d95c4239..3f14ebe8bf 100644 --- a/api/tests/unit_tests/models/test_dataset_models.py +++ b/api/tests/unit_tests/models/test_dataset_models.py @@ -12,7 +12,9 @@ This test suite covers: import json import pickle from datetime import UTC, datetime +from types import SimpleNamespace from unittest.mock import Mock, patch +from urllib.parse import parse_qs, urlparse from uuid import uuid4 from core.rag.index_processor.constant.index_type import IndexTechniqueType @@ -676,6 +678,51 @@ class TestDocumentSegmentIndexing: # Assert assert segment.hit_count == 5 + def test_document_segment_attachments_prefers_files_url_for_source_url(self, monkeypatch): + """Test attachment source URLs use FILES_URL before falling back to CONSOLE_API_URL.""" + # Arrange + segment = DocumentSegment( + tenant_id="tenant-1", + dataset_id="dataset-1", + document_id="document-1", + position=1, + content="Test", + word_count=1, + tokens=2, + created_by="user-1", + ) + segment.id = "segment-1" + attachment = SimpleNamespace( + id="upload-1", + name="image.png", + size=128, + extension="png", + mime_type="image/png", + ) + + monkeypatch.setattr("models.dataset.time.time", lambda: 1700000000) + monkeypatch.setattr("models.dataset.os.urandom", lambda _: b"\x01" * 16) + monkeypatch.setattr("models.dataset.dify_config.SECRET_KEY", "unit-secret") + monkeypatch.setattr("models.dataset.dify_config.FILES_URL", "https://files.example.com") + monkeypatch.setattr("models.dataset.dify_config.CONSOLE_API_URL", "https://console.example.com") + + with patch("models.dataset.db") as mock_db: + mock_db.session.execute.return_value.all.return_value = [(Mock(), attachment)] + + # Act + attachments = segment.attachments + + # Assert + assert len(attachments) == 1 + source_url = attachments[0]["source_url"] + parsed = urlparse(source_url) + query = parse_qs(parsed.query) + assert parsed.netloc == "files.example.com" + assert parsed.path == "/files/upload-1/image-preview" + assert query["timestamp"] == ["1700000000"] + assert query["nonce"] == ["01010101010101010101010101010101"] + assert query["sign"][0] + def test_document_segment_error_tracking(self): """Test document segment error tracking.""" # Arrange