From 32605181bdcd7022549cd672d98651842617288a Mon Sep 17 00:00:00 2001 From: wangxiaolei Date: Sun, 21 Dec 2025 16:53:37 +0800 Subject: [PATCH] feat: first use INTERNAL_FILES_URL first, then FILES_URL (#29962) --- api/core/rag/extractor/word_extractor.py | 9 ++--- .../core/rag/extractor/test_word_extractor.py | 33 +++++++++++++++++++ 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/api/core/rag/extractor/word_extractor.py b/api/core/rag/extractor/word_extractor.py index 044b118635..f67f613e9d 100644 --- a/api/core/rag/extractor/word_extractor.py +++ b/api/core/rag/extractor/word_extractor.py @@ -83,6 +83,7 @@ class WordExtractor(BaseExtractor): def _extract_images_from_docx(self, doc): image_count = 0 image_map = {} + base_url = dify_config.INTERNAL_FILES_URL or dify_config.FILES_URL for r_id, rel in doc.part.rels.items(): if "image" in rel.target_ref: @@ -121,8 +122,7 @@ class WordExtractor(BaseExtractor): used_at=naive_utc_now(), ) db.session.add(upload_file) - # Use r_id as key for external images since target_part is undefined - image_map[r_id] = f"![image]({dify_config.FILES_URL}/files/{upload_file.id}/file-preview)" + image_map[r_id] = f"![image]({base_url}/files/{upload_file.id}/file-preview)" else: image_ext = rel.target_ref.split(".")[-1] if image_ext is None: @@ -150,10 +150,7 @@ class WordExtractor(BaseExtractor): used_at=naive_utc_now(), ) db.session.add(upload_file) - # Use target_part as key for internal images - image_map[rel.target_part] = ( - f"![image]({dify_config.FILES_URL}/files/{upload_file.id}/file-preview)" - ) + image_map[rel.target_part] = f"![image]({base_url}/files/{upload_file.id}/file-preview)" db.session.commit() return image_map diff --git a/api/tests/unit_tests/core/rag/extractor/test_word_extractor.py b/api/tests/unit_tests/core/rag/extractor/test_word_extractor.py index fd0b0e2e44..3203aab8c3 100644 --- a/api/tests/unit_tests/core/rag/extractor/test_word_extractor.py +++ 
b/api/tests/unit_tests/core/rag/extractor/test_word_extractor.py
@@ -132,3 +132,38 @@ def test_extract_images_from_docx(monkeypatch):
     # DB interactions should be recorded
     assert len(db_stub.session.added) == 2
     assert db_stub.session.committed is True
+
+
+def test_extract_images_from_docx_uses_internal_files_url():
+    """Check that INTERNAL_FILES_URL takes precedence over FILES_URL.
+
+    NOTE(review): this only mirrors the ``base_url`` expression used in
+    word_extractor.py; it never calls the extractor, so a regression there
+    would go unnoticed. Consider driving ``_extract_images_from_docx``.
+    """
+    from configs import dify_config
+
+    # Snapshot both settings so they can be put back after the test.
+    original_files_url = getattr(dify_config, "FILES_URL", None)
+    original_internal_files_url = getattr(dify_config, "INTERNAL_FILES_URL", None)
+
+    try:
+        # Set both URLs - INTERNAL should take precedence
+        dify_config.FILES_URL = "http://external.example.com"
+        dify_config.INTERNAL_FILES_URL = "http://internal.docker:5001"
+
+        upload_file_id = "test_file_id"
+
+        # Same precedence expression as in word_extractor.py
+        base_url = dify_config.INTERNAL_FILES_URL or dify_config.FILES_URL
+        generated_url = f"{base_url}/files/{upload_file_id}/file-preview"
+
+        # Compare the whole URL: a substring check would also pass if extra
+        # text were prepended or appended around the internal host.
+        expected_url = "http://internal.docker:5001/files/test_file_id/file-preview"
+        assert generated_url == expected_url, f"Expected internal URL, got: {generated_url}"
+    finally:
+        # Restore unconditionally: skipping the write when the original was
+        # None would leave this test's values on the shared config object.
+        dify_config.FILES_URL = original_files_url
+        dify_config.INTERNAL_FILES_URL = original_internal_files_url