feat: use INTERNAL_FILES_URL first, then fall back to FILES_URL (#29962)

This commit is contained in:
wangxiaolei 2025-12-21 16:53:37 +08:00 committed by GitHub
parent 471fc94455
commit 32605181bd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 36 additions and 6 deletions

View File

@ -83,6 +83,7 @@ class WordExtractor(BaseExtractor):
def _extract_images_from_docx(self, doc):
image_count = 0
image_map = {}
base_url = dify_config.INTERNAL_FILES_URL or dify_config.FILES_URL
for r_id, rel in doc.part.rels.items():
if "image" in rel.target_ref:
@ -121,8 +122,7 @@ class WordExtractor(BaseExtractor):
used_at=naive_utc_now(),
)
db.session.add(upload_file)
# Use r_id as key for external images since target_part is undefined
image_map[r_id] = f"![image]({dify_config.FILES_URL}/files/{upload_file.id}/file-preview)"
image_map[r_id] = f"![image]({base_url}/files/{upload_file.id}/file-preview)"
else:
image_ext = rel.target_ref.split(".")[-1]
if image_ext is None:
@ -150,10 +150,7 @@ class WordExtractor(BaseExtractor):
used_at=naive_utc_now(),
)
db.session.add(upload_file)
# Use target_part as key for internal images
image_map[rel.target_part] = (
f"![image]({dify_config.FILES_URL}/files/{upload_file.id}/file-preview)"
)
image_map[rel.target_part] = f"![image]({base_url}/files/{upload_file.id}/file-preview)"
db.session.commit()
return image_map

View File

@ -132,3 +132,36 @@ def test_extract_images_from_docx(monkeypatch):
# DB interactions should be recorded
assert len(db_stub.session.added) == 2
assert db_stub.session.committed is True
def test_extract_images_from_docx_uses_internal_files_url():
    """Test that INTERNAL_FILES_URL takes precedence over FILES_URL for plugin access.

    The test exercises the base-URL selection expression
    (``INTERNAL_FILES_URL or FILES_URL``) directly; it does not run the
    extractor itself. Both settings are overridden for the duration of the
    test and are always put back afterwards, including when an original
    value was ``None``.
    """
    from configs import dify_config

    # Sentinel that separates "attribute was absent" from "attribute was None".
    # Comparing against None instead would skip the restore for a None original
    # and leave the test value on the shared config object for later tests.
    missing = object()
    originals = {
        name: getattr(dify_config, name, missing)
        for name in ("FILES_URL", "INTERNAL_FILES_URL")
    }
    try:
        # Set both URLs - INTERNAL should take precedence
        dify_config.FILES_URL = "http://external.example.com"
        dify_config.INTERNAL_FILES_URL = "http://internal.docker:5001"

        # Same selection expression as WordExtractor._extract_images_from_docx
        upload_file_id = "test_file_id"
        base_url = dify_config.INTERNAL_FILES_URL or dify_config.FILES_URL
        generated_url = f"{base_url}/files/{upload_file_id}/file-preview"

        # Verify that INTERNAL_FILES_URL is used instead of FILES_URL
        assert "http://internal.docker:5001" in generated_url, f"Expected internal URL, got: {generated_url}"
        assert "http://external.example.com" not in generated_url, f"Should not use external URL, got: {generated_url}"
    finally:
        # Restore every setting unconditionally so no state leaks between tests.
        for name, original in originals.items():
            if original is not missing:
                setattr(dify_config, name, original)
            elif hasattr(dify_config, name):
                # The attribute did not exist before the test; remove what we added.
                delattr(dify_config, name)