diff --git a/api/models/dataset.py b/api/models/dataset.py index b559654032..ebffcac2fb 100644 --- a/api/models/dataset.py +++ b/api/models/dataset.py @@ -784,7 +784,7 @@ class DocumentSegment(Base): text = self.content # For data before v0.10.0 - pattern = r"/files/([a-f0-9\-]+)/image-preview" + pattern = r"/files/([a-f0-9\-]+)/image-preview(?:\?.*?)?" matches = re.finditer(pattern, text) for match in matches: upload_file_id = match.group(1) @@ -796,11 +796,12 @@ class DocumentSegment(Base): encoded_sign = base64.urlsafe_b64encode(sign).decode() params = f"timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}" - signed_url = f"{match.group(0)}?{params}" + base_url = f"/files/{upload_file_id}/image-preview" + signed_url = f"{base_url}?{params}" signed_urls.append((match.start(), match.end(), signed_url)) # For data after v0.10.0 - pattern = r"/files/([a-f0-9\-]+)/file-preview" + pattern = r"/files/([a-f0-9\-]+)/file-preview(?:\?.*?)?" matches = re.finditer(pattern, text) for match in matches: upload_file_id = match.group(1) @@ -812,7 +813,26 @@ class DocumentSegment(Base): encoded_sign = base64.urlsafe_b64encode(sign).decode() params = f"timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}" - signed_url = f"{match.group(0)}?{params}" + base_url = f"/files/{upload_file_id}/file-preview" + signed_url = f"{base_url}?{params}" + signed_urls.append((match.start(), match.end(), signed_url)) + + # For tools directory - direct file formats (e.g., .png, .jpg, etc.) + pattern = r"/files/tools/([a-f0-9\-]+)\.([a-zA-Z0-9]+)(?:\?.*?)?" + matches = re.finditer(pattern, text) + for match in matches: + upload_file_id = match.group(1) + file_extension = match.group(2) + nonce = os.urandom(16).hex() + timestamp = str(int(time.time())) + data_to_sign = f"file-preview|{upload_file_id}|{timestamp}|{nonce}" + secret_key = dify_config.SECRET_KEY.encode() if dify_config.SECRET_KEY else b"" + sign = hmac.new(secret_key, data_to_sign.encode(), hashlib.sha256).digest() + encoded_sign = base64.urlsafe_b64encode(sign).decode() + + params = f"timestamp={timestamp}&nonce={nonce}&sign={encoded_sign}" + base_url = f"/files/tools/{upload_file_id}.{file_extension}" + signed_url = f"{base_url}?{params}" signed_urls.append((match.start(), match.end(), signed_url)) # Reconstruct the text with signed URLs