feat: enhance file upload process with content type detection and command building

This commit is contained in:
Harry 2026-02-05 18:00:45 +08:00
parent 306ef79526
commit cb7c086377
4 changed files with 32 additions and 7 deletions

View File

@ -10,6 +10,8 @@ from core.sandbox.inspector.base import SandboxFileSource
from core.sandbox.inspector.script_utils import (
build_detect_kind_command,
build_list_command,
build_upload_command,
guess_content_type,
parse_kind_output,
parse_list_output,
)
@ -129,8 +131,9 @@ class SandboxFileArchiveSource(SandboxFileSource):
sandbox_storage = SandboxFileService.get_storage()
is_file = kind == "file"
filename = (os.path.basename(path) or "file") if is_file else f"{export_name}.tar.gz"
export_key = SandboxFilePaths.export(self._tenant_id, self._app_id, self._sandbox_id, export_id, filename)
export_key = SandboxFilePaths.export(self._tenant_id, self._app_id, self._sandbox_id, export_id)
upload_url = sandbox_storage.get_upload_url(export_key, self._EXPORT_EXPIRES_IN_SECONDS)
content_type = guess_content_type(filename)
# Build pipeline: for directories, tar first then upload; for files, upload directly
archive_temp = f"/tmp/{export_id}.tar.gz"
@ -146,7 +149,7 @@ class SandboxFileArchiveSource(SandboxFileSource):
on=not is_file,
)
.add(
["curl", "-sf", "-X", "PUT", "-T", src_path, upload_url],
build_upload_command(src_path, upload_url, content_type=content_type),
error_message="Failed to upload file",
)
.add(["rm", "-f", archive_temp], on=not is_file)

View File

@ -9,6 +9,8 @@ from core.sandbox.inspector.base import SandboxFileSource
from core.sandbox.inspector.script_utils import (
build_detect_kind_command,
build_list_command,
build_upload_command,
guess_content_type,
parse_kind_output,
parse_list_output,
)
@ -88,11 +90,11 @@ class SandboxFileRuntimeSource(SandboxFileSource):
self._app_id,
self._sandbox_id,
export_id,
filename,
)
sandbox_storage = SandboxFileService.get_storage()
upload_url = sandbox_storage.get_upload_url(export_key, self._EXPORT_EXPIRES_IN_SECONDS)
content_type = guess_content_type(filename)
if kind == "dir":
archive_path = f"/tmp/{export_id}.tar.gz"
@ -104,7 +106,7 @@ class SandboxFileRuntimeSource(SandboxFileSource):
error_message="Failed to archive directory in sandbox",
)
.add(
["curl", "-s", "-f", "-X", "PUT", "-T", archive_path, upload_url],
build_upload_command(archive_path, upload_url, content_type=content_type),
error_message="Failed to upload directory archive from sandbox",
)
.execute(timeout=self._UPLOAD_TIMEOUT_SECONDS, raise_on_error=True)
@ -124,7 +126,7 @@ class SandboxFileRuntimeSource(SandboxFileSource):
(
pipeline(self._runtime)
.add(
["curl", "-s", "-f", "-X", "PUT", "-T", path, upload_url],
build_upload_command(path, upload_url, content_type=content_type),
error_message="Failed to upload file from sandbox",
)
.execute(timeout=self._UPLOAD_TIMEOUT_SECONDS, raise_on_error=True)

View File

@ -3,6 +3,7 @@
from __future__ import annotations
import json
import mimetypes
from typing import TypedDict, cast
# Shell snippet that picks ``python3`` when it is on PATH (falling back to ``python``)
# and runs the code held in ``$0`` via ``-c``, forwarding the remaining arguments.
# NOTE(review): presumably run as ``sh -c <this snippet> <script> <args...>`` so that
# ``$0`` carries the script text — confirm against the callers.
_PYTHON_EXEC_CMD = 'if command -v python3 >/dev/null 2>&1; then py=python3; else py=python; fi; "$py" -c "$0" "$@"'
@ -96,3 +97,22 @@ def parse_kind_output(stdout: bytes, *, not_found_message: str) -> str:
if kind not in ("dir", "file"):
raise ValueError(not_found_message)
return kind
# Media types for the compression encodings that ``mimetypes`` reports separately
# from the inner type (e.g. ``x.tar.gz`` -> ("application/x-tar", "gzip")).
_COMPRESSED_CONTENT_TYPES = {
    "gzip": "application/gzip",
    "bzip2": "application/x-bzip2",
    "xz": "application/x-xz",
    "compress": "application/x-compress",
}


def guess_content_type(filename: str) -> str | None:
    """Guess the ``Content-Type`` header value for uploading *filename*.

    Args:
        filename: File name (or path) whose extension drives the guess.

    Returns:
        The guessed media type, or ``None`` when the extension is unknown.
        ``text/*`` and ``application/json`` get ``; charset=utf-8`` appended.
        Compressed files (``.gz``, ``.tgz``, ``.bz2``, ``.xz``, ...) are
        reported as the compression format, because the uploaded bytes are the
        compressed stream and no ``Content-Encoding`` header accompanies them.
    """
    content_type, encoding = mimetypes.guess_type(filename, strict=False)
    if encoding is not None:
        # mimetypes describes "archive.tar.gz" as x-tar + gzip; labelling the
        # upload with the inner type would misdescribe the actual bytes.
        return _COMPRESSED_CONTENT_TYPES.get(encoding, "application/octet-stream")
    if content_type is None:
        return None
    if content_type.startswith("text/") or content_type == "application/json":
        return f"{content_type}; charset=utf-8"
    return content_type
def build_upload_command(src_path: str, upload_url: str, *, content_type: str | None) -> list[str]:
    """Build the ``curl`` argv that PUTs the file at *src_path* to *upload_url*.

    Args:
        src_path: Path of the file to upload (passed to ``curl -T``).
        upload_url: Destination URL for the PUT request.
        content_type: Value for the ``Content-Type`` header; when ``None`` or
            empty, no header is added.

    Returns:
        The command as an argument list: silent (``-s``), failing on HTTP
        errors (``-f``), with the URL as the final argument.
    """
    # --globoff: curl otherwise expands {} and [] in both the -T argument and
    # the URL, so a file such as "report[1].pdf" would fail or be globbed.
    command = ["curl", "-s", "-f", "--globoff", "-X", "PUT", "-T", src_path]
    if content_type:
        command += ["-H", f"Content-Type: {content_type}"]
    command.append(upload_url)
    return command

View File

@ -11,9 +11,9 @@ class SandboxFilePaths:
"""Facade for generating sandbox file storage keys."""
@staticmethod
def export(tenant_id: str, app_id: str, sandbox_id: str, export_id: str, filename: str) -> str:
def export(tenant_id: str, app_id: str, sandbox_id: str, export_id: str) -> str:
"""sandbox_files/{tenant}/{app}/{sandbox}/{export_id}"""
return f"sandbox_files/{tenant_id}/{app_id}/{sandbox_id}/{export_id}/{filename}"
return f"sandbox_files/{tenant_id}/{app_id}/{sandbox_id}/{export_id}"
@staticmethod
def archive(tenant_id: str, app_id: str, sandbox_id: str) -> str: