feat(zip-sandbox): special use of sandbox implementation

This commit is contained in:
Harry 2026-01-26 20:24:45 +08:00
parent 89eb7b17db
commit 9094f9d313
7 changed files with 426 additions and 24 deletions

View File

@ -11,7 +11,8 @@ from uuid import UUID, uuid4
from core.sandbox.entities.files import SandboxFileDownloadTicket, SandboxFileNode
from core.sandbox.manager import SandboxManager
from core.sandbox.security.archive_signer import SandboxArchivePath
from core.sandbox.security.sandbox_file_signer import SandboxFileDownloadPath, SandboxFileSigner
from core.sandbox.security.sandbox_file_signer import SandboxFileDownloadPath
from core.sandbox.storage import sandbox_file_storage
from core.virtual_environment.__base.exec import CommandExecutionError
from core.virtual_environment.__base.helpers import execute
from core.virtual_environment.__base.virtual_environment import VirtualEnvironment
@ -137,11 +138,7 @@ print(json.dumps(entries))
filename=filename,
)
upload_url = SandboxFileSigner.build_signed_url(
export_path=export_path,
expires_in=self._EXPORT_EXPIRES_IN_SECONDS,
action=SandboxFileSigner.OPERATION_UPLOAD,
)
upload_url = sandbox_file_storage.get_upload_url(export_path, expires_in=self._EXPORT_EXPIRES_IN_SECONDS)
if kind == "dir":
archive_path = f"/tmp/{export_id}.tar.gz"
@ -182,11 +179,7 @@ print(json.dumps(entries))
except CommandExecutionError as exc:
raise RuntimeError(str(exc)) from exc
download_url = SandboxFileSigner.build_signed_url(
export_path=export_path,
expires_in=self._EXPORT_EXPIRES_IN_SECONDS,
action=SandboxFileSigner.OPERATION_DOWNLOAD,
)
download_url = sandbox_file_storage.get_download_url(export_path, expires_in=self._EXPORT_EXPIRES_IN_SECONDS)
return SandboxFileDownloadTicket(
download_url=download_url,
expires_in=self._EXPORT_EXPIRES_IN_SECONDS,
@ -340,12 +333,10 @@ class SandboxFileArchiveSource(SandboxFileSource):
extracted = tf.extractfile(member)
if extracted is None:
raise ValueError("File not found in sandbox archive")
storage.save(export_path.get_storage_key(), extracted.read())
sandbox_file_storage.save(export_path, extracted.read())
download_url = SandboxFileSigner.build_signed_url(
export_path=export_path,
expires_in=self._EXPORT_EXPIRES_IN_SECONDS,
action=SandboxFileSigner.OPERATION_DOWNLOAD,
download_url = sandbox_file_storage.get_download_url(
export_path, expires_in=self._EXPORT_EXPIRES_IN_SECONDS
)
return SandboxFileDownloadTicket(
download_url=download_url,
@ -408,12 +399,10 @@ class SandboxFileArchiveSource(SandboxFileSource):
ti.size = int(m.size)
out.addfile(ti, fileobj=extracted)
storage.save(export_path.get_storage_key(), Path(export_local).read_bytes())
sandbox_file_storage.save(export_path, Path(export_local).read_bytes())
download_url = SandboxFileSigner.build_signed_url(
export_path=export_path,
expires_in=self._EXPORT_EXPIRES_IN_SECONDS,
action=SandboxFileSigner.OPERATION_DOWNLOAD,
download_url = sandbox_file_storage.get_download_url(
export_path, expires_in=self._EXPORT_EXPIRES_IN_SECONDS
)
return SandboxFileDownloadTicket(
download_url=download_url,

View File

@ -1,4 +1,12 @@
from .archive_storage import ArchiveSandboxStorage
from .noop_storage import NoopSandboxStorage
from .sandbox_file_storage import SandboxFileStorage, sandbox_file_storage
from .sandbox_storage import SandboxStorage
__all__ = ["ArchiveSandboxStorage", "SandboxStorage"]
__all__ = [
"ArchiveSandboxStorage",
"NoopSandboxStorage",
"SandboxFileStorage",
"SandboxStorage",
"sandbox_file_storage",
]

View File

@ -0,0 +1,18 @@
from core.sandbox.storage.sandbox_storage import SandboxStorage
from core.virtual_environment.__base.virtual_environment import VirtualEnvironment
class NoopSandboxStorage(SandboxStorage):
"""A no-op storage implementation that does nothing on mount/unmount."""
def mount(self, sandbox: VirtualEnvironment) -> bool:
return False
def unmount(self, sandbox: VirtualEnvironment) -> bool:
return False
def exists(self) -> bool:
return False
def delete(self) -> None:
return

View File

@ -0,0 +1,72 @@
from __future__ import annotations
from typing import Any
from core.sandbox.security.sandbox_file_signer import SandboxFileDownloadPath, SandboxFileSigner
from extensions.ext_redis import redis_client
from extensions.ext_storage import storage
from extensions.storage.base_storage import BaseStorage
from extensions.storage.cached_presign_storage import CachedPresignStorage
from extensions.storage.silent_storage import SilentStorage
class SandboxFileStorage:
_base_storage: BaseStorage
_storage: CachedPresignStorage
def __init__(self, storage: BaseStorage, *, redis_client: Any) -> None:
self._base_storage = storage
self._storage = CachedPresignStorage(
storage=storage,
redis_client=redis_client,
cache_key_prefix="sandbox_file_downloads",
)
def save(self, download_path: SandboxFileDownloadPath, content: bytes) -> None:
self._storage.save(download_path.get_storage_key(), content)
def get_download_url(self, download_path: SandboxFileDownloadPath, expires_in: int = 3600) -> str:
storage_key = download_path.get_storage_key()
try:
return self._storage.get_download_url(storage_key, expires_in)
except NotImplementedError:
return SandboxFileSigner.build_signed_url(
export_path=download_path,
expires_in=expires_in,
action=SandboxFileSigner.OPERATION_DOWNLOAD,
)
def get_upload_url(self, download_path: SandboxFileDownloadPath, expires_in: int = 3600) -> str:
storage_key = download_path.get_storage_key()
try:
return self._storage.get_upload_url(storage_key, expires_in)
except NotImplementedError:
return SandboxFileSigner.build_signed_url(
export_path=download_path,
expires_in=expires_in,
action=SandboxFileSigner.OPERATION_UPLOAD,
)
class _LazySandboxFileStorage:
_instance: SandboxFileStorage | None
def __init__(self) -> None:
self._instance = None
def _get_instance(self) -> SandboxFileStorage:
if self._instance is None:
if not hasattr(storage, "storage_runner"):
raise RuntimeError(
"Storage is not initialized; call storage.init_app before using sandbox_file_storage"
)
self._instance = SandboxFileStorage(
storage=SilentStorage(storage.storage_runner), redis_client=redis_client
)
return self._instance
def __getattr__(self, name: str):
return getattr(self._get_instance(), name)
sandbox_file_storage = _LazySandboxFileStorage()

View File

@ -176,8 +176,9 @@ class CommandPipeline:
_steps: list[_PipelineStep] = field(default_factory=list) # pyright: ignore[reportUnknownVariableType]
def add(self, command: list[str], *, error_message: str = "Command failed") -> CommandPipeline:
self._steps.append(_PipelineStep(argv=command, error_message=error_message))
def add(self, command: list[str], *, error_message: str = "Command failed", on: bool = True) -> CommandPipeline:
if on:
self._steps.append(_PipelineStep(argv=command, error_message=error_message))
return self
def execute(self, *, timeout: float | None = 30, raise_on_error: bool = False) -> list[CommandResult]:

View File

@ -0,0 +1,6 @@
from .session import SandboxArchiveFile, ZipSandbox
__all__ = [
"SandboxArchiveFile",
"ZipSandbox",
]

View File

@ -0,0 +1,308 @@
from __future__ import annotations
import hashlib
import posixpath
from dataclasses import dataclass
from io import BytesIO
from pathlib import PurePosixPath
from types import TracebackType
from typing import Any
from urllib.parse import urlparse
from uuid import uuid4
from core.sandbox.builder import SandboxBuilder
from core.sandbox.entities.sandbox_type import SandboxType
from core.sandbox.manager import SandboxManager
from core.sandbox.sandbox import Sandbox
from core.sandbox.storage.noop_storage import NoopSandboxStorage
from core.virtual_environment.__base.exec import CommandExecutionError, PipelineExecutionError
from core.virtual_environment.__base.helpers import execute, pipeline
from core.virtual_environment.__base.virtual_environment import VirtualEnvironment
from services.sandbox.sandbox_provider_service import SandboxProviderService
@dataclass(frozen=True)
class SandboxArchiveFile:
file_path: str
size_bytes: int
sha256: str
class ZipSandbox:
"""A sandbox specifically for archive (tar) operations.
Usage:
with ZipSandbox(tenant_id=..., user_id=...) as zs:
zs.write_file("a.txt", b"hello")
archive = zs.tar()
zs.upload(path=archive.file_path, target_url=url)
# VM automatically released on exit
"""
_DEFAULT_TIMEOUT_SECONDS = 60 * 5
def __init__(
self,
*,
tenant_id: str | None = None,
user_id: str | None = None,
app_id: str = "zip-sandbox",
sandbox_provider_type: str | None = None,
sandbox_provider_options: dict[str, Any] | None = None,
# For testing: allow injecting a VM directly
_vm: VirtualEnvironment | None = None,
) -> None:
self._tenant_id = tenant_id
self._user_id = user_id
self._app_id = app_id
self._sandbox_provider_type = sandbox_provider_type
self._sandbox_provider_options = sandbox_provider_options
self._injected_vm = _vm
self._sandbox: Sandbox | None = None
self._sandbox_id: str | None = None
self._vm: VirtualEnvironment | None = None
def __enter__(self) -> ZipSandbox:
self._start()
return self
def __exit__(
self,
exc_type: type[BaseException] | None,
exc: BaseException | None,
tb: TracebackType | None,
) -> None:
self._stop()
def _start(self) -> None:
if self._vm is not None:
raise RuntimeError("ZipSandbox already started")
# If VM is injected (for testing), use it directly
if self._injected_vm is not None:
self._vm = self._injected_vm
self._sandbox_id = uuid4().hex
return
if not self._tenant_id:
raise ValueError("tenant_id is required")
if not self._user_id:
raise ValueError("user_id is required")
if self._sandbox_provider_type is None or self._sandbox_provider_options is None:
provider = SandboxProviderService.get_sandbox_provider(self._tenant_id)
provider_type = provider.provider_type
provider_options = dict(provider.config)
else:
provider_type = self._sandbox_provider_type
provider_options = dict(self._sandbox_provider_options)
self._sandbox_id = uuid4().hex
storage = NoopSandboxStorage()
self._sandbox = (
SandboxBuilder(self._tenant_id, SandboxType(provider_type))
.options(provider_options)
.user(self._user_id)
.app(self._app_id)
.storage(storage, assets_id="zip-sandbox")
.build()
)
self._sandbox.wait_ready(timeout=60)
self._vm = self._sandbox.vm
SandboxManager.register(self._sandbox_id, self._vm)
def _stop(self) -> None:
if self._vm is None:
return
if self._sandbox_id:
SandboxManager.unregister(self._sandbox_id)
if self._sandbox is not None:
self._sandbox.release()
self._vm = None
self._sandbox = None
self._sandbox_id = None
@property
def vm(self) -> VirtualEnvironment:
if self._vm is None:
raise RuntimeError("ZipSandbox not started. Use 'with ZipSandbox(...) as zs:'")
return self._vm
# ========== Path utilities ==========
@staticmethod
def _normalize_workspace_path(path: str | None) -> str:
raw = (path or ".").strip()
if raw == "":
raw = "."
p = PurePosixPath(raw)
if p.is_absolute():
raise ValueError("path must be relative")
if any(part == ".." for part in p.parts):
raise ValueError("path must not contain '..'")
normalized = str(p)
return "." if normalized in (".", "") else normalized
@staticmethod
def _dest_path_for_url(dest_dir: str, url: str) -> str:
parsed = urlparse(url)
path = parsed.path or ""
name = posixpath.basename(path)
if not name:
name = "download.bin"
return posixpath.join(dest_dir, name)
# ========== File operations ==========
def write_file(self, path: str, data: bytes) -> None:
path = self._normalize_workspace_path(path)
if path in ("", "."):
raise ValueError("path must point to a file")
try:
self.vm.upload_file(path, BytesIO(data))
except Exception as exc:
raise RuntimeError(f"Failed to write file to sandbox: {exc}") from exc
def read_file(self, path: str, *, max_bytes: int = 10 * 1024 * 1024) -> bytes:
path = self._normalize_workspace_path(path)
if max_bytes <= 0:
raise ValueError("max_bytes must be positive")
try:
data = self.vm.download_file(path).getvalue()
except Exception as exc:
raise RuntimeError(f"Failed to read file from sandbox: {exc}") from exc
if len(data) > max_bytes:
raise ValueError(f"File too large: {len(data)} > {max_bytes}")
return data
# ========== Download operations ==========
def download(self, urls: list[str], *, dest_dir: str = ".") -> list[str]:
if not urls:
return []
dest_dir = self._normalize_workspace_path(dest_dir)
paths = [self._dest_path_for_url(dest_dir, u) for u in urls]
p = pipeline(self.vm)
p.add(["mkdir", "-p", dest_dir], error_message="Failed to create download directory")
for url, out_path in zip(urls, paths, strict=True):
p.add(["curl", "-fsSL", url, "-o", out_path], error_message="Failed to download file")
try:
p.execute(timeout=self._DEFAULT_TIMEOUT_SECONDS, raise_on_error=True)
except Exception as exc:
raise RuntimeError(str(exc)) from exc
return paths
def download_archive(self, archive_url: str, *, path: str = "input.tar.gz") -> str:
path = self._normalize_workspace_path(path)
dir_path = posixpath.dirname(path)
p = pipeline(self.vm)
if dir_path not in ("", "."):
p.add(["mkdir", "-p", dir_path], error_message=f"Failed to create archive download directory {dir_path}")
p.add(["curl", "-fsSL", archive_url, "-o", path], error_message=f"Failed to download archive to {path}")
try:
p.execute(timeout=self._DEFAULT_TIMEOUT_SECONDS, raise_on_error=True)
except Exception as exc:
raise RuntimeError(str(exc)) from exc
return path
# ========== Upload operations ==========
def upload(self, *, path: str, target_url: str) -> None:
path = self._normalize_workspace_path(path)
if path in ("", "."):
raise ValueError("path must point to a file")
try:
execute(
self.vm,
["curl", "-fsSL", "-X", "PUT", "-T", path, target_url],
timeout=self._DEFAULT_TIMEOUT_SECONDS,
error_message="Failed to upload file from sandbox",
)
except CommandExecutionError as exc:
raise RuntimeError(str(exc)) from exc
# ========== Archive operations ==========
def tar(self, src: str = ".", *, out_path: str | None = None) -> SandboxArchiveFile:
src = self._normalize_workspace_path(src)
if out_path is None:
out_path = f"{uuid4().hex}.tar"
out_path = self._normalize_workspace_path(out_path)
lower_out = out_path.lower()
if not (lower_out.endswith(".tar") or lower_out.endswith(".tar.gz") or lower_out.endswith(".tgz")):
raise ValueError("out_path must end with .tar/.tar.gz/.tgz")
out_dir = posixpath.dirname(out_path)
is_gz = lower_out.endswith(".tar.gz") or lower_out.endswith(".tgz")
tar_flag = "-czf" if is_gz else "-cf"
is_cwd = src in (".", "")
# Avoid "archive cannot contain itself" when archiving the current directory.
# Create the archive outside the workspace tree and move it into place.
tmp_archive = f"/tmp/{uuid4().hex}{'.tar.gz' if is_gz else '.tar'}"
try:
(
pipeline(self.vm)
.add(
["mkdir", "-p", out_dir],
error_message="Failed to create archive output directory",
on=out_dir not in ("", "."),
)
.add(
["tar", tar_flag, tmp_archive, "-C", ".", "."],
error_message="Failed to create tar archive",
on=is_cwd,
)
.add(["tar", tar_flag, tmp_archive, src], error_message="Failed to create tar archive", on=not is_cwd)
.add(["mv", "-f", tmp_archive, out_path], error_message="Failed to move tar archive into place")
.execute(timeout=self._DEFAULT_TIMEOUT_SECONDS, raise_on_error=True)
)
except PipelineExecutionError as exc:
raise RuntimeError(str(exc)) from exc
# Compute size + sha256 on host side (avoid requiring sha256sum in sandbox).
try:
data = self.vm.download_file(out_path).getvalue()
except Exception as exc:
raise RuntimeError(f"Failed to read tar result from sandbox: {exc}") from exc
return SandboxArchiveFile(file_path=out_path, size_bytes=len(data), sha256=hashlib.sha256(data).hexdigest())
def untar(self, *, archive_path: str, dest_dir: str = "unpacked") -> str:
archive_path = self._normalize_workspace_path(archive_path)
dest_dir = self._normalize_workspace_path(dest_dir)
lower = archive_path.lower()
is_gz = lower.endswith(".tar.gz") or lower.endswith(".tgz")
extract_flag = "-xzf" if is_gz else "-xf"
try:
(
pipeline(self.vm)
.add(["mkdir", "-p", dest_dir], error_message="Failed to create untar destination directory")
.add(["tar", extract_flag, archive_path, "-C", dest_dir], error_message="Failed to extract tar archive")
.execute(timeout=self._DEFAULT_TIMEOUT_SECONDS, raise_on_error=True)
)
except PipelineExecutionError as exc:
raise RuntimeError(str(exc)) from exc
return dest_dir