diff --git a/api/core/sandbox/inspector.py b/api/core/sandbox/inspector.py index 166b7ca434..d506f45da9 100644 --- a/api/core/sandbox/inspector.py +++ b/api/core/sandbox/inspector.py @@ -11,7 +11,8 @@ from uuid import UUID, uuid4 from core.sandbox.entities.files import SandboxFileDownloadTicket, SandboxFileNode from core.sandbox.manager import SandboxManager from core.sandbox.security.archive_signer import SandboxArchivePath -from core.sandbox.security.sandbox_file_signer import SandboxFileDownloadPath, SandboxFileSigner +from core.sandbox.security.sandbox_file_signer import SandboxFileDownloadPath +from core.sandbox.storage import sandbox_file_storage from core.virtual_environment.__base.exec import CommandExecutionError from core.virtual_environment.__base.helpers import execute from core.virtual_environment.__base.virtual_environment import VirtualEnvironment @@ -137,11 +138,7 @@ print(json.dumps(entries)) filename=filename, ) - upload_url = SandboxFileSigner.build_signed_url( - export_path=export_path, - expires_in=self._EXPORT_EXPIRES_IN_SECONDS, - action=SandboxFileSigner.OPERATION_UPLOAD, - ) + upload_url = sandbox_file_storage.get_upload_url(export_path, expires_in=self._EXPORT_EXPIRES_IN_SECONDS) if kind == "dir": archive_path = f"/tmp/{export_id}.tar.gz" @@ -182,11 +179,7 @@ print(json.dumps(entries)) except CommandExecutionError as exc: raise RuntimeError(str(exc)) from exc - download_url = SandboxFileSigner.build_signed_url( - export_path=export_path, - expires_in=self._EXPORT_EXPIRES_IN_SECONDS, - action=SandboxFileSigner.OPERATION_DOWNLOAD, - ) + download_url = sandbox_file_storage.get_download_url(export_path, expires_in=self._EXPORT_EXPIRES_IN_SECONDS) return SandboxFileDownloadTicket( download_url=download_url, expires_in=self._EXPORT_EXPIRES_IN_SECONDS, @@ -340,12 +333,10 @@ class SandboxFileArchiveSource(SandboxFileSource): extracted = tf.extractfile(member) if extracted is None: raise ValueError("File not found in sandbox archive") - 
storage.save(export_path.get_storage_key(), extracted.read()) + sandbox_file_storage.save(export_path, extracted.read()) - download_url = SandboxFileSigner.build_signed_url( - export_path=export_path, - expires_in=self._EXPORT_EXPIRES_IN_SECONDS, - action=SandboxFileSigner.OPERATION_DOWNLOAD, + download_url = sandbox_file_storage.get_download_url( + export_path, expires_in=self._EXPORT_EXPIRES_IN_SECONDS ) return SandboxFileDownloadTicket( download_url=download_url, @@ -408,12 +399,10 @@ class SandboxFileArchiveSource(SandboxFileSource): ti.size = int(m.size) out.addfile(ti, fileobj=extracted) - storage.save(export_path.get_storage_key(), Path(export_local).read_bytes()) + sandbox_file_storage.save(export_path, Path(export_local).read_bytes()) - download_url = SandboxFileSigner.build_signed_url( - export_path=export_path, - expires_in=self._EXPORT_EXPIRES_IN_SECONDS, - action=SandboxFileSigner.OPERATION_DOWNLOAD, + download_url = sandbox_file_storage.get_download_url( + export_path, expires_in=self._EXPORT_EXPIRES_IN_SECONDS ) return SandboxFileDownloadTicket( download_url=download_url, diff --git a/api/core/sandbox/storage/__init__.py b/api/core/sandbox/storage/__init__.py index 7206370c10..c7c405204e 100644 --- a/api/core/sandbox/storage/__init__.py +++ b/api/core/sandbox/storage/__init__.py @@ -1,4 +1,12 @@ from .archive_storage import ArchiveSandboxStorage +from .noop_storage import NoopSandboxStorage +from .sandbox_file_storage import SandboxFileStorage, sandbox_file_storage from .sandbox_storage import SandboxStorage -__all__ = ["ArchiveSandboxStorage", "SandboxStorage"] +__all__ = [ + "ArchiveSandboxStorage", + "NoopSandboxStorage", + "SandboxFileStorage", + "SandboxStorage", + "sandbox_file_storage", +] diff --git a/api/core/sandbox/storage/noop_storage.py b/api/core/sandbox/storage/noop_storage.py new file mode 100644 index 0000000000..d67cade98b --- /dev/null +++ b/api/core/sandbox/storage/noop_storage.py @@ -0,0 +1,18 @@ +from 
from __future__ import annotations

from typing import Any

from core.sandbox.security.sandbox_file_signer import SandboxFileDownloadPath, SandboxFileSigner
from extensions.ext_redis import redis_client
from extensions.ext_storage import storage
from extensions.storage.base_storage import BaseStorage
from extensions.storage.cached_presign_storage import CachedPresignStorage
from extensions.storage.silent_storage import SilentStorage


class SandboxFileStorage:
    """Storage facade for sandbox file exports/imports.

    Wraps a ``BaseStorage`` backend in a ``CachedPresignStorage`` (presigned
    URLs cached in Redis under the ``sandbox_file_downloads`` prefix) and falls
    back to app-signed URLs (``SandboxFileSigner``) when the backend does not
    support presigning (raises ``NotImplementedError``).
    """

    _base_storage: BaseStorage
    _storage: CachedPresignStorage

    def __init__(self, storage: BaseStorage, *, redis_client: Any) -> None:
        # NOTE: the `storage` / `redis_client` parameters intentionally shadow
        # the module-level imports; inside this method only the injected
        # objects are used.
        self._base_storage = storage
        self._storage = CachedPresignStorage(
            storage=storage,
            redis_client=redis_client,
            cache_key_prefix="sandbox_file_downloads",
        )

    def save(self, download_path: SandboxFileDownloadPath, content: bytes) -> None:
        """Persist `content` under the storage key derived from `download_path`."""
        self._storage.save(download_path.get_storage_key(), content)

    def get_download_url(self, download_path: SandboxFileDownloadPath, expires_in: int = 3600) -> str:
        """Return a time-limited download URL for `download_path`.

        Prefers a backend presigned URL; if the backend raises
        ``NotImplementedError``, falls back to an application-signed URL.
        """
        storage_key = download_path.get_storage_key()
        try:
            return self._storage.get_download_url(storage_key, expires_in)
        except NotImplementedError:
            return SandboxFileSigner.build_signed_url(
                export_path=download_path,
                expires_in=expires_in,
                action=SandboxFileSigner.OPERATION_DOWNLOAD,
            )

    def get_upload_url(self, download_path: SandboxFileDownloadPath, expires_in: int = 3600) -> str:
        """Return a time-limited upload URL for `download_path`.

        Same presign-first / app-signed-fallback strategy as
        :meth:`get_download_url`, but with the UPLOAD action.
        """
        storage_key = download_path.get_storage_key()
        try:
            return self._storage.get_upload_url(storage_key, expires_in)
        except NotImplementedError:
            return SandboxFileSigner.build_signed_url(
                export_path=download_path,
                expires_in=expires_in,
                action=SandboxFileSigner.OPERATION_UPLOAD,
            )


class _LazySandboxFileStorage:
    """Lazy proxy that defers SandboxFileStorage construction to first use.

    The global ``storage`` extension is initialized via ``storage.init_app``
    at application startup; constructing the real instance at import time
    would race that. Attribute access is forwarded to the singleton via
    ``__getattr__``.
    """

    _instance: SandboxFileStorage | None

    def __init__(self) -> None:
        self._instance = None

    def _get_instance(self) -> SandboxFileStorage:
        # Build (and cache) the real storage on first access. `storage_runner`
        # only exists after storage.init_app has run, so fail loudly otherwise.
        if self._instance is None:
            if not hasattr(storage, "storage_runner"):
                raise RuntimeError(
                    "Storage is not initialized; call storage.init_app before using sandbox_file_storage"
                )
            self._instance = SandboxFileStorage(
                storage=SilentStorage(storage.storage_runner), redis_client=redis_client
            )
        return self._instance

    def __getattr__(self, name: str):
        # Forward every attribute lookup (save/get_download_url/...) to the
        # lazily-built SandboxFileStorage instance.
        return getattr(self._get_instance(), name)


# Module-level singleton used by inspector.py and other sandbox callers.
sandbox_file_storage = _LazySandboxFileStorage()
raise_on_error: bool = False) -> list[CommandResult]: diff --git a/api/core/zip_sandbox/__init__.py b/api/core/zip_sandbox/__init__.py new file mode 100644 index 0000000000..5d261b238a --- /dev/null +++ b/api/core/zip_sandbox/__init__.py @@ -0,0 +1,6 @@ +from .session import SandboxArchiveFile, ZipSandbox + +__all__ = [ + "SandboxArchiveFile", + "ZipSandbox", +] diff --git a/api/core/zip_sandbox/session.py b/api/core/zip_sandbox/session.py new file mode 100644 index 0000000000..cb10b5aef3 --- /dev/null +++ b/api/core/zip_sandbox/session.py @@ -0,0 +1,308 @@ +from __future__ import annotations + +import hashlib +import posixpath +from dataclasses import dataclass +from io import BytesIO +from pathlib import PurePosixPath +from types import TracebackType +from typing import Any +from urllib.parse import urlparse +from uuid import uuid4 + +from core.sandbox.builder import SandboxBuilder +from core.sandbox.entities.sandbox_type import SandboxType +from core.sandbox.manager import SandboxManager +from core.sandbox.sandbox import Sandbox +from core.sandbox.storage.noop_storage import NoopSandboxStorage +from core.virtual_environment.__base.exec import CommandExecutionError, PipelineExecutionError +from core.virtual_environment.__base.helpers import execute, pipeline +from core.virtual_environment.__base.virtual_environment import VirtualEnvironment +from services.sandbox.sandbox_provider_service import SandboxProviderService + + +@dataclass(frozen=True) +class SandboxArchiveFile: + file_path: str + size_bytes: int + sha256: str + + +class ZipSandbox: + """A sandbox specifically for archive (tar) operations. + + Usage: + with ZipSandbox(tenant_id=..., user_id=...) 
as zs: + zs.write_file("a.txt", b"hello") + archive = zs.tar() + zs.upload(path=archive.file_path, target_url=url) + # VM automatically released on exit + """ + + _DEFAULT_TIMEOUT_SECONDS = 60 * 5 + + def __init__( + self, + *, + tenant_id: str | None = None, + user_id: str | None = None, + app_id: str = "zip-sandbox", + sandbox_provider_type: str | None = None, + sandbox_provider_options: dict[str, Any] | None = None, + # For testing: allow injecting a VM directly + _vm: VirtualEnvironment | None = None, + ) -> None: + self._tenant_id = tenant_id + self._user_id = user_id + self._app_id = app_id + self._sandbox_provider_type = sandbox_provider_type + self._sandbox_provider_options = sandbox_provider_options + self._injected_vm = _vm + + self._sandbox: Sandbox | None = None + self._sandbox_id: str | None = None + self._vm: VirtualEnvironment | None = None + + def __enter__(self) -> ZipSandbox: + self._start() + return self + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + tb: TracebackType | None, + ) -> None: + self._stop() + + def _start(self) -> None: + if self._vm is not None: + raise RuntimeError("ZipSandbox already started") + + # If VM is injected (for testing), use it directly + if self._injected_vm is not None: + self._vm = self._injected_vm + self._sandbox_id = uuid4().hex + return + + if not self._tenant_id: + raise ValueError("tenant_id is required") + if not self._user_id: + raise ValueError("user_id is required") + + if self._sandbox_provider_type is None or self._sandbox_provider_options is None: + provider = SandboxProviderService.get_sandbox_provider(self._tenant_id) + provider_type = provider.provider_type + provider_options = dict(provider.config) + else: + provider_type = self._sandbox_provider_type + provider_options = dict(self._sandbox_provider_options) + + self._sandbox_id = uuid4().hex + + storage = NoopSandboxStorage() + self._sandbox = ( + SandboxBuilder(self._tenant_id, 
SandboxType(provider_type)) + .options(provider_options) + .user(self._user_id) + .app(self._app_id) + .storage(storage, assets_id="zip-sandbox") + .build() + ) + self._sandbox.wait_ready(timeout=60) + self._vm = self._sandbox.vm + + SandboxManager.register(self._sandbox_id, self._vm) + + def _stop(self) -> None: + if self._vm is None: + return + + if self._sandbox_id: + SandboxManager.unregister(self._sandbox_id) + + if self._sandbox is not None: + self._sandbox.release() + + self._vm = None + self._sandbox = None + self._sandbox_id = None + + @property + def vm(self) -> VirtualEnvironment: + if self._vm is None: + raise RuntimeError("ZipSandbox not started. Use 'with ZipSandbox(...) as zs:'") + return self._vm + + # ========== Path utilities ========== + + @staticmethod + def _normalize_workspace_path(path: str | None) -> str: + raw = (path or ".").strip() + if raw == "": + raw = "." + + p = PurePosixPath(raw) + if p.is_absolute(): + raise ValueError("path must be relative") + if any(part == ".." for part in p.parts): + raise ValueError("path must not contain '..'") + + normalized = str(p) + return "." 
if normalized in (".", "") else normalized + + @staticmethod + def _dest_path_for_url(dest_dir: str, url: str) -> str: + parsed = urlparse(url) + path = parsed.path or "" + name = posixpath.basename(path) + if not name: + name = "download.bin" + return posixpath.join(dest_dir, name) + + # ========== File operations ========== + + def write_file(self, path: str, data: bytes) -> None: + path = self._normalize_workspace_path(path) + if path in ("", "."): + raise ValueError("path must point to a file") + + try: + self.vm.upload_file(path, BytesIO(data)) + except Exception as exc: + raise RuntimeError(f"Failed to write file to sandbox: {exc}") from exc + + def read_file(self, path: str, *, max_bytes: int = 10 * 1024 * 1024) -> bytes: + path = self._normalize_workspace_path(path) + if max_bytes <= 0: + raise ValueError("max_bytes must be positive") + + try: + data = self.vm.download_file(path).getvalue() + except Exception as exc: + raise RuntimeError(f"Failed to read file from sandbox: {exc}") from exc + + if len(data) > max_bytes: + raise ValueError(f"File too large: {len(data)} > {max_bytes}") + return data + + # ========== Download operations ========== + + def download(self, urls: list[str], *, dest_dir: str = ".") -> list[str]: + if not urls: + return [] + + dest_dir = self._normalize_workspace_path(dest_dir) + + paths = [self._dest_path_for_url(dest_dir, u) for u in urls] + p = pipeline(self.vm) + p.add(["mkdir", "-p", dest_dir], error_message="Failed to create download directory") + for url, out_path in zip(urls, paths, strict=True): + p.add(["curl", "-fsSL", url, "-o", out_path], error_message="Failed to download file") + + try: + p.execute(timeout=self._DEFAULT_TIMEOUT_SECONDS, raise_on_error=True) + except Exception as exc: + raise RuntimeError(str(exc)) from exc + + return paths + + def download_archive(self, archive_url: str, *, path: str = "input.tar.gz") -> str: + path = self._normalize_workspace_path(path) + + dir_path = posixpath.dirname(path) + p = 
pipeline(self.vm) + if dir_path not in ("", "."): + p.add(["mkdir", "-p", dir_path], error_message=f"Failed to create archive download directory {dir_path}") + p.add(["curl", "-fsSL", archive_url, "-o", path], error_message=f"Failed to download archive to {path}") + try: + p.execute(timeout=self._DEFAULT_TIMEOUT_SECONDS, raise_on_error=True) + except Exception as exc: + raise RuntimeError(str(exc)) from exc + + return path + + # ========== Upload operations ========== + + def upload(self, *, path: str, target_url: str) -> None: + path = self._normalize_workspace_path(path) + if path in ("", "."): + raise ValueError("path must point to a file") + + try: + execute( + self.vm, + ["curl", "-fsSL", "-X", "PUT", "-T", path, target_url], + timeout=self._DEFAULT_TIMEOUT_SECONDS, + error_message="Failed to upload file from sandbox", + ) + except CommandExecutionError as exc: + raise RuntimeError(str(exc)) from exc + + # ========== Archive operations ========== + + def tar(self, src: str = ".", *, out_path: str | None = None) -> SandboxArchiveFile: + src = self._normalize_workspace_path(src) + if out_path is None: + out_path = f"{uuid4().hex}.tar" + out_path = self._normalize_workspace_path(out_path) + lower_out = out_path.lower() + if not (lower_out.endswith(".tar") or lower_out.endswith(".tar.gz") or lower_out.endswith(".tgz")): + raise ValueError("out_path must end with .tar/.tar.gz/.tgz") + + out_dir = posixpath.dirname(out_path) + is_gz = lower_out.endswith(".tar.gz") or lower_out.endswith(".tgz") + tar_flag = "-czf" if is_gz else "-cf" + is_cwd = src in (".", "") + + # Avoid "archive cannot contain itself" when archiving the current directory. + # Create the archive outside the workspace tree and move it into place. 
+ tmp_archive = f"/tmp/{uuid4().hex}{'.tar.gz' if is_gz else '.tar'}" + + try: + ( + pipeline(self.vm) + .add( + ["mkdir", "-p", out_dir], + error_message="Failed to create archive output directory", + on=out_dir not in ("", "."), + ) + .add( + ["tar", tar_flag, tmp_archive, "-C", ".", "."], + error_message="Failed to create tar archive", + on=is_cwd, + ) + .add(["tar", tar_flag, tmp_archive, src], error_message="Failed to create tar archive", on=not is_cwd) + .add(["mv", "-f", tmp_archive, out_path], error_message="Failed to move tar archive into place") + .execute(timeout=self._DEFAULT_TIMEOUT_SECONDS, raise_on_error=True) + ) + except PipelineExecutionError as exc: + raise RuntimeError(str(exc)) from exc + + # Compute size + sha256 on host side (avoid requiring sha256sum in sandbox). + try: + data = self.vm.download_file(out_path).getvalue() + except Exception as exc: + raise RuntimeError(f"Failed to read tar result from sandbox: {exc}") from exc + + return SandboxArchiveFile(file_path=out_path, size_bytes=len(data), sha256=hashlib.sha256(data).hexdigest()) + + def untar(self, *, archive_path: str, dest_dir: str = "unpacked") -> str: + archive_path = self._normalize_workspace_path(archive_path) + dest_dir = self._normalize_workspace_path(dest_dir) + + lower = archive_path.lower() + is_gz = lower.endswith(".tar.gz") or lower.endswith(".tgz") + extract_flag = "-xzf" if is_gz else "-xf" + + try: + ( + pipeline(self.vm) + .add(["mkdir", "-p", dest_dir], error_message="Failed to create untar destination directory") + .add(["tar", extract_flag, archive_path, "-C", dest_dir], error_message="Failed to extract tar archive") + .execute(timeout=self._DEFAULT_TIMEOUT_SECONDS, raise_on_error=True) + ) + except PipelineExecutionError as exc: + raise RuntimeError(str(exc)) from exc + + return dest_dir