"""web/s3.py — thin boto3 wrapper for output uploads and presigned downloads.

Used by the build pipeline (to upload generated PPTXs) and the download
endpoint (to mint short-lived URLs the browser fetches directly from S3,
bypassing the Flask server).

Config (via .env / environment):
    S3_BUCKET, AWS_REGION, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY
    S3_OUTPUTS_PREFIX   (default 'outputs/')
    PRESIGNED_URL_TTL   (default 3600 seconds)

The boto3 client is process-cached: first call constructs it, subsequent
calls reuse. Thread-safe under boto3.
"""

from __future__ import annotations

import os
import threading
from typing import Optional
from urllib.parse import quote

import boto3
from botocore.config import Config

_PPTX_CONTENT_TYPE = (
    "application/vnd.openxmlformats-officedocument.presentationml.presentation"
)

# Process-wide cached client; _lock guards its one-time construction.
_client = None
_lock = threading.Lock()


def _get_client():
    """boto3 S3 client wired to the regional endpoint with SigV4.

    Pinning ``endpoint_url`` to the regional host
    (``s3.{region}.amazonaws.com``) is required for presigned URLs in any
    region except us-east-1 — otherwise AWS redirects the client from the
    global host to the regional one, but the redirected request fails
    signature verification because the host changed after the URL was signed.

    Raises:
        KeyError: if AWS_REGION, AWS_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY
            is missing from the environment on first construction.
    """
    global _client
    # Fast path: no locking once the client exists.
    if _client is not None:
        return _client
    with _lock:
        # Re-check under the lock so only one thread builds the client.
        if _client is None:
            region = os.environ["AWS_REGION"]
            _client = boto3.client(
                "s3",
                region_name=region,
                aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
                aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
                endpoint_url=f"https://s3.{region}.amazonaws.com",
                config=Config(
                    signature_version="s3v4",
                    s3={"addressing_style": "virtual"},
                ),
            )
    return _client


def _bucket() -> str:
    """Return the target bucket name from S3_BUCKET (KeyError if unset)."""
    return os.environ["S3_BUCKET"]


def _outputs_prefix() -> str:
    """Return the key prefix for outputs (S3_OUTPUTS_PREFIX, default 'outputs/')."""
    return os.environ.get("S3_OUTPUTS_PREFIX", "outputs/")


def upload_output(local_path: str, job_id: str) -> str:
    """Upload a generated PPTX. Returns the resulting S3 key.

    Sets Content-Type to the OOXML pptx mime so browsers infer the right
    save-as filename extension when fetched via presigned URL.
    """
    key = f"{_outputs_prefix()}{job_id}.pptx"
    _get_client().upload_file(
        local_path,
        _bucket(),
        key,
        ExtraArgs={"ContentType": _PPTX_CONTENT_TYPE},
    )
    return key


def _content_disposition(filename: str) -> str:
    """Build a safe ``attachment`` Content-Disposition value for *filename*.

    For a plain ASCII name without quotes or backslashes the result is
    exactly ``attachment; filename="<name>"``. Otherwise:

    * non-printable characters (CR, LF, tab, other controls) are dropped so
      the value cannot corrupt or split the response header;
    * backslashes and double quotes are backslash-escaped so the name stays
      inside the quoted-string;
    * non-ASCII characters are replaced by ``_`` in the plain ``filename``
      fallback, and the real name is carried percent-encoded in an
      additional ``filename*=UTF-8''...`` parameter (RFC 5987 / RFC 6266).

    Returns bare ``attachment`` if nothing printable remains.
    """
    printable = "".join(ch for ch in str(filename) if ch.isprintable())
    if not printable:
        return "attachment"

    ascii_name = "".join(ch if ch.isascii() else "_" for ch in printable)
    # Escape the backslash first so the quote escapes are not doubled.
    quoted = ascii_name.replace("\\", "\\\\").replace('"', '\\"')
    disposition = f'attachment; filename="{quoted}"'

    if ascii_name != printable:
        # Extended parameter lets modern browsers recover the Unicode name.
        disposition += f"; filename*=UTF-8''{quote(printable, safe='')}"
    return disposition


def presigned_get_url(
    key: str,
    ttl_seconds: Optional[int] = None,
    download_filename: Optional[str] = None,
) -> str:
    """Generate a short-lived signed URL for downloading an S3 object.

    Args:
        key: S3 object key to sign a GET for.
        ttl_seconds: URL lifetime in seconds; when ``None`` the value of
            PRESIGNED_URL_TTL is used (default 3600).
        download_filename: when given, sets a Content-Disposition header on
            the response so the browser saves the file with a friendly name
            (instead of the raw object key like 'outputs/{job_id}.pptx').
            The name is sanitised and escaped before being placed in the
            header value.

    Returns:
        The presigned URL string.

    Raises:
        ValueError: if PRESIGNED_URL_TTL is set but is not an integer.
    """
    if ttl_seconds is None:
        ttl_seconds = int(os.environ.get("PRESIGNED_URL_TTL", "3600"))

    params = {"Bucket": _bucket(), "Key": key}
    if download_filename:
        params["ResponseContentDisposition"] = _content_disposition(
            download_filename
        )

    return _get_client().generate_presigned_url(
        "get_object",
        Params=params,
        ExpiresIn=ttl_seconds,
    )


def delete_output(key: str) -> None:
    """Delete an output object from S3 (used for cleanup / job deletion)."""
    _get_client().delete_object(Bucket=_bucket(), Key=key)