From 149591812a54cd832409010322103d24130380f0 Mon Sep 17 00:00:00 2001 From: Varshith Bathini Date: Mon, 29 Dec 2025 17:42:59 +0000 Subject: [PATCH 1/6] feat: benchmark --- .gitignore | 2 + README.md | 46 ++ benchmark.py | 542 ++++++++++++++++++ pyproject.toml | 5 + .../krea_realtime_video/docs/usage.md | 2 +- .../core/pipelines/longlive/docs/usage.md | 2 +- .../pipelines/streamdiffusionv2/docs/usage.md | 2 +- uv.lock | 28 + 8 files changed, 626 insertions(+), 3 deletions(-) create mode 100644 benchmark.py diff --git a/.gitignore b/.gitignore index 62928309..60b4c7e2 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,8 @@ src/scope/core/pipelines/**/*.mp4 notes/ +benchmark_*.json + # Cursor IDE files .cursorrules .cursorignore diff --git a/README.md b/README.md index 58251d98..e513f129 100644 --- a/README.md +++ b/README.md @@ -131,6 +131,52 @@ After your first generation you can: - Use [LoRAs](./docs/lora.md) to customize the concepts and styles used in your generations. - Use [Spout](./docs/spout.md) (Windows only) to share real-time video between Scope and other local applications. +## Benchmarking + +Scope includes a comprehensive benchmarking suite to test pipeline performance across different configurations and hardware setups. This is useful for: + +- Understanding performance characteristics of different GPUs (H100, A6000, 4090, etc.) +- Determining optimal configurations (resolution, denoising steps) for your hardware +- Identifying optimization opportunities + +### Quick Start + +Install benchmark dependencies: + +```bash +uv sync --group benchmark +``` + +Run a comprehensive benchmark (all pipelines, all configurations): + +```bash +uv run benchmark.py +``` + +### Example Usage + +```bash +# Benchmark specific pipelines +uv run benchmark.py --pipelines streamdiffusionv2 longlive + +# Custom resolutions +uv run benchmark.py --resolutions 480x832 768x1344 + +# Custom iterations (defaults: warmup=10, iterations=100) +uv run benchmark.py --warmup 5 --iterations 50 + +# Save results to specific file +uv run benchmark.py --output h100_results.json +``` + +### Output + +The benchmark generates a JSON file with: +- Hardware specifications (GPU, CPU, memory) +- Average performance metrics per configuration +- Peak resource utilization (VRAM, GPU utilization, CPU usage) + + ## Firewalls If you run Scope in a cloud environment with restrictive firewall settings (eg. Runpod), Scope supports using [TURN servers](https://webrtc.org/getting-started/turn-server) to establish a connection between your browser and the streaming server. diff --git a/benchmark.py b/benchmark.py new file mode 100644 index 00000000..726fd5fe --- /dev/null +++ b/benchmark.py @@ -0,0 +1,542 @@ +#!/usr/bin/env python3 +""" +Simple Benchmarking Script for Scope Pipelines. 
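+
+Runs each selected pipeline across a matrix of resolutions, measuring per-call
+latency, FPS, jitter, and peak GPU/CPU usage, and writes the results to a JSON
+report alongside the detected hardware configuration.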
+ +Usage: + uv run benchmark.py [options] +""" + +import argparse +import gc +import json +import platform +import threading +import time +import statistics +from datetime import datetime +from pathlib import Path +from typing import Any + +import psutil +import torch +from omegaconf import OmegaConf + +# Optional dependencies +try: + import cpuinfo +except ImportError: + cpuinfo = None + +try: + import pynvml + PYNVML_AVAILABLE = True +except ImportError: + PYNVML_AVAILABLE = False + + +# Scope imports +from scope.core.config import get_model_file_path, get_models_dir +from scope.core.pipelines.registry import PipelineRegistry +from scope.core.pipelines.utils import Quantization + + +# ================================================================================================= +# HARDWARE INFO +# ================================================================================================= + +class HardwareInfo: + """Collects and stores hardware information.""" + + def __init__(self): + self._info = self._collect_info() + + def _collect_info(self) -> dict[str, Any]: + return { + "gpu": self._get_gpu_info(), + "cpu": self._get_cpu_info(), + "memory": self._get_memory_info(), + "platform": self._get_platform_info(), + } + + def _get_gpu_info(self) -> dict[str, Any]: + gpu_info = {"available": torch.cuda.is_available(), "count": 0, "devices": []} + if not torch.cuda.is_available(): + return gpu_info + + gpu_info["count"] = torch.cuda.device_count() + gpu_info["cuda_version"] = torch.version.cuda + + if PYNVML_AVAILABLE: + try: + pynvml.nvmlInit() + for i in range(gpu_info["count"]): + handle = pynvml.nvmlDeviceGetHandleByIndex(i) + name = pynvml.nvmlDeviceGetName(handle) + if isinstance(name, bytes): name = name.decode("utf-8") + + mem = pynvml.nvmlDeviceGetMemoryInfo(handle) + driver = pynvml.nvmlSystemGetDriverVersion() + if isinstance(driver, bytes): driver = driver.decode("utf-8") + + gpu_info["devices"].append({ + "index": i, + "name": name, + "memory_total_gb": mem.total / (1024**3), + "driver_version": driver, + }) + pynvml.nvmlShutdown() + except Exception: + pass + + if not gpu_info["devices"]: + for i in range(gpu_info["count"]): + props = torch.cuda.get_device_properties(i) + gpu_info["devices"].append({ + "index": i, + "name": props.name, + "memory_total_gb": props.total_memory / (1024**3), + }) + + return gpu_info + + def _get_cpu_info(self) -> dict[str, Any]: + return { + "physical_cores": psutil.cpu_count(logical=False), + "logical_cores": psutil.cpu_count(logical=True), + "model": platform.processor(), + } + + def _get_memory_info(self) -> dict[str, Any]: + mem = psutil.virtual_memory() + return {"total_gb": mem.total / (1024**3), "available_gb": mem.available / (1024**3)} + + def _get_platform_info(self) -> dict[str, Any]: + return { + "system": platform.system(), + "release": platform.release(), + "python_version": platform.python_version(), + "pytorch_version": torch.__version__, + } + + def to_dict(self) -> dict[str, Any]: + return self._info + + def get_primary_gpu_vram_gb(self) -> float: + if not self._info["gpu"]["available"] or not self._info["gpu"]["devices"]: + return 0.0 + return self._info["gpu"]["devices"][0]["memory_total_gb"] + + +# ================================================================================================= +# RESOURCE MONITOR +# ================================================================================================= + +class ResourceMonitor: + def __init__(self, interval_ms: int = 100, device_index: int = 0): + 
self.interval_ms = interval_ms + self.device_index = device_index + self._monitoring = False + self._thread = None + self._samples = [] + self._lock = threading.Lock() + self._process = psutil.Process() + self._pynvml_initialized = False + self._gpu_handle = None + + if PYNVML_AVAILABLE and torch.cuda.is_available(): + try: + pynvml.nvmlInit() + self._gpu_handle = pynvml.nvmlDeviceGetHandleByIndex(device_index) + self._pynvml_initialized = True + except Exception: + pass + + def start(self): + if self._monitoring: return + self._monitoring = True + self._samples = [] + self._thread = threading.Thread(target=self._monitor_loop, daemon=True) + self._thread.start() + + def stop(self): + if not self._monitoring: return + self._monitoring = False + if self._thread: + self._thread.join(timeout=2.0) + self._thread = None + + def _monitor_loop(self): + while self._monitoring: + sample = self._collect_sample() + with self._lock: + self._samples.append(sample) + time.sleep(self.interval_ms / 1000.0) + + def _collect_sample(self) -> dict[str, Any]: + sample = {} + if torch.cuda.is_available(): + try: + sample["gpu_memory_allocated_gb"] = torch.cuda.memory_allocated(self.device_index) / (1024**3) + if self._pynvml_initialized and self._gpu_handle: + util = pynvml.nvmlDeviceGetUtilizationRates(self._gpu_handle) + sample["gpu_utilization_percent"] = util.gpu + except Exception: + pass + + try: + sample["system_cpu_percent"] = psutil.cpu_percent() + except Exception: + pass + return sample + + def get_statistics(self) -> dict[str, float]: + with self._lock: samples = self._samples.copy() + if not samples: return {} + + stats = {} + keys = ["gpu_memory_allocated_gb", "gpu_utilization_percent", "system_cpu_percent"] + for key in keys: + values = [s[key] for s in samples if key in s] + if values: + stats[f"{key}_avg"] = sum(values) / len(values) + stats[f"{key}_max"] = max(values) + return stats + + def cleanup(self): + self.stop() + if self._pynvml_initialized: + try: pynvml.nvmlShutdown() + except Exception: pass + + +# ================================================================================================= +# CONFIGURATION MATRIX +# ================================================================================================= + +class ConfigurationMatrix: + # Default resolutions to test + STANDARD_RESOLUTIONS = [ + (320, 576), + (480, 832), + (512, 512), + (576, 1024), + (768, 1344), + ] + + # Defaults (Single run per resolution) + DEFAULT_PROMPT = "A realistic video of a serene landscape with rolling hills, a clear blue sky, and a gentle stream." 
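+
+    # VRAM gating (used by _check_constraints and _get_resolutions below): a pipeline
+    # listed here is skipped entirely below "min_vram_gb", and resolutions above
+    # "high_res_threshold" are only benchmarked with at least "high_res_vram_gb".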
+ + PIPELINE_CONSTRAINTS = { + "krea_realtime_video": { + "min_vram_gb": 32, + "high_res_vram_gb": 40, + "high_res_threshold": (480, 832), + }, + } + + def __init__(self, hardware_vram_gb: float, pipelines=None, resolutions=None, steps=None): + self.hardware_vram_gb = hardware_vram_gb + self.selected_pipelines = pipelines + self.custom_resolutions = resolutions + self.steps = steps or [4] # Default to 4 if not specified + + def build(self) -> list[dict]: + all_pipelines = PipelineRegistry.list_pipelines() + + if self.selected_pipelines: + pipelines = [p for p in all_pipelines if p in self.selected_pipelines] + else: + pipelines = [p for p in all_pipelines if p != "passthrough"] + + configurations = [] + for pid in pipelines: + if not self._check_constraints(pid): + print(f"Skipping {pid}: insufficient VRAM ({self.hardware_vram_gb:.1f}GB)") + continue + + # Determine resolutions + resolutions = self._get_resolutions(pid) + + for h, w in resolutions: + config = { + "pipeline_id": pid, + "height": h, + "width": w, + "denoising_steps": self.steps, + "prompt": self.DEFAULT_PROMPT, + } + configurations.append(config) + + return configurations + + def _check_constraints(self, pid: str) -> bool: + constraints = self.PIPELINE_CONSTRAINTS.get(pid, {}) + return self.hardware_vram_gb >= constraints.get("min_vram_gb", 0) + + def _get_resolutions(self, pid: str) -> list[tuple[int, int]]: + if self.custom_resolutions: + return self.custom_resolutions + + # Default config for the pipeline + pipeline_class = PipelineRegistry.get(pid) + if not pipeline_class: return [] + default_cfg = pipeline_class.get_config_class()() + + # Start with default resolution + res_set = {(default_cfg.height, default_cfg.width)} + + # Add standard ones that fit VRAM constraints + constraints = self.PIPELINE_CONSTRAINTS.get(pid, {}) + high_res_vram = constraints.get("high_res_vram_gb") + threshold = constraints.get("high_res_threshold") + + for h, w in self.STANDARD_RESOLUTIONS: + if high_res_vram and threshold: + th_h, th_w = threshold + if (h > th_h or w > th_w) and self.hardware_vram_gb < high_res_vram: + continue + res_set.add((h, w)) + + return sorted(list(res_set)) + + +# ================================================================================================= +# BENCHMARK RUNNER +# ================================================================================================= + +class BenchmarkRunner: + def __init__(self, warmup_iterations=2, iterations=5, compile_model=False): + self.warmup_iterations = warmup_iterations + self.iterations = iterations + self.compile_model = compile_model + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + def run_config(self, config: dict) -> dict: + pipeline_id = config["pipeline_id"] + print(f"\n--- Benchmarking {pipeline_id} [{config['height']}x{config['width']}] ---") + + pipeline = None + try: + pipeline = self._init_pipeline(config) + inputs = {"prompts": [{"text": config["prompt"], "weight": 100}]} + + # Warmup Phase + if self.warmup_iterations > 0: + print(f"Warmup ({self.warmup_iterations} iterations)...") + for _ in range(self.warmup_iterations): + pipeline(**inputs) + self._clear_memory() + + # Measurement Phase + print(f"Measuring ({self.iterations} iterations)...") + monitor = ResourceMonitor() + latencies = [] + frame_counts = [] + + monitor.start() + for _ in range(self.iterations): + if torch.cuda.is_available(): + torch.cuda.reset_peak_memory_stats() + + t0 = time.time() + output = pipeline(**inputs) + latencies.append(time.time() - 
t0) + + # Check output for frame count (batch size) + # Some pipelines return a tensor (T, C, H, W) or (B, T, C, H, W) + # If it's 4D (T, C, H, W), dim 0 is frames. + # If it's 5D (B, T, C, H, W), dim 1 is frames * batch size. + current_frames = 1 + if hasattr(output, "shape") and len(output.shape) >= 1: + current_frames = output.shape[0] + frame_counts.append(current_frames) + + monitor.stop() + resource_stats = monitor.get_statistics() + monitor.cleanup() + + # Metrics Calculation + if not latencies: + return {"error": "No successful iterations"} + + avg_latency = statistics.mean(latencies) + min_latency = min(latencies) + max_latency = max(latencies) + jitter = statistics.stdev(latencies) if len(latencies) > 1 else 0.0 + + # Calculate FPS based on frames generated per call + avg_frames_per_call = statistics.mean(frame_counts) if frame_counts else 1.0 + + fps_avg = avg_frames_per_call / avg_latency if avg_latency > 0 else 0 + fps_min = avg_frames_per_call / max_latency if max_latency > 0 else 0 + fps_max = avg_frames_per_call / min_latency if min_latency > 0 else 0 + + results = { + "fps_avg": round(fps_avg, 2), + "fps_min": round(fps_min, 2), + "fps_max": round(fps_max, 2), + "latency_avg_sec": round(avg_latency, 4), + "latency_min_sec": round(min_latency, 4), + "latency_max_sec": round(max_latency, 4), + "jitter_sec": round(jitter, 6), + **resource_stats + } + + print(f"-> FPS: {results['fps_avg']} | Latency: {results['latency_avg_sec']}s | Jitter: {results['jitter_sec']}s") + return results + + except Exception as e: + print(f"ERROR: {e}") + return {"error": str(e)} + finally: + del pipeline + self._clear_memory() + + def _init_pipeline(self, config: dict): + pid = config["pipeline_id"] + pipeline_class = PipelineRegistry.get(pid) + if not pipeline_class: raise ValueError(f"Unknown pipeline: {pid}") + + # Path Logic + model_dir = Path("src/scope/core/pipelines") / pid + if not model_dir.exists(): # Handle running from src vs root + model_dir = Path(__file__).parent / "src/scope/core/pipelines" / pid + + model_config = OmegaConf.load(model_dir / "model.yaml") + pipeline_config = { + "model_dir": str(get_models_dir()), + "model_config": model_config, + "height": config["height"], + "width": config["width"], + "denoising_steps": config["denoising_steps"], + } + + # Hardcoded paths matching original test scripts + def model_path(p): return str(get_model_file_path(p)) + wan_enc = model_path("WanVideo_comfy/umt5-xxl-enc-fp8_e4m3fn.safetensors") + wan_tok = model_path("Wan2.1-T2V-1.3B/google/umt5-xxl") + + paths = {} + if pid == "streamdiffusionv2": + paths = {"generator_path": model_path("StreamDiffusionV2/wan_causal_dmd_v2v/model.pt")} + elif pid == "longlive": + paths = { + "generator_path": model_path("LongLive-1.3B/models/longlive_base.pt"), + "lora_path": model_path("LongLive-1.3B/models/lora.pt") + } + elif pid == "krea_realtime_video": + paths = { + "generator_path": model_path("krea-realtime-video/krea-realtime-video-14b.safetensors"), + "vae_path": model_path("Wan2.1-T2V-1.3B/Wan2.1_VAE.pth") + } + elif pid == "reward_forcing": + paths = {"generator_path": model_path("Reward-Forcing-T2V-1.3B/rewardforcing.pt")} + + pipeline_config.update(paths) + if "text_encoder_path" not in pipeline_config: pipeline_config["text_encoder_path"] = wan_enc + if "tokenizer_path" not in pipeline_config: pipeline_config["tokenizer_path"] = wan_tok + + # Init + quantization = Quantization.FP8_E4M3FN if pid == "krea_realtime_video" else None + args = { + "config": OmegaConf.create(pipeline_config), + 
"device": self.device, + "dtype": torch.bfloat16 + } + if quantization: + args.update({"quantization": quantization}) + + # Add compile flag if pipeline accepts it (most new ones do) + # Note: Some pipelines might not have 'compile' arg in __init__, but Krea does. + # We can inspect or try/except, but for simplicity we assume consistency or pass it conditionally + if pid == "krea_realtime_video": + args["compile"] = self.compile_model + # For others, if they support compile, add logic here. + # StreamDiffusionV2 might not expose it in __init__? + # If it inherits from BasePipeline that has it? + # We'll leave it out for others unless we know they support it to avoid TypeError. + + return pipeline_class(**args) + + def _clear_memory(self): + if torch.cuda.is_available(): + torch.cuda.empty_cache() + torch.cuda.synchronize() + gc.collect() + + +# ================================================================================================= +# MAIN +# ================================================================================================= + +def main(): + parser = argparse.ArgumentParser(description="Scope Benchmark") + parser.add_argument("--pipelines", nargs="+", help="Specific pipelines to test") + parser.add_argument("--resolutions", nargs="+", help="Resolutions (e.g. 512x512)") + parser.add_argument("--steps", type=int, default=4, help="Denoising steps (default: 4)") + parser.add_argument("--iterations", type=int, default=100, help="Measurement iterations per config") + parser.add_argument("--warmup", type=int, default=10, help="Warmup iterations per config") + parser.add_argument("--output", default=f"benchmark_{datetime.now().strftime('%Y%m%d_%H%M')}.json") + parser.add_argument("--no-tf32", action="store_true", help="Disable TF32 (enabled by default)") + parser.add_argument("--compile", action="store_true", help="Enable torch.compile") + args = parser.parse_args() + + # Global Torch Settings + if not args.no_tf32 and torch.cuda.is_available(): + torch.backends.cuda.matmul.allow_tf32 = True + torch.backends.cudnn.allow_tf32 = True + print("TF32 Enabled") + + # Parse resolutions + custom_res = [] + if args.resolutions: + for r in args.resolutions: + try: + h, w = map(int, r.split("x")) + custom_res.append((h, w)) + except ValueError: pass + + # Detect Hardware + hw = HardwareInfo() + print("\n=== Hardware ===") + print(f"GPU: {hw._get_gpu_info().get('devices', [{}])[0].get('name', 'None')}") + print(f"VRAM: {hw.get_primary_gpu_vram_gb():.1f} GB") + + # Build Configurations (1 per resolution) + matrix = ConfigurationMatrix( + hw.get_primary_gpu_vram_gb(), + pipelines=args.pipelines, + resolutions=custom_res, + steps=[args.steps] + ).build() + + print(f"\nPlanned Configurations: {len(matrix)}") + if not matrix: return + + # Run + runner = BenchmarkRunner(args.warmup, args.iterations, compile_model=args.compile) + results = [] + + try: + for i, config in enumerate(matrix, 1): + print(f"\n[{i}/{len(matrix)}]", end=" ") + metrics = runner.run_config(config) + results.append({ + "pipeline": config["pipeline_id"], + "resolution": f"{config['height']}x{config['width']}", + "metrics": metrics + }) + except KeyboardInterrupt: + print("\nStopped.") + + # Save + data = { + "metadata": {"timestamp": datetime.now().isoformat(), "args": vars(args)}, + "hardware": hw.to_dict(), + "results": results + } + with open(args.output, "w") as f: json.dump(data, f, indent=2) + print(f"\nSaved to {args.output}") + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml index 
70534ae3..a94c395a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -108,6 +108,11 @@ dev = [ "pytest>=8.4.2", "freezegun>=1.5.5", ] +benchmark = [ + "psutil>=6.1.0", + "nvidia-ml-py>=12.560.30", + "py-cpuinfo>=9.0.0", +] [tool.ruff] line-length = 88 diff --git a/src/scope/core/pipelines/krea_realtime_video/docs/usage.md b/src/scope/core/pipelines/krea_realtime_video/docs/usage.md index c5ac8953..fcc09730 100644 --- a/src/scope/core/pipelines/krea_realtime_video/docs/usage.md +++ b/src/scope/core/pipelines/krea_realtime_video/docs/usage.md @@ -89,7 +89,7 @@ Then: ``` # Run from scope directory -uv run -m score.core.pipelines.krea_realtime_video.test +uv run -m scope.core.pipelines.krea_realtime_video.test ``` This will create an `output.mp4` file in the `krea_realtime_video` directory. diff --git a/src/scope/core/pipelines/longlive/docs/usage.md b/src/scope/core/pipelines/longlive/docs/usage.md index 9026970d..9335eedb 100644 --- a/src/scope/core/pipelines/longlive/docs/usage.md +++ b/src/scope/core/pipelines/longlive/docs/usage.md @@ -73,7 +73,7 @@ Then: ``` # Run from scope directory -uv run -m score.core.pipelines.longlive.test +uv run -m scope.core.pipelines.longlive.test ``` This will create an `output.mp4` file in the `longlive` directory. diff --git a/src/scope/core/pipelines/streamdiffusionv2/docs/usage.md b/src/scope/core/pipelines/streamdiffusionv2/docs/usage.md index 036dd02b..d4c16058 100644 --- a/src/scope/core/pipelines/streamdiffusionv2/docs/usage.md +++ b/src/scope/core/pipelines/streamdiffusionv2/docs/usage.md @@ -55,7 +55,7 @@ Then: ``` # Run from scope directory -uv run -m score.core.pipelines.streamdiffusionv2.test +uv run -m scope.core.pipelines.streamdiffusionv2.test ``` This will create an `output.mp4` file in the `streamdiffusionv2` directory. 
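The benchmark's JSON report (described in the README `Output` section added above) is easy to post-process. The following is a minimal sketch, not part of this patch, assuming a `benchmark_*.json` report produced with the default `--output` naming sits in the current directory:

```python
import json
from pathlib import Path

# Pick the most recent report written by benchmark.py (default "--output" naming
# embeds the timestamp, so lexicographic order is chronological).
reports = sorted(Path(".").glob("benchmark_*.json"))
if not reports:
    raise SystemExit("No benchmark_*.json report found in the current directory.")
data = json.loads(reports[-1].read_text())

devices = data["hardware"]["gpu"]["devices"]
gpu_name = devices[0]["name"] if devices else "CPU only"
print(f"Report: {reports[-1]} ({gpu_name})")

# Each result entry holds the pipeline id, the HxW resolution, and averaged metrics
# (or an "error" string if that configuration failed).
for entry in data["results"]:
    metrics = entry["metrics"]
    if "error" in metrics:
        print(f"{entry['pipeline']:<22} {entry['resolution']:<10} ERROR: {metrics['error']}")
        continue
    print(
        f"{entry['pipeline']:<22} {entry['resolution']:<10} "
        f"{metrics['fps_avg']:>7.2f} fps   {metrics['latency_avg_sec']:.4f} s/call"
    )
```

The keys read here (`pipeline`, `resolution`, `fps_avg`, `latency_avg_sec`) are unchanged by the later cleanup patches in this series, so the sketch applies to reports from any of these revisions.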
diff --git a/uv.lock b/uv.lock index ad0ef91b..60147016 100644 --- a/uv.lock +++ b/uv.lock @@ -655,6 +655,11 @@ dependencies = [ ] [package.dev-dependencies] +benchmark = [ + { name = "nvidia-ml-py" }, + { name = "psutil" }, + { name = "py-cpuinfo" }, +] dev = [ { name = "freezegun" }, { name = "imageio" }, @@ -701,6 +706,11 @@ requires-dist = [ ] [package.metadata.requires-dev] +benchmark = [ + { name = "nvidia-ml-py", specifier = ">=12.560.30" }, + { name = "psutil", specifier = ">=6.1.0" }, + { name = "py-cpuinfo", specifier = ">=9.0.0" }, +] dev = [ { name = "freezegun", specifier = ">=1.5.5" }, { name = "imageio", specifier = ">=2.37.0" }, @@ -1953,6 +1963,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, upload-time = "2025-02-26T00:15:44.104Z" }, ] +[[package]] +name = "nvidia-ml-py" +version = "13.590.44" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1b/23/3871537f204aee823c574ba25cbeb08cae779979d4d43c01adddda00bab9/nvidia_ml_py-13.590.44.tar.gz", hash = "sha256:b358c7614b0fdeea4b95f046f1c90123bfe25d148ab93bb1c00248b834703373", size = 49737, upload-time = "2025-12-08T14:41:10.872Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e4/47/4c822bd37a008e72fd5a0eae33524ae3ac97b13f7030f63bae1728b8957e/nvidia_ml_py-13.590.44-py3-none-any.whl", hash = "sha256:18feb54eca7d0e3cdc8d1a040a771eda72d9ec3148e5443087970dbfd7377ecc", size = 50683, upload-time = "2025-12-08T14:41:09.597Z" }, +] + [[package]] name = "nvidia-nccl-cu12" version = "2.27.3" @@ -2294,6 +2313,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c9/ad/33b2ccec09bf96c2b2ef3f9a6f66baac8253d7565d8839e024a6b905d45d/psutil-7.1.3-cp37-abi3-win_arm64.whl", hash = "sha256:bd0d69cee829226a761e92f28140bec9a5ee9d5b4fb4b0cc589068dbfff559b1", size = 244608, upload-time = "2025-11-02T12:26:36.136Z" }, ] +[[package]] +name = "py-cpuinfo" +version = "9.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/37/a8/d832f7293ebb21690860d2e01d8115e5ff6f2ae8bbdc953f0eb0fa4bd2c7/py-cpuinfo-9.0.0.tar.gz", hash = "sha256:3cdbbf3fac90dc6f118bfd64384f309edeadd902d7c8fb17f02ffa1fc3f49690", size = 104716, upload-time = "2022-10-25T20:38:06.303Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/a9/023730ba63db1e494a271cb018dcd361bd2c917ba7004c3e49d5daf795a2/py_cpuinfo-9.0.0-py3-none-any.whl", hash = "sha256:859625bc251f64e21f077d099d4162689c762b5d6a4c3c97553d56241c9674d5", size = 22335, upload-time = "2022-10-25T20:38:27.636Z" }, +] + [[package]] name = "pycparser" version = "2.23" From 6ffcd549b797160ab741b11309c784c414fef46d Mon Sep 17 00:00:00 2001 From: Varshith Bathini Date: Fri, 2 Jan 2026 09:50:16 +0000 Subject: [PATCH 2/6] fix: benchmark cleanup --- benchmark.py | 278 ++++++++++++++++----------------------------------- 1 file changed, 86 insertions(+), 192 deletions(-) diff --git a/benchmark.py b/benchmark.py index 726fd5fe..28e22484 100644 --- a/benchmark.py +++ b/benchmark.py @@ -1,11 +1,3 @@ -#!/usr/bin/env python3 -""" -Simple Benchmarking Script for Scope Pipelines. 
- -Usage: - uv run benchmark.py [options] -""" - import argparse import gc import json @@ -17,32 +9,17 @@ from pathlib import Path from typing import Any -import psutil import torch +import pynvml +import psutil +import statistics from omegaconf import OmegaConf -# Optional dependencies -try: - import cpuinfo -except ImportError: - cpuinfo = None - -try: - import pynvml - PYNVML_AVAILABLE = True -except ImportError: - PYNVML_AVAILABLE = False - - -# Scope imports -from scope.core.config import get_model_file_path, get_models_dir -from scope.core.pipelines.registry import PipelineRegistry from scope.core.pipelines.utils import Quantization - - -# ================================================================================================= -# HARDWARE INFO -# ================================================================================================= +from scope.core.pipelines.registry import PipelineRegistry +from scope.server.download_models import download_models +from scope.server.models_config import models_are_downloaded +from scope.core.config import get_model_file_path, get_models_dir class HardwareInfo: """Collects and stores hardware information.""" @@ -66,27 +43,23 @@ def _get_gpu_info(self) -> dict[str, Any]: gpu_info["count"] = torch.cuda.device_count() gpu_info["cuda_version"] = torch.version.cuda - if PYNVML_AVAILABLE: - try: - pynvml.nvmlInit() - for i in range(gpu_info["count"]): - handle = pynvml.nvmlDeviceGetHandleByIndex(i) - name = pynvml.nvmlDeviceGetName(handle) - if isinstance(name, bytes): name = name.decode("utf-8") - - mem = pynvml.nvmlDeviceGetMemoryInfo(handle) - driver = pynvml.nvmlSystemGetDriverVersion() - if isinstance(driver, bytes): driver = driver.decode("utf-8") - - gpu_info["devices"].append({ - "index": i, - "name": name, - "memory_total_gb": mem.total / (1024**3), - "driver_version": driver, - }) - pynvml.nvmlShutdown() - except Exception: - pass + pynvml.nvmlInit() + for i in range(gpu_info["count"]): + handle = pynvml.nvmlDeviceGetHandleByIndex(i) + name = pynvml.nvmlDeviceGetName(handle) + if isinstance(name, bytes): name = name.decode("utf-8") + + mem = pynvml.nvmlDeviceGetMemoryInfo(handle) + driver = pynvml.nvmlSystemGetDriverVersion() + if isinstance(driver, bytes): driver = driver.decode("utf-8") + + gpu_info["devices"].append({ + "index": i, + "name": name, + "memory_total_gb": mem.total / (1024**3), + "driver_version": driver, + }) + pynvml.nvmlShutdown() if not gpu_info["devices"]: for i in range(gpu_info["count"]): @@ -126,11 +99,6 @@ def get_primary_gpu_vram_gb(self) -> float: return 0.0 return self._info["gpu"]["devices"][0]["memory_total_gb"] - -# ================================================================================================= -# RESOURCE MONITOR -# ================================================================================================= - class ResourceMonitor: def __init__(self, interval_ms: int = 100, device_index: int = 0): self.interval_ms = interval_ms @@ -143,13 +111,9 @@ def __init__(self, interval_ms: int = 100, device_index: int = 0): self._pynvml_initialized = False self._gpu_handle = None - if PYNVML_AVAILABLE and torch.cuda.is_available(): - try: - pynvml.nvmlInit() - self._gpu_handle = pynvml.nvmlDeviceGetHandleByIndex(device_index) - self._pynvml_initialized = True - except Exception: - pass + pynvml.nvmlInit() + self._gpu_handle = pynvml.nvmlDeviceGetHandleByIndex(device_index) + self._pynvml_initialized = True def start(self): if self._monitoring: return @@ -175,18 +139,12 @@ def 
_monitor_loop(self): def _collect_sample(self) -> dict[str, Any]: sample = {} if torch.cuda.is_available(): - try: - sample["gpu_memory_allocated_gb"] = torch.cuda.memory_allocated(self.device_index) / (1024**3) - if self._pynvml_initialized and self._gpu_handle: - util = pynvml.nvmlDeviceGetUtilizationRates(self._gpu_handle) - sample["gpu_utilization_percent"] = util.gpu - except Exception: - pass + sample["gpu_memory_allocated_gb"] = torch.cuda.memory_allocated(self.device_index) / (1024**3) + if self._pynvml_initialized and self._gpu_handle: + util = pynvml.nvmlDeviceGetUtilizationRates(self._gpu_handle) + sample["gpu_utilization_percent"] = util.gpu - try: - sample["system_cpu_percent"] = psutil.cpu_percent() - except Exception: - pass + sample["system_cpu_percent"] = psutil.cpu_percent() return sample def get_statistics(self) -> dict[str, float]: @@ -200,21 +158,17 @@ def get_statistics(self) -> dict[str, float]: if values: stats[f"{key}_avg"] = sum(values) / len(values) stats[f"{key}_max"] = max(values) + stats[f"{key}_min"] = min(values) + stats[f"{key}_std"] = statistics.stdev(values) return stats def cleanup(self): self.stop() if self._pynvml_initialized: - try: pynvml.nvmlShutdown() - except Exception: pass - + pynvml.nvmlShutdown() -# ================================================================================================= -# CONFIGURATION MATRIX -# ================================================================================================= class ConfigurationMatrix: - # Default resolutions to test STANDARD_RESOLUTIONS = [ (320, 576), (480, 832), @@ -223,22 +177,11 @@ class ConfigurationMatrix: (768, 1344), ] - # Defaults (Single run per resolution) DEFAULT_PROMPT = "A realistic video of a serene landscape with rolling hills, a clear blue sky, and a gentle stream." 
- PIPELINE_CONSTRAINTS = { - "krea_realtime_video": { - "min_vram_gb": 32, - "high_res_vram_gb": 40, - "high_res_threshold": (480, 832), - }, - } - - def __init__(self, hardware_vram_gb: float, pipelines=None, resolutions=None, steps=None): - self.hardware_vram_gb = hardware_vram_gb + def __init__(self, pipelines=None, resolutions=None): self.selected_pipelines = pipelines self.custom_resolutions = resolutions - self.steps = steps or [4] # Default to 4 if not specified def build(self) -> list[dict]: all_pipelines = PipelineRegistry.list_pipelines() @@ -250,11 +193,6 @@ def build(self) -> list[dict]: configurations = [] for pid in pipelines: - if not self._check_constraints(pid): - print(f"Skipping {pid}: insufficient VRAM ({self.hardware_vram_gb:.1f}GB)") - continue - - # Determine resolutions resolutions = self._get_resolutions(pid) for h, w in resolutions: @@ -262,50 +200,30 @@ def build(self) -> list[dict]: "pipeline_id": pid, "height": h, "width": w, - "denoising_steps": self.steps, "prompt": self.DEFAULT_PROMPT, } configurations.append(config) return configurations - def _check_constraints(self, pid: str) -> bool: - constraints = self.PIPELINE_CONSTRAINTS.get(pid, {}) - return self.hardware_vram_gb >= constraints.get("min_vram_gb", 0) - def _get_resolutions(self, pid: str) -> list[tuple[int, int]]: if self.custom_resolutions: return self.custom_resolutions - # Default config for the pipeline pipeline_class = PipelineRegistry.get(pid) if not pipeline_class: return [] default_cfg = pipeline_class.get_config_class()() - # Start with default resolution res_set = {(default_cfg.height, default_cfg.width)} - # Add standard ones that fit VRAM constraints - constraints = self.PIPELINE_CONSTRAINTS.get(pid, {}) - high_res_vram = constraints.get("high_res_vram_gb") - threshold = constraints.get("high_res_threshold") - for h, w in self.STANDARD_RESOLUTIONS: - if high_res_vram and threshold: - th_h, th_w = threshold - if (h > th_h or w > th_w) and self.hardware_vram_gb < high_res_vram: - continue res_set.add((h, w)) return sorted(list(res_set)) -# ================================================================================================= -# BENCHMARK RUNNER -# ================================================================================================= - class BenchmarkRunner: - def __init__(self, warmup_iterations=2, iterations=5, compile_model=False): + def __init__(self, warmup_iterations=5, iterations=30, compile_model=False): self.warmup_iterations = warmup_iterations self.iterations = iterations self.compile_model = compile_model @@ -315,47 +233,55 @@ def run_config(self, config: dict) -> dict: pipeline_id = config["pipeline_id"] print(f"\n--- Benchmarking {pipeline_id} [{config['height']}x{config['width']}] ---") + if not models_are_downloaded(pipeline_id): + print(f"Downloading models for {pipeline_id}...") + try: + download_models(pipeline_id) + print(f"Models downloaded successfully for {pipeline_id}") + except Exception as e: + print(f"ERROR: Failed to download models: {e}") + return {"error": f"Model download failed: {str(e)}"} + pipeline = None try: pipeline = self._init_pipeline(config) inputs = {"prompts": [{"text": config["prompt"], "weight": 100}]} - # Warmup Phase - if self.warmup_iterations > 0: - print(f"Warmup ({self.warmup_iterations} iterations)...") + if pipeline_id == "streamdiffusionv2": + inputs["video"] = torch.randn( + 1, 3, 4, config["height"], config["width"], + device=self.device, dtype=torch.bfloat16 + ) + + print(f"Warmup ({self.warmup_iterations} 
iterations)...") + try: for _ in range(self.warmup_iterations): pipeline(**inputs) - self._clear_memory() + except Exception as e: + raise Exception(f"Warmup failed: {e}") - # Measurement Phase print(f"Measuring ({self.iterations} iterations)...") monitor = ResourceMonitor() latencies = [] - frame_counts = [] - - monitor.start() - for _ in range(self.iterations): - if torch.cuda.is_available(): - torch.cuda.reset_peak_memory_stats() - - t0 = time.time() - output = pipeline(**inputs) - latencies.append(time.time() - t0) - - # Check output for frame count (batch size) - # Some pipelines return a tensor (T, C, H, W) or (B, T, C, H, W) - # If it's 4D (T, C, H, W), dim 0 is frames. - # If it's 5D (B, T, C, H, W), dim 1 is frames * batch size. - current_frames = 1 - if hasattr(output, "shape") and len(output.shape) >= 1: - current_frames = output.shape[0] - frame_counts.append(current_frames) - - monitor.stop() - resource_stats = monitor.get_statistics() - monitor.cleanup() - - # Metrics Calculation + fps_measures = [] + + try: + monitor.start() + for _ in range(self.iterations): + t0 = time.time() + output = pipeline(**inputs) + latency = time.time() - t0 + latencies.append(latency) + fps_measures.append(output.shape[0] / latency) + del output + finally: + try: + monitor.stop() + resource_stats = monitor.get_statistics() + monitor.cleanup() + except Exception: + resource_stats = {} + if not latencies: return {"error": "No successful iterations"} @@ -364,12 +290,9 @@ def run_config(self, config: dict) -> dict: max_latency = max(latencies) jitter = statistics.stdev(latencies) if len(latencies) > 1 else 0.0 - # Calculate FPS based on frames generated per call - avg_frames_per_call = statistics.mean(frame_counts) if frame_counts else 1.0 - - fps_avg = avg_frames_per_call / avg_latency if avg_latency > 0 else 0 - fps_min = avg_frames_per_call / max_latency if max_latency > 0 else 0 - fps_max = avg_frames_per_call / min_latency if min_latency > 0 else 0 + fps_avg = statistics.mean(fps_measures) + fps_min = min(fps_measures) + fps_max = max(fps_measures) results = { "fps_avg": round(fps_avg, 2), @@ -391,27 +314,20 @@ def run_config(self, config: dict) -> dict: finally: del pipeline self._clear_memory() + time.sleep(3.0) def _init_pipeline(self, config: dict): pid = config["pipeline_id"] pipeline_class = PipelineRegistry.get(pid) - if not pipeline_class: raise ValueError(f"Unknown pipeline: {pid}") - # Path Logic - model_dir = Path("src/scope/core/pipelines") / pid - if not model_dir.exists(): # Handle running from src vs root - model_dir = Path(__file__).parent / "src/scope/core/pipelines" / pid - - model_config = OmegaConf.load(model_dir / "model.yaml") + model_config = OmegaConf.load(Path(__file__).parent / "src/scope/core/pipelines" / pid / "model.yaml") pipeline_config = { "model_dir": str(get_models_dir()), "model_config": model_config, "height": config["height"], "width": config["width"], - "denoising_steps": config["denoising_steps"], } - # Hardcoded paths matching original test scripts def model_path(p): return str(get_model_file_path(p)) wan_enc = model_path("WanVideo_comfy/umt5-xxl-enc-fp8_e4m3fn.safetensors") wan_tok = model_path("Wan2.1-T2V-1.3B/google/umt5-xxl") @@ -436,7 +352,6 @@ def model_path(p): return str(get_model_file_path(p)) if "text_encoder_path" not in pipeline_config: pipeline_config["text_encoder_path"] = wan_enc if "tokenizer_path" not in pipeline_config: pipeline_config["tokenizer_path"] = wan_tok - # Init quantization = Quantization.FP8_E4M3FN if pid == 
"krea_realtime_video" else None args = { "config": OmegaConf.create(pipeline_config), @@ -446,48 +361,33 @@ def model_path(p): return str(get_model_file_path(p)) if quantization: args.update({"quantization": quantization}) - # Add compile flag if pipeline accepts it (most new ones do) - # Note: Some pipelines might not have 'compile' arg in __init__, but Krea does. - # We can inspect or try/except, but for simplicity we assume consistency or pass it conditionally if pid == "krea_realtime_video": args["compile"] = self.compile_model - # For others, if they support compile, add logic here. - # StreamDiffusionV2 might not expose it in __init__? - # If it inherits from BasePipeline that has it? - # We'll leave it out for others unless we know they support it to avoid TypeError. - return pipeline_class(**args) def _clear_memory(self): - if torch.cuda.is_available(): - torch.cuda.empty_cache() + """Aggressively clear GPU and system memory.""" + for _ in range(3): + gc.collect() torch.cuda.synchronize() - gc.collect() - - -# ================================================================================================= -# MAIN -# ================================================================================================= + torch.cuda.empty_cache() def main(): parser = argparse.ArgumentParser(description="Scope Benchmark") parser.add_argument("--pipelines", nargs="+", help="Specific pipelines to test") parser.add_argument("--resolutions", nargs="+", help="Resolutions (e.g. 512x512)") - parser.add_argument("--steps", type=int, default=4, help="Denoising steps (default: 4)") - parser.add_argument("--iterations", type=int, default=100, help="Measurement iterations per config") - parser.add_argument("--warmup", type=int, default=10, help="Warmup iterations per config") + parser.add_argument("--iterations", type=int, default=30, help="Measurement iterations per config") + parser.add_argument("--warmup", type=int, default=5, help="Warmup iterations per config") parser.add_argument("--output", default=f"benchmark_{datetime.now().strftime('%Y%m%d_%H%M')}.json") parser.add_argument("--no-tf32", action="store_true", help="Disable TF32 (enabled by default)") parser.add_argument("--compile", action="store_true", help="Enable torch.compile") args = parser.parse_args() - # Global Torch Settings if not args.no_tf32 and torch.cuda.is_available(): torch.backends.cuda.matmul.allow_tf32 = True torch.backends.cudnn.allow_tf32 = True print("TF32 Enabled") - # Parse resolutions custom_res = [] if args.resolutions: for r in args.resolutions: @@ -496,24 +396,19 @@ def main(): custom_res.append((h, w)) except ValueError: pass - # Detect Hardware hw = HardwareInfo() print("\n=== Hardware ===") print(f"GPU: {hw._get_gpu_info().get('devices', [{}])[0].get('name', 'None')}") print(f"VRAM: {hw.get_primary_gpu_vram_gb():.1f} GB") - # Build Configurations (1 per resolution) matrix = ConfigurationMatrix( - hw.get_primary_gpu_vram_gb(), pipelines=args.pipelines, resolutions=custom_res, - steps=[args.steps] ).build() print(f"\nPlanned Configurations: {len(matrix)}") if not matrix: return - # Run runner = BenchmarkRunner(args.warmup, args.iterations, compile_model=args.compile) results = [] @@ -529,7 +424,6 @@ def main(): except KeyboardInterrupt: print("\nStopped.") - # Save data = { "metadata": {"timestamp": datetime.now().isoformat(), "args": vars(args)}, "hardware": hw.to_dict(), From 8bbc9c9128542d8bc71f05f16a36d127b490f30b Mon Sep 17 00:00:00 2001 From: Varshith Bathini Date: Fri, 2 Jan 2026 12:38:34 +0000 Subject: 
[PATCH 3/6] fix: pid path --- benchmark.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/benchmark.py b/benchmark.py index 28e22484..39cc1176 100644 --- a/benchmark.py +++ b/benchmark.py @@ -267,13 +267,19 @@ def run_config(self, config: dict) -> dict: try: monitor.start() + output = None for _ in range(self.iterations): t0 = time.time() output = pipeline(**inputs) latency = time.time() - t0 latencies.append(latency) fps_measures.append(output.shape[0] / latency) + + if output is not None: + output = output.cpu() del output + torch.cuda.synchronize() + torch.cuda.empty_cache() finally: try: monitor.stop() @@ -320,7 +326,7 @@ def _init_pipeline(self, config: dict): pid = config["pipeline_id"] pipeline_class = PipelineRegistry.get(pid) - model_config = OmegaConf.load(Path(__file__).parent / "src/scope/core/pipelines" / pid / "model.yaml") + model_config = OmegaConf.load(Path(__file__).parent / "src/scope/core/pipelines" / pid.replace("-", "_") / "model.yaml") pipeline_config = { "model_dir": str(get_models_dir()), "model_config": model_config, @@ -340,19 +346,19 @@ def model_path(p): return str(get_model_file_path(p)) "generator_path": model_path("LongLive-1.3B/models/longlive_base.pt"), "lora_path": model_path("LongLive-1.3B/models/lora.pt") } - elif pid == "krea_realtime_video": + elif pid == "krea-realtime-video": paths = { "generator_path": model_path("krea-realtime-video/krea-realtime-video-14b.safetensors"), "vae_path": model_path("Wan2.1-T2V-1.3B/Wan2.1_VAE.pth") } - elif pid == "reward_forcing": + elif pid == "reward-forcing": paths = {"generator_path": model_path("Reward-Forcing-T2V-1.3B/rewardforcing.pt")} pipeline_config.update(paths) if "text_encoder_path" not in pipeline_config: pipeline_config["text_encoder_path"] = wan_enc if "tokenizer_path" not in pipeline_config: pipeline_config["tokenizer_path"] = wan_tok - quantization = Quantization.FP8_E4M3FN if pid == "krea_realtime_video" else None + quantization = Quantization.FP8_E4M3FN if pid == "krea-realtime-video" else None args = { "config": OmegaConf.create(pipeline_config), "device": self.device, @@ -361,7 +367,7 @@ def model_path(p): return str(get_model_file_path(p)) if quantization: args.update({"quantization": quantization}) - if pid == "krea_realtime_video": + if pid == "krea-realtime-video": args["compile"] = self.compile_model return pipeline_class(**args) From e83234a7bf0c5395af46f96a2f135728a7c83606 Mon Sep 17 00:00:00 2001 From: Varshith Bathini Date: Mon, 5 Jan 2026 14:50:56 +0000 Subject: [PATCH 4/6] fix: ruff --- benchmark.py | 52 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/benchmark.py b/benchmark.py index 39cc1176..f7234084 100644 --- a/benchmark.py +++ b/benchmark.py @@ -2,24 +2,24 @@ import gc import json import platform +import statistics import threading import time -import statistics from datetime import datetime from pathlib import Path from typing import Any -import torch -import pynvml import psutil -import statistics +import pynvml +import torch from omegaconf import OmegaConf -from scope.core.pipelines.utils import Quantization +from scope.core.config import get_model_file_path, get_models_dir from scope.core.pipelines.registry import PipelineRegistry +from scope.core.pipelines.utils import Quantization from scope.server.download_models import download_models from scope.server.models_config import models_are_downloaded -from scope.core.config import get_model_file_path, get_models_dir + class 
HardwareInfo: """Collects and stores hardware information.""" @@ -47,11 +47,13 @@ def _get_gpu_info(self) -> dict[str, Any]: for i in range(gpu_info["count"]): handle = pynvml.nvmlDeviceGetHandleByIndex(i) name = pynvml.nvmlDeviceGetName(handle) - if isinstance(name, bytes): name = name.decode("utf-8") + if isinstance(name, bytes): + name = name.decode("utf-8") mem = pynvml.nvmlDeviceGetMemoryInfo(handle) driver = pynvml.nvmlSystemGetDriverVersion() - if isinstance(driver, bytes): driver = driver.decode("utf-8") + if isinstance(driver, bytes): + driver = driver.decode("utf-8") gpu_info["devices"].append({ "index": i, @@ -116,14 +118,16 @@ def __init__(self, interval_ms: int = 100, device_index: int = 0): self._pynvml_initialized = True def start(self): - if self._monitoring: return + if self._monitoring: + return self._monitoring = True self._samples = [] self._thread = threading.Thread(target=self._monitor_loop, daemon=True) self._thread.start() def stop(self): - if not self._monitoring: return + if not self._monitoring: + return self._monitoring = False if self._thread: self._thread.join(timeout=2.0) @@ -148,8 +152,10 @@ def _collect_sample(self) -> dict[str, Any]: return sample def get_statistics(self) -> dict[str, float]: - with self._lock: samples = self._samples.copy() - if not samples: return {} + with self._lock: + samples = self._samples.copy() + if not samples: + return {} stats = {} keys = ["gpu_memory_allocated_gb", "gpu_utilization_percent", "system_cpu_percent"] @@ -211,7 +217,8 @@ def _get_resolutions(self, pid: str) -> list[tuple[int, int]]: return self.custom_resolutions pipeline_class = PipelineRegistry.get(pid) - if not pipeline_class: return [] + if not pipeline_class: + return [] default_cfg = pipeline_class.get_config_class()() res_set = {(default_cfg.height, default_cfg.width)} @@ -219,7 +226,7 @@ def _get_resolutions(self, pid: str) -> list[tuple[int, int]]: for h, w in self.STANDARD_RESOLUTIONS: res_set.add((h, w)) - return sorted(list(res_set)) + return sorted(res_set) class BenchmarkRunner: @@ -258,7 +265,7 @@ def run_config(self, config: dict) -> dict: for _ in range(self.warmup_iterations): pipeline(**inputs) except Exception as e: - raise Exception(f"Warmup failed: {e}") + raise Exception(f"Warmup failed: {e}") from e print(f"Measuring ({self.iterations} iterations)...") monitor = ResourceMonitor() @@ -355,8 +362,10 @@ def model_path(p): return str(get_model_file_path(p)) paths = {"generator_path": model_path("Reward-Forcing-T2V-1.3B/rewardforcing.pt")} pipeline_config.update(paths) - if "text_encoder_path" not in pipeline_config: pipeline_config["text_encoder_path"] = wan_enc - if "tokenizer_path" not in pipeline_config: pipeline_config["tokenizer_path"] = wan_tok + if "text_encoder_path" not in pipeline_config: + pipeline_config["text_encoder_path"] = wan_enc + if "tokenizer_path" not in pipeline_config: + pipeline_config["tokenizer_path"] = wan_tok quantization = Quantization.FP8_E4M3FN if pid == "krea-realtime-video" else None args = { @@ -400,7 +409,8 @@ def main(): try: h, w = map(int, r.split("x")) custom_res.append((h, w)) - except ValueError: pass + except ValueError: + pass hw = HardwareInfo() print("\n=== Hardware ===") @@ -413,7 +423,8 @@ def main(): ).build() print(f"\nPlanned Configurations: {len(matrix)}") - if not matrix: return + if not matrix: + return runner = BenchmarkRunner(args.warmup, args.iterations, compile_model=args.compile) results = [] @@ -435,7 +446,8 @@ def main(): "hardware": hw.to_dict(), "results": results } - with 
open(args.output, "w") as f: json.dump(data, f, indent=2) + with open(args.output, "w") as f: + json.dump(data, f, indent=2) print(f"\nSaved to {args.output}") if __name__ == "__main__": From 2e3c48c182f7e0e403df6cef5f398cecf42502a2 Mon Sep 17 00:00:00 2001 From: Varshith Bathini Date: Mon, 5 Jan 2026 15:05:03 +0000 Subject: [PATCH 5/6] fix: ruff format --- benchmark.py | 126 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 88 insertions(+), 38 deletions(-) diff --git a/benchmark.py b/benchmark.py index f7234084..5dae764c 100644 --- a/benchmark.py +++ b/benchmark.py @@ -55,22 +55,26 @@ def _get_gpu_info(self) -> dict[str, Any]: if isinstance(driver, bytes): driver = driver.decode("utf-8") - gpu_info["devices"].append({ - "index": i, - "name": name, - "memory_total_gb": mem.total / (1024**3), - "driver_version": driver, - }) + gpu_info["devices"].append( + { + "index": i, + "name": name, + "memory_total_gb": mem.total / (1024**3), + "driver_version": driver, + } + ) pynvml.nvmlShutdown() if not gpu_info["devices"]: for i in range(gpu_info["count"]): props = torch.cuda.get_device_properties(i) - gpu_info["devices"].append({ - "index": i, - "name": props.name, - "memory_total_gb": props.total_memory / (1024**3), - }) + gpu_info["devices"].append( + { + "index": i, + "name": props.name, + "memory_total_gb": props.total_memory / (1024**3), + } + ) return gpu_info @@ -83,7 +87,10 @@ def _get_cpu_info(self) -> dict[str, Any]: def _get_memory_info(self) -> dict[str, Any]: mem = psutil.virtual_memory() - return {"total_gb": mem.total / (1024**3), "available_gb": mem.available / (1024**3)} + return { + "total_gb": mem.total / (1024**3), + "available_gb": mem.available / (1024**3), + } def _get_platform_info(self) -> dict[str, Any]: return { @@ -101,6 +108,7 @@ def get_primary_gpu_vram_gb(self) -> float: return 0.0 return self._info["gpu"]["devices"][0]["memory_total_gb"] + class ResourceMonitor: def __init__(self, interval_ms: int = 100, device_index: int = 0): self.interval_ms = interval_ms @@ -143,7 +151,9 @@ def _monitor_loop(self): def _collect_sample(self) -> dict[str, Any]: sample = {} if torch.cuda.is_available(): - sample["gpu_memory_allocated_gb"] = torch.cuda.memory_allocated(self.device_index) / (1024**3) + sample["gpu_memory_allocated_gb"] = torch.cuda.memory_allocated( + self.device_index + ) / (1024**3) if self._pynvml_initialized and self._gpu_handle: util = pynvml.nvmlDeviceGetUtilizationRates(self._gpu_handle) sample["gpu_utilization_percent"] = util.gpu @@ -158,7 +168,11 @@ def get_statistics(self) -> dict[str, float]: return {} stats = {} - keys = ["gpu_memory_allocated_gb", "gpu_utilization_percent", "system_cpu_percent"] + keys = [ + "gpu_memory_allocated_gb", + "gpu_utilization_percent", + "system_cpu_percent", + ] for key in keys: values = [s[key] for s in samples if key in s] if values: @@ -238,7 +252,9 @@ def __init__(self, warmup_iterations=5, iterations=30, compile_model=False): def run_config(self, config: dict) -> dict: pipeline_id = config["pipeline_id"] - print(f"\n--- Benchmarking {pipeline_id} [{config['height']}x{config['width']}] ---") + print( + f"\n--- Benchmarking {pipeline_id} [{config['height']}x{config['width']}] ---" + ) if not models_are_downloaded(pipeline_id): print(f"Downloading models for {pipeline_id}...") @@ -256,9 +272,14 @@ def run_config(self, config: dict) -> dict: if pipeline_id == "streamdiffusionv2": inputs["video"] = torch.randn( - 1, 3, 4, config["height"], config["width"], - device=self.device, dtype=torch.bfloat16 - ) + 
1, + 3, + 4, + config["height"], + config["width"], + device=self.device, + dtype=torch.bfloat16, + ) print(f"Warmup ({self.warmup_iterations} iterations)...") try: @@ -315,10 +336,12 @@ def run_config(self, config: dict) -> dict: "latency_min_sec": round(min_latency, 4), "latency_max_sec": round(max_latency, 4), "jitter_sec": round(jitter, 6), - **resource_stats + **resource_stats, } - print(f"-> FPS: {results['fps_avg']} | Latency: {results['latency_avg_sec']}s | Jitter: {results['jitter_sec']}s") + print( + f"-> FPS: {results['fps_avg']} | Latency: {results['latency_avg_sec']}s | Jitter: {results['jitter_sec']}s" + ) return results except Exception as e: @@ -333,7 +356,12 @@ def _init_pipeline(self, config: dict): pid = config["pipeline_id"] pipeline_class = PipelineRegistry.get(pid) - model_config = OmegaConf.load(Path(__file__).parent / "src/scope/core/pipelines" / pid.replace("-", "_") / "model.yaml") + model_config = OmegaConf.load( + Path(__file__).parent + / "src/scope/core/pipelines" + / pid.replace("-", "_") + / "model.yaml" + ) pipeline_config = { "model_dir": str(get_models_dir()), "model_config": model_config, @@ -341,25 +369,35 @@ def _init_pipeline(self, config: dict): "width": config["width"], } - def model_path(p): return str(get_model_file_path(p)) + def model_path(p): + return str(get_model_file_path(p)) + wan_enc = model_path("WanVideo_comfy/umt5-xxl-enc-fp8_e4m3fn.safetensors") wan_tok = model_path("Wan2.1-T2V-1.3B/google/umt5-xxl") paths = {} if pid == "streamdiffusionv2": - paths = {"generator_path": model_path("StreamDiffusionV2/wan_causal_dmd_v2v/model.pt")} + paths = { + "generator_path": model_path( + "StreamDiffusionV2/wan_causal_dmd_v2v/model.pt" + ) + } elif pid == "longlive": paths = { "generator_path": model_path("LongLive-1.3B/models/longlive_base.pt"), - "lora_path": model_path("LongLive-1.3B/models/lora.pt") + "lora_path": model_path("LongLive-1.3B/models/lora.pt"), } elif pid == "krea-realtime-video": paths = { - "generator_path": model_path("krea-realtime-video/krea-realtime-video-14b.safetensors"), - "vae_path": model_path("Wan2.1-T2V-1.3B/Wan2.1_VAE.pth") + "generator_path": model_path( + "krea-realtime-video/krea-realtime-video-14b.safetensors" + ), + "vae_path": model_path("Wan2.1-T2V-1.3B/Wan2.1_VAE.pth"), } elif pid == "reward-forcing": - paths = {"generator_path": model_path("Reward-Forcing-T2V-1.3B/rewardforcing.pt")} + paths = { + "generator_path": model_path("Reward-Forcing-T2V-1.3B/rewardforcing.pt") + } pipeline_config.update(paths) if "text_encoder_path" not in pipeline_config: @@ -371,7 +409,7 @@ def model_path(p): return str(get_model_file_path(p)) args = { "config": OmegaConf.create(pipeline_config), "device": self.device, - "dtype": torch.bfloat16 + "dtype": torch.bfloat16, } if quantization: args.update({"quantization": quantization}) @@ -387,14 +425,23 @@ def _clear_memory(self): torch.cuda.synchronize() torch.cuda.empty_cache() + def main(): parser = argparse.ArgumentParser(description="Scope Benchmark") parser.add_argument("--pipelines", nargs="+", help="Specific pipelines to test") parser.add_argument("--resolutions", nargs="+", help="Resolutions (e.g. 
512x512)") - parser.add_argument("--iterations", type=int, default=30, help="Measurement iterations per config") - parser.add_argument("--warmup", type=int, default=5, help="Warmup iterations per config") - parser.add_argument("--output", default=f"benchmark_{datetime.now().strftime('%Y%m%d_%H%M')}.json") - parser.add_argument("--no-tf32", action="store_true", help="Disable TF32 (enabled by default)") + parser.add_argument( + "--iterations", type=int, default=30, help="Measurement iterations per config" + ) + parser.add_argument( + "--warmup", type=int, default=5, help="Warmup iterations per config" + ) + parser.add_argument( + "--output", default=f"benchmark_{datetime.now().strftime('%Y%m%d_%H%M')}.json" + ) + parser.add_argument( + "--no-tf32", action="store_true", help="Disable TF32 (enabled by default)" + ) parser.add_argument("--compile", action="store_true", help="Enable torch.compile") args = parser.parse_args() @@ -433,22 +480,25 @@ def main(): for i, config in enumerate(matrix, 1): print(f"\n[{i}/{len(matrix)}]", end=" ") metrics = runner.run_config(config) - results.append({ - "pipeline": config["pipeline_id"], - "resolution": f"{config['height']}x{config['width']}", - "metrics": metrics - }) + results.append( + { + "pipeline": config["pipeline_id"], + "resolution": f"{config['height']}x{config['width']}", + "metrics": metrics, + } + ) except KeyboardInterrupt: print("\nStopped.") data = { "metadata": {"timestamp": datetime.now().isoformat(), "args": vars(args)}, "hardware": hw.to_dict(), - "results": results + "results": results, } with open(args.output, "w") as f: json.dump(data, f, indent=2) print(f"\nSaved to {args.output}") + if __name__ == "__main__": main() From 53370e190f5cb57ef233763c0088e6f53875015a Mon Sep 17 00:00:00 2001 From: Varshith Bathini Date: Mon, 5 Jan 2026 15:08:01 +0000 Subject: [PATCH 6/6] fix: reqs --- pyproject.toml | 1 - uv.lock | 11 ----------- 2 files changed, 12 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a94c395a..c4b041a9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -111,7 +111,6 @@ dev = [ benchmark = [ "psutil>=6.1.0", "nvidia-ml-py>=12.560.30", - "py-cpuinfo>=9.0.0", ] [tool.ruff] diff --git a/uv.lock b/uv.lock index 60147016..db06b45a 100644 --- a/uv.lock +++ b/uv.lock @@ -658,7 +658,6 @@ dependencies = [ benchmark = [ { name = "nvidia-ml-py" }, { name = "psutil" }, - { name = "py-cpuinfo" }, ] dev = [ { name = "freezegun" }, @@ -709,7 +708,6 @@ requires-dist = [ benchmark = [ { name = "nvidia-ml-py", specifier = ">=12.560.30" }, { name = "psutil", specifier = ">=6.1.0" }, - { name = "py-cpuinfo", specifier = ">=9.0.0" }, ] dev = [ { name = "freezegun", specifier = ">=1.5.5" }, @@ -2313,15 +2311,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c9/ad/33b2ccec09bf96c2b2ef3f9a6f66baac8253d7565d8839e024a6b905d45d/psutil-7.1.3-cp37-abi3-win_arm64.whl", hash = "sha256:bd0d69cee829226a761e92f28140bec9a5ee9d5b4fb4b0cc589068dbfff559b1", size = 244608, upload-time = "2025-11-02T12:26:36.136Z" }, ] -[[package]] -name = "py-cpuinfo" -version = "9.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/37/a8/d832f7293ebb21690860d2e01d8115e5ff6f2ae8bbdc953f0eb0fa4bd2c7/py-cpuinfo-9.0.0.tar.gz", hash = "sha256:3cdbbf3fac90dc6f118bfd64384f309edeadd902d7c8fb17f02ffa1fc3f49690", size = 104716, upload-time = "2022-10-25T20:38:06.303Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/e0/a9/023730ba63db1e494a271cb018dcd361bd2c917ba7004c3e49d5daf795a2/py_cpuinfo-9.0.0-py3-none-any.whl", hash = "sha256:859625bc251f64e21f077d099d4162689c762b5d6a4c3c97553d56241c9674d5", size = 22335, upload-time = "2022-10-25T20:38:27.636Z" }, -] - [[package]] name = "pycparser" version = "2.23"