From 149591812a54cd832409010322103d24130380f0 Mon Sep 17 00:00:00 2001 From: Varshith Bathini Date: Mon, 29 Dec 2025 17:42:59 +0000 Subject: [PATCH 1/6] feat: benchmark --- .gitignore | 2 + README.md | 46 ++ benchmark.py | 542 ++++++++++++++++++ pyproject.toml | 5 + .../krea_realtime_video/docs/usage.md | 2 +- .../core/pipelines/longlive/docs/usage.md | 2 +- .../pipelines/streamdiffusionv2/docs/usage.md | 2 +- uv.lock | 28 + 8 files changed, 626 insertions(+), 3 deletions(-) create mode 100644 benchmark.py diff --git a/.gitignore b/.gitignore index 62928309..60b4c7e2 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,8 @@ src/scope/core/pipelines/**/*.mp4 notes/ +benchmark_*.json + # Cursor IDE files .cursorrules .cursorignore diff --git a/README.md b/README.md index 58251d98..e513f129 100644 --- a/README.md +++ b/README.md @@ -131,6 +131,52 @@ After your first generation you can: - Use [LoRAs](./docs/lora.md) to customize the concepts and styles used in your generations. - Use [Spout](./docs/spout.md) (Windows only) to share real-time video between Scope and other local applications. +## Benchmarking + +Scope includes a comprehensive benchmarking suite to test pipeline performance across different configurations and hardware setups. This is useful for: + +- Understanding performance characteristics of different GPUs (H100, A6000, 4090, etc.) +- Determining optimal configurations (resolution, denoising steps) for your hardware +- Identifying optimization opportunities + +### Quick Start + +Install benchmark dependencies: + +```bash +uv sync --group benchmark +``` + +Run a comprehensive benchmark (all pipelines, all configurations): + +```bash +uv run benchmark.py +``` + +### Example Usage + +```bash +# Benchmark specific pipelines +uv run benchmark.py --pipelines streamdiffusionv2 longlive + +# Custom resolutions +uv run benchmark.py --resolutions 480x832 768x1344 + +# Custom iterations (defaults: warmup=10, iterations=100) +uv run benchmark.py --warmup 5 --iterations 50 + +# Save results to specific file +uv run benchmark.py --output h100_results.json +``` + +### Output + +The benchmark generates a JSON file with: +- Hardware specifications (GPU, CPU, memory) +- Average performance metrics per configuration +- Peak resource utilization (VRAM, GPU utilization, CPU usage) + + ## Firewalls If you run Scope in a cloud environment with restrictive firewall settings (eg. Runpod), Scope supports using [TURN servers](https://webrtc.org/getting-started/turn-server) to establish a connection between your browser and the streaming server. diff --git a/benchmark.py b/benchmark.py new file mode 100644 index 00000000..726fd5fe --- /dev/null +++ b/benchmark.py @@ -0,0 +1,542 @@ +#!/usr/bin/env python3 +""" +Simple Benchmarking Script for Scope Pipelines. 
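+
+Runs each selected pipeline across a matrix of resolutions, measuring per-call
+latency, FPS, jitter, and peak GPU/CPU usage, and writes the results to a JSON
+report alongside the detected hardware configuration.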
+ +Usage: + uv run benchmark.py [options] +""" + +import argparse +import gc +import json +import platform +import threading +import time +import statistics +from datetime import datetime +from pathlib import Path +from typing import Any + +import psutil +import torch +from omegaconf import OmegaConf + +# Optional dependencies +try: + import cpuinfo +except ImportError: + cpuinfo = None + +try: + import pynvml + PYNVML_AVAILABLE = True +except ImportError: + PYNVML_AVAILABLE = False + + +# Scope imports +from scope.core.config import get_model_file_path, get_models_dir +from scope.core.pipelines.registry import PipelineRegistry +from scope.core.pipelines.utils import Quantization + + +# ================================================================================================= +# HARDWARE INFO +# ================================================================================================= + +class HardwareInfo: + """Collects and stores hardware information.""" + + def __init__(self): + self._info = self._collect_info() + + def _collect_info(self) -> dict[str, Any]: + return { + "gpu": self._get_gpu_info(), + "cpu": self._get_cpu_info(), + "memory": self._get_memory_info(), + "platform": self._get_platform_info(), + } + + def _get_gpu_info(self) -> dict[str, Any]: + gpu_info = {"available": torch.cuda.is_available(), "count": 0, "devices": []} + if not torch.cuda.is_available(): + return gpu_info + + gpu_info["count"] = torch.cuda.device_count() + gpu_info["cuda_version"] = torch.version.cuda + + if PYNVML_AVAILABLE: + try: + pynvml.nvmlInit() + for i in range(gpu_info["count"]): + handle = pynvml.nvmlDeviceGetHandleByIndex(i) + name = pynvml.nvmlDeviceGetName(handle) + if isinstance(name, bytes): name = name.decode("utf-8") + + mem = pynvml.nvmlDeviceGetMemoryInfo(handle) + driver = pynvml.nvmlSystemGetDriverVersion() + if isinstance(driver, bytes): driver = driver.decode("utf-8") + + gpu_info["devices"].append({ + "index": i, + "name": name, + "memory_total_gb": mem.total / (1024**3), + "driver_version": driver, + }) + pynvml.nvmlShutdown() + except Exception: + pass + + if not gpu_info["devices"]: + for i in range(gpu_info["count"]): + props = torch.cuda.get_device_properties(i) + gpu_info["devices"].append({ + "index": i, + "name": props.name, + "memory_total_gb": props.total_memory / (1024**3), + }) + + return gpu_info + + def _get_cpu_info(self) -> dict[str, Any]: + return { + "physical_cores": psutil.cpu_count(logical=False), + "logical_cores": psutil.cpu_count(logical=True), + "model": platform.processor(), + } + + def _get_memory_info(self) -> dict[str, Any]: + mem = psutil.virtual_memory() + return {"total_gb": mem.total / (1024**3), "available_gb": mem.available / (1024**3)} + + def _get_platform_info(self) -> dict[str, Any]: + return { + "system": platform.system(), + "release": platform.release(), + "python_version": platform.python_version(), + "pytorch_version": torch.__version__, + } + + def to_dict(self) -> dict[str, Any]: + return self._info + + def get_primary_gpu_vram_gb(self) -> float: + if not self._info["gpu"]["available"] or not self._info["gpu"]["devices"]: + return 0.0 + return self._info["gpu"]["devices"][0]["memory_total_gb"] + + +# ================================================================================================= +# RESOURCE MONITOR +# ================================================================================================= + +class ResourceMonitor: + def __init__(self, interval_ms: int = 100, device_index: int = 0): + 
self.interval_ms = interval_ms + self.device_index = device_index + self._monitoring = False + self._thread = None + self._samples = [] + self._lock = threading.Lock() + self._process = psutil.Process() + self._pynvml_initialized = False + self._gpu_handle = None + + if PYNVML_AVAILABLE and torch.cuda.is_available(): + try: + pynvml.nvmlInit() + self._gpu_handle = pynvml.nvmlDeviceGetHandleByIndex(device_index) + self._pynvml_initialized = True + except Exception: + pass + + def start(self): + if self._monitoring: return + self._monitoring = True + self._samples = [] + self._thread = threading.Thread(target=self._monitor_loop, daemon=True) + self._thread.start() + + def stop(self): + if not self._monitoring: return + self._monitoring = False + if self._thread: + self._thread.join(timeout=2.0) + self._thread = None + + def _monitor_loop(self): + while self._monitoring: + sample = self._collect_sample() + with self._lock: + self._samples.append(sample) + time.sleep(self.interval_ms / 1000.0) + + def _collect_sample(self) -> dict[str, Any]: + sample = {} + if torch.cuda.is_available(): + try: + sample["gpu_memory_allocated_gb"] = torch.cuda.memory_allocated(self.device_index) / (1024**3) + if self._pynvml_initialized and self._gpu_handle: + util = pynvml.nvmlDeviceGetUtilizationRates(self._gpu_handle) + sample["gpu_utilization_percent"] = util.gpu + except Exception: + pass + + try: + sample["system_cpu_percent"] = psutil.cpu_percent() + except Exception: + pass + return sample + + def get_statistics(self) -> dict[str, float]: + with self._lock: samples = self._samples.copy() + if not samples: return {} + + stats = {} + keys = ["gpu_memory_allocated_gb", "gpu_utilization_percent", "system_cpu_percent"] + for key in keys: + values = [s[key] for s in samples if key in s] + if values: + stats[f"{key}_avg"] = sum(values) / len(values) + stats[f"{key}_max"] = max(values) + return stats + + def cleanup(self): + self.stop() + if self._pynvml_initialized: + try: pynvml.nvmlShutdown() + except Exception: pass + + +# ================================================================================================= +# CONFIGURATION MATRIX +# ================================================================================================= + +class ConfigurationMatrix: + # Default resolutions to test + STANDARD_RESOLUTIONS = [ + (320, 576), + (480, 832), + (512, 512), + (576, 1024), + (768, 1344), + ] + + # Defaults (Single run per resolution) + DEFAULT_PROMPT = "A realistic video of a serene landscape with rolling hills, a clear blue sky, and a gentle stream." 
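+
+    # VRAM gating (used by _check_constraints and _get_resolutions below): a pipeline
+    # listed here is skipped entirely below "min_vram_gb", and resolutions above
+    # "high_res_threshold" are only benchmarked with at least "high_res_vram_gb".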
+ + PIPELINE_CONSTRAINTS = { + "krea_realtime_video": { + "min_vram_gb": 32, + "high_res_vram_gb": 40, + "high_res_threshold": (480, 832), + }, + } + + def __init__(self, hardware_vram_gb: float, pipelines=None, resolutions=None, steps=None): + self.hardware_vram_gb = hardware_vram_gb + self.selected_pipelines = pipelines + self.custom_resolutions = resolutions + self.steps = steps or [4] # Default to 4 if not specified + + def build(self) -> list[dict]: + all_pipelines = PipelineRegistry.list_pipelines() + + if self.selected_pipelines: + pipelines = [p for p in all_pipelines if p in self.selected_pipelines] + else: + pipelines = [p for p in all_pipelines if p != "passthrough"] + + configurations = [] + for pid in pipelines: + if not self._check_constraints(pid): + print(f"Skipping {pid}: insufficient VRAM ({self.hardware_vram_gb:.1f}GB)") + continue + + # Determine resolutions + resolutions = self._get_resolutions(pid) + + for h, w in resolutions: + config = { + "pipeline_id": pid, + "height": h, + "width": w, + "denoising_steps": self.steps, + "prompt": self.DEFAULT_PROMPT, + } + configurations.append(config) + + return configurations + + def _check_constraints(self, pid: str) -> bool: + constraints = self.PIPELINE_CONSTRAINTS.get(pid, {}) + return self.hardware_vram_gb >= constraints.get("min_vram_gb", 0) + + def _get_resolutions(self, pid: str) -> list[tuple[int, int]]: + if self.custom_resolutions: + return self.custom_resolutions + + # Default config for the pipeline + pipeline_class = PipelineRegistry.get(pid) + if not pipeline_class: return [] + default_cfg = pipeline_class.get_config_class()() + + # Start with default resolution + res_set = {(default_cfg.height, default_cfg.width)} + + # Add standard ones that fit VRAM constraints + constraints = self.PIPELINE_CONSTRAINTS.get(pid, {}) + high_res_vram = constraints.get("high_res_vram_gb") + threshold = constraints.get("high_res_threshold") + + for h, w in self.STANDARD_RESOLUTIONS: + if high_res_vram and threshold: + th_h, th_w = threshold + if (h > th_h or w > th_w) and self.hardware_vram_gb < high_res_vram: + continue + res_set.add((h, w)) + + return sorted(list(res_set)) + + +# ================================================================================================= +# BENCHMARK RUNNER +# ================================================================================================= + +class BenchmarkRunner: + def __init__(self, warmup_iterations=2, iterations=5, compile_model=False): + self.warmup_iterations = warmup_iterations + self.iterations = iterations + self.compile_model = compile_model + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + def run_config(self, config: dict) -> dict: + pipeline_id = config["pipeline_id"] + print(f"\n--- Benchmarking {pipeline_id} [{config['height']}x{config['width']}] ---") + + pipeline = None + try: + pipeline = self._init_pipeline(config) + inputs = {"prompts": [{"text": config["prompt"], "weight": 100}]} + + # Warmup Phase + if self.warmup_iterations > 0: + print(f"Warmup ({self.warmup_iterations} iterations)...") + for _ in range(self.warmup_iterations): + pipeline(**inputs) + self._clear_memory() + + # Measurement Phase + print(f"Measuring ({self.iterations} iterations)...") + monitor = ResourceMonitor() + latencies = [] + frame_counts = [] + + monitor.start() + for _ in range(self.iterations): + if torch.cuda.is_available(): + torch.cuda.reset_peak_memory_stats() + + t0 = time.time() + output = pipeline(**inputs) + latencies.append(time.time() - 
t0) + + # Check output for frame count (batch size) + # Some pipelines return a tensor (T, C, H, W) or (B, T, C, H, W) + # If it's 4D (T, C, H, W), dim 0 is frames. + # If it's 5D (B, T, C, H, W), dim 1 is frames * batch size. + current_frames = 1 + if hasattr(output, "shape") and len(output.shape) >= 1: + current_frames = output.shape[0] + frame_counts.append(current_frames) + + monitor.stop() + resource_stats = monitor.get_statistics() + monitor.cleanup() + + # Metrics Calculation + if not latencies: + return {"error": "No successful iterations"} + + avg_latency = statistics.mean(latencies) + min_latency = min(latencies) + max_latency = max(latencies) + jitter = statistics.stdev(latencies) if len(latencies) > 1 else 0.0 + + # Calculate FPS based on frames generated per call + avg_frames_per_call = statistics.mean(frame_counts) if frame_counts else 1.0 + + fps_avg = avg_frames_per_call / avg_latency if avg_latency > 0 else 0 + fps_min = avg_frames_per_call / max_latency if max_latency > 0 else 0 + fps_max = avg_frames_per_call / min_latency if min_latency > 0 else 0 + + results = { + "fps_avg": round(fps_avg, 2), + "fps_min": round(fps_min, 2), + "fps_max": round(fps_max, 2), + "latency_avg_sec": round(avg_latency, 4), + "latency_min_sec": round(min_latency, 4), + "latency_max_sec": round(max_latency, 4), + "jitter_sec": round(jitter, 6), + **resource_stats + } + + print(f"-> FPS: {results['fps_avg']} | Latency: {results['latency_avg_sec']}s | Jitter: {results['jitter_sec']}s") + return results + + except Exception as e: + print(f"ERROR: {e}") + return {"error": str(e)} + finally: + del pipeline + self._clear_memory() + + def _init_pipeline(self, config: dict): + pid = config["pipeline_id"] + pipeline_class = PipelineRegistry.get(pid) + if not pipeline_class: raise ValueError(f"Unknown pipeline: {pid}") + + # Path Logic + model_dir = Path("src/scope/core/pipelines") / pid + if not model_dir.exists(): # Handle running from src vs root + model_dir = Path(__file__).parent / "src/scope/core/pipelines" / pid + + model_config = OmegaConf.load(model_dir / "model.yaml") + pipeline_config = { + "model_dir": str(get_models_dir()), + "model_config": model_config, + "height": config["height"], + "width": config["width"], + "denoising_steps": config["denoising_steps"], + } + + # Hardcoded paths matching original test scripts + def model_path(p): return str(get_model_file_path(p)) + wan_enc = model_path("WanVideo_comfy/umt5-xxl-enc-fp8_e4m3fn.safetensors") + wan_tok = model_path("Wan2.1-T2V-1.3B/google/umt5-xxl") + + paths = {} + if pid == "streamdiffusionv2": + paths = {"generator_path": model_path("StreamDiffusionV2/wan_causal_dmd_v2v/model.pt")} + elif pid == "longlive": + paths = { + "generator_path": model_path("LongLive-1.3B/models/longlive_base.pt"), + "lora_path": model_path("LongLive-1.3B/models/lora.pt") + } + elif pid == "krea_realtime_video": + paths = { + "generator_path": model_path("krea-realtime-video/krea-realtime-video-14b.safetensors"), + "vae_path": model_path("Wan2.1-T2V-1.3B/Wan2.1_VAE.pth") + } + elif pid == "reward_forcing": + paths = {"generator_path": model_path("Reward-Forcing-T2V-1.3B/rewardforcing.pt")} + + pipeline_config.update(paths) + if "text_encoder_path" not in pipeline_config: pipeline_config["text_encoder_path"] = wan_enc + if "tokenizer_path" not in pipeline_config: pipeline_config["tokenizer_path"] = wan_tok + + # Init + quantization = Quantization.FP8_E4M3FN if pid == "krea_realtime_video" else None + args = { + "config": OmegaConf.create(pipeline_config), + 
"device": self.device, + "dtype": torch.bfloat16 + } + if quantization: + args.update({"quantization": quantization}) + + # Add compile flag if pipeline accepts it (most new ones do) + # Note: Some pipelines might not have 'compile' arg in __init__, but Krea does. + # We can inspect or try/except, but for simplicity we assume consistency or pass it conditionally + if pid == "krea_realtime_video": + args["compile"] = self.compile_model + # For others, if they support compile, add logic here. + # StreamDiffusionV2 might not expose it in __init__? + # If it inherits from BasePipeline that has it? + # We'll leave it out for others unless we know they support it to avoid TypeError. + + return pipeline_class(**args) + + def _clear_memory(self): + if torch.cuda.is_available(): + torch.cuda.empty_cache() + torch.cuda.synchronize() + gc.collect() + + +# ================================================================================================= +# MAIN +# ================================================================================================= + +def main(): + parser = argparse.ArgumentParser(description="Scope Benchmark") + parser.add_argument("--pipelines", nargs="+", help="Specific pipelines to test") + parser.add_argument("--resolutions", nargs="+", help="Resolutions (e.g. 512x512)") + parser.add_argument("--steps", type=int, default=4, help="Denoising steps (default: 4)") + parser.add_argument("--iterations", type=int, default=100, help="Measurement iterations per config") + parser.add_argument("--warmup", type=int, default=10, help="Warmup iterations per config") + parser.add_argument("--output", default=f"benchmark_{datetime.now().strftime('%Y%m%d_%H%M')}.json") + parser.add_argument("--no-tf32", action="store_true", help="Disable TF32 (enabled by default)") + parser.add_argument("--compile", action="store_true", help="Enable torch.compile") + args = parser.parse_args() + + # Global Torch Settings + if not args.no_tf32 and torch.cuda.is_available(): + torch.backends.cuda.matmul.allow_tf32 = True + torch.backends.cudnn.allow_tf32 = True + print("TF32 Enabled") + + # Parse resolutions + custom_res = [] + if args.resolutions: + for r in args.resolutions: + try: + h, w = map(int, r.split("x")) + custom_res.append((h, w)) + except ValueError: pass + + # Detect Hardware + hw = HardwareInfo() + print("\n=== Hardware ===") + print(f"GPU: {hw._get_gpu_info().get('devices', [{}])[0].get('name', 'None')}") + print(f"VRAM: {hw.get_primary_gpu_vram_gb():.1f} GB") + + # Build Configurations (1 per resolution) + matrix = ConfigurationMatrix( + hw.get_primary_gpu_vram_gb(), + pipelines=args.pipelines, + resolutions=custom_res, + steps=[args.steps] + ).build() + + print(f"\nPlanned Configurations: {len(matrix)}") + if not matrix: return + + # Run + runner = BenchmarkRunner(args.warmup, args.iterations, compile_model=args.compile) + results = [] + + try: + for i, config in enumerate(matrix, 1): + print(f"\n[{i}/{len(matrix)}]", end=" ") + metrics = runner.run_config(config) + results.append({ + "pipeline": config["pipeline_id"], + "resolution": f"{config['height']}x{config['width']}", + "metrics": metrics + }) + except KeyboardInterrupt: + print("\nStopped.") + + # Save + data = { + "metadata": {"timestamp": datetime.now().isoformat(), "args": vars(args)}, + "hardware": hw.to_dict(), + "results": results + } + with open(args.output, "w") as f: json.dump(data, f, indent=2) + print(f"\nSaved to {args.output}") + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml index 
70534ae3..a94c395a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -108,6 +108,11 @@ dev = [ "pytest>=8.4.2", "freezegun>=1.5.5", ] +benchmark = [ + "psutil>=6.1.0", + "nvidia-ml-py>=12.560.30", + "py-cpuinfo>=9.0.0", +] [tool.ruff] line-length = 88 diff --git a/src/scope/core/pipelines/krea_realtime_video/docs/usage.md b/src/scope/core/pipelines/krea_realtime_video/docs/usage.md index c5ac8953..fcc09730 100644 --- a/src/scope/core/pipelines/krea_realtime_video/docs/usage.md +++ b/src/scope/core/pipelines/krea_realtime_video/docs/usage.md @@ -89,7 +89,7 @@ Then: ``` # Run from scope directory -uv run -m score.core.pipelines.krea_realtime_video.test +uv run -m scope.core.pipelines.krea_realtime_video.test ``` This will create an `output.mp4` file in the `krea_realtime_video` directory. diff --git a/src/scope/core/pipelines/longlive/docs/usage.md b/src/scope/core/pipelines/longlive/docs/usage.md index 9026970d..9335eedb 100644 --- a/src/scope/core/pipelines/longlive/docs/usage.md +++ b/src/scope/core/pipelines/longlive/docs/usage.md @@ -73,7 +73,7 @@ Then: ``` # Run from scope directory -uv run -m score.core.pipelines.longlive.test +uv run -m scope.core.pipelines.longlive.test ``` This will create an `output.mp4` file in the `longlive` directory. diff --git a/src/scope/core/pipelines/streamdiffusionv2/docs/usage.md b/src/scope/core/pipelines/streamdiffusionv2/docs/usage.md index 036dd02b..d4c16058 100644 --- a/src/scope/core/pipelines/streamdiffusionv2/docs/usage.md +++ b/src/scope/core/pipelines/streamdiffusionv2/docs/usage.md @@ -55,7 +55,7 @@ Then: ``` # Run from scope directory -uv run -m score.core.pipelines.streamdiffusionv2.test +uv run -m scope.core.pipelines.streamdiffusionv2.test ``` This will create an `output.mp4` file in the `streamdiffusionv2` directory. 
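The benchmark's JSON report (described in the README `Output` section added above) is easy to post-process. The following is a minimal sketch, not part of this patch, assuming a `benchmark_*.json` report produced with the default `--output` naming sits in the current directory:

```python
import json
from pathlib import Path

# Pick the most recent report written by benchmark.py (default "--output" naming
# embeds the timestamp, so lexicographic order is chronological).
reports = sorted(Path(".").glob("benchmark_*.json"))
if not reports:
    raise SystemExit("No benchmark_*.json report found in the current directory.")
data = json.loads(reports[-1].read_text())

devices = data["hardware"]["gpu"]["devices"]
gpu_name = devices[0]["name"] if devices else "CPU only"
print(f"Report: {reports[-1]} ({gpu_name})")

# Each result entry holds the pipeline id, the HxW resolution, and averaged metrics
# (or an "error" string if that configuration failed).
for entry in data["results"]:
    metrics = entry["metrics"]
    if "error" in metrics:
        print(f"{entry['pipeline']:<22} {entry['resolution']:<10} ERROR: {metrics['error']}")
        continue
    print(
        f"{entry['pipeline']:<22} {entry['resolution']:<10} "
        f"{metrics['fps_avg']:>7.2f} fps   {metrics['latency_avg_sec']:.4f} s/call"
    )
```

The keys read here (`pipeline`, `resolution`, `fps_avg`, `latency_avg_sec`) are unchanged by the later cleanup patches in this series, so the sketch applies to reports from any of these revisions.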
diff --git a/uv.lock b/uv.lock index ad0ef91b..60147016 100644 --- a/uv.lock +++ b/uv.lock @@ -655,6 +655,11 @@ dependencies = [ ] [package.dev-dependencies] +benchmark = [ + { name = "nvidia-ml-py" }, + { name = "psutil" }, + { name = "py-cpuinfo" }, +] dev = [ { name = "freezegun" }, { name = "imageio" }, @@ -701,6 +706,11 @@ requires-dist = [ ] [package.metadata.requires-dev] +benchmark = [ + { name = "nvidia-ml-py", specifier = ">=12.560.30" }, + { name = "psutil", specifier = ">=6.1.0" }, + { name = "py-cpuinfo", specifier = ">=9.0.0" }, +] dev = [ { name = "freezegun", specifier = ">=1.5.5" }, { name = "imageio", specifier = ">=2.37.0" }, @@ -1953,6 +1963,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, upload-time = "2025-02-26T00:15:44.104Z" }, ] +[[package]] +name = "nvidia-ml-py" +version = "13.590.44" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1b/23/3871537f204aee823c574ba25cbeb08cae779979d4d43c01adddda00bab9/nvidia_ml_py-13.590.44.tar.gz", hash = "sha256:b358c7614b0fdeea4b95f046f1c90123bfe25d148ab93bb1c00248b834703373", size = 49737, upload-time = "2025-12-08T14:41:10.872Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e4/47/4c822bd37a008e72fd5a0eae33524ae3ac97b13f7030f63bae1728b8957e/nvidia_ml_py-13.590.44-py3-none-any.whl", hash = "sha256:18feb54eca7d0e3cdc8d1a040a771eda72d9ec3148e5443087970dbfd7377ecc", size = 50683, upload-time = "2025-12-08T14:41:09.597Z" }, +] + [[package]] name = "nvidia-nccl-cu12" version = "2.27.3" @@ -2294,6 +2313,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c9/ad/33b2ccec09bf96c2b2ef3f9a6f66baac8253d7565d8839e024a6b905d45d/psutil-7.1.3-cp37-abi3-win_arm64.whl", hash = "sha256:bd0d69cee829226a761e92f28140bec9a5ee9d5b4fb4b0cc589068dbfff559b1", size = 244608, upload-time = "2025-11-02T12:26:36.136Z" }, ] +[[package]] +name = "py-cpuinfo" +version = "9.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/37/a8/d832f7293ebb21690860d2e01d8115e5ff6f2ae8bbdc953f0eb0fa4bd2c7/py-cpuinfo-9.0.0.tar.gz", hash = "sha256:3cdbbf3fac90dc6f118bfd64384f309edeadd902d7c8fb17f02ffa1fc3f49690", size = 104716, upload-time = "2022-10-25T20:38:06.303Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/a9/023730ba63db1e494a271cb018dcd361bd2c917ba7004c3e49d5daf795a2/py_cpuinfo-9.0.0-py3-none-any.whl", hash = "sha256:859625bc251f64e21f077d099d4162689c762b5d6a4c3c97553d56241c9674d5", size = 22335, upload-time = "2022-10-25T20:38:27.636Z" }, +] + [[package]] name = "pycparser" version = "2.23" From 6ffcd549b797160ab741b11309c784c414fef46d Mon Sep 17 00:00:00 2001 From: Varshith Bathini Date: Fri, 2 Jan 2026 09:50:16 +0000 Subject: [PATCH 2/6] fix: benchmark cleanup --- benchmark.py | 278 ++++++++++++++++----------------------------------- 1 file changed, 86 insertions(+), 192 deletions(-) diff --git a/benchmark.py b/benchmark.py index 726fd5fe..28e22484 100644 --- a/benchmark.py +++ b/benchmark.py @@ -1,11 +1,3 @@ -#!/usr/bin/env python3 -""" -Simple Benchmarking Script for Scope Pipelines. 
- -Usage: - uv run benchmark.py [options] -""" - import argparse import gc import json @@ -17,32 +9,17 @@ from pathlib import Path from typing import Any -import psutil import torch +import pynvml +import psutil +import statistics from omegaconf import OmegaConf -# Optional dependencies -try: - import cpuinfo -except ImportError: - cpuinfo = None - -try: - import pynvml - PYNVML_AVAILABLE = True -except ImportError: - PYNVML_AVAILABLE = False - - -# Scope imports -from scope.core.config import get_model_file_path, get_models_dir -from scope.core.pipelines.registry import PipelineRegistry from scope.core.pipelines.utils import Quantization - - -# ================================================================================================= -# HARDWARE INFO -# ================================================================================================= +from scope.core.pipelines.registry import PipelineRegistry +from scope.server.download_models import download_models +from scope.server.models_config import models_are_downloaded +from scope.core.config import get_model_file_path, get_models_dir class HardwareInfo: """Collects and stores hardware information.""" @@ -66,27 +43,23 @@ def _get_gpu_info(self) -> dict[str, Any]: gpu_info["count"] = torch.cuda.device_count() gpu_info["cuda_version"] = torch.version.cuda - if PYNVML_AVAILABLE: - try: - pynvml.nvmlInit() - for i in range(gpu_info["count"]): - handle = pynvml.nvmlDeviceGetHandleByIndex(i) - name = pynvml.nvmlDeviceGetName(handle) - if isinstance(name, bytes): name = name.decode("utf-8") - - mem = pynvml.nvmlDeviceGetMemoryInfo(handle) - driver = pynvml.nvmlSystemGetDriverVersion() - if isinstance(driver, bytes): driver = driver.decode("utf-8") - - gpu_info["devices"].append({ - "index": i, - "name": name, - "memory_total_gb": mem.total / (1024**3), - "driver_version": driver, - }) - pynvml.nvmlShutdown() - except Exception: - pass + pynvml.nvmlInit() + for i in range(gpu_info["count"]): + handle = pynvml.nvmlDeviceGetHandleByIndex(i) + name = pynvml.nvmlDeviceGetName(handle) + if isinstance(name, bytes): name = name.decode("utf-8") + + mem = pynvml.nvmlDeviceGetMemoryInfo(handle) + driver = pynvml.nvmlSystemGetDriverVersion() + if isinstance(driver, bytes): driver = driver.decode("utf-8") + + gpu_info["devices"].append({ + "index": i, + "name": name, + "memory_total_gb": mem.total / (1024**3), + "driver_version": driver, + }) + pynvml.nvmlShutdown() if not gpu_info["devices"]: for i in range(gpu_info["count"]): @@ -126,11 +99,6 @@ def get_primary_gpu_vram_gb(self) -> float: return 0.0 return self._info["gpu"]["devices"][0]["memory_total_gb"] - -# ================================================================================================= -# RESOURCE MONITOR -# ================================================================================================= - class ResourceMonitor: def __init__(self, interval_ms: int = 100, device_index: int = 0): self.interval_ms = interval_ms @@ -143,13 +111,9 @@ def __init__(self, interval_ms: int = 100, device_index: int = 0): self._pynvml_initialized = False self._gpu_handle = None - if PYNVML_AVAILABLE and torch.cuda.is_available(): - try: - pynvml.nvmlInit() - self._gpu_handle = pynvml.nvmlDeviceGetHandleByIndex(device_index) - self._pynvml_initialized = True - except Exception: - pass + pynvml.nvmlInit() + self._gpu_handle = pynvml.nvmlDeviceGetHandleByIndex(device_index) + self._pynvml_initialized = True def start(self): if self._monitoring: return @@ -175,18 +139,12 @@ def 
_monitor_loop(self): def _collect_sample(self) -> dict[str, Any]: sample = {} if torch.cuda.is_available(): - try: - sample["gpu_memory_allocated_gb"] = torch.cuda.memory_allocated(self.device_index) / (1024**3) - if self._pynvml_initialized and self._gpu_handle: - util = pynvml.nvmlDeviceGetUtilizationRates(self._gpu_handle) - sample["gpu_utilization_percent"] = util.gpu - except Exception: - pass + sample["gpu_memory_allocated_gb"] = torch.cuda.memory_allocated(self.device_index) / (1024**3) + if self._pynvml_initialized and self._gpu_handle: + util = pynvml.nvmlDeviceGetUtilizationRates(self._gpu_handle) + sample["gpu_utilization_percent"] = util.gpu - try: - sample["system_cpu_percent"] = psutil.cpu_percent() - except Exception: - pass + sample["system_cpu_percent"] = psutil.cpu_percent() return sample def get_statistics(self) -> dict[str, float]: @@ -200,21 +158,17 @@ def get_statistics(self) -> dict[str, float]: if values: stats[f"{key}_avg"] = sum(values) / len(values) stats[f"{key}_max"] = max(values) + stats[f"{key}_min"] = min(values) + stats[f"{key}_std"] = statistics.stdev(values) return stats def cleanup(self): self.stop() if self._pynvml_initialized: - try: pynvml.nvmlShutdown() - except Exception: pass - + pynvml.nvmlShutdown() -# ================================================================================================= -# CONFIGURATION MATRIX -# ================================================================================================= class ConfigurationMatrix: - # Default resolutions to test STANDARD_RESOLUTIONS = [ (320, 576), (480, 832), @@ -223,22 +177,11 @@ class ConfigurationMatrix: (768, 1344), ] - # Defaults (Single run per resolution) DEFAULT_PROMPT = "A realistic video of a serene landscape with rolling hills, a clear blue sky, and a gentle stream." 
- PIPELINE_CONSTRAINTS = { - "krea_realtime_video": { - "min_vram_gb": 32, - "high_res_vram_gb": 40, - "high_res_threshold": (480, 832), - }, - } - - def __init__(self, hardware_vram_gb: float, pipelines=None, resolutions=None, steps=None): - self.hardware_vram_gb = hardware_vram_gb + def __init__(self, pipelines=None, resolutions=None): self.selected_pipelines = pipelines self.custom_resolutions = resolutions - self.steps = steps or [4] # Default to 4 if not specified def build(self) -> list[dict]: all_pipelines = PipelineRegistry.list_pipelines() @@ -250,11 +193,6 @@ def build(self) -> list[dict]: configurations = [] for pid in pipelines: - if not self._check_constraints(pid): - print(f"Skipping {pid}: insufficient VRAM ({self.hardware_vram_gb:.1f}GB)") - continue - - # Determine resolutions resolutions = self._get_resolutions(pid) for h, w in resolutions: @@ -262,50 +200,30 @@ def build(self) -> list[dict]: "pipeline_id": pid, "height": h, "width": w, - "denoising_steps": self.steps, "prompt": self.DEFAULT_PROMPT, } configurations.append(config) return configurations - def _check_constraints(self, pid: str) -> bool: - constraints = self.PIPELINE_CONSTRAINTS.get(pid, {}) - return self.hardware_vram_gb >= constraints.get("min_vram_gb", 0) - def _get_resolutions(self, pid: str) -> list[tuple[int, int]]: if self.custom_resolutions: return self.custom_resolutions - # Default config for the pipeline pipeline_class = PipelineRegistry.get(pid) if not pipeline_class: return [] default_cfg = pipeline_class.get_config_class()() - # Start with default resolution res_set = {(default_cfg.height, default_cfg.width)} - # Add standard ones that fit VRAM constraints - constraints = self.PIPELINE_CONSTRAINTS.get(pid, {}) - high_res_vram = constraints.get("high_res_vram_gb") - threshold = constraints.get("high_res_threshold") - for h, w in self.STANDARD_RESOLUTIONS: - if high_res_vram and threshold: - th_h, th_w = threshold - if (h > th_h or w > th_w) and self.hardware_vram_gb < high_res_vram: - continue res_set.add((h, w)) return sorted(list(res_set)) -# ================================================================================================= -# BENCHMARK RUNNER -# ================================================================================================= - class BenchmarkRunner: - def __init__(self, warmup_iterations=2, iterations=5, compile_model=False): + def __init__(self, warmup_iterations=5, iterations=30, compile_model=False): self.warmup_iterations = warmup_iterations self.iterations = iterations self.compile_model = compile_model @@ -315,47 +233,55 @@ def run_config(self, config: dict) -> dict: pipeline_id = config["pipeline_id"] print(f"\n--- Benchmarking {pipeline_id} [{config['height']}x{config['width']}] ---") + if not models_are_downloaded(pipeline_id): + print(f"Downloading models for {pipeline_id}...") + try: + download_models(pipeline_id) + print(f"Models downloaded successfully for {pipeline_id}") + except Exception as e: + print(f"ERROR: Failed to download models: {e}") + return {"error": f"Model download failed: {str(e)}"} + pipeline = None try: pipeline = self._init_pipeline(config) inputs = {"prompts": [{"text": config["prompt"], "weight": 100}]} - # Warmup Phase - if self.warmup_iterations > 0: - print(f"Warmup ({self.warmup_iterations} iterations)...") + if pipeline_id == "streamdiffusionv2": + inputs["video"] = torch.randn( + 1, 3, 4, config["height"], config["width"], + device=self.device, dtype=torch.bfloat16 + ) + + print(f"Warmup ({self.warmup_iterations} 
iterations)...") + try: for _ in range(self.warmup_iterations): pipeline(**inputs) - self._clear_memory() + except Exception as e: + raise Exception(f"Warmup failed: {e}") - # Measurement Phase print(f"Measuring ({self.iterations} iterations)...") monitor = ResourceMonitor() latencies = [] - frame_counts = [] - - monitor.start() - for _ in range(self.iterations): - if torch.cuda.is_available(): - torch.cuda.reset_peak_memory_stats() - - t0 = time.time() - output = pipeline(**inputs) - latencies.append(time.time() - t0) - - # Check output for frame count (batch size) - # Some pipelines return a tensor (T, C, H, W) or (B, T, C, H, W) - # If it's 4D (T, C, H, W), dim 0 is frames. - # If it's 5D (B, T, C, H, W), dim 1 is frames * batch size. - current_frames = 1 - if hasattr(output, "shape") and len(output.shape) >= 1: - current_frames = output.shape[0] - frame_counts.append(current_frames) - - monitor.stop() - resource_stats = monitor.get_statistics() - monitor.cleanup() - - # Metrics Calculation + fps_measures = [] + + try: + monitor.start() + for _ in range(self.iterations): + t0 = time.time() + output = pipeline(**inputs) + latency = time.time() - t0 + latencies.append(latency) + fps_measures.append(output.shape[0] / latency) + del output + finally: + try: + monitor.stop() + resource_stats = monitor.get_statistics() + monitor.cleanup() + except Exception: + resource_stats = {} + if not latencies: return {"error": "No successful iterations"} @@ -364,12 +290,9 @@ def run_config(self, config: dict) -> dict: max_latency = max(latencies) jitter = statistics.stdev(latencies) if len(latencies) > 1 else 0.0 - # Calculate FPS based on frames generated per call - avg_frames_per_call = statistics.mean(frame_counts) if frame_counts else 1.0 - - fps_avg = avg_frames_per_call / avg_latency if avg_latency > 0 else 0 - fps_min = avg_frames_per_call / max_latency if max_latency > 0 else 0 - fps_max = avg_frames_per_call / min_latency if min_latency > 0 else 0 + fps_avg = statistics.mean(fps_measures) + fps_min = min(fps_measures) + fps_max = max(fps_measures) results = { "fps_avg": round(fps_avg, 2), @@ -391,27 +314,20 @@ def run_config(self, config: dict) -> dict: finally: del pipeline self._clear_memory() + time.sleep(3.0) def _init_pipeline(self, config: dict): pid = config["pipeline_id"] pipeline_class = PipelineRegistry.get(pid) - if not pipeline_class: raise ValueError(f"Unknown pipeline: {pid}") - # Path Logic - model_dir = Path("src/scope/core/pipelines") / pid - if not model_dir.exists(): # Handle running from src vs root - model_dir = Path(__file__).parent / "src/scope/core/pipelines" / pid - - model_config = OmegaConf.load(model_dir / "model.yaml") + model_config = OmegaConf.load(Path(__file__).parent / "src/scope/core/pipelines" / pid / "model.yaml") pipeline_config = { "model_dir": str(get_models_dir()), "model_config": model_config, "height": config["height"], "width": config["width"], - "denoising_steps": config["denoising_steps"], } - # Hardcoded paths matching original test scripts def model_path(p): return str(get_model_file_path(p)) wan_enc = model_path("WanVideo_comfy/umt5-xxl-enc-fp8_e4m3fn.safetensors") wan_tok = model_path("Wan2.1-T2V-1.3B/google/umt5-xxl") @@ -436,7 +352,6 @@ def model_path(p): return str(get_model_file_path(p)) if "text_encoder_path" not in pipeline_config: pipeline_config["text_encoder_path"] = wan_enc if "tokenizer_path" not in pipeline_config: pipeline_config["tokenizer_path"] = wan_tok - # Init quantization = Quantization.FP8_E4M3FN if pid == 
"krea_realtime_video" else None args = { "config": OmegaConf.create(pipeline_config), @@ -446,48 +361,33 @@ def model_path(p): return str(get_model_file_path(p)) if quantization: args.update({"quantization": quantization}) - # Add compile flag if pipeline accepts it (most new ones do) - # Note: Some pipelines might not have 'compile' arg in __init__, but Krea does. - # We can inspect or try/except, but for simplicity we assume consistency or pass it conditionally if pid == "krea_realtime_video": args["compile"] = self.compile_model - # For others, if they support compile, add logic here. - # StreamDiffusionV2 might not expose it in __init__? - # If it inherits from BasePipeline that has it? - # We'll leave it out for others unless we know they support it to avoid TypeError. - return pipeline_class(**args) def _clear_memory(self): - if torch.cuda.is_available(): - torch.cuda.empty_cache() + """Aggressively clear GPU and system memory.""" + for _ in range(3): + gc.collect() torch.cuda.synchronize() - gc.collect() - - -# ================================================================================================= -# MAIN -# ================================================================================================= + torch.cuda.empty_cache() def main(): parser = argparse.ArgumentParser(description="Scope Benchmark") parser.add_argument("--pipelines", nargs="+", help="Specific pipelines to test") parser.add_argument("--resolutions", nargs="+", help="Resolutions (e.g. 512x512)") - parser.add_argument("--steps", type=int, default=4, help="Denoising steps (default: 4)") - parser.add_argument("--iterations", type=int, default=100, help="Measurement iterations per config") - parser.add_argument("--warmup", type=int, default=10, help="Warmup iterations per config") + parser.add_argument("--iterations", type=int, default=30, help="Measurement iterations per config") + parser.add_argument("--warmup", type=int, default=5, help="Warmup iterations per config") parser.add_argument("--output", default=f"benchmark_{datetime.now().strftime('%Y%m%d_%H%M')}.json") parser.add_argument("--no-tf32", action="store_true", help="Disable TF32 (enabled by default)") parser.add_argument("--compile", action="store_true", help="Enable torch.compile") args = parser.parse_args() - # Global Torch Settings if not args.no_tf32 and torch.cuda.is_available(): torch.backends.cuda.matmul.allow_tf32 = True torch.backends.cudnn.allow_tf32 = True print("TF32 Enabled") - # Parse resolutions custom_res = [] if args.resolutions: for r in args.resolutions: @@ -496,24 +396,19 @@ def main(): custom_res.append((h, w)) except ValueError: pass - # Detect Hardware hw = HardwareInfo() print("\n=== Hardware ===") print(f"GPU: {hw._get_gpu_info().get('devices', [{}])[0].get('name', 'None')}") print(f"VRAM: {hw.get_primary_gpu_vram_gb():.1f} GB") - # Build Configurations (1 per resolution) matrix = ConfigurationMatrix( - hw.get_primary_gpu_vram_gb(), pipelines=args.pipelines, resolutions=custom_res, - steps=[args.steps] ).build() print(f"\nPlanned Configurations: {len(matrix)}") if not matrix: return - # Run runner = BenchmarkRunner(args.warmup, args.iterations, compile_model=args.compile) results = [] @@ -529,7 +424,6 @@ def main(): except KeyboardInterrupt: print("\nStopped.") - # Save data = { "metadata": {"timestamp": datetime.now().isoformat(), "args": vars(args)}, "hardware": hw.to_dict(), From 8bbc9c9128542d8bc71f05f16a36d127b490f30b Mon Sep 17 00:00:00 2001 From: Varshith Bathini Date: Fri, 2 Jan 2026 12:38:34 +0000 Subject: 
[PATCH 3/6] fix: pid path --- benchmark.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/benchmark.py b/benchmark.py index 28e22484..39cc1176 100644 --- a/benchmark.py +++ b/benchmark.py @@ -267,13 +267,19 @@ def run_config(self, config: dict) -> dict: try: monitor.start() + output = None for _ in range(self.iterations): t0 = time.time() output = pipeline(**inputs) latency = time.time() - t0 latencies.append(latency) fps_measures.append(output.shape[0] / latency) + + if output is not None: + output = output.cpu() del output + torch.cuda.synchronize() + torch.cuda.empty_cache() finally: try: monitor.stop() @@ -320,7 +326,7 @@ def _init_pipeline(self, config: dict): pid = config["pipeline_id"] pipeline_class = PipelineRegistry.get(pid) - model_config = OmegaConf.load(Path(__file__).parent / "src/scope/core/pipelines" / pid / "model.yaml") + model_config = OmegaConf.load(Path(__file__).parent / "src/scope/core/pipelines" / pid.replace("-", "_") / "model.yaml") pipeline_config = { "model_dir": str(get_models_dir()), "model_config": model_config, @@ -340,19 +346,19 @@ def model_path(p): return str(get_model_file_path(p)) "generator_path": model_path("LongLive-1.3B/models/longlive_base.pt"), "lora_path": model_path("LongLive-1.3B/models/lora.pt") } - elif pid == "krea_realtime_video": + elif pid == "krea-realtime-video": paths = { "generator_path": model_path("krea-realtime-video/krea-realtime-video-14b.safetensors"), "vae_path": model_path("Wan2.1-T2V-1.3B/Wan2.1_VAE.pth") } - elif pid == "reward_forcing": + elif pid == "reward-forcing": paths = {"generator_path": model_path("Reward-Forcing-T2V-1.3B/rewardforcing.pt")} pipeline_config.update(paths) if "text_encoder_path" not in pipeline_config: pipeline_config["text_encoder_path"] = wan_enc if "tokenizer_path" not in pipeline_config: pipeline_config["tokenizer_path"] = wan_tok - quantization = Quantization.FP8_E4M3FN if pid == "krea_realtime_video" else None + quantization = Quantization.FP8_E4M3FN if pid == "krea-realtime-video" else None args = { "config": OmegaConf.create(pipeline_config), "device": self.device, @@ -361,7 +367,7 @@ def model_path(p): return str(get_model_file_path(p)) if quantization: args.update({"quantization": quantization}) - if pid == "krea_realtime_video": + if pid == "krea-realtime-video": args["compile"] = self.compile_model return pipeline_class(**args) From e83234a7bf0c5395af46f96a2f135728a7c83606 Mon Sep 17 00:00:00 2001 From: Varshith Bathini Date: Mon, 5 Jan 2026 14:50:56 +0000 Subject: [PATCH 4/6] fix: ruff --- benchmark.py | 52 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/benchmark.py b/benchmark.py index 39cc1176..f7234084 100644 --- a/benchmark.py +++ b/benchmark.py @@ -2,24 +2,24 @@ import gc import json import platform +import statistics import threading import time -import statistics from datetime import datetime from pathlib import Path from typing import Any -import torch -import pynvml import psutil -import statistics +import pynvml +import torch from omegaconf import OmegaConf -from scope.core.pipelines.utils import Quantization +from scope.core.config import get_model_file_path, get_models_dir from scope.core.pipelines.registry import PipelineRegistry +from scope.core.pipelines.utils import Quantization from scope.server.download_models import download_models from scope.server.models_config import models_are_downloaded -from scope.core.config import get_model_file_path, get_models_dir + class 
HardwareInfo: """Collects and stores hardware information.""" @@ -47,11 +47,13 @@ def _get_gpu_info(self) -> dict[str, Any]: for i in range(gpu_info["count"]): handle = pynvml.nvmlDeviceGetHandleByIndex(i) name = pynvml.nvmlDeviceGetName(handle) - if isinstance(name, bytes): name = name.decode("utf-8") + if isinstance(name, bytes): + name = name.decode("utf-8") mem = pynvml.nvmlDeviceGetMemoryInfo(handle) driver = pynvml.nvmlSystemGetDriverVersion() - if isinstance(driver, bytes): driver = driver.decode("utf-8") + if isinstance(driver, bytes): + driver = driver.decode("utf-8") gpu_info["devices"].append({ "index": i, @@ -116,14 +118,16 @@ def __init__(self, interval_ms: int = 100, device_index: int = 0): self._pynvml_initialized = True def start(self): - if self._monitoring: return + if self._monitoring: + return self._monitoring = True self._samples = [] self._thread = threading.Thread(target=self._monitor_loop, daemon=True) self._thread.start() def stop(self): - if not self._monitoring: return + if not self._monitoring: + return self._monitoring = False if self._thread: self._thread.join(timeout=2.0) @@ -148,8 +152,10 @@ def _collect_sample(self) -> dict[str, Any]: return sample def get_statistics(self) -> dict[str, float]: - with self._lock: samples = self._samples.copy() - if not samples: return {} + with self._lock: + samples = self._samples.copy() + if not samples: + return {} stats = {} keys = ["gpu_memory_allocated_gb", "gpu_utilization_percent", "system_cpu_percent"] @@ -211,7 +217,8 @@ def _get_resolutions(self, pid: str) -> list[tuple[int, int]]: return self.custom_resolutions pipeline_class = PipelineRegistry.get(pid) - if not pipeline_class: return [] + if not pipeline_class: + return [] default_cfg = pipeline_class.get_config_class()() res_set = {(default_cfg.height, default_cfg.width)} @@ -219,7 +226,7 @@ def _get_resolutions(self, pid: str) -> list[tuple[int, int]]: for h, w in self.STANDARD_RESOLUTIONS: res_set.add((h, w)) - return sorted(list(res_set)) + return sorted(res_set) class BenchmarkRunner: @@ -258,7 +265,7 @@ def run_config(self, config: dict) -> dict: for _ in range(self.warmup_iterations): pipeline(**inputs) except Exception as e: - raise Exception(f"Warmup failed: {e}") + raise Exception(f"Warmup failed: {e}") from e print(f"Measuring ({self.iterations} iterations)...") monitor = ResourceMonitor() @@ -355,8 +362,10 @@ def model_path(p): return str(get_model_file_path(p)) paths = {"generator_path": model_path("Reward-Forcing-T2V-1.3B/rewardforcing.pt")} pipeline_config.update(paths) - if "text_encoder_path" not in pipeline_config: pipeline_config["text_encoder_path"] = wan_enc - if "tokenizer_path" not in pipeline_config: pipeline_config["tokenizer_path"] = wan_tok + if "text_encoder_path" not in pipeline_config: + pipeline_config["text_encoder_path"] = wan_enc + if "tokenizer_path" not in pipeline_config: + pipeline_config["tokenizer_path"] = wan_tok quantization = Quantization.FP8_E4M3FN if pid == "krea-realtime-video" else None args = { @@ -400,7 +409,8 @@ def main(): try: h, w = map(int, r.split("x")) custom_res.append((h, w)) - except ValueError: pass + except ValueError: + pass hw = HardwareInfo() print("\n=== Hardware ===") @@ -413,7 +423,8 @@ def main(): ).build() print(f"\nPlanned Configurations: {len(matrix)}") - if not matrix: return + if not matrix: + return runner = BenchmarkRunner(args.warmup, args.iterations, compile_model=args.compile) results = [] @@ -435,7 +446,8 @@ def main(): "hardware": hw.to_dict(), "results": results } - with 
open(args.output, "w") as f: json.dump(data, f, indent=2) + with open(args.output, "w") as f: + json.dump(data, f, indent=2) print(f"\nSaved to {args.output}") if __name__ == "__main__": From 2e3c48c182f7e0e403df6cef5f398cecf42502a2 Mon Sep 17 00:00:00 2001 From: Varshith Bathini Date: Mon, 5 Jan 2026 15:05:03 +0000 Subject: [PATCH 5/6] fix: ruff format --- benchmark.py | 126 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 88 insertions(+), 38 deletions(-) diff --git a/benchmark.py b/benchmark.py index f7234084..5dae764c 100644 --- a/benchmark.py +++ b/benchmark.py @@ -55,22 +55,26 @@ def _get_gpu_info(self) -> dict[str, Any]: if isinstance(driver, bytes): driver = driver.decode("utf-8") - gpu_info["devices"].append({ - "index": i, - "name": name, - "memory_total_gb": mem.total / (1024**3), - "driver_version": driver, - }) + gpu_info["devices"].append( + { + "index": i, + "name": name, + "memory_total_gb": mem.total / (1024**3), + "driver_version": driver, + } + ) pynvml.nvmlShutdown() if not gpu_info["devices"]: for i in range(gpu_info["count"]): props = torch.cuda.get_device_properties(i) - gpu_info["devices"].append({ - "index": i, - "name": props.name, - "memory_total_gb": props.total_memory / (1024**3), - }) + gpu_info["devices"].append( + { + "index": i, + "name": props.name, + "memory_total_gb": props.total_memory / (1024**3), + } + ) return gpu_info @@ -83,7 +87,10 @@ def _get_cpu_info(self) -> dict[str, Any]: def _get_memory_info(self) -> dict[str, Any]: mem = psutil.virtual_memory() - return {"total_gb": mem.total / (1024**3), "available_gb": mem.available / (1024**3)} + return { + "total_gb": mem.total / (1024**3), + "available_gb": mem.available / (1024**3), + } def _get_platform_info(self) -> dict[str, Any]: return { @@ -101,6 +108,7 @@ def get_primary_gpu_vram_gb(self) -> float: return 0.0 return self._info["gpu"]["devices"][0]["memory_total_gb"] + class ResourceMonitor: def __init__(self, interval_ms: int = 100, device_index: int = 0): self.interval_ms = interval_ms @@ -143,7 +151,9 @@ def _monitor_loop(self): def _collect_sample(self) -> dict[str, Any]: sample = {} if torch.cuda.is_available(): - sample["gpu_memory_allocated_gb"] = torch.cuda.memory_allocated(self.device_index) / (1024**3) + sample["gpu_memory_allocated_gb"] = torch.cuda.memory_allocated( + self.device_index + ) / (1024**3) if self._pynvml_initialized and self._gpu_handle: util = pynvml.nvmlDeviceGetUtilizationRates(self._gpu_handle) sample["gpu_utilization_percent"] = util.gpu @@ -158,7 +168,11 @@ def get_statistics(self) -> dict[str, float]: return {} stats = {} - keys = ["gpu_memory_allocated_gb", "gpu_utilization_percent", "system_cpu_percent"] + keys = [ + "gpu_memory_allocated_gb", + "gpu_utilization_percent", + "system_cpu_percent", + ] for key in keys: values = [s[key] for s in samples if key in s] if values: @@ -238,7 +252,9 @@ def __init__(self, warmup_iterations=5, iterations=30, compile_model=False): def run_config(self, config: dict) -> dict: pipeline_id = config["pipeline_id"] - print(f"\n--- Benchmarking {pipeline_id} [{config['height']}x{config['width']}] ---") + print( + f"\n--- Benchmarking {pipeline_id} [{config['height']}x{config['width']}] ---" + ) if not models_are_downloaded(pipeline_id): print(f"Downloading models for {pipeline_id}...") @@ -256,9 +272,14 @@ def run_config(self, config: dict) -> dict: if pipeline_id == "streamdiffusionv2": inputs["video"] = torch.randn( - 1, 3, 4, config["height"], config["width"], - device=self.device, dtype=torch.bfloat16 - ) + 
1, + 3, + 4, + config["height"], + config["width"], + device=self.device, + dtype=torch.bfloat16, + ) print(f"Warmup ({self.warmup_iterations} iterations)...") try: @@ -315,10 +336,12 @@ def run_config(self, config: dict) -> dict: "latency_min_sec": round(min_latency, 4), "latency_max_sec": round(max_latency, 4), "jitter_sec": round(jitter, 6), - **resource_stats + **resource_stats, } - print(f"-> FPS: {results['fps_avg']} | Latency: {results['latency_avg_sec']}s | Jitter: {results['jitter_sec']}s") + print( + f"-> FPS: {results['fps_avg']} | Latency: {results['latency_avg_sec']}s | Jitter: {results['jitter_sec']}s" + ) return results except Exception as e: @@ -333,7 +356,12 @@ def _init_pipeline(self, config: dict): pid = config["pipeline_id"] pipeline_class = PipelineRegistry.get(pid) - model_config = OmegaConf.load(Path(__file__).parent / "src/scope/core/pipelines" / pid.replace("-", "_") / "model.yaml") + model_config = OmegaConf.load( + Path(__file__).parent + / "src/scope/core/pipelines" + / pid.replace("-", "_") + / "model.yaml" + ) pipeline_config = { "model_dir": str(get_models_dir()), "model_config": model_config, @@ -341,25 +369,35 @@ def _init_pipeline(self, config: dict): "width": config["width"], } - def model_path(p): return str(get_model_file_path(p)) + def model_path(p): + return str(get_model_file_path(p)) + wan_enc = model_path("WanVideo_comfy/umt5-xxl-enc-fp8_e4m3fn.safetensors") wan_tok = model_path("Wan2.1-T2V-1.3B/google/umt5-xxl") paths = {} if pid == "streamdiffusionv2": - paths = {"generator_path": model_path("StreamDiffusionV2/wan_causal_dmd_v2v/model.pt")} + paths = { + "generator_path": model_path( + "StreamDiffusionV2/wan_causal_dmd_v2v/model.pt" + ) + } elif pid == "longlive": paths = { "generator_path": model_path("LongLive-1.3B/models/longlive_base.pt"), - "lora_path": model_path("LongLive-1.3B/models/lora.pt") + "lora_path": model_path("LongLive-1.3B/models/lora.pt"), } elif pid == "krea-realtime-video": paths = { - "generator_path": model_path("krea-realtime-video/krea-realtime-video-14b.safetensors"), - "vae_path": model_path("Wan2.1-T2V-1.3B/Wan2.1_VAE.pth") + "generator_path": model_path( + "krea-realtime-video/krea-realtime-video-14b.safetensors" + ), + "vae_path": model_path("Wan2.1-T2V-1.3B/Wan2.1_VAE.pth"), } elif pid == "reward-forcing": - paths = {"generator_path": model_path("Reward-Forcing-T2V-1.3B/rewardforcing.pt")} + paths = { + "generator_path": model_path("Reward-Forcing-T2V-1.3B/rewardforcing.pt") + } pipeline_config.update(paths) if "text_encoder_path" not in pipeline_config: @@ -371,7 +409,7 @@ def model_path(p): return str(get_model_file_path(p)) args = { "config": OmegaConf.create(pipeline_config), "device": self.device, - "dtype": torch.bfloat16 + "dtype": torch.bfloat16, } if quantization: args.update({"quantization": quantization}) @@ -387,14 +425,23 @@ def _clear_memory(self): torch.cuda.synchronize() torch.cuda.empty_cache() + def main(): parser = argparse.ArgumentParser(description="Scope Benchmark") parser.add_argument("--pipelines", nargs="+", help="Specific pipelines to test") parser.add_argument("--resolutions", nargs="+", help="Resolutions (e.g. 
512x512)") - parser.add_argument("--iterations", type=int, default=30, help="Measurement iterations per config") - parser.add_argument("--warmup", type=int, default=5, help="Warmup iterations per config") - parser.add_argument("--output", default=f"benchmark_{datetime.now().strftime('%Y%m%d_%H%M')}.json") - parser.add_argument("--no-tf32", action="store_true", help="Disable TF32 (enabled by default)") + parser.add_argument( + "--iterations", type=int, default=30, help="Measurement iterations per config" + ) + parser.add_argument( + "--warmup", type=int, default=5, help="Warmup iterations per config" + ) + parser.add_argument( + "--output", default=f"benchmark_{datetime.now().strftime('%Y%m%d_%H%M')}.json" + ) + parser.add_argument( + "--no-tf32", action="store_true", help="Disable TF32 (enabled by default)" + ) parser.add_argument("--compile", action="store_true", help="Enable torch.compile") args = parser.parse_args() @@ -433,22 +480,25 @@ def main(): for i, config in enumerate(matrix, 1): print(f"\n[{i}/{len(matrix)}]", end=" ") metrics = runner.run_config(config) - results.append({ - "pipeline": config["pipeline_id"], - "resolution": f"{config['height']}x{config['width']}", - "metrics": metrics - }) + results.append( + { + "pipeline": config["pipeline_id"], + "resolution": f"{config['height']}x{config['width']}", + "metrics": metrics, + } + ) except KeyboardInterrupt: print("\nStopped.") data = { "metadata": {"timestamp": datetime.now().isoformat(), "args": vars(args)}, "hardware": hw.to_dict(), - "results": results + "results": results, } with open(args.output, "w") as f: json.dump(data, f, indent=2) print(f"\nSaved to {args.output}") + if __name__ == "__main__": main() From 53370e190f5cb57ef233763c0088e6f53875015a Mon Sep 17 00:00:00 2001 From: Varshith Bathini Date: Mon, 5 Jan 2026 15:08:01 +0000 Subject: [PATCH 6/6] fix: reqs --- pyproject.toml | 1 - uv.lock | 11 ----------- 2 files changed, 12 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a94c395a..c4b041a9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -111,7 +111,6 @@ dev = [ benchmark = [ "psutil>=6.1.0", "nvidia-ml-py>=12.560.30", - "py-cpuinfo>=9.0.0", ] [tool.ruff] diff --git a/uv.lock b/uv.lock index 60147016..db06b45a 100644 --- a/uv.lock +++ b/uv.lock @@ -658,7 +658,6 @@ dependencies = [ benchmark = [ { name = "nvidia-ml-py" }, { name = "psutil" }, - { name = "py-cpuinfo" }, ] dev = [ { name = "freezegun" }, @@ -709,7 +708,6 @@ requires-dist = [ benchmark = [ { name = "nvidia-ml-py", specifier = ">=12.560.30" }, { name = "psutil", specifier = ">=6.1.0" }, - { name = "py-cpuinfo", specifier = ">=9.0.0" }, ] dev = [ { name = "freezegun", specifier = ">=1.5.5" }, @@ -2313,15 +2311,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c9/ad/33b2ccec09bf96c2b2ef3f9a6f66baac8253d7565d8839e024a6b905d45d/psutil-7.1.3-cp37-abi3-win_arm64.whl", hash = "sha256:bd0d69cee829226a761e92f28140bec9a5ee9d5b4fb4b0cc589068dbfff559b1", size = 244608, upload-time = "2025-11-02T12:26:36.136Z" }, ] -[[package]] -name = "py-cpuinfo" -version = "9.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/37/a8/d832f7293ebb21690860d2e01d8115e5ff6f2ae8bbdc953f0eb0fa4bd2c7/py-cpuinfo-9.0.0.tar.gz", hash = "sha256:3cdbbf3fac90dc6f118bfd64384f309edeadd902d7c8fb17f02ffa1fc3f49690", size = 104716, upload-time = "2022-10-25T20:38:06.303Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/e0/a9/023730ba63db1e494a271cb018dcd361bd2c917ba7004c3e49d5daf795a2/py_cpuinfo-9.0.0-py3-none-any.whl", hash = "sha256:859625bc251f64e21f077d099d4162689c762b5d6a4c3c97553d56241c9674d5", size = 22335, upload-time = "2022-10-25T20:38:27.636Z" }, -] - [[package]] name = "pycparser" version = "2.23"