Camshowrecordings/model/sam_samantha/5 May 2026
# Normalize: scale pixel values by 1/255, then standardize with the mean/std
# taken from the config.
# NOTE(review): assumes img_rgb holds 0-255 pixel values with the channel axis
# last, so the per-channel mean/std broadcast correctly — confirm.
img_norm = img_rgb.astype(np.float32) / 255.0
# Build the statistics as float32: np.array() on Python floats defaults to
# float64, which would silently promote img_norm to float64 in the arithmetic
# below.
mean = np.array(cfg["preprocess"]["mean"], dtype=np.float32)
std = np.array(cfg["preprocess"]["std"], dtype=np.float32)
img_norm = (img_norm - mean) / std
model:
  name: sam_samantha
  version: 5
  backbone: vit_h
  image_size: 1024
  num_classes: 1  # Usually segmentation → binary mask
preprocess:
  normalize: true
  mean: [0.485, 0.456, 0.406]
  std: [0.229, 0.224, 0.225]
device: cuda

Below is a minimal, self‑contained script that loads the model and runs a single inference on a video frame.
# ------------------------------------------------------------------
# 5️⃣ Run inference
# ------------------------------------------------------------------
def infer(frame: np.ndarray):
    """Run the model on one frame and return a 0/255 ``uint8`` mask array.

    The frame is passed through the module-level ``preprocess`` helper with
    the module-level ``cfg``, fed to the module-level ``model`` with gradient
    tracking disabled, and the output is thresholded into a binary mask.

    Args:
        frame: The input image as a NumPy array.

    Returns:
        A NumPy ``uint8`` array whose entries are 0 or 255, with all
        size-1 dimensions squeezed away.
    """
    batch = preprocess(frame, cfg)

    # How the model is called is model-specific; this form expects the call
    # to hand back the mask directly.
    with torch.no_grad():
        raw = model(batch)

    # NOTE(review): the output is presumably (B, 1, H, W) logits. The sigmoid
    # is applied unconditionally, so a model that already returns
    # probabilities would be thresholded incorrectly — confirm.
    binary = torch.sigmoid(raw) > 0.5

    as_array = binary.squeeze().cpu().numpy()
    return as_array.astype(np.uint8) * 255
# camshowrecordings/model/sam_samantha/5
cd model/sam_samantha/5
ls -l

Typical files you’ll see:
if __name__ == "__main__":
    # Command-line entry point: two positional video paths and an optional
    # --stride, all forwarded to process_video().
    cli = argparse.ArgumentParser()
    cli.add_argument("input_video", type=Path)
    cli.add_argument("output_video", type=Path)
    cli.add_argument(
        "--stride",
        type=int,
        default=5,
        help="Run inference every N frames (default=5)",
    )
    opts = cli.parse_args()

    process_video(opts.input_video, opts.output_video, opts.stride)
# Normalize img_norm = img_rgb
# Open the output writer using the "mp4v" FourCC code, the given fps, and a
# (w, h) frame size.
fourcc = cv2.VideoWriter_fourcc("m", "p", "4", "v")
frame_size = (w, h)
out = cv2.VideoWriter(
    str(out_path),
    fourcc,
    fps,
    frame_size,
)
# ------------------------------------------------------------------ # 4️⃣ Pre‑process a single frame (example uses OpenCV) # ------------------------------------------------------------------ def preprocess(img: np.ndarray, cfg) -> torch.Tensor: # Resize while keeping aspect ratio (optional) target_sz = cfg["model"]["image_size"] img_resized = cv2.resize(img, (target_sz, target_sz)) 0.406] std: [0.229
def process_video(in_path: Path, out_path: Path, stride: int = 5):
    """Open the input video for processing.

    Only the capture-opening step is visible in this excerpt; how
    ``out_path`` and ``stride`` are used is not shown here.

    Args:
        in_path: Path of the video file to read.
        out_path: Path of the video file to write.
        stride: Frame interval setting (default 5).

    Raises:
        RuntimeError: If OpenCV cannot open ``in_path``.
    """
    cap = cv2.VideoCapture(str(in_path))
    if not cap.isOpened():
        # Interpolate the actual path; the original f-string had no
        # placeholder and printed the literal text "in_path".
        raise RuntimeError(f"Cannot open {in_path}")
