update

2026-02-28 00:22:41 -08:00 · 2026-02-19 00:33:08 -08:00
parent e37f3dd7b1
commit 70dd0779f2
143 changed files with 31888 additions and 0 deletions
--- a/.agents/skills/ios-simulator-skill/scripts/common/init.py
+++ b/.agents/skills/ios-simulator-skill/scripts/common/init.py
@@ -0,0 +1,59 @@
+"""
+Common utilities shared across iOS simulator scripts.
+
+This module centralizes genuinely reused code patterns to eliminate duplication
+while respecting Jackson's Law - no over-abstraction, only truly shared logic.
+
+Organization:
+- device_utils: Device detection, command building, coordinate transformation
+- idb_utils: IDB-specific operations (accessibility tree, element manipulation)
+- cache_utils: Progressive disclosure caching for large outputs
+- screenshot_utils: Screenshot capture with file and inline modes
+"""
+
+from .cache_utils import ProgressiveCache, get_cache
+from .device_utils import (
+    build_idb_command,
+    build_simctl_command,
+    get_booted_device_udid,
+    get_device_screen_size,
+    resolve_udid,
+    transform_screenshot_coords,
+)
+from .idb_utils import (
+    count_elements,
+    flatten_tree,
+    get_accessibility_tree,
+    get_screen_size,
+)
+from .screenshot_utils import (
+    capture_screenshot,
+    format_screenshot_result,
+    generate_screenshot_name,
+    get_size_preset,
+    resize_screenshot,
+)
+
+# Public API re-exported by the package, in plain sorted order (class name first).
+# Entries are NOT grouped by source module; see the imports above for each origin.
+# NOTE(review): device_utils.resolve_device_identifier and list_simulators are not
+# re-exported here - confirm that is intentional.
+__all__ = [
+    "ProgressiveCache",
+    "build_idb_command",
+    "build_simctl_command",
+    "capture_screenshot",
+    "count_elements",
+    "flatten_tree",
+    "format_screenshot_result",
+    "generate_screenshot_name",
+    "get_accessibility_tree",
+    "get_booted_device_udid",
+    "get_cache",
+    "get_device_screen_size",
+    "get_screen_size",
+    "get_size_preset",
+    "resize_screenshot",
+    "resolve_udid",
+    "transform_screenshot_coords",
+]
--- a/.agents/skills/ios-simulator-skill/scripts/common/cache_utils.py
+++ b/.agents/skills/ios-simulator-skill/scripts/common/cache_utils.py
@@ -0,0 +1,260 @@
+#!/usr/bin/env python3
+"""
+Progressive disclosure cache for large outputs.
+
+Implements cache system to support progressive disclosure pattern:
+- Return concise summary with cache_id for large outputs
+- User retrieves full details on demand via cache_id
+- Reduces token usage by 96% for common queries
+
+Cache directory: ~/.ios-simulator-skill/cache/
+Cache expiration: Configurable per cache type (default 1 hour)
+
+Used by:
+- sim_list.py - Simulator listing progressive disclosure
+- Future: build logs, UI trees, etc.
+"""
+
+import json
+import time
+from datetime import datetime, timedelta
+from pathlib import Path
+from typing import Any
+
+
+class ProgressiveCache:
+    """Cache for progressive disclosure pattern.
+
+    Stores large outputs as JSON files named after timestamped IDs so a caller can
+    return a short summary plus a cache_id and serve the full payload on demand.
+    Expired or unreadable entries are deleted lazily by get() and list_entries().
+    """
+
+    def __init__(self, cache_dir: str | None = None, max_age_hours: int = 1):
+        """Initialize cache system.
+
+        Args:
+            cache_dir: Cache directory path (default: ~/.ios-simulator-skill/cache/)
+            max_age_hours: Max age for cache entries before expiration (default: 1 hour)
+        """
+        if cache_dir is None:
+            cache_dir = str(Path("~/.ios-simulator-skill/cache").expanduser())
+
+        self.cache_dir = Path(cache_dir)
+        self.max_age_hours = max_age_hours
+
+        # Create cache directory if needed
+        self.cache_dir.mkdir(parents=True, exist_ok=True)
+
+    def save(self, data: dict[str, Any], cache_type: str) -> str:
+        """Save data to cache and return cache_id.
+
+        Args:
+            data: Dictionary data to cache (must be JSON-serializable)
+            cache_type: Type of cache ('simulator-list', 'build-log', 'ui-tree', etc.)
+
+        Returns:
+            Cache ID '<prefix>-YYYYmmdd-HHMMSS' (<prefix> is cache_type up to its first
+            '-'); a '-N' suffix is added if that ID already exists (same-second saves)
+
+        Example:
+            cache_id = cache.save({'devices': [...]}, 'simulator-list')
+            # Returns: 'simulator-20251028-143052'
+        """
+        # Read the clock once so the ID and created_at describe the same instant
+        now = datetime.now()
+        cache_prefix = cache_type.split("-")[0]  # e.g., 'simulator' from 'simulator-list'
+        base_id = f"{cache_prefix}-{now.strftime('%Y%m%d-%H%M%S')}"
+        # Second-resolution IDs can collide; never overwrite an existing entry
+        cache_id = base_id
+        attempt = 0
+        while (self.cache_dir / f"{cache_id}.json").exists():
+            attempt += 1
+            cache_id = f"{base_id}-{attempt}"
+
+        entry = {
+            "cache_id": cache_id,
+            "cache_type": cache_type,
+            "created_at": now.isoformat(),
+            "data": data,
+        }
+        with open(self.cache_dir / f"{cache_id}.json", "w") as f:
+            json.dump(entry, f, indent=2)
+
+        return cache_id
+
+    def get(self, cache_id: str) -> dict[str, Any] | None:
+        """Retrieve data from cache by cache_id.
+
+        Args:
+            cache_id: Cache ID from save() or list_entries()
+
+        Returns:
+            Cached data dictionary, or None if not found/expired
+
+        Example:
+            data = cache.get('simulator-20251028-143052')
+            if data:
+                print(f"Found {len(data)} devices")
+        """
+        # A cache_id is a bare file stem. Refuse path components so a crafted ID cannot
+        # make us read, or delete as "expired", a JSON file outside the cache directory.
+        if not cache_id or Path(cache_id).name != cache_id:
+            return None
+
+        cache_file = self.cache_dir / f"{cache_id}.json"
+        if not cache_file.exists():
+            return None
+        if self._is_expired(cache_file):
+            cache_file.unlink(missing_ok=True)  # Delete expired file
+            return None
+        entry = self._load_entry(cache_file)
+        return None if entry is None else entry.get("data")
+
+    def list_entries(self, cache_type: str | None = None) -> list[dict[str, Any]]:
+        """List available cache entries with metadata.
+
+        Args:
+            cache_type: Filter by type (e.g., 'simulator-list'), or None for all
+
+        Returns:
+            List of cache entries with id, type, created_at, age_seconds
+
+        Example:
+            entries = cache.list_entries('simulator-list')
+            for entry in entries:
+                print(f"{entry['id']} - {entry['age_seconds']}s old")
+        """
+        entries = []
+
+        for cache_file in sorted(self.cache_dir.glob("*.json"), reverse=True):
+            # _is_expired() is also True for unreadable files, so those get purged too
+            if self._is_expired(cache_file):
+                cache_file.unlink(missing_ok=True)
+                continue
+
+            entry = self._load_entry(cache_file)
+            if entry is None:  # File vanished or changed since the expiry check
+                continue
+            # Filter by type if specified
+            if cache_type and entry.get("cache_type") != cache_type:
+                continue
+            try:
+                created_at = datetime.fromisoformat(entry.get("created_at", ""))
+                age_seconds = (datetime.now() - created_at).total_seconds()
+            except (TypeError, ValueError):
+                continue
+            entries.append(
+                {
+                    "id": entry.get("cache_id"),
+                    "type": entry.get("cache_type"),
+                    "created_at": entry.get("created_at"),
+                    "age_seconds": int(age_seconds),
+                }
+            )
+
+        return entries
+
+    def cleanup(self, max_age_hours: int | None = None) -> int:
+        """Remove expired cache entries.
+
+        Args:
+            max_age_hours: Age threshold (default: uses instance max_age_hours)
+
+        Returns:
+            Number of entries deleted
+
+        Example:
+            deleted = cache.cleanup()
+            print(f"Deleted {deleted} expired cache entries")
+        """
+        if max_age_hours is None:
+            max_age_hours = self.max_age_hours
+
+        deleted = 0
+        for cache_file in self.cache_dir.glob("*.json"):
+            if self._is_expired(cache_file, max_age_hours):
+                cache_file.unlink(missing_ok=True)
+                deleted += 1
+
+        return deleted
+
+    def clear(self, cache_type: str | None = None) -> int:
+        """Clear all cache entries of a type.
+
+        Args:
+            cache_type: Type to clear (e.g., 'simulator-list'), or None to clear all
+
+        Returns:
+            Number of entries deleted
+
+        Example:
+            cleared = cache.clear('simulator-list')
+            print(f"Cleared {cleared} simulator list entries")
+        """
+        deleted = 0
+        for cache_file in self.cache_dir.glob("*.json"):
+            if cache_type is not None:
+                # Clear by type: a file whose type cannot be read is left in place
+                entry = self._load_entry(cache_file)
+                if entry is None or entry.get("cache_type") != cache_type:
+                    continue
+            cache_file.unlink(missing_ok=True)
+            deleted += 1
+
+        return deleted
+
+    def _is_expired(self, cache_file: Path, max_age_hours: int | None = None) -> bool:
+        """Check if cache file is expired.
+
+        Args:
+            cache_file: Path to cache file
+            max_age_hours: Age threshold (default: uses instance max_age_hours)
+
+        Returns:
+            True if the entry is older than max_age_hours, or if the file cannot be
+            read as a cache entry (missing, invalid JSON, unusable created_at)
+        """
+        if max_age_hours is None:
+            max_age_hours = self.max_age_hours
+
+        entry = self._load_entry(cache_file)
+        if entry is None:
+            return True
+        try:
+            created_at = datetime.fromisoformat(entry.get("created_at", ""))
+            return datetime.now() - created_at > timedelta(hours=max_age_hours)
+        except (TypeError, ValueError):  # Non-string, malformed or tz-aware timestamp
+            return True
+
+    @staticmethod
+    def _load_entry(cache_file: Path) -> dict[str, Any] | None:
+        """Read a cache file; return None if it is unreadable or not a JSON object."""
+        try:
+            with open(cache_file) as f:
+                entry = json.load(f)
+        except (OSError, ValueError):  # JSONDecodeError is a ValueError subclass
+            return None
+        return entry if isinstance(entry, dict) else None
+
+
+# get_cache() registry: one ProgressiveCache per cache_dir key ("default" if unset)
+_cache_instances: dict[str, ProgressiveCache] = {}
+
+
+def get_cache(cache_dir: str | None = None) -> ProgressiveCache:
+    """Return the shared ProgressiveCache for a directory, creating it on first use.
+
+    Args:
+        cache_dir: Custom cache directory; a falsy value selects the shared
+            "default" slot and is passed through to ProgressiveCache unchanged
+
+    Returns:
+        The ProgressiveCache registered in _cache_instances for that directory
+    """
+    registry_key = cache_dir if cache_dir else "default"
+    instance = _cache_instances.get(registry_key)
+    if instance is None:
+        instance = ProgressiveCache(cache_dir)
+        _cache_instances[registry_key] = instance
+    return instance
--- a/.agents/skills/ios-simulator-skill/scripts/common/device_utils.py
+++ b/.agents/skills/ios-simulator-skill/scripts/common/device_utils.py
@@ -0,0 +1,432 @@
+#!/usr/bin/env python3
+"""
+Shared device and simulator utilities.
+
+Common patterns for interacting with simulators via xcrun simctl and IDB.
+Standardizes command building and device targeting to prevent errors.
+
+Follows Jackson's Law - only extracts genuinely reused patterns.
+
+Used by:
+- app_launcher.py (8 call sites) - App lifecycle commands
+- Multiple scripts (15+ locations) - IDB command building
+- navigator.py, gesture.py - Coordinate transformation
+- test_recorder.py, app_state_capture.py - Auto-UDID detection
+"""
+
+import json
+import re
+import subprocess
+
+
+def build_simctl_command(
+    operation: str,
+    udid: str | None = None,
+    *args,
+) -> list[str]:
+    """
+    Assemble an ``xcrun simctl`` argument list that always names a target device.
+
+    The device slot directly follows the operation. When no UDID is supplied the
+    literal "booted" is used, so the helper only suits simctl operations that
+    expect a device argument in that position.
+
+    Used by:
+    - app_launcher.py: launch, terminate, install, uninstall, openurl, listapps, spawn
+    - Multiple scripts: generic simctl operations
+
+    Args:
+        operation: simctl operation (launch, terminate, install, etc.)
+        udid: Device UDID; None or an empty string selects "booted"
+        *args: Extra arguments, converted with str() and appended in order
+
+    Returns:
+        A new list ["xcrun", "simctl", operation, <device>, *args] ready for
+        subprocess.run()
+
+    Examples:
+        # Launch app on booted simulator
+        cmd = build_simctl_command("launch", None, "com.app.bundle")
+        # Returns: ["xcrun", "simctl", "launch", "booted", "com.app.bundle"]
+
+        # Launch on specific device
+        cmd = build_simctl_command("launch", "ABC123", "com.app.bundle")
+        # Returns: ["xcrun", "simctl", "launch", "ABC123", "com.app.bundle"]
+
+        # Install app on specific device
+        cmd = build_simctl_command("install", "ABC123", "/path/to/app.app")
+        # Returns: ["xcrun", "simctl", "install", "ABC123", "/path/to/app.app"]
+    """
+    # A falsy UDID (None or "") means "whichever simulator is booted"
+    device = udid or "booted"
+
+    # Unpack straight into the result; every extra argument is stringified
+    extras = [str(extra) for extra in args]
+
+    return ["xcrun", "simctl", operation, device, *extras]
+
+
+def build_idb_command(
+    operation: str,
+    udid: str | None = None,
+    *args,
+) -> list[str]:
+    """
+    Assemble an ``idb`` argument list with optional ``--udid`` device targeting.
+
+    The operation string is split on whitespace into sub-command words, the extra
+    arguments follow, and ``--udid <udid>`` is appended last when a UDID is given.
+
+    Used by:
+    - navigator.py: ui tap, ui text, ui describe-all
+    - gesture.py: ui swipe, ui tap
+    - keyboard.py: ui key, ui text, ui tap
+    - And more: 15+ locations
+
+    Args:
+        operation: IDB operation path (e.g., "ui tap", "ui text", "ui describe-all")
+        udid: Device UDID; when None or empty no --udid flag is emitted
+        *args: Extra arguments, converted with str() and kept as single items
+            (an argument containing spaces is not split)
+
+    Returns:
+        A new list ["idb", *operation words, *args, "--udid", udid] (the last two
+        items only when udid is set), ready for subprocess.run()
+
+    Examples:
+        # Tap on booted simulator
+        cmd = build_idb_command("ui tap", None, "200", "400")
+        # Returns: ["idb", "ui", "tap", "200", "400"]
+
+        # Tap on specific device
+        cmd = build_idb_command("ui tap", "ABC123", "200", "400")
+        # Returns: ["idb", "ui", "tap", "200", "400", "--udid", "ABC123"]
+
+        # Get accessibility tree
+        cmd = build_idb_command("ui describe-all", "ABC123", "--json", "--nested")
+        # Returns: ["idb", "ui", "describe-all", "--json", "--nested", "--udid", "ABC123"]
+
+        # Enter text
+        cmd = build_idb_command("ui text", None, "hello world")
+        # Returns: ["idb", "ui", "text", "hello world"]
+    """
+    # "ui tap" -> ["ui", "tap"]
+    subcommand = operation.split()
+    # Extra arguments are stringified but never split
+    extras = [str(extra) for extra in args]
+
+    # Device targeting goes last and only when a UDID was supplied
+    device_flag = ["--udid", udid] if udid else []
+
+    return ["idb", *subcommand, *extras, *device_flag]
+
+
+def get_booted_device_udid() -> str | None:
+    """
+    Auto-detect currently booted simulator UDID.
+
+    Runs ``xcrun simctl list devices booted`` and returns the first 36-character
+    UUID-shaped token that appears in parentheses in its output.
+
+    Returns:
+        UDID of the first booted simulator listed, or None if none is found, the
+        simctl command fails, or xcrun cannot be started at all (for example when
+        the Xcode command line tools are not installed).
+
+    Example:
+        udid = get_booted_device_udid()
+        if udid:
+            print(f"Booted simulator: {udid}")
+        else:
+            print("No simulator is currently booted")
+    """
+    try:
+        result = subprocess.run(
+            ["xcrun", "simctl", "list", "devices", "booted"],
+            capture_output=True,
+            text=True,
+            check=True,
+        )
+    except (subprocess.CalledProcessError, OSError):
+        # OSError covers a missing or non-executable xcrun (FileNotFoundError etc.).
+        # Returning None keeps the documented contract instead of leaking a traceback.
+        return None
+
+    # Device lines look like "    iPhone 16 Pro (<UDID>) (Booted)"; the pattern only
+    # accepts uppercase hex digits and dashes, so "(Booted)" itself never matches.
+    match = re.search(r"\(([A-F0-9\-]{36})\)", result.stdout)
+    return match.group(1) if match else None
+
+
+def resolve_udid(udid_arg: str | None) -> str:
+    """
+    Resolve device UDID with auto-detection fallback.
+
+    An explicit, non-empty udid_arg wins and is returned unchanged without
+    querying simctl. Otherwise the booted simulator reported by
+    get_booted_device_udid() is used.
+
+    Args:
+        udid_arg: Explicit UDID from command line, or None
+
+    Returns:
+        The UDID to target
+
+    Raises:
+        RuntimeError: If no UDID provided and no booted simulator found
+
+    Example:
+        try:
+            udid = resolve_udid(args.udid)  # args.udid might be None
+            print(f"Using device: {udid}")
+        except RuntimeError as e:
+            print(f"Error: {e}")
+            sys.exit(1)
+    """
+    # `or` short-circuits, so detection only runs when no UDID was passed in
+    resolved = udid_arg or get_booted_device_udid()
+
+    if not resolved:
+        # Neither source produced a UDID; tell the user how to fix it
+        raise RuntimeError(
+            "No device UDID provided and no simulator is currently booted.\n"
+            "Boot a simulator or provide --udid explicitly:\n"
+            "  xcrun simctl boot <device-name>\n"
+            "  python scripts/script_name.py --udid <device-udid>"
+        )
+
+    return resolved
+
+
+def get_device_screen_size(udid: str) -> tuple[int, int]:
+    """
+    Get screen dimensions for a device from the IDB accessibility tree.
+
+    Runs ``idb ui describe-all --json`` for the device and reads the "frame" of the
+    first element (or of the object itself if the output is not a list).
+    Any failure - idb error, bad JSON, missing frame - yields the 390x844 default.
+
+    Args:
+        udid: Device UDID
+
+    Returns:
+        Tuple of (width, height) taken from the frame, each truncated to int; a
+        missing "width" or "height" key falls back to 390 or 844 respectively
+
+    Example:
+        width, height = get_device_screen_size("ABC123")
+        print(f"Device screen: {width}x{height}")
+    """
+    fallback = (390, 844)
+
+    try:
+        # Query without --nested; only the first element's frame is needed
+        cmd = build_idb_command("ui describe-all", udid, "--json")
+        completed = subprocess.run(cmd, capture_output=True, text=True, check=True)
+        payload = json.loads(completed.stdout)
+        root = payload[0] if isinstance(payload, list) and payload else payload
+
+        # Guard clause: nothing usable to read the frame from
+        if not root or "frame" not in root:
+            return fallback
+
+        frame = root["frame"]
+        return (int(frame.get("width", 390)), int(frame.get("height", 844)))
+    except Exception:
+        # Deliberate best-effort: every error degrades to the default size
+        return fallback
+
+
+def resolve_device_identifier(identifier: str) -> str:
+    """
+    Resolve a UDID, device name or the keyword "booted" to a device UDID.
+
+    Resolution order:
+    1. "booted" (any letter case): UDID from get_booted_device_udid()
+    2. 36 characters of hex digits/dashes: treated as a UDID, returned uppercased
+    3. Case-insensitive exact match on a simulator name from list_simulators()
+    4. Case-insensitive substring match on a simulator name
+    Within steps 3 and 4 the first simulator in list order wins.
+
+    Args:
+        identifier: Device UDID, name, or special value "booted"
+
+    Returns:
+        Device UDID
+
+    Raises:
+        RuntimeError: If "booted" is requested but nothing is booted, or if no
+            simulator name matches the identifier
+
+    Example:
+        udid = resolve_device_identifier("iPhone 16 Pro")
+        # Returns the UDID of the first simulator with that name
+
+        udid = resolve_device_identifier("booted")
+        # Returns UDID of booted simulator
+    """
+    wanted = identifier.lower()
+
+    # Handle "booted" special case
+    if wanted == "booted":
+        booted = get_booted_device_udid()
+        if not booted:
+            raise RuntimeError(
+                "No simulator is currently booted. "
+                "Boot a simulator first: xcrun simctl boot <device-udid>"
+            )
+        return booted
+
+    # Anything shaped like a full UDID is passed through without a simctl lookup
+    if re.match(r"^[A-F0-9\-]{36}$", identifier, re.IGNORECASE) is not None:
+        return identifier.upper()
+
+    # Lowercase every name once, then try exact matches before substring matches
+    named = [(sim["name"].lower(), sim) for sim in list_simulators(state=None)]
+    exact = next((sim for name, sim in named if name == wanted), None)
+    if exact is not None:
+        return exact["udid"]
+    partial = next((sim for name, sim in named if wanted in name), None)
+    if partial is not None:
+        return partial["udid"]
+    raise RuntimeError(
+        f"Device '{identifier}' not found. "
+        f"Use 'xcrun simctl list devices' to see available simulators."
+    )
+
+
+def list_simulators(state: str | None = None) -> list[dict]:
+    """
+    List iOS simulators with optional state filtering.
+
+    Runs ``xcrun simctl list devices -j`` and flattens the per-runtime device
+    groups of its JSON output into one list.
+
+    Args:
+        state: Optional filter:
+            - None: every simulator
+            - "booted": state is "Booted"
+            - "available": state is "Shutdown" (i.e. can be booted)
+            - any other string: case-insensitive match against the state
+
+    Returns:
+        List of simulator dicts with keys:
+        - "name": Device name (e.g., "iPhone 16 Pro"), "Unknown" if absent
+        - "udid": Device UDID, "" if absent
+        - "state": Device state as reported by simctl, "Unknown" if absent
+        - "runtime": Key of the simctl "devices" group the device was listed under
+        - "type": "iPhone", "iPad", "Watch", "TV" or "Unknown" (derived from name)
+
+    Raises:
+        RuntimeError: If xcrun cannot be started, simctl fails, or its output is not
+            the expected JSON structure. The original error is chained as the cause.
+
+    Example:
+        # List all simulators
+        all_sims = list_simulators()
+        print(f"Total simulators: {len(all_sims)}")
+
+        # List only booted simulators
+        booted = list_simulators(state="booted")
+        for sim in booted:
+            print(f"Booted: {sim['name']}")
+    """
+    try:
+        # Query simctl for device list
+        cmd = ["xcrun", "simctl", "list", "devices", "-j"]
+        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
+        # Format: {"devices": {<runtime>: [{...}, {...}], <runtime>: [...], ...}}
+        groups = json.loads(result.stdout).get("devices", {})
+        simulators = [
+            {
+                "name": device.get("name", "Unknown"),
+                "udid": device.get("udid", ""),
+                "state": device.get("state", "Unknown"),
+                "runtime": runtime,
+                "type": _extract_device_type(device.get("name", "")),
+            }
+            for runtime, devices in groups.items()
+            for device in devices
+        ]
+    except (subprocess.CalledProcessError, OSError, ValueError, AttributeError, TypeError) as e:
+        # OSError: xcrun missing; ValueError: invalid JSON; the rest: unexpected shape
+        raise RuntimeError(f"Failed to list simulators: {e}") from e
+
+    if state is None:
+        return simulators
+
+    # Named filters map to the exact simctl state; anything else compares loosely
+    exact_state = {"booted": "Booted", "available": "Shutdown"}.get(state)
+    if exact_state is not None:
+        return [s for s in simulators if s["state"] == exact_state]
+    return [s for s in simulators if s["state"].lower() == state.lower()]
+
+
+def _extract_device_type(device_name: str) -> str:
+    """
+    Classify a simulator by the product family named in its device name.
+
+    The markers "iPhone", "iPad", "Watch" and "TV" are tried in that order with a
+    case-sensitive substring test; the first one present in the name is returned.
+
+    Args:
+        device_name: Full device name (e.g., "iPhone 16 Pro")
+
+    Returns:
+        "iPhone", "iPad", "Watch", "TV", or "Unknown" when no marker is present
+
+    Example:
+        _extract_device_type("iPhone 16 Pro")         # Returns "iPhone"
+        _extract_device_type("iPad Air")              # Returns "iPad"
+        _extract_device_type("Apple Watch Series 9")  # Returns "Watch"
+        _extract_device_type("Apple TV 4K")           # Returns "TV"
+    """
+    # Each marker doubles as the returned type; "Apple Watch"/"Apple TV" need no
+    # separate test because they already contain "Watch"/"TV"
+    for marker in ("iPhone", "iPad", "Watch", "TV"):
+        if marker in device_name:
+            return marker
+
+    return "Unknown"
+
+
+def transform_screenshot_coords(
+    x: float,
+    y: float,
+    screenshot_width: int,
+    screenshot_height: int,
+    device_width: int,
+    device_height: int,
+) -> tuple[int, int]:
+    """
+    Transform screenshot coordinates to device coordinates.
+
+    Handles the case where a screenshot was downscaled (e.g., to 'half' size)
+    and needs to be transformed back to actual device coordinates for tapping.
+
+    The transformation is linear, truncated toward zero by int():
+    device_x = screenshot_x * device_width / screenshot_width
+    device_y = screenshot_y * device_height / screenshot_height
+
+    Args:
+        x, y: Coordinates in the screenshot
+        screenshot_width, screenshot_height: Screenshot dimensions (e.g., 195, 422)
+        device_width, device_height: Actual device dimensions (e.g., 390, 844)
+
+    Returns:
+        Tuple of (device_x, device_y)
+
+    Raises:
+        ZeroDivisionError: If screenshot_width or screenshot_height is 0
+
+    Example:
+        # Screenshot taken at 'half' size: 195x422 (from 390x844 device)
+        transform_screenshot_coords(100, 200, 195, 422, 390, 844)
+        # Returns: (200, 400)
+    """
+    # Multiply before dividing. (100 / 195) * 390 is 199.99999999999997 in binary
+    # floating point and int() would truncate it to 199; with integer inputs
+    # 100 * 390 / 195 is exactly 200.0 because the product is computed exactly.
+    device_x = int(x * device_width / screenshot_width)
+    device_y = int(y * device_height / screenshot_height)
+    return (device_x, device_y)
--- a/.agents/skills/ios-simulator-skill/scripts/common/idb_utils.py
+++ b/.agents/skills/ios-simulator-skill/scripts/common/idb_utils.py
@@ -0,0 +1,180 @@
+#!/usr/bin/env python3
+"""
+Shared IDB utility functions.
+
+This module provides common IDB operations used across multiple scripts.
+Follows Jackson's Law - only shared code that's truly reused, not speculative.
+
+Used by:
+- navigator.py - Accessibility tree navigation
+- screen_mapper.py - UI element analysis
+- accessibility_audit.py - WCAG compliance checking
+- test_recorder.py - Test documentation
+- app_state_capture.py - State snapshots
+- gesture.py - Touch gesture operations
+"""
+
+import json
+import subprocess
+import sys
+
+
+def get_accessibility_tree(udid: str | None = None, nested: bool = True) -> dict:
+    """
+    Fetch accessibility tree from IDB.
+
+    Runs ``idb ui describe-all --json`` (plus ``--nested`` and ``--udid`` when
+    requested) and parses its standard output as JSON.
+
+    Args:
+        udid: Device UDID; when None or empty no --udid flag is passed
+        nested: Pass --nested to idb (default True). Either way, when idb returns a
+            JSON array only its first element is returned.
+
+    Returns:
+        First element of idb's JSON array ({} if it is empty), or the parsed value
+        itself if it is not an array. Documented shape of the root element: {
+            "type": "Window",
+            "AXLabel": "App Name",
+            "frame": {"x": 0, "y": 0, "width": 390, "height": 844},
+            "children": [...]
+        }
+
+    Raises:
+        SystemExit: With code 1, after printing a message to stderr, if idb cannot
+            be started, exits with an error, or prints invalid JSON
+    """
+    cmd = ["idb", "ui", "describe-all", "--json"]
+    if nested:
+        cmd.append("--nested")
+    if udid:
+        cmd.extend(["--udid", udid])
+    try:
+        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
+        tree_data = json.loads(result.stdout)
+    except subprocess.CalledProcessError as e:
+        print(f"Error: Failed to get accessibility tree: {e.stderr}", file=sys.stderr)
+        sys.exit(1)
+    except OSError as e:  # idb missing or not executable: same exit contract
+        print(f"Error: Failed to run idb: {e}", file=sys.stderr)
+        sys.exit(1)
+    except json.JSONDecodeError:
+        print("Error: Invalid JSON from idb", file=sys.stderr)
+        sys.exit(1)
+
+    # IDB returns array format, extract first element (root)
+    if isinstance(tree_data, list):
+        return tree_data[0] if tree_data else {}
+    return tree_data
+
+
+def flatten_tree(node: dict, depth: int = 0, elements: list[dict] | None = None) -> list[dict]:
+    """
+    Flatten nested accessibility tree into list of elements.
+
+    Walks the tree depth-first in document order (parent before its children,
+    children left to right) and records a shallow copy of every node, tagged with
+    a "depth" key. Copies still carry their own "children" list.
+
+    Used by:
+    - navigator.py - Element finding
+    - screen_mapper.py - Element analysis
+    - accessibility_audit.py - Audit scanning
+
+    Args:
+        node: Root node of tree (typically from get_accessibility_tree)
+        depth: Depth value assigned to `node`; descendants count up from it
+        elements: Optional list to append to; a new list is created when None
+
+    Returns:
+        The accumulator list (the one passed in, if any) holding one dict per
+        node, e.g. {
+            "type": "Button",
+            "AXLabel": "Login",
+            "frame": {...},
+            "depth": 2,
+            ...
+        }
+
+    Example:
+        tree = get_accessibility_tree()
+        flat = flatten_tree(tree)
+        for elem in flat:
+            print(f"{'  ' * elem['depth']}{elem.get('type')}: {elem.get('AXLabel')}")
+    """
+    collected = [] if elements is None else elements
+
+    def visit(current: dict, level: int) -> None:
+        # Shallow copy so the caller's tree is not mutated by the "depth" tag
+        tagged = current.copy()
+        tagged["depth"] = level
+        collected.append(tagged)
+        for child in current.get("children", []):
+            visit(child, level + 1)
+
+    visit(node, depth)
+    return collected
+
+
+def count_elements(node: dict) -> int:
+    """
+    Count total elements in tree (recursive).
+
+    Every node counts as one element; nodes listed under a "children" key are
+    counted the same way, and a node without that key is a leaf.
+
+    Used by:
+    - test_recorder.py - Element counting per step
+    - screen_mapper.py - Summary statistics
+
+    Args:
+        node: Root node of tree
+
+    Returns:
+        Total element count including root and all descendants
+
+    Example:
+        tree = get_accessibility_tree()
+        total = count_elements(tree)
+        print(f"Screen has {total} elements")
+    """
+    # 1 for this node plus the recursive totals of its direct children
+    descendants = sum(count_elements(child) for child in node.get("children", []))
+    return 1 + descendants
+
+
+def get_screen_size(udid: str | None = None) -> tuple[int, int]:
+    """
+    Get screen dimensions from accessibility tree.
+
+    Reads the "frame" of the element returned by get_accessibility_tree(udid,
+    nested=False). Useful for gesture calculations and coordinate normalization.
+
+    Used by:
+    - gesture.py - Gesture positioning
+    - Potentially: screenshot positioning, screen-aware scaling
+
+    Args:
+        udid: Device UDID (no --udid flag is passed to idb if None)
+
+    Returns:
+        (width, height) tuple. Defaults to (390, 844) if detection fails or the
+        tree cannot be accessed, including when idb itself fails.
+
+    Example:
+        width, height = get_screen_size()
+        center_x = width // 2
+        center_y = height // 2
+    """
+    DEFAULT_WIDTH = 390  # iPhone 14
+    DEFAULT_HEIGHT = 844
+    try:
+        tree = get_accessibility_tree(udid, nested=False)
+        frame = tree.get("frame", {})
+        width = int(frame.get("width", DEFAULT_WIDTH))
+        height = int(frame.get("height", DEFAULT_HEIGHT))
+        return (width, height)
+    except (Exception, SystemExit):
+        # get_accessibility_tree() signals failure with sys.exit(1). SystemExit is not
+        # an Exception subclass, so it is named explicitly to keep the fallback working.
+        return (DEFAULT_WIDTH, DEFAULT_HEIGHT)
--- a/.agents/skills/ios-simulator-skill/scripts/common/screenshot_utils.py
+++ b/.agents/skills/ios-simulator-skill/scripts/common/screenshot_utils.py
@@ -0,0 +1,338 @@
+#!/usr/bin/env python3
+"""
+Screenshot utilities with dual-mode support.
+
+Provides unified screenshot handling with:
+- File-based mode: Persistent artifacts for test documentation
+- Inline base64 mode: Vision-based automation for agent analysis
+- Size presets: Token optimization (full/half/quarter/thumb)
+- Semantic naming: {appName}_{screenName}_{state}_{timestamp}.png
+
+Supports resize operations via PIL (optional dependency).
+
+Used by:
+- test_recorder.py - Step-based screenshot recording
+- app_state_capture.py - State snapshot captures
+"""
+
+import base64
+import os
+import subprocess
+import sys
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+
+# Try to import PIL for resizing, but make it optional
+try:
+    from PIL import Image
+
+    HAS_PIL = True
+except ImportError:
+    HAS_PIL = False
+
+
+def generate_screenshot_name(
+    app_name: str | None = None,
+    screen_name: str | None = None,
+    state: str | None = None,
+    timestamp: str | None = None,
+    extension: str = "png",
+) -> str:
+    """Generate semantic screenshot filename.
+
+    Format: {appName}_{screenName}_{state}_{timestamp}.{ext}
+    Falls back to: screenshot_{timestamp}.{ext}
+
+    Path separators ('/' and '\\') inside any component are replaced with
+    '-' so the result is always a single filename, never a nested path.
+    A leading dot on the extension is ignored ('.png' == 'png').
+
+    Args:
+        app_name: Application name (e.g., 'MyApp')
+        screen_name: Screen name (e.g., 'Login')
+        state: State description (e.g., 'Empty', 'Filled', 'Error')
+        timestamp: Timestamp text to embed; when None the current local
+            time is used, formatted as YYYYMMDD-HHMMSS
+        extension: File extension (default: 'png')
+
+    Returns:
+        Semantic filename ready for safe file creation
+
+    Example:
+        name = generate_screenshot_name('MyApp', 'Login', 'Empty')
+        # Returns: 'MyApp_Login_Empty_20251028-143052.png'
+
+        name = generate_screenshot_name()
+        # Returns: 'screenshot_20251028-143052.png'
+    """
+
+    def _flatten(component: str) -> str:
+        # Keep the component from escaping into another directory
+        return component.replace("/", "-").replace("\\", "-")
+
+    if timestamp is None:
+        timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
+
+    # Drop None/empty components; fall back to a generic prefix if none remain
+    parts = [_flatten(p) for p in (app_name, screen_name, state) if p]
+    if not parts:
+        parts = ["screenshot"]
+    parts.append(_flatten(timestamp))
+
+    return f"{'_'.join(parts)}.{_flatten(extension.lstrip('.'))}"
+
+
+def get_size_preset(size: str = "half") -> tuple[float, float]:
+    """Look up the resize scale factors for a named size preset.
+
+    Every preset scales both axes by the same factor. A name that is not
+    one of the known presets yields the 'half' factors.
+
+    Args:
+        size: 'full', 'half', 'quarter', 'thumb'
+
+    Returns:
+        Tuple of (scale_x, scale_y) for resizing
+
+    Example:
+        scale_x, scale_y = get_size_preset('half')
+        # Returns: (0.5, 0.5)
+    """
+    factor_by_name = {"full": 1.0, "half": 0.5, "quarter": 0.25, "thumb": 0.1}
+    factor = factor_by_name.get(size, 0.5)
+    return (factor, factor)
+
+
+def resize_screenshot(
+    input_path: str,
+    output_path: str | None = None,
+    size: str = "half",
+    quality: int = 85,
+) -> tuple[str, int, int]:
+    """Resize screenshot for token optimization.
+
+    Requires PIL (Pillow) for any preset other than 'full'.
+
+    Args:
+        input_path: Path to original screenshot
+        output_path: Output path. If None: for 'full' the input file is
+            returned untouched; for other presets the result is written
+            next to the input as '{stem}_{size}{suffix}'.
+        size: 'full', 'half', 'quarter', 'thumb'
+        quality: Passed to the image save call as its quality setting
+            (1-100, default: 85)
+
+    Returns:
+        Tuple of (output_path, width, height) of resized image. For 'full'
+        without PIL installed the dimensions are reported as (0, 0).
+
+    Raises:
+        FileNotFoundError: If input file doesn't exist
+        ValueError: If PIL not installed and size != 'full'
+
+    Example:
+        output, w, h = resize_screenshot(
+            'screenshot.png',
+            'screenshot_half.png',
+            'half'
+        )
+        print(f"Resized to {w}x{h}")
+    """
+    input_file = Path(input_path)
+    if not input_file.exists():
+        raise FileNotFoundError(f"Screenshot not found: {input_path}")
+
+    # If full size, just copy
+    if size == "full":
+        if output_path:
+            import shutil
+
+            shutil.copy(input_path, output_path)
+            output_file = Path(output_path)
+        else:
+            output_file = input_file
+
+        # Get original dimensions; the context manager closes the file handle
+        if HAS_PIL:
+            with Image.open(str(output_file)) as img:
+                return (str(output_file), img.width, img.height)
+        return (str(output_file), 0, 0)  # Dimensions unknown without PIL
+
+    # Need PIL to resize
+    if not HAS_PIL:
+        raise ValueError(
+            f"Size preset '{size}' requires PIL (Pillow). " "Install with: pip3 install pillow"
+        )
+
+    scale_x, scale_y = get_size_preset(size)
+
+    # Determine output path
+    if output_path is None:
+        # Insert size marker before extension
+        output_path = str(input_file.parent / f"{input_file.stem}_{size}{input_file.suffix}")
+
+    # resize() returns an independent image, so the source handle can be
+    # closed before saving (which also makes in-place overwrite safe).
+    with Image.open(str(input_file)) as img:
+        orig_w, orig_h = img.size
+        # Clamp to 1px: truncation could otherwise produce a 0-sized image
+        new_w = max(1, int(orig_w * scale_x))
+        new_h = max(1, int(orig_h * scale_y))
+        # Resize with high-quality resampling
+        resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
+
+    # Save resized image
+    resized.save(output_path, quality=quality, optimize=True)
+
+    return (output_path, new_w, new_h)
+
+
+def _screenshot_dimensions(image_path: str) -> tuple[int, int]:
+    """Return (width, height) of an image, or (390, 844) when PIL is missing."""
+    if HAS_PIL:
+        with Image.open(image_path) as img:
+            return img.size
+    return (390, 844)  # Fallback to common device size
+
+
+def capture_screenshot(
+    udid: str,
+    output_path: str | None = None,
+    size: str = "half",
+    inline: bool = False,
+    app_name: str | None = None,
+    screen_name: str | None = None,
+    state: str | None = None,
+) -> dict[str, Any]:
+    """Capture screenshot with flexible output modes.
+
+    Supports both file-based (persistent artifacts) and inline base64 modes
+    (for vision-based automation). The raw capture and any intermediate
+    resized copy live in a private per-call temporary directory that is
+    removed on return and on error, so concurrent captures do not collide.
+
+    Resizing only happens when PIL is available; without PIL the raw
+    capture is used as-is and width/height fall back to (390, 844).
+
+    Args:
+        udid: Device UDID
+        output_path: File path for file mode (generates semantic name if None)
+        size: 'full', 'half', 'quarter', 'thumb' (default: 'half')
+        inline: If True, returns base64 data instead of saving to file
+        app_name: App name for semantic naming
+        screen_name: Screen name for semantic naming
+        state: State description for semantic naming
+
+    Returns:
+        Dict with mode-specific fields:
+
+        File mode:
+        {
+            'mode': 'file',
+            'file_path': str,
+            'size_bytes': int,
+            'width': int,
+            'height': int,
+            'size_preset': str
+        }
+
+        Inline mode:
+        {
+            'mode': 'inline',
+            'base64_data': str,
+            'mime_type': 'image/png',
+            'width': int,
+            'height': int,
+            'size_preset': str
+        }
+
+    Raises:
+        RuntimeError: If the simctl command fails or any other step of the
+            capture raises; the original exception is chained as the cause.
+
+    Example:
+        # File mode
+        result = capture_screenshot('ABC123', app_name='MyApp')
+        print(f"Saved to: {result['file_path']}")
+
+        # Inline mode
+        result = capture_screenshot('ABC123', inline=True, size='half')
+        print(f"Screenshot: {result['width']}x{result['height']}")
+        print(f"Base64: {result['base64_data'][:50]}...")
+    """
+    import shutil
+    import tempfile
+
+    try:
+        # Per-call temp directory: unique path, cleaned up automatically
+        with tempfile.TemporaryDirectory(prefix="ios_simulator_screenshot_") as temp_dir:
+            temp_path = str(Path(temp_dir) / "screenshot.png")
+            cmd = ["xcrun", "simctl", "io", udid, "screenshot", temp_path]
+
+            subprocess.run(cmd, capture_output=True, text=True, check=True)
+
+            should_resize = size != "full" and HAS_PIL
+
+            if inline:
+                # Inline mode: resize (if possible) and convert to base64
+                if should_resize:
+                    # With no output path the resized copy is written next
+                    # to the input, i.e. inside the temp directory.
+                    encoded_path, width, height = resize_screenshot(temp_path, size=size)
+                else:
+                    encoded_path = temp_path
+                    width, height = _screenshot_dimensions(encoded_path)
+
+                base64_data = base64.b64encode(Path(encoded_path).read_bytes()).decode("utf-8")
+
+                return {
+                    "mode": "inline",
+                    "base64_data": base64_data,
+                    "mime_type": "image/png",
+                    "width": width,
+                    "height": height,
+                    "size_preset": size,
+                }
+
+            # File mode: save to output path with semantic naming
+            if output_path is None:
+                output_path = generate_screenshot_name(app_name, screen_name, state)
+
+            if should_resize:
+                final_path, width, height = resize_screenshot(temp_path, output_path, size)
+            else:
+                # Just move temp to output
+                shutil.move(temp_path, output_path)
+                final_path = output_path
+                width, height = _screenshot_dimensions(final_path)
+
+            # Get file size
+            size_bytes = Path(final_path).stat().st_size
+
+            return {
+                "mode": "file",
+                "file_path": final_path,
+                "size_bytes": size_bytes,
+                "width": width,
+                "height": height,
+                "size_preset": size,
+            }
+
+    except subprocess.CalledProcessError as e:
+        raise RuntimeError(f"Failed to capture screenshot: {e.stderr}") from e
+    except Exception as e:
+        raise RuntimeError(f"Screenshot capture error: {e!s}") from e
+
+
+def format_screenshot_result(result: dict[str, Any]) -> str:
+    """Render a capture_screenshot() result as a short multi-line summary.
+
+    A result whose 'mode' is 'file' is summarized by path, dimensions and
+    byte size; any other mode is treated as inline and summarized by
+    dimensions and base64 payload length.
+
+    Args:
+        result: Result dictionary from capture_screenshot()
+
+    Returns:
+        Formatted string for printing
+
+    Example:
+        result = capture_screenshot('ABC123', inline=True)
+        print(format_screenshot_result(result))
+    """
+    is_inline = result["mode"] != "file"
+    if is_inline:
+        summary = (
+            f"Screenshot (inline): {result['width']}x{result['height']}\n"
+            f"Base64 length: {len(result['base64_data'])} chars"
+        )
+    else:
+        summary = (
+            f"Screenshot: {result['file_path']}\n"
+            f"Dimensions: {result['width']}x{result['height']}\n"
+            f"Size: {result['size_bytes']} bytes"
+        )
+    return summary