dotfiles/.agents/skills/ios-simulator-skill/scripts/screen_mapper.py

#!/usr/bin/env python3
"""
iOS Screen Mapper - Current Screen Analyzer

Maps the current screen's UI elements for navigation decisions.
Provides token-efficient summaries of available interactions.

This script analyzes the iOS simulator screen using IDB's accessibility tree
and provides a compact, actionable summary of what's currently visible and
interactive on the screen. Perfect for AI agents making navigation decisions.

Key Features:
- Token-efficient output (5-7 lines by default)
- Identifies buttons, text fields, navigation elements
- Counts interactive and focusable elements
- Progressive detail with --verbose flag
- Navigation hints with --hints flag

Usage Examples:
    # Quick summary (default)
    python scripts/screen_mapper.py --udid <device-id>

    # Detailed element breakdown
    python scripts/screen_mapper.py --udid <device-id> --verbose

    # Include navigation suggestions
    python scripts/screen_mapper.py --udid <device-id> --hints

    # Full JSON output for parsing
    python scripts/screen_mapper.py --udid <device-id> --json

Output Format (default):
    Screen: LoginViewController (45 elements, 7 interactive)
    Buttons: "Login", "Cancel", "Forgot Password"
    TextFields: 2 (0 filled)
    Navigation: NavBar: "Sign In"
    Focusable: 7 elements

Technical Details:
- Uses IDB's accessibility tree via `idb ui describe-all --json --nested`
- Parses IDB's array format: [{ root element with children }]
- Identifies element types: Button, TextField, NavigationBar, TabBar, etc.
- Extracts labels from AXLabel, AXValue, and AXUniqueId fields
"""

import argparse
import json
import subprocess
import sys
from collections import defaultdict

from common import get_accessibility_tree, resolve_udid


class ScreenMapper:
    """
    Analyzes current screen for navigation decisions.

    This class fetches the iOS accessibility tree from IDB and analyzes it
    to provide actionable summaries for navigation. It categorizes elements
    by type, counts interactive elements, and identifies key UI patterns.

    Attributes:
        udid (Optional[str]): Device UDID to target, or None for booted device
        INTERACTIVE_TYPES (Set[str]): Element types that users can interact with

    Design Philosophy:
        - Token efficiency: Provide minimal but complete information
        - Progressive disclosure: Summary by default, details on request
        - Navigation-focused: Highlight elements relevant for automation
    """

    # Element types we care about for navigation
    # These are the accessibility element types that indicate user interaction points
    INTERACTIVE_TYPES = {
        "Button",
        "Link",
        "TextField",
        "SecureTextField",
        "Cell",
        "Switch",
        "Slider",
        "Stepper",
        "SegmentedControl",
        "TabBar",
        "NavigationBar",
        "Toolbar",
    }

    def __init__(self, udid: str | None = None):
        """
        Initialize screen mapper.

        Args:
            udid: Optional device UDID. If None, uses booted simulator.

        Example:
            mapper = ScreenMapper(udid="656DC652-1C9F-4AB2-AD4F-F38E65976BDA")
            mapper = ScreenMapper()  # Uses booted device
        """
        self.udid = udid

    def get_accessibility_tree(self) -> dict:
        """
        Fetch accessibility tree from iOS simulator via IDB.

        Delegates to shared utility for consistent tree fetching across all scripts.
        """
        return get_accessibility_tree(self.udid, nested=True)

    def analyze_tree(self, node: dict, depth: int = 0) -> dict:
        """Analyze accessibility tree for navigation info."""
        analysis = {
            "elements_by_type": defaultdict(list),
            "total_elements": 0,
            "interactive_elements": 0,
            "text_fields": [],
            "buttons": [],
            "navigation": {},
            "screen_name": None,
            "focusable": 0,
        }

        self._analyze_recursive(node, analysis, depth)

        # Post-process for clean output
        analysis["elements_by_type"] = dict(analysis["elements_by_type"])

        return analysis

    def _analyze_recursive(self, node: dict, analysis: dict, depth: int):
        """Recursively analyze tree nodes."""
        elem_type = node.get("type")
        label = node.get("AXLabel", "")
        value = node.get("AXValue", "")
        identifier = node.get("AXUniqueId", "")

        # Count element
        if elem_type:
            analysis["total_elements"] += 1

            # Track by type
            if elem_type in self.INTERACTIVE_TYPES:
                analysis["interactive_elements"] += 1

                # Store concise info (label only, not full node)
                elem_info = label or value or identifier or "Unnamed"
                analysis["elements_by_type"][elem_type].append(elem_info)

                # Special handling for common types
                if elem_type == "Button":
                    analysis["buttons"].append(elem_info)
                elif elem_type in ("TextField", "SecureTextField"):
                    analysis["text_fields"].append(
                        {"type": elem_type, "label": elem_info, "has_value": bool(value)}
                    )
                elif elem_type == "NavigationBar":
                    analysis["navigation"]["nav_title"] = label or "Navigation"
                elif elem_type == "TabBar":
                    # Count tab items
                    tab_count = len(node.get("children", []))
                    analysis["navigation"]["tab_count"] = tab_count

            # Track focusable elements
            if node.get("enabled", False) and elem_type in self.INTERACTIVE_TYPES:
                analysis["focusable"] += 1

        # Try to identify screen name from view controller
        if not analysis["screen_name"] and identifier:
            if "ViewController" in identifier or "Screen" in identifier:
                analysis["screen_name"] = identifier

        # Process children
        for child in node.get("children", []):
            self._analyze_recursive(child, analysis, depth + 1)

    def format_summary(self, analysis: dict, verbose: bool = False) -> str:
        """Format analysis as token-efficient summary."""
        lines = []

        # Screen identification (1 line)
        screen = analysis["screen_name"] or "Unknown Screen"
        total = analysis["total_elements"]
        interactive = analysis["interactive_elements"]
        lines.append(f"Screen: {screen} ({total} elements, {interactive} interactive)")

        # Buttons summary (1 line)
        if analysis["buttons"]:
            button_list = ", ".join(f'"{b}"' for b in analysis["buttons"][:5])
            if len(analysis["buttons"]) > 5:
                button_list += f" +{len(analysis['buttons']) - 5} more"
            lines.append(f"Buttons: {button_list}")

        # Text fields summary (1 line)
        if analysis["text_fields"]:
            field_count = len(analysis["text_fields"])
            [f["type"] for f in analysis["text_fields"]]
            filled = sum(1 for f in analysis["text_fields"] if f["has_value"])
            lines.append(f"TextFields: {field_count} ({filled} filled)")

        # Navigation summary (1 line)
        nav_parts = []
        if "nav_title" in analysis["navigation"]:
            nav_parts.append(f"NavBar: \"{analysis['navigation']['nav_title']}\"")
        if "tab_count" in analysis["navigation"]:
            nav_parts.append(f"TabBar: {analysis['navigation']['tab_count']} tabs")
        if nav_parts:
            lines.append(f"Navigation: {', '.join(nav_parts)}")

        # Focusable count (1 line)
        lines.append(f"Focusable: {analysis['focusable']} elements")

        # Verbose mode adds element type breakdown
        if verbose:
            lines.append("\nElements by type:")
            for elem_type, items in analysis["elements_by_type"].items():
                if items:  # Only show types that exist
                    lines.append(f"  {elem_type}: {len(items)}")
                    for item in items[:3]:  # Show first 3
                        lines.append(f"    - {item}")
                    if len(items) > 3:
                        lines.append(f"    ... +{len(items) - 3} more")

        return "\n".join(lines)

    def get_navigation_hints(self, analysis: dict) -> list[str]:
        """Generate navigation hints based on screen analysis."""
        hints = []

        # Check for common patterns
        if "Login" in str(analysis.get("buttons", [])):
            hints.append("Login screen detected - find TextFields for credentials")

        if analysis["text_fields"]:
            unfilled = [f for f in analysis["text_fields"] if not f["has_value"]]
            if unfilled:
                hints.append(f"{len(unfilled)} empty text field(s) - may need input")

        if not analysis["buttons"] and not analysis["text_fields"]:
            hints.append("No interactive elements - try swiping or going back")

        if "tab_count" in analysis.get("navigation", {}):
            hints.append(f"Tab bar available with {analysis['navigation']['tab_count']} tabs")

        return hints


def main():
    """Main entry point."""
    parser = argparse.ArgumentParser(description="Map current screen UI elements")
    parser.add_argument("--verbose", action="store_true", help="Show detailed element breakdown")
    parser.add_argument("--json", action="store_true", help="Output raw JSON analysis")
    parser.add_argument("--hints", action="store_true", help="Include navigation hints")
    parser.add_argument(
        "--udid",
        help="Device UDID (auto-detects booted simulator if not provided)",
    )

    args = parser.parse_args()

    # Resolve UDID with auto-detection
    try:
        udid = resolve_udid(args.udid)
    except RuntimeError as e:
        print(f"Error: {e}")
        sys.exit(1)

    # Create mapper and analyze
    mapper = ScreenMapper(udid=udid)
    tree = mapper.get_accessibility_tree()
    analysis = mapper.analyze_tree(tree)

    # Output based on format
    if args.json:
        # Full JSON (verbose)
        print(json.dumps(analysis, indent=2, default=str))
    else:
        # Token-efficient summary (default)
        summary = mapper.format_summary(analysis, verbose=args.verbose)
        print(summary)

        # Add hints if requested
        if args.hints:
            hints = mapper.get_navigation_hints(analysis)
            if hints:
                print("\nHints:")
                for hint in hints:
                    print(f"  - {hint}")


if __name__ == "__main__":
    main()