mirror of
https://github.com/ksyasuda/dotfiles.git
synced 2026-02-28 00:22:41 -08:00
update
This commit is contained in:
292
.agents/skills/ios-simulator-skill/scripts/screen_mapper.py
Executable file
292
.agents/skills/ios-simulator-skill/scripts/screen_mapper.py
Executable file
@@ -0,0 +1,292 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
iOS Screen Mapper - Current Screen Analyzer
|
||||
|
||||
Maps the current screen's UI elements for navigation decisions.
|
||||
Provides token-efficient summaries of available interactions.
|
||||
|
||||
This script analyzes the iOS simulator screen using IDB's accessibility tree
|
||||
and provides a compact, actionable summary of what's currently visible and
|
||||
interactive on the screen. Perfect for AI agents making navigation decisions.
|
||||
|
||||
Key Features:
|
||||
- Token-efficient output (5-7 lines by default)
|
||||
- Identifies buttons, text fields, navigation elements
|
||||
- Counts interactive and focusable elements
|
||||
- Progressive detail with --verbose flag
|
||||
- Navigation hints with --hints flag
|
||||
|
||||
Usage Examples:
|
||||
# Quick summary (default)
|
||||
python scripts/screen_mapper.py --udid <device-id>
|
||||
|
||||
# Detailed element breakdown
|
||||
python scripts/screen_mapper.py --udid <device-id> --verbose
|
||||
|
||||
# Include navigation suggestions
|
||||
python scripts/screen_mapper.py --udid <device-id> --hints
|
||||
|
||||
# Full JSON output for parsing
|
||||
python scripts/screen_mapper.py --udid <device-id> --json
|
||||
|
||||
Output Format (default):
|
||||
Screen: LoginViewController (45 elements, 7 interactive)
|
||||
Buttons: "Login", "Cancel", "Forgot Password"
|
||||
TextFields: 2 (0 filled)
|
||||
Navigation: NavBar: "Sign In"
|
||||
Focusable: 7 elements
|
||||
|
||||
Technical Details:
|
||||
- Uses IDB's accessibility tree via `idb ui describe-all --json --nested`
|
||||
- Parses IDB's array format: [{ root element with children }]
|
||||
- Identifies element types: Button, TextField, NavigationBar, TabBar, etc.
|
||||
- Extracts labels from AXLabel, AXValue, and AXUniqueId fields
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
|
||||
from common import get_accessibility_tree, resolve_udid
|
||||
|
||||
|
||||
class ScreenMapper:
|
||||
"""
|
||||
Analyzes current screen for navigation decisions.
|
||||
|
||||
This class fetches the iOS accessibility tree from IDB and analyzes it
|
||||
to provide actionable summaries for navigation. It categorizes elements
|
||||
by type, counts interactive elements, and identifies key UI patterns.
|
||||
|
||||
Attributes:
|
||||
udid (Optional[str]): Device UDID to target, or None for booted device
|
||||
INTERACTIVE_TYPES (Set[str]): Element types that users can interact with
|
||||
|
||||
Design Philosophy:
|
||||
- Token efficiency: Provide minimal but complete information
|
||||
- Progressive disclosure: Summary by default, details on request
|
||||
- Navigation-focused: Highlight elements relevant for automation
|
||||
"""
|
||||
|
||||
# Element types we care about for navigation
|
||||
# These are the accessibility element types that indicate user interaction points
|
||||
INTERACTIVE_TYPES = {
|
||||
"Button",
|
||||
"Link",
|
||||
"TextField",
|
||||
"SecureTextField",
|
||||
"Cell",
|
||||
"Switch",
|
||||
"Slider",
|
||||
"Stepper",
|
||||
"SegmentedControl",
|
||||
"TabBar",
|
||||
"NavigationBar",
|
||||
"Toolbar",
|
||||
}
|
||||
|
||||
def __init__(self, udid: str | None = None):
|
||||
"""
|
||||
Initialize screen mapper.
|
||||
|
||||
Args:
|
||||
udid: Optional device UDID. If None, uses booted simulator.
|
||||
|
||||
Example:
|
||||
mapper = ScreenMapper(udid="656DC652-1C9F-4AB2-AD4F-F38E65976BDA")
|
||||
mapper = ScreenMapper() # Uses booted device
|
||||
"""
|
||||
self.udid = udid
|
||||
|
||||
def get_accessibility_tree(self) -> dict:
|
||||
"""
|
||||
Fetch accessibility tree from iOS simulator via IDB.
|
||||
|
||||
Delegates to shared utility for consistent tree fetching across all scripts.
|
||||
"""
|
||||
return get_accessibility_tree(self.udid, nested=True)
|
||||
|
||||
def analyze_tree(self, node: dict, depth: int = 0) -> dict:
|
||||
"""Analyze accessibility tree for navigation info."""
|
||||
analysis = {
|
||||
"elements_by_type": defaultdict(list),
|
||||
"total_elements": 0,
|
||||
"interactive_elements": 0,
|
||||
"text_fields": [],
|
||||
"buttons": [],
|
||||
"navigation": {},
|
||||
"screen_name": None,
|
||||
"focusable": 0,
|
||||
}
|
||||
|
||||
self._analyze_recursive(node, analysis, depth)
|
||||
|
||||
# Post-process for clean output
|
||||
analysis["elements_by_type"] = dict(analysis["elements_by_type"])
|
||||
|
||||
return analysis
|
||||
|
||||
def _analyze_recursive(self, node: dict, analysis: dict, depth: int):
|
||||
"""Recursively analyze tree nodes."""
|
||||
elem_type = node.get("type")
|
||||
label = node.get("AXLabel", "")
|
||||
value = node.get("AXValue", "")
|
||||
identifier = node.get("AXUniqueId", "")
|
||||
|
||||
# Count element
|
||||
if elem_type:
|
||||
analysis["total_elements"] += 1
|
||||
|
||||
# Track by type
|
||||
if elem_type in self.INTERACTIVE_TYPES:
|
||||
analysis["interactive_elements"] += 1
|
||||
|
||||
# Store concise info (label only, not full node)
|
||||
elem_info = label or value or identifier or "Unnamed"
|
||||
analysis["elements_by_type"][elem_type].append(elem_info)
|
||||
|
||||
# Special handling for common types
|
||||
if elem_type == "Button":
|
||||
analysis["buttons"].append(elem_info)
|
||||
elif elem_type in ("TextField", "SecureTextField"):
|
||||
analysis["text_fields"].append(
|
||||
{"type": elem_type, "label": elem_info, "has_value": bool(value)}
|
||||
)
|
||||
elif elem_type == "NavigationBar":
|
||||
analysis["navigation"]["nav_title"] = label or "Navigation"
|
||||
elif elem_type == "TabBar":
|
||||
# Count tab items
|
||||
tab_count = len(node.get("children", []))
|
||||
analysis["navigation"]["tab_count"] = tab_count
|
||||
|
||||
# Track focusable elements
|
||||
if node.get("enabled", False) and elem_type in self.INTERACTIVE_TYPES:
|
||||
analysis["focusable"] += 1
|
||||
|
||||
# Try to identify screen name from view controller
|
||||
if not analysis["screen_name"] and identifier:
|
||||
if "ViewController" in identifier or "Screen" in identifier:
|
||||
analysis["screen_name"] = identifier
|
||||
|
||||
# Process children
|
||||
for child in node.get("children", []):
|
||||
self._analyze_recursive(child, analysis, depth + 1)
|
||||
|
||||
def format_summary(self, analysis: dict, verbose: bool = False) -> str:
|
||||
"""Format analysis as token-efficient summary."""
|
||||
lines = []
|
||||
|
||||
# Screen identification (1 line)
|
||||
screen = analysis["screen_name"] or "Unknown Screen"
|
||||
total = analysis["total_elements"]
|
||||
interactive = analysis["interactive_elements"]
|
||||
lines.append(f"Screen: {screen} ({total} elements, {interactive} interactive)")
|
||||
|
||||
# Buttons summary (1 line)
|
||||
if analysis["buttons"]:
|
||||
button_list = ", ".join(f'"{b}"' for b in analysis["buttons"][:5])
|
||||
if len(analysis["buttons"]) > 5:
|
||||
button_list += f" +{len(analysis['buttons']) - 5} more"
|
||||
lines.append(f"Buttons: {button_list}")
|
||||
|
||||
# Text fields summary (1 line)
|
||||
if analysis["text_fields"]:
|
||||
field_count = len(analysis["text_fields"])
|
||||
[f["type"] for f in analysis["text_fields"]]
|
||||
filled = sum(1 for f in analysis["text_fields"] if f["has_value"])
|
||||
lines.append(f"TextFields: {field_count} ({filled} filled)")
|
||||
|
||||
# Navigation summary (1 line)
|
||||
nav_parts = []
|
||||
if "nav_title" in analysis["navigation"]:
|
||||
nav_parts.append(f"NavBar: \"{analysis['navigation']['nav_title']}\"")
|
||||
if "tab_count" in analysis["navigation"]:
|
||||
nav_parts.append(f"TabBar: {analysis['navigation']['tab_count']} tabs")
|
||||
if nav_parts:
|
||||
lines.append(f"Navigation: {', '.join(nav_parts)}")
|
||||
|
||||
# Focusable count (1 line)
|
||||
lines.append(f"Focusable: {analysis['focusable']} elements")
|
||||
|
||||
# Verbose mode adds element type breakdown
|
||||
if verbose:
|
||||
lines.append("\nElements by type:")
|
||||
for elem_type, items in analysis["elements_by_type"].items():
|
||||
if items: # Only show types that exist
|
||||
lines.append(f" {elem_type}: {len(items)}")
|
||||
for item in items[:3]: # Show first 3
|
||||
lines.append(f" - {item}")
|
||||
if len(items) > 3:
|
||||
lines.append(f" ... +{len(items) - 3} more")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
def get_navigation_hints(self, analysis: dict) -> list[str]:
|
||||
"""Generate navigation hints based on screen analysis."""
|
||||
hints = []
|
||||
|
||||
# Check for common patterns
|
||||
if "Login" in str(analysis.get("buttons", [])):
|
||||
hints.append("Login screen detected - find TextFields for credentials")
|
||||
|
||||
if analysis["text_fields"]:
|
||||
unfilled = [f for f in analysis["text_fields"] if not f["has_value"]]
|
||||
if unfilled:
|
||||
hints.append(f"{len(unfilled)} empty text field(s) - may need input")
|
||||
|
||||
if not analysis["buttons"] and not analysis["text_fields"]:
|
||||
hints.append("No interactive elements - try swiping or going back")
|
||||
|
||||
if "tab_count" in analysis.get("navigation", {}):
|
||||
hints.append(f"Tab bar available with {analysis['navigation']['tab_count']} tabs")
|
||||
|
||||
return hints
|
||||
|
||||
|
||||
def main():
    """
    Command-line entry point.

    Parses the CLI flags, resolves the target device, analyzes the current
    screen and prints either the JSON analysis (--json) or the text summary.
    Navigation hints (--hints) are appended to the text summary only.
    Exits with status 1 when the device UDID cannot be resolved.
    """
    cli = argparse.ArgumentParser(description="Map current screen UI elements")
    cli.add_argument("--verbose", action="store_true", help="Show detailed element breakdown")
    cli.add_argument("--json", action="store_true", help="Output raw JSON analysis")
    cli.add_argument("--hints", action="store_true", help="Include navigation hints")
    cli.add_argument(
        "--udid",
        help="Device UDID (auto-detects booted simulator if not provided)",
    )
    options = cli.parse_args()

    # Resolve which device to talk to; resolve_udid signals failure via RuntimeError.
    try:
        device_id = resolve_udid(options.udid)
    except RuntimeError as exc:
        print(f"Error: {exc}")
        sys.exit(1)

    screen_mapper = ScreenMapper(udid=device_id)
    report = screen_mapper.analyze_tree(screen_mapper.get_accessibility_tree())

    # JSON mode: dump the full analysis and stop.
    if options.json:
        print(json.dumps(report, indent=2, default=str))
        return

    # Text mode: compact summary, optionally followed by hints.
    print(screen_mapper.format_summary(report, verbose=options.verbose))

    if not options.hints:
        return

    suggestions = screen_mapper.get_navigation_hints(report)
    if suggestions:
        print("\nHints:")
        for suggestion in suggestions:
            print(f" - {suggestion}")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user