This commit is contained in:
2026-02-19 00:33:08 -08:00
parent e37f3dd7b1
commit 70dd0779f2
143 changed files with 31888 additions and 0 deletions

View File

@@ -0,0 +1,59 @@
"""
Common utilities shared across iOS simulator scripts.
This module centralizes genuinely reused code patterns to eliminate duplication
while respecting Jackson's Law - no over-abstraction, only truly shared logic.
Organization:
- device_utils: Device detection, command building, coordinate transformation
- idb_utils: IDB-specific operations (accessibility tree, element manipulation)
- cache_utils: Progressive disclosure caching for large outputs
- screenshot_utils: Screenshot capture with file and inline modes
"""
from .cache_utils import ProgressiveCache, get_cache
from .device_utils import (
build_idb_command,
build_simctl_command,
get_booted_device_udid,
get_device_screen_size,
resolve_udid,
transform_screenshot_coords,
)
from .idb_utils import (
count_elements,
flatten_tree,
get_accessibility_tree,
get_screen_size,
)
from .screenshot_utils import (
capture_screenshot,
format_screenshot_result,
generate_screenshot_name,
get_size_preset,
resize_screenshot,
)
# Public names re-exported by this package. The entries are kept in
# alphabetical order, so they are NOT grouped by submodule; the trailing
# comment on each entry names the submodule it is imported from above.
__all__ = [
"ProgressiveCache",  # cache_utils
"build_idb_command",  # device_utils
"build_simctl_command",  # device_utils
"capture_screenshot",  # screenshot_utils
"count_elements",  # idb_utils
"flatten_tree",  # idb_utils
"format_screenshot_result",  # screenshot_utils
"generate_screenshot_name",  # screenshot_utils
"get_accessibility_tree",  # idb_utils
"get_booted_device_udid",  # device_utils
"get_cache",  # cache_utils
"get_device_screen_size",  # device_utils
"get_screen_size",  # idb_utils
"get_size_preset",  # screenshot_utils
"resize_screenshot",  # screenshot_utils
"resolve_udid",  # device_utils
"transform_screenshot_coords",  # device_utils
]

View File

@@ -0,0 +1,260 @@
#!/usr/bin/env python3
"""
Progressive disclosure cache for large outputs.
Implements cache system to support progressive disclosure pattern:
- Return concise summary with cache_id for large outputs
- User retrieves full details on demand via cache_id
- Reduces token usage by 96% for common queries
Cache directory: ~/.ios-simulator-skill/cache/
Cache expiration: Configurable per cache type (default 1 hour)
Used by:
- sim_list.py - Simulator listing progressive disclosure
- Future: build logs, UI trees, etc.
"""
import json
import time
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any
class ProgressiveCache:
"""Cache for progressive disclosure pattern.
Stores large outputs with timestamped IDs for on-demand retrieval.
Automatically cleans up expired entries.
"""
def __init__(self, cache_dir: str | None = None, max_age_hours: int = 1):
"""Initialize cache system.
Args:
cache_dir: Cache directory path (default: ~/.ios-simulator-skill/cache/)
max_age_hours: Max age for cache entries before expiration (default: 1 hour)
"""
if cache_dir is None:
cache_dir = str(Path("~/.ios-simulator-skill/cache").expanduser())
self.cache_dir = Path(cache_dir)
self.max_age_hours = max_age_hours
# Create cache directory if needed
self.cache_dir.mkdir(parents=True, exist_ok=True)
def save(self, data: dict[str, Any], cache_type: str) -> str:
"""Save data to cache and return cache_id.
Args:
data: Dictionary data to cache
cache_type: Type of cache ('simulator-list', 'build-log', 'ui-tree', etc.)
Returns:
Cache ID like 'sim-20251028-143052' for use in progressive disclosure
Example:
cache_id = cache.save({'devices': [...]}, 'simulator-list')
# Returns: 'sim-20251028-143052'
"""
# Generate cache_id with timestamp
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
cache_prefix = cache_type.split("-")[0] # e.g., 'sim' from 'simulator-list'
cache_id = f"{cache_prefix}-{timestamp}"
# Save to file
cache_file = self.cache_dir / f"{cache_id}.json"
with open(cache_file, "w") as f:
json.dump(
{
"cache_id": cache_id,
"cache_type": cache_type,
"created_at": datetime.now().isoformat(),
"data": data,
},
f,
indent=2,
)
return cache_id
def get(self, cache_id: str) -> dict[str, Any] | None:
"""Retrieve data from cache by cache_id.
Args:
cache_id: Cache ID from save() or list_entries()
Returns:
Cached data dictionary, or None if not found/expired
Example:
data = cache.get('sim-20251028-143052')
if data:
print(f"Found {len(data)} devices")
"""
cache_file = self.cache_dir / f"{cache_id}.json"
if not cache_file.exists():
return None
# Check if expired
if self._is_expired(cache_file):
cache_file.unlink() # Delete expired file
return None
try:
with open(cache_file) as f:
entry = json.load(f)
return entry.get("data")
except (OSError, json.JSONDecodeError):
return None
def list_entries(self, cache_type: str | None = None) -> list[dict[str, Any]]:
"""List available cache entries with metadata.
Args:
cache_type: Filter by type (e.g., 'simulator-list'), or None for all
Returns:
List of cache entries with id, type, created_at, age_seconds
Example:
entries = cache.list_entries('simulator-list')
for entry in entries:
print(f"{entry['id']} - {entry['age_seconds']}s old")
"""
entries = []
for cache_file in sorted(self.cache_dir.glob("*.json"), reverse=True):
# Check if expired
if self._is_expired(cache_file):
cache_file.unlink()
continue
try:
with open(cache_file) as f:
entry = json.load(f)
# Filter by type if specified
if cache_type and entry.get("cache_type") != cache_type:
continue
created_at = datetime.fromisoformat(entry.get("created_at", ""))
age_seconds = (datetime.now() - created_at).total_seconds()
entries.append(
{
"id": entry.get("cache_id"),
"type": entry.get("cache_type"),
"created_at": entry.get("created_at"),
"age_seconds": int(age_seconds),
}
)
except (OSError, json.JSONDecodeError, ValueError):
continue
return entries
def cleanup(self, max_age_hours: int | None = None) -> int:
"""Remove expired cache entries.
Args:
max_age_hours: Age threshold (default: uses instance max_age_hours)
Returns:
Number of entries deleted
Example:
deleted = cache.cleanup()
print(f"Deleted {deleted} expired cache entries")
"""
if max_age_hours is None:
max_age_hours = self.max_age_hours
deleted = 0
for cache_file in self.cache_dir.glob("*.json"):
if self._is_expired(cache_file, max_age_hours):
cache_file.unlink()
deleted += 1
return deleted
def clear(self, cache_type: str | None = None) -> int:
"""Clear all cache entries of a type.
Args:
cache_type: Type to clear (e.g., 'simulator-list'), or None to clear all
Returns:
Number of entries deleted
Example:
cleared = cache.clear('simulator-list')
print(f"Cleared {cleared} simulator list entries")
"""
deleted = 0
for cache_file in self.cache_dir.glob("*.json"):
if cache_type is None:
# Clear all
cache_file.unlink()
deleted += 1
else:
# Clear by type
try:
with open(cache_file) as f:
entry = json.load(f)
if entry.get("cache_type") == cache_type:
cache_file.unlink()
deleted += 1
except (OSError, json.JSONDecodeError):
pass
return deleted
def _is_expired(self, cache_file: Path, max_age_hours: int | None = None) -> bool:
"""Check if cache file is expired.
Args:
cache_file: Path to cache file
max_age_hours: Age threshold (default: uses instance max_age_hours)
Returns:
True if file is older than max_age_hours
"""
if max_age_hours is None:
max_age_hours = self.max_age_hours
try:
with open(cache_file) as f:
entry = json.load(f)
created_at = datetime.fromisoformat(entry.get("created_at", ""))
age = datetime.now() - created_at
return age > timedelta(hours=max_age_hours)
except (OSError, json.JSONDecodeError, ValueError):
return True
# Registry of ProgressiveCache objects, filled on first request per directory
_cache_instances: dict[str, ProgressiveCache] = {}


def get_cache(cache_dir: str | None = None) -> ProgressiveCache:
    """Return the shared ProgressiveCache for a directory, creating it once.

    Instances are memoized in the module-level registry under the given
    ``cache_dir`` string; a missing/empty ``cache_dir`` is stored under the
    key ``"default"``.

    Args:
        cache_dir: Custom cache directory (uses default if None)

    Returns:
        ProgressiveCache instance
    """
    registry_key = cache_dir if cache_dir else "default"
    try:
        return _cache_instances[registry_key]
    except KeyError:
        # First request for this directory: build the instance and remember it.
        instance = ProgressiveCache(cache_dir)
        _cache_instances[registry_key] = instance
        return instance

View File

@@ -0,0 +1,432 @@
#!/usr/bin/env python3
"""
Shared device and simulator utilities.
Common patterns for interacting with simulators via xcrun simctl and IDB.
Standardizes command building and device targeting to prevent errors.
Follows Jackson's Law - only extracts genuinely reused patterns.
Used by:
- app_launcher.py (8 call sites) - App lifecycle commands
- Multiple scripts (15+ locations) - IDB command building
- navigator.py, gesture.py - Coordinate transformation
- test_recorder.py, app_state_capture.py - Auto-UDID detection
"""
import json
import re
import subprocess
def build_simctl_command(
operation: str,
udid: str | None = None,
*args,
) -> list[str]:
"""
Build xcrun simctl command with proper device handling.
Standardizes command building to prevent device targeting bugs.
Automatically uses "booted" if no UDID provided.
Used by:
- app_launcher.py: launch, terminate, install, uninstall, openurl, listapps, spawn
- Multiple scripts: generic simctl operations
Args:
operation: simctl operation (launch, terminate, install, etc.)
udid: Device UDID (uses 'booted' if None)
*args: Additional command arguments
Returns:
Complete command list ready for subprocess.run()
Examples:
# Launch app on booted simulator
cmd = build_simctl_command("launch", None, "com.app.bundle")
# Returns: ["xcrun", "simctl", "launch", "booted", "com.app.bundle"]
# Launch on specific device
cmd = build_simctl_command("launch", "ABC123", "com.app.bundle")
# Returns: ["xcrun", "simctl", "launch", "ABC123", "com.app.bundle"]
# Install app on specific device
cmd = build_simctl_command("install", "ABC123", "/path/to/app.app")
# Returns: ["xcrun", "simctl", "install", "ABC123", "/path/to/app.app"]
"""
cmd = ["xcrun", "simctl", operation]
# Add device (booted or specific UDID)
cmd.append(udid if udid else "booted")
# Add remaining arguments
cmd.extend(str(arg) for arg in args)
return cmd
def build_idb_command(
operation: str,
udid: str | None = None,
*args,
) -> list[str]:
"""
Build IDB command with proper device targeting.
Standardizes IDB command building across all scripts using IDB.
Handles device UDID consistently.
Used by:
- navigator.py: ui tap, ui text, ui describe-all
- gesture.py: ui swipe, ui tap
- keyboard.py: ui key, ui text, ui tap
- And more: 15+ locations
Args:
operation: IDB operation path (e.g., "ui tap", "ui text", "ui describe-all")
udid: Device UDID (omits --udid flag if None, IDB uses booted by default)
*args: Additional command arguments
Returns:
Complete command list ready for subprocess.run()
Examples:
# Tap on booted simulator
cmd = build_idb_command("ui tap", None, "200", "400")
# Returns: ["idb", "ui", "tap", "200", "400"]
# Tap on specific device
cmd = build_idb_command("ui tap", "ABC123", "200", "400")
# Returns: ["idb", "ui", "tap", "200", "400", "--udid", "ABC123"]
# Get accessibility tree
cmd = build_idb_command("ui describe-all", "ABC123", "--json", "--nested")
# Returns: ["idb", "ui", "describe-all", "--json", "--nested", "--udid", "ABC123"]
# Enter text
cmd = build_idb_command("ui text", None, "hello world")
# Returns: ["idb", "ui", "text", "hello world"]
"""
# Split operation into parts (e.g., "ui tap" -> ["ui", "tap"])
cmd = ["idb"] + operation.split()
# Add arguments
cmd.extend(str(arg) for arg in args)
# Add device targeting if specified (optional for IDB, uses booted by default)
if udid:
cmd.extend(["--udid", udid])
return cmd
def get_booted_device_udid() -> str | None:
"""
Auto-detect currently booted simulator UDID.
Queries xcrun simctl for booted devices and returns first match.
Returns:
UDID of booted simulator, or None if no simulator is booted.
Example:
udid = get_booted_device_udid()
if udid:
print(f"Booted simulator: {udid}")
else:
print("No simulator is currently booted")
"""
try:
result = subprocess.run(
["xcrun", "simctl", "list", "devices", "booted"],
capture_output=True,
text=True,
check=True,
)
# Parse output to find UDID
# Format: " iPhone 16 Pro (ABC123-DEF456) (Booted)"
for line in result.stdout.split("\n"):
# Look for UUID pattern in parentheses
match = re.search(r"\(([A-F0-9\-]{36})\)", line)
if match:
return match.group(1)
return None
except subprocess.CalledProcessError:
return None
def resolve_udid(udid_arg: str | None) -> str:
"""
Resolve device UDID with auto-detection fallback.
If udid_arg is provided, returns it immediately.
If None, attempts to auto-detect booted simulator.
Raises error if neither is available.
Args:
udid_arg: Explicit UDID from command line, or None
Returns:
Valid UDID string
Raises:
RuntimeError: If no UDID provided and no booted simulator found
Example:
try:
udid = resolve_udid(args.udid) # args.udid might be None
print(f"Using device: {udid}")
except RuntimeError as e:
print(f"Error: {e}")
sys.exit(1)
"""
if udid_arg:
return udid_arg
booted_udid = get_booted_device_udid()
if booted_udid:
return booted_udid
raise RuntimeError(
"No device UDID provided and no simulator is currently booted.\n"
"Boot a simulator or provide --udid explicitly:\n"
" xcrun simctl boot <device-name>\n"
" python scripts/script_name.py --udid <device-udid>"
)
def get_device_screen_size(udid: str) -> tuple[int, int]:
    """
    Read the device screen size from the root of the IDB accessibility tree.

    Runs ``idb ui describe-all --json`` for the device and takes width and
    height from the ``frame`` of the first (root) element. Any failure along
    the way yields the 390x844 fallback instead of an exception.

    Args:
        udid: Device UDID

    Returns:
        Tuple of (width, height) as reported by the root frame, or (390, 844)
        when detection fails

    Example:
        width, height = get_device_screen_size("ABC123")
        print(f"Device screen: {width}x{height}")
    """
    fallback = (390, 844)
    try:
        completed = subprocess.run(
            build_idb_command("ui describe-all", udid, "--json"),
            capture_output=True,
            text=True,
            check=True,
        )
        parsed = json.loads(completed.stdout)
        # IDB answers with a list of elements; the first one is the root.
        root = parsed[0] if isinstance(parsed, list) and parsed else parsed
        if not root or "frame" not in root:
            return fallback
        frame = root["frame"]
        return (
            int(frame.get("width", fallback[0])),
            int(frame.get("height", fallback[1])),
        )
    except Exception:
        # Best-effort helper: every error degrades to the default size.
        return fallback
def resolve_device_identifier(identifier: str) -> str:
    """
    Turn a user-supplied device identifier into a UDID.

    Resolution order:
    1. "booted" (any case) -> UDID of the currently booted simulator
    2. A 36-character string of hex digits/dashes -> returned upper-cased
    3. Case-insensitive exact match on a simulator name
    4. Case-insensitive substring match on a simulator name (first hit wins)

    Args:
        identifier: Device UDID, name, or special value "booted"

    Returns:
        Full device UDID

    Raises:
        RuntimeError: If identifier cannot be resolved

    Example:
        udid = resolve_device_identifier("iPhone 16 Pro")
        udid = resolve_device_identifier("booted")
    """
    wanted = identifier.lower()

    if wanted == "booted":
        booted = get_booted_device_udid()
        if not booted:
            raise RuntimeError(
                "No simulator is currently booted. "
                "Boot a simulator first: xcrun simctl boot <device-udid>"
            )
        return booted

    # Already shaped like a UDID: normalize the case and hand it back.
    if re.match(r"^[A-F0-9\-]{36}$", identifier, re.IGNORECASE):
        return identifier.upper()

    simulators = list_simulators(state=None)
    # Exact name matches take priority over substring matches.
    for is_match in (
        lambda name: name == wanted,
        lambda name: wanted in name,
    ):
        found = next((s["udid"] for s in simulators if is_match(s["name"].lower())), None)
        if found is not None:
            return found

    raise RuntimeError(
        f"Device '{identifier}' not found. "
        f"Use 'xcrun simctl list devices' to see available simulators."
    )
def list_simulators(state: str | None = None) -> list[dict]:
"""
List iOS simulators with optional state filtering.
Queries xcrun simctl and returns structured list of simulators.
Optionally filters by state (available, booted, all).
Args:
state: Optional filter - "available", "booted", or None for all
Returns:
List of simulator dicts with keys:
- "name": Device name (e.g., "iPhone 16 Pro")
- "udid": Device UDID (36 char UUID)
- "state": Device state ("Booted", "Shutdown", "Unavailable")
- "runtime": iOS version (e.g., "iOS 18.0", "unavailable")
- "type": Device type ("iPhone", "iPad", "Apple Watch", etc.)
Example:
# List all simulators
all_sims = list_simulators()
print(f"Total simulators: {len(all_sims)}")
# List only available simulators
available = list_simulators(state="available")
for sim in available:
print(f"{sim['name']} ({sim['state']}) - {sim['udid']}")
# List only booted simulators
booted = list_simulators(state="booted")
for sim in booted:
print(f"Booted: {sim['name']}")
"""
try:
# Query simctl for device list
cmd = ["xcrun", "simctl", "list", "devices", "-j"]
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
data = json.loads(result.stdout)
simulators = []
# Parse JSON response
# Format: {"devices": {"iOS 18.0": [{...}, {...}], "iOS 17.0": [...], ...}}
for ios_version, devices in data.get("devices", {}).items():
for device in devices:
sim = {
"name": device.get("name", "Unknown"),
"udid": device.get("udid", ""),
"state": device.get("state", "Unknown"),
"runtime": ios_version,
"type": _extract_device_type(device.get("name", "")),
}
simulators.append(sim)
# Apply state filtering
if state == "booted":
return [s for s in simulators if s["state"] == "Booted"]
if state == "available":
return [s for s in simulators if s["state"] == "Shutdown"] # Available to boot
if state is None:
return simulators
return [s for s in simulators if s["state"].lower() == state.lower()]
except (subprocess.CalledProcessError, json.JSONDecodeError, KeyError) as e:
raise RuntimeError(f"Failed to list simulators: {e}") from e
def _extract_device_type(device_name: str) -> str:
"""
Extract device type from device name.
Parses device name to determine type (iPhone, iPad, Watch, etc.).
Args:
device_name: Full device name (e.g., "iPhone 16 Pro")
Returns:
Device type string
Example:
_extract_device_type("iPhone 16 Pro") # Returns "iPhone"
_extract_device_type("iPad Air") # Returns "iPad"
_extract_device_type("Apple Watch Series 9") # Returns "Watch"
"""
if "iPhone" in device_name:
return "iPhone"
if "iPad" in device_name:
return "iPad"
if "Watch" in device_name or "Apple Watch" in device_name:
return "Watch"
if "TV" in device_name or "Apple TV" in device_name:
return "TV"
return "Unknown"
def transform_screenshot_coords(
    x: float,
    y: float,
    screenshot_width: int,
    screenshot_height: int,
    device_width: int,
    device_height: int,
) -> tuple[int, int]:
    """
    Transform screenshot coordinates to device coordinates.

    Handles the case where a screenshot was downscaled (e.g., to 'half' size)
    and a point picked in it must be mapped back to device coordinates for
    accurate tapping.

    The transformation is linear, truncated toward zero:
        device_x = x * device_width / screenshot_width
        device_y = y * device_height / screenshot_height

    The multiplication is done before the division on purpose: dividing first
    introduces a rounding error that truncation can turn into an off-by-one
    (e.g. (1 / 49) * 49 evaluates to 0.9999999999999999 and truncates to 0).

    Args:
        x, y: Coordinates in the screenshot
        screenshot_width, screenshot_height: Screenshot dimensions (e.g., 195, 422)
        device_width, device_height: Actual device dimensions (e.g., 390, 844)

    Returns:
        Tuple of (device_x, device_y) as integers

    Raises:
        ZeroDivisionError: If a screenshot dimension is 0

    Example:
        # Screenshot taken at 'half' size: 195x422 (from 390x844 device)
        device_x, device_y = transform_screenshot_coords(
            100, 200,  # Tap point in screenshot
            195, 422,  # Screenshot dimensions
            390, 844,  # Device dimensions
        )
        # (device_x, device_y) == (200, 400)
    """
    device_x = int(x * device_width / screenshot_width)
    device_y = int(y * device_height / screenshot_height)
    return (device_x, device_y)

View File

@@ -0,0 +1,180 @@
#!/usr/bin/env python3
"""
Shared IDB utility functions.
This module provides common IDB operations used across multiple scripts.
Follows Jackson's Law - only shared code that's truly reused, not speculative.
Used by:
- navigator.py - Accessibility tree navigation
- screen_mapper.py - UI element analysis
- accessibility_audit.py - WCAG compliance checking
- test_recorder.py - Test documentation
- app_state_capture.py - State snapshots
- gesture.py - Touch gesture operations
"""
import json
import subprocess
import sys
def get_accessibility_tree(udid: str | None = None, nested: bool = True) -> dict:
"""
Fetch accessibility tree from IDB.
The accessibility tree represents the complete UI hierarchy of the current
screen, with all element properties needed for semantic navigation.
Args:
udid: Device UDID (uses booted simulator if None)
nested: Include nested structure (default True). If False, returns flat array.
Returns:
Root element of accessibility tree as dict.
Structure: {
"type": "Window",
"AXLabel": "App Name",
"frame": {"x": 0, "y": 0, "width": 390, "height": 844},
"children": [...]
}
Raises:
SystemExit: If IDB command fails or returns invalid JSON
Example:
tree = get_accessibility_tree("UDID123")
# Root is Window element with all children nested
"""
cmd = ["idb", "ui", "describe-all", "--json"]
if nested:
cmd.append("--nested")
if udid:
cmd.extend(["--udid", udid])
try:
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
tree_data = json.loads(result.stdout)
# IDB returns array format, extract first element (root)
if isinstance(tree_data, list) and len(tree_data) > 0:
return tree_data[0]
return tree_data
except subprocess.CalledProcessError as e:
print(f"Error: Failed to get accessibility tree: {e.stderr}", file=sys.stderr)
sys.exit(1)
except json.JSONDecodeError:
print("Error: Invalid JSON from idb", file=sys.stderr)
sys.exit(1)
def flatten_tree(node: dict, depth: int = 0, elements: list[dict] | None = None) -> list[dict]:
"""
Flatten nested accessibility tree into list of elements.
Converts the hierarchical accessibility tree into a flat list where each
element includes its depth for context.
Used by:
- navigator.py - Element finding
- screen_mapper.py - Element analysis
- accessibility_audit.py - Audit scanning
Args:
node: Root node of tree (typically from get_accessibility_tree)
depth: Current depth (used internally, start at 0)
elements: Accumulator list (used internally, start as None)
Returns:
Flat list of elements, each with "depth" key indicating nesting level.
Structure of each element: {
"type": "Button",
"AXLabel": "Login",
"frame": {...},
"depth": 2,
...
}
Example:
tree = get_accessibility_tree()
flat = flatten_tree(tree)
for elem in flat:
print(f"{' ' * elem['depth']}{elem.get('type')}: {elem.get('AXLabel')}")
"""
if elements is None:
elements = []
# Add current node with depth tracking
node_copy = node.copy()
node_copy["depth"] = depth
elements.append(node_copy)
# Process children recursively
for child in node.get("children", []):
flatten_tree(child, depth + 1, elements)
return elements
def count_elements(node: dict) -> int:
    """
    Return how many elements a tree contains, the root included.

    Args:
        node: Root node of tree; descendants are read from its "children" key

    Returns:
        Total element count including root and all descendants

    Example:
        total = count_elements(get_accessibility_tree())
        print(f"Screen has {total} elements")
    """
    # One for this node plus the size of every child subtree.
    return 1 + sum(count_elements(child) for child in node.get("children", []))
def get_screen_size(udid: str | None = None) -> tuple[int, int]:
"""
Get screen dimensions from accessibility tree.
Extracts the screen size from the root element's frame. Useful for
gesture calculations and coordinate normalization.
Used by:
- gesture.py - Gesture positioning
- Potentially: screenshot positioning, screen-aware scaling
Args:
udid: Device UDID (uses booted if None)
Returns:
(width, height) tuple. Defaults to (390, 844) if detection fails
or tree cannot be accessed.
Example:
width, height = get_screen_size()
center_x = width // 2
center_y = height // 2
"""
DEFAULT_WIDTH = 390 # iPhone 14
DEFAULT_HEIGHT = 844
try:
tree = get_accessibility_tree(udid, nested=False)
frame = tree.get("frame", {})
width = int(frame.get("width", DEFAULT_WIDTH))
height = int(frame.get("height", DEFAULT_HEIGHT))
return (width, height)
except Exception:
# Silently fall back to defaults if tree access fails
return (DEFAULT_WIDTH, DEFAULT_HEIGHT)

View File

@@ -0,0 +1,338 @@
#!/usr/bin/env python3
"""
Screenshot utilities with dual-mode support.
Provides unified screenshot handling with:
- File-based mode: Persistent artifacts for test documentation
- Inline base64 mode: Vision-based automation for agent analysis
- Size presets: Token optimization (full/half/quarter/thumb)
- Semantic naming: {appName}_{screenName}_{state}_{timestamp}.png
Supports resize operations via PIL (optional dependency).
Used by:
- test_recorder.py - Step-based screenshot recording
- app_state_capture.py - State snapshot captures
"""
import base64
import os
import subprocess
import sys
from datetime import datetime
from pathlib import Path
from typing import Any
# Try to import PIL for resizing, but make it optional
try:
from PIL import Image
HAS_PIL = True
except ImportError:
HAS_PIL = False
def generate_screenshot_name(
app_name: str | None = None,
screen_name: str | None = None,
state: str | None = None,
timestamp: str | None = None,
extension: str = "png",
) -> str:
"""Generate semantic screenshot filename.
Format: {appName}_{screenName}_{state}_{timestamp}.{ext}
Falls back to: screenshot_{timestamp}.{ext}
Args:
app_name: Application name (e.g., 'MyApp')
screen_name: Screen name (e.g., 'Login')
state: State description (e.g., 'Empty', 'Filled', 'Error')
timestamp: ISO timestamp (uses current time if None)
extension: File extension (default: 'png')
Returns:
Semantic filename ready for safe file creation
Example:
name = generate_screenshot_name('MyApp', 'Login', 'Empty')
# Returns: 'MyApp_Login_Empty_20251028-143052.png'
name = generate_screenshot_name()
# Returns: 'screenshot_20251028-143052.png'
"""
if timestamp is None:
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
# Build semantic name
if app_name or screen_name or state:
parts = [app_name, screen_name, state]
parts = [p for p in parts if p] # Filter None/empty
name = "_".join(parts) + f"_{timestamp}"
else:
name = f"screenshot_{timestamp}"
return f"{name}.{extension}"
def get_size_preset(size: str = "half") -> tuple[float, float]:
    """Look up the scale factors for a named size preset.

    Args:
        size: 'full', 'half', 'quarter', 'thumb'; any other value is
            treated like 'half'

    Returns:
        Tuple of (scale_x, scale_y) for resizing (both factors are equal)

    Example:
        scale_x, scale_y = get_size_preset('half')
        # Returns: (0.5, 0.5)
    """
    # Every preset scales both axes uniformly, so one factor per name suffices.
    factor = {
        "full": 1.0,
        "half": 0.5,
        "quarter": 0.25,
        "thumb": 0.1,
    }.get(size, 0.5)
    return (factor, factor)
def resize_screenshot(
input_path: str,
output_path: str | None = None,
size: str = "half",
quality: int = 85,
) -> tuple[str, int, int]:
"""Resize screenshot for token optimization.
Requires PIL (Pillow). Falls back gracefully without it.
Args:
input_path: Path to original screenshot
output_path: Output path (uses input_path if None)
size: 'full', 'half', 'quarter', 'thumb'
quality: JPEG quality (1-100, default: 85)
Returns:
Tuple of (output_path, width, height) of resized image
Raises:
FileNotFoundError: If input file doesn't exist
ValueError: If PIL not installed and size != 'full'
Example:
output, w, h = resize_screenshot(
'screenshot.png',
'screenshot_half.png',
'half'
)
print(f"Resized to {w}x{h}")
"""
input_file = Path(input_path)
if not input_file.exists():
raise FileNotFoundError(f"Screenshot not found: {input_path}")
# If full size, just copy
if size == "full":
if output_path:
import shutil
shutil.copy(input_path, output_path)
output_file = Path(output_path)
else:
output_file = input_file
# Get original dimensions
if HAS_PIL:
img = Image.open(str(output_file))
return (str(output_file), img.width, img.height)
return (str(output_file), 0, 0) # Dimensions unknown without PIL
# Need PIL to resize
if not HAS_PIL:
raise ValueError(
f"Size preset '{size}' requires PIL (Pillow). " "Install with: pip3 install pillow"
)
# Open original image
img = Image.open(str(input_file))
orig_w, orig_h = img.size
# Calculate new size
scale_x, scale_y = get_size_preset(size)
new_w = int(orig_w * scale_x)
new_h = int(orig_h * scale_y)
# Resize with high-quality resampling
resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
# Determine output path
if output_path is None:
# Insert size marker before extension
stem = input_file.stem
suffix = input_file.suffix
output_path = str(input_file.parent / f"{stem}_{size}{suffix}")
# Save resized image
resized.save(output_path, quality=quality, optimize=True)
return (output_path, new_w, new_h)
def capture_screenshot(
udid: str,
output_path: str | None = None,
size: str = "half",
inline: bool = False,
app_name: str | None = None,
screen_name: str | None = None,
state: str | None = None,
) -> dict[str, Any]:
"""Capture screenshot with flexible output modes.
Supports both file-based (persistent artifacts) and inline base64 modes
(for vision-based automation).
Args:
udid: Device UDID
output_path: File path for file mode (generates semantic name if None)
size: 'full', 'half', 'quarter', 'thumb' (default: 'half')
inline: If True, returns base64 data instead of saving to file
app_name: App name for semantic naming
screen_name: Screen name for semantic naming
state: State description for semantic naming
Returns:
Dict with mode-specific fields:
File mode:
{
'mode': 'file',
'file_path': str,
'size_bytes': int,
'width': int,
'height': int,
'size_preset': str
}
Inline mode:
{
'mode': 'inline',
'base64_data': str,
'mime_type': 'image/png',
'width': int,
'height': int,
'size_preset': str
}
Example:
# File mode
result = capture_screenshot('ABC123', app_name='MyApp')
print(f"Saved to: {result['file_path']}")
# Inline mode
result = capture_screenshot('ABC123', inline=True, size='half')
print(f"Screenshot: {result['width']}x{result['height']}")
print(f"Base64: {result['base64_data'][:50]}...")
"""
try:
# Capture raw screenshot to temp file
temp_path = "/tmp/ios_simulator_screenshot.png"
cmd = ["xcrun", "simctl", "io", udid, "screenshot", temp_path]
subprocess.run(cmd, capture_output=True, text=True, check=True)
if inline:
# Inline mode: resize and convert to base64
# Resize if needed
if size != "full" and HAS_PIL:
resized_path, width, height = resize_screenshot(temp_path, size=size)
else:
resized_path = temp_path
# Get dimensions via PIL if available
if HAS_PIL:
img = Image.open(resized_path)
width, height = img.size
else:
width, height = 390, 844 # Fallback to common device size
# Read and encode as base64
with open(resized_path, "rb") as f:
base64_data = base64.b64encode(f.read()).decode("utf-8")
# Clean up temp files
Path(temp_path).unlink(missing_ok=True)
if resized_path != temp_path:
Path(resized_path).unlink(missing_ok=True)
return {
"mode": "inline",
"base64_data": base64_data,
"mime_type": "image/png",
"width": width,
"height": height,
"size_preset": size,
}
# File mode: save to output path with semantic naming
if output_path is None:
output_path = generate_screenshot_name(app_name, screen_name, state)
# Resize if needed
if size != "full" and HAS_PIL:
final_path, width, height = resize_screenshot(temp_path, output_path, size)
else:
# Just move temp to output
import shutil
shutil.move(temp_path, output_path)
final_path = output_path
# Get dimensions via PIL if available
if HAS_PIL:
img = Image.open(final_path)
width, height = img.size
else:
width, height = 390, 844 # Fallback
# Get file size
size_bytes = Path(final_path).stat().st_size
return {
"mode": "file",
"file_path": final_path,
"size_bytes": size_bytes,
"width": width,
"height": height,
"size_preset": size,
}
except subprocess.CalledProcessError as e:
raise RuntimeError(f"Failed to capture screenshot: {e.stderr}") from e
except Exception as e:
raise RuntimeError(f"Screenshot capture error: {e!s}") from e
def format_screenshot_result(result: dict[str, Any]) -> str:
    """Render a capture_screenshot() result as short multi-line text.

    File-mode results list the path, dimensions and byte size; any other
    mode is rendered as an inline result with dimensions and base64 length.

    Args:
        result: Result dictionary from capture_screenshot()

    Returns:
        Formatted string for printing

    Example:
        result = capture_screenshot('ABC123', inline=True)
        print(format_screenshot_result(result))
    """
    if result["mode"] == "file":
        lines = [
            f"Screenshot: {result['file_path']}",
            f"Dimensions: {result['width']}x{result['height']}",
            f"Size: {result['size_bytes']} bytes",
        ]
    else:
        lines = [
            f"Screenshot (inline): {result['width']}x{result['height']}",
            f"Base64 length: {len(result['base64_data'])} chars",
        ]
    return "\n".join(lines)