test: harden agent verification workflow

2026-06-18 15:13:31 -07:00 · 2026-03-13 00:42:56 -07:00
parent 1b56360a24
commit d0b308f340
6 changed files with 1131 additions and 0 deletions
@@ -0,0 +1,127 @@
 ---
 name: "subminer-change-verification"
 description: "Use when working in the SubMiner repo and you need to verify code changes actually work. Covers targeted regression checks during debugging and pre-handoff verification, with cheap-first lane selection for config, docs, launcher/plugin, runtime-compat, and optional real-runtime escalation."
 ---
 # SubMiner Change Verification
 Use this skill for SubMiner code changes. Default to cheap, repo-native verification first. Escalate only when the changed behavior actually depends on Electron, mpv, overlay/window tracking, or other GUI-sensitive runtime behavior.
 ## Scripts
 - `scripts/classify_subminer_diff.sh`
  - Emits suggested lanes and flags from explicit paths or current git changes.
 - `scripts/verify_subminer_change.sh`
  - Runs selected lanes, captures artifacts, and writes a compact summary.
 If you need an explicit installed path, point `SUBMINER_VERIFY_SKILL` at the directory that contains this `SKILL.md`; the helper scripts live in its `scripts/` subdirectory:
 ```bash
 export SUBMINER_VERIFY_SKILL="<path-to-skill>"
 ```
 ## Default workflow
 1. Inspect the changed files or user-requested area.
 2. Run the classifier unless you already know the right lane.
 3. Run the verifier with the cheapest sufficient lane set.
 4. If the classifier emits `flag:real-runtime-candidate`, do not jump straight to runtime verification. First run the non-runtime lanes.
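 5. Escalate to explicit `--lane real-runtime --allow-real-runtime` only when cheaper lanes cannot validate the behavior claim, and pass explicit path arguments: the verifier blocks the real-runtime lane when paths are inferred from local git changes.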
 6. Return:
   - verification summary
   - exact commands run
   - artifact paths
   - skipped lanes and blockers
 ## Quick start
 Repo-source quick start:
 ```bash
 bash .agents/skills/subminer-change-verification/scripts/classify_subminer_diff.sh
 ```
 Installed-skill quick start:
 ```bash
 bash "$SUBMINER_VERIFY_SKILL/scripts/classify_subminer_diff.sh"
 ```
 Classify explicit files:
 ```bash
 bash .agents/skills/subminer-change-verification/scripts/classify_subminer_diff.sh \
  launcher/main.ts \
  plugin/subminer/lifecycle.lua \
  src/main/runtime/mpv-client-runtime-service.ts
 ```
 Run automatic lane selection:
 ```bash
 bash .agents/skills/subminer-change-verification/scripts/verify_subminer_change.sh
 ```
 Installed-skill form:
 ```bash
 bash "$SUBMINER_VERIFY_SKILL/scripts/verify_subminer_change.sh"
 ```
 Run targeted lanes:
 ```bash
 bash .agents/skills/subminer-change-verification/scripts/verify_subminer_change.sh \
  --lane launcher-plugin \
  --lane runtime-compat
 ```
 Dry-run to inspect planned commands and artifact layout:
 ```bash
 bash .agents/skills/subminer-change-verification/scripts/verify_subminer_change.sh \
  --dry-run \
  launcher/main.ts \
  src/main.ts
 ```
 ## Lane guidance
 - `docs`
  - For `docs-site/`, `docs/`, and doc-only edits.
 - `config`
  - For `src/config/` and config-template-sensitive edits.
 - `core`
  - For general source changes where `typecheck` + `test:fast` is the best cheap signal.
 - `launcher-plugin`
  - For `launcher/`, `plugin/subminer/`, plugin gating scripts, and wrapper/mpv routing work.
 - `runtime-compat`
  - For `src/main*`, runtime/composer wiring, mpv/overlay services, window trackers, and dist-sensitive behavior.
 - `real-runtime`
  - Only after deliberate escalation.
 ## Real Runtime Escalation
 Escalate only when the change claim depends on actual runtime behavior, for example:
 - overlay appears, hides, or tracks a real mpv window
 - mpv launch flags or pause-until-ready behavior
 - plugin/socket/auto-start handshake under a real player
 - macOS/window-tracker/focus-sensitive behavior
 If the environment cannot support authoritative runtime verification, report the blocker explicitly. Do not silently downgrade a runtime-required claim to a pass.
 ## Artifact contract
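 The verifier writes under `.tmp/skill-verification/<session-id>/` (the session id has the form `subminer-verify-<timestamp>-<random>`), or under the directory passed with `--artifact-dir`: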
 - `summary.json`
 - `summary.txt`
 - `classification.txt`
 - `env.txt`
 - `lanes.txt`
 - `steps.tsv`
 - `steps/*.stdout.log`
 - `steps/*.stderr.log`
 On failure, quote the exact failing command and point at the artifact directory.
@@ -0,0 +1,163 @@
 #!/usr/bin/env bash
 set -euo pipefail
 # Print the classifier's usage text, including the three line prefixes
 # (lane:, flag:, reason:) that make up its output format, to stdout.
 usage() {
  cat <<'EOF'
 Usage: classify_subminer_diff.sh [path ...]
 Emit suggested verification lanes for explicit paths or current local git changes.
 Output format:
  lane:<name>
  flag:<name>
  reason:<text>
 EOF
 }
 # Membership test: succeed (0) when the first argument is equal to any of the
 # remaining arguments, fail (1) otherwise.
 has_item() {
  local wanted=$1
  shift || true
  local candidate
  for candidate; do
    [[ "$candidate" != "$wanted" ]] || return 0
  done
  return 1
 }
 # Append a lane name to the global LANES array unless it is already present.
 add_lane() {
  local candidate=$1
  # "${LANES[@]:-}" keeps the expansion legal under `set -u` while LANES is empty.
  has_item "$candidate" "${LANES[@]:-}" && return 0
  LANES+=("$candidate")
 }
 # Append a flag name to the global FLAGS array unless it is already present.
 add_flag() {
  local candidate=$1
  # "${FLAGS[@]:-}" keeps the expansion legal under `set -u` while FLAGS is empty.
  has_item "$candidate" "${FLAGS[@]:-}" && return 0
  FLAGS+=("$candidate")
 }
 # Record one human-readable classification reason (duplicates are kept).
 add_reason() {
  local text=$1
  REASONS+=("$text")
 }
 # List locally changed paths, one per line, sorted and de-duplicated:
 # unstaged + staged differences plus untracked, non-ignored files.
 # Prints nothing (and succeeds) when not inside a git work tree.
 collect_git_paths() {
  local repo_root
  repo_root=$(git rev-parse --show-toplevel 2>/dev/null) || return 0
  (
    cd "$repo_root"
    # Without a HEAD commit there is nothing to diff the work tree against
    # except the index.
    if git rev-parse --verify HEAD >/dev/null 2>&1; then
      git diff --name-only --relative HEAD --
    else
      git diff --name-only --relative --
    fi
    git diff --name-only --relative --cached --
    git ls-files --others --exclude-standard
  ) | awk 'NF' | sort -u
 }
 # Help is only recognised as the first argument.
 case "${1:-}" in
  --help|-h)
    usage
    exit 0
    ;;
 esac
 declare -a PATHS=()
 declare -a LANES=()
 declare -a FLAGS=()
 declare -a REASONS=()
 # Explicit paths win; otherwise fall back to whatever git reports as changed.
 if [[ $# -gt 0 ]]; then
  PATHS=("$@")
  shift $#
 else
  while IFS= read -r line; do
    if [[ -n "$line" ]]; then
      PATHS+=("$line")
    fi
  done < <(collect_git_paths)
 fi
 if [[ ${#PATHS[@]} -eq 0 ]]; then
  add_lane "core"
  add_reason "no changed paths detected -> default to core"
 fi
 # Classify every path into lanes/flags. A path may land in several lanes; the
 # generic "core" lane is only used when no specialised lane matched it.
 #
 # ${PATHS[@]+"${PATHS[@]}"} expands to nothing when PATHS is empty. A plain
 # "${PATHS[@]}" aborts with "unbound variable" under `set -u` on bash < 4.4.
 for path in ${PATHS[@]+"${PATHS[@]}"}; do
  # Match against a copy without leading "./" so `./launcher/main.ts` is
  # classified like `launcher/main.ts`; reasons still quote the path as given.
  match_path=$path
  while [[ "$match_path" == ./* ]]; do
    match_path=${match_path#./}
  done
  specialized=0
  case "$match_path" in
    docs-site/*|docs/*|changes/*|README.md)
      add_lane "docs"
      add_reason "$path -> docs"
      specialized=1
      ;;
  esac
  case "$match_path" in
    src/config/*|src/generate-config-example.ts|src/verify-config-example.ts|docs-site/public/config.example.jsonc|config.example.jsonc)
      add_lane "config"
      add_reason "$path -> config"
      specialized=1
      ;;
  esac
  case "$match_path" in
    launcher/*|plugin/subminer/*|plugin/subminer.conf|scripts/test-plugin-*|scripts/get-mpv-window-*|scripts/configure-plugin-binary-path.mjs)
      add_lane "launcher-plugin"
      add_reason "$path -> launcher-plugin"
      add_flag "real-runtime-candidate"
      add_reason "$path -> real-runtime-candidate"
      specialized=1
      ;;
  esac
  case "$match_path" in
    src/main.ts|src/main-entry.ts|src/preload.ts|src/main/*|src/core/services/mpv*|src/core/services/overlay*|src/renderer/*|src/window-trackers/*|scripts/prepare-build-assets.mjs)
      add_lane "runtime-compat"
      add_reason "$path -> runtime-compat"
      add_flag "real-runtime-candidate"
      add_reason "$path -> real-runtime-candidate"
      specialized=1
      ;;
  esac
  # Fallback lane for source/build files that no specialised lane claimed.
  if [[ "$specialized" == "0" ]]; then
    case "$match_path" in
      src/*|package.json|tsconfig*.json|scripts/*|Makefile)
        add_lane "core"
        add_reason "$path -> core"
        ;;
    esac
  fi
  # Flag entry points and the package manifest regardless of lane.
  case "$match_path" in
    package.json|src/main.ts|src/main-entry.ts|src/preload.ts)
      add_flag "broad-impact"
      add_reason "$path -> broad-impact"
      ;;
  esac
 done
 # Guarantee at least one lane, then print lanes, flags and reasons in that order.
 if [[ ${#LANES[@]} -eq 0 ]]; then
  add_lane "core"
  add_reason "no lane-specific matches -> default to core"
 fi
 # FLAGS is legitimately empty for docs/config/core-only changes. The
 # ${arr[@]+"${arr[@]}"} form expands to nothing in that case, whereas a plain
 # "${arr[@]}" aborts with "unbound variable" under `set -u` on bash < 4.4.
 for lane in ${LANES[@]+"${LANES[@]}"}; do
  printf 'lane:%s\n' "$lane"
 done
 for flag in ${FLAGS[@]+"${FLAGS[@]}"}; do
  printf 'flag:%s\n' "$flag"
 done
 for reason in ${REASONS[@]+"${REASONS[@]}"}; do
  printf 'reason:%s\n' "$reason"
 done
@@ -0,0 +1,566 @@
 #!/usr/bin/env bash
 set -euo pipefail
 # Print the verifier's usage text (options and path-selection rules) to stdout.
 usage() {
  cat <<'EOF'
 Usage: verify_subminer_change.sh [options] [path ...]
 Options:
  --lane <name>             Force a verification lane. Repeatable.
  --artifact-dir <dir>      Use an explicit artifact directory.
  --allow-real-runtime      Allow explicit real-runtime execution.
  --allow-real-gui          Deprecated alias for --allow-real-runtime.
  --dry-run                 Record planned steps without executing commands.
  --help                    Show this help text.
 If no lanes are supplied, the script classifies the provided paths. If no paths are
 provided, it classifies the current local git changes.
 Authoritative real-runtime verification should be requested with explicit path
 arguments instead of relying on inferred local git changes.
 EOF
 }
 # Local wall-clock time formatted as YYYYMMDD-HHMMSS.
 timestamp() {
  date '+%Y%m%d-%H%M%S'
 }
 # Current UTC time in ISO-8601 form with a trailing "Z" (second precision).
 timestamp_iso() {
  date -u '+%Y-%m-%dT%H:%M:%SZ'
 }
 # Print a unique session id of the form subminer-verify-<timestamp>-<random>.
 # mktemp is used only as a collision-free name generator: the directory it
 # creates is removed again before returning.
 generate_session_id() {
  local scratch_dir
  scratch_dir=$(mktemp -d "${TMPDIR:-/tmp}/subminer-verify-$(timestamp)-XXXXXX")
  printf '%s\n' "${scratch_dir##*/}"
  rmdir "$scratch_dir"
 }
 # Membership test: succeed (0) when the first argument is equal to any of the
 # remaining arguments, fail (1) otherwise.
 has_item() {
  local wanted=$1
  shift || true
  local candidate
  for candidate; do
    [[ "$candidate" != "$wanted" ]] || return 0
  done
  return 1
 }
 # Map the legacy lane name "real-gui" to "real-runtime"; print any other name
 # unchanged. No trailing newline is written.
 normalize_lane_name() {
  local requested=$1
  if [[ "$requested" == "real-gui" ]]; then
    requested="real-runtime"
  fi
  printf '%s' "$requested"
 }
 # Normalize a lane name and append it to SELECTED_LANES unless already selected.
 add_lane() {
  local normalized
  normalized=$(normalize_lane_name "$1")
  # "${SELECTED_LANES[@]:-}" keeps the expansion legal under `set -u` while empty.
  has_item "$normalized" "${SELECTED_LANES[@]:-}" && return 0
  SELECTED_LANES+=("$normalized")
 }
 # Record a blocker message and mark the whole run as blocked.
 add_blocker() {
  local message=$1
  BLOCKED=1
  BLOCKERS+=("$message")
 }
 # Append one 8-column row to $STEPS_TSV:
 #   lane, name, status, exit code, command, stdout path, stderr path, note.
 # The summary generator splits this file on newlines and tabs, so any tab,
 # newline or carriage return inside a field (for example a user-supplied lane
 # name echoed in a note) is replaced with a space to keep the row intact.
 append_step_record() {
  local field row="" sep=""
  for field in "$1" "$2" "$3" "$4" "$5" "$6" "$7" "$8"; do
    field=${field//$'\t'/ }
    field=${field//$'\n'/ }
    field=${field//$'\r'/ }
    row+="${sep}${field}"
    sep=$'\t'
  done
  printf '%s\n' "$row" >>"$STEPS_TSV"
 }
 # Write a snapshot of the run's configuration to $ARTIFACT_DIR/env.txt:
 # key=value lines for the session settings, then git HEAD/status output and,
 # when paths were passed on the command line, the requested paths.
 record_env() {
  {
    printf 'repo_root=%s\n' "$REPO_ROOT"
    printf 'session_id=%s\n' "$SESSION_ID"
    printf 'artifact_dir=%s\n' "$ARTIFACT_DIR"
    printf 'path_selection_mode=%s\n' "$PATH_SELECTION_MODE"
    printf 'dry_run=%s\n' "$DRY_RUN"
    printf 'allow_real_runtime=%s\n' "$ALLOW_REAL_RUNTIME"
    printf 'session_home=%s\n' "$SESSION_HOME"
    printf 'session_xdg_config_home=%s\n' "$SESSION_XDG_CONFIG_HOME"
    printf 'session_mpv_dir=%s\n' "$SESSION_MPV_DIR"
    printf 'session_logs_dir=%s\n' "$SESSION_LOGS_DIR"
    printf 'session_mpv_log=%s\n' "$SESSION_MPV_LOG"
    printf 'pwd=%s\n' "$(pwd)"
    # `|| true`: outside a git repository these commands fail, which must not
    # abort the run under `set -e` / `pipefail`.
    git rev-parse --short HEAD 2>/dev/null | sed 's/^/git_head=/' || true
    # Raw `git status --short` lines are appended as-is (not key=value).
    git status --short 2>/dev/null || true
    if [[ ${#PATH_ARGS[@]} -gt 0 ]]; then
      printf 'requested_paths=\n'
      printf '  %s\n' "${PATH_ARGS[@]}"
    fi
  } >"$ARTIFACT_DIR/env.txt"
 }
 # Run (or, in dry-run mode, merely record) one verification command.
 #
 # Arguments: lane, step name, shell command, optional note.
 # Effects:   writes steps/<slug>.{command.txt,stdout.log,stderr.log} under
 #            $ARTIFACT_DIR, appends a row to the steps table, prints a
 #            "lane<TAB>name<TAB>status" line, and updates COMMANDS_RUN,
 #            EXECUTED_REAL_STEPS, FAILED and the FAILURE_* globals.
 # Returns:   0 when the step passed or was a dry run; the command's non-zero
 #            exit code when it failed.
 run_step() {
  local lane=$1
  local name=$2
  local command=$3
  local note=${4:-}
  # File-system-safe step name: anything outside [a-zA-Z0-9_-] becomes "-".
  local slug=${name//[^a-zA-Z0-9_-]/-}
  local stdout_rel="steps/${slug}.stdout.log"
  local stderr_rel="steps/${slug}.stderr.log"
  local stdout_path="$ARTIFACT_DIR/$stdout_rel"
  local stderr_path="$ARTIFACT_DIR/$stderr_rel"
  local status exit_code repo_root_quoted
  COMMANDS_RUN+=("$command")
  printf '%s\n' "$command" >"$ARTIFACT_DIR/steps/${slug}.command.txt"
  if [[ "$DRY_RUN" == "1" ]]; then
    printf '[dry-run] %s\n' "$command" >"$stdout_path"
    : >"$stderr_path"
    status="dry-run"
    exit_code=0
  else
    # Shell-quote the repo root with %q before splicing it into the command
    # string; hand-written double quotes break (or get interpreted) when the
    # path contains a double quote, "$" or a backtick.
    printf -v repo_root_quoted '%q' "$REPO_ROOT"
    if bash -lc "cd $repo_root_quoted && $command" >"$stdout_path" 2>"$stderr_path"; then
      status="passed"
      exit_code=0
      EXECUTED_REAL_STEPS=1
    else
      # In the else branch $? still holds the exit status of the `if` condition.
      exit_code=$?
      status="failed"
      FAILED=1
    fi
  fi
  append_step_record "$lane" "$name" "$status" "$exit_code" "$command" "$stdout_rel" "$stderr_rel" "$note"
  printf '%s\t%s\t%s\n' "$lane" "$name" "$status"
  if [[ "$status" == "failed" ]]; then
    FAILURE_STEP="$name"
    FAILURE_COMMAND="$command"
    FAILURE_STDOUT="$stdout_rel"
    FAILURE_STDERR="$stderr_rel"
    return "$exit_code"
  fi
 }
 # Record a step that was not executed as a command (skipped/blocked/failed
 # validation). The note is stored as the step's stdout log, the stderr log is
 # created empty, a steps-table row with exit code 0 and an empty command is
 # appended, and a "lane<TAB>name<TAB>status" line is printed.
 record_nonpassing_step() {
  local lane=$1 name=$2 status=$3 note=$4
  local slug=${name//[^a-zA-Z0-9_-]/-}
  local out_rel err_rel
  out_rel="steps/${slug}.stdout.log"
  err_rel="steps/${slug}.stderr.log"
  printf '%s\n' "$note" >"$ARTIFACT_DIR/$out_rel"
  : >"$ARTIFACT_DIR/$err_rel"
  append_step_record "$lane" "$name" "$status" "0" "" "$out_rel" "$err_rel" "$note"
  printf '%s\t%s\t%s\n' "$lane" "$name" "$status"
 }
 # Record a step as "skipped". Arguments: lane, step name, note.
 record_skipped_step() {
  local lane=$1 name=$2 note=$3
  record_nonpassing_step "$lane" "$name" "skipped" "$note"
 }
 # Record a step as "blocked" and register its note as a run-level blocker.
 # Arguments: lane, step name, note.
 record_blocked_step() {
  local lane=$1 name=$2 note=$3
  add_blocker "$note"
  record_nonpassing_step "$lane" "$name" "blocked" "$note"
 }
 # Record a validation failure that did not come from running a command.
 # Marks the run failed, fills in the FAILURE_* globals (keeping an already
 # recorded failure command, otherwise using the "(validation)" placeholder),
 # registers the note as a blocker and records the step as "failed".
 # Arguments: lane, step name, note.
 record_failed_step() {
  local lane=$1 name=$2 note=$3
  local slug=${name//[^a-zA-Z0-9_-]/-}
  FAILED=1
  FAILURE_STEP=$name
  if [[ -z "${FAILURE_COMMAND:-}" ]]; then
    FAILURE_COMMAND="(validation)"
  fi
  FAILURE_STDOUT="steps/${slug}.stdout.log"
  FAILURE_STDERR="steps/${slug}.stderr.log"
  add_blocker "$note"
  record_nonpassing_step "$lane" "$name" "failed" "$note"
 }
 # Print the path of the first executable real-runtime helper found next to
 # this script (run_real_runtime_smoke.sh, then run_real_mpv_smoke.sh).
 # Returns 1 and prints nothing when neither exists as an executable.
 find_real_runtime_helper() {
  local helper_name helper_path
  for helper_name in run_real_runtime_smoke.sh run_real_mpv_smoke.sh; do
    helper_path="$SCRIPT_DIR/$helper_name"
    [[ -x "$helper_path" ]] || continue
    printf '%s' "$helper_path"
    return 0
  done
  return 1
 }
 # Try to take the exclusive real-runtime lease, implemented as a directory
 # created with `mkdir` under $REPO_ROOT/.tmp/skill-verification/locks.
 # On success the lease directory is remembered in REAL_RUNTIME_LEASE_DIR and
 # tagged with this session's id. On failure a blocker naming the current
 # holder (when its id can be read) is recorded and 1 is returned.
 acquire_real_runtime_lease() {
  local lock_parent="$REPO_ROOT/.tmp/skill-verification/locks"
  local lock_dir="$lock_parent/exclusive-real-runtime"
  local holder=""
  mkdir -p "$lock_parent"
  if ! mkdir "$lock_dir" 2>/dev/null; then
    if [[ -f "$lock_dir/session_id" ]]; then
      holder=$(cat "$lock_dir/session_id")
    fi
    add_blocker "real-runtime lease already held${holder:+ by $holder}"
    return 1
  fi
  REAL_RUNTIME_LEASE_DIR="$lock_dir"
  printf '%s\n' "$SESSION_ID" >"$lock_dir/session_id"
  return 0
 }
 # Remove the lease directory taken by this session, if any. A lease whose
 # session_id file names a different session is left untouched.
 release_real_runtime_lease() {
  local lease=$REAL_RUNTIME_LEASE_DIR
  [[ -n "$lease" && -d "$lease" ]] || return 0
  if [[ -f "$lease/session_id" ]]; then
    local holder
    holder=$(cat "$lease/session_id")
    [[ "$holder" == "$SESSION_ID" ]] || return 0
  fi
  rm -rf "$lease"
 }
 # Derive FINAL_STATUS from the run flags. Precedence, highest first:
 # failed > blocked > passed (at least one command really ran) > skipped.
 compute_final_status() {
  FINAL_STATUS="skipped"
  # Later assignments override earlier ones, so they are listed from lowest
  # to highest precedence.
  [[ "$EXECUTED_REAL_STEPS" != "1" ]] || FINAL_STATUS="passed"
  [[ "$BLOCKED" != "1" ]] || FINAL_STATUS="blocked"
  [[ "$FAILED" != "1" ]] || FINAL_STATUS="failed"
 }
 # Write the run summary artifacts.
 #
 # First dumps the selected lanes, blockers and requested paths to plain-text
 # files (one entry per line), then runs an inline bun script that reads those
 # files plus steps.tsv and writes summary.json / summary.txt into both
 # $ARTIFACT_DIR and $ARTIFACT_DIR/reports. Scalar values are handed to the
 # script through *_ENV environment variables.
 write_summary_files() {
  # ${arr[@]+"${arr[@]}"} expands to nothing for an empty array. A plain
  # "${arr[@]}" aborts with "unbound variable" under `set -u` on bash < 4.4,
  # and BLOCKERS / PATH_ARGS are empty on an ordinary passing run.
  printf '%s\n' ${SELECTED_LANES[@]+"${SELECTED_LANES[@]}"} >"$ARTIFACT_DIR/lanes.txt"
  printf '%s\n' ${BLOCKERS[@]+"${BLOCKERS[@]}"} >"$ARTIFACT_DIR/blockers.txt"
  printf '%s\n' ${PATH_ARGS[@]+"${PATH_ARGS[@]}"} >"$ARTIFACT_DIR/requested-paths.txt"
  ARTIFACT_DIR_ENV="$ARTIFACT_DIR" \
  SESSION_ID_ENV="$SESSION_ID" \
  FINAL_STATUS_ENV="$FINAL_STATUS" \
  PATH_SELECTION_MODE_ENV="$PATH_SELECTION_MODE" \
  ALLOW_REAL_RUNTIME_ENV="$ALLOW_REAL_RUNTIME" \
  SESSION_HOME_ENV="$SESSION_HOME" \
  SESSION_XDG_CONFIG_HOME_ENV="$SESSION_XDG_CONFIG_HOME" \
  SESSION_MPV_DIR_ENV="$SESSION_MPV_DIR" \
  SESSION_LOGS_DIR_ENV="$SESSION_LOGS_DIR" \
  SESSION_MPV_LOG_ENV="$SESSION_MPV_LOG" \
  STARTED_AT_ENV="$STARTED_AT" \
  FINISHED_AT_ENV="$FINISHED_AT" \
  FAILED_ENV="$FAILED" \
  FAILURE_STEP_ENV="${FAILURE_STEP:-}" \
  FAILURE_COMMAND_ENV="${FAILURE_COMMAND:-}" \
  FAILURE_STDOUT_ENV="${FAILURE_STDOUT:-}" \
  FAILURE_STDERR_ENV="${FAILURE_STDERR:-}" \
  bun -e '
    const fs = require("fs");
    const path = require("path");
    // Non-empty lines of a text file; a missing file yields an empty list.
    function readLines(filePath) {
      if (!fs.existsSync(filePath)) return [];
      return fs.readFileSync(filePath, "utf8").split(/\r?\n/).filter(Boolean);
    }
    const artifactDir = process.env.ARTIFACT_DIR_ENV;
    const reportsDir = path.join(artifactDir, "reports");
    const lanes = readLines(path.join(artifactDir, "lanes.txt"));
    const blockers = readLines(path.join(artifactDir, "blockers.txt"));
    const requestedPaths = readLines(path.join(artifactDir, "requested-paths.txt"));
    // steps.tsv rows have eight tab-separated columns.
    const steps = readLines(path.join(artifactDir, "steps.tsv")).map((line) => {
      const [lane, name, status, exitCode, command, stdout, stderr, note] = line.split("\t");
      return {
        lane,
        name,
        status,
        exitCode: Number(exitCode || 0),
        command,
        stdout,
        stderr,
        note,
      };
    });
    const summary = {
      sessionId: process.env.SESSION_ID_ENV || "",
      artifactDir,
      reportsDir,
      status: process.env.FINAL_STATUS_ENV || "failed",
      selectedLanes: lanes,
      failed: process.env.FAILED_ENV === "1",
      failure:
        process.env.FAILED_ENV === "1"
          ? {
              step: process.env.FAILURE_STEP_ENV || "",
              command: process.env.FAILURE_COMMAND_ENV || "",
              stdout: process.env.FAILURE_STDOUT_ENV || "",
              stderr: process.env.FAILURE_STDERR_ENV || "",
            }
          : null,
      blockers,
      pathSelectionMode: process.env.PATH_SELECTION_MODE_ENV || "git-inferred",
      requestedPaths,
      allowRealRuntime: process.env.ALLOW_REAL_RUNTIME_ENV === "1",
      startedAt: process.env.STARTED_AT_ENV || "",
      finishedAt: process.env.FINISHED_AT_ENV || "",
      env: {
        home: process.env.SESSION_HOME_ENV || "",
        xdgConfigHome: process.env.SESSION_XDG_CONFIG_HOME_ENV || "",
        mpvDir: process.env.SESSION_MPV_DIR_ENV || "",
        logsDir: process.env.SESSION_LOGS_DIR_ENV || "",
        mpvLog: process.env.SESSION_MPV_LOG_ENV || "",
      },
      steps,
    };
    const summaryJson = JSON.stringify(summary, null, 2) + "\n";
    fs.writeFileSync(path.join(artifactDir, "summary.json"), summaryJson);
    fs.writeFileSync(path.join(reportsDir, "summary.json"), summaryJson);
    const lines = [];
    lines.push(`session_id: ${summary.sessionId}`);
    lines.push(`artifact_dir: ${artifactDir}`);
    lines.push(`selected_lanes: ${lanes.join(", ") || "(none)"}`);
    lines.push(`status: ${summary.status}`);
    lines.push(`path_selection_mode: ${summary.pathSelectionMode}`);
    if (requestedPaths.length > 0) {
      lines.push(`requested_paths: ${requestedPaths.join(", ")}`);
    }
    if (blockers.length > 0) {
      lines.push(`blockers: ${blockers.join(" | ")}`);
    }
    for (const step of steps) {
      lines.push(`${step.lane}/${step.name}: ${step.status}`);
      if (step.command) lines.push(`  command: ${step.command}`);
      lines.push(`  stdout: ${step.stdout}`);
      lines.push(`  stderr: ${step.stderr}`);
      if (step.note) lines.push(`  note: ${step.note}`);
    }
    if (summary.failed) {
      lines.push(`failure_command: ${process.env.FAILURE_COMMAND_ENV || ""}`);
    }
    const summaryText = lines.join("\n") + "\n";
    fs.writeFileSync(path.join(artifactDir, "summary.txt"), summaryText);
    fs.writeFileSync(path.join(reportsDir, "summary.txt"), summaryText);
  '
 }
 # EXIT-trap handler: give back the real-runtime lease if this session holds it.
 cleanup() {
  release_real_runtime_lease
 }
 # Run-wide state. Everything is initialised up front so that later reads are
 # safe under `set -u`.
 CLASSIFIER_OUTPUT=""
 ARTIFACT_DIR=""
 # Option flags (0/1), set by the argument parser.
 ALLOW_REAL_RUNTIME=0
 DRY_RUN=0
 # Outcome flags (0/1) that compute_final_status folds into FINAL_STATUS.
 FAILED=0
 BLOCKED=0
 EXECUTED_REAL_STEPS=0
 FINAL_STATUS=""
 # Details of the first failing step, filled in by run_step / record_failed_step.
 FAILURE_STEP=""
 FAILURE_COMMAND=""
 FAILURE_STDOUT=""
 FAILURE_STDERR=""
 # Non-empty only while this session holds the real-runtime lease.
 REAL_RUNTIME_LEASE_DIR=""
 STARTED_AT=""
 FINISHED_AT=""
 declare -a EXPLICIT_LANES=()
 declare -a SELECTED_LANES=()
 declare -a PATH_ARGS=()
 declare -a COMMANDS_RUN=()
 declare -a BLOCKERS=()
 # Parse command-line arguments into EXPLICIT_LANES, ARTIFACT_DIR, the option
 # flags and PATH_ARGS. Everything after a literal `--` is taken as a path.
 while [[ $# -gt 0 ]]; do
  case "$1" in
    --lane|--artifact-dir)
      # Without this check a trailing `--lane` / `--artifact-dir` dies with an
      # opaque "unbound variable" error under `set -u`.
      if [[ $# -lt 2 ]]; then
        printf 'error: %s requires a value\n' "$1" >&2
        usage >&2
        exit 1
      fi
      if [[ "$1" == "--lane" ]]; then
        EXPLICIT_LANES+=("$(normalize_lane_name "$2")")
      else
        ARTIFACT_DIR=$2
      fi
      shift 2
      ;;
    --allow-real-runtime|--allow-real-gui)
      ALLOW_REAL_RUNTIME=1
      shift
      ;;
    --dry-run)
      DRY_RUN=1
      shift
      ;;
    --help|-h)
      usage
      exit 0
      ;;
    --)
      shift
      while [[ $# -gt 0 ]]; do
        PATH_ARGS+=("$1")
        shift
      done
      ;;
    -*)
      # Fail closed on unknown options: a mistyped flag would otherwise be
      # recorded as a path and silently switch the run to explicit-path mode.
      # Paths that really start with "-" can still be passed after `--`.
      printf 'error: unknown option: %s\n' "$1" >&2
      usage >&2
      exit 1
      ;;
    *)
      PATH_ARGS+=("$1")
      shift
      ;;
  esac
 done
 # Resolve locations and create the per-session artifact layout.
 SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # Outside a git work tree the current directory is used as the repo root.
 REPO_ROOT=$(git rev-parse --show-toplevel 2>/dev/null || pwd)
 SESSION_ID=$(generate_session_id)
 # "explicit" when paths were given on the command line, else "git-inferred".
 PATH_SELECTION_MODE="git-inferred"
 if [[ ${#PATH_ARGS[@]} -gt 0 ]]; then
  PATH_SELECTION_MODE="explicit"
 fi
 # Default artifact directory: .tmp/skill-verification/<session id> in the repo.
 if [[ -z "$ARTIFACT_DIR" ]]; then
  mkdir -p "$REPO_ROOT/.tmp/skill-verification"
  ARTIFACT_DIR="$REPO_ROOT/.tmp/skill-verification/$SESSION_ID"
 fi
 # Session-private directories; the real-runtime lane passes SESSION_HOME,
 # SESSION_XDG_CONFIG_HOME and SESSION_MPV_LOG to the helper as HOME,
 # XDG_CONFIG_HOME and SUBMINER_MPV_LOG.
 SESSION_HOME="$ARTIFACT_DIR/home"
 SESSION_XDG_CONFIG_HOME="$ARTIFACT_DIR/xdg"
 SESSION_MPV_DIR="$ARTIFACT_DIR/mpv"
 SESSION_LOGS_DIR="$ARTIFACT_DIR/logs"
 SESSION_MPV_LOG="$SESSION_LOGS_DIR/mpv.log"
 mkdir -p "$ARTIFACT_DIR/steps" "$ARTIFACT_DIR/reports" "$SESSION_HOME" "$SESSION_XDG_CONFIG_HOME" "$SESSION_MPV_DIR" "$SESSION_LOGS_DIR"
 STEPS_TSV="$ARTIFACT_DIR/steps.tsv"
 # Start with an empty steps table.
 : >"$STEPS_TSV"
 # From here on, always release the real-runtime lease when the script exits.
 trap cleanup EXIT
 STARTED_AT=$(timestamp_iso)
 # Decide which lanes to run: explicit --lane values win; otherwise ask the
 # classifier (with the given paths, or with none so it inspects git itself).
 if [[ ${#EXPLICIT_LANES[@]} -eq 0 ]]; then
  if [[ ${#PATH_ARGS[@]} -eq 0 ]]; then
    CLASSIFIER_OUTPUT=$(bash "$SCRIPT_DIR/classify_subminer_diff.sh")
  else
    CLASSIFIER_OUTPUT=$(bash "$SCRIPT_DIR/classify_subminer_diff.sh" "${PATH_ARGS[@]}")
  fi
  printf '%s\n' "$CLASSIFIER_OUTPUT" >"$ARTIFACT_DIR/classification.txt"
  # Only "lane:" lines drive selection; flag:/reason: lines are kept in
  # classification.txt for the reader.
  while IFS= read -r line; do
    if [[ "$line" == lane:* ]]; then
      add_lane "${line#lane:}"
    fi
  done <<<"$CLASSIFIER_OUTPUT"
 else
  local_lane=""
  for local_lane in "${EXPLICIT_LANES[@]}"; do
    add_lane "$local_lane"
  done
  printf 'reason:explicit lanes supplied\n' >"$ARTIFACT_DIR/classification.txt"
 fi
 record_env
 printf 'artifact_dir=%s\n' "$ARTIFACT_DIR"
 printf 'selected_lanes=%s\n' "$(IFS=,; echo "${SELECTED_LANES[*]}")"
 # Run the selected lanes in order and stop at the first failure or blocker.
 #
 # run_step returns non-zero exactly when it marks the run as failed, so
 # `run_step ... || break` is the only check needed between steps; the extra
 # `[[ "$FAILED" == "1" ]] && break` lines that used to follow were unreachable.
 # ${arr[@]+"${arr[@]}"} keeps the loop safe under `set -u` on bash < 4.4
 # should the lane list ever be empty.
 for lane in ${SELECTED_LANES[@]+"${SELECTED_LANES[@]}"}; do
  case "$lane" in
    docs)
      run_step "$lane" "docs-test" "bun run docs:test" || break
      run_step "$lane" "docs-build" "bun run docs:build" || break
      ;;
    config)
      run_step "$lane" "test-config" "bun run test:config" || break
      ;;
    core)
      run_step "$lane" "typecheck" "bun run typecheck" || break
      run_step "$lane" "test-fast" "bun run test:fast" || break
      ;;
    launcher-plugin)
      run_step "$lane" "launcher-smoke-src" "bun run test:launcher:smoke:src" || break
      run_step "$lane" "plugin-src" "bun run test:plugin:src" || break
      ;;
    runtime-compat)
      run_step "$lane" "build" "bun run build" || break
      run_step "$lane" "test-runtime-compat" "bun run test:runtime:compat" || break
      run_step "$lane" "test-smoke-dist" "bun run test:smoke:dist" || break
      ;;
    real-runtime)
      # Guards, in order: explicit paths, explicit opt-in, exclusive lease,
      # helper script present. Each failed guard records a blocked step.
      if [[ "$PATH_SELECTION_MODE" != "explicit" ]]; then
        record_blocked_step \
          "$lane" \
          "real-runtime-guard" \
          "real-runtime lane requires explicit paths; inferred local git changes are non-authoritative"
        break
      fi
      if [[ "$ALLOW_REAL_RUNTIME" != "1" ]]; then
        record_blocked_step \
          "$lane" \
          "real-runtime-guard" \
          "real-runtime lane requested but --allow-real-runtime was not supplied"
        break
      fi
      if ! acquire_real_runtime_lease; then
        record_blocked_step \
          "$lane" \
          "real-runtime-lease" \
          "real-runtime lease already held; rerun after the active runtime verification finishes"
        break
      fi
      if ! REAL_RUNTIME_HELPER=$(find_real_runtime_helper); then
        record_blocked_step \
          "$lane" \
          "real-runtime-helper" \
          "real-runtime helper not implemented yet"
        break
      fi
      # %q shell-quotes every value so the assembled command line survives
      # being re-parsed by the shell that run_step starts.
      printf -v REAL_RUNTIME_COMMAND \
        'SESSION_ID=%q HOME=%q XDG_CONFIG_HOME=%q SUBMINER_MPV_LOG=%q bash %q' \
        "$SESSION_ID" \
        "$SESSION_HOME" \
        "$SESSION_XDG_CONFIG_HOME" \
        "$SESSION_MPV_LOG" \
        "$REAL_RUNTIME_HELPER"
      run_step "$lane" "real-runtime-smoke" "$REAL_RUNTIME_COMMAND" || break
      ;;
    *)
      # Fail closed: an unrecognised lane is a failure, not a skip.
      record_failed_step "$lane" "lane-validation" "unknown lane: $lane"
      break
      ;;
  esac
  # Safety net in case a lane marks the run failed/blocked without breaking.
  if [[ "$FAILED" == "1" || "$BLOCKED" == "1" ]]; then
    break
  fi
 done
 # Finish the run: compute the final status, write the summaries, print the
 # machine-readable trailer and exit with 1 (failed), 2 (blocked) or 0.
 FINISHED_AT=$(timestamp_iso)
 compute_final_status
 write_summary_files
 printf 'status=%s\n' "$FINAL_STATUS"
 printf 'artifact_dir=%s\n' "$ARTIFACT_DIR"
 if [[ "$FINAL_STATUS" == "failed" ]]; then
  printf 'result=failed\n'
  printf 'failure_command=%s\n' "$FAILURE_COMMAND"
  exit 1
 fi
 if [[ "$FINAL_STATUS" == "blocked" ]]; then
  printf 'result=blocked\n'
  exit 2
 fi
 # Every remaining status (passed, skipped) is reported as ok.
 printf 'result=ok\n'
 exit 0
@@ -0,0 +1,131 @@
 ---
 name: "subminer-scrum-master"
 description: "Use in the SubMiner repo when a request should be turned into planned work and driven through execution. Assesses whether backlog tracking is warranted, creates or updates tasks when needed, records a plan, dispatches one or more subagents, and requires verification before handoff."
 ---
 # SubMiner Scrum Master
 Own workflow, not code by default.
 Use this skill when the user gives a feature request, bug report, issue, refactor, or implementation ask and the agent should manage intake, planning, backlog hygiene, worker dispatch, and verification through completion.
 ## Core Rules
 1. Decide first whether backlog tracking is warranted.
 2. If backlog is needed, search first. Update existing work when it clearly matches.
 3. If backlog is not needed, keep the process light. Do not invent ticket ceremony.
 4. Record a plan before dispatching coding work.
 5. Use parent + subtasks for multi-part work when backlog is used.
 6. Dispatch conservatively. Parallelize only disjoint write scopes.
 7. Require verification before handoff, typically via `subminer-change-verification`.
 8. Report backlog actions, dispatched workers, verification, blockers, and remaining risks.
 ## Backlog Decision
 Skip backlog when the request is:
 - question only
 - obvious mechanical edit
 - a tiny isolated change that needs no real planning
 Use backlog when the work:
 - needs planning or scope decisions
 - spans multiple phases or subsystems
 - is likely to need subagent dispatch
 - should remain traceable for handoff/resume
 If backlog is used:
 - search existing tasks first
 - create/update a standalone task for one focused deliverable
 - create/update a parent task plus subtasks for multi-part work
 - record the implementation plan in the task before implementation begins
 ## Intake Workflow
 1. Parse the request.
   Classify it as question, mechanical edit, bugfix, feature, refactor, investigation, or follow-up.
 2. Decide whether backlog is needed.
 3. If backlog is needed:
   - search first
   - update existing task if clearly relevant
   - otherwise create the right structure
   - write the implementation plan before dispatch
 4. If backlog is skipped:
   - write a short working plan in-thread
   - proceed without fake ticketing
 5. Choose execution mode:
   - no subagents for trivial work
   - one worker for focused work
   - parallel workers only for disjoint scopes
 6. Run verification before handoff.
 ## Dispatch Rules
 The scrum master orchestrates. Workers implement.
 - Do not become the default implementer unless delegation is unnecessary.
 - Do not parallelize overlapping files or tightly coupled runtime work.
 - Give every worker explicit ownership of files/modules.
 - Tell every worker other agents may be active and they must not revert unrelated edits.
 - Require each worker to report:
  - changed files
  - tests run
  - blockers
 Use worker agents for implementation and explorer agents only for bounded codebase questions.
 ## Verification
 Every nontrivial code task gets verification.
 Preferred flow:
 1. use `subminer-change-verification`
 2. start with the cheapest sufficient lane
 3. escalate only when needed
 4. if worker verification is sufficient, accept it or run one final consolidating pass
 Never hand off nontrivial work without stating what was verified and what was skipped.
 ## Failure / Scope Handling
 - If a worker hits ambiguity, pause and ask the user.
 - If verification fails, either:
  - send the worker back with exact failure context, or
  - fix it directly if it is tiny and clearly in scope
 - If new scope appears, revisit backlog structure before silently expanding work.
 ## Representative Flows
 ### Trivial no-ticket work
 - decide backlog is unnecessary
 - keep a short plan
 - implement directly or with one worker if helpful
 - run targeted verification
 - report outcome concisely
 ### Single-task implementation
 - search/create/update one task
 - record plan
 - dispatch one worker
 - integrate
 - verify
 - update task and report outcome
 ### Parent + subtasks execution
 - search/create/update parent task
 - create subtasks for distinct deliverables/phases
 - record sequencing in the plan
 - dispatch workers only where scopes are disjoint
 - integrate
 - run consolidated verification
 - update task state and report outcome
 ## Output Expectations
 At the end, report:
 - whether backlog was used and what changed
 - which workers were dispatched and what they owned
 - what verification ran
 - blockers, skips, and risks
@@ -35,6 +35,19 @@ docs/.vitepress/cache/
 docs/.vitepress/dist/
 tests/*
 .worktrees/
 .tmp/
 .codex/*
 .agents/*
 !.agents/skills/
 .agents/skills/*
 !.agents/skills/subminer-change-verification/
 !.agents/skills/subminer-scrum-master/
 .agents/skills/subminer-change-verification/*
 !.agents/skills/subminer-change-verification/SKILL.md
 !.agents/skills/subminer-change-verification/scripts/
 .agents/skills/subminer-change-verification/scripts/*
 !.agents/skills/subminer-change-verification/scripts/classify_subminer_diff.sh
 !.agents/skills/subminer-change-verification/scripts/verify_subminer_change.sh
 .agents/skills/subminer-scrum-master/*
 !.agents/skills/subminer-scrum-master/SKILL.md
 favicon.png
@@ -0,0 +1,131 @@
 import assert from 'node:assert/strict';
 import fs from 'node:fs';
 import os from 'node:os';
 import path from 'node:path';
 import { spawnSync } from 'node:child_process';
 import test from 'node:test';
 // Tests are expected to run from the repository root; both helper scripts
 // are resolved relative to it.
 const repoRoot = process.cwd();
 const skillScriptsDir = path.join(
  repoRoot,
  '.agents/skills/subminer-change-verification/scripts',
 );
 const classifyScript = path.join(skillScriptsDir, 'classify_subminer_diff.sh');
 const verifyScript = path.join(skillScriptsDir, 'verify_subminer_change.sh');
 /**
  * Runs `fn` with a freshly created temporary directory and returns its result.
  * The directory is removed afterwards, whether `fn` returns or throws.
  */
 function withTempDir<T>(fn: (dir: string) => T): T {
  const prefix = path.join(os.tmpdir(), 'subminer-change-verification-test-');
  const scratchDir = fs.mkdtempSync(prefix);
  try {
    const outcome = fn(scratchDir);
    return outcome;
  } finally {
    fs.rmSync(scratchDir, { recursive: true, force: true });
  }
 }
 /**
  * Runs `bash` synchronously with the given arguments from the repository
  * root, inheriting the current environment, and returns the spawn result
  * with stdout/stderr decoded as UTF-8 strings.
  */
 function runBash(args: string[]) {
  const spawnOptions = {
    cwd: repoRoot,
    env: process.env,
    encoding: 'utf8' as const,
  };
  return spawnSync('bash', args, spawnOptions);
 }
 /**
  * Extracts the value of the first `artifact_dir=<path>` line from verifier
  * stdout. Fails the assertion (with the full stdout in the message) when no
  * such line exists.
  */
 function parseArtifactDir(stdout: string): string {
  const found = /^artifact_dir=(.+)$/m.exec(stdout);
  assert.ok(found, `expected artifact_dir in stdout, got:\n${stdout}`);
  const [, artifactDir] = found;
  return artifactDir ?? '';
 }
 /**
  * Reads and parses `<artifactDir>/summary.json`. The returned type lists only
  * the fields these tests inspect; the file content is not validated.
  */
 function readSummaryJson(artifactDir: string) {
  type VerifierSummary = {
    sessionId: string;
    status: string;
    selectedLanes: string[];
    blockers?: string[];
    artifactDir: string;
    pathSelectionMode?: string;
  };
  const summaryPath = path.join(artifactDir, 'summary.json');
  const raw = fs.readFileSync(summaryPath, 'utf8');
  return JSON.parse(raw) as VerifierSummary;
 }
 // Launcher and plugin paths must select the launcher-plugin lane and carry
 // the real-runtime-candidate flag; the old real-gui-candidate name must not
 // appear anywhere in the output.
 test('classifier marks launcher and plugin paths as real-runtime candidates', () => {
  const changedPaths = ['launcher/mpv.ts', 'plugin/subminer/process.lua'];
  const { status, stdout, stderr } = runBash([classifyScript, ...changedPaths]);
  assert.equal(status, 0, stderr || stdout);
  assert.match(stdout, /^lane:launcher-plugin$/m);
  assert.match(stdout, /^flag:real-runtime-candidate$/m);
  assert.doesNotMatch(stdout, /real-gui-candidate/);
 });
 // Requesting the real-runtime lane without --allow-real-runtime must end as
 // "blocked" (non-zero exit) and still produce both summary.json copies.
 test('verifier blocks requested real-runtime lane when runtime execution is not allowed', () => {
  withTempDir((root) => {
    const artifactDir = path.join(root, 'artifacts');
    const verifierArgs = [
      verifyScript,
      '--dry-run',
      '--artifact-dir',
      artifactDir,
      '--lane',
      'real-runtime',
      'launcher/mpv.ts',
    ];
    const { status, stdout } = runBash(verifierArgs);
    assert.notEqual(status, 0, stdout);
    assert.match(stdout, /^result=blocked$/m);
    const summary = readSummaryJson(artifactDir);
    assert.equal(summary.status, 'blocked');
    assert.deepEqual(summary.selectedLanes, ['real-runtime']);
    assert.ok(summary.sessionId.length > 0);
    assert.ok(summary.blockers?.some((entry) => entry.includes('--allow-real-runtime')));
    const reportCopy = path.join(artifactDir, 'reports', 'summary.json');
    assert.equal(fs.existsSync(reportCopy), true);
  });
 });
 // An unrecognised lane name must be reported as a failure, not silently
 // skipped, and must show up among the recorded blockers.
 test('verifier fails closed for unknown lanes', () => {
  withTempDir((root) => {
    const artifactDir = path.join(root, 'artifacts');
    const verifierArgs = [
      verifyScript,
      '--dry-run',
      '--artifact-dir',
      artifactDir,
      '--lane',
      'not-a-lane',
      'src/main.ts',
    ];
    const { status, stdout } = runBash(verifierArgs);
    assert.notEqual(status, 0, stdout);
    assert.match(stdout, /^result=failed$/m);
    const summary = readSummaryJson(artifactDir);
    assert.equal(summary.status, 'failed');
    assert.deepEqual(summary.selectedLanes, ['not-a-lane']);
    assert.ok(summary.blockers?.some((entry) => entry.includes('unknown lane')));
  });
 });
 // Two default-configured runs must get distinct session ids and artifact
 // directories. These runs write into the repository's own .tmp tree (no
 // --artifact-dir), so the directories they report are always removed, even
 // when an assertion fails.
 test('verifier allocates unique session ids and artifact roots by default', () => {
  const first = runBash([verifyScript, '--dry-run', '--lane', 'core', 'src/main.ts']);
  const second = runBash([verifyScript, '--dry-run', '--lane', 'core', 'src/main.ts']);
  // Collect whatever artifact directories were announced without asserting,
  // so cleanup still covers them if a status check below throws.
  const createdDirs = [first, second]
    .map((run) => /^artifact_dir=(.+)$/m.exec(run.stdout ?? '')?.[1])
    .filter((dir): dir is string => typeof dir === 'string' && dir.length > 0);
  try {
    assert.equal(first.status, 0, first.stderr || first.stdout);
    assert.equal(second.status, 0, second.stderr || second.stdout);
    const firstArtifactDir = parseArtifactDir(first.stdout);
    const secondArtifactDir = parseArtifactDir(second.stdout);
    const firstSummary = readSummaryJson(firstArtifactDir);
    const secondSummary = readSummaryJson(secondArtifactDir);
    assert.notEqual(firstSummary.sessionId, secondSummary.sessionId);
    assert.notEqual(firstSummary.artifactDir, secondSummary.artifactDir);
    assert.equal(firstSummary.pathSelectionMode, 'explicit');
    assert.equal(secondSummary.pathSelectionMode, 'explicit');
  } finally {
    for (const dir of createdDirs) {
      fs.rmSync(dir, { recursive: true, force: true });
    }
  }
 });