feat: add AniList character dictionary sync

This commit is contained in:
2026-03-05 22:43:19 -08:00
parent 2f07c3407a
commit 33ded3c1bf
117 changed files with 3579 additions and 6443 deletions

View File

@@ -1,165 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail

# ---------------------------------------------------------------------------
# Configuration.
# Each setting keeps a non-empty value inherited from the environment and
# otherwise falls back to the default shown here.
# ---------------------------------------------------------------------------

# Repository the sweep runs in.
: "${REPO:=$HOME/projects/japanese/SubMiner}"
# Lock file that keeps two sweeps from running at the same time.
: "${LOCK_FILE:=/tmp/subminer-doc-sweep.lock}"
# Holds the working-tree fingerprint recorded by the previous sweep.
: "${STATE_FILE:=/tmp/subminer-doc-sweep.state}"
# Log that receives run markers and the agent's combined stdout/stderr.
: "${LOG_FILE:=$REPO/.codex-doc-sweep.log}"
# Upper bound, in seconds, handed to timeout(1) for the agent command.
: "${TIMEOUT_SECONDS:=240}"

# Layout of the sharded sub-agent coordination documents.
: "${SUBAGENT_ROOT:=$REPO/docs/subagents}"
: "${SUBAGENT_INDEX_FILE:=$SUBAGENT_ROOT/INDEX.md}"
: "${SUBAGENT_COLLAB_FILE:=$SUBAGENT_ROOT/collaboration.md}"
: "${SUBAGENT_AGENTS_DIR:=$SUBAGENT_ROOT/agents}"
# Older single-file layout; its presence only triggers a warning later on.
: "${LEGACY_SUBAGENT_FILE:=$REPO/docs/subagent.md}"

# Identity this sweep uses in logs and coordination files.
: "${AGENT_ID:=docs-sweep}"
: "${AGENT_ALIAS:=Docs Sweep}"
: "${AGENT_MISSION:=Docs drift cleanup and coordination updates}"

# Non-interactive agent command that receives the prompt as its last argument.
# Examples:
#   AGENT_CMD='codex exec'
#   AGENT_CMD='opencode run'
: "${AGENT_CMD:=codex exec}"

# File-name-safe form of the agent id: every byte outside [A-Za-z0-9._-]
# becomes an underscore.
AGENT_ID_SAFE="$(printf '%s' "$AGENT_ID" | tr -c 'A-Za-z0-9._-' '_')"
AGENT_FILE="$SUBAGENT_AGENTS_DIR/$AGENT_ID_SAFE.md"

# Make sure every directory the script writes into exists.
for state_path in "$LOCK_FILE" "$STATE_FILE"; do
  mkdir -p "$(dirname "$state_path")"
done
mkdir -p "$SUBAGENT_ROOT" "$SUBAGENT_AGENTS_DIR" "$SUBAGENT_ROOT/archive"
# Single-instance guard: open the lock file on fd 9 and try to take an
# exclusive lock without blocking. fd 9 is never closed below, so the lock is
# held until the script exits. If another sweep already holds it, exit quietly.
exec 9> "$LOCK_FILE"
if ! flock -n 9; then
exit 0
fi
cd "$REPO"
# Fingerprint the working tree by hashing the porcelain status listing plus
# the list of untracked, non-ignored files. This hashes the listings, not the
# file contents, so further edits to a file that is already listed as modified
# do not change the fingerprint.
current_state="$({
git status --porcelain=v1
git ls-files --others --exclude-standard
} | sha256sum | cut -d' ' -f1)"
# A missing or unreadable state file yields an empty previous_state, which
# never equals a sha256 digest, so the first run always proceeds.
previous_state="$(cat "$STATE_FILE" 2> /dev/null || true)"
# Nothing changed since the last recorded sweep: skip this run.
if [[ "$current_state" == "$previous_state" ]]; then
exit 0
fi
# NOTE(review): the new fingerprint is stored before the sweep runs, so a
# failed sweep is not retried until the tree changes again — confirm intended.
printf '%s' "$current_state" > "$STATE_FILE"
# Record the start of the run twice: a local ISO-8601 stamp for console and
# log lines, and a Z-suffixed UTC stamp for coordination files and the prompt.
run_started_at="$(date -Is)"
run_started_utc="$(date -u +%Y-%m-%dT%H:%M:%SZ)"

# Announce the run on stdout and append the start marker to the log.
printf '%s\n' "[RUN] [$run_started_at] docs sweep running (agent_id=$AGENT_ID alias=$AGENT_ALIAS)"
printf '%s\n' "[$run_started_at] state changed; starting docs sweep (agent_id=$AGENT_ID alias=$AGENT_ALIAS)" >> "$LOG_FILE"

# Seed the shared index if it does not exist yet (literal here-doc, no expansion).
[[ -f "$SUBAGENT_INDEX_FILE" ]] || cat > "$SUBAGENT_INDEX_FILE" << 'EOF'
# Subagents Index
Read first. Keep concise.
| agent_id | alias | mission | status | file | last_update_utc |
| --- | --- | --- | --- | --- | --- |
EOF

# Seed the shared collaboration notes if they do not exist yet.
[[ -f "$SUBAGENT_COLLAB_FILE" ]] || cat > "$SUBAGENT_COLLAB_FILE" << 'EOF'
# Subagents Collaboration
Shared notes. Append-only.
- [YYYY-MM-DDTHH:MM:SSZ] [agent_id|alias] note, question, dependency, conflict, decision.
EOF

# Seed this agent's own file; the unquoted delimiter lets the identity and
# timestamp variables expand into the template.
[[ -f "$AGENT_FILE" ]] || cat > "$AGENT_FILE" << EOF
# Agent: $AGENT_ID
- alias: $AGENT_ALIAS
- mission: $AGENT_MISSION
- status: planning
- branch: unknown
- started_at: $run_started_utc
- heartbeat_minutes: 20
## Current Work (newest first)
- [$run_started_utc] intent: initialize section
## Files Touched
- none yet
## Assumptions
- none yet
## Open Questions / Blockers
- none
## Next Step
- continue run
EOF

# Warn on stdout and in the log when the old single-file layout is still present.
if [[ -f "$LEGACY_SUBAGENT_FILE" ]]; then
  printf '%s\n' "[WARN] [$run_started_at] legacy file exists; prefer sharded layout: $LEGACY_SUBAGENT_FILE" \
    | tee -a "$LOG_FILE"
fi
# Build the agent prompt. `read -r -d ''` slurps the whole here-doc into
# PROMPT; it returns non-zero because input ends before a NUL delimiter is
# seen, hence the `|| true` under `set -e`. The here-doc delimiter is unquoted,
# so the $VARIABLES below are expanded now and \` produces a literal backtick.
read -r -d '' PROMPT << EOF || true
Watch for in-flight refactors. If repo changes introduced drift, update only:
- README.md
- AGENTS.md
- docs/**/*.md
- config.example.jsonc
- docs/public/config.example.jsonc <-- generated automatically with make generate-example-config / bun run generate:config-example
- package.json scripts/config references (only if needed)
Coordination protocol:
- Read in order before edits:
1) \`$SUBAGENT_INDEX_FILE\`
2) \`$SUBAGENT_COLLAB_FILE\`
3) \`$AGENT_FILE\`
- Edit scope:
- MAY edit own file: \`$AGENT_FILE\`
- MAY append to collaboration: \`$SUBAGENT_COLLAB_FILE\`
- MAY update own row in index: \`$SUBAGENT_INDEX_FILE\`
- MUST NOT edit other agent files in \`$SUBAGENT_AGENTS_DIR\`
- Ensure own file has updated: alias, mission, status, branch, started_at, heartbeat_minutes.
- Add UTC ISO entries in "Current Work (newest first)" for intent/progress/handoff for this run.
- Keep own file sections current: Files Touched, assumptions, blockers, next step.
- Ensure index row for \`$AGENT_ID\` reflects alias/mission/status/file/last_update_utc.
- If file conflict/dependency seen, append note in collaboration.
Run metadata:
- run_started_at_utc: $run_started_utc
- repo: $REPO
- agent_id: $AGENT_ID
- agent_alias: $AGENT_ALIAS
- agent_file: $AGENT_FILE
Rules:
- Keep edits minimal and accurate to current code.
- Do not commit.
- Do not push.
- If ambiguous, do not guess; skip and report uncertainty.
- Print concise summary with:
1) files changed + why
2) coordination updates made (\`$SUBAGENT_INDEX_FILE\`, \`$SUBAGENT_COLLAB_FILE\`, \`$AGENT_FILE\`)
3) open questions/blockers
EOF
# Run the agent non-interactively with the prompt as one argument. The command
# line is re-parsed by `bash -lc`, so the prompt is shell-quoted with printf %q
# to survive that second round of parsing intact.
quoted_prompt="$(printf '%q' "$PROMPT")"

# Capture the agent's exit status at the call site. Reading $? later (for
# example after a `date` command substitution) would yield that later
# command's status instead, and every failure would be reported as exit 0.
job_status=0
timeout "${TIMEOUT_SECONDS}s" bash -lc "$AGENT_CMD $quoted_prompt" >> "$LOG_FILE" 2>&1 || job_status=$?

if (( job_status == 0 )); then
  run_finished_at="$(date -Is)"
  echo "[OK] [$run_finished_at] docs sweep complete (agent_id=$AGENT_ID)"
  echo "[$run_finished_at] docs sweep complete (agent_id=$AGENT_ID)" >> "$LOG_FILE"
else
  run_failed_at="$(date -Is)"
  exit_code=$job_status
  echo "[FAIL] [$run_failed_at] docs sweep failed (exit $exit_code, agent_id=$AGENT_ID)"
  echo "[$run_failed_at] docs sweep failed (exit $exit_code, agent_id=$AGENT_ID)" >> "$LOG_FILE"
fi

# Propagate the agent's real status (timeout's own status if it expired).
exit "$job_status"

View File

@@ -1,192 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail

# Directory containing this script, and the one-shot sweep script next to it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
RUN_ONCE_SCRIPT="$SCRIPT_DIR/docs-sweep-once.sh"

# ---------------------------------------------------------------------------
# Configuration.
# Each setting keeps a non-empty value inherited from the environment and
# otherwise falls back to the default shown here.
# ---------------------------------------------------------------------------

# Pause between sweep cycles, in seconds.
: "${INTERVAL_SECONDS:=300}"
# Repository the report command runs in.
: "${REPO:=$HOME/projects/japanese/SubMiner}"
# Shared log that is trimmed after each cycle and summarized by --report.
: "${LOG_FILE:=$REPO/.codex-doc-sweep.log}"

# Layout of the sharded sub-agent coordination documents.
: "${SUBAGENT_ROOT:=$REPO/docs/subagents}"
: "${SUBAGENT_INDEX_FILE:=$SUBAGENT_ROOT/INDEX.md}"
: "${SUBAGENT_COLLAB_FILE:=$SUBAGENT_ROOT/collaboration.md}"
: "${SUBAGENT_AGENTS_DIR:=$SUBAGENT_ROOT/agents}"

# Agent identity; the file-name-safe form replaces every byte outside
# [A-Za-z0-9._-] with an underscore.
: "${AGENT_ID:=docs-sweep}"
AGENT_ID_SAFE="$(printf '%s' "$AGENT_ID" | tr -c 'A-Za-z0-9._-' '_')"
: "${AGENT_FILE:=$SUBAGENT_AGENTS_DIR/${AGENT_ID_SAFE}.md}"

# Report mode: off unless -r/--report is given on the command line.
REPORT_WITH_CODEX=false
: "${REPORT_TIMEOUT_SECONDS:=120}"
: "${REPORT_AGENT_CMD:=codex exec}"

# The watcher cannot do anything without an executable one-shot script.
[[ -x "$RUN_ONCE_SCRIPT" ]] || {
  printf '%s\n' \
    "Missing executable: $RUN_ONCE_SCRIPT" \
    "Run: chmod +x scripts/docs-sweep-once.sh"
  exit 1
}
#######################################
# Print command-line help.
# Arguments: none
# Outputs:   help text on stdout
#######################################
usage() {
  printf '%s\n' \
    'Usage: scripts/docs-sweep-watch.sh [options]' \
    'Options:' \
    '-r, --report One-off: summarize current log with Codex and exit.' \
    '-h, --help Show this help message.' \
    'Environment:' \
    'AGENT_ID Stable agent id (default: docs-sweep)' \
    'AGENT_ALIAS Human label shown in logs/coordination (default: Docs Sweep)' \
    'AGENT_MISSION One-line focus for this run' \
    'SUBAGENT_ROOT Coordination root (default: docs/subagents)'
}
#######################################
# Cap the shared log at the 50 most recent sweep runs.
# A run begins at a line containing "state changed; starting docs sweep";
# everything before the 50th-from-last such line is dropped.
# Globals:   LOG_FILE (read, rewritten in place via a sibling temp file)
# Arguments: none
# Returns:   0 when nothing needs trimming, otherwise the status of the move
#######################################
trim_log_runs() {
  [[ -f "$LOG_FILE" ]] || return 0

  local max_runs=50
  local first_kept_line
  # awk remembers the line number of every run marker and prints the line
  # where the retained window starts, or 0 when the log is within the limit.
  first_kept_line="$(
    awk -v max="$max_runs" '
      /state changed; starting docs sweep/ { marker[++seen] = NR }
      END { print (seen > max ? marker[seen - max + 1] : 0) }
    ' "$LOG_FILE"
  )"

  # Nothing to trim unless awk produced a positive line number.
  if [[ ! "$first_kept_line" =~ ^[0-9]+$ ]] || (( first_kept_line <= 0 )); then
    return 0
  fi

  # Write the retained tail next to the log, then swap it into place.
  local trimmed_copy
  trimmed_copy="$(mktemp "${LOG_FILE}.XXXXXX")"
  tail -n "+$first_kept_line" "$LOG_FILE" > "$trimmed_copy"
  mv "$trimmed_copy" "$LOG_FILE"
}
run_report() {
local has_log=false
local has_index=false
local has_collab=false
local has_agent_file=false
if [[ -s "$LOG_FILE" ]]; then
has_log=true
fi
if [[ -s "$SUBAGENT_INDEX_FILE" ]]; then
has_index=true
fi
if [[ -s "$SUBAGENT_COLLAB_FILE" ]]; then
has_collab=true
fi
if [[ -s "$AGENT_FILE" ]]; then
has_agent_file=true
fi
if [[ "$has_log" != "true" && "$has_index" != "true" && "$has_collab" != "true" && "$has_agent_file" != "true" ]]; then
echo "[REPORT] no inputs; missing/empty files:"
echo "[REPORT] - $LOG_FILE"
echo "[REPORT] - $SUBAGENT_INDEX_FILE"
echo "[REPORT] - $SUBAGENT_COLLAB_FILE"
echo "[REPORT] - $AGENT_FILE"
return
fi
local report_prompt
read -r -d '' report_prompt << EOF || true
Summarize docs sweep state. Output:
- Changes made (short bullets; file-focused when possible)
- Agent coordination updates from sharded docs/subagents files
- Open questions / uncertainty
- Left undone / follow-up items
Constraints:
- Be concise.
- If uncertain, say uncertain.
Read these files directly if present:
$LOG_FILE
$SUBAGENT_INDEX_FILE
$SUBAGENT_COLLAB_FILE
$AGENT_FILE
EOF
echo "[REPORT] codex summary start"
local report_file
local report_stderr
report_file="$(mktemp /tmp/docs-sweep-report.XXXXXX)"
report_stderr="$(mktemp /tmp/docs-sweep-report-stderr.XXXXXX)"
(
cd "$REPO"
timeout "${REPORT_TIMEOUT_SECONDS}s" bash -lc "$REPORT_AGENT_CMD -o $(printf '%q' "$report_file") $(printf '%q' "$report_prompt")" > /dev/null 2> "$report_stderr"
)
local report_exit=$?
if (( report_exit != 0 )); then
echo "[REPORT] codex summary failed (exit $report_exit)"
cat "$report_stderr"
echo
echo "[REPORT] codex summary end"
return
fi
if [[ -s "$report_file" ]]; then
cat "$report_file"
else
echo "[REPORT] codex produced no final message"
fi
echo
echo "[REPORT] codex summary end"
}
# Parse command-line options. Help and unknown options terminate the script
# from inside the loop, so only -r/--report ever reaches the trailing shift.
while [[ $# -gt 0 ]]; do
  case "$1" in
    -h | --help)
      usage
      exit 0
      ;;
    -r | --report)
      REPORT_WITH_CODEX=true
      ;;
    *)
      echo "Unknown option: $1"
      usage
      exit 1
      ;;
  esac
  shift
done

# Report mode is a one-off: trim the log, print the summary, and stop before
# the watch loop starts.
if [[ "$REPORT_WITH_CODEX" == "true" ]]; then
  trim_log_runs
  run_report
  exit 0
fi
# Watch loop: run one sweep cycle, trim the log, sleep, repeat until INT/TERM.
# The trap only sets a flag; the loop checks it at safe points so a cycle is
# never cut off half-way by the watcher itself.
stop_requested=false
trap 'stop_requested=true' INT TERM
echo "Starting docs sweep watcher (interval: ${INTERVAL_SECONDS}s, subagent_root: ${SUBAGENT_ROOT}). Press Ctrl+C to stop."
while true; do
  run_started_at="$(date -Is)"
  echo "[RUN] [$run_started_at] docs sweep cycle running"

  # Capture the cycle's exit status at the call site; reading $? after a
  # later `date` command substitution would always report 0.
  exit_code=0
  "$RUN_ONCE_SCRIPT" || exit_code=$?
  if (( exit_code == 0 )); then
    run_finished_at="$(date -Is)"
    echo "[OK] [$run_finished_at] docs sweep cycle complete"
  else
    run_failed_at="$(date -Is)"
    echo "[FAIL] [$run_failed_at] docs sweep cycle failed (exit $exit_code)"
  fi

  trim_log_runs
  if [[ "$stop_requested" == "true" ]]; then
    break
  fi

  # Sleep in the background so a trapped signal interrupts the wait at once.
  # `wait` then returns a status above 128; `|| true` keeps `set -e` from
  # aborting the script before the stop flag is checked.
  sleep "$INTERVAL_SECONDS" &
  sleep_pid=$!
  wait "$sleep_pid" || true
  if [[ "$stop_requested" == "true" ]]; then
    # Do not leave the interrupted sleep running after the watcher exits.
    kill "$sleep_pid" 2> /dev/null || true
    break
  fi
done
echo "Docs sweep watcher stopped."