Enhance AniList character dictionary sync and subtitle features (#15)

This commit is contained in:
2026-03-07 18:30:59 -08:00
committed by GitHub
parent 2f07c3407a
commit e18985fb14
696 changed files with 14297 additions and 173564 deletions

144
scripts/build-yomitan.mjs Normal file
View File

@@ -0,0 +1,144 @@
import fs from 'node:fs';
import os from 'node:os';
import path from 'node:path';
import { createHash } from 'node:crypto';
import { execFileSync } from 'node:child_process';
import { fileURLToPath } from 'node:url';
// Absolute directory of this script (ESM modules have no __dirname).
const dirname = path.dirname(fileURLToPath(import.meta.url));
// Repository root: one level above scripts/.
const repoRoot = path.resolve(dirname, '..');
// Forked Yomitan source submodule that this script builds.
const submoduleDir = path.join(repoRoot, 'vendor', 'subminer-yomitan');
const submodulePackagePath = path.join(submoduleDir, 'package.json');
const submodulePackageLockPath = path.join(submoduleDir, 'package-lock.json');
// Destination for the extracted Chrome extension.
const buildOutputDir = path.join(repoRoot, 'build', 'yomitan');
// Records which submodule revision/dirty-state produced the current output.
const stampPath = path.join(buildOutputDir, '.subminer-build.json');
// Zip artifact produced by the submodule's own build.
const zipPath = path.join(submoduleDir, 'builds', 'yomitan-chrome.zip');
// npm is npm.cmd on Windows.
const npmCommand = process.platform === 'win32' ? 'npm.cmd' : 'npm';
// Hash of the lock file that node_modules was last installed from.
const dependencyStampPath = path.join(submoduleDir, 'node_modules', '.subminer-package-lock-hash');
// Execute a command in the given working directory, streaming its output
// to this process; throws on a nonzero exit status.
function run(command, args, cwd) {
  const options = { cwd, stdio: 'inherit' };
  execFileSync(command, args, options);
}
// Execute a command in the given working directory and return its
// stdout with surrounding whitespace stripped; throws on nonzero exit.
function readCommand(command, args, cwd) {
  const stdout = execFileSync(command, args, { cwd, encoding: 'utf8' });
  return stdout.trim();
}
// Parse the build stamp JSON, or return null when the stamp file is
// absent or unreadable (meaning: no prior build recorded).
function readStamp() {
  let stamp = null;
  try {
    const raw = fs.readFileSync(stampPath, 'utf8');
    stamp = JSON.parse(raw);
  } catch {
    // Missing/corrupt stamp -> treat as never built.
  }
  return stamp;
}
// Return the SHA-256 hex digest of a file's contents.
function hashFile(filePath) {
  const contents = fs.readFileSync(filePath);
  return createHash('sha256').update(contents).digest('hex');
}
// Fail fast with setup guidance when the forked Yomitan submodule has
// not been initialized (detected via its package.json).
function ensureSubmodulePresent() {
  if (fs.existsSync(submodulePackagePath)) {
    return;
  }
  throw new Error(
    'Missing vendor/subminer-yomitan submodule. Run `git submodule update --init --recursive`.',
  );
}
// Snapshot the submodule's git state: HEAD revision plus a short-format
// listing of tracked modifications (empty string when clean).
function getSourceState() {
  const dirty = readCommand('git', ['status', '--short', '--untracked-files=no'], submoduleDir);
  const revision = readCommand('git', ['rev-parse', 'HEAD'], submoduleDir);
  return { revision, dirty };
}
// Decide whether the existing build output can be reused: it must exist
// (manifest.json present), have a stamp, and the stamp must match the
// submodule's current revision and dirty status. `force` always rebuilds.
function isBuildCurrent(force) {
  if (force) return false;
  const manifestPath = path.join(buildOutputDir, 'manifest.json');
  if (!fs.existsSync(manifestPath)) return false;
  const stamp = readStamp();
  if (!stamp) return false;
  const current = getSourceState();
  return stamp.revision === current.revision && stamp.dirty === current.dirty;
}
// Install submodule dependencies with `npm ci` when node_modules is
// missing or was installed from a different package-lock.json, then stamp
// node_modules with the lock file's hash so later runs can skip the install.
function ensureDependenciesInstalled() {
  const nodeModulesDir = path.join(submoduleDir, 'node_modules');
  // BUGFIX: without this guard a missing lock file surfaced as a raw
  // ENOENT from hashFile instead of an actionable message.
  if (!fs.existsSync(submodulePackageLockPath)) {
    throw new Error(
      `Missing ${submodulePackageLockPath}. The submodule checkout looks incomplete; run \`git submodule update --init --recursive\`.`,
    );
  }
  const currentLockHash = hashFile(submodulePackageLockPath);
  let installedLockHash = '';
  try {
    installedLockHash = fs.readFileSync(dependencyStampPath, 'utf8').trim();
  } catch {
    // No stamp yet -> treat as not installed.
  }
  if (!fs.existsSync(nodeModulesDir) || installedLockHash !== currentLockHash) {
    run(npmCommand, ['ci'], submoduleDir);
    // `npm ci` recreates node_modules; ensure it exists before writing the stamp.
    fs.mkdirSync(nodeModulesDir, { recursive: true });
    fs.writeFileSync(dependencyStampPath, `${currentLockHash}\n`, 'utf8');
  }
}
// Make sure dependencies are in place, then build the Chrome target
// inside the submodule.
function installAndBuild() {
  ensureDependenciesInstalled();
  const buildArgs = ['run', 'build', '--', '--target', 'chrome'];
  run(npmCommand, buildArgs, submoduleDir);
}
// Unpack the freshly built Chrome zip into build/yomitan, replacing any
// previous contents. Throws when the zip is missing or the extracted tree
// lacks manifest.json. The temp staging directory is always cleaned up.
function extractBuild() {
  if (!fs.existsSync(zipPath)) {
    throw new Error(`Expected Yomitan build artifact at ${zipPath}`);
  }
  const stagingDir = fs.mkdtempSync(path.join(os.tmpdir(), 'subminer-yomitan-'));
  try {
    // -q: quiet, -o: overwrite without prompting.
    run('unzip', ['-qo', zipPath, '-d', stagingDir], repoRoot);
    fs.rmSync(buildOutputDir, { recursive: true, force: true });
    fs.mkdirSync(path.dirname(buildOutputDir), { recursive: true });
    fs.cpSync(stagingDir, buildOutputDir, { recursive: true });
    const manifestPath = path.join(buildOutputDir, 'manifest.json');
    if (!fs.existsSync(manifestPath)) {
      throw new Error(`Extracted Yomitan build missing manifest.json in ${buildOutputDir}`);
    }
  } finally {
    fs.rmSync(stagingDir, { recursive: true, force: true });
  }
}
// Persist the submodule state this build was produced from, so future
// runs can detect whether a rebuild is needed (see isBuildCurrent).
function writeStamp() {
  const { revision, dirty } = getSourceState();
  const stamp = {
    revision,
    dirty,
    builtAt: new Date().toISOString(),
  };
  const serialized = `${JSON.stringify(stamp, null, 2)}\n`;
  fs.writeFileSync(stampPath, serialized, 'utf8');
}
// Entry point: skip the build when the output is current (unless --force
// is passed), otherwise build the submodule, extract the zip, and stamp.
function main() {
  const forceRebuild = process.argv.includes('--force');
  ensureSubmodulePresent();
  if (isBuildCurrent(forceRebuild)) {
    process.stdout.write(`Yomitan build current: ${buildOutputDir}\n`);
  } else {
    process.stdout.write('Building Yomitan Chrome artifact...\n');
    installAndBuild();
    extractBuild();
    writeStamp();
    process.stdout.write(`Yomitan extracted to ${buildOutputDir}\n`);
  }
}
main();

View File

@@ -1,165 +0,0 @@
#!/usr/bin/env bash
# docs-sweep-once.sh - run a single docs-drift sweep cycle.
# Skips the run unless the repo's working-tree state changed since last time,
# then invokes a non-interactive agent with a documentation-cleanup prompt.
set -euo pipefail
# Target repository plus lock/state/log bookkeeping (all overridable via env).
REPO="${REPO:-$HOME/projects/japanese/SubMiner}"
LOCK_FILE="${LOCK_FILE:-/tmp/subminer-doc-sweep.lock}"
STATE_FILE="${STATE_FILE:-/tmp/subminer-doc-sweep.state}"
LOG_FILE="${LOG_FILE:-$REPO/.codex-doc-sweep.log}"
# Hard cap on agent runtime per sweep.
TIMEOUT_SECONDS="${TIMEOUT_SECONDS:-240}"
# Sharded coordination layout: index + collaboration log + per-agent files.
SUBAGENT_ROOT="${SUBAGENT_ROOT:-$REPO/docs/subagents}"
SUBAGENT_INDEX_FILE="${SUBAGENT_INDEX_FILE:-$SUBAGENT_ROOT/INDEX.md}"
SUBAGENT_COLLAB_FILE="${SUBAGENT_COLLAB_FILE:-$SUBAGENT_ROOT/collaboration.md}"
SUBAGENT_AGENTS_DIR="${SUBAGENT_AGENTS_DIR:-$SUBAGENT_ROOT/agents}"
LEGACY_SUBAGENT_FILE="${LEGACY_SUBAGENT_FILE:-$REPO/docs/subagent.md}"
# Identity shown in logs and coordination docs.
AGENT_ID="${AGENT_ID:-docs-sweep}"
AGENT_ALIAS="${AGENT_ALIAS:-Docs Sweep}"
AGENT_MISSION="${AGENT_MISSION:-Docs drift cleanup and coordination updates}"
# Non-interactive agent command used to run the prompt.
# Example:
# AGENT_CMD='codex exec'
# AGENT_CMD='opencode run'
AGENT_CMD="${AGENT_CMD:-codex exec}"
# Sanitize AGENT_ID so it is safe to use as a filename.
AGENT_ID_SAFE="$(printf '%s' "$AGENT_ID" | tr -c 'A-Za-z0-9._-' '_')"
AGENT_FILE="${SUBAGENT_AGENTS_DIR}/${AGENT_ID_SAFE}.md"
# Ensure parent directories for bookkeeping and coordination files exist.
mkdir -p "$(dirname "$LOCK_FILE")"
mkdir -p "$(dirname "$STATE_FILE")"
mkdir -p "$SUBAGENT_ROOT" "$SUBAGENT_AGENTS_DIR" "$SUBAGENT_ROOT/archive"
# Single-instance guard: hold fd 9 on the lock file; exit quietly if another
# run already owns it (non-blocking flock).
exec 9> "$LOCK_FILE"
if ! flock -n 9; then
exit 0
fi
cd "$REPO"
# Fingerprint tracked modifications plus untracked files; an identical hash
# means nothing changed since the previous sweep, so this run is skipped.
current_state="$({
git status --porcelain=v1
git ls-files --others --exclude-standard
} | sha256sum | cut -d' ' -f1)"
previous_state="$(cat "$STATE_FILE" 2> /dev/null || true)"
if [[ "$current_state" == "$previous_state" ]]; then
exit 0
fi
printf '%s' "$current_state" > "$STATE_FILE"
run_started_at="$(date -Is)"
run_started_utc="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
echo "[RUN] [$run_started_at] docs sweep running (agent_id=$AGENT_ID alias=$AGENT_ALIAS)"
echo "[$run_started_at] state changed; starting docs sweep (agent_id=$AGENT_ID alias=$AGENT_ALIAS)" >> "$LOG_FILE"
# Seed the shared coordination index on first run (quoted EOF: no expansion).
if [[ ! -f "$SUBAGENT_INDEX_FILE" ]]; then
cat > "$SUBAGENT_INDEX_FILE" << 'EOF'
# Subagents Index
Read first. Keep concise.
| agent_id | alias | mission | status | file | last_update_utc |
| --- | --- | --- | --- | --- | --- |
EOF
fi
# Seed the append-only collaboration log on first run.
if [[ ! -f "$SUBAGENT_COLLAB_FILE" ]]; then
cat > "$SUBAGENT_COLLAB_FILE" << 'EOF'
# Subagents Collaboration
Shared notes. Append-only.
- [YYYY-MM-DDTHH:MM:SSZ] [agent_id|alias] note, question, dependency, conflict, decision.
EOF
fi
# Seed this agent's own status file (unquoted EOF: variables DO expand here).
if [[ ! -f "$AGENT_FILE" ]]; then
cat > "$AGENT_FILE" << EOF
# Agent: $AGENT_ID
- alias: $AGENT_ALIAS
- mission: $AGENT_MISSION
- status: planning
- branch: unknown
- started_at: $run_started_utc
- heartbeat_minutes: 20
## Current Work (newest first)
- [$run_started_utc] intent: initialize section
## Files Touched
- none yet
## Assumptions
- none yet
## Open Questions / Blockers
- none
## Next Step
- continue run
EOF
fi
# Warn when the pre-shard single-file layout is still present.
if [[ -f "$LEGACY_SUBAGENT_FILE" ]]; then
echo "[WARN] [$run_started_at] legacy file exists; prefer sharded layout: $LEGACY_SUBAGENT_FILE" | tee -a "$LOG_FILE"
fi
# Build the agent prompt. `read -d ''` returns nonzero at EOF, so `|| true`
# keeps set -e from aborting; unquoted EOF lets the path variables expand.
read -r -d '' PROMPT << EOF || true
Watch for in-flight refactors. If repo changes introduced drift, update only:
- README.md
- AGENTS.md
- docs/**/*.md
- config.example.jsonc
- docs/public/config.example.jsonc <-- generated automatically with make generate-example-config / bun run generate:config-example
- package.json scripts/config references (only if needed)
Coordination protocol:
- Read in order before edits:
1) \`$SUBAGENT_INDEX_FILE\`
2) \`$SUBAGENT_COLLAB_FILE\`
3) \`$AGENT_FILE\`
- Edit scope:
- MAY edit own file: \`$AGENT_FILE\`
- MAY append to collaboration: \`$SUBAGENT_COLLAB_FILE\`
- MAY update own row in index: \`$SUBAGENT_INDEX_FILE\`
- MUST NOT edit other agent files in \`$SUBAGENT_AGENTS_DIR\`
- Ensure own file has updated: alias, mission, status, branch, started_at, heartbeat_minutes.
- Add UTC ISO entries in "Current Work (newest first)" for intent/progress/handoff for this run.
- Keep own file sections current: Files Touched, assumptions, blockers, next step.
- Ensure index row for \`$AGENT_ID\` reflects alias/mission/status/file/last_update_utc.
- If file conflict/dependency seen, append note in collaboration.
Run metadata:
- run_started_at_utc: $run_started_utc
- repo: $REPO
- agent_id: $AGENT_ID
- agent_alias: $AGENT_ALIAS
- agent_file: $AGENT_FILE
Rules:
- Keep edits minimal and accurate to current code.
- Do not commit.
- Do not push.
- If ambiguous, do not guess; skip and report uncertainty.
- Print concise summary with:
1) files changed + why
2) coordination updates made (\`$SUBAGENT_INDEX_FILE\`, \`$SUBAGENT_COLLAB_FILE\`, \`$AGENT_FILE\`)
3) open questions/blockers
EOF
# Shell-quote the prompt so it survives the extra `bash -lc` evaluation layer.
quoted_prompt="$(printf '%q' "$PROMPT")"
job_status=0
if timeout "${TIMEOUT_SECONDS}s" bash -lc "$AGENT_CMD $quoted_prompt" >> "$LOG_FILE" 2>&1; then
  run_finished_at="$(date -Is)"
  echo "[OK] [$run_finished_at] docs sweep complete (agent_id=$AGENT_ID)"
  echo "[$run_finished_at] docs sweep complete (agent_id=$AGENT_ID)" >> "$LOG_FILE"
else
  # BUGFIX: read $? before running anything else. The original captured it
  # after `date`, so failed sweeps always reported (exit 0) and the script
  # itself exited 0.
  exit_code=$?
  job_status=$exit_code
  run_failed_at="$(date -Is)"
  echo "[FAIL] [$run_failed_at] docs sweep failed (exit $exit_code, agent_id=$AGENT_ID)"
  echo "[$run_failed_at] docs sweep failed (exit $exit_code, agent_id=$AGENT_ID)" >> "$LOG_FILE"
fi
exit "$job_status"

View File

@@ -1,192 +0,0 @@
#!/usr/bin/env bash
# docs-sweep-watch.sh - loop docs-sweep-once.sh on an interval, or emit a
# one-off summary report of sweep state with -r/--report.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
RUN_ONCE_SCRIPT="$SCRIPT_DIR/docs-sweep-once.sh"
# Seconds between sweep cycles.
INTERVAL_SECONDS="${INTERVAL_SECONDS:-300}"
REPO="${REPO:-$HOME/projects/japanese/SubMiner}"
LOG_FILE="${LOG_FILE:-$REPO/.codex-doc-sweep.log}"
# Sharded coordination layout (defaults match docs-sweep-once.sh).
SUBAGENT_ROOT="${SUBAGENT_ROOT:-$REPO/docs/subagents}"
SUBAGENT_INDEX_FILE="${SUBAGENT_INDEX_FILE:-$SUBAGENT_ROOT/INDEX.md}"
SUBAGENT_COLLAB_FILE="${SUBAGENT_COLLAB_FILE:-$SUBAGENT_ROOT/collaboration.md}"
SUBAGENT_AGENTS_DIR="${SUBAGENT_AGENTS_DIR:-$SUBAGENT_ROOT/agents}"
AGENT_ID="${AGENT_ID:-docs-sweep}"
# Sanitize AGENT_ID so it is safe to use as a filename.
AGENT_ID_SAFE="$(printf '%s' "$AGENT_ID" | tr -c 'A-Za-z0-9._-' '_')"
AGENT_FILE="${AGENT_FILE:-$SUBAGENT_AGENTS_DIR/${AGENT_ID_SAFE}.md}"
# Report mode settings (-r flips REPORT_WITH_CODEX).
REPORT_WITH_CODEX=false
REPORT_TIMEOUT_SECONDS="${REPORT_TIMEOUT_SECONDS:-120}"
REPORT_AGENT_CMD="${REPORT_AGENT_CMD:-codex exec}"
# The once-script must exist and be executable before we loop on it.
if [[ ! -x "$RUN_ONCE_SCRIPT" ]]; then
echo "Missing executable: $RUN_ONCE_SCRIPT"
echo "Run: chmod +x scripts/docs-sweep-once.sh"
exit 1
fi
# Print CLI usage and the environment variables that tune a run.
usage() {
cat << 'EOF'
Usage: scripts/docs-sweep-watch.sh [options]
Options:
-r, --report One-off: summarize current log with Codex and exit.
-h, --help Show this help message.
Environment:
AGENT_ID Stable agent id (default: docs-sweep)
AGENT_ALIAS Human label shown in logs/coordination (default: Docs Sweep)
AGENT_MISSION One-line focus for this run
SUBAGENT_ROOT Coordination root (default: docs/subagents)
EOF
}
trim_log_runs() {
# Keep only the last 50 docs-sweep runs in the shared log file.
if [[ ! -f "$LOG_FILE" ]]; then
return
fi
local keep_runs=50
local start_line
# awk records the line number of every run-start marker and prints the
# line where the oldest run we keep begins, or 0 when under the cap.
start_line="$(
awk -v max="$keep_runs" '
/state changed; starting docs sweep/ { lines[++count] = NR }
END {
if (count > max) print lines[count - max + 1]
else print 0
}
' "$LOG_FILE"
)"
# Rewrite the log from that line onward via a temp file in the same dir
# so the final mv stays an atomic same-filesystem rename.
if [[ "$start_line" =~ ^[0-9]+$ ]] && (( start_line > 0 )); then
local tmp_file
tmp_file="$(mktemp "${LOG_FILE}.XXXXXX")"
tail -n +"$start_line" "$LOG_FILE" > "$tmp_file"
mv "$tmp_file" "$LOG_FILE"
fi
}
# One-off report: summarize sweep state from the shared log and the sharded
# coordination files via the configured agent command. Prints a notice and
# returns when no input file has content; reports (rather than aborts on)
# agent failure.
run_report() {
  local has_log=false
  local has_index=false
  local has_collab=false
  local has_agent_file=false
  if [[ -s "$LOG_FILE" ]]; then
    has_log=true
  fi
  if [[ -s "$SUBAGENT_INDEX_FILE" ]]; then
    has_index=true
  fi
  if [[ -s "$SUBAGENT_COLLAB_FILE" ]]; then
    has_collab=true
  fi
  if [[ -s "$AGENT_FILE" ]]; then
    has_agent_file=true
  fi
  if [[ "$has_log" != "true" && "$has_index" != "true" && "$has_collab" != "true" && "$has_agent_file" != "true" ]]; then
    echo "[REPORT] no inputs; missing/empty files:"
    echo "[REPORT] - $LOG_FILE"
    echo "[REPORT] - $SUBAGENT_INDEX_FILE"
    echo "[REPORT] - $SUBAGENT_COLLAB_FILE"
    echo "[REPORT] - $AGENT_FILE"
    return
  fi
  local report_prompt
  # read -d '' returns nonzero at EOF, hence || true under set -e.
  read -r -d '' report_prompt << EOF || true
Summarize docs sweep state. Output:
- Changes made (short bullets; file-focused when possible)
- Agent coordination updates from sharded docs/subagents files
- Open questions / uncertainty
- Left undone / follow-up items
Constraints:
- Be concise.
- If uncertain, say uncertain.
Read these files directly if present:
$LOG_FILE
$SUBAGENT_INDEX_FILE
$SUBAGENT_COLLAB_FILE
$AGENT_FILE
EOF
  echo "[REPORT] codex summary start"
  local report_file
  local report_stderr
  report_file="$(mktemp /tmp/docs-sweep-report.XXXXXX)"
  report_stderr="$(mktemp /tmp/docs-sweep-report-stderr.XXXXXX)"
  # BUGFIX: a bare failing subshell aborts the whole script under `set -e`,
  # making the failure branch below unreachable. Capture the status with
  # `||` so a nonzero agent exit is reported instead of killing the script.
  local report_exit=0
  (
    cd "$REPO"
    timeout "${REPORT_TIMEOUT_SECONDS}s" bash -lc "$REPORT_AGENT_CMD -o $(printf '%q' "$report_file") $(printf '%q' "$report_prompt")" > /dev/null 2> "$report_stderr"
  ) || report_exit=$?
  if (( report_exit != 0 )); then
    echo "[REPORT] codex summary failed (exit $report_exit)"
    cat "$report_stderr"
    echo
    echo "[REPORT] codex summary end"
    # Clean up scratch files (the original leaked them in /tmp).
    rm -f "$report_file" "$report_stderr"
    return
  fi
  if [[ -s "$report_file" ]]; then
    cat "$report_file"
  else
    echo "[REPORT] codex produced no final message"
  fi
  echo
  echo "[REPORT] codex summary end"
  rm -f "$report_file" "$report_stderr"
}
# Parse CLI flags; anything unrecognized prints usage and exits nonzero.
while (( $# > 0 )); do
case "$1" in
-r|--report)
REPORT_WITH_CODEX=true
shift
;;
-h|--help)
usage
exit 0
;;
*)
echo "Unknown option: $1"
usage
exit 1
;;
esac
done
# Report mode: trim the log, emit the summary, and exit without looping.
if [[ "$REPORT_WITH_CODEX" == "true" ]]; then
trim_log_runs
run_report
exit 0
fi
stop_requested=false
# Defer shutdown to the loop checkpoints so an in-flight cycle can finish.
trap 'stop_requested=true' INT TERM
echo "Starting docs sweep watcher (interval: ${INTERVAL_SECONDS}s, subagent_root: ${SUBAGENT_ROOT}). Press Ctrl+C to stop."
while true; do
  run_started_at="$(date -Is)"
  echo "[RUN] [$run_started_at] docs sweep cycle running"
  if "$RUN_ONCE_SCRIPT"; then
    run_finished_at="$(date -Is)"
    echo "[OK] [$run_finished_at] docs sweep cycle complete"
  else
    # BUGFIX: read $? before any other command; the original ran `date`
    # first, so the reported exit code was always 0.
    exit_code=$?
    run_failed_at="$(date -Is)"
    echo "[FAIL] [$run_failed_at] docs sweep cycle failed (exit $exit_code)"
  fi
  trim_log_runs
  if [[ "$stop_requested" == "true" ]]; then
    break
  fi
  # Background sleep + wait lets INT/TERM interrupt the delay immediately.
  # BUGFIX: `|| true` — an interrupted `wait` returns >128, which would
  # abort the script under `set -e` before the graceful-stop path runs.
  sleep "$INTERVAL_SECONDS" &
  wait $! || true
  if [[ "$stop_requested" == "true" ]]; then
    break
  fi
done
echo "Docs sweep watcher stopped."

View File

@@ -4,6 +4,7 @@ import process from 'node:process';
import { createTokenizerDepsRuntime, tokenizeSubtitle } from '../src/core/services/tokenizer.js';
import { createFrequencyDictionaryLookup } from '../src/core/services/frequency-dictionary.js';
import { resolveYomitanExtensionPath as resolveBuiltYomitanExtensionPath } from '../src/core/services/yomitan-extension-paths.js';
import { MecabTokenizer } from '../src/mecab-tokenizer.js';
import type { MergedToken, FrequencyDictionaryLookup } from '../src/types.js';
@@ -14,7 +15,7 @@ interface CliOptions {
emitDiagnostics: boolean;
mecabCommand?: string;
mecabDictionaryPath?: string;
forceMecabOnly?: boolean;
forceMecabOnly: boolean;
yomitanExtensionPath?: string;
yomitanUserDataPath?: string;
emitColoredLine: boolean;
@@ -48,7 +49,7 @@ function parseCliArgs(argv: string[]): CliOptions {
let colorBand1 = '#ed8796';
let colorBand2 = '#f5a97f';
let colorBand3 = '#f9e2af';
let colorBand4 = '#a6e3a1';
let colorBand4 = '#8bd5ca';
let colorBand5 = '#8aadf4';
let colorKnown = '#a6da95';
let colorNPlusOne = '#c6a0f6';
@@ -94,7 +95,7 @@ function parseCliArgs(argv: string[]): CliOptions {
if (!next) {
throw new Error('Missing value for --yomitan-extension');
}
yomitanExtensionPath = path.resolve(next);
yomitanExtensionPath = next;
continue;
}
@@ -103,7 +104,7 @@ function parseCliArgs(argv: string[]): CliOptions {
if (!next) {
throw new Error('Missing value for --yomitan-user-data');
}
yomitanUserDataPath = path.resolve(next);
yomitanUserDataPath = next;
continue;
}
@@ -225,12 +226,12 @@ function parseCliArgs(argv: string[]): CliOptions {
}
if (arg.startsWith('--yomitan-extension=')) {
yomitanExtensionPath = path.resolve(arg.slice('--yomitan-extension='.length));
yomitanExtensionPath = arg.slice('--yomitan-extension='.length);
continue;
}
if (arg.startsWith('--yomitan-user-data=')) {
yomitanUserDataPath = path.resolve(arg.slice('--yomitan-user-data='.length));
yomitanUserDataPath = arg.slice('--yomitan-user-data='.length);
continue;
}
@@ -524,7 +525,10 @@ function destroyUnknownParserWindow(window: unknown): void {
}
}
async function createYomitanRuntimeState(userDataPath: string): Promise<YomitanRuntimeState> {
async function createYomitanRuntimeState(
userDataPath: string,
extensionPath?: string,
): Promise<YomitanRuntimeState> {
const state: YomitanRuntimeState = {
yomitanExt: null,
parserWindow: null,
@@ -547,6 +551,7 @@ async function createYomitanRuntimeState(userDataPath: string): Promise<YomitanR
const loadYomitanExtension = (await import('../src/core/services/yomitan-extension-loader.js'))
.loadYomitanExtension as (options: {
userDataPath: string;
extensionPath?: string;
getYomitanParserWindow: () => unknown;
setYomitanParserWindow: (window: unknown) => void;
setYomitanParserReadyPromise: (promise: Promise<void> | null) => void;
@@ -556,6 +561,7 @@ async function createYomitanRuntimeState(userDataPath: string): Promise<YomitanR
const extension = await loadYomitanExtension({
userDataPath,
extensionPath,
getYomitanParserWindow: () => state.parserWindow,
setYomitanParserWindow: (window) => {
state.parserWindow = window;
@@ -589,17 +595,16 @@ async function createYomitanRuntimeStateWithSearch(
userDataPath: string,
extensionPath?: string,
): Promise<YomitanRuntimeState> {
const preferredPath = extensionPath ? path.resolve(extensionPath) : undefined;
const defaultVendorPath = path.resolve(process.cwd(), 'vendor', 'yomitan');
const candidates = [...(preferredPath ? [preferredPath] : []), defaultVendorPath];
const resolvedExtensionPath = resolveBuiltYomitanExtensionPath({
explicitPath: extensionPath,
cwd: process.cwd(),
});
const candidates = resolvedExtensionPath ? [resolvedExtensionPath] : [];
for (const candidate of candidates) {
if (!candidate) {
continue;
}
try {
if (fs.existsSync(path.join(candidate, 'manifest.json'))) {
const state = await createYomitanRuntimeState(userDataPath);
const state = await createYomitanRuntimeState(userDataPath, candidate);
if (state.available) {
return state;
}
@@ -613,7 +618,7 @@ async function createYomitanRuntimeStateWithSearch(
}
}
return createYomitanRuntimeState(userDataPath);
return createYomitanRuntimeState(userDataPath, resolvedExtensionPath ?? undefined);
}
async function getFrequencyLookup(dictionaryPath: string): Promise<FrequencyDictionaryLookup> {
@@ -678,7 +683,7 @@ function getBandColor(
}
const normalizedBand = Math.ceil((safeRank / topX) * bandedColors.length);
const band = Math.min(bandedColors.length, Math.max(1, normalizedBand));
return bandedColors[band - 1];
return bandedColors[band - 1] ?? colorSingle;
}
function getTokenColor(token: MergedToken, args: CliOptions): string {
@@ -845,7 +850,26 @@ async function main(): Promise<void> {
? simplifyTokenWithVerbose(token, getFrequencyRank)
: simplifyToken(token),
) ?? null;
const diagnostics = {
const diagnostics: {
yomitan: {
available: boolean;
loaded: boolean;
forceMecabOnly: boolean;
note: string | null;
};
mecab: {
command: string;
dictionaryPath: string | null;
available: boolean;
status?: 'ok' | 'no-tokens';
note?: string;
};
tokenizer: {
sourceHint: 'none' | 'yomitan-merged' | 'mecab-merge';
mergedTokenCount: number;
totalTokenCount: number;
};
} = {
yomitan: {
available: Boolean(yomitanState?.available),
loaded: useYomitan,
@@ -864,11 +888,11 @@ async function main(): Promise<void> {
},
};
if (tokens === null) {
diagnostics.mecab['status'] = 'no-tokens';
diagnostics.mecab['note'] =
diagnostics.mecab.status = 'no-tokens';
diagnostics.mecab.note =
'MeCab returned no parseable tokens. This is often caused by a missing/invalid MeCab dictionary path.';
} else {
diagnostics.mecab['status'] = 'ok';
diagnostics.mecab.status = 'ok';
}
const output = {

View File

@@ -1,261 +1,16 @@
#!/bin/bash
#
# SubMiner - All-in-one sentence mining overlay
# Copyright (C) 2024 sudacode
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
# patch-yomitan.sh - Apply Electron compatibility patches to Yomitan
#
# This script applies the necessary patches to make Yomitan work in Electron
# after upgrading to a new version. Run this after extracting a fresh Yomitan release.
#
# Usage: ./patch-yomitan.sh [yomitan_dir]
# yomitan_dir: Path to the Yomitan directory (default: vendor/yomitan)
#
set -e
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
YOMITAN_DIR="${1:-$SCRIPT_DIR/../vendor/yomitan}"
cat <<'EOF'
patch-yomitan.sh is retired.
if [ ! -d "$YOMITAN_DIR" ]; then
echo "Error: Yomitan directory not found: $YOMITAN_DIR"
exit 1
fi
SubMiner now uses the forked source submodule at vendor/subminer-yomitan and builds the
Chromium extension artifact into build/yomitan.
echo "Patching Yomitan in: $YOMITAN_DIR"
Use:
git submodule update --init --recursive
bun run build:yomitan
PERMISSIONS_UTIL="$YOMITAN_DIR/js/data/permissions-util.js"
if [ ! -f "$PERMISSIONS_UTIL" ]; then
echo "Error: permissions-util.js not found at $PERMISSIONS_UTIL"
exit 1
fi
echo "Patching permissions-util.js..."
if grep -q "Electron workaround" "$PERMISSIONS_UTIL"; then
echo " - Already patched, skipping"
else
cat > "$PERMISSIONS_UTIL.tmp" << 'PATCH_EOF'
/*
* Copyright (C) 2023-2025 Yomitan Authors
* Copyright (C) 2021-2022 Yomichan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import {getFieldMarkers} from './anki-util.js';
/**
* This function returns whether an Anki field marker might require clipboard permissions.
* This is speculative and may not guarantee that the field marker actually does require the permission,
* as the custom handlebars template is not deeply inspected.
* @param {string} marker
* @returns {boolean}
*/
function ankiFieldMarkerMayUseClipboard(marker) {
switch (marker) {
case 'clipboard-image':
case 'clipboard-text':
return true;
default:
return false;
}
}
/**
* @param {chrome.permissions.Permissions} permissions
* @returns {Promise<boolean>}
*/
export function hasPermissions(permissions) {
return new Promise((resolve, reject) => {
chrome.permissions.contains(permissions, (result) => {
const e = chrome.runtime.lastError;
if (e) {
reject(new Error(e.message));
} else {
resolve(result);
}
});
});
}
/**
* @param {chrome.permissions.Permissions} permissions
* @param {boolean} shouldHave
* @returns {Promise<boolean>}
*/
export function setPermissionsGranted(permissions, shouldHave) {
return (
shouldHave ?
new Promise((resolve, reject) => {
chrome.permissions.request(permissions, (result) => {
const e = chrome.runtime.lastError;
if (e) {
reject(new Error(e.message));
} else {
resolve(result);
}
});
}) :
new Promise((resolve, reject) => {
chrome.permissions.remove(permissions, (result) => {
const e = chrome.runtime.lastError;
if (e) {
reject(new Error(e.message));
} else {
resolve(!result);
}
});
})
);
}
/**
* @returns {Promise<chrome.permissions.Permissions>}
*/
export function getAllPermissions() {
// Electron workaround - chrome.permissions.getAll() not available
return Promise.resolve({
origins: ["<all_urls>"],
permissions: ["clipboardWrite", "storage", "unlimitedStorage", "scripting", "contextMenus"]
});
}
/**
* @param {string} fieldValue
* @returns {string[]}
*/
export function getRequiredPermissionsForAnkiFieldValue(fieldValue) {
const markers = getFieldMarkers(fieldValue);
for (const marker of markers) {
if (ankiFieldMarkerMayUseClipboard(marker)) {
return ['clipboardRead'];
}
}
return [];
}
/**
* @param {chrome.permissions.Permissions} permissions
* @param {import('settings').ProfileOptions} options
* @returns {boolean}
*/
export function hasRequiredPermissionsForOptions(permissions, options) {
const permissionsSet = new Set(permissions.permissions);
if (!permissionsSet.has('nativeMessaging') && (options.parsing.enableMecabParser || options.general.enableYomitanApi)) {
return false;
}
if (!permissionsSet.has('clipboardRead')) {
if (options.clipboard.enableBackgroundMonitor || options.clipboard.enableSearchPageMonitor) {
return false;
}
const fieldsList = options.anki.cardFormats.map((cardFormat) => cardFormat.fields);
for (const fields of fieldsList) {
for (const {value: fieldValue} of Object.values(fields)) {
const markers = getFieldMarkers(fieldValue);
for (const marker of markers) {
if (ankiFieldMarkerMayUseClipboard(marker)) {
return false;
}
}
}
}
}
return true;
}
PATCH_EOF
mv "$PERMISSIONS_UTIL.tmp" "$PERMISSIONS_UTIL"
echo " - Patched successfully"
fi
OPTIONS_SCHEMA="$YOMITAN_DIR/data/schemas/options-schema.json"
if [ ! -f "$OPTIONS_SCHEMA" ]; then
echo "Error: options-schema.json not found at $OPTIONS_SCHEMA"
exit 1
fi
echo "Patching options-schema.json..."
if grep -q '"selectText".*"default": true' "$OPTIONS_SCHEMA"; then
sed -i '/"selectText": {/,/"default":/{s/"default": true/"default": false/}' "$OPTIONS_SCHEMA"
echo " - Changed selectText default to false"
elif grep -q '"selectText".*"default": false' "$OPTIONS_SCHEMA"; then
echo " - selectText already set to false, skipping"
else
echo " - Warning: Could not find selectText setting"
fi
if grep -q '"layoutAwareScan".*"default": true' "$OPTIONS_SCHEMA"; then
sed -i '/"layoutAwareScan": {/,/"default":/{s/"default": true/"default": false/}' "$OPTIONS_SCHEMA"
echo " - Changed layoutAwareScan default to false"
elif grep -q '"layoutAwareScan".*"default": false' "$OPTIONS_SCHEMA"; then
echo " - layoutAwareScan already set to false, skipping"
else
echo " - Warning: Could not find layoutAwareScan setting"
fi
POPUP_JS="$YOMITAN_DIR/js/app/popup.js"
if [ ! -f "$POPUP_JS" ]; then
echo "Error: popup.js not found at $POPUP_JS"
exit 1
fi
echo "Patching popup.js..."
if grep -q "yomitan-popup-shown" "$POPUP_JS"; then
echo " - Already patched, skipping"
else
# Add the visibility event dispatch after the existing _onVisibleChange code
# We need to add it after: void this._invokeSafe('displayVisibilityChanged', {value});
sed -i "/void this._invokeSafe('displayVisibilityChanged', {value});/a\\
\\
// Dispatch custom events for popup visibility (Electron integration)\\
if (value) {\\
window.dispatchEvent(new CustomEvent('yomitan-popup-shown'));\\
} else {\\
window.dispatchEvent(new CustomEvent('yomitan-popup-hidden'));\\
}" "$POPUP_JS"
echo " - Added visibility events"
fi
echo ""
echo "Yomitan patching complete!"
echo ""
echo "Changes applied:"
echo " 1. permissions-util.js: Hardcoded permissions (Electron workaround)"
echo " 2. options-schema.json: selectText=false, layoutAwareScan=false"
echo " 3. popup.js: Added yomitan-popup-shown/hidden events"
echo ""
echo "To verify: Run 'bun run dev' and check for 'Yomitan extension loaded successfully'"
If you need to change Electron compatibility behavior, patch the forked source repo and rebuild.
EOF

20
scripts/prettier-scope.sh Normal file
View File

@@ -0,0 +1,20 @@
#!/usr/bin/env bash
# Run Prettier over only the paths SubMiner keeps formatted. All CLI
# arguments are forwarded to `bunx prettier` ahead of the path list,
# e.g. `scripts/prettier-scope.sh --check` or `--write`.
set -euo pipefail
# Work from the repository root (one level above this script).
repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$repo_root"
# Files and directories inside Prettier's formatting scope.
scoped_paths=(
  "package.json"
  "tsconfig.json"
  "tsconfig.renderer.json"
  "tsconfig.typecheck.json"
  ".prettierrc.json"
  ".github"
  "build"
  "launcher"
  "scripts"
  "src"
)
# Replace this shell with prettier so its exit status propagates directly.
exec bunx prettier "$@" "${scoped_paths[@]}"

72
scripts/run-test-lane.mjs Normal file
View File

@@ -0,0 +1,72 @@
import { spawnSync } from 'node:child_process';
import { readdirSync } from 'node:fs';
import { relative, resolve } from 'node:path';
import { fileURLToPath } from 'node:url';
const repoRoot = resolve(new URL('..', import.meta.url).pathname);
// Test lane registry keyed by the lane name passed as argv[2].
// roots: directories scanned recursively for test files.
// include: filename suffixes that mark a file as belonging to the lane.
// exclude: repo-relative paths deliberately left to other lanes/runners.
const lanes = {
'bun-src-full': {
roots: ['src'],
include: ['.test.ts', '.type-test.ts'],
exclude: new Set([
'src/core/services/anki-jimaku-ipc.test.ts',
'src/core/services/ipc.test.ts',
'src/core/services/overlay-manager.test.ts',
'src/main/config-validation.test.ts',
'src/main/runtime/registry.test.ts',
'src/main/runtime/startup-config.test.ts',
]),
},
'bun-launcher-unit': {
roots: ['launcher'],
include: ['.test.ts'],
exclude: new Set(['launcher/smoke.e2e.test.ts']),
},
};
// Recursively gather test files under rootDir, returned as sorted
// repo-relative paths with '/' separators. Paths in excludeSet are
// skipped; only files ending in one of includeSuffixes are kept.
function collectFiles(rootDir, includeSuffixes, excludeSet) {
  const matches = [];
  const pending = [resolve(repoRoot, rootDir)];
  while (pending.length > 0) {
    const dir = pending.pop();
    for (const entry of readdirSync(dir, { withFileTypes: true })) {
      const absolute = resolve(dir, entry.name);
      if (entry.isDirectory()) {
        pending.push(absolute);
        continue;
      }
      const repoRelative = relative(repoRoot, absolute).replaceAll('\\', '/');
      if (excludeSet.has(repoRelative)) continue;
      if (includeSuffixes.some((suffix) => repoRelative.endsWith(suffix))) {
        matches.push(repoRelative);
      }
    }
  }
  matches.sort();
  return matches;
}
// Look up the lane requested on the command line.
const lane = lanes[process.argv[2]];
if (!lane) {
process.stderr.write(`Unknown test lane: ${process.argv[2] ?? '(missing)'}\n`);
process.exit(1);
}
// Expand every lane root into its matching test files.
const files = lane.roots.flatMap((rootDir) => collectFiles(rootDir, lane.include, lane.exclude));
if (files.length === 0) {
process.stderr.write(`No test files found for lane: ${process.argv[2]}\n`);
process.exit(1);
}
// Run the whole set in one `bun test` invocation, inheriting stdio.
const result = spawnSync('bun', ['test', ...files.map((file) => `./${file}`)], {
cwd: repoRoot,
stdio: 'inherit',
});
if (result.error) {
// Spawn failure (e.g. bun missing) rather than a test failure.
throw result.error;
}
// Propagate bun's exit status; a signal death (null status) counts as failure.
process.exit(result.status ?? 1);

View File

@@ -4,6 +4,7 @@ import path from 'node:path';
import process from 'node:process';
import { createTokenizerDepsRuntime, tokenizeSubtitle } from '../src/core/services/tokenizer.js';
import { resolveYomitanExtensionPath as resolveBuiltYomitanExtensionPath } from '../src/core/services/yomitan-extension-paths.js';
import { MecabTokenizer } from '../src/mecab-tokenizer.js';
import type { MergedToken } from '../src/types.js';
@@ -112,12 +113,12 @@ function parseCliArgs(argv: string[]): CliOptions {
if (!next) {
throw new Error('Missing value for --yomitan-extension');
}
yomitanExtensionPath = path.resolve(next);
yomitanExtensionPath = next;
continue;
}
if (arg.startsWith('--yomitan-extension=')) {
yomitanExtensionPath = path.resolve(arg.slice('--yomitan-extension='.length));
yomitanExtensionPath = arg.slice('--yomitan-extension='.length);
continue;
}
@@ -126,12 +127,12 @@ function parseCliArgs(argv: string[]): CliOptions {
if (!next) {
throw new Error('Missing value for --yomitan-user-data');
}
yomitanUserDataPath = path.resolve(next);
yomitanUserDataPath = next;
continue;
}
if (arg.startsWith('--yomitan-user-data=')) {
yomitanUserDataPath = path.resolve(arg.slice('--yomitan-user-data='.length));
yomitanUserDataPath = arg.slice('--yomitan-user-data='.length);
continue;
}
@@ -348,7 +349,11 @@ function findSelectedCandidateIndexes(
const mergedSignatures = mergedTokens.map(mergedTokenSignature);
const selected: number[] = [];
for (let i = 0; i < candidates.length; i += 1) {
const candidateSignatures = candidates[i].tokens.map(candidateTokenSignature);
const candidate = candidates[i];
if (!candidate) {
continue;
}
const candidateSignatures = candidate.tokens.map(candidateTokenSignature);
if (candidateSignatures.length !== mergedSignatures.length) {
continue;
}
@@ -368,21 +373,10 @@ function findSelectedCandidateIndexes(
}
function resolveYomitanExtensionPath(explicitPath?: string): string | null {
const candidates = [
explicitPath ? path.resolve(explicitPath) : null,
path.resolve(process.cwd(), 'vendor', 'yomitan'),
];
for (const candidate of candidates) {
if (!candidate) {
continue;
}
if (fs.existsSync(path.join(candidate, 'manifest.json'))) {
return candidate;
}
}
return null;
return resolveBuiltYomitanExtensionPath({
explicitPath,
cwd: process.cwd(),
});
}
async function setupYomitanRuntime(options: CliOptions): Promise<YomitanRuntimeState> {
@@ -416,7 +410,7 @@ async function setupYomitanRuntime(options: CliOptions): Promise<YomitanRuntimeS
const extensionPath = resolveYomitanExtensionPath(options.yomitanExtensionPath);
if (!extensionPath) {
state.note = 'no Yomitan extension directory found';
state.note = 'no built Yomitan extension directory found; run `bun run build:yomitan`';
return state;
}
@@ -490,6 +484,9 @@ function renderTextOutput(payload: Record<string, unknown>): void {
} else {
for (let i = 0; i < finalTokens.length; i += 1) {
const token = finalTokens[i];
if (!token) {
continue;
}
process.stdout.write(
` [${i}] ${token.surface} -> ${token.headword} (${token.reading}) [${token.startPos}, ${token.endPos})\n`,
);
@@ -505,6 +502,9 @@ function renderTextOutput(payload: Record<string, unknown>): void {
for (let i = 0; i < candidates.length; i += 1) {
const candidate = candidates[i];
if (!candidate) {
continue;
}
process.stdout.write(
` [${i}] source=${String(candidate.source)} index=${String(candidate.index)} selectedByTokenizer=${String(candidate.selectedByTokenizer)} tokenCount=${String(candidate.tokenCount)}\n`,
);
@@ -514,6 +514,9 @@ function renderTextOutput(payload: Record<string, unknown>): void {
}
for (let j = 0; j < tokens.length; j += 1) {
const token = tokens[j];
if (!token) {
continue;
}
process.stdout.write(
` - ${token.surface} -> ${token.headword} (${token.reading}) [${token.startPos}, ${token.endPos})\n`,
);