feat(stats): add v1 immersion stats dashboard (#19)

This commit is contained in:
2026-03-20 02:43:28 -07:00
committed by GitHub
parent 42abdd1268
commit 6749ff843c
555 changed files with 46356 additions and 2553 deletions

138
scripts/update-frequency.ts Normal file
View File

@@ -0,0 +1,138 @@
#!/usr/bin/env bun
/**
* Backfill frequency_rank in imm_words from a Yomitan-format frequency dictionary.
*
* Usage:
* bun update-frequency.ts <path-to-frequency-dictionary-directory>
*
* The directory should contain term_meta_bank_*.json files (Yomitan format)
* and optionally an index.json with metadata.
*
* Example dictionaries: JPDB, BCCWJ, Innocent Corpus (in Yomitan format).
*/
import { readFileSync, readdirSync, existsSync } from 'node:fs';
import { homedir } from 'node:os';
import { join } from 'node:path';
import Database from 'libsql';
/**
 * Path of the immersion database this script updates, located under the
 * user's home directory.
 *
 * When HOME is unset or empty the home directory is taken from os.homedir();
 * a literal '~' is not expanded by path.join or by the filesystem APIs, so it
 * cannot serve as a fallback.
 */
const DB_PATH = join(process.env.HOME || homedir(), '.config/SubMiner/immersion.sqlite');
/**
 * Coerce a raw value into a positive integer.
 *
 * @param value - Any value; only finite numbers are accepted.
 * @returns The value rounded down to an integer when that integer is at
 *   least 1, otherwise `null` (non-numbers, NaN, infinities, zero, negatives,
 *   and fractions below 1).
 */
function parsePositiveNumber(value: unknown): number | null {
  if (typeof value !== 'number' || !Number.isFinite(value)) return null;
  // Floor before the positivity check so fractions in (0, 1) are rejected
  // instead of being returned as 0.
  const floored = Math.floor(value);
  return floored > 0 ? floored : null;
}
/**
 * Read a positive integer out of a frequency display value.
 *
 * Strings contribute their leading run of decimal digits (after trimming
 * surrounding whitespace); anything else is delegated to
 * `parsePositiveNumber`.
 *
 * @param value - Raw display value.
 * @returns The parsed positive integer, or `null` when none can be derived.
 */
function parseDisplayValue(value: unknown): number | null {
  if (typeof value !== 'string') {
    return parsePositiveNumber(value);
  }
  const leadingDigits = /^\d+/.exec(value.trim());
  if (leadingDigits === null) return null;
  const parsed = Number.parseInt(leadingDigits[0], 10);
  if (!Number.isFinite(parsed) || parsed <= 0) return null;
  return parsed;
}
/**
 * Convert one frequency value to a rank.
 *
 * Accepts a plain number, a string (leading digits are used), or an object
 * carrying `displayValue` and/or `value`; `displayValue` wins when it parses.
 */
function rankFromFrequencyValue(freq: unknown): number | null {
  if (freq === null || freq === undefined) return null;
  // parseDisplayValue handles both the string and the plain-number form.
  if (typeof freq !== 'object') return parseDisplayValue(freq);
  const f = freq as Record<string, unknown>;
  return parseDisplayValue(f.displayValue) ?? parsePositiveNumber(f.value);
}

/**
 * Extract a frequency rank from the data element of a term-meta entry.
 *
 * Handles the wrapped form `{ frequency: ... }` (which may carry other keys
 * such as a reading) as well as the unwrapped form where the data element is
 * itself the frequency value (number, string, or `{ value, displayValue }`).
 *
 * NOTE(review): numbers are used as-is. This assumes the dictionary is
 * rank-based; nothing here distinguishes occurrence counts from ranks —
 * confirm against the dictionary's index.json before importing.
 *
 * @param meta - Third element of a term-meta entry.
 * @returns A positive integer rank, or `null` when none can be derived.
 */
function extractRank(meta: unknown): number | null {
  if (meta !== null && typeof meta === 'object' && 'frequency' in meta) {
    return rankFromFrequencyValue((meta as Record<string, unknown>).frequency);
  }
  return rankFromFrequencyValue(meta);
}
/**
 * Load every term-meta bank file in a directory into a term -> rank map.
 *
 * Files whose names match `term_meta_bank*.json` are read in sorted order.
 * Terms are trimmed and lower-cased; when a term appears more than once the
 * smallest rank is kept. Entries that are not arrays of at least three
 * elements, have a non-string term, or yield no rank are skipped.
 *
 * Exits the process with status 1 when no matching files exist, or when a
 * file cannot be read/parsed or does not contain a JSON array.
 *
 * @param dirPath - Directory containing the dictionary files.
 * @returns Map from normalized term to its best (lowest) rank.
 */
function loadDictionary(dirPath: string): Map<string, number> {
  const terms = new Map<string, number>();
  const files = readdirSync(dirPath)
    .filter((f) => /^term_meta_bank.*\.json$/.test(f))
    .sort();
  if (files.length === 0) {
    console.error(`No term_meta_bank_*.json files found in ${dirPath}`);
    process.exit(1);
  }
  for (const file of files) {
    let parsed: unknown;
    try {
      parsed = JSON.parse(readFileSync(join(dirPath, file), 'utf-8'));
    } catch (err) {
      // Name the offending file; the raw parser message alone does not.
      const reason = err instanceof Error ? err.message : String(err);
      console.error(`Failed to read ${file}: ${reason}`);
      process.exit(1);
    }
    // Validate the shape instead of asserting it: iterating a non-array
    // would otherwise fail with an opaque "not iterable" TypeError.
    if (!Array.isArray(parsed)) {
      console.error(`Unexpected content in ${file}: expected a JSON array of entries`);
      process.exit(1);
    }
    for (const entry of parsed as unknown[]) {
      if (!Array.isArray(entry) || entry.length < 3) continue;
      const [term, , meta] = entry;
      if (typeof term !== 'string') continue;
      const rank = extractRank(meta);
      if (rank === null) continue;
      const normalized = term.trim().toLowerCase();
      if (!normalized) continue;
      const existing = terms.get(normalized);
      // Keep the lowest rank seen for a term.
      if (existing === undefined || rank < existing) {
        terms.set(normalized, rank);
      }
    }
    console.log(` Loaded ${file} (${terms.size} terms total)`);
  }
  return terms;
}
/**
 * CLI entry point.
 *
 * Reads the dictionary directory from the first command-line argument, loads
 * it, then for every row of `imm_words` looks up the normalized headword
 * (falling back to the normalized word) and writes the rank when the row has
 * no rank yet or a larger one.
 *
 * All updates run inside a single transaction that is rolled back if any
 * statement fails, and the database handle is closed on every exit path that
 * unwinds through this function.
 *
 * Exits with status 1 when the argument is missing or when the dictionary
 * path or database file does not exist.
 */
function main() {
  const dictPath = process.argv[2];
  if (!dictPath) {
    console.error('Usage: bun update-frequency.ts <path-to-frequency-dictionary-directory>');
    console.error('');
    console.error('The directory should contain Yomitan term_meta_bank_*.json files.');
    console.error('Examples: JPDB, BCCWJ, Innocent Corpus frequency lists.');
    process.exit(1);
  }
  if (!existsSync(dictPath)) {
    console.error(`Directory not found: ${dictPath}`);
    process.exit(1);
  }
  if (!existsSync(DB_PATH)) {
    console.error(`Database not found: ${DB_PATH}`);
    process.exit(1);
  }
  console.log(`Loading frequency dictionary from ${dictPath}...`);
  const dict = loadDictionary(dictPath);
  console.log(`Loaded ${dict.size} terms from frequency dictionary.\n`);
  console.log(`Opening database: ${DB_PATH}`);
  const db = new Database(DB_PATH);
  try {
    db.exec('PRAGMA journal_mode = WAL');
    db.exec('PRAGMA foreign_keys = ON');
    // NOTE(review): the imm_words schema is not visible here; the text
    // columns are treated as possibly NULL as a defensive measure.
    const words = db.prepare('SELECT id, headword, word FROM imm_words').all() as Array<{
      id: number;
      headword: string | null;
      word: string | null;
    }>;
    console.log(`Found ${words.length} words in imm_words.\n`);
    const updateStmt = db.prepare(
      'UPDATE imm_words SET frequency_rank = ? WHERE id = ? AND (frequency_rank IS NULL OR frequency_rank > ?)',
    );
    let updated = 0;
    let matched = 0;
    // One transaction for the whole batch: avoids a commit per row and keeps
    // the table unchanged if an update fails part-way through.
    db.exec('BEGIN');
    try {
      for (const w of words) {
        const headwordNorm = (w.headword ?? '').trim().toLowerCase();
        const wordNorm = (w.word ?? '').trim().toLowerCase();
        const rank = dict.get(headwordNorm) ?? dict.get(wordNorm) ?? null;
        if (rank === null) continue;
        matched++;
        const result = updateStmt.run(rank, w.id, rank);
        if (result.changes > 0) updated++;
      }
      db.exec('COMMIT');
    } catch (err) {
      try {
        db.exec('ROLLBACK');
      } catch {
        // Ignore rollback failures; the error rethrown below is the one
        // worth reporting.
      }
      throw err;
    }
    console.log(`Matched: ${matched}/${words.length} words found in frequency dictionary`);
    console.log(`Updated: ${updated} rows with new or better frequency_rank`);
  } finally {
    db.close();
  }
  console.log('Done.');
}
main();