feat(stats): add v1 immersion stats dashboard (#19)

This commit is contained in:
2026-03-20 02:43:28 -07:00
committed by GitHub
parent 42abdd1268
commit 6749ff843c
555 changed files with 46356 additions and 2553 deletions

View File

@@ -0,0 +1,68 @@
import assert from 'node:assert/strict';
import { existsSync, readFileSync } from 'node:fs';
import { join } from 'node:path';
import test from 'node:test';
/** Base directory for every repository-relative path used by these tests (the process working directory). */
const repoRoot = process.cwd();

/**
 * Loads a file addressed relative to {@link repoRoot} and returns its contents decoded as UTF-8.
 *
 * @param relativePath - Path of the file, relative to the repository root.
 * @returns The decoded file contents.
 * @throws Whatever `readFileSync` throws, e.g. when the file does not exist.
 */
function read(relativePath: string): string {
  const absolutePath = join(repoRoot, relativePath);
  const contents = readFileSync(absolutePath, { encoding: 'utf8' });
  return contents;
}
// Repository-relative paths of the internal docs that the tests below require
// to exist and to contain every marker listed in `metadataFields`.
const requiredDocs = [
'docs/README.md',
'docs/architecture/README.md',
'docs/architecture/domains.md',
'docs/architecture/layering.md',
'docs/knowledge-base/README.md',
'docs/knowledge-base/core-beliefs.md',
'docs/knowledge-base/catalog.md',
'docs/knowledge-base/quality.md',
'docs/workflow/README.md',
'docs/workflow/planning.md',
'docs/workflow/verification.md',
] as const;
// Literal header markers that must appear somewhere in each required doc.
const metadataFields = ['Status:', 'Last verified:', 'Owner:', 'Read when:'] as const;
// Every path listed in `requiredDocs` must be present on disk under the repo root.
test('required internal knowledge-base docs exist', () => {
  requiredDocs.forEach((docPath) => {
    const isPresent = existsSync(join(repoRoot, docPath));
    assert.equal(isPresent, true, `${docPath} should exist`);
  });
});
// Each required doc must contain every metadata marker as a literal substring.
test('core internal docs include metadata fields', () => {
  for (const relativePath of requiredDocs) {
    const contents = read(relativePath);
    for (const field of metadataFields) {
      // A plain substring check is equivalent to matching the escaped literal,
      // and the explicit message names the offending file instead of dumping
      // the whole document in the assertion error.
      assert.ok(
        contents.includes(field),
        `${relativePath} should include metadata field "${field}"`,
      );
    }
  }
});
// AGENTS.md must stay short and link to the internal docs rather than to docs-site pages.
test('AGENTS.md is a compact map to internal docs', () => {
  const maxLineCount = 110;
  const agentsContents = read('AGENTS.md');
  const lineCount = agentsContents.trimEnd().split('\n').length;
  assert.ok(lineCount <= maxLineCount, `AGENTS.md should stay compact; got ${lineCount} lines`);

  // Links and phrases that must be present, checked in this order.
  const requiredPatterns: RegExp[] = [
    /\.\/docs\/README\.md/,
    /\.\/docs\/architecture\/README\.md/,
    /\.\/docs\/workflow\/README\.md/,
    /\.\/docs\/workflow\/verification\.md/,
    /\.\/docs\/knowledge-base\/README\.md/,
    /\.\/docs\/RELEASING\.md/,
    /`docs-site\/` is user-facing/,
  ];
  for (const pattern of requiredPatterns) {
    assert.match(agentsContents, pattern);
  }

  // Links that must NOT appear: docs-site pages are not the internal source of truth.
  const forbiddenPatterns: RegExp[] = [
    /\.\/docs-site\/development\.md/,
    /\.\/docs-site\/architecture\.md/,
  ];
  for (const pattern of forbiddenPatterns) {
    assert.doesNotMatch(agentsContents, pattern);
  }
});
// The user-facing docs-site pages must reference the internal docs/ tree.
test('docs-site contributor docs point internal readers to docs/', () => {
  // All three files are read up front, in this order, before any assertion runs.
  const pages = {
    development: read('docs-site/development.md'),
    architecture: read('docs-site/architecture.md'),
    readme: read('docs-site/README.md'),
  };

  const expectations: Array<[contents: string, pattern: RegExp]> = [
    [pages.development, /docs\/README\.md/],
    [pages.development, /docs\/architecture\/README\.md/],
    [pages.architecture, /docs\/architecture\/README\.md/],
    [pages.readme, /docs\/README\.md/],
  ];
  for (const [contents, pattern] of expectations) {
    assert.match(contents, pattern);
  }
});

View File

@@ -0,0 +1,45 @@
import assert from 'node:assert/strict';
import fs from 'node:fs';
import path from 'node:path';
import test from 'node:test';
import { spawnSync } from 'node:child_process';
/**
 * Creates a fresh, uniquely named scratch directory under
 * `<cwd>/.tmp/get-frequency-typecheck-test`, creating the parent chain if needed.
 *
 * @param name - Prefix for the generated directory name; a `-` and a random suffix are appended.
 * @returns Path of the newly created directory.
 */
function createWorkspace(name: string): string {
  const parentSegments = [process.cwd(), '.tmp', 'get-frequency-typecheck-test'];
  const parentDir = path.join(...parentSegments);
  fs.mkdirSync(parentDir, { recursive: true });

  const prefix = path.join(parentDir, name + '-');
  return fs.mkdtempSync(prefix);
}
// Typechecks scripts/get_frequency.ts on its own by generating a throwaway
// tsconfig that extends the repo's typecheck config and includes only that file.
test('scripts/get_frequency.ts typechecks in isolation', () => {
  const workspace = createWorkspace('isolated-script');
  // Everything after workspace creation runs inside the try so the scratch
  // directory is removed even when writing the tsconfig fails.
  try {
    const tsconfigPath = path.join(workspace, 'tsconfig.json');
    fs.writeFileSync(
      tsconfigPath,
      JSON.stringify(
        {
          // The workspace sits three levels below the repo root:
          // <root>/.tmp/get-frequency-typecheck-test/<workspace>.
          extends: '../../../tsconfig.typecheck.json',
          include: ['../../../scripts/get_frequency.ts'],
          exclude: [],
        },
        null,
        2,
      ),
      'utf8',
    );

    const result = spawnSync('bunx', ['tsc', '--noEmit', '-p', tsconfigPath], {
      cwd: process.cwd(),
      encoding: 'utf8',
    });

    // When the process cannot be launched (e.g. bunx is not on PATH), status is
    // null and stdout/stderr carry nothing useful; report the spawn error itself.
    assert.ok(
      !result.error,
      `failed to launch "bunx tsc": ${result.error?.message ?? 'unknown error'}`,
    );
    assert.equal(
      result.status,
      0,
      `expected scripts/get_frequency.ts to typecheck\nstdout:\n${result.stdout}\nstderr:\n${result.stderr}`,
    );
  } finally {
    fs.rmSync(workspace, { recursive: true, force: true });
  }
});

View File

@@ -482,6 +482,7 @@ function simplifyTokenWithVerbose(
interface YomitanRuntimeState {
yomitanExt: unknown | null;
yomitanSession: unknown | null;
parserWindow: unknown | null;
parserReadyPromise: Promise<void> | null;
parserInitPromise: Promise<boolean> | null;
@@ -525,24 +526,38 @@ function destroyUnknownParserWindow(window: unknown): void {
}
}
/**
 * Attempts to dynamically import the `electron` module.
 *
 * @returns The module's `default` export when it has one, otherwise the module
 * namespace itself; `null` when the import throws for any reason.
 */
async function loadElectronModule(): Promise<typeof import('electron') | null> {
  let resolved: typeof import('electron') | null = null;
  try {
    const namespace = await import('electron');
    resolved = (namespace.default ?? namespace) as typeof import('electron');
  } catch {
    // Import failures are intentionally reported to the caller as `null`.
    resolved = null;
  }
  return resolved;
}
async function createYomitanRuntimeState(
userDataPath: string,
extensionPath?: string,
): Promise<YomitanRuntimeState> {
const state: YomitanRuntimeState = {
yomitanExt: null,
yomitanSession: null,
parserWindow: null,
parserReadyPromise: null,
parserInitPromise: null,
available: false,
};
const electronImport = await import('electron').catch((error) => {
state.note = error instanceof Error ? error.message : 'unknown error';
return null;
});
if (!electronImport || !electronImport.app || !electronImport.app.whenReady) {
state.note = 'electron runtime not available in this process';
const electronImport = await loadElectronModule();
if (
!electronImport ||
!electronImport.app ||
typeof electronImport.app.whenReady !== 'function' ||
!electronImport.session
) {
state.note = electronImport
? 'electron runtime not available in this process'
: 'electron import failed';
return state;
}
@@ -557,6 +572,7 @@ async function createYomitanRuntimeState(
setYomitanParserReadyPromise: (promise: Promise<void> | null) => void;
setYomitanParserInitPromise: (promise: Promise<boolean> | null) => void;
setYomitanExtension: (extension: unknown) => void;
setYomitanSession: (session: unknown) => void;
}) => Promise<unknown>;
const extension = await loadYomitanExtension({
@@ -575,6 +591,9 @@ async function createYomitanRuntimeState(
setYomitanExtension: (extension) => {
state.yomitanExt = extension;
},
setYomitanSession: (nextSession) => {
state.yomitanSession = nextSession;
},
});
if (!extension) {
@@ -768,8 +787,12 @@ async function main(): Promise<void> {
);
}
electronModule = await import('electron').catch(() => null);
if (electronModule && args.yomitanUserDataPath) {
electronModule = await loadElectronModule();
if (
electronModule?.app &&
typeof electronModule.app.setPath === 'function' &&
args.yomitanUserDataPath
) {
electronModule.app.setPath('userData', args.yomitanUserDataPath);
}
yomitanState = !args.forceMecabOnly
@@ -783,6 +806,7 @@ async function main(): Promise<void> {
const deps = createTokenizerDepsRuntime({
getYomitanExt: () => (useYomitan ? yomitanState!.yomitanExt : null) as never,
getYomitanSession: () => (useYomitan ? yomitanState!.yomitanSession : null) as never,
getYomitanParserWindow: () => (useYomitan ? yomitanState!.parserWindow : null) as never,
setYomitanParserWindow: (window) => {
if (!useYomitan) {

View File

@@ -344,6 +344,27 @@ local function count_start_calls(async_calls)
return count
end
-- Returns the first recorded async call whose argument list contains the
-- "--texthooker" flag, or nil when no call carries it. Calls without an
-- `args` field are treated as having no arguments.
local function find_texthooker_call(async_calls)
	local flag = "--texthooker"
	for _, candidate in ipairs(async_calls) do
		local argv = candidate.args or {}
		local position = 1
		while position <= #argv do
			if argv[position] == flag then
				return candidate
			end
			position = position + 1
		end
	end
	return nil
end
-- Returns the 1-based position of `target_call` within `async_calls`
-- (compared with ==), or nil when it is not present.
local function find_call_index(async_calls, target_call)
	local found = nil
	for position, candidate in ipairs(async_calls) do
		if candidate == target_call then
			found = position
			break
		end
	end
	return found
end
local function find_control_call(async_calls, flag)
for _, call in ipairs(async_calls) do
local args = call.args or {}
@@ -643,6 +664,8 @@ do
fire_event(recorded, "file-loaded")
local start_call = find_start_call(recorded.async_calls)
assert_true(start_call ~= nil, "auto-start should issue --start command")
local texthooker_call = find_texthooker_call(recorded.async_calls)
assert_true(texthooker_call ~= nil, "auto-start should issue texthooker helper command when enabled")
assert_true(
call_has_arg(start_call, "--show-visible-overlay"),
"auto-start with visible overlay enabled should include --show-visible-overlay on --start"
@@ -655,6 +678,10 @@ do
find_control_call(recorded.async_calls, "--show-visible-overlay") ~= nil,
"auto-start with visible overlay enabled should issue a separate --show-visible-overlay command"
)
assert_true(
find_call_index(recorded.async_calls, start_call) < find_call_index(recorded.async_calls, texthooker_call),
"auto-start should launch --start before separate --texthooker helper startup"
)
assert_true(
not has_property_set(recorded.property_sets, "pause", true),
"auto-start visible overlay should not force pause without explicit pause-until-ready option"

View File

@@ -379,6 +379,15 @@ function resolveYomitanExtensionPath(explicitPath?: string): string | null {
});
}
/**
 * Attempts to dynamically import the `electron` module.
 *
 * @returns The module's `default` export when it has one, otherwise the module
 * namespace itself; `null` when the import throws for any reason.
 */
async function loadElectronModule(): Promise<typeof import('electron') | null> {
  let resolved: typeof import('electron') | null = null;
  try {
    const namespace = await import('electron');
    resolved = (namespace.default ?? namespace) as typeof import('electron');
  } catch {
    // Import failures are intentionally reported to the caller as `null`.
    resolved = null;
  }
  return resolved;
}
async function setupYomitanRuntime(options: CliOptions): Promise<YomitanRuntimeState> {
const state: YomitanRuntimeState = {
available: false,
@@ -394,16 +403,13 @@ async function setupYomitanRuntime(options: CliOptions): Promise<YomitanRuntimeS
return state;
}
const electronModule = await import('electron').catch((error) => {
state.note = error instanceof Error ? error.message : 'electron import failed';
return null;
});
const electronModule = await loadElectronModule();
if (!electronModule?.app || !electronModule?.session) {
state.note = 'electron runtime not available in this process';
return state;
}
if (options.yomitanUserDataPath) {
if (options.yomitanUserDataPath && typeof electronModule.app.setPath === 'function') {
electronModule.app.setPath('userData', options.yomitanUserDataPath);
}
await electronModule.app.whenReady();

138
scripts/update-frequency.ts Normal file
View File

@@ -0,0 +1,138 @@
#!/usr/bin/env bun
/**
* Backfill frequency_rank in imm_words from a Yomitan-format frequency dictionary.
*
* Usage:
* bun update-frequency.ts <path-to-frequency-dictionary-directory>
*
* The directory should contain term_meta_bank_*.json files (Yomitan format)
* and optionally an index.json with metadata.
*
* Example dictionaries: JPDB, BCCWJ, Innocent Corpus (in Yomitan format).
*/
import { readFileSync, readdirSync, existsSync } from 'node:fs';
import { homedir } from 'node:os';
import { join } from 'node:path';
import Database from 'libsql';
// Path of the immersion database this script updates. $HOME is preferred when
// set; otherwise fall back to the OS-reported home directory. A literal '~' is
// never expanded by Node and would produce a bogus path relative to the cwd.
const DB_PATH = join(process.env.HOME ?? homedir(), '.config/SubMiner/immersion.sqlite');
/**
 * Converts a numeric value to a positive integer rank.
 *
 * @param value - Candidate value of unknown type.
 * @returns The value floored to an integer, or `null` when it is not a finite
 * number or when the floored result is below 1.
 */
function parsePositiveNumber(value: unknown): number | null {
  if (typeof value !== 'number' || !Number.isFinite(value)) return null;
  const floored = Math.floor(value);
  // Checking after flooring rejects values in (0, 1), which would otherwise
  // become rank 0 even though a rank must be positive.
  return floored >= 1 ? floored : null;
}

/**
 * Parses a frequency display value.
 *
 * Strings are trimmed and their leading run of decimal digits is used
 * (e.g. `"123㋕"` yields 123); any other type is handled by
 * {@link parsePositiveNumber}.
 *
 * @param value - Candidate display value of unknown type.
 * @returns A positive integer rank, or `null` when none can be derived.
 */
function parseDisplayValue(value: unknown): number | null {
  if (typeof value === 'string') {
    const match = value.trim().match(/^\d+/)?.[0];
    if (!match) return null;
    const n = Number.parseInt(match, 10);
    return Number.isFinite(n) && n > 0 ? n : null;
  }
  return parsePositiveNumber(value);
}

/**
 * Parses a single frequency payload: a bare number or string, or an object
 * carrying `displayValue` and/or `value` (`displayValue` wins when both parse).
 */
function parseFrequencyValue(freq: unknown): number | null {
  if (typeof freq === 'number' || typeof freq === 'string') return parseDisplayValue(freq);
  if (!freq || typeof freq !== 'object') return null;
  const f = freq as Record<string, unknown>;
  return parseDisplayValue(f.displayValue) ?? parsePositiveNumber(f.value);
}

/**
 * Extracts a frequency rank from the data element of a term-meta entry.
 *
 * Accepts the frequency payload either wrapped under a `frequency` key
 * (e.g. `{ reading, frequency }`) or given directly as the data itself.
 *
 * @param meta - Third element of a term-meta entry.
 * @returns A positive integer rank, or `null` when the data carries no usable frequency.
 */
function extractRank(meta: unknown): number | null {
  if (meta !== null && typeof meta === 'object') {
    const wrapped = (meta as Record<string, unknown>).frequency;
    // A present `frequency` key is authoritative, even when it fails to parse.
    if (wrapped !== undefined) return parseFrequencyValue(wrapped);
  }
  return parseFrequencyValue(meta);
}
/**
 * Loads every `term_meta_bank*.json` file in a directory and builds a map from
 * normalized term (trimmed, lower-cased) to its best (lowest) frequency rank.
 *
 * Exits the process with status 1 when the directory contains no matching files.
 * Files whose JSON is not an array are skipped with a message on stderr.
 *
 * @param dirPath - Directory containing the dictionary's term meta bank files.
 * @returns Map of normalized term to lowest rank seen across all files.
 * @throws When a file cannot be read or is not valid JSON.
 */
function loadDictionary(dirPath: string): Map<string, number> {
  const terms = new Map<string, number>();
  const files = readdirSync(dirPath)
    .filter((f) => /^term_meta_bank.*\.json$/.test(f))
    .sort();

  if (files.length === 0) {
    console.error(`No term_meta_bank_*.json files found in ${dirPath}`);
    process.exit(1);
  }

  for (const file of files) {
    const raw: unknown = JSON.parse(readFileSync(join(dirPath, file), 'utf-8'));
    // A bank file must be an array of entries; anything else would otherwise
    // fail later with an opaque "not iterable" error.
    if (!Array.isArray(raw)) {
      console.error(`  Skipping ${file}: expected a JSON array of term meta entries`);
      continue;
    }

    for (const entry of raw) {
      if (!Array.isArray(entry) || entry.length < 3) continue;
      const [term, mode, meta] = entry;
      if (typeof term !== 'string') continue;
      // Term meta banks also carry non-frequency entries (e.g. pitch accent);
      // only entries tagged "freq" describe frequency data.
      if (mode !== 'freq') continue;

      const rank = extractRank(meta);
      if (rank === null) continue;

      const normalized = term.trim().toLowerCase();
      if (!normalized) continue;

      // Keep the lowest rank when a term appears more than once.
      const existing = terms.get(normalized);
      if (existing === undefined || rank < existing) {
        terms.set(normalized, rank);
      }
    }
    console.log(`  Loaded ${file} (${terms.size} terms total)`);
  }
  return terms;
}
/**
 * Script entry point: loads the frequency dictionary named by the first CLI
 * argument and backfills `frequency_rank` in the `imm_words` table of the
 * database at `DB_PATH`.
 *
 * A row is only updated when it has no rank yet or when the dictionary rank is
 * lower than the stored one. Exits with status 1 on a missing argument, a
 * missing dictionary path, or a missing database file.
 */
function main(): void {
  const dictPath = process.argv[2];
  if (!dictPath) {
    console.error('Usage: bun update-frequency.ts <path-to-frequency-dictionary-directory>');
    console.error('');
    console.error('The directory should contain Yomitan term_meta_bank_*.json files.');
    console.error('Examples: JPDB, BCCWJ, Innocent Corpus frequency lists.');
    process.exit(1);
  }
  if (!existsSync(dictPath)) {
    console.error(`Directory not found: ${dictPath}`);
    process.exit(1);
  }
  if (!existsSync(DB_PATH)) {
    console.error(`Database not found: ${DB_PATH}`);
    process.exit(1);
  }

  console.log(`Loading frequency dictionary from ${dictPath}...`);
  const dict = loadDictionary(dictPath);
  console.log(`Loaded ${dict.size} terms from frequency dictionary.\n`);

  console.log(`Opening database: ${DB_PATH}`);
  const db = new Database(DB_PATH);
  // The handle is closed in `finally` so a failing query does not leak it.
  try {
    db.exec('PRAGMA journal_mode = WAL');
    db.exec('PRAGMA foreign_keys = ON');

    // Row values come straight from the database, so text columns are treated
    // as possibly NULL — NOTE(review): confirm against the imm_words schema.
    const words = db.prepare('SELECT id, headword, word FROM imm_words').all() as Array<{
      id: number;
      headword: string | null;
      word: string | null;
    }>;
    console.log(`Found ${words.length} words in imm_words.\n`);

    const updateStmt = db.prepare(
      'UPDATE imm_words SET frequency_rank = ? WHERE id = ? AND (frequency_rank IS NULL OR frequency_rank > ?)',
    );
    // Same normalization as the dictionary keys; NULL text normalizes to '',
    // which never matches because empty terms are not stored in the dictionary.
    const normalize = (text: string | null): string => (text ?? '').trim().toLowerCase();

    let updated = 0;
    let matched = 0;
    // One explicit transaction makes the backfill atomic and avoids paying a
    // separate commit for every updated row.
    db.exec('BEGIN');
    try {
      for (const w of words) {
        // Prefer the headword's rank; fall back to the surface form.
        const rank = dict.get(normalize(w.headword)) ?? dict.get(normalize(w.word)) ?? null;
        if (rank === null) continue;
        matched++;
        const result = updateStmt.run(rank, w.id, rank);
        if (result.changes > 0) updated++;
      }
      db.exec('COMMIT');
    } catch (error) {
      db.exec('ROLLBACK');
      throw error;
    }

    console.log(`Matched: ${matched}/${words.length} words found in frequency dictionary`);
    console.log(`Updated: ${updated} rows with new or better frequency_rank`);
  } finally {
    db.close();
  }
  console.log('Done.');
}

main();