mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-03-20 03:16:46 -07:00
feat(stats): add v1 immersion stats dashboard (#19)
This commit is contained in:
68
scripts/docs-knowledge-base.test.ts
Normal file
68
scripts/docs-knowledge-base.test.ts
Normal file
@@ -0,0 +1,68 @@
|
||||
import assert from 'node:assert/strict';
|
||||
import { existsSync, readFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import test from 'node:test';
|
||||
|
||||
/** Working directory of the test process; every doc path in this file is resolved against it. */
const repoRoot = process.cwd();

/**
 * Reads a file as UTF-8 text.
 *
 * @param relativePath - Path of the file, relative to `repoRoot`.
 * @returns The decoded file contents.
 * @throws Whatever `readFileSync` throws when the file cannot be read.
 */
function read(relativePath: string): string {
  const absolutePath = join(repoRoot, relativePath);
  return readFileSync(absolutePath, { encoding: 'utf8' });
}
|
||||
|
||||
/**
 * Internal documentation files checked by the tests in this file, given as
 * paths relative to the repository root.
 */
const requiredDocs = [
  // Top-level index
  'docs/README.md',

  // Architecture
  'docs/architecture/README.md',
  'docs/architecture/domains.md',
  'docs/architecture/layering.md',

  // Knowledge base
  'docs/knowledge-base/README.md',
  'docs/knowledge-base/core-beliefs.md',
  'docs/knowledge-base/catalog.md',
  'docs/knowledge-base/quality.md',

  // Workflow
  'docs/workflow/README.md',
  'docs/workflow/planning.md',
  'docs/workflow/verification.md',
] as const;

/** Literal labels that each required doc is expected to contain somewhere in its text. */
const metadataFields = ['Status:', 'Last verified:', 'Owner:', 'Read when:'] as const;
|
||||
|
||||
// Fails on the first entry of `requiredDocs` that is missing on disk.
test('required internal knowledge-base docs exist', () => {
  requiredDocs.forEach((docPath) => {
    const isPresent = existsSync(join(repoRoot, docPath));
    assert.equal(isPresent, true, `${docPath} should exist`);
  });
});
|
||||
|
||||
// Every required doc must contain each metadata label somewhere in its text.
test('core internal docs include metadata fields', () => {
  // Compile one escaped, literal-match pattern per label up front instead of
  // rebuilding it for every (file, field) pair.
  const fieldPatterns = metadataFields.map((field) => ({
    field,
    pattern: new RegExp(field.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')),
  }));

  for (const relativePath of requiredDocs) {
    const contents = read(relativePath);
    for (const { field, pattern } of fieldPatterns) {
      // Name the offending file and label; the default assert.match message
      // only shows the regex and the file's contents.
      assert.match(
        contents,
        pattern,
        `${relativePath} should include the "${field}" metadata field`,
      );
    }
  }
});
|
||||
|
||||
// AGENTS.md must stay within 110 lines, link to the internal docs, and not
// link to the docs-site contributor pages.
test('AGENTS.md is a compact map to internal docs', () => {
  const agentsText = read('AGENTS.md');
  const totalLines = agentsText.trimEnd().split('\n').length;

  assert.ok(totalLines <= 110, `AGENTS.md should stay compact; got ${totalLines} lines`);

  // Checked in order; the first missing reference fails the test.
  const expectedReferences = [
    /\.\/docs\/README\.md/,
    /\.\/docs\/architecture\/README\.md/,
    /\.\/docs\/workflow\/README\.md/,
    /\.\/docs\/workflow\/verification\.md/,
    /\.\/docs\/knowledge-base\/README\.md/,
    /\.\/docs\/RELEASING\.md/,
    /`docs-site\/` is user-facing/,
  ];
  for (const expected of expectedReferences) {
    assert.match(agentsText, expected);
  }

  const forbiddenReferences = [
    /\.\/docs-site\/development\.md/,
    /\.\/docs-site\/architecture\.md/,
  ];
  for (const forbidden of forbiddenReferences) {
    assert.doesNotMatch(agentsText, forbidden);
  }
});
|
||||
|
||||
// The user-facing docs-site pages must mention the internal docs/ entry points.
test('docs-site contributor docs point internal readers to docs/', () => {
  // All three files are read before any assertion runs.
  const developmentText = read('docs-site/development.md');
  const architectureText = read('docs-site/architecture.md');
  const docsSiteReadmeText = read('docs-site/README.md');

  const expectations: Array<[string, RegExp]> = [
    [developmentText, /docs\/README\.md/],
    [developmentText, /docs\/architecture\/README\.md/],
    [architectureText, /docs\/architecture\/README\.md/],
    [docsSiteReadmeText, /docs\/README\.md/],
  ];

  for (const [text, expectedLink] of expectations) {
    assert.match(text, expectedLink);
  }
});
|
||||
45
scripts/get_frequency.test.ts
Normal file
45
scripts/get_frequency.test.ts
Normal file
@@ -0,0 +1,45 @@
|
||||
import assert from 'node:assert/strict';
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import test from 'node:test';
|
||||
import { spawnSync } from 'node:child_process';
|
||||
|
||||
/**
 * Creates a fresh, uniquely named scratch directory under
 * `<cwd>/.tmp/get-frequency-typecheck-test`, creating the parent directories
 * when they do not exist yet.
 *
 * @param name - Prefix for the directory name; `mkdtempSync` appends a random suffix after `<name>-`.
 * @returns The path of the newly created directory.
 */
function createWorkspace(name: string): string {
  const scratchRoot = path.join(process.cwd(), '.tmp', 'get-frequency-typecheck-test');
  fs.mkdirSync(scratchRoot, { recursive: true });

  const prefix = path.join(scratchRoot, `${name}-`);
  return fs.mkdtempSync(prefix);
}
|
||||
|
||||
// Type-checks scripts/get_frequency.ts on its own, using a throwaway tsconfig
// that extends the repository's tsconfig.typecheck.json.
test('scripts/get_frequency.ts typechecks in isolation', () => {
  const workspace = createWorkspace('isolated-script');
  const tsconfigPath = path.join(workspace, 'tsconfig.json');

  try {
    // Written inside the try block so the workspace is removed even when the
    // write itself fails. The '../../../' prefixes climb from
    // <cwd>/.tmp/get-frequency-typecheck-test/<workspace> back to <cwd>.
    fs.writeFileSync(
      tsconfigPath,
      JSON.stringify(
        {
          extends: '../../../tsconfig.typecheck.json',
          include: ['../../../scripts/get_frequency.ts'],
          exclude: [],
        },
        null,
        2,
      ),
      'utf8',
    );

    const result = spawnSync('bunx', ['tsc', '--noEmit', '-p', tsconfigPath], {
      cwd: process.cwd(),
      encoding: 'utf8',
    });

    // spawnSync reports launch failures (e.g. bunx not on PATH) through
    // `error`, leaving `status` null; surface that instead of an opaque
    // "null !== 0" with empty output.
    assert.ok(
      !result.error,
      `failed to launch "bunx tsc": ${result.error?.message ?? 'unknown error'}`,
    );

    assert.equal(
      result.status,
      0,
      `expected scripts/get_frequency.ts to typecheck\nstdout:\n${result.stdout}\nstderr:\n${result.stderr}`,
    );
  } finally {
    fs.rmSync(workspace, { recursive: true, force: true });
  }
});
|
||||
@@ -482,6 +482,7 @@ function simplifyTokenWithVerbose(
|
||||
|
||||
interface YomitanRuntimeState {
|
||||
yomitanExt: unknown | null;
|
||||
yomitanSession: unknown | null;
|
||||
parserWindow: unknown | null;
|
||||
parserReadyPromise: Promise<void> | null;
|
||||
parserInitPromise: Promise<boolean> | null;
|
||||
@@ -525,24 +526,38 @@ function destroyUnknownParserWindow(window: unknown): void {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Dynamically imports `electron` without letting an import failure propagate.
 *
 * @returns The module's `default` export when it is not null/undefined,
 *   otherwise the imported namespace object; `null` when the import (or
 *   reading the export) throws.
 */
async function loadElectronModule(): Promise<typeof import('electron') | null> {
  return import('electron')
    .then((namespace) => (namespace.default ?? namespace) as typeof import('electron'))
    .catch(() => null);
}
|
||||
|
||||
async function createYomitanRuntimeState(
|
||||
userDataPath: string,
|
||||
extensionPath?: string,
|
||||
): Promise<YomitanRuntimeState> {
|
||||
const state: YomitanRuntimeState = {
|
||||
yomitanExt: null,
|
||||
yomitanSession: null,
|
||||
parserWindow: null,
|
||||
parserReadyPromise: null,
|
||||
parserInitPromise: null,
|
||||
available: false,
|
||||
};
|
||||
|
||||
const electronImport = await import('electron').catch((error) => {
|
||||
state.note = error instanceof Error ? error.message : 'unknown error';
|
||||
return null;
|
||||
});
|
||||
if (!electronImport || !electronImport.app || !electronImport.app.whenReady) {
|
||||
state.note = 'electron runtime not available in this process';
|
||||
const electronImport = await loadElectronModule();
|
||||
if (
|
||||
!electronImport ||
|
||||
!electronImport.app ||
|
||||
typeof electronImport.app.whenReady !== 'function' ||
|
||||
!electronImport.session
|
||||
) {
|
||||
state.note = electronImport
|
||||
? 'electron runtime not available in this process'
|
||||
: 'electron import failed';
|
||||
return state;
|
||||
}
|
||||
|
||||
@@ -557,6 +572,7 @@ async function createYomitanRuntimeState(
|
||||
setYomitanParserReadyPromise: (promise: Promise<void> | null) => void;
|
||||
setYomitanParserInitPromise: (promise: Promise<boolean> | null) => void;
|
||||
setYomitanExtension: (extension: unknown) => void;
|
||||
setYomitanSession: (session: unknown) => void;
|
||||
}) => Promise<unknown>;
|
||||
|
||||
const extension = await loadYomitanExtension({
|
||||
@@ -575,6 +591,9 @@ async function createYomitanRuntimeState(
|
||||
setYomitanExtension: (extension) => {
|
||||
state.yomitanExt = extension;
|
||||
},
|
||||
setYomitanSession: (nextSession) => {
|
||||
state.yomitanSession = nextSession;
|
||||
},
|
||||
});
|
||||
|
||||
if (!extension) {
|
||||
@@ -768,8 +787,12 @@ async function main(): Promise<void> {
|
||||
);
|
||||
}
|
||||
|
||||
electronModule = await import('electron').catch(() => null);
|
||||
if (electronModule && args.yomitanUserDataPath) {
|
||||
electronModule = await loadElectronModule();
|
||||
if (
|
||||
electronModule?.app &&
|
||||
typeof electronModule.app.setPath === 'function' &&
|
||||
args.yomitanUserDataPath
|
||||
) {
|
||||
electronModule.app.setPath('userData', args.yomitanUserDataPath);
|
||||
}
|
||||
yomitanState = !args.forceMecabOnly
|
||||
@@ -783,6 +806,7 @@ async function main(): Promise<void> {
|
||||
|
||||
const deps = createTokenizerDepsRuntime({
|
||||
getYomitanExt: () => (useYomitan ? yomitanState!.yomitanExt : null) as never,
|
||||
getYomitanSession: () => (useYomitan ? yomitanState!.yomitanSession : null) as never,
|
||||
getYomitanParserWindow: () => (useYomitan ? yomitanState!.parserWindow : null) as never,
|
||||
setYomitanParserWindow: (window) => {
|
||||
if (!useYomitan) {
|
||||
|
||||
@@ -344,6 +344,27 @@ local function count_start_calls(async_calls)
|
||||
return count
|
||||
end
|
||||
|
||||
-- Returns the first entry of `async_calls` whose `args` list contains the
-- literal "--texthooker" flag, or nil when no entry does. Entries without an
-- `args` field are treated as having no arguments.
local function find_texthooker_call(async_calls)
  for _, candidate in ipairs(async_calls) do
    local argv = candidate.args or {}
    local count = #argv
    local position = 1
    while position <= count do
      if argv[position] == "--texthooker" then
        return candidate
      end
      position = position + 1
    end
  end
  return nil
end
|
||||
|
||||
-- Returns the 1-based position of the first entry in `async_calls` that is
-- equal (==) to `target_call`, or nil when there is none.
local function find_call_index(async_calls, target_call)
  local found = nil
  for position, candidate in ipairs(async_calls) do
    if candidate == target_call then
      found = position
      break
    end
  end
  return found
end
|
||||
|
||||
local function find_control_call(async_calls, flag)
|
||||
for _, call in ipairs(async_calls) do
|
||||
local args = call.args or {}
|
||||
@@ -643,6 +664,8 @@ do
|
||||
fire_event(recorded, "file-loaded")
|
||||
local start_call = find_start_call(recorded.async_calls)
|
||||
assert_true(start_call ~= nil, "auto-start should issue --start command")
|
||||
local texthooker_call = find_texthooker_call(recorded.async_calls)
|
||||
assert_true(texthooker_call ~= nil, "auto-start should issue texthooker helper command when enabled")
|
||||
assert_true(
|
||||
call_has_arg(start_call, "--show-visible-overlay"),
|
||||
"auto-start with visible overlay enabled should include --show-visible-overlay on --start"
|
||||
@@ -655,6 +678,10 @@ do
|
||||
find_control_call(recorded.async_calls, "--show-visible-overlay") ~= nil,
|
||||
"auto-start with visible overlay enabled should issue a separate --show-visible-overlay command"
|
||||
)
|
||||
assert_true(
|
||||
find_call_index(recorded.async_calls, start_call) < find_call_index(recorded.async_calls, texthooker_call),
|
||||
"auto-start should launch --start before separate --texthooker helper startup"
|
||||
)
|
||||
assert_true(
|
||||
not has_property_set(recorded.property_sets, "pause", true),
|
||||
"auto-start visible overlay should not force pause without explicit pause-until-ready option"
|
||||
|
||||
@@ -379,6 +379,15 @@ function resolveYomitanExtensionPath(explicitPath?: string): string | null {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Best-effort dynamic import of `electron`.
 *
 * @returns The module's `default` export if it is not null/undefined, else the
 *   imported namespace object. Resolves to `null` instead of rejecting when
 *   anything in the import throws.
 */
async function loadElectronModule(): Promise<typeof import('electron') | null> {
  try {
    const namespace = await import('electron');
    const resolved = namespace.default ?? namespace;
    return resolved as typeof import('electron');
  } catch {
    // The failure reason is intentionally not reported; callers only see null.
    return null;
  }
}
|
||||
|
||||
async function setupYomitanRuntime(options: CliOptions): Promise<YomitanRuntimeState> {
|
||||
const state: YomitanRuntimeState = {
|
||||
available: false,
|
||||
@@ -394,16 +403,13 @@ async function setupYomitanRuntime(options: CliOptions): Promise<YomitanRuntimeS
|
||||
return state;
|
||||
}
|
||||
|
||||
const electronModule = await import('electron').catch((error) => {
|
||||
state.note = error instanceof Error ? error.message : 'electron import failed';
|
||||
return null;
|
||||
});
|
||||
const electronModule = await loadElectronModule();
|
||||
if (!electronModule?.app || !electronModule?.session) {
|
||||
state.note = 'electron runtime not available in this process';
|
||||
return state;
|
||||
}
|
||||
|
||||
if (options.yomitanUserDataPath) {
|
||||
if (options.yomitanUserDataPath && typeof electronModule.app.setPath === 'function') {
|
||||
electronModule.app.setPath('userData', options.yomitanUserDataPath);
|
||||
}
|
||||
await electronModule.app.whenReady();
|
||||
|
||||
138
scripts/update-frequency.ts
Normal file
138
scripts/update-frequency.ts
Normal file
@@ -0,0 +1,138 @@
|
||||
#!/usr/bin/env bun
|
||||
/**
|
||||
* Backfill frequency_rank in imm_words from a Yomitan-format frequency dictionary.
|
||||
*
|
||||
* Usage:
|
||||
* bun update-frequency.ts <path-to-frequency-dictionary-directory>
|
||||
*
|
||||
* The directory should contain term_meta_bank_*.json files (Yomitan format)
|
||||
* and optionally an index.json with metadata.
|
||||
*
|
||||
* Example dictionaries: JPDB, BCCWJ, Innocent Corpus (in Yomitan format).
|
||||
*/
|
||||
|
||||
import { readFileSync, readdirSync, existsSync } from 'node:fs';
import { homedir } from 'node:os';
import { join } from 'node:path';
import Database from 'libsql';
|
||||
|
||||
/**
 * Path of the SQLite database this script updates.
 *
 * `$HOME` is used when set (unchanged behaviour). When it is unset, fall back
 * to `os.homedir()` rather than a literal `'~'`: neither `node:path` nor
 * `node:fs` expands `~`, so the old fallback pointed at a `~` directory
 * relative to the current working directory.
 */
const DB_PATH = join(process.env.HOME ?? homedir(), '.config/SubMiner/immersion.sqlite');
|
||||
|
||||
/**
 * Converts a raw value into a positive integer.
 *
 * @param value - Candidate value of any type.
 * @returns `Math.floor(value)` when `value` is a finite number whose floor is
 *   at least 1; otherwise `null`.
 */
function parsePositiveNumber(value: unknown): number | null {
  if (typeof value !== 'number' || !Number.isFinite(value)) return null;

  // Check positivity AFTER flooring: values in (0, 1) floor to 0, which must
  // not be returned as a "positive" result.
  const floored = Math.floor(value);
  return floored > 0 ? floored : null;
}
|
||||
|
||||
/**
 * Parses a frequency display value into a positive integer.
 *
 * Strings are trimmed and only their leading run of decimal digits is used
 * (anything after the first non-digit is ignored). Non-string values are
 * delegated to `parsePositiveNumber`.
 *
 * @param value - Candidate display value of any type.
 * @returns The parsed positive integer, or `null` when none can be derived.
 */
function parseDisplayValue(value: unknown): number | null {
  if (typeof value !== 'string') {
    return parsePositiveNumber(value);
  }

  const leadingDigits = /^\d+/.exec(value.trim());
  if (leadingDigits === null) return null;

  const parsed = Number.parseInt(leadingDigits[0], 10);
  if (!Number.isFinite(parsed) || parsed <= 0) return null;
  return parsed;
}
|
||||
|
||||
/**
 * Extracts a rank from one metadata value.
 *
 * Only the shape `{ frequency: { displayValue?, value? } }` is recognised;
 * `displayValue` takes precedence and `value` is the fallback.
 * NOTE(review): metadata that is a bare number/string, or that carries
 * `value`/`displayValue` at the top level, yields `null` here — confirm that
 * is intended for the dictionaries this script targets.
 *
 * @param meta - Metadata value of any type.
 * @returns The rank as a positive integer, or `null` when none is found.
 */
function extractRank(meta: unknown): number | null {
  if (typeof meta !== 'object' || meta === null) return null;

  const frequency = (meta as Record<string, unknown>).frequency;
  if (typeof frequency !== 'object' || frequency === null) return null;

  const fields = frequency as Record<string, unknown>;
  const fromDisplay = parseDisplayValue(fields.displayValue);
  if (fromDisplay !== null) return fromDisplay;
  return parsePositiveNumber(fields.value);
}
|
||||
|
||||
/**
 * Builds a term → rank lookup from the `term_meta_bank*.json` files in a directory.
 *
 * Each file is parsed as a JSON array. Entries that are arrays of at least
 * three elements with a string first element contribute
 * `extractRank(entry[2])`. Terms are trimmed and lower-cased, and when a term
 * occurs more than once the numerically smallest rank is kept.
 *
 * Prints an error and exits the process with status 1 when the directory
 * contains no matching file.
 *
 * @param dirPath - Directory containing the dictionary files.
 * @returns Map from normalised term to its smallest rank.
 */
function loadDictionary(dirPath: string): Map<string, number> {
  const bestRankByTerm = new Map<string, number>();

  const bankFilePattern = /^term_meta_bank.*\.json$/;
  const bankFiles = readdirSync(dirPath)
    .filter((fileName) => bankFilePattern.test(fileName))
    .sort();

  if (bankFiles.length === 0) {
    console.error(`No term_meta_bank_*.json files found in ${dirPath}`);
    process.exit(1);
  }

  for (const bankFile of bankFiles) {
    const fileText = readFileSync(join(dirPath, bankFile), 'utf-8');
    const entries = JSON.parse(fileText) as unknown[];

    for (const entry of entries) {
      if (!Array.isArray(entry) || entry.length < 3) continue;

      const term: unknown = entry[0];
      if (typeof term !== 'string') continue;

      const rank = extractRank(entry[2]);
      if (rank === null) continue;

      const key = term.trim().toLowerCase();
      if (!key) continue;

      // Keep the smallest rank seen for this term.
      const previous = bestRankByTerm.get(key);
      if (previous !== undefined && previous <= rank) continue;
      bestRankByTerm.set(key, rank);
    }

    console.log(`  Loaded ${bankFile} (${bestRankByTerm.size} terms total)`);
  }

  return bestRankByTerm;
}
|
||||
|
||||
/**
 * Script entry point: loads the frequency dictionary named by the first CLI
 * argument and writes ranks into `imm_words.frequency_rank`.
 *
 * A row is updated only when its current `frequency_rank` is NULL or larger
 * than the dictionary rank. The lookup tries the row's `headword` first, then
 * its `word`, both trimmed and lower-cased.
 *
 * Exits with status 1 when the argument is missing or when the dictionary
 * path or the database file does not exist.
 */
function main(): void {
  const dictPath = process.argv[2];
  if (!dictPath) {
    console.error('Usage: bun update-frequency.ts <path-to-frequency-dictionary-directory>');
    console.error('');
    console.error('The directory should contain Yomitan term_meta_bank_*.json files.');
    console.error('Examples: JPDB, BCCWJ, Innocent Corpus frequency lists.');
    process.exit(1);
  }

  if (!existsSync(dictPath)) {
    console.error(`Directory not found: ${dictPath}`);
    process.exit(1);
  }

  if (!existsSync(DB_PATH)) {
    console.error(`Database not found: ${DB_PATH}`);
    process.exit(1);
  }

  console.log(`Loading frequency dictionary from ${dictPath}...`);
  const dict = loadDictionary(dictPath);
  console.log(`Loaded ${dict.size} terms from frequency dictionary.\n`);

  console.log(`Opening database: ${DB_PATH}`);
  const db = new Database(DB_PATH);

  // try/finally guarantees the handle is closed even when a query throws.
  try {
    // The journal mode must be set before the transaction below is opened.
    db.exec('PRAGMA journal_mode = WAL');
    db.exec('PRAGMA foreign_keys = ON');

    const words = db.prepare('SELECT id, headword, word FROM imm_words').all() as Array<{
      id: number;
      headword: string;
      word: string;
    }>;
    console.log(`Found ${words.length} words in imm_words.\n`);

    const updateStmt = db.prepare(
      'UPDATE imm_words SET frequency_rank = ? WHERE id = ? AND (frequency_rank IS NULL OR frequency_rank > ?)',
    );

    let updated = 0;
    let matched = 0;

    // Run all updates in one transaction: one commit instead of one per row,
    // and a failure part-way through leaves the table untouched.
    db.exec('BEGIN');
    try {
      for (const w of words) {
        const headwordNorm = w.headword.trim().toLowerCase();
        const wordNorm = w.word.trim().toLowerCase();

        const rank = dict.get(headwordNorm) ?? dict.get(wordNorm) ?? null;
        if (rank === null) continue;

        matched++;
        const result = updateStmt.run(rank, w.id, rank);
        if (result.changes > 0) updated++;
      }
      db.exec('COMMIT');
    } catch (error) {
      db.exec('ROLLBACK');
      throw error;
    }

    console.log(`Matched: ${matched}/${words.length} words found in frequency dictionary`);
    console.log(`Updated: ${updated} rows with new or better frequency_rank`);
  } finally {
    db.close();
  }

  console.log('Done.');
}

main();
|
||||
Reference in New Issue
Block a user