Add opt-in JLPT tagging flow

This commit is contained in:
2026-02-15 16:28:00 -08:00
parent ca2b7bb2fe
commit f492622a8b
27 changed files with 1116 additions and 38 deletions

View File

@@ -0,0 +1,194 @@
import * as fs from "fs";
import * as path from "path";
import type { JlptLevel } from "../../types";
/**
 * Options accepted by createJlptVocabularyLookupService.
 */
export interface JlptVocabLookupOptions {
/**
 * Candidate dictionary directories. They are probed in array order and the
 * first directory that yields at least one term is the one that gets used.
 */
searchPaths: string[];
/** Receives human-readable progress and diagnostic messages. */
log: (message: string) => void;
}
/**
 * Bank files probed inside a dictionary directory, listed N1 first.
 * The bank number in the filename is the level's position in that list,
 * counted from one (N1 -> term_meta_bank_1.json ... N5 -> term_meta_bank_5.json).
 */
const JLPT_BANK_FILES: { level: JlptLevel; filename: string }[] = (
  ["N1", "N2", "N3", "N4", "N5"] as const
).map((level, index) => ({
  level,
  filename: `term_meta_bank_${index + 1}.json`,
}));
/**
 * Lookup that reports "no JLPT level" for every term. Returned by
 * createJlptVocabularyLookupService when no dictionary could be loaded.
 */
const NOOP_LOOKUP = (): null => {
  return null;
};
/**
 * Produces the canonical key form of a term: the input with leading and
 * trailing whitespace removed. Used both when indexing dictionary entries and
 * when looking a term up, so the two sides always agree.
 *
 * @param value Raw term text.
 * @returns The trimmed term; may be the empty string.
 */
function normalizeJlptTerm(value: string): string {
  const trimmed = value.trim();
  return trimmed;
}
/**
 * Tells whether a term-meta payload carries a `frequency` object that has its
 * own `displayValue` property. Only presence is checked; the value itself is
 * not inspected, and properties inherited through the prototype do not count.
 *
 * @param meta Untrusted metadata value taken from a parsed bank entry.
 * @returns `true` when `meta.frequency` is an object owning `displayValue`.
 */
function hasFrequencyDisplayValue(meta: unknown): boolean {
  if (typeof meta !== "object" || meta === null) {
    return false;
  }

  const { frequency } = meta as { frequency?: unknown };
  if (typeof frequency !== "object" || frequency === null) {
    return false;
  }

  return Object.prototype.hasOwnProperty.call(frequency, "displayValue");
}
/**
 * Priority of each JLPT level when one term is listed under several levels.
 * The larger number wins, so N1 takes precedence over every other level.
 */
const JLPT_LEVEL_PRECEDENCE: Record<JlptLevel, number> = {
  N1: 5,
  N2: 4,
  N3: 3,
  N4: 2,
  N5: 1,
};

/**
 * Merges the entries of one parsed term-meta bank into `terms`.
 *
 * Each usable entry is an array whose first element is the term (a string)
 * and whose third element is a metadata object accepted by
 * hasFrequencyDisplayValue; anything else is skipped without complaint.
 * When a term is already present, the level with the higher precedence
 * (N1 > N2 > N3 > N4 > N5) is kept.
 *
 * @param rawEntries Parsed JSON content of a bank file; ignored unless it is an array.
 * @param level JLPT level that every entry of this bank belongs to.
 * @param terms Accumulator, mutated in place, mapping normalized term to level.
 * @param log Called once for every entry whose level lost against a different,
 *   higher-precedence level already stored for the same term. Repeated
 *   entries at the same level are merged silently.
 */
function addEntriesToMap(
  rawEntries: unknown,
  level: JlptLevel,
  terms: Map<string, JlptLevel>,
  log: (message: string) => void,
): void {
  if (!Array.isArray(rawEntries)) {
    return;
  }

  for (const rawEntry of rawEntries) {
    if (!Array.isArray(rawEntry)) {
      continue;
    }

    // Entry layout is [term, entryId, meta]; the middle element is not needed.
    const [term, , meta] = rawEntry as unknown[];
    if (typeof term !== "string") {
      continue;
    }

    const normalizedTerm = normalizeJlptTerm(term);
    if (!normalizedTerm || !hasFrequencyDisplayValue(meta)) {
      continue;
    }

    const existingLevel = terms.get(normalizedTerm);
    if (
      existingLevel === undefined ||
      JLPT_LEVEL_PRECEDENCE[level] > JLPT_LEVEL_PRECEDENCE[existingLevel]
    ) {
      terms.set(normalizedTerm, level);
      continue;
    }

    // A term commonly appears several times in the same bank (for example once
    // per reading); only a genuine disagreement between levels is worth reporting.
    if (existingLevel !== level) {
      log(
        `JLPT dictionary already has ${normalizedTerm} as ${existingLevel}; keeping that level instead of ${level}`,
      );
    }
  }
}
/**
 * Loads every JLPT bank file found directly inside `dictionaryPath` and
 * merges their terms into a single map.
 *
 * Loading is best-effort: a bank file that is absent is skipped silently,
 * and one that cannot be read or is not valid JSON is reported through `log`
 * and skipped, so a single bad file never prevents the others from loading.
 *
 * @param dictionaryPath Directory expected to contain the files named in JLPT_BANK_FILES.
 * @param log Sink for diagnostics about unreadable banks and level conflicts.
 * @returns Map from normalized term to JLPT level; empty when nothing could be loaded.
 */
function collectDictionaryFromPath(
  dictionaryPath: string,
  log: (message: string) => void,
): Map<string, JlptLevel> {
  const terms = new Map<string, JlptLevel>();

  for (const bank of JLPT_BANK_FILES) {
    const bankPath = path.join(dictionaryPath, bank.filename);
    if (!fs.existsSync(bankPath)) {
      continue;
    }

    let rawText: string;
    try {
      rawText = fs.readFileSync(bankPath, "utf-8");
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      log(`JLPT dictionary bank could not be read, skipping ${bankPath}: ${reason}`);
      continue;
    }

    let rawEntries: unknown;
    try {
      rawEntries = JSON.parse(rawText) as unknown;
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      log(`JLPT dictionary bank is not valid JSON, skipping ${bankPath}: ${reason}`);
      continue;
    }

    addEntriesToMap(rawEntries, bank.level, terms, log);
  }

  return terms;
}
/**
 * Reports whether `candidatePath` currently resolves to a directory.
 * Any stat failure (missing path, permission error, path removed mid-check)
 * counts as "not a directory" instead of throwing.
 */
function isExistingDirectory(candidatePath: string): boolean {
  try {
    return fs.statSync(candidatePath).isDirectory();
  } catch {
    return false;
  }
}

/**
 * Builds a term -> JLPT level lookup from the first usable dictionary
 * directory in `options.searchPaths`.
 *
 * Directories are tried in order. The first one that yields at least one term
 * wins and later candidates are not examined. If none qualifies, the outcome
 * is reported through `options.log` and a lookup that always returns `null`
 * is handed back, so callers never have to deal with a missing service.
 *
 * @param options Candidate directories and the diagnostic log sink.
 * @returns A synchronous lookup returning the level of a term (matched after
 *   trimming surrounding whitespace), or `null` when the term is empty,
 *   whitespace-only, or unknown.
 */
export async function createJlptVocabularyLookupService(
  options: JlptVocabLookupOptions,
): Promise<(term: string) => JlptLevel | null> {
  let sawDirectory = false;

  for (const dictionaryPath of options.searchPaths) {
    if (!isExistingDirectory(dictionaryPath)) {
      continue;
    }
    sawDirectory = true;

    const terms = collectDictionaryFromPath(dictionaryPath, options.log);
    if (terms.size > 0) {
      options.log(
        `JLPT dictionary loaded from ${dictionaryPath} (${terms.size} entries)`,
      );
      return (term: string): JlptLevel | null => {
        if (!term) return null;
        const normalized = normalizeJlptTerm(term);
        return normalized ? terms.get(normalized) ?? null : null;
      };
    }

    options.log(
      `JLPT dictionary directory exists but contains no readable term_meta_bank_*.json files: ${dictionaryPath}`,
    );
  }

  // Reaching this point means every candidate path was tried and rejected.
  options.log(
    `JLPT dictionary not found. Searched ${options.searchPaths.length} candidate path(s): ${options.searchPaths.join(", ")}`,
  );
  if (sawDirectory) {
    options.log(
      "JLPT dictionary directories found, but none contained valid term_meta_bank_*.json files.",
    );
  }
  return NOOP_LOOKUP;
}