feat(stats): add v1 immersion stats dashboard (#19)

This commit is contained in: (branch list not captured)
Committed by GitHub on 2026-03-20 02:43:28 -07:00
parent 42abdd1268
commit 6749ff843c
555 changed files with 46356 additions and 2553 deletions

View File

@@ -19,6 +19,7 @@
import { PartOfSpeech, Token, MergedToken } from './types';
import { DEFAULT_ANNOTATION_POS1_EXCLUSION_CONFIG } from './token-pos1-exclusions';
import { DEFAULT_ANNOTATION_POS2_EXCLUSION_CONFIG } from './token-pos2-exclusions';
import { shouldExcludeTokenFromSubtitleAnnotations } from './core/services/tokenizer/subtitle-annotation-filter';
export function isNoun(tok: Token): boolean {
return tok.partOfSpeech === PartOfSpeech.noun;
@@ -169,13 +170,17 @@ export function mergeTokens(
isKnownWord: (text: string) => boolean = () => false,
knownWordMatchMode: 'headword' | 'surface' = 'headword',
shouldLookupKnownWords = true,
sourceText?: string,
): MergedToken[] {
if (!tokens || tokens.length === 0) {
return [];
}
const result: MergedToken[] = [];
const normalizedSourceText =
typeof sourceText === 'string' ? sourceText.replace(/\r?\n/g, ' ').trim() : null;
let charOffset = 0;
let sourceCursor = 0;
let lastStandaloneToken: Token | null = null;
const resolveKnownMatch = (text: string | undefined): boolean => {
if (!shouldLookupKnownWords || !text) {
@@ -185,9 +190,12 @@ export function mergeTokens(
};
for (const token of tokens) {
const start = charOffset;
const end = charOffset + token.word.length;
const matchedStart =
normalizedSourceText !== null ? normalizedSourceText.indexOf(token.word, sourceCursor) : -1;
const start = matchedStart >= sourceCursor ? matchedStart : charOffset;
const end = start + token.word.length;
charOffset = end;
sourceCursor = end;
let shouldMergeToken = false;
@@ -290,6 +298,10 @@ function isNPlusOneWordCountToken(
pos1Exclusions: ReadonlySet<string> = N_PLUS_ONE_IGNORED_POS1,
pos2Exclusions: ReadonlySet<string> = N_PLUS_ONE_IGNORED_POS2,
): boolean {
if (shouldExcludeTokenFromSubtitleAnnotations(token, { pos1Exclusions, pos2Exclusions })) {
return false;
}
const normalizedPos1 = normalizePos1Tag(token.pos1);
const hasPos1 = normalizedPos1.length > 0;
if (isExcludedByTagSet(normalizedPos1, pos1Exclusions)) {