// SubMiner/src/core/services/immersion-tracker/legacy-vocabulary-pos.ts

import type { Token } from '../../../types';
import type { LegacyVocabularyPosResolution } from './types';
import { deriveStoredPartOfSpeech } from '../tokenizer/part-of-speech';

// Amount subtracted from a katakana code point to obtain the corresponding
// hiragana code point (e.g. U+30A2 'ア' - 0x60 = U+3042 'あ').
const KATAKANA_TO_HIRAGANA_OFFSET = 0x60;
// Inclusive lower bound of the katakana range that gets converted (U+30A1 'ァ').
const KATAKANA_CODEPOINT_START = 0x30a1;
// Inclusive upper bound of the katakana range that gets converted (U+30F6 'ヶ').
// Code points above this (e.g. the prolonged sound mark U+30FC) are left as-is.
const KATAKANA_CODEPOINT_END = 0x30f6;

/**
 * Coerces an optional text field into a trimmed string.
 *
 * @param value - Raw text; may be `null` or `undefined`.
 * @returns The input with leading/trailing whitespace removed, or `''` when
 *   the input is not a string.
 */
function normalizeLookupText(value: string | null | undefined): string {
  if (typeof value !== 'string') {
    return '';
  }
  return value.trim();
}

/**
 * Converts katakana in `text` to hiragana, one code point at a time.
 *
 * Code points inside [KATAKANA_CODEPOINT_START, KATAKANA_CODEPOINT_END] are
 * shifted down by KATAKANA_TO_HIRAGANA_OFFSET; everything else is copied
 * through unchanged.
 *
 * @param text - Text to convert.
 * @returns A new string with the in-range katakana replaced by hiragana.
 */
function katakanaToHiragana(text: string): string {
  // Array.from walks the string by code point, so surrogate pairs stay intact.
  const converted = Array.from(text, (symbol) => {
    const codePoint = symbol.codePointAt(0);
    if (codePoint === undefined) {
      // Contributes nothing to the output; exists mainly to narrow the type.
      return '';
    }
    const isConvertibleKatakana =
      codePoint >= KATAKANA_CODEPOINT_START && codePoint <= KATAKANA_CODEPOINT_END;
    return isConvertibleKatakana
      ? String.fromCodePoint(codePoint - KATAKANA_TO_HIRAGANA_OFFSET)
      : symbol;
  });
  return converted.join('');
}

/**
 * Builds a LegacyVocabularyPosResolution from a single token.
 *
 * - `headword` is the trimmed token headword, falling back to the trimmed
 *   surface `word` when the headword is empty.
 * - `reading` is the trimmed `katakanaReading` converted to hiragana.
 * - `partOfSpeech` is delegated to `deriveStoredPartOfSpeech`, which receives
 *   the token's raw (untrimmed) `partOfSpeech` and `pos1`.
 * - `pos1`..`pos3` are the trimmed token fields (empty string when absent).
 *
 * @param token - Token to project.
 * @returns The resolution record for that token.
 */
function toResolution(token: Token): LegacyVocabularyPosResolution {
  const headword = normalizeLookupText(token.headword) || normalizeLookupText(token.word);

  const trimmedReading = normalizeLookupText(token.katakanaReading);
  const reading = katakanaToHiragana(trimmedReading);

  const partOfSpeech = deriveStoredPartOfSpeech({
    partOfSpeech: token.partOfSpeech,
    pos1: token.pos1,
  });

  const pos1 = normalizeLookupText(token.pos1);
  const pos2 = normalizeLookupText(token.pos2);
  const pos3 = normalizeLookupText(token.pos3);

  return { headword, reading, partOfSpeech, pos1, pos2, pos3 };
}

/**
 * Picks the token that corresponds to a legacy vocabulary entry and returns
 * its part-of-speech resolution.
 *
 * Matching is attempted in priority order, and a step only succeeds when it
 * identifies exactly one token:
 *   1. tokens whose trimmed surface `word` equals the trimmed lookup text;
 *   2. tokens whose trimmed `headword` equals the trimmed lookup text;
 *   3. the token list itself, if it contains a single token (even when that
 *      token matched neither field).
 *
 * @param lookupText - Text of the legacy vocabulary entry.
 * @param tokens - Tokenizer output to search, or `null` when unavailable.
 * @returns The resolution for the selected token, or `null` when the lookup
 *   text is blank, there are no tokens, or no step yields a unique token.
 */
export function resolveLegacyVocabularyPosFromTokens(
  lookupText: string,
  tokens: Token[] | null,
): LegacyVocabularyPosResolution | null {
  const target = normalizeLookupText(lookupText);
  if (!target) {
    return null;
  }
  if (!tokens || tokens.length === 0) {
    return null;
  }

  // Field selectors in priority order: surface form first, then headword.
  const fieldSelectors: Array<(token: Token) => string | null | undefined> = [
    (token) => token.word,
    (token) => token.headword,
  ];

  for (const selectField of fieldSelectors) {
    const hits = tokens.filter((token) => normalizeLookupText(selectField(token)) === target);
    // Zero or multiple hits are ambiguous; move on to the next strategy.
    if (hits.length === 1) {
      return toResolution(hits[0]!);
    }
  }

  // Last resort: a lone token is taken to be the entry regardless of its text.
  return tokens.length === 1 ? toResolution(tokens[0]!) : null;
}