initial commit

This commit is contained in:
2026-02-09 19:04:19 -08:00
commit f92b57c7b6
531 changed files with 196294 additions and 0 deletions

138
vendor/yomitan/js/language/CJK-util.js vendored Normal file
View File

@@ -0,0 +1,138 @@
/*
* Copyright (C) 2024-2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import {basicTextProcessorOptions} from './text-processors.js';
/**
 * Code point ranges that together cover the CJK ideograph blocks.
 * Every entry is a `[first, last]` pair; both ends belong to the range.
 * Entries are listed by block name, not sorted by code point.
 * @type {import('CJK-util').CodepointRange[]}
 */
export const CJK_IDEOGRAPH_RANGES = [
    [0x4e00, 0x9fff], // CJK Unified Ideographs
    [0x3400, 0x4dbf], // CJK Unified Ideographs Extension A
    [0x20000, 0x2a6df], // CJK Unified Ideographs Extension B
    [0x2a700, 0x2b73f], // CJK Unified Ideographs Extension C
    [0x2b740, 0x2b81f], // CJK Unified Ideographs Extension D
    [0x2b820, 0x2ceaf], // CJK Unified Ideographs Extension E
    [0x2ceb0, 0x2ebef], // CJK Unified Ideographs Extension F
    [0x30000, 0x3134f], // CJK Unified Ideographs Extension G
    [0x31350, 0x323af], // CJK Unified Ideographs Extension H
    [0x2ebf0, 0x2ee5f], // CJK Unified Ideographs Extension I
    [0xf900, 0xfaff], // CJK Compatibility Ideographs
    [0x2f800, 0x2fa1f], // CJK Compatibility Ideographs Supplement
];
/** @type {import('CJK-util').CodepointRange[]} */
export const FULLWIDTH_CHARACTER_RANGES = [
[0xff10, 0xff19], // Fullwidth numbers
[0xff21, 0xff3a], // Fullwidth upper case Latin letters
[0xff41, 0xff5a], // Fullwidth lower case Latin letters
[0xff01, 0xff0f], // Fullwidth punctuation 1
[0xff1a, 0xff1f], // Fullwidth punctuation 2
[0xff3b, 0xff3f], // Fullwidth punctuation 3
[0xff5b, 0xff60], // Fullwidth punctuation 4
[0xffe0, 0xffee], // Currency markers
];
/** @type {import('CJK-util').CodepointRange} */
export const CJK_PUNCTUATION_RANGE = [0x3000, 0x303f];
/** @type {import('CJK-util').CodepointRange} */
export const CJK_COMPATIBILITY = [0x3300, 0x33ff];
/**
 * Tests whether a code point lies inside a single range, with both bounds included.
 * @param {number} codePoint
 * @param {import('CJK-util').CodepointRange} range
 * @returns {boolean}
 */
export function isCodePointInRange(codePoint, range) {
    const [lowest, highest] = range;
    if (codePoint >= lowest) {
        return codePoint <= highest;
    }
    return false;
}
/**
 * Tests whether a code point lies inside at least one of the given ranges.
 * Ranges are checked in order and the search stops at the first hit.
 * @param {number} codePoint
 * @param {import('CJK-util').CodepointRange[]} ranges
 * @returns {boolean}
 */
export function isCodePointInRanges(codePoint, ranges) {
    for (const range of ranges) {
        const [lowest, highest] = range;
        const isInside = codePoint >= lowest && codePoint <= highest;
        if (isInside) {
            return true;
        }
    }
    return false;
}
/** @type {import('CJK-util').CodepointRange} */
export const KANGXI_RADICALS_RANGE = [0x2f00, 0x2fdf];
/** @type {import('CJK-util').CodepointRange} */
export const CJK_RADICALS_SUPPLEMENT_RANGE = [0x2e80, 0x2eff];
/** @type {import('CJK-util').CodepointRange} */
export const CJK_STROKES_RANGE = [0x31c0, 0x31ef];
/** @type {import('CJK-util').CodepointRange[]} */
export const CJK_RADICALS_RANGES = [
KANGXI_RADICALS_RANGE,
CJK_RADICALS_SUPPLEMENT_RANGE,
CJK_STROKES_RANGE,
];
/**
 * Replaces radical characters with their NFKD-normalized form and leaves
 * everything else untouched.
 *
 * The text is processed one UTF-16 code unit at a time; a unit is normalized only
 * when its value is non-zero and falls inside `CJK_RADICALS_RANGES`.
 * @param {string} text
 * @returns {string}
 */
export function normalizeRadicals(text) {
    const normalizedUnits = text.split('').map((unit) => {
        const codePoint = unit.codePointAt(0);
        const isRadical = codePoint ? isCodePointInRanges(codePoint, CJK_RADICALS_RANGES) : false;
        return isRadical ? unit.normalize('NFKD') : unit;
    });
    return normalizedUnits.join('');
}
/**
 * Text processor that runs `normalizeRadicals` over the input when its setting is
 * enabled and returns the input unchanged otherwise.
 * @type {import('language').TextProcessor<boolean>}
 */
export const normalizeRadicalCharacters = {
    name: 'Normalize radical characters',
    description: '⼀ → 一 (U+2F00 → U+4E00)',
    options: basicTextProcessorOptions,
    process: (str, setting) => {
        if (!setting) {
            return str;
        }
        return normalizeRadicals(str);
    },
};

View File

@@ -0,0 +1,32 @@
/*
* Copyright (C) 2024-2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import {basicTextProcessorOptions} from '../text-processors.js';
// First and last code point of the contiguous run of Syriac marks that are treated as optional.
const SYRIAC_MARKS_FIRST = 0x0730;
const SYRIAC_MARKS_LAST = 0x074A;
// Every mark that may be stripped: eight general combining marks followed by
// each code point from U+0730 through U+074A.
const optionalDiacritics = [
    '\u0303',
    '\u0304',
    '\u0307',
    '\u0308',
    '\u0323',
    '\u032E',
    '\u0330',
    '\u0331',
    ...Array.from(
        {length: SYRIAC_MARKS_LAST - SYRIAC_MARKS_FIRST + 1},
        (_, offset) => String.fromCodePoint(SYRIAC_MARKS_FIRST + offset),
    ),
];
// Character class matching any one of the marks; global so a single replace strips them all.
const diacriticsRegex = new RegExp('[' + optionalDiacritics.join('') + ']', 'g');
/**
 * Text processor that deletes every character matched by `diacriticsRegex` when its
 * setting is enabled; with the setting disabled the text is returned as-is.
 * @type {import('language').TextProcessor<boolean>}
 */
export const removeSyriacScriptDiacritics = {
    name: 'Remove diacritics',
    description: 'ܟܵܬܹܒ݂ ⬅️ ܟܬܒ',
    options: basicTextProcessorOptions,
    process: (text, setting) => {
        if (!setting) {
            return text;
        }
        return text.replace(diacriticsRegex, '');
    },
};

View File

@@ -0,0 +1,109 @@
/*
* Copyright (C) 2024-2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import {basicTextProcessorOptions} from '../text-processors.js';
// Arabic-script marks that `removeArabicScriptDiacritics` strips from text.
// Each entry is a single code point; the trailing comment gives its name.
const optionalDiacritics = [
'\u0618', // Small Fatha
'\u0619', // Small Damma
'\u061A', // Small Kasra
'\u064B', // Fathatan
'\u064C', // Dammatan
'\u064D', // Kasratan
'\u064E', // Fatha
'\u064F', // Damma
'\u0650', // Kasra
'\u0651', // Shadda
'\u0652', // Sukun
'\u0653', // Maddah
'\u0654', // Hamza Above
'\u0655', // Hamza Below
'\u0656', // Subscript Alef
'\u0670', // Dagger Alef
];
// Character class built from the list above; the global flag makes a single
// `replace` call remove every occurrence rather than only the first.
const diacriticsRegex = new RegExp(`[${optionalDiacritics.join('')}]`, 'g');
/**
 * Text processor that deletes every character matched by `diacriticsRegex` when its
 * setting is enabled; with the setting disabled the text is returned as-is.
 * @type {import('language').TextProcessor<boolean>}
 */
export const removeArabicScriptDiacritics = {
    name: 'Remove diacritics',
    description: 'وَلَدَ → ولد',
    options: basicTextProcessorOptions,
    process: (text, setting) => {
        if (!setting) {
            return text;
        }
        return text.replace(diacriticsRegex, '');
    },
};
/**
 * Text processor that removes every tatweel character from the text when its
 * setting is enabled.
 * @type {import('language').TextProcessor<boolean>}
 */
export const removeTatweel = {
    name: 'Remove tatweel characters',
    description: 'لـكن → لكن',
    options: basicTextProcessorOptions,
    process: (text, setting) => {
        if (!setting) {
            return text;
        }
        return text.replaceAll('ـ', '');
    },
};
/**
 * Text processor that applies Unicode NFKC normalization to the text when its
 * setting is enabled.
 * @type {import('language').TextProcessor<boolean>}
 */
export const normalizeUnicode = {
    name: 'Normalize unicode',
    description: 'ﻴ → ي',
    options: basicTextProcessorOptions,
    process: (text, setting) => {
        if (!setting) {
            return text;
        }
        return text.normalize('NFKC');
    },
};
/**
 * Text processor that, when enabled, swaps a bare alif for an alif carrying a
 * hamza above. Only the first bare alif found in the text is replaced, wherever
 * it occurs.
 * @type {import('language').TextProcessor<boolean>}
 */
export const addHamzaTop = {
    name: 'Add Hamza to top of Alif',
    description: 'اكبر → أكبر',
    options: basicTextProcessorOptions,
    process: (text, setting) => {
        if (!setting) {
            return text;
        }
        return text.replace('ا', 'أ');
    },
};
/**
 * Text processor that, when enabled, swaps a bare alif for an alif carrying a
 * hamza below. Only the first bare alif found in the text is replaced, wherever
 * it occurs.
 * @type {import('language').TextProcessor<boolean>}
 */
export const addHamzaBottom = {
    name: 'Add Hamza to bottom of Alif',
    description: 'اسلام → إسلام',
    options: basicTextProcessorOptions,
    process: (text, setting) => {
        if (!setting) {
            return text;
        }
        return text.replace('ا', 'إ');
    },
};
/**
 * Text processor that, when enabled, turns an alif maqsura at the very end of the
 * text into a yaa. Occurrences elsewhere in the text are not touched.
 * @type {import('language').TextProcessor<boolean>}
 */
export const convertAlifMaqsuraToYaa = {
    name: 'Convert Alif Maqsura to Yaa',
    description: 'فى → في',
    options: basicTextProcessorOptions,
    process: (text, setting) => {
        if (!setting) {
            return text;
        }
        return text.replace(/ى$/, 'ي');
    },
};
/**
 * Text processor that, when enabled, turns a ha at the very end of the text into
 * a ta marbuta. Occurrences elsewhere in the text are not touched.
 * @type {import('language').TextProcessor<boolean>}
 */
export const convertHaToTaMarbuta = {
    name: 'Convert final Ha to Ta Marbuta',
    description: 'لغه → لغة',
    options: basicTextProcessorOptions,
    process: (text, setting) => {
        if (!setting) {
            return text;
        }
        return text.replace(/ه$/, 'ة');
    },
};

View File

@@ -0,0 +1,834 @@
/*
* Copyright (C) 2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import {prefixInflection, suffixInflection} from '../language-transforms.js';
// Regex character-class source that sandwichInflection repeats to match the stem
// between an inflected prefix and an inflected suffix.
const arabicLetters = '[\u0620-\u065F\u066E-\u06D3\u06D5\u06EE\u06EF\u06FA-\u06FC\u06FF]';
// Attached direct object pronouns, split by grammatical person.
const directObjectPronouns1st = ['ني', 'نا'];
const directObjectPronouns2nd = ['ك', 'كما', 'كم', 'كن'];
const directObjectPronouns3rd = ['ه', 'ها', 'هما', 'هم', 'هن'];
// All persons, in 1st / 2nd / 3rd order.
const directObjectPronouns = directObjectPronouns1st.concat(directObjectPronouns2nd, directObjectPronouns3rd);
// Possessive pronouns without the leading ي entry.
const nonAssimilatingPossessivePronouns = ['نا'].concat(directObjectPronouns2nd, directObjectPronouns3rd);
// Full possessive list: ي followed by the non-assimilating pronouns.
const possessivePronouns = ['ي'].concat(nonAssimilatingPossessivePronouns);
/**
 * Lists every spelling of an imperfect-verb prefix once the optional leading
 * particles are put in front of it.
 *
 * The bare prefix comes first, followed by the و, ف, س, وس and فس variants; the
 * ل, ول and فل variants are appended only when `includeLiPrefix` is true.
 * @param {string} prefix
 * @param {boolean} includeLiPrefix
 * @returns {string[]}
 */
function getImperfectPrefixes(prefix, includeLiPrefix = true) {
    const leadingParticles = ['', 'و', 'ف', 'س', 'وس', 'فس'];
    if (includeLiPrefix) {
        leadingParticles.push('ل', 'ول', 'فل');
    }
    return leadingParticles.map((particle) => `${particle}${prefix}`);
}
/**
 * Builds a prefix rule whose match also depends on how the stem begins.
 *
 * The rule matches text that starts with `inflectedPrefix` immediately followed
 * by `initialStemSegment`; both are inserted into the regular expression as
 * pattern source, so `initialStemSegment` may be a lookahead. Deinflecting drops
 * as many leading characters as `inflectedPrefix` has and puts
 * `deinflectedPrefix` in their place.
 * @param {string} inflectedPrefix
 * @param {string} deinflectedPrefix
 * @param {string} initialStemSegment
 * @param {Condition[]} conditionsIn
 * @param {Condition[]} conditionsOut
 * @returns {import('language-transformer').Rule<Condition>}
 */
function conditionalPrefixInflection(inflectedPrefix, deinflectedPrefix, initialStemSegment, conditionsIn, conditionsOut) {
    const matcher = new RegExp(`^${inflectedPrefix}${initialStemSegment}`);
    const restorePrefix = (text) => {
        const stem = text.slice(inflectedPrefix.length);
        return deinflectedPrefix + stem;
    };
    return {
        type: 'prefix',
        isInflected: matcher,
        deinflect: restorePrefix,
        conditionsIn,
        conditionsOut,
    };
}
/**
 * Builds a suffix rule whose match also depends on how the stem ends.
 *
 * The rule matches text that ends with `finalStemSegment` immediately followed by
 * `inflectedSuffix`; both are inserted into the regular expression as pattern
 * source, so `finalStemSegment` may be a lookbehind. Deinflecting drops as many
 * trailing characters as `inflectedSuffix` has and appends `deinflectedSuffix`.
 *
 * An empty `inflectedSuffix` is supported: nothing is removed and
 * `deinflectedSuffix` is simply appended.
 * @param {string} inflectedSuffix
 * @param {string} deinflectedSuffix
 * @param {string} finalStemSegment
 * @param {Condition[]} conditionsIn
 * @param {Condition[]} conditionsOut
 * @returns {import('language-transformer').SuffixRule<Condition>}
 */
function conditionalSuffixInflection(inflectedSuffix, deinflectedSuffix, finalStemSegment, conditionsIn, conditionsOut) {
    const suffixRegExp = new RegExp(finalStemSegment + inflectedSuffix + '$');
    return {
        type: 'suffix',
        isInflected: suffixRegExp,
        deinflected: deinflectedSuffix,
        deinflect: (text) => {
            // Compute the cut position from the text length: `slice(0, -0)` is
            // `slice(0, 0)` and would throw away the whole stem for an empty suffix.
            const stemLength = Math.max(0, text.length - inflectedSuffix.length);
            return text.slice(0, stemLength) + deinflectedSuffix;
        },
        conditionsIn,
        conditionsOut,
    };
}
/**
 * Builds a rule for an inflection that changes both ends of a word.
 *
 * Dispatch:
 * - When both suffix arguments are empty, the rule is a plain
 *   `conditionalPrefixInflection` (only `initialStemSegment` is used).
 * - Otherwise, when both prefix arguments are empty, the rule is a plain
 *   `conditionalSuffixInflection` (only `finalStemSegment` is used).
 * - Otherwise a rule of type 'other' is returned. It matches the whole text:
 *   `inflectedPrefix`, `initialStemSegment`, one or more `arabicLetters`,
 *   `finalStemSegment`, then `inflectedSuffix`. Deinflecting swaps the inflected
 *   prefix and suffix for their deinflected counterparts.
 *
 * All string arguments are inserted into the regular expression as pattern
 * source. An empty `inflectedSuffix` combined with a non-empty
 * `deinflectedSuffix` is supported: the stem is kept and the suffix appended.
 * @param {string} inflectedPrefix
 * @param {string} deinflectedPrefix
 * @param {string} inflectedSuffix
 * @param {string} deinflectedSuffix
 * @param {Condition[]} conditionsIn
 * @param {Condition[]} conditionsOut
 * @param {object} [options={}]
 * @param {string} [options.initialStemSegment = '']
 * @param {string} [options.finalStemSegment = '']
 * @returns {import('language-transformer').Rule<Condition>}
 */
function sandwichInflection(
    inflectedPrefix,
    deinflectedPrefix,
    inflectedSuffix,
    deinflectedSuffix,
    conditionsIn,
    conditionsOut,
    {initialStemSegment = '', finalStemSegment = ''} = {},
) {
    if (!inflectedSuffix && !deinflectedSuffix) {
        return conditionalPrefixInflection(
            inflectedPrefix,
            deinflectedPrefix,
            initialStemSegment,
            conditionsIn,
            conditionsOut,
        );
    }
    if (!inflectedPrefix && !deinflectedPrefix) {
        return conditionalSuffixInflection(
            inflectedSuffix,
            deinflectedSuffix,
            finalStemSegment,
            conditionsIn,
            conditionsOut,
        );
    }
    const regex = new RegExp(
        `^${inflectedPrefix}${initialStemSegment}${arabicLetters}+${finalStemSegment}${inflectedSuffix}$`,
    );
    return {
        type: 'other',
        isInflected: regex,
        deinflect: (text) => {
            // Derive the stem end from the text length instead of a negative index:
            // `slice(start, -0)` is `slice(start, 0)` and would drop the whole stem
            // when the inflected suffix is empty.
            const stemStart = inflectedPrefix.length;
            const stemEnd = Math.max(stemStart, text.length - inflectedSuffix.length);
            return deinflectedPrefix + text.slice(stemStart, stemEnd) + deinflectedSuffix;
        },
        conditionsIn,
        conditionsOut,
    };
}
/**
 * Generates the deinflection rules for one imperfect-verb conjugation.
 *
 * For every prefix spelling returned by `getImperfectPrefixes`, one rule is made
 * for the bare `inflectedSuffix`, followed by one rule per attachable direct
 * object pronoun (1st person, then 2nd person, then 3rd person), where the
 * pronoun is appended to `attachedSuffix`. Every rule goes from condition
 * 'iv_p' to 'iv'.
 *
 * When `deinflectedPrefix` is empty, the same set is generated twice more with
 * the deinflected prefix set to أ and then ا, and appended to the result.
 * @param {string} inflectedPrefix
 * @param {string} deinflectedPrefix
 * @param {string} inflectedSuffix
 * @param {string} deinflectedSuffix
 * @param {object} [options={}]
 * @param {string} [options.attachedSuffix = inflectedSuffix]
 * @param {boolean} [options.attachesTo1st = true]
 * @param {boolean} [options.attachesTo2nd = true]
 * @param {boolean} [options.includeLiPrefix = true]
 * @param {string} [options.initialStemSegment = '']
 * @param {string} [options.finalStemSegment = '']
 * @returns {import('language-transformer').Rule<Condition>[]}
 */
function getImperfectRules(
    inflectedPrefix,
    deinflectedPrefix,
    inflectedSuffix,
    deinflectedSuffix,
    options = {},
) {
    const {
        attachedSuffix = inflectedSuffix,
        attachesTo1st = true,
        attachesTo2nd = true,
        includeLiPrefix = true,
        initialStemSegment = '',
        finalStemSegment = '',
    } = options;
    const stemSegments = {initialStemSegment, finalStemSegment};

    // Pronouns that may follow this conjugation; 3rd person always attaches.
    const attachablePronouns = [
        ...(attachesTo1st ? directObjectPronouns1st : []),
        ...(attachesTo2nd ? directObjectPronouns2nd : []),
        ...directObjectPronouns3rd,
    ];
    // The bare suffix first, then the suffix with each attached direct object pronoun.
    const suffixVariants = [
        inflectedSuffix,
        ...attachablePronouns.map((pronoun) => attachedSuffix + pronoun),
    ];

    const rules = [];
    for (const pre of getImperfectPrefixes(inflectedPrefix, includeLiPrefix)) {
        for (const suffix of suffixVariants) {
            rules.push(sandwichInflection(pre, deinflectedPrefix, suffix, deinflectedSuffix, ['iv_p'], ['iv'], stemSegments));
        }
    }

    if (!deinflectedPrefix) {
        const inheritedOptions = {
            attachedSuffix,
            attachesTo1st,
            attachesTo2nd,
            includeLiPrefix,
            initialStemSegment,
            finalStemSegment,
        };
        // For Form IV, VII, VIII, IX, X, XI, XII, XIII, XIV, XV verbs
        for (const restoredPrefix of ['أ', 'ا']) {
            rules.push(...getImperfectRules(inflectedPrefix, restoredPrefix, inflectedSuffix, deinflectedSuffix, inheritedOptions));
        }
    }
    return rules;
}
// `Condition` is the union of the keys of the `conditions` table below; the rules
// in this file use these keys in their `conditionsIn` / `conditionsOut` arrays.
/** @typedef {keyof typeof conditions} Condition */
// Condition table handed to the transform descriptor as its `conditions` field.
// Every entry has a display `name` and an `isDictionaryForm` flag; entries with
// `subConditions` list other keys of this table that they group together.
const conditions = {
// --- Nouns ---
n: {
name: 'Noun',
isDictionaryForm: true,
},
// Grouping entries: each one bundles several of the single-prefix noun entries below.
n_p: {
name: 'Noun with Prefix only',
isDictionaryForm: false,
subConditions: ['n_wa', 'n_bi', 'n_ka', 'n_li', 'n_al', 'n_bi_al', 'n_ka_al', 'n_lil', 'n_li_al'],
},
n_def: {
name: 'Noun with Definite Prefix',
isDictionaryForm: false,
subConditions: ['n_al', 'n_bi_al', 'n_ka_al', 'n_lil', 'n_li_al'],
},
n_indef: {
name: 'Noun with Indefinite Prefix',
isDictionaryForm: false,
subConditions: ['n_wa', 'n_bi', 'n_ka', 'n_li'],
},
n_nom: {
name: 'Nominative Noun with Prefix',
isDictionaryForm: false,
subConditions: ['n_wa', 'n_li', 'n_al'],
},
n_nom_indef: {
name: 'Nominative Noun with Indefinite Prefix',
isDictionaryForm: false,
subConditions: ['n_wa', 'n_li'],
},
// Single-prefix noun entries, one per prefix spelling.
n_wa: {
name: 'Noun with و Prefix',
isDictionaryForm: false,
},
n_bi: {
name: 'Noun with ب Prefix',
isDictionaryForm: false,
},
n_ka: {
name: 'Noun with ك Prefix',
isDictionaryForm: false,
},
n_li: {
name: 'Noun with ل Prefix',
isDictionaryForm: false,
},
n_al: {
name: 'Noun with ال Prefix',
isDictionaryForm: false,
},
n_bi_al: {
name: 'Noun with بال Prefix',
isDictionaryForm: false,
},
n_ka_al: {
name: 'Noun with كال Prefix',
isDictionaryForm: false,
},
n_lil: {
name: 'Noun with لل Prefix',
isDictionaryForm: false,
},
n_li_al: {
name: 'Noun with Assimilated لل Prefix',
isDictionaryForm: false,
},
n_s: {
name: 'Noun with Suffix',
isDictionaryForm: false,
},
// --- Verbs ---
// `v` groups the three affix-free verb entries (perfect, imperfect, command).
v: {
name: 'Verb',
isDictionaryForm: true,
subConditions: ['pv', 'iv', 'cv'],
},
// Perfect verb: affix-free form plus prefixed and suffix-only forms.
pv: {
name: 'Perfect Verb (no affixes)',
isDictionaryForm: true,
},
pv_p: {
name: 'Perfect Verb with Prefix',
isDictionaryForm: false,
},
pv_s: {
name: 'Perfect Verb with Suffix only',
isDictionaryForm: false,
},
// Imperfect verb: affix-free form plus prefixed and suffix-only forms.
iv: {
name: 'Imperfect Verb (no affixes)',
isDictionaryForm: true,
},
iv_p: {
name: 'Imperfect Verb with Prefix',
isDictionaryForm: false,
},
iv_s: {
name: 'Imperfect Verb with Suffix only',
isDictionaryForm: false,
},
// Command verb: affix-free form plus prefixed and suffix-only forms.
cv: {
name: 'Command Verb (no affixes)',
isDictionaryForm: true,
},
cv_p: {
name: 'Command Verb with Prefix',
isDictionaryForm: false,
},
cv_s: {
name: 'Command Verb with Suffix only',
isDictionaryForm: false,
},
};
/** @type {import('language-transformer').LanguageTransformDescriptor<Condition>} */
export const arabicTransforms = {
language: 'ar',
conditions,
transforms: {
// Noun
'NPref-Wa': {
name: 'and',
description: 'and (و); and, so (ف)',
rules: [
prefixInflection('و', '', ['n_wa'], ['n']),
prefixInflection('ف', '', ['n_wa'], ['n']),
],
},
'NPref-Bi': {
name: 'by, with',
description: 'by, with',
rules: [
prefixInflection('ب', '', ['n_bi'], ['n']),
prefixInflection('وب', '', ['n_bi'], ['n']),
prefixInflection('فب', '', ['n_bi'], ['n']),
],
},
'NPref-Ka': {
name: 'like, such as',
description: 'like, such as',
rules: [
prefixInflection('ك', '', ['n_ka'], ['n']),
prefixInflection('وك', '', ['n_ka'], ['n']),
prefixInflection('فك', '', ['n_ka'], ['n']),
],
},
'NPref-Li': {
name: 'for, to; indeed, truly',
description: 'for, to (لِ); indeed, truly (لَ)',
rules: [
prefixInflection('ل', '', ['n_li'], ['n']),
prefixInflection('ول', '', ['n_li'], ['n']),
prefixInflection('فل', '', ['n_li'], ['n']),
],
},
'NPref-Al': {
name: 'the',
description: 'the',
rules: [
prefixInflection('ال', '', ['n_al'], ['n']),
prefixInflection('وال', '', ['n_al'], ['n']),
prefixInflection('فال', '', ['n_al'], ['n']),
],
},
'NPref-BiAl': {
name: 'by/with + the',
description: 'by/with + the',
rules: [
prefixInflection('بال', '', ['n_bi_al'], ['n']),
prefixInflection('وبال', '', ['n_bi_al'], ['n']),
prefixInflection('فبال', '', ['n_bi_al'], ['n']),
],
},
'NPref-KaAl': {
name: 'like/such as + the',
description: 'like/such as + the',
rules: [
prefixInflection('كال', '', ['n_ka_al'], ['n']),
prefixInflection('وكال', '', ['n_ka_al'], ['n']),
prefixInflection('فكال', '', ['n_ka_al'], ['n']),
],
},
'NPref-Lil': {
name: 'for/to + the',
description: 'for/to + the',
rules: [
conditionalPrefixInflection('لل', '', '(?!ل)', ['n_lil'], ['n']),
conditionalPrefixInflection('ولل', '', '(?!ل)', ['n_lil'], ['n']),
conditionalPrefixInflection('فلل', '', '(?!ل)', ['n_lil'], ['n']),
],
},
'NPref-LiAl': {
name: 'for/to + the',
description: 'for/to + the, assimilated with initial ل',
rules: [
prefixInflection('لل', 'ل', ['n_li_al'], ['n']),
prefixInflection('ولل', 'ل', ['n_li_al'], ['n']),
prefixInflection('فلل', 'ل', ['n_li_al'], ['n']),
],
},
'NSuff-h': {
name: 'pos. pron.',
description: 'possessive pronoun',
rules: [
...nonAssimilatingPossessivePronouns.map((p) => suffixInflection(p, '', ['n_s'], ['n_indef', 'n'])),
conditionalSuffixInflection('ي', '', '(?<!ي)', ['n_s'], ['n_indef', 'n']),
],
},
'NSuff-ap': {
name: 'fem. sg.',
description: 'fem. sg.',
rules: [
suffixInflection('ة', '', ['n_s'], ['n_p', 'n']),
],
},
'NSuff-ath': {
name: 'fem. sg. + pos. pron.',
description: 'fem. sg. + possessive pronoun',
rules: [
...possessivePronouns.map((p) => suffixInflection(`ت${p}`, '', ['n_s'], ['n_indef', 'n'])),
...possessivePronouns.map((p) => suffixInflection(`ت${p}`, 'ة', ['n_s'], ['n_indef', 'n'])),
],
},
'NSuff-AF': {
name: 'acc. indef.',
description: 'accusative indefinite (اً)',
rules: [
suffixInflection('ا', '', ['n_s'], ['n_wa', 'n']),
suffixInflection('اً', '', ['n_s'], ['n_wa', 'n']),
suffixInflection('ًا', '', ['n_s'], ['n_wa', 'n']),
],
},
'NSuff-An': {
name: 'dual',
description: 'nominative m. dual',
rules: [
suffixInflection('ان', '', ['n_s'], ['n_nom', 'n']),
suffixInflection('آن', 'أ', ['n_s'], ['n_nom', 'n']),
],
},
'NSuff-Ah': {
name: 'dual + pos. pron.',
description: 'nominative m. dual + possessive pronoun',
rules: [
suffixInflection('ا', '', ['n_s'], ['n_nom_indef', 'n']),
suffixInflection('آ', 'أ', ['n_s'], ['n_nom_indef', 'n']),
...possessivePronouns.map((p) => suffixInflection(`ا${p}`, '', ['n_s'], ['n_nom_indef', 'n'])),
...possessivePronouns.map((p) => suffixInflection(`آ${p}`, 'أ', ['n_s'], ['n_nom_indef', 'n'])),
],
},
'NSuff-ayn': {
name: 'dual',
description: 'accusative/genitive m. dual',
rules: [
suffixInflection('ين', '', ['n_s'], ['n_p', 'n']),
],
},
'NSuff-ayh': {
name: 'dual + pos. pron.',
description: 'accusative/genitive m. dual + possessive pronoun',
rules: [
suffixInflection('ي', '', ['n_s'], ['n_indef', 'n']),
...nonAssimilatingPossessivePronouns.map((p) => suffixInflection(`ي${p}`, '', ['n_s'], ['n_indef', 'n'])),
],
},
'NSuff-atAn': {
name: 'dual',
description: 'nominative f. dual',
rules: [
suffixInflection('تان', '', ['n_s'], ['n_nom', 'n']),
suffixInflection('تان', 'ة', ['n_s'], ['n_nom', 'n']),
],
},
'NSuff-atAh': {
name: 'dual + pos. pron.',
description: 'nominative f. dual + possessive pronoun',
rules: [
suffixInflection('تا', '', ['n_s'], ['n_nom_indef', 'n']),
suffixInflection('تا', 'ة', ['n_s'], ['n_nom_indef', 'n']),
...possessivePronouns.map((p) => suffixInflection(`تا${p}`, '', ['n_s'], ['n_nom_indef', 'n'])),
...possessivePronouns.map((p) => suffixInflection(`تا${p}`, 'ة', ['n_s'], ['n_nom_indef', 'n'])),
],
},
'NSuff-tayn': {
name: 'dual',
description: 'accusative/genitive f. dual',
rules: [
suffixInflection('تين', '', ['n_s'], ['n_p', 'n']),
suffixInflection('تين', 'ة', ['n_s'], ['n_p', 'n']),
],
},
'NSuff-tayh': {
name: 'dual + pos. pron.',
description: 'accusative/genitive f. dual + possessive pronoun',
rules: [
suffixInflection('تي', '', ['n_s'], ['n_indef', 'n']),
suffixInflection('تي', 'ة', ['n_s'], ['n_indef', 'n']),
...nonAssimilatingPossessivePronouns.map((p) => suffixInflection(`تي${p}`, '', ['n_s'], ['n_indef', 'n'])),
...nonAssimilatingPossessivePronouns.map((p) => suffixInflection(`تي${p}`, 'ة', ['n_s'], ['n_indef', 'n'])),
],
},
'NSuff-At': {
name: 'f. pl.',
description: 'sound f. plural',
rules: [
suffixInflection('ات', '', ['n_s'], ['n_p', 'n']),
suffixInflection('ات', 'ة', ['n_s'], ['n_p', 'n']),
suffixInflection('آت', 'أ', ['n_s'], ['n_p', 'n']),
suffixInflection('آت', 'أة', ['n_s'], ['n_p', 'n']),
],
},
'NSuff-Ath': {
name: 'f. pl. + pos. pron.',
description: 'sound f. plural + possessive pronoun',
rules: [
...possessivePronouns.map((p) => suffixInflection(`ات${p}`, '', ['n_s'], ['n_indef', 'n'])),
...possessivePronouns.map((p) => suffixInflection(`ات${p}`, 'ة', ['n_s'], ['n_indef', 'n'])),
...possessivePronouns.map((p) => suffixInflection(`آت${p}`, 'أ', ['n_s'], ['n_indef', 'n'])),
...possessivePronouns.map((p) => suffixInflection(`آت${p}`, 'أة', ['n_s'], ['n_indef', 'n'])),
],
},
'NSuff-wn': {
name: 'm. pl.',
description: 'nominative sound m. plural',
rules: [
suffixInflection('ون', '', ['n_s'], ['n_nom', 'n']),
],
},
'NSuff-wh': {
name: 'm. pl + pos. pron.',
description: 'nominative sound m. plural + possessive pronoun',
rules: [
suffixInflection('و', '', ['n_s'], ['n_nom_indef', 'n']),
...nonAssimilatingPossessivePronouns.map((p) => suffixInflection(`و${p}`, '', ['n_s'], ['n_nom_indef', 'n'])),
],
},
'NSuff-iyn': {
name: 'm. pl.',
description: 'accusative/genitive sound m. plural',
rules: [
suffixInflection('ين', '', ['n_s'], ['n_p', 'n']),
],
},
'NSuff-iyh': {
name: 'm. pl. + pos. pron.',
description: 'accusative/genitive sound m. plural + possessive pronoun',
rules: [
suffixInflection('ي', '', ['n_s'], ['n_indef', 'n']),
...nonAssimilatingPossessivePronouns.map((p) => suffixInflection(`ي${p}`, '', ['n_s'], ['n_indef', 'n'])),
],
},
// Perfect Verb
'PVPref-Wa': {
name: 'and',
description: 'and (و); and, so (ف)',
rules: [
prefixInflection('و', '', ['pv_p'], ['pv_s', 'pv']),
prefixInflection('ف', '', ['pv_p'], ['pv_s', 'pv']),
],
},
'PVPref-La': {
name: 'would have',
description: 'Result clause particle (if ... I would have ...)',
rules: [prefixInflection('ل', '', ['pv_p'], ['pv_s', 'pv'])],
},
'PVSuff-ah': {
name: 'Perfect Tense',
description: 'Perfect Verb + D.O pronoun',
rules: directObjectPronouns.map((p) => suffixInflection(p, '', ['pv_s'], ['pv'])),
},
'PVSuff-n': {
name: 'Perfect Tense',
description: 'Perfect Verb suffixes assimilating with ن',
rules: [
// Stem doesn't end in ن
conditionalSuffixInflection('ن', '', '(?<!ن)', ['pv_s'], ['pv']),
...directObjectPronouns.map((p) => conditionalSuffixInflection(`ن${p}`, '', '(?<!ن)', ['pv_s'], ['pv'])),
conditionalSuffixInflection('نا', '', '(?<!ن)', ['pv_s'], ['pv']),
...directObjectPronouns2nd.map((p) => conditionalSuffixInflection(`نا${p}`, '', '(?<!ن)', ['pv_s'], ['pv'])),
...directObjectPronouns3rd.map((p) => conditionalSuffixInflection(`نا${p}`, '', '(?<!ن)', ['pv_s'], ['pv'])),
// Suffixes assimilated with stems ending in ن
...directObjectPronouns.map((p) => suffixInflection(`ن${p}`, 'ن', ['pv_s'], ['pv'])),
suffixInflection('نا', 'ن', ['pv_s'], ['pv']),
...directObjectPronouns2nd.map((p) => suffixInflection(`نا${p}`, 'ن', ['pv_s'], ['pv'])),
...directObjectPronouns3rd.map((p) => suffixInflection(`نا${p}`, 'ن', ['pv_s'], ['pv'])),
],
},
'PVSuff-t': {
name: 'Perfect Tense',
description: 'Perfect Verb suffixes assimilating with ت',
rules: [
// This can either be 3rd p. f. singular, or 1st/2nd p. singular
// The former doesn't assimilate, the latter do, so the below accounts for both
suffixInflection('ت', '', ['pv_s'], ['pv']),
...directObjectPronouns.map((p) => suffixInflection(`ت${p}`, '', ['pv_s'], ['pv'])),
// Stem doesn't end in ت
conditionalSuffixInflection('تما', '', '(?<!ت)', ['pv_s'], ['pv']),
...directObjectPronouns1st.map((p) => conditionalSuffixInflection(`تما${p}`, '', '(?<!ت)', ['pv_s'], ['pv'])),
...directObjectPronouns3rd.map((p) => conditionalSuffixInflection(`تما${p}`, '', '(?<!ت)', ['pv_s'], ['pv'])),
conditionalSuffixInflection('تم', '', '(?<!ت)', ['pv_s'], ['pv']),
...directObjectPronouns1st.map((p) => conditionalSuffixInflection(`تمو${p}`, '', '(?<!ت)', ['pv_s'], ['pv'])),
...directObjectPronouns3rd.map((p) => conditionalSuffixInflection(`تمو${p}`, '', '(?<!ت)', ['pv_s'], ['pv'])),
conditionalSuffixInflection('تن', '', '(?<!ت)', ['pv_s'], ['pv']),
...directObjectPronouns1st.map((p) => conditionalSuffixInflection(`تن${p}`, '', '(?<!ت)', ['pv_s'], ['pv'])),
...directObjectPronouns3rd.map((p) => conditionalSuffixInflection(`تن${p}`, '', '(?<!ت)', ['pv_s'], ['pv'])),
// Suffixes assimilated with stems ending in ت
...directObjectPronouns.map((p) => suffixInflection(`ت${p}`, 'ت', ['pv_s'], ['pv'])),
suffixInflection('تما', 'ت', ['pv_s'], ['pv']),
...directObjectPronouns1st.map((p) => suffixInflection(`تما${p}`, 'ت', ['pv_s'], ['pv'])),
...directObjectPronouns3rd.map((p) => suffixInflection(`تما${p}`, 'ت', ['pv_s'], ['pv'])),
suffixInflection('تم', 'ت', ['pv_s'], ['pv']),
...directObjectPronouns1st.map((p) => suffixInflection(`تمو${p}`, 'ت', ['pv_s'], ['pv'])),
...directObjectPronouns3rd.map((p) => suffixInflection(`تمو${p}`, 'ت', ['pv_s'], ['pv'])),
suffixInflection('تن', 'ت', ['pv_s'], ['pv']),
...directObjectPronouns1st.map((p) => suffixInflection(`تن${p}`, 'ت', ['pv_s'], ['pv'])),
...directObjectPronouns3rd.map((p) => suffixInflection(`تن${p}`, 'ت', ['pv_s'], ['pv'])),
],
},
'PVSuff-at': {
name: 'Perfect Tense',
description: 'Perfect Verb non-assimilating ت suffixes',
rules: [
suffixInflection('تا', '', ['pv_s'], ['pv']),
...directObjectPronouns.map((p) => suffixInflection(`تا${p}`, '', ['pv_s'], ['pv'])),
],
},
'PVSuff-A': {
name: 'Perfect Tense',
description: 'Perfect Verb 3rd. m. dual',
rules: [
suffixInflection('ا', '', ['pv_s'], ['pv']),
...directObjectPronouns.map((p) => suffixInflection(`ا${p}`, '', ['pv_s'], ['pv'])),
// Combines with أ to form آ
suffixInflection('آ', 'أ', ['pv_s'], ['pv']),
...directObjectPronouns.map((p) => suffixInflection(`آ${p}`, 'أ', ['pv_s'], ['pv'])),
],
},
'PVSuff-uw': {
name: 'Perfect Tense',
description: 'Perfect Verb 3rd. m. pl.',
rules: [
suffixInflection('وا', '', ['pv_s'], ['pv']),
...directObjectPronouns.map((p) => suffixInflection(`و${p}`, '', ['pv_s'], ['pv'])),
],
},
// Imperfect Verb
'IVPref-hw': {
name: 'Imperfect Tense',
description: 'Imperfect Verb 3rd. m. sing.',
rules: [...getImperfectRules('ي', '', '', '')],
},
'IVPref-hy': {
name: 'Imperfect Tense',
description: 'Imperfect Verb 3rd. f. sing.',
rules: [...getImperfectRules('ت', '', '', '')],
},
'IVPref-hmA': {
name: 'Imperfect Tense',
description: 'Imperfect Verb 3rd. m. dual',
rules: [
// Indicative
...getImperfectRules('ي', '', 'ان', '', {includeLiPrefix: false}),
...getImperfectRules('ي', '', 'آن', 'أ', {includeLiPrefix: false}),
// Subjunctive
...getImperfectRules('ي', '', 'ا', ''),
...getImperfectRules('ي', '', 'آ', 'أ'),
],
},
'IVPref-hmA-ta': {
name: 'Imperfect Tense',
description: 'Imperfect Verb 3rd. f. dual',
rules: [
// Indicative
...getImperfectRules('ت', '', 'ان', '', {includeLiPrefix: false}),
...getImperfectRules('ت', '', 'آن', 'أ', {includeLiPrefix: false}),
// Subjunctive
...getImperfectRules('ت', '', 'ا', ''),
...getImperfectRules('ت', '', 'آ', 'أ'),
],
},
'IVPref-hm': {
name: 'Imperfect Tense',
description: 'Imperfect Verb 3rd. m. pl.',
rules: [
// Indicative
...getImperfectRules('ي', '', 'ون', '', {includeLiPrefix: false}),
// Subjunctive
...getImperfectRules('ي', '', 'وا', '', {attachedSuffix: 'و'}),
],
},
'IVPref-hn': {
name: 'Imperfect Tense',
description: 'Imperfect Verb 3rd. f. pl.',
rules: [
...getImperfectRules('ي', '', 'ن', '', {finalStemSegment: '(?<!ن)'}),
...getImperfectRules('ي', '', 'ن', 'ن'),
],
},
'IVPref-Anta': {
name: 'Imperfect Tense',
description: 'Imperfect Verb 2nd. m. sing.',
rules: [...getImperfectRules('ت', '', '', '', {attachesTo2nd: false})],
},
'IVPref-Anti': {
name: 'Imperfect Tense',
description: 'Imperfect Verb 2nd. f. sing.',
rules: [
...getImperfectRules('ت', '', 'ين', '', {attachesTo2nd: false, includeLiPrefix: false}), // Indicative
...getImperfectRules('ت', '', 'ي', '', {attachesTo2nd: false}), // Subjunctive
],
},
'IVPref-AntmA': {
name: 'Imperfect Tense',
description: 'Imperfect Verb 2nd. dual',
rules: [
// Indicative
...getImperfectRules('ت', '', 'ان', '', {attachesTo2nd: false, includeLiPrefix: false}),
...getImperfectRules('ت', '', 'آن', 'أ', {attachesTo2nd: false, includeLiPrefix: false}),
// Subjunctive
...getImperfectRules('ت', '', 'ا', '', {attachesTo2nd: false}),
...getImperfectRules('ت', '', 'آ', 'أ', {attachesTo2nd: false}),
],
},
'IVPref-Antm': {
name: 'Imperfect Tense',
description: 'Imperfect Verb 2nd. m. pl.',
rules: [
// Indicative
...getImperfectRules('ت', '', 'ون', '', {attachesTo2nd: false, includeLiPrefix: false}),
// Subjunctive
...getImperfectRules('ت', '', 'وا', '', {attachesTo2nd: false, attachedSuffix: 'و'}),
],
},
'IVPref-Antn': {
name: 'Imperfect Tense',
description: 'Imperfect Verb 2nd. f. pl.',
rules: [
...getImperfectRules('ت', '', 'ن', '', {attachesTo2nd: false, finalStemSegment: '(?<!ن)'}),
...getImperfectRules('ت', '', 'ن', 'ن', {attachesTo2nd: false}),
],
},
'IVPref-AnA': {
name: 'Imperfect Tense',
description: 'Imperfect Verb 1st. sing.',
rules: [
...getImperfectRules('أ', '', '', '', {attachesTo1st: false}),
...getImperfectRules('آ', 'أ', '', '', {attachesTo1st: false}),
],
},
'IVPref-nHn': {
name: 'Imperfect Tense',
description: 'Imperfect Verb 1st. pl.',
rules: [...getImperfectRules('ن', '', '', '', {attachesTo1st: false})],
},
// Command Verb
'CVPref': {
name: 'Imperative',
description: 'Command Verb',
rules: [
prefixInflection('و', '', ['cv_p'], ['cv_s']),
prefixInflection('ف', '', ['cv_p'], ['cv_s']),
prefixInflection('ا', '', ['cv_p'], ['cv_s', 'cv']),
prefixInflection('وا', '', ['cv_p'], ['cv_s', 'cv']),
prefixInflection('فا', '', ['cv_p'], ['cv_s', 'cv']),
],
},
'CVSuff': {
name: 'Imperative',
description: 'Command Verb',
rules: [
// 2nd. m. sing.
...directObjectPronouns1st.map((p) => suffixInflection(p, '', ['cv_s'], ['cv'])),
...directObjectPronouns3rd.map((p) => suffixInflection(p, '', ['cv_s'], ['cv'])),
// 2nd. f. sing
suffixInflection('ي', '', ['cv_s'], ['cv']),
...directObjectPronouns1st.map((p) => suffixInflection(`ي${p}`, '', ['cv_s'], ['cv'])),
...directObjectPronouns3rd.map((p) => suffixInflection(`ي${p}`, '', ['cv_s'], ['cv'])),
// 2nd. dual
suffixInflection('ا', '', ['cv_s'], ['cv']),
...directObjectPronouns1st.map((p) => suffixInflection(`ا${p}`, '', ['cv_s'], ['cv'])),
...directObjectPronouns3rd.map((p) => suffixInflection(`ا${p}`, '', ['cv_s'], ['cv'])),
// 2nd. m. pl.
suffixInflection('وا', '', ['cv_s'], ['cv']),
...directObjectPronouns1st.map((p) => suffixInflection(`و${p}`, '', ['cv_s'], ['cv'])),
...directObjectPronouns3rd.map((p) => suffixInflection(`و${p}`, '', ['cv_s'], ['cv'])),
// 2nd. f. pl.
suffixInflection('ن', '', ['cv_s'], ['cv']),
...directObjectPronouns1st.map((p) => suffixInflection(`ن${p}`, '', ['cv_s'], ['cv'])),
...directObjectPronouns3rd.map((p) => suffixInflection(`ن${p}`, '', ['cv_s'], ['cv'])),
],
},
},
};

View File

@@ -0,0 +1,34 @@
/*
* Copyright (C) 2024-2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/**
 * Bidirectional text preprocessor converting between the German eszett and its "ss" spelling.
 *
 * - `'off'`: the text is returned unchanged.
 * - `'direct'`: "ẞ" becomes "SS" and "ß" becomes "ss".
 * - `'inverse'`: "SS" becomes "ẞ" and "ss" becomes "ß".
 *
 * Any other setting also returns the text unchanged, so `process` always yields a string.
 * @type {import('language').BidirectionalConversionPreprocessor}
 */
export const eszettPreprocessor = {
    name: 'Convert "ß" to "ss"',
    description: 'ß → ss, ẞ → SS and vice versa',
    options: ['off', 'direct', 'inverse'],
    process: (str, setting) => {
        switch (setting) {
            case 'direct':
                // Upper-case eszett first, then the lower-case one.
                return str.replace(/ẞ/g, 'SS').replace(/ß/g, 'ss');
            case 'inverse':
                return str.replace(/SS/g, 'ẞ').replace(/ss/g, 'ß');
            case 'off':
            default:
                // Unknown settings behave like 'off' instead of falling out of the switch.
                return str;
        }
    },
};

View File

@@ -0,0 +1,176 @@
/*
* Copyright (C) 2024-2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import {prefixInflection, suffixInflection} from '../language-transforms.js';
/** @typedef {keyof typeof conditions} Condition */
// Separable verb prefixes. Reference:
// https://www.dartmouth.edu/~deutsch/Grammatik/Wortbildung/Separables.html
const separablePrefixes = [
    'ab', 'an', 'auf', 'aus', 'auseinander',
    'bei',
    'da', 'dabei', 'dar', 'daran', 'dazwischen', 'durch',
    'ein', 'empor', 'entgegen', 'entlang', 'entzwei',
    'fehl', 'fern', 'fest', 'fort', 'frei',
    'gegenüber', 'gleich',
    'heim', 'her', 'herab', 'heran', 'herauf', 'heraus', 'herbei', 'herein', 'herüber', 'herum', 'herunter', 'hervor',
    'hin', 'hinab', 'hinauf', 'hinaus', 'hinein', 'hinterher', 'hinunter', 'hinweg', 'hinzu', 'hoch',
    'los',
    'mit',
    'nach', 'nebenher', 'nieder',
    'statt',
    'um',
    'vor', 'voran', 'voraus', 'vorbei', 'vorüber', 'vorweg',
    'weg', 'weiter', 'wieder',
    'zu', 'zurecht', 'zurück', 'zusammen',
];
// Contents of a regex character class (without the brackets) covering German letters.
const germanLetters = 'a-zA-ZäöüßÄÖÜẞ';
/**
 * Builds a rule for a phrase whose last word is `prefix`, separated from the leading word.
 * The rule matches "<word> <anything> <prefix>" and rewrites it to "<word> <prefix>",
 * dropping everything in between.
 * @param {string} prefix
 * @param {Condition[]} conditionsIn
 * @param {Condition[]} conditionsOut
 * @returns {import('language-transformer').Rule<Condition>}
 */
function separatedPrefix(prefix, conditionsIn, conditionsOut) {
    const pattern = new RegExp(`^([${germanLetters}]+) .+ ${prefix}$`);
    // "$1" refers to the leading word captured by the pattern.
    const replacement = '$1 ' + prefix;
    const deinflect = (term) => term.replace(pattern, replacement);
    return {type: 'other', isInflected: pattern, deinflect, conditionsIn, conditionsOut};
}
// One "separated prefix" rule per separable prefix, with no condition constraints.
const separatedPrefixInflections = separablePrefixes.map((prefix) => separatedPrefix(prefix, [], []));
// One prefix rule per separable prefix: a leading "<prefix>zu" is rewritten to "<prefix>".
const zuInfinitiveInflections = separablePrefixes.map((prefix) => prefixInflection(prefix + 'zu', prefix, [], ['v']));
/**
 * Builds the rules for "ge…t" past participles. The text between "ge" and the final "t"
 * is kept and completed with "n" (first rule) or "en" (second rule).
 * @returns {import('language-transformer').Rule<Condition>[]}
 */
function getBasicPastParticiples() {
    const participlePattern = new RegExp(`^ge([${germanLetters}]+)t$`);
    const rules = [];
    for (const ending of ['n', 'en']) {
        rules.push({
            type: 'other',
            isInflected: participlePattern,
            deinflect: (term) => term.replace(participlePattern, `$1${ending}`),
            conditionsIn: [],
            conditionsOut: ['vw'],
        });
    }
    return rules;
}
/**
 * Builds the rules for "<prefix>ge…t" past participles, where <prefix> is any separable prefix.
 * The prefix and the text between "ge" and the final "t" are kept and completed
 * with "n" (first rule) or "en" (second rule).
 * @returns {import('language-transformer').Rule<Condition>[]}
 */
function getSeparablePastParticiples() {
    const anyPrefix = separablePrefixes.join('|');
    const participlePattern = new RegExp(`^(${anyPrefix})ge([${germanLetters}]+)t$`);
    const rules = [];
    for (const ending of ['n', 'en']) {
        rules.push({
            type: 'other',
            isInflected: participlePattern,
            // $1 is the separable prefix, $2 the stem between "ge" and "t".
            deinflect: (term) => term.replace(participlePattern, `$1$2${ending}`),
            conditionsIn: [],
            conditionsOut: ['vw'],
        });
    }
    return rules;
}
// Part-of-speech conditions for German. 'v' groups the weak ('vw') and strong ('vs') verb classes.
const conditions = {
    v: {name: 'Verb', isDictionaryForm: true, subConditions: ['vw', 'vs']},
    vw: {name: 'Weak verb', isDictionaryForm: true},
    vs: {name: 'Strong verb', isDictionaryForm: true},
    n: {name: 'Noun', isDictionaryForm: true},
    adj: {name: 'Adjective', isDictionaryForm: true},
};
// German transform descriptor: the language code, the conditions declared in this file, and
// the named transforms whose rules rewrite a derived or inflected form toward a base form.
export const germanTransforms = {
    language: 'de',
    conditions,
    transforms: {
        // Rewrites the endings -ung / -lung / -rung to the verb endings -en / -eln / -rn.
        'nominalization': {
            name: 'nominalization',
            description: 'Noun formed from a verb',
            rules: [
                suffixInflection('ung', 'en', [], ['v']),
                suffixInflection('lung', 'eln', [], ['v']),
                suffixInflection('rung', 'rn', [], ['v']),
            ],
        },
        // Rewrites an adjective ending in -bar to a verb ending in -en or -n.
        '-bar': {
            name: '-bar',
            description: '-able adjective from a verb',
            rules: [
                suffixInflection('bar', 'en', ['adj'], ['v']),
                suffixInflection('bar', 'n', ['adj'], ['v']),
            ],
        },
        // Strips a leading "un"; the result is tagged as an adjective.
        'negative': {
            name: 'negative',
            description: 'Negation',
            rules: [
                prefixInflection('un', '', [], ['adj']),
            ],
        },
        // "ge…t" participles, with and without a separable prefix in front of "ge".
        'past participle': {
            name: 'past participle',
            rules: [
                ...getBasicPastParticiples(),
                ...getSeparablePastParticiples(),
            ],
        },
        // Phrases ending in a separable prefix: "<word> … <prefix>" becomes "<word> <prefix>".
        'separated prefix': {
            name: 'separated prefix',
            rules: [
                ...separatedPrefixInflections,
            ],
        },
        // A leading "<prefix>zu" is rewritten to "<prefix>" for every separable prefix.
        'zu-infinitive': {
            name: 'zu-infinitive',
            rules: [
                ...zuInfinitiveInflections,
            ],
        },
        // Strips the noun endings -heit / -keit; the result may be an adjective or a noun.
        '-heit': {
            name: '-heit',
            description:
                '1. Converts an adjective into a noun and usually denotes an abstract quality of the adjectival root. ' +
                'It is often equivalent to the English suffixes -ness, -th, -ty, -dom:\n' +
                '\t schön (“beautiful”) + -heit → Schönheit (“beauty”)\n' +
                '\t neu (“new”) + -heit → Neuheit (“novelty”)\n' +
                '2. Converts concrete nouns into abstract nouns:\n' +
                '\t Kind (“child”) + -heit → Kindheit (“childhood”)\n' +
                '\t Christ (“Christian”) + -heit → Christenheit (“Christendom”)\n',
            rules: [
                suffixInflection('heit', '', ['n'], ['adj', 'n']),
                suffixInflection('keit', '', ['n'], ['adj', 'n']),
            ],
        },
    },
};

View File

@@ -0,0 +1,42 @@
/*
* Copyright (C) 2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import {basicTextProcessorOptions} from '../text-processors.js';
/**
 * Text processor that, when enabled, keeps only the first acute accent of a word.
 * @type {import('language').TextProcessor<boolean>}
 */
export const removeDoubleAcuteAccents = {
    name: 'Remove double acute accents',
    description: 'πρόσωπό → πρόσωπο',
    options: basicTextProcessorOptions,
    process: (str, setting) => {
        // Disabled: pass the text through untouched.
        if (!setting) { return str; }
        return removeDoubleAcuteAccentsImpl(str);
    },
};
/**
 * Removes every combining acute accent (U+0301) except the first one.
 * The word is decomposed (NFD) so accents become separate code points, filtered,
 * and recomposed (NFC) before being returned.
 * @param {string} word
 * @returns {string}
 */
export function removeDoubleAcuteAccentsImpl(word) {
    const ACUTE_ACCENT = '\u0301';
    let acuteSeen = false;
    let kept = '';
    for (const char of word.normalize('NFD')) {
        if (char === ACUTE_ACCENT) {
            // Only the first acute accent survives.
            if (acuteSeen) { continue; }
            acuteSeen = true;
        }
        kept += char;
    }
    return kept.normalize('NFC');
}

View File

@@ -0,0 +1,292 @@
/*
* Copyright (C) 2024-2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import {prefixInflection, suffixInflection} from '../language-transforms.js';
/** @typedef {keyof typeof conditions} Condition */
/**
 * Creates one suffix rule per character of `consonants`: the doubled consonant followed by
 * `suffix` is replaced with the single consonant (e.g. "pp" + "ed" → "p").
 * @param {string} consonants
 * @param {string} suffix
 * @param {Condition[]} conditionsIn
 * @param {Condition[]} conditionsOut
 * @returns {import('language-transformer').SuffixRule<Condition>[]}
 */
function doubledConsonantInflection(consonants, suffix, conditionsIn, conditionsOut) {
    return Array.from(
        consonants,
        (letter) => suffixInflection(`${letter}${letter}${suffix}`, letter, conditionsIn, conditionsOut),
    );
}
// Suffix rules for past forms: each rule replaces the first suffix with the second.
const pastSuffixInflections = [
    suffixInflection('ed', '', ['v'], ['v']), // 'walked'
    suffixInflection('ed', 'e', ['v'], ['v']), // 'hoped'
    suffixInflection('ied', 'y', ['v'], ['v']), // 'tried'
    suffixInflection('cked', 'c', ['v'], ['v']), // 'frolicked'
    // Doubled final consonant before -ed is reduced to a single consonant.
    ...doubledConsonantInflection('bdgklmnprstz', 'ed', ['v'], ['v']),
    // Irregular -aid forms mapped back to -ay.
    suffixInflection('laid', 'lay', ['v'], ['v']),
    suffixInflection('paid', 'pay', ['v'], ['v']),
    suffixInflection('said', 'say', ['v'], ['v']),
];
// Suffix rules for -ing forms.
const ingSuffixInflections = [
    suffixInflection('ing', '', ['v'], ['v']), // 'walking'
    suffixInflection('ing', 'e', ['v'], ['v']), // 'driving'
    suffixInflection('ying', 'ie', ['v'], ['v']), // 'lying'
    suffixInflection('cking', 'c', ['v'], ['v']), // 'panicking'
    // Doubled final consonant before -ing is reduced to a single consonant.
    ...doubledConsonantInflection('bdgklmnprstz', 'ing', ['v'], ['v']),
];
// Suffix rules for third person singular present forms.
const thirdPersonSgPresentSuffixInflections = [
    suffixInflection('s', '', ['v'], ['v']), // 'walks'
    suffixInflection('es', '', ['v'], ['v']), // 'teaches'
    suffixInflection('ies', 'y', ['v'], ['v']), // 'tries'
];
// Words accepted as the particle of a phrasal verb.
const phrasalVerbParticles = [
    'aboard', 'about', 'above', 'across', 'ahead', 'alongside', 'apart', 'around', 'aside', 'astray',
    'away', 'back', 'before', 'behind', 'below', 'beneath', 'besides', 'between', 'beyond', 'by',
    'close', 'down', 'east', 'west', 'north', 'south', 'eastward', 'westward', 'northward', 'southward',
    'forward', 'backward', 'backwards', 'forwards', 'home', 'in', 'inside', 'instead', 'near', 'off',
    'on', 'opposite', 'out', 'outside', 'over', 'overhead', 'past', 'round', 'since', 'through',
    'throughout', 'together', 'under', 'underneath', 'up', 'within', 'without',
];
// Words accepted as the preposition of a phrasal verb.
const phrasalVerbPrepositions = [
    'aback', 'about', 'above', 'across', 'after', 'against', 'ahead', 'along', 'among', 'apart',
    'around', 'as', 'aside', 'at', 'away', 'back', 'before', 'behind', 'below', 'between',
    'beyond', 'by', 'down', 'even', 'for', 'forth', 'forward', 'from', 'in', 'into',
    'of', 'off', 'on', 'onto', 'open', 'out', 'over', 'past', 'round', 'through',
    'to', 'together', 'toward', 'towards', 'under', 'up', 'upon', 'way', 'with', 'without',
];
// Regex alternation of the particles only.
const particlesDisjunction = phrasalVerbParticles.join('|');
// Particles followed by prepositions, without duplicates (first occurrence wins).
const phrasalVerbWordSet = new Set(phrasalVerbParticles.concat(phrasalVerbPrepositions));
// Regex alternation of every particle and preposition.
const phrasalVerbWordDisjunction = Array.from(phrasalVerbWordSet).join('|');
// Matches the text between the leading word and a following particle: a space, a run of
// characters containing no phrasal-verb word, and the space right before the particle.
const interposedObjectPattern = new RegExp(`(?<=\\w) (?:(?!\\b(${phrasalVerbWordDisjunction})\\b).)+ (?=(?:${particlesDisjunction}))`);
/**
 * Rule for a phrasal verb with an object between verb and particle:
 * the interposed text is collapsed to a single space.
 * @type {import('language-transformer').Rule<Condition>}
 */
const phrasalVerbInterposedObjectRule = {
    type: 'other',
    isInflected: new RegExp(`^\\w* (?:(?!\\b(${phrasalVerbWordDisjunction})\\b).)+ (?:${particlesDisjunction})`),
    deinflect: (term) => term.replace(interposedObjectPattern, ' '),
    conditionsIn: [],
    conditionsOut: ['v_phr'],
};
/**
 * Builds a rule that deinflects the first word of a phrasal verb: the `inflected` ending
 * directly before " <particle or preposition>" is replaced with `deinflected`.
 * @param {string} inflected
 * @param {string} deinflected
 * @returns {import('language-transformer').Rule<Condition>}
 */
function createPhrasalVerbInflection(inflected, deinflected) {
    // Term starts with a word ending in `inflected`, followed by a space and a phrasal-verb word.
    const inflectedPhrasePattern = new RegExp(`^\\w*${inflected} (?:${phrasalVerbWordDisjunction})`);
    // The ending itself, located by the phrasal-verb word that follows it.
    const endingPattern = new RegExp(`(?<=)${inflected}(?= (?:${phrasalVerbWordDisjunction}))`);
    return {
        type: 'other',
        isInflected: inflectedPhrasePattern,
        deinflect: (term) => term.replace(endingPattern, deinflected),
        conditionsIn: ['v'],
        conditionsOut: ['v_phr'],
    };
}
/**
 * Derives phrasal-verb rules from suffix rules. The inflected ending is taken from each
 * rule's `isInflected` regex source with the first "$" removed; rules without a
 * `deinflected` value are skipped.
 * @param {import('language-transformer').SuffixRule<Condition>[]} sourceRules
 * @returns {import('language-transformer').Rule<Condition>[]}
 */
function createPhrasalVerbInflectionsFromSuffixInflections(sourceRules) {
    const phrasalRules = [];
    for (const {isInflected, deinflected} of sourceRules) {
        if (deinflected === undefined) { continue; }
        const inflectedSuffix = isInflected.source.replace('$', '');
        phrasalRules.push(createPhrasalVerbInflection(inflectedSuffix, deinflected));
    }
    return phrasalRules;
}
// Part-of-speech conditions for English. 'v' includes phrasal verbs ('v_phr');
// 'n' groups plural ('np') and singular ('ns') nouns.
const conditions = {
    v: {name: 'Verb', isDictionaryForm: true, subConditions: ['v_phr']},
    v_phr: {name: 'Phrasal verb', isDictionaryForm: true},
    n: {name: 'Noun', isDictionaryForm: true, subConditions: ['np', 'ns']},
    np: {name: 'Noun plural', isDictionaryForm: true},
    ns: {name: 'Noun singular', isDictionaryForm: true},
    adj: {name: 'Adjective', isDictionaryForm: true},
    adv: {name: 'Adverb', isDictionaryForm: true},
};
/**
 * English transform descriptor: the language code, the conditions declared in this file, and
 * the named transforms whose rules rewrite an inflected or derived form toward a base form.
 *
 * In every rule, `conditionsIn` describes the form being rewritten and `conditionsOut`
 * describes the result (e.g. 'adverb' goes from ['adv'] to ['adj']).
 * @type {import('language-transformer').LanguageTransformDescriptor<Condition>}
 */
export const englishTransforms = {
    language: 'en',
    conditions,
    transforms: {
        'plural': {
            name: 'plural',
            description: 'Plural form of a noun',
            rules: [
                suffixInflection('s', '', ['np'], ['ns']),
                suffixInflection('es', '', ['np'], ['ns']),
                suffixInflection('ies', 'y', ['np'], ['ns']),
                suffixInflection('ves', 'fe', ['np'], ['ns']),
                suffixInflection('ves', 'f', ['np'], ['ns']),
            ],
        },
        'possessive': {
            name: 'possessive',
            description: 'Possessive form of a noun',
            rules: [
                suffixInflection('\'s', '', ['n'], ['n']),
                suffixInflection('s\'', 's', ['n'], ['n']),
            ],
        },
        // Verb forms: the plain suffix rules plus their phrasal-verb counterparts.
        'past': {
            name: 'past',
            description: 'Simple past tense of a verb',
            rules: [
                ...pastSuffixInflections,
                ...createPhrasalVerbInflectionsFromSuffixInflections(pastSuffixInflections),
            ],
        },
        'ing': {
            name: 'ing',
            description: 'Present participle of a verb',
            rules: [
                ...ingSuffixInflections,
                ...createPhrasalVerbInflectionsFromSuffixInflections(ingSuffixInflections),
            ],
        },
        '3rd pers. sing. pres': {
            name: '3rd pers. sing. pres',
            description: 'Third person singular present tense of a verb',
            rules: [
                ...thirdPersonSgPresentSuffixInflections,
                ...createPhrasalVerbInflectionsFromSuffixInflections(thirdPersonSgPresentSuffixInflections),
            ],
        },
        'interposed object': {
            name: 'interposed object',
            description: 'Phrasal verb with interposed object',
            rules: [
                phrasalVerbInterposedObjectRule,
            ],
        },
        'archaic': {
            name: 'archaic',
            description: 'Archaic form of a word',
            rules: [
                suffixInflection('\'d', 'ed', ['v'], ['v']),
            ],
        },
        'adverb': {
            name: 'adverb',
            description: 'Adverb form of an adjective',
            rules: [
                suffixInflection('ly', '', ['adv'], ['adj']), // 'quickly'
                suffixInflection('ily', 'y', ['adv'], ['adj']), // 'happily'
                suffixInflection('ly', 'le', ['adv'], ['adj']), // 'humbly'
            ],
        },
        'comparative': {
            name: 'comparative',
            description: 'Comparative form of an adjective',
            rules: [
                suffixInflection('er', '', ['adj'], ['adj']), // 'faster'
                suffixInflection('er', 'e', ['adj'], ['adj']), // 'nicer'
                suffixInflection('ier', 'y', ['adj'], ['adj']), // 'happier'
                ...doubledConsonantInflection('bdgmnt', 'er', ['adj'], ['adj']),
            ],
        },
        'superlative': {
            name: 'superlative',
            description: 'Superlative form of an adjective',
            rules: [
                suffixInflection('est', '', ['adj'], ['adj']), // 'fastest'
                suffixInflection('est', 'e', ['adj'], ['adj']), // 'nicest'
                suffixInflection('iest', 'y', ['adj'], ['adj']), // 'happiest'
                ...doubledConsonantInflection('bdgmnt', 'est', ['adj'], ['adj']),
            ],
        },
        'dropped g': {
            name: 'dropped g',
            description: 'Dropped g in -ing form of a verb',
            rules: [
                suffixInflection('in\'', 'ing', ['v'], ['v']),
            ],
        },
        '-y': {
            name: '-y',
            description: 'Adjective formed from a verb or noun',
            rules: [
                suffixInflection('y', '', ['adj'], ['n', 'v']), // 'dirty', 'pushy'
                suffixInflection('y', 'e', ['adj'], ['n', 'v']), // 'hazy'
                ...doubledConsonantInflection('glmnprst', 'y', [], ['n', 'v']), // 'baggy', 'saggy'
            ],
        },
        'un-': {
            name: 'un-',
            description: 'Negative form of an adjective, adverb, or verb',
            rules: [
                prefixInflection('un', '', ['adj', 'adv', 'v'], ['adj', 'adv', 'v']),
            ],
        },
        'going-to future': {
            name: 'going-to future',
            description: 'Going-to future tense of a verb',
            rules: [
                prefixInflection('going to ', '', ['v'], ['v']),
            ],
        },
        'will future': {
            name: 'will future',
            description: 'Will-future tense of a verb',
            rules: [
                prefixInflection('will ', '', ['v'], ['v']),
            ],
        },
        'imperative negative': {
            name: 'imperative negative',
            description: 'Negative imperative form of a verb',
            rules: [
                prefixInflection('don\'t ', '', ['v'], ['v']),
                prefixInflection('do not ', '', ['v'], ['v']),
            ],
        },
        '-able': {
            name: '-able',
            description: 'Adjective formed from a verb',
            rules: [
                // The -able word is the adjective and the stripped result is the verb, so the
                // conditions run adj → v, the same orientation as the 'adverb' and '-y' transforms.
                suffixInflection('able', '', ['adj'], ['v']),
                suffixInflection('able', 'e', ['adj'], ['v']),
                suffixInflection('iable', 'y', ['adj'], ['v']),
                ...doubledConsonantInflection('bdgklmnprstz', 'able', ['adj'], ['v']),
            ],
        },
    },
};

View File

@@ -0,0 +1,316 @@
/*
* Copyright (C) 2024-2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import {prefixInflection, suffixInflection} from '../language-transforms.js';
// Part-of-speech conditions for Esperanto; every one is a dictionary form.
const conditions = {
    n: {name: 'Noun', isDictionaryForm: true},
    adj: {name: 'Adjective', isDictionaryForm: true},
    adv: {name: 'Adverb', isDictionaryForm: true},
    v: {name: 'Verb', isDictionaryForm: true},
};
/**
 * Esperanto transform descriptor. Each transform holds a single rule that swaps or strips
 * one suffix or prefix; multi-line descriptions are joined with newlines.
 * @type {import('language-transformer').LanguageTransformDescriptor<keyof typeof conditions>}
 */
export const esperantoTransforms = {
    language: 'eo',
    conditions,
    transforms: {
        // general inflections
        'accusative': {
            name: 'accusative',
            description: 'Accusative form of a word',
            rules: [suffixInflection('n', '', [], [])],
        },
        'plural': {
            name: 'plural',
            description: 'Plural form of a word',
            rules: [suffixInflection('j', '', [], [])],
        },
        'diminutive': {
            name: 'diminutive',
            description: 'Diminutive form of a noun',
            rules: [suffixInflection('eto', 'o', [], ['n'])],
        },
        'directional': {
            name: 'directional',
            description: [
                'An adverb in accusative case indicates direction',
                'kie: where',
                'kien: to where',
            ].join('\n'),
            rules: [suffixInflection('en', 'e', [], ['adv'])],
        },
        'locational': {
            name: 'locational',
            description: [
                'A noun becoming an adverb indicates location',
                'surpinto : peak, tip, top',
                'surpinte: at the peak, at the tip, at the top',
                'ĉambro : room',
                'ĉambre: at the room, in the room',
            ].join('\n'),
            rules: [suffixInflection('e', 'o', [], ['n'])],
        },
        'adjectival': {
            name: 'adjectival',
            description: 'Adjectival form of a noun',
            rules: [suffixInflection('a', 'o', [], ['n'])],
        },
        'adverbial (adj -> adv)': {
            name: 'adverbial',
            description: 'Adverbial form of an adjective',
            rules: [suffixInflection('e', 'a', [], ['adj'])],
        },
        'adverbial (v -> adv)': {
            name: 'adverbial',
            description: 'Adverbial form of a verb',
            rules: [suffixInflection('e', 'i', [], ['v'])],
        },
        // suffixes
        '-ejo (noun)': {
            name: '-ejo',
            description: [
                'Suffix which turns a word into a place designed for that specific thing',
                'kafo: coffee',
                'kafejo: café',
            ].join('\n'),
            rules: [suffixInflection('ejo', 'o', [], ['n'])],
        },
        '-ejo (verb)': {
            name: '-ejo',
            description: [
                'Suffix which turns a word into a place designed for that specific action',
                'kuiri: to cook',
                'kuirejo: kitchen',
            ].join('\n'),
            rules: [suffixInflection('ejo', 'i', [], ['v'])],
        },
        '-ujo (noun)': {
            name: '-ujo',
            description: [
                'Suffix which turns a word into a box or container for that specific thing',
                'abelo: a bee',
                'abelujo: a beehive',
                '',
                'Suffix which turns a word into a place where a type of people can be found',
                'patro: father',
                'patrujo: fatherland',
                '',
                'Suffix which turns a flower or fruit into a plant or tree which the flower or fruit can be found in',
                'pomo: an apple',
                'pomujo: an apple tree',
            ].join('\n'),
            rules: [suffixInflection('ujo', 'o', [], ['n'])],
        },
        '-ujo (adjective)': {
            name: '-ujo',
            description: [
                'Suffix which turns a descriptive word into a box or container for that specific type of thing',
                'frida: cold',
                'fridujo: a refrigerator',
            ].join('\n'),
            rules: [suffixInflection('ujo', 'a', [], ['adj'])],
        },
        '-ujo (verb)': {
            name: '-ujo',
            description: [
                'Suffix which turns an action into a box or container for that specific type of activity',
                'lavi: to wash',
                'lavujo: a sink',
            ].join('\n'),
            rules: [suffixInflection('ujo', 'i', [], ['v'])],
        },
        '-ebla': {
            name: '-ebla',
            description: [
                'Suffix which shows possibility',
                'kompreni: to understand',
                'komprenebla: understandable',
            ].join('\n'),
            rules: [suffixInflection('ebla', 'i', [], ['v'])],
        },
        '-ado': {
            name: '-ado',
            description: [
                'Suffix which turns an action into a thing representing the action',
                'vivi: to live',
                'vivado: life',
                'spiri: to breathe',
                'spirado: respiration',
            ].join('\n'),
            rules: [suffixInflection('ado', 'i', [], ['v'])],
        },
        // prefixes
        'mal-': {
            name: 'mal-',
            description: 'Prefix which turns an action, description, thing, or direction into its opposite meaning',
            rules: [prefixInflection('mal', '', [], [])],
        },
        'kun-': {
            name: 'kun-',
            description: [
                'Prefix meaning to do the action together with other people',
                'labori: to work',
                'kunlabori: to collaborate',
            ].join('\n'),
            rules: [prefixInflection('kun', '', [], [])],
        },
        'ekster-': {
            name: 'ekster-',
            description: [
                'Prefix meaning "outside of"',
                'lando: country',
                'eksterlando: foreign country',
            ].join('\n'),
            rules: [prefixInflection('ekster', '', [], [])],
        },
        'ek-': {
            name: 'ek-',
            description: [
                'Prefix meaning to begin the action',
                'kanti: to sing',
                'ekkanti: to begin to sing',
            ].join('\n'),
            rules: [prefixInflection('ek', '', [], [])],
        },
        'ĵus-': {
            name: 'ĵus-',
            description: [
                'Prefix meaning something is newly or recently done',
                'vekita: awakened',
                'ĵusvekita: newly/recently awakened',
            ].join('\n'),
            rules: [prefixInflection('ĵus', '', [], [])],
        },
        'el-': {
            name: 'el-',
            description: [
                'Prefix meaning to do the action in an outward direction',
                'tiri: to pull',
                'eltiri: to pull out',
                '',
                'Prefix meaning to do the action all the way to completion',
                'trinki: to drink',
                'eltrinki: to drink up, to drink all of something',
                'lerni: to learn',
                'ellerni: to learn all that you can, to master',
                'uzi: to use',
                'eluzi: to use up, wear out',
            ].join('\n'),
            rules: [prefixInflection('el', '', [], [])],
        },
        'dis-': {
            name: 'dis-',
            description: [
                'Prefix meaning "separation, being apart, spreading out"',
                'ŝvebi: to float',
                'disŝvebi: to float out/separately',
                'fali: to fall',
                'disfali: to fall apart',
                'doni: to give',
                'disdoni: to give out, to distribute',
                'sendo: a thing that is sent',
                'dissendo: a broadcast',
            ].join('\n'),
            rules: [prefixInflection('dis', '', [], [])],
        },
        'for-': {
            name: 'for-',
            description: [
                'Prefix meaning "movement to a far distance"',
                'flugi: to fly',
                'forflugi: to fly away',
                '',
                'Prefix meaning "disappearance/annihilation"',
                'uzi: to use',
                'foruzi: to use up (until nothing is left)',
            ].join('\n'),
            rules: [prefixInflection('for', '', [], [])],
        },
        'mis-': {
            name: 'mis-',
            description: [
                'Prefix meaning "wrong, erroneous"',
                'kompreni: to understand',
                'miskompreni: to misunderstand',
                'paroli: to speak',
                'misparoli: to misspeak',
            ].join('\n'),
            rules: [prefixInflection('mis', '', [], [])],
        },
    },
};

View File

@@ -0,0 +1,969 @@
/*
* Copyright (C) 2024-2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import {suffixInflection, wholeWordInflection} from '../language-transforms.js';
/** @typedef {keyof typeof conditions} Condition */
// Matches a reflexive pronoun, whitespace, then a word ending in -ar/-er/-ir.
// NOTE(review): the /g flag makes .test()/.exec() stateful via lastIndex — confirm call sites.
const REFLEXIVE_PATTERN = /\b(me|te|se|nos|os)\s+(\w+)(ar|er|ir)\b/g;
// Plain vowel → the same vowel carrying an acute accent.
const ACCENTS = new Map(Object.entries({
    a: 'á',
    e: 'é',
    i: 'í',
    o: 'ó',
    u: 'ú',
}));
/**
 * Returns the accented counterpart of `char` from ACCENTS, or `char` itself when
 * the lookup yields nothing.
 * @param {string} char
 * @returns {string}
 */
function addAccent(char) {
    const accented = ACCENTS.get(char);
    return accented ? accented : char;
}
// Part-of-speech conditions for Spanish. Only the umbrella classes ('n', 'v', 'adj') are
// dictionary forms; the noun number and verb conjugation sub-classes are not.
const conditions = {
    n: {name: 'Noun', isDictionaryForm: true, subConditions: ['ns', 'np']},
    np: {name: 'Noun plural', isDictionaryForm: false},
    ns: {name: 'Noun singular', isDictionaryForm: false},
    v: {name: 'Verb', isDictionaryForm: true, subConditions: ['v_ar', 'v_er', 'v_ir']},
    v_ar: {name: '-ar verb', isDictionaryForm: false},
    v_er: {name: '-er verb', isDictionaryForm: false},
    v_ir: {name: '-ir verb', isDictionaryForm: false},
    adj: {name: 'Adjective', isDictionaryForm: true},
};
/** @type {import('language-transformer').LanguageTransformDescriptor<keyof typeof conditions>} */
export const spanishTransforms = {
language: 'es',
conditions,
transforms: {
'plural': {
name: 'plural',
description: 'Plural form of a noun',
rules: [
suffixInflection('s', '', ['np'], ['ns']),
suffixInflection('es', '', ['np'], ['ns']),
suffixInflection('ces', 'z', ['np'], ['ns']), // 'lápices' -> lápiz
...[...'aeiou'].map((v) => suffixInflection(`${v}ses`, `${addAccent(v)}s`, ['np'], ['ns'])), // 'autobuses' -> autobús
...[...'aeiou'].map((v) => suffixInflection(`${v}nes`, `${addAccent(v)}n`, ['np'], ['ns'])), // 'canciones' -> canción
],
},
'feminine adjective': {
name: 'feminine adjective',
description: 'feminine form of an adjective',
rules: [
suffixInflection('a', 'o', ['adj'], ['adj']),
suffixInflection('a', '', ['adj'], ['adj']), // encantadora -> encantador, española -> español
...[...'aeio'].map((v) => suffixInflection(`${v}na`, `${addAccent(v)}n`, ['adj'], ['adj'])), // dormilona -> dormilón, chiquitina -> chiquitín
...[...'aeio'].map((v) => suffixInflection(`${v}sa`, `${addAccent(v)}s`, ['adj'], ['adj'])), // francesa -> francés
],
},
'present indicative': {
name: 'present indicative',
description: 'Present indicative form of a verb',
rules: [
// STEM-CHANGING RULES FIRST
// e->ie for -ar
{
type: 'other',
isInflected: /ie([a-z]*)(o|as|a|an)$/,
deinflect: (term) => term.replace(/ie/, 'e').replace(/(o|as|a|an)$/, 'ar'),
conditionsIn: ['v_ar'],
conditionsOut: ['v_ar'],
},
// e->ie for -er
{
type: 'other',
isInflected: /ie([a-z]*)(o|es|e|en)$/,
deinflect: (term) => term.replace(/ie/, 'e').replace(/(o|es|e|en)$/, 'er'),
conditionsIn: ['v_er'],
conditionsOut: ['v_er'],
},
// e->ie for -ir
{
type: 'other',
isInflected: /ie([a-z]*)(o|es|e|en)$/,
deinflect: (term) => term.replace(/ie/, 'e').replace(/(o|es|e|en)$/, 'ir'),
conditionsIn: ['v_ir'],
conditionsOut: ['v_ir'],
},
// o->ue for -ar
{
type: 'other',
isInflected: /ue([a-z]*)(o|as|a|an)$/,
deinflect: (term) => {
// "jugar" (u->ue)
if (term.startsWith('jue')) {
return term.replace(/ue/, 'u').replace(/(o|as|a|an)$/, 'ar');
}
return term.replace(/ue/, 'o').replace(/(o|as|a|an)$/, 'ar');
},
conditionsIn: ['v_ar'],
conditionsOut: ['v_ar'],
},
// o->ue for -er
{
type: 'other',
isInflected: /ue([a-z]*)(o|es|e|en)$/,
deinflect: (term) => {
// "oler" (o->hue)
if (term.startsWith('hue')) {
return term.replace(/hue/, 'o').replace(/(o|es|e|en)$/, 'er');
}
return term.replace(/ue/, 'o').replace(/(o|es|e|en)$/, 'er');
},
conditionsIn: ['v_er'],
conditionsOut: ['v_er'],
},
// o->ue for -ir
{
type: 'other',
isInflected: /ue([a-z]*)(o|es|e|en)$/,
deinflect: (term) => term.replace(/ue/, 'o').replace(/(o|es|e|en)$/, 'ir'),
conditionsIn: ['v_ir'],
conditionsOut: ['v_ir'],
},
// e->i for -ir
{
type: 'other',
isInflected: /i([a-z]*)(o|es|e|en)$/,
deinflect: (term) => term.replace(/i/, 'e').replace(/(o|es|e|en)$/, 'ir'),
conditionsIn: ['v_ir'],
conditionsOut: ['v_ir'],
},
// -ar verbs
suffixInflection('o', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('as', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('a', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('amos', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('áis', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('an', 'ar', ['v_ar'], ['v_ar']),
// -er verbs
suffixInflection('o', 'er', ['v_er'], ['v_er']),
suffixInflection('es', 'er', ['v_er'], ['v_er']),
suffixInflection('e', 'er', ['v_er'], ['v_er']),
suffixInflection('emos', 'er', ['v_er'], ['v_er']),
suffixInflection('éis', 'er', ['v_er'], ['v_er']),
suffixInflection('en', 'er', ['v_er'], ['v_er']),
// -ir verbs
suffixInflection('o', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('es', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('e', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('imos', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('ís', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('en', 'ir', ['v_ir'], ['v_ir']),
// i -> y verbs (incluir, huir, construir...)
suffixInflection('uyo', 'uir', ['v_ir'], ['v_ir']),
suffixInflection('uyes', 'uir', ['v_ir'], ['v_ir']),
suffixInflection('uye', 'uir', ['v_ir'], ['v_ir']),
suffixInflection('uyen', 'uir', ['v_ir'], ['v_ir']),
// -tener verbs
suffixInflection('tengo', 'tener', ['v'], ['v']),
suffixInflection('tienes', 'tener', ['v'], ['v']),
suffixInflection('tiene', 'tener', ['v'], ['v']),
suffixInflection('tenemos', 'tener', ['v'], ['v']),
suffixInflection('tenéis', 'tener', ['v'], ['v']),
suffixInflection('tienen', 'tener', ['v'], ['v']),
// -oír verbs
suffixInflection('oigo', 'oír', ['v'], ['v']),
suffixInflection('oyes', 'oír', ['v'], ['v']),
suffixInflection('oye', 'oír', ['v'], ['v']),
suffixInflection('oímos', 'oír', ['v'], ['v']),
suffixInflection('oís', 'oír', ['v'], ['v']),
suffixInflection('oyen', 'oír', ['v'], ['v']),
// -venir verbs
suffixInflection('vengo', 'venir', ['v'], ['v']),
suffixInflection('vienes', 'venir', ['v'], ['v']),
suffixInflection('viene', 'venir', ['v'], ['v']),
suffixInflection('venimos', 'venir', ['v'], ['v']),
suffixInflection('venís', 'venir', ['v'], ['v']),
suffixInflection('vienen', 'venir', ['v'], ['v']),
// Verbs with Irregular Yo Forms
// -guir, -ger, or -gir verbs
suffixInflection('go', 'guir', ['v'], ['v']),
suffixInflection('jo', 'ger', ['v'], ['v']),
suffixInflection('jo', 'gir', ['v'], ['v']),
suffixInflection('aigo', 'aer', ['v'], ['v']),
suffixInflection('zco', 'cer', ['v'], ['v']),
suffixInflection('zco', 'cir', ['v'], ['v']),
suffixInflection('hago', 'hacer', ['v'], ['v']),
suffixInflection('pongo', 'poner', ['v'], ['v']),
suffixInflection('lgo', 'lir', ['v'], ['v']),
suffixInflection('lgo', 'ler', ['v'], ['v']),
wholeWordInflection('quepo', 'caber', ['v'], ['v']),
wholeWordInflection('doy', 'dar', ['v'], ['v']),
wholeWordInflection('sé', 'saber', ['v'], ['v']),
wholeWordInflection('veo', 'ver', ['v'], ['v']),
// Ser, estar, ir, haber
wholeWordInflection('soy', 'ser', ['v'], ['v']),
wholeWordInflection('eres', 'ser', ['v'], ['v']),
wholeWordInflection('es', 'ser', ['v'], ['v']),
wholeWordInflection('somos', 'ser', ['v'], ['v']),
wholeWordInflection('sois', 'ser', ['v'], ['v']),
wholeWordInflection('son', 'ser', ['v'], ['v']),
wholeWordInflection('estoy', 'estar', ['v'], ['v']),
wholeWordInflection('estás', 'estar', ['v'], ['v']),
wholeWordInflection('está', 'estar', ['v'], ['v']),
wholeWordInflection('estamos', 'estar', ['v'], ['v']),
wholeWordInflection('estáis', 'estar', ['v'], ['v']),
wholeWordInflection('están', 'estar', ['v'], ['v']),
wholeWordInflection('voy', 'ir', ['v'], ['v']),
wholeWordInflection('vas', 'ir', ['v'], ['v']),
wholeWordInflection('va', 'ir', ['v'], ['v']),
wholeWordInflection('vamos', 'ir', ['v'], ['v']),
wholeWordInflection('vais', 'ir', ['v'], ['v']),
wholeWordInflection('van', 'ir', ['v'], ['v']),
wholeWordInflection('he', 'haber', ['v'], ['v']),
wholeWordInflection('has', 'haber', ['v'], ['v']),
wholeWordInflection('ha', 'haber', ['v'], ['v']),
wholeWordInflection('hemos', 'haber', ['v'], ['v']),
wholeWordInflection('habéis', 'haber', ['v'], ['v']),
wholeWordInflection('han', 'haber', ['v'], ['v']),
],
},
'preterite': {
name: 'preterite',
description: 'Preterite (past) form of a verb',
rules: [
// e->i for -ir
{
type: 'other',
isInflected: /i([a-z]*)(ió|ieron)$/, // this only happens in 3rd person - singular and plural
deinflect: (term) => term.replace(/i/, 'e').replace(/(ió|ieron)$/, 'ir'),
conditionsIn: ['v_ir'],
conditionsOut: ['v_ir'],
},
// o->u for -ir
{
type: 'other',
isInflected: /u([a-z]*)(ió|ieron)$/,
deinflect: (term) => term.replace(/u/, 'o').replace(/(ió|ieron)$/, 'ir'),
conditionsIn: ['v_ir'],
conditionsOut: ['v_ir'],
},
// -ar verbs
suffixInflection('é', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('aste', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('ó', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('amos', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('asteis', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('aron', 'ar', ['v_ar'], ['v_ar']),
// -er verbs
suffixInflection('í', 'er', ['v_er'], ['v_er']),
suffixInflection('iste', 'er', ['v_er'], ['v_er']),
suffixInflection('ió', 'er', ['v_er'], ['v_er']),
suffixInflection('imos', 'er', ['v_er'], ['v_er']),
suffixInflection('isteis', 'er', ['v_er'], ['v_er']),
suffixInflection('ieron', 'er', ['v_er'], ['v_er']),
// -ir verbs
suffixInflection('í', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('iste', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('ió', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('imos', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('isteis', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('ieron', 'ir', ['v_ir'], ['v_ir']),
// -car, -gar, -zar verbs
suffixInflection('qué', 'car', ['v'], ['v']),
suffixInflection('gué', 'gar', ['v'], ['v']),
suffixInflection('cé', 'zar', ['v'], ['v']),
// -uir verbs
suffixInflection('í', 'uir', ['v'], ['v']),
// Verbs with irregular forms
wholeWordInflection('fui', 'ser', ['v'], ['v']),
wholeWordInflection('fuiste', 'ser', ['v'], ['v']),
wholeWordInflection('fue', 'ser', ['v'], ['v']),
wholeWordInflection('fuimos', 'ser', ['v'], ['v']),
wholeWordInflection('fuisteis', 'ser', ['v'], ['v']),
wholeWordInflection('fueron', 'ser', ['v'], ['v']),
wholeWordInflection('fui', 'ir', ['v'], ['v']),
wholeWordInflection('fuiste', 'ir', ['v'], ['v']),
wholeWordInflection('fue', 'ir', ['v'], ['v']),
wholeWordInflection('fuimos', 'ir', ['v'], ['v']),
wholeWordInflection('fuisteis', 'ir', ['v'], ['v']),
wholeWordInflection('fueron', 'ir', ['v'], ['v']),
wholeWordInflection('di', 'dar', ['v'], ['v']),
wholeWordInflection('diste', 'dar', ['v'], ['v']),
wholeWordInflection('dio', 'dar', ['v'], ['v']),
wholeWordInflection('dimos', 'dar', ['v'], ['v']),
wholeWordInflection('disteis', 'dar', ['v'], ['v']),
wholeWordInflection('dieron', 'dar', ['v'], ['v']),
suffixInflection('hice', 'hacer', ['v'], ['v']),
suffixInflection('hiciste', 'hacer', ['v'], ['v']),
suffixInflection('hizo', 'hacer', ['v'], ['v']),
suffixInflection('hicimos', 'hacer', ['v'], ['v']),
suffixInflection('hicisteis', 'hacer', ['v'], ['v']),
suffixInflection('hicieron', 'hacer', ['v'], ['v']),
suffixInflection('puse', 'poner', ['v'], ['v']),
suffixInflection('pusiste', 'poner', ['v'], ['v']),
suffixInflection('puso', 'poner', ['v'], ['v']),
suffixInflection('pusimos', 'poner', ['v'], ['v']),
suffixInflection('pusisteis', 'poner', ['v'], ['v']),
suffixInflection('pusieron', 'poner', ['v'], ['v']),
suffixInflection('dije', 'decir', ['v'], ['v']),
suffixInflection('dijiste', 'decir', ['v'], ['v']),
suffixInflection('dijo', 'decir', ['v'], ['v']),
suffixInflection('dijimos', 'decir', ['v'], ['v']),
suffixInflection('dijisteis', 'decir', ['v'], ['v']),
suffixInflection('dijeron', 'decir', ['v'], ['v']),
suffixInflection('vine', 'venir', ['v'], ['v']),
suffixInflection('viniste', 'venir', ['v'], ['v']),
suffixInflection('vino', 'venir', ['v'], ['v']),
suffixInflection('vinimos', 'venir', ['v'], ['v']),
suffixInflection('vinisteis', 'venir', ['v'], ['v']),
suffixInflection('vinieron', 'venir', ['v'], ['v']),
wholeWordInflection('quise', 'querer', ['v'], ['v']),
wholeWordInflection('quisiste', 'querer', ['v'], ['v']),
wholeWordInflection('quiso', 'querer', ['v'], ['v']),
wholeWordInflection('quisimos', 'querer', ['v'], ['v']),
wholeWordInflection('quisisteis', 'querer', ['v'], ['v']),
wholeWordInflection('quisieron', 'querer', ['v'], ['v']),
suffixInflection('tuve', 'tener', ['v'], ['v']),
suffixInflection('tuviste', 'tener', ['v'], ['v']),
suffixInflection('tuvo', 'tener', ['v'], ['v']),
suffixInflection('tuvimos', 'tener', ['v'], ['v']),
suffixInflection('tuvisteis', 'tener', ['v'], ['v']),
suffixInflection('tuvieron', 'tener', ['v'], ['v']),
wholeWordInflection('pude', 'poder', ['v'], ['v']),
wholeWordInflection('pudiste', 'poder', ['v'], ['v']),
wholeWordInflection('pudo', 'poder', ['v'], ['v']),
wholeWordInflection('pudimos', 'poder', ['v'], ['v']),
wholeWordInflection('pudisteis', 'poder', ['v'], ['v']),
wholeWordInflection('pudieron', 'poder', ['v'], ['v']),
wholeWordInflection('supe', 'saber', ['v'], ['v']),
wholeWordInflection('supiste', 'saber', ['v'], ['v']),
wholeWordInflection('supo', 'saber', ['v'], ['v']),
wholeWordInflection('supimos', 'saber', ['v'], ['v']),
wholeWordInflection('supisteis', 'saber', ['v'], ['v']),
wholeWordInflection('supieron', 'saber', ['v'], ['v']),
wholeWordInflection('estuve', 'estar', ['v'], ['v']),
wholeWordInflection('estuviste', 'estar', ['v'], ['v']),
wholeWordInflection('estuvo', 'estar', ['v'], ['v']),
wholeWordInflection('estuvimos', 'estar', ['v'], ['v']),
wholeWordInflection('estuvisteis', 'estar', ['v'], ['v']),
wholeWordInflection('estuvieron', 'estar', ['v'], ['v']),
wholeWordInflection('anduve', 'andar', ['v'], ['v']),
wholeWordInflection('anduviste', 'andar', ['v'], ['v']),
wholeWordInflection('anduvo', 'andar', ['v'], ['v']),
wholeWordInflection('anduvimos', 'andar', ['v'], ['v']),
wholeWordInflection('anduvisteis', 'andar', ['v'], ['v']),
wholeWordInflection('anduvieron', 'andar', ['v'], ['v']),
],
},
'imperfect': {
name: 'imperfect',
description: 'Imperfect form of a verb',
rules: [
// -ar verbs
suffixInflection('aba', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('abas', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('aba', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('ábamos', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('abais', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('aban', 'ar', ['v_ar'], ['v_ar']),
// -er verbs
suffixInflection('ía', 'er', ['v_er'], ['v_er']),
suffixInflection('ías', 'er', ['v_er'], ['v_er']),
suffixInflection('ía', 'er', ['v_er'], ['v_er']),
suffixInflection('íamos', 'er', ['v_er'], ['v_er']),
suffixInflection('íais', 'er', ['v_er'], ['v_er']),
suffixInflection('ían', 'er', ['v_er'], ['v_er']),
// -ir verbs
suffixInflection('ía', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('ías', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('ía', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('íamos', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('íais', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('ían', 'ir', ['v_ir'], ['v_ir']),
// -ir verbs with stem changes
suffixInflection('eía', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('eías', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('eía', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('eíamos', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('eíais', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('eían', 'ir', ['v_ir'], ['v_ir']),
// irregular verbs ir, ser, ver
wholeWordInflection('era', 'ser', ['v'], ['v']),
wholeWordInflection('eras', 'ser', ['v'], ['v']),
wholeWordInflection('era', 'ser', ['v'], ['v']),
wholeWordInflection('éramos', 'ser', ['v'], ['v']),
wholeWordInflection('erais', 'ser', ['v'], ['v']),
wholeWordInflection('eran', 'ser', ['v'], ['v']),
wholeWordInflection('iba', 'ir', ['v'], ['v']),
wholeWordInflection('ibas', 'ir', ['v'], ['v']),
wholeWordInflection('iba', 'ir', ['v'], ['v']),
wholeWordInflection('íbamos', 'ir', ['v'], ['v']),
wholeWordInflection('ibais', 'ir', ['v'], ['v']),
wholeWordInflection('iban', 'ir', ['v'], ['v']),
wholeWordInflection('veía', 'ver', ['v'], ['v']),
wholeWordInflection('veías', 'ver', ['v'], ['v']),
wholeWordInflection('veía', 'ver', ['v'], ['v']),
wholeWordInflection('veíamos', 'ver', ['v'], ['v']),
wholeWordInflection('veíais', 'ver', ['v'], ['v']),
wholeWordInflection('veían', 'ver', ['v'], ['v']),
],
},
'progressive': {
name: 'progressive',
description: 'Progressive form of a verb',
rules: [
// e->i for -ir
{
type: 'other',
isInflected: /i([a-z]*)(iendo)$/,
deinflect: (term) => term.replace(/i/, 'e').replace(/(iendo)$/, 'ir'),
conditionsIn: ['v_ir'],
conditionsOut: ['v_ir'],
},
// o->u for -er
{
type: 'other',
isInflected: /u([a-z]*)(iendo)$/,
deinflect: (term) => term.replace(/u/, 'o').replace(/(iendo)$/, 'er'),
conditionsIn: ['v_er'],
conditionsOut: ['v_er'],
},
// o->u for -ir
{
type: 'other',
isInflected: /u([a-z]*)(iendo)$/,
deinflect: (term) => term.replace(/u/, 'o').replace(/(iendo)$/, 'ir'),
conditionsIn: ['v_ir'],
conditionsOut: ['v_ir'],
},
// regular
suffixInflection('ando', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('iendo', 'er', ['v_er'], ['v_er']),
suffixInflection('iendo', 'ir', ['v_ir'], ['v_ir']),
// vowel before the ending (-yendo)
suffixInflection('ayendo', 'aer', ['v_er'], ['v_er']), // traer -> trayendo, caer -> cayendo
suffixInflection('eyendo', 'eer', ['v_er'], ['v_er']), // leer -> leyendo
suffixInflection('uyendo', 'uir', ['v_ir'], ['v_ir']), // huir -> huyendo
// irregular
wholeWordInflection('oyendo', 'oír', ['v'], ['v']),
wholeWordInflection('yendo', 'ir', ['v'], ['v']),
],
},
'imperative': {
name: 'imperative',
description: 'Imperative form of a verb',
rules: [
{
type: 'other',
isInflected: /ie([a-z]*)(a|e|en)$/,
deinflect: (term) => term.replace(/ie/, 'e').replace(/(a|e|en)$/, 'ar'),
conditionsIn: ['v_ar'],
conditionsOut: ['v_ar'],
},
{
type: 'other',
isInflected: /ie([a-z]*)(e|a|an)$/,
deinflect: (term) => term.replace(/ie/, 'e').replace(/(e|a|an)$/, 'er'),
conditionsIn: ['v_er'],
conditionsOut: ['v_er'],
},
{
type: 'other',
isInflected: /ie([a-z]*)(e|a|an)$/,
deinflect: (term) => term.replace(/ie/, 'e').replace(/(e|a|an)$/, 'ir'),
conditionsIn: ['v_ir'],
conditionsOut: ['v_ir'],
},
{
type: 'other',
isInflected: /ue([a-z]*)(a|e|en)$/,
deinflect: (term) => {
if (term.startsWith('jue')) {
return term.replace(/ue/, 'u').replace(/(a|ue|uen)$/, 'ar');
}
return term.replace(/ue/, 'o').replace(/(a|e|en)$/, 'ar');
},
conditionsIn: ['v_ar'],
conditionsOut: ['v_ar'],
},
{
type: 'other',
isInflected: /ue([a-z]*)(e|a|an)$/,
deinflect: (term) => {
if (term.startsWith('hue')) {
return term.replace(/hue/, 'o').replace(/(e|a|an)$/, 'er');
}
return term.replace(/ue/, 'o').replace(/(e|a|an)$/, 'er');
},
conditionsIn: ['v_er'],
conditionsOut: ['v_er'],
},
{
type: 'other',
isInflected: /ue([a-z]*)(e|a|an)$/,
deinflect: (term) => term.replace(/ue/, 'o').replace(/(e|a|an)$/, 'ir'),
conditionsIn: ['v_ir'],
conditionsOut: ['v_ir'],
},
{
type: 'other',
isInflected: /i([a-z]*)(e|a|an)$/,
deinflect: (term) => term.replace(/i/, 'e').replace(/(e|a|an)$/, 'ir'),
conditionsIn: ['v_ir'],
conditionsOut: ['v_ir'],
},
// -ar verbs
suffixInflection('a', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('emos', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('ad', 'ar', ['v_ar'], ['v_ar']),
// -er verbs
suffixInflection('e', 'er', ['v_er'], ['v_er']),
suffixInflection('amos', 'ar', ['v_er'], ['v_er']),
suffixInflection('ed', 'er', ['v_er'], ['v_er']),
// -ir verbs
suffixInflection('e', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('amos', 'ar', ['v_ir'], ['v_ir']),
suffixInflection('id', 'ir', ['v_ir'], ['v_ir']),
// irregular verbs
wholeWordInflection('diga', 'decir', ['v'], ['v']),
wholeWordInflection('sé', 'ser', ['v'], ['v']),
wholeWordInflection('ve', 'ir', ['v'], ['v']),
wholeWordInflection('ten', 'tener', ['v'], ['v']),
wholeWordInflection('ven', 'venir', ['v'], ['v']),
wholeWordInflection('haz', 'hacer', ['v'], ['v']),
wholeWordInflection('di', 'decir', ['v'], ['v']),
wholeWordInflection('pon', 'poner', ['v'], ['v']),
wholeWordInflection('sal', 'salir', ['v'], ['v']),
// negative commands
// -ar verbs
suffixInflection('es', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('emos', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('éis', 'ar', ['v_ar'], ['v_ar']),
// -er verbs
suffixInflection('as', 'er', ['v_er'], ['v_er']),
suffixInflection('amos', 'er', ['v_er'], ['v_er']),
suffixInflection('áis', 'er', ['v_er'], ['v_er']),
// -ir verbs
suffixInflection('as', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('amos', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('áis', 'ir', ['v_ir'], ['v_ir']),
],
},
'conditional': {
name: 'conditional',
description: 'Conditional form of a verb',
rules: [
suffixInflection('ía', '', ['v'], ['v']),
suffixInflection('ías', '', ['v'], ['v']),
suffixInflection('ía', '', ['v'], ['v']),
suffixInflection('íamos', '', ['v'], ['v']),
suffixInflection('íais', '', ['v'], ['v']),
suffixInflection('ían', '', ['v'], ['v']),
// irregular verbs
wholeWordInflection('diría', 'decir', ['v'], ['v']),
wholeWordInflection('dirías', 'decir', ['v'], ['v']),
wholeWordInflection('diría', 'decir', ['v'], ['v']),
wholeWordInflection('diríamos', 'decir', ['v'], ['v']),
wholeWordInflection('diríais', 'decir', ['v'], ['v']),
wholeWordInflection('dirían', 'decir', ['v'], ['v']),
wholeWordInflection('haría', 'hacer', ['v'], ['v']),
wholeWordInflection('harías', 'hacer', ['v'], ['v']),
wholeWordInflection('haría', 'hacer', ['v'], ['v']),
wholeWordInflection('haríamos', 'hacer', ['v'], ['v']),
wholeWordInflection('haríais', 'hacer', ['v'], ['v']),
wholeWordInflection('harían', 'hacer', ['v'], ['v']),
wholeWordInflection('pondría', 'poner', ['v'], ['v']),
wholeWordInflection('pondrías', 'poner', ['v'], ['v']),
wholeWordInflection('pondría', 'poner', ['v'], ['v']),
wholeWordInflection('pondríamos', 'poner', ['v'], ['v']),
wholeWordInflection('pondríais', 'poner', ['v'], ['v']),
wholeWordInflection('pondrían', 'poner', ['v'], ['v']),
wholeWordInflection('saldría', 'salir', ['v'], ['v']),
wholeWordInflection('saldrías', 'salir', ['v'], ['v']),
wholeWordInflection('saldría', 'salir', ['v'], ['v']),
wholeWordInflection('saldríamos', 'salir', ['v'], ['v']),
wholeWordInflection('saldríais', 'salir', ['v'], ['v']),
wholeWordInflection('saldrían', 'salir', ['v'], ['v']),
wholeWordInflection('tendría', 'tener', ['v'], ['v']),
wholeWordInflection('tendrías', 'tener', ['v'], ['v']),
wholeWordInflection('tendría', 'tener', ['v'], ['v']),
wholeWordInflection('tendríamos', 'tener', ['v'], ['v']),
wholeWordInflection('tendríais', 'tener', ['v'], ['v']),
wholeWordInflection('tendrían', 'tener', ['v'], ['v']),
wholeWordInflection('vendría', 'venir', ['v'], ['v']),
wholeWordInflection('vendrías', 'venir', ['v'], ['v']),
wholeWordInflection('vendría', 'venir', ['v'], ['v']),
wholeWordInflection('vendríamos', 'venir', ['v'], ['v']),
wholeWordInflection('vendríais', 'venir', ['v'], ['v']),
wholeWordInflection('vendrían', 'venir', ['v'], ['v']),
wholeWordInflection('querría', 'querer', ['v'], ['v']),
wholeWordInflection('querrías', 'querer', ['v'], ['v']),
wholeWordInflection('querría', 'querer', ['v'], ['v']),
wholeWordInflection('querríamos', 'querer', ['v'], ['v']),
wholeWordInflection('querríais', 'querer', ['v'], ['v']),
wholeWordInflection('querrían', 'querer', ['v'], ['v']),
wholeWordInflection('podría', 'poder', ['v'], ['v']),
wholeWordInflection('podrías', 'poder', ['v'], ['v']),
wholeWordInflection('podría', 'poder', ['v'], ['v']),
wholeWordInflection('podríamos', 'poder', ['v'], ['v']),
wholeWordInflection('podríais', 'poder', ['v'], ['v']),
wholeWordInflection('podrían', 'poder', ['v'], ['v']),
wholeWordInflection('sabría', 'saber', ['v'], ['v']),
wholeWordInflection('sabrías', 'saber', ['v'], ['v']),
wholeWordInflection('sabría', 'saber', ['v'], ['v']),
wholeWordInflection('sabríamos', 'saber', ['v'], ['v']),
wholeWordInflection('sabríais', 'saber', ['v'], ['v']),
wholeWordInflection('sabrían', 'saber', ['v'], ['v']),
],
},
'future': {
name: 'future',
description: 'Future form of a verb',
rules: [
suffixInflection('é', '', ['v'], ['v']),
suffixInflection('ás', '', ['v'], ['v']),
suffixInflection('á', '', ['v'], ['v']),
suffixInflection('emos', '', ['v'], ['v']),
suffixInflection('éis', '', ['v'], ['v']),
suffixInflection('án', '', ['v'], ['v']),
// irregular verbs
suffixInflection('diré', 'decir', ['v'], ['v']),
suffixInflection('dirás', 'decir', ['v'], ['v']),
suffixInflection('dirá', 'decir', ['v'], ['v']),
suffixInflection('diremos', 'decir', ['v'], ['v']),
suffixInflection('diréis', 'decir', ['v'], ['v']),
suffixInflection('dirán', 'decir', ['v'], ['v']),
wholeWordInflection('haré', 'hacer', ['v'], ['v']),
wholeWordInflection('harás', 'hacer', ['v'], ['v']),
wholeWordInflection('hará', 'hacer', ['v'], ['v']),
wholeWordInflection('haremos', 'hacer', ['v'], ['v']),
wholeWordInflection('haréis', 'hacer', ['v'], ['v']),
wholeWordInflection('harán', 'hacer', ['v'], ['v']),
suffixInflection('pondré', 'poner', ['v'], ['v']),
suffixInflection('pondrás', 'poner', ['v'], ['v']),
suffixInflection('pondrá', 'poner', ['v'], ['v']),
suffixInflection('pondremos', 'poner', ['v'], ['v']),
suffixInflection('pondréis', 'poner', ['v'], ['v']),
suffixInflection('pondrán', 'poner', ['v'], ['v']),
wholeWordInflection('saldré', 'salir', ['v'], ['v']),
wholeWordInflection('saldrás', 'salir', ['v'], ['v']),
wholeWordInflection('saldrá', 'salir', ['v'], ['v']),
wholeWordInflection('saldremos', 'salir', ['v'], ['v']),
wholeWordInflection('saldréis', 'salir', ['v'], ['v']),
wholeWordInflection('saldrán', 'salir', ['v'], ['v']),
suffixInflection('tendré', 'tener', ['v'], ['v']),
suffixInflection('tendrás', 'tener', ['v'], ['v']),
suffixInflection('tendrá', 'tener', ['v'], ['v']),
suffixInflection('tendremos', 'tener', ['v'], ['v']),
suffixInflection('tendréis', 'tener', ['v'], ['v']),
suffixInflection('tendrán', 'tener', ['v'], ['v']),
suffixInflection('vendré', 'venir', ['v'], ['v']),
suffixInflection('vendrás', 'venir', ['v'], ['v']),
suffixInflection('vendrá', 'venir', ['v'], ['v']),
suffixInflection('vendremos', 'venir', ['v'], ['v']),
suffixInflection('vendréis', 'venir', ['v'], ['v']),
suffixInflection('vendrán', 'venir', ['v'], ['v']),
],
},
'present subjunctive': {
name: 'present subjunctive',
description: 'Present subjunctive form of a verb',
rules: [
// STEM-CHANGING RULES FIRST
// e->ie for -ar
{
type: 'other',
isInflected: /ie([a-z]*)(e|es|e|en)$/,
deinflect: (term) => term.replace(/ie/, 'e').replace(/(e|es|e|en)$/, 'ar'),
conditionsIn: ['v_ar'],
conditionsOut: ['v_ar'],
},
// e->ie for -er
{
type: 'other',
isInflected: /ie([a-z]*)(a|as|a|an)$/,
deinflect: (term) => term.replace(/ie/, 'e').replace(/(a|as|a|an)$/, 'er'),
conditionsIn: ['v_er'],
conditionsOut: ['v_er'],
},
// e->ie for -ir
{
type: 'other',
isInflected: /ie([a-z]*)(a|as|a|an)$/,
deinflect: (term) => term.replace(/ie/, 'e').replace(/(a|as|a|an)$/, 'ir'),
conditionsIn: ['v_ir'],
conditionsOut: ['v_ir'],
},
// o->ue for -ar
{
type: 'other',
isInflected: /ue([a-z]*)(e|es|e|en)$/,
deinflect: (term) => {
// "jugar" (u->ue)
if (term.startsWith('jue')) {
return term.replace(/ue/, 'u').replace(/(ue|ues|ue|uen)$/, 'ar');
}
return term.replace(/ue/, 'o').replace(/(e|es|e|en)$/, 'ar');
},
conditionsIn: ['v_ar'],
conditionsOut: ['v_ar'],
},
// o->ue for -er
{
type: 'other',
isInflected: /ue([a-z]*)(a|as|a|an)$/,
deinflect: (term) => {
if (term.startsWith('hue')) {
return term.replace(/hue/, 'o').replace(/(a|as|a|an)$/, 'er');
}
return term.replace(/ue/, 'o').replace(/(a|as|a|an)$/, 'er');
},
conditionsIn: ['v_er'],
conditionsOut: ['v_er'],
},
// o->ue for -ir
{
type: 'other',
isInflected: /ue([a-z]*)(a|as|a|an)$/,
deinflect: (term) => term.replace(/ue/, 'o').replace(/(a|as|a|an)$/, 'ir'),
conditionsIn: ['v_ir'],
conditionsOut: ['v_ir'],
},
// e->i for -ir
{
type: 'other',
isInflected: /i([a-z]*)(a|as|a|an)$/,
deinflect: (term) => term.replace(/i/, 'e').replace(/(a|as|a|an)$/, 'ir'),
conditionsIn: ['v_ir'],
conditionsOut: ['v_ir'],
},
// -ar verbs
suffixInflection('e', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('es', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('e', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('emos', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('éis', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('en', 'ar', ['v_ar'], ['v_ar']),
// -er verbs
suffixInflection('a', 'er', ['v_er'], ['v_er']),
suffixInflection('as', 'er', ['v_er'], ['v_er']),
suffixInflection('a', 'er', ['v_er'], ['v_er']),
suffixInflection('amos', 'er', ['v_er'], ['v_er']),
suffixInflection('áis', 'er', ['v_er'], ['v_er']),
suffixInflection('an', 'er', ['v_er'], ['v_er']),
// -ir verbs
suffixInflection('a', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('as', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('a', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('amos', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('áis', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('an', 'ir', ['v_ir'], ['v_ir']),
// irregular verbs
wholeWordInflection('dé', 'dar', ['v'], ['v']),
wholeWordInflection('des', 'dar', ['v'], ['v']),
wholeWordInflection('dé', 'dar', ['v'], ['v']),
wholeWordInflection('demos', 'dar', ['v'], ['v']),
wholeWordInflection('deis', 'dar', ['v'], ['v']),
wholeWordInflection('den', 'dar', ['v'], ['v']),
wholeWordInflection('esté', 'estar', ['v'], ['v']),
wholeWordInflection('estés', 'estar', ['v'], ['v']),
wholeWordInflection('esté', 'estar', ['v'], ['v']),
wholeWordInflection('estemos', 'estar', ['v'], ['v']),
wholeWordInflection('estéis', 'estar', ['v'], ['v']),
wholeWordInflection('estén', 'estar', ['v'], ['v']),
wholeWordInflection('sea', 'ser', ['v'], ['v']),
wholeWordInflection('seas', 'ser', ['v'], ['v']),
wholeWordInflection('sea', 'ser', ['v'], ['v']),
wholeWordInflection('seamos', 'ser', ['v'], ['v']),
wholeWordInflection('seáis', 'ser', ['v'], ['v']),
wholeWordInflection('sean', 'ser', ['v'], ['v']),
wholeWordInflection('vaya', 'ir', ['v'], ['v']),
wholeWordInflection('vayas', 'ir', ['v'], ['v']),
wholeWordInflection('vaya', 'ir', ['v'], ['v']),
wholeWordInflection('vayamos', 'ir', ['v'], ['v']),
wholeWordInflection('vayáis', 'ir', ['v'], ['v']),
wholeWordInflection('vayan', 'ir', ['v'], ['v']),
wholeWordInflection('haya', 'haber', ['v'], ['v']),
wholeWordInflection('hayas', 'haber', ['v'], ['v']),
wholeWordInflection('haya', 'haber', ['v'], ['v']),
wholeWordInflection('hayamos', 'haber', ['v'], ['v']),
wholeWordInflection('hayáis', 'haber', ['v'], ['v']),
wholeWordInflection('hayan', 'haber', ['v'], ['v']),
wholeWordInflection('sepa', 'saber', ['v'], ['v']),
wholeWordInflection('sepas', 'saber', ['v'], ['v']),
wholeWordInflection('sepa', 'saber', ['v'], ['v']),
wholeWordInflection('sepamos', 'saber', ['v'], ['v']),
wholeWordInflection('sepáis', 'saber', ['v'], ['v']),
wholeWordInflection('sepan', 'saber', ['v'], ['v']),
],
},
'imperfect subjunctive': {
name: 'imperfect subjunctive',
description: 'Imperfect subjunctive form of a verb',
rules: [
// -ar verbs
suffixInflection('ara', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('ase', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('aras', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('ases', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('ara', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('ase', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('áramos', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('ásemos', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('arais', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('aseis', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('aran', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('asen', 'ar', ['v_ar'], ['v_ar']),
// -er verbs
suffixInflection('iera', 'er', ['v_er'], ['v_er']),
suffixInflection('iese', 'er', ['v_er'], ['v_er']),
suffixInflection('ieras', 'er', ['v_er'], ['v_er']),
suffixInflection('ieses', 'er', ['v_er'], ['v_er']),
suffixInflection('iera', 'er', ['v_er'], ['v_er']),
suffixInflection('iese', 'er', ['v_er'], ['v_er']),
suffixInflection('iéramos', 'er', ['v_er'], ['v_er']),
suffixInflection('iésemos', 'er', ['v_er'], ['v_er']),
suffixInflection('ierais', 'er', ['v_er'], ['v_er']),
suffixInflection('ieseis', 'er', ['v_er'], ['v_er']),
suffixInflection('ieran', 'er', ['v_er'], ['v_er']),
suffixInflection('iesen', 'er', ['v_er'], ['v_er']),
// -ir verbs
suffixInflection('iera', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('iese', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('ieras', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('ieses', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('iera', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('iese', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('iéramos', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('iésemos', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('ierais', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('ieseis', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('ieran', 'ir', ['v_ir'], ['v_ir']),
suffixInflection('iesen', 'ir', ['v_ir'], ['v_ir']),
// irregular verbs
wholeWordInflection('fuera', 'ser', ['v'], ['v']),
wholeWordInflection('fuese', 'ser', ['v'], ['v']),
wholeWordInflection('fueras', 'ser', ['v'], ['v']),
wholeWordInflection('fueses', 'ser', ['v'], ['v']),
wholeWordInflection('fuera', 'ser', ['v'], ['v']),
wholeWordInflection('fuese', 'ser', ['v'], ['v']),
wholeWordInflection('fuéramos', 'ser', ['v'], ['v']),
wholeWordInflection('fuésemos', 'ser', ['v'], ['v']),
wholeWordInflection('fuerais', 'ser', ['v'], ['v']),
wholeWordInflection('fueseis', 'ser', ['v'], ['v']),
wholeWordInflection('fueran', 'ser', ['v'], ['v']),
wholeWordInflection('fuesen', 'ser', ['v'], ['v']),
wholeWordInflection('fuera', 'ir', ['v'], ['v']),
wholeWordInflection('fuese', 'ir', ['v'], ['v']),
wholeWordInflection('fueras', 'ir', ['v'], ['v']),
wholeWordInflection('fueses', 'ir', ['v'], ['v']),
wholeWordInflection('fuera', 'ir', ['v'], ['v']),
wholeWordInflection('fuese', 'ir', ['v'], ['v']),
wholeWordInflection('fuéramos', 'ir', ['v'], ['v']),
wholeWordInflection('fuésemos', 'ir', ['v'], ['v']),
wholeWordInflection('fuerais', 'ir', ['v'], ['v']),
wholeWordInflection('fueseis', 'ir', ['v'], ['v']),
wholeWordInflection('fueran', 'ir', ['v'], ['v']),
wholeWordInflection('fuesen', 'ir', ['v'], ['v']),
],
},
'participle': {
name: 'participle',
description: 'Participle form of a verb',
rules: [
// -ar verbs
suffixInflection('ado', 'ar', ['adj'], ['v_ar']),
// -er verbs
suffixInflection('ido', 'er', ['adj'], ['v_er']),
// -ir verbs
suffixInflection('ido', 'ir', ['adj'], ['v_ir']),
// irregular verbs
suffixInflection('oído', 'oír', ['adj'], ['v']),
wholeWordInflection('dicho', 'decir', ['adj'], ['v']),
wholeWordInflection('escrito', 'escribir', ['adj'], ['v']),
wholeWordInflection('hecho', 'hacer', ['adj'], ['v']),
wholeWordInflection('muerto', 'morir', ['adj'], ['v']),
wholeWordInflection('puesto', 'poner', ['adj'], ['v']),
wholeWordInflection('roto', 'romper', ['adj'], ['v']),
wholeWordInflection('visto', 'ver', ['adj'], ['v']),
wholeWordInflection('vuelto', 'volver', ['adj'], ['v']),
],
},
// Maps a reflexive infinitive back to the plain infinitive by dropping the trailing "se"
// (arse -> ar, erse -> er, irse -> ir), with one rule per conjugation class.
'reflexive': {
name: 'reflexive',
description: 'Reflexive form of a verb',
rules: [
// Each rule keeps the conjugation-class condition unchanged (v_ar, v_er, v_ir).
suffixInflection('arse', 'ar', ['v_ar'], ['v_ar']),
suffixInflection('erse', 'er', ['v_er'], ['v_er']),
suffixInflection('irse', 'ir', ['v_ir'], ['v_ir']),
],
},
'pronoun substitution': {
name: 'pronoun substitution',
description: 'Substituted pronoun of a reflexive verb',
rules: [
suffixInflection('arme', 'arse', ['v_ar'], ['v_ar']),
suffixInflection('arte', 'arse', ['v_ar'], ['v_ar']),
suffixInflection('arnos', 'arse', ['v_er'], ['v_er']),
suffixInflection('erme', 'erse', ['v_er'], ['v_er']),
suffixInflection('erte', 'erse', ['v_er'], ['v_er']),
suffixInflection('ernos', 'erse', ['v_er'], ['v_er']),
suffixInflection('irme', 'irse', ['v_ir'], ['v_ir']),
suffixInflection('irte', 'irse', ['v_ir'], ['v_ir']),
suffixInflection('irnos', 'irse', ['v_ir'], ['v_ir']),
],
},
'pronominal': {
// me despertar -> despertarse
name: 'pronominal',
description: 'Pronominal form of a verb',
rules: [
{
type: 'other',
isInflected: new RegExp(REFLEXIVE_PATTERN),
deinflect: (term) => {
return term.replace(REFLEXIVE_PATTERN, (_match, _pronoun, verb, ending) => `${verb}${ending}se`);
},
conditionsIn: ['v'],
conditionsOut: ['v'],
},
],
},
},
};

View File

@@ -0,0 +1,34 @@
/*
* Copyright (C) 2024-2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/**
 * Text preprocessor that converts between the ASCII apostrophe (') and the
 * typographic apostrophe (U+2019).
 * - 'off': returns the input unchanged.
 * - 'direct': replaces every ' with U+2019.
 * - 'inverse': replaces every U+2019 with '.
 * @type {import('language').BidirectionalConversionPreprocessor}
 */
export const apostropheVariants = {
    name: 'Search for apostrophe variants',
    // U+2019 is written as an escape so it cannot be dropped by tooling that mishandles the raw character.
    description: '\' → \u2019 and vice versa',
    options: ['off', 'direct', 'inverse'],
    process: (str, setting) => {
        switch (setting) {
            case 'off':
                return str;
            case 'direct':
                return str.replace(/'/g, '\u2019');
            case 'inverse':
                return str.replace(/\u2019/g, '\'');
        }
    },
};

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,78 @@
/*
* Copyright (C) 2024-2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import {prefixInflection} from '../language-transforms.js';
/** @typedef {keyof typeof conditions} Condition */
// Eclipsis (urú) prefixes: [eclipsed spelling, original initial consonant].
// Every rule applies to nouns and yields a noun.
const eclipsisPrefixInflections = [
    ['mb', 'b'], // 'mbean'
    ['gc', 'c'], // 'gclann'
    ['nd', 'd'], // 'ndul'
    ['bhf', 'f'], // bhfear
    ['ng', 'g'], // nGaeilge
    ['bp', 'p'], // bpáiste
    ['dt', 't'], // dtriail
].map(([eclipsed, initial]) => prefixInflection(eclipsed, initial, ['n'], ['n']));
// Parts of speech used as transform conditions for Irish.
// Every entry is flagged as a dictionary form; `subConditions` lists the more specific conditions grouped under an entry.
const conditions = {
    v: {name: 'Verb', isDictionaryForm: true, subConditions: ['v_phr']},
    v_phr: {name: 'Phrasal verb', isDictionaryForm: true},
    n: {name: 'Noun', isDictionaryForm: true, subConditions: ['np', 'ns']},
    np: {name: 'Noun plural', isDictionaryForm: true},
    ns: {name: 'Noun singular', isDictionaryForm: true},
    adj: {name: 'Adjective', isDictionaryForm: true},
    adv: {name: 'Adverb', isDictionaryForm: true},
};
/**
 * Deinflection descriptor for Irish (`ga`).
 * Currently it only undoes eclipsis on nouns.
 * @type {import('language-transformer').LanguageTransformDescriptor<Condition>}
 */
export const irishTransforms = {
    language: 'ga',
    conditions: conditions,
    transforms: {
        'eclipsis': {
            name: 'eclipsis',
            description: 'eclipsis form of a noun',
            // Shallow copy so the descriptor owns its own rules array.
            rules: eclipsisPrefixInflections.slice(),
        },
    },
};

View File

@@ -0,0 +1,106 @@
/*
* Copyright (C) 2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import {basicTextProcessorOptions, removeAlphabeticDiacritics} from '../text-processors.js';
/**
 * Text processor that transliterates Latin letters to Greek when enabled,
 * and passes the text through untouched when disabled.
 * @type {import('language').TextProcessor<boolean>}
 */
export const convertLatinToGreek = {
    name: 'Convert latin characters to greek',
    description: 'a → α, A → Α, b → β, B → Β, etc.',
    options: basicTextProcessorOptions,
    process: (str, setting) => {
        if (!setting) {
            return str;
        }
        return latinToGreek(str);
    },
};
/**
 * Transliterates Latin text into Greek letters.
 *
 * Steps, in order:
 * 1. Run the text through `removeAlphabeticDiacritics`.
 * 2. Replace the two-letter sequences (`th`, `ph`, `ch`, `ps`) so they are consumed before their individual letters.
 * 3. Replace the remaining single letters.
 * 4. Turn a sigma at the very end of the string into a final sigma.
 *
 * NOTE(review): whether the `ē`/`ō` entries can still match after step 1 depends on
 * `removeAlphabeticDiacritics`, which is defined elsewhere - confirm.
 * @param {string} latin
 * @returns {string}
 */
export function latinToGreek(latin) {
    const stripped = removeAlphabeticDiacritics.process(latin, true);

    /** @type {[latin: string, greek: string][]} */
    const digraphs = [
        ['th', 'θ'], ['ph', 'φ'], ['ch', 'χ'], ['ps', 'ψ'],
        ['Th', 'Θ'], ['Ph', 'Φ'], ['Ch', 'Χ'], ['Ps', 'Ψ'],
    ];
    /** @type {[latin: string, greek: string][]} */
    const letters = [
        ['a', 'α'], ['b', 'β'], ['g', 'γ'], ['d', 'δ'], ['e', 'ε'],
        ['z', 'ζ'], ['ē', 'η'], ['i', 'ι'], ['k', 'κ'], ['l', 'λ'],
        ['m', 'μ'], ['n', 'ν'], ['x', 'ξ'], ['o', 'ο'], ['p', 'π'],
        ['r', 'ρ'], ['s', 'σ'], ['t', 'τ'], ['u', 'υ'], ['ō', 'ω'],
        ['A', 'Α'], ['B', 'Β'], ['G', 'Γ'], ['D', 'Δ'], ['E', 'Ε'],
        ['Z', 'Ζ'], ['Ē', 'Η'], ['I', 'Ι'], ['K', 'Κ'], ['L', 'Λ'],
        ['M', 'Μ'], ['N', 'Ν'], ['X', 'Ξ'], ['O', 'Ο'], ['P', 'Π'],
        ['R', 'Ρ'], ['S', 'Σ'], ['T', 'Τ'], ['U', 'Υ'], ['Ō', 'Ω'],
    ];

    // Digraphs go first; every key is a plain letter sequence, so a literal replaceAll is sufficient.
    let greek = stripped;
    for (const [from, to] of [...digraphs, ...letters]) {
        greek = greek.replaceAll(from, to);
    }

    // Only a sigma that ends the whole string becomes a final sigma.
    return greek.replace(/σ$/, 'ς');
}

View File

@@ -0,0 +1,207 @@
/*
* Copyright (C) 2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import {suffixInflection} from '../language-transforms.js';
// Parts of speech used as transform conditions for Ancient Greek; each one is a dictionary form.
const conditions = {
    v: {name: 'Verb', isDictionaryForm: true},
    n: {name: 'Noun', isDictionaryForm: true},
    adj: {name: 'Adjective', isDictionaryForm: true},
};
/**
 * Builds one transform entry whose rules all have empty input conditions and a single output condition.
 * @param {string} name Transform name (also used as its key in the descriptor).
 * @param {keyof typeof conditions} condition Condition produced by every rule of the transform.
 * @param {[inflectedSuffix: string, deinflectedSuffix: string][]} suffixPairs Suffix rewrites, in rule order.
 * @returns {{name: string, rules: ReturnType<typeof suffixInflection>[]}}
 */
function buildSuffixTransform(name, condition, suffixPairs) {
    return {
        name,
        rules: suffixPairs.map(([inflected, deinflected]) => suffixInflection(inflected, deinflected, [], [condition])),
    };
}

/**
 * Deinflection descriptor for Ancient Greek (`grc`).
 * Each row below is `name, output condition, [inflected suffix, dictionary suffix] pairs`;
 * the rows are keyed by name in the order listed.
 * @type {import('language-transformer').LanguageTransformDescriptor<keyof typeof conditions>}
 */
export const ancientGreekTransforms = {
    language: 'grc',
    conditions,
    transforms: Object.fromEntries([
        // inflections
        // verbs - active voice
        buildSuffixTransform('2nd person singular present active indicative', 'v', [['εις', 'ω'], ['εις', 'εω']]),
        buildSuffixTransform('3rd person singular present active indicative', 'v', [['ει', 'ω'], ['ει', 'εω']]),
        buildSuffixTransform('1st person plural present active indicative', 'v', [['ομεν', 'ω']]),
        buildSuffixTransform('2nd person plural present active indicative', 'v', [['ετε', 'ω']]),
        buildSuffixTransform('3rd person plural present active indicative', 'v', [['ουσι', 'ω'], ['ουσιν', 'ω']]),
        // verbs - middle voice
        buildSuffixTransform('2nd person singular present middle indicative', 'v', [['ῃ', 'ομαι'], ['ει', 'ομαι']]),
        buildSuffixTransform('3rd person singular present middle indicative', 'v', [['εται', 'ομαι']]),
        buildSuffixTransform('1st person plural present middle indicative', 'v', [['ομεθα', 'ομαι']]),
        buildSuffixTransform('2nd person plural present middle indicative', 'v', [['εσθε', 'ομαι']]),
        buildSuffixTransform('3rd person plural present middle indicative', 'v', [['ονται', 'ομαι']]),
        // nouns
        buildSuffixTransform('genitive singular', 'n', [['ου', 'ος'], ['ας', 'α'], ['ου', 'ας'], ['ου', 'ον'], ['ης', 'η']]),
        buildSuffixTransform('dative singular', 'n', [['ω', 'ος'], ['α', 'ας'], ['ω', 'ον']]),
        buildSuffixTransform('accusative singular', 'n', [['ον', 'ος'], ['αν', 'α'], ['αν', 'ας'], ['ην', 'η']]),
        buildSuffixTransform('vocative singular', 'n', [['ε', 'ος'], ['α', 'ας'], ['η', 'η']]),
        buildSuffixTransform('nominative plural', 'n', [['οι', 'ος'], ['αι', 'α'], ['αι', 'ας'], ['α', 'ον'], ['αι', 'η']]),
        buildSuffixTransform('genitive plural', 'n', [['ων', 'ος'], ['ων', 'α'], ['ων', 'ας'], ['ων', 'ον'], ['ων', 'η']]),
        buildSuffixTransform('dative plural', 'n', [['οις', 'ος'], ['αις', 'α'], ['αις', 'ας'], ['οις', 'ον'], ['αις', 'η']]),
        buildSuffixTransform('accusative plural', 'n', [['ους', 'ος'], ['ας', 'α'], ['α', 'ον'], ['ας', 'η']]),
        buildSuffixTransform('vocative plural', 'n', [['οι', 'ος'], ['αι', 'α'], ['αι', 'ας'], ['α', 'ον'], ['αι', 'η']]),
        // adjectives
        buildSuffixTransform('accusative singular masculine', 'adj', [['ον', 'ος']]),
        // word formation
        buildSuffixTransform('nominalization', 'v', [['ος', 'εω']]),
    ].map((transform) => [transform.name, transform])),
};

View File

@@ -0,0 +1,45 @@
/*
* Copyright (C) 2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/**
 * Detects "fake" composition events.
 *
 * Mobile keyboards report that they are composing even while sending ordinary input,
 * which clashes with desktop IME composition (reported correctly).
 * When the composed data is exactly one ASCII letter, it is almost certainly a mobile
 * keyboard pretending to compose.
 * The heuristic is not foolproof: a Japanese IME could convert `えい` to `A`, which would
 * also look like fake composing, but that is unlikely.
 * @param {InputEvent} event
 * @returns {boolean} `true` when `event.data` is a single letter in `A-Z` / `a-z`.
 */
export function isFakeComposing(event) {
    const singleLetterMatch = event.data?.match(/^[A-Za-z]$/);
    return Boolean(singleLetterMatch);
}
/**
 * Decides whether an input event is part of a real IME composition.
 *
 * - Events that are not composing are never reported as composing.
 * - On every platform other than `'android'` the event's own flag is used as-is.
 * - On `'android'` the event must not be a fake composition (see `isFakeComposing`)
 *   and the browser must not be `'firefox-mobile'`.
 * @param {InputEvent} event
 * @param {string} platform
 * @param {string} browser
 * @returns {boolean}
 */
export function isComposing(event, platform, browser) {
    if (!event.isComposing) { return false; }
    // Desktop Composing
    if (platform !== 'android') { return true; }
    // Android Composing
    return !isFakeComposing(event) && browser !== 'firefox-mobile';
}

View File

@@ -0,0 +1,618 @@
/*
* Copyright (C) 2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// Mozc's default Romaji to Hiragana list referenced to create ROMAJI_TO_HIRAGANA
// https://github.com/google/mozc/blob/035668c3452fa98ac09462fd2cf556948964aad7/src/data/preedit/romanji-hiragana.tsv
/**
 * Ordered romaji -> hiragana replacement table.
 *
 * Consumers iterate the entries in insertion order and replace each key everywhere in the text,
 * so the ORDER IS SIGNIFICANT: double letters first, then longest romaji sequences down to
 * single vowels, then symbols, and the bare `n` last.
 *
 * No key may be the empty string: replacing `''` would insert the value between every character.
 * Fullwidth punctuation and the curly single quotes are written as `\u` escapes so they cannot be
 * silently dropped or folded by tooling that mishandles those code points.
 * @type {Record<string, string>}
 */
export const ROMAJI_TO_HIRAGANA = {
    // Double letters - these **must** always be matched first or further down matches may cause inserting `っ` from double letters to require extra logic
    // There **must** be an entry for every accepted double letter
    // To not disturb further matches, an extra letter must be appended after the `っ`
    'qq': 'っq', 'vv': 'っv', 'll': 'っl', 'xx': 'っx',
    'kk': 'っk', 'gg': 'っg', 'ss': 'っs', 'zz': 'っz', 'jj': 'っj',
    'tt': 'っt', 'dd': 'っd', 'hh': 'っh', 'ff': 'っf', 'bb': 'っb',
    'pp': 'っp', 'mm': 'っm', 'yy': 'っy', 'rr': 'っr', 'ww': 'っw',
    'cc': 'っc',

    // Length 4 - longest matches
    'hwyu': 'ふゅ', 'xtsu': 'っ', 'ltsu': 'っ',

    // Length 3
    'vya': 'ゔゃ', 'vyi': 'ゔぃ', 'vyu': 'ゔゅ', 'vye': 'ゔぇ', 'vyo': 'ゔょ',
    'kya': 'きゃ', 'kyi': 'きぃ', 'kyu': 'きゅ', 'kye': 'きぇ', 'kyo': 'きょ',
    'gya': 'ぎゃ', 'gyi': 'ぎぃ', 'gyu': 'ぎゅ', 'gye': 'ぎぇ', 'gyo': 'ぎょ',
    'sya': 'しゃ', 'syi': 'しぃ', 'syu': 'しゅ', 'sye': 'しぇ', 'syo': 'しょ',
    'sha': 'しゃ', 'shi': 'し', 'shu': 'しゅ', 'she': 'しぇ', 'sho': 'しょ',
    'zya': 'じゃ', 'zyi': 'じぃ', 'zyu': 'じゅ', 'zye': 'じぇ', 'zyo': 'じょ',
    'tya': 'ちゃ', 'tyi': 'ちぃ', 'tyu': 'ちゅ', 'tye': 'ちぇ', 'tyo': 'ちょ',
    'cha': 'ちゃ', 'chi': 'ち', 'chu': 'ちゅ', 'che': 'ちぇ', 'cho': 'ちょ',
    'cya': 'ちゃ', 'cyi': 'ちぃ', 'cyu': 'ちゅ', 'cye': 'ちぇ', 'cyo': 'ちょ',
    'dya': 'ぢゃ', 'dyi': 'ぢぃ', 'dyu': 'ぢゅ', 'dye': 'ぢぇ', 'dyo': 'ぢょ',
    'tsa': 'つぁ', 'tsi': 'つぃ', 'tse': 'つぇ', 'tso': 'つぉ',
    'tha': 'てゃ', 'thi': 'てぃ', 'thu': 'てゅ', 'the': 'てぇ', 'tho': 'てょ',
    'dha': 'でゃ', 'dhi': 'でぃ', 'dhu': 'でゅ', 'dhe': 'でぇ', 'dho': 'でょ',
    'twa': 'とぁ', 'twi': 'とぃ', 'twu': 'とぅ', 'twe': 'とぇ', 'two': 'とぉ',
    'dwa': 'どぁ', 'dwi': 'どぃ', 'dwu': 'どぅ', 'dwe': 'どぇ', 'dwo': 'どぉ',
    'nya': 'にゃ', 'nyi': 'にぃ', 'nyu': 'にゅ', 'nye': 'にぇ', 'nyo': 'にょ',
    'hya': 'ひゃ', 'hyi': 'ひぃ', 'hyu': 'ひゅ', 'hye': 'ひぇ', 'hyo': 'ひょ',
    'bya': 'びゃ', 'byi': 'びぃ', 'byu': 'びゅ', 'bye': 'びぇ', 'byo': 'びょ',
    'pya': 'ぴゃ', 'pyi': 'ぴぃ', 'pyu': 'ぴゅ', 'pye': 'ぴぇ', 'pyo': 'ぴょ',
    'fya': 'ふゃ', 'fyu': 'ふゅ', 'fyo': 'ふょ',
    'hwa': 'ふぁ', 'hwi': 'ふぃ', 'hwe': 'ふぇ', 'hwo': 'ふぉ',
    'mya': 'みゃ', 'myi': 'みぃ', 'myu': 'みゅ', 'mye': 'みぇ', 'myo': 'みょ',
    'rya': 'りゃ', 'ryi': 'りぃ', 'ryu': 'りゅ', 'rye': 'りぇ', 'ryo': 'りょ',
    'lyi': 'ぃ', 'xyi': 'ぃ', 'lye': 'ぇ', 'xye': 'ぇ',
    'xka': 'ヵ', 'xke': 'ヶ', 'lka': 'ヵ', 'lke': 'ヶ',
    'kwa': 'くぁ', 'kwi': 'くぃ', 'kwu': 'くぅ', 'kwe': 'くぇ', 'kwo': 'くぉ',
    'gwa': 'ぐぁ', 'gwi': 'ぐぃ', 'gwu': 'ぐぅ', 'gwe': 'ぐぇ', 'gwo': 'ぐぉ',
    'swa': 'すぁ', 'swi': 'すぃ', 'swu': 'すぅ', 'swe': 'すぇ', 'swo': 'すぉ',
    'zwa': 'ずぁ', 'zwi': 'ずぃ', 'zwu': 'ずぅ', 'zwe': 'ずぇ', 'zwo': 'ずぉ',
    'jya': 'じゃ', 'jyi': 'じぃ', 'jyu': 'じゅ', 'jye': 'じぇ', 'jyo': 'じょ',
    'tsu': 'つ', 'xtu': 'っ', 'ltu': 'っ',
    'xya': 'ゃ', 'lya': 'ゃ', 'wyi': 'ゐ', 'xyu': 'ゅ', 'lyu': 'ゅ',
    'wye': 'ゑ', 'xyo': 'ょ', 'lyo': 'ょ', 'xwa': 'ゎ', 'lwa': 'ゎ',
    'wha': 'うぁ', 'whi': 'うぃ', 'whu': 'う', 'whe': 'うぇ', 'who': 'うぉ',

    // Length 2
    'nn': 'ん', 'n\'': 'ん',
    'va': 'ゔぁ', 'vi': 'ゔぃ', 'vu': 'ゔ', 've': 'ゔぇ', 'vo': 'ゔぉ',
    'fa': 'ふぁ', 'fi': 'ふぃ', 'fe': 'ふぇ', 'fo': 'ふぉ',
    'xn': 'ん', 'wu': 'う',
    'xa': 'ぁ', 'xi': 'ぃ', 'xu': 'ぅ', 'xe': 'ぇ', 'xo': 'ぉ',
    'la': 'ぁ', 'li': 'ぃ', 'lu': 'ぅ', 'le': 'ぇ', 'lo': 'ぉ',
    'ye': 'いぇ',
    'ka': 'か', 'ki': 'き', 'ku': 'く', 'ke': 'け', 'ko': 'こ',
    'ga': 'が', 'gi': 'ぎ', 'gu': 'ぐ', 'ge': 'げ', 'go': 'ご',
    'sa': 'さ', 'si': 'し', 'su': 'す', 'se': 'せ', 'so': 'そ',
    'ca': 'か', 'ci': 'し', 'cu': 'く', 'ce': 'せ', 'co': 'こ',
    'qa': 'くぁ', 'qi': 'くぃ', 'qu': 'く', 'qe': 'くぇ', 'qo': 'くぉ',
    'za': 'ざ', 'zi': 'じ', 'zu': 'ず', 'ze': 'ぜ', 'zo': 'ぞ',
    'ja': 'じゃ', 'ji': 'じ', 'ju': 'じゅ', 'je': 'じぇ', 'jo': 'じょ',
    'ta': 'た', 'ti': 'ち', 'tu': 'つ', 'te': 'て', 'to': 'と',
    'da': 'だ', 'di': 'ぢ', 'du': 'づ', 'de': 'で', 'do': 'ど',
    'na': 'な', 'ni': 'に', 'nu': 'ぬ', 'ne': 'ね', 'no': 'の',
    'ha': 'は', 'hi': 'ひ', 'hu': 'ふ', 'fu': 'ふ', 'he': 'へ', 'ho': 'ほ',
    'ba': 'ば', 'bi': 'び', 'bu': 'ぶ', 'be': 'べ', 'bo': 'ぼ',
    'pa': 'ぱ', 'pi': 'ぴ', 'pu': 'ぷ', 'pe': 'ぺ', 'po': 'ぽ',
    'ma': 'ま', 'mi': 'み', 'mu': 'む', 'me': 'め', 'mo': 'も',
    'ya': 'や', 'yu': 'ゆ', 'yo': 'よ',
    'ra': 'ら', 'ri': 'り', 'ru': 'る', 're': 'れ', 'ro': 'ろ',
    'wa': 'わ', 'wi': 'うぃ', 'we': 'うぇ', 'wo': 'を',

    // Length 1 - shortest matches
    'a': 'あ', 'i': 'い', 'u': 'う', 'e': 'え', 'o': 'お',

    // Length 1 Special/Symbols
    '.': '。',
    ',': '、',
    ':': '\uff1a', // fullwidth colon
    '/': '・',
    '!': '\uff01', // fullwidth exclamation mark
    '?': '\uff1f', // fullwidth question mark
    '~': '〜',
    '-': 'ー',
    '\u2018': '「', // left single quotation mark
    '\u2019': '」', // right single quotation mark
    '“': '『',
    '”': '』',
    '[': '\uff3b', // fullwidth left square bracket
    ']': '\uff3d', // fullwidth right square bracket
    '(': '\uff08', // fullwidth left parenthesis
    ')': '\uff09', // fullwidth right parenthesis
    '{': '\uff5b', // fullwidth left curly bracket
    '}': '\uff5d', // fullwidth right curly bracket
    ' ': '\u3000', // ideographic space

    // n -> ん is a special case.
    'n': 'ん',
};
/**
 * Ordered hiragana -> romaji replacement table.
 *
 * Consumers iterate the entries in insertion order and replace each key everywhere in the text,
 * so the ORDER IS SIGNIFICANT: two-character kana first, then single kana, symbols, small kana,
 * and finally the `っ` + consonant entries that rely on the romaji produced by the earlier rows.
 *
 * No key may be the empty string: replacing `''` would insert the value between every character.
 * Fullwidth punctuation and the curly single quotes are written as `\u` escapes so they cannot be
 * silently dropped or folded by tooling that mishandles those code points.
 * @type {Record<string, string>}
 */
export const HIRAGANA_TO_ROMAJI = {
    // Length 2
    'んい': 'n\'i',
    'ゔぁ': 'va', 'ゔぃ': 'vi', 'ゔぉ': 'vo', 'ゔゃ': 'vya', 'ゔゅ': 'vyu', 'ゔぇ': 've', 'ゔょ': 'vyo',
    'きゃ': 'kya', 'きぃ': 'kyi', 'きゅ': 'kyu', 'きぇ': 'kye', 'きょ': 'kyo',
    'ぎゃ': 'gya', 'ぎぃ': 'gyi', 'ぎゅ': 'gyu', 'ぎぇ': 'gye', 'ぎょ': 'gyo',
    'しゃ': 'sha', 'しぃ': 'syi', 'しゅ': 'shu', 'しぇ': 'she', 'しょ': 'sho',
    // NOTE(review): `ちゃ` maps to `cya` while its siblings use `ch-`; kept as-is, confirm whether `cha` was intended.
    'ちゃ': 'cya', 'ちゅ': 'chu', 'ちぇ': 'che', 'ちょ': 'cho', 'ちぃ': 'cyi',
    'ぢゃ': 'dya', 'ぢぃ': 'dyi', 'ぢゅ': 'dyu', 'ぢぇ': 'dye', 'ぢょ': 'dyo',
    'つぁ': 'tsa', 'つぃ': 'tsi', 'つぇ': 'tse', 'つぉ': 'tso',
    'てゃ': 'tha', 'てぃ': 'thi', 'てゅ': 'thu', 'てぇ': 'the', 'てょ': 'tho',
    'でゃ': 'dha', 'でぃ': 'dhi', 'でゅ': 'dhu', 'でぇ': 'dhe', 'でょ': 'dho',
    'とぁ': 'twa', 'とぃ': 'twi', 'とぅ': 'twu', 'とぇ': 'twe', 'とぉ': 'two',
    'どぁ': 'dwa', 'どぃ': 'dwi', 'どぅ': 'dwu', 'どぇ': 'dwe', 'どぉ': 'dwo',
    'にゃ': 'nya', 'にぃ': 'nyi', 'にゅ': 'nyu', 'にぇ': 'nye', 'にょ': 'nyo',
    'ひゃ': 'hya', 'ひぃ': 'hyi', 'ひゅ': 'hyu', 'ひぇ': 'hye', 'ひょ': 'hyo',
    'びゃ': 'bya', 'びぃ': 'byi', 'びゅ': 'byu', 'びぇ': 'bye', 'びょ': 'byo',
    'ぴゃ': 'pya', 'ぴぃ': 'pyi', 'ぴゅ': 'pyu', 'ぴぇ': 'pye', 'ぴょ': 'pyo',
    'ふゃ': 'fya', 'ふょ': 'fyo', 'ふぁ': 'fa', 'ふゅ': 'fyu', 'ふぃ': 'fi', 'ふぇ': 'fe', 'ふぉ': 'fo',
    'みゃ': 'mya', 'みぃ': 'myi', 'みゅ': 'myu', 'みぇ': 'mye', 'みょ': 'myo',
    'りゃ': 'rya', 'りぃ': 'ryi', 'りゅ': 'ryu', 'りぇ': 'rye', 'りょ': 'ryo',
    'くぁ': 'kwa', 'くぃ': 'kwi', 'くぅ': 'kwu', 'くぇ': 'kwe', 'くぉ': 'kwo',
    'ぐぁ': 'gwa', 'ぐぃ': 'gwi', 'ぐぅ': 'gwu', 'ぐぇ': 'gwe', 'ぐぉ': 'gwo',
    'すぁ': 'swa', 'すぃ': 'swi', 'すぅ': 'swu', 'すぇ': 'swe', 'すぉ': 'swo',
    'ずぁ': 'zwa', 'ずぃ': 'zwi', 'ずぅ': 'zwu', 'ずぇ': 'zwe', 'ずぉ': 'zwo',
    'じゃ': 'ja', 'じぃ': 'jyi', 'じゅ': 'ju', 'じぇ': 'je', 'じょ': 'jo',
    'うぁ': 'wha', 'うぃ': 'wi', 'うぇ': 'we', 'うぉ': 'who',
    'いぇ': 'ye',

    // Length 1
    'ん': 'n',
    'あ': 'a', 'い': 'i', 'う': 'u', 'え': 'e', 'お': 'o',
    'ゔ': 'vu',
    'か': 'ka', 'き': 'ki', 'く': 'ku', 'け': 'ke', 'こ': 'ko',
    'が': 'ga', 'ぎ': 'gi', 'ぐ': 'gu', 'げ': 'ge', 'ご': 'go',
    'さ': 'sa', 'し': 'shi', 'す': 'su', 'せ': 'se', 'そ': 'so',
    'ざ': 'za', 'じ': 'ji', 'ず': 'zu', 'ぜ': 'ze', 'ぞ': 'zo',
    'た': 'ta', 'ち': 'chi', 'つ': 'tsu', 'て': 'te', 'と': 'to',
    'だ': 'da', 'ぢ': 'di', 'づ': 'du', 'で': 'de', 'ど': 'do',
    'な': 'na', 'に': 'ni', 'ぬ': 'nu', 'ね': 'ne', 'の': 'no',
    'は': 'ha', 'ひ': 'hi', 'ふ': 'fu', 'へ': 'he', 'ほ': 'ho',
    'ば': 'ba', 'び': 'bi', 'ぶ': 'bu', 'べ': 'be', 'ぼ': 'bo',
    'ぱ': 'pa', 'ぴ': 'pi', 'ぷ': 'pu', 'ぺ': 'pe', 'ぽ': 'po',
    'ま': 'ma', 'み': 'mi', 'む': 'mu', 'め': 'me', 'も': 'mo',
    'や': 'ya', 'ゆ': 'yu', 'よ': 'yo',
    'ら': 'ra', 'り': 'ri', 'る': 'ru', 'れ': 're', 'ろ': 'ro',
    'わ': 'wa', 'ゐ': 'wi', 'ゑ': 'we', 'を': 'wo',

    // Length 1 Special/Symbols
    '。': '.',
    '、': ',',
    '\uff1a': ':', // fullwidth colon
    '・': '/',
    '\uff01': '!', // fullwidth exclamation mark
    '\uff1f': '?', // fullwidth question mark
    '〜': '~',
    'ー': '-',
    '「': '\u2018', // left single quotation mark
    '」': '\u2019', // right single quotation mark
    '『': '“',
    '』': '”',
    '\uff3b': '[', // fullwidth left square bracket
    '\uff3d': ']', // fullwidth right square bracket
    '\uff08': '(', // fullwidth left parenthesis
    '\uff09': ')', // fullwidth right parenthesis
    '\uff5b': '{', // fullwidth left curly bracket
    '\uff5d': '}', // fullwidth right curly bracket
    '\u3000': ' ', // ideographic space

    // Length 1 Small - Even though these are usually represented with `x` or `l` prepending them, in romaji it makes the most sense to not do that
    'ゃ': 'ya', 'ゅ': 'yu', 'ょ': 'yo', 'ゎ': 'wa',
    'ぁ': 'a', 'ぃ': 'i', 'ぅ': 'u', 'ぇ': 'e', 'ぉ': 'o',
    'ヵ': 'ka', 'ヶ': 'ke',

    // Double letters - these **must** always be matched last or they will break previous matches
    'っq': 'qq', 'っv': 'vv', 'っx': 'xx',
    'っk': 'kk', 'っg': 'gg', 'っs': 'ss', 'っz': 'zz', 'っj': 'jj',
    'っt': 'tt', 'っd': 'dd', 'っh': 'hh', 'っf': 'ff', 'っb': 'bb',
    'っp': 'pp', 'っm': 'mm', 'っy': 'yy', 'っr': 'rr', 'っw': 'ww',
    'っc': 'cc',

    // `っん` is a special case
    'っn': 'n',

    // single `っ` is weird, some converters just remove it, some leave the `っ` in kana, some replace with `xtsu` or `ltsu`
    'っ': '',
};

View File

@@ -0,0 +1,118 @@
/*
* Copyright (C) 2024-2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import {basicTextProcessorOptions} from '../text-processors.js';
import {convertAlphabeticToKana} from './japanese-wanakana.js';
import {
collapseEmphaticSequences as collapseEmphaticSequencesFunction,
convertAlphanumericToFullWidth,
convertFullWidthAlphanumericToNormal,
convertHalfWidthKanaToFullWidth,
convertHiraganaToKatakana as convertHiraganaToKatakanaFunction,
convertKatakanaToHiragana as convertKatakanaToHiraganaFunction,
normalizeCJKCompatibilityCharacters as normalizeCJKCompatibilityCharactersFunction,
normalizeCombiningCharacters as normalizeCombiningCharactersFunction,
} from './japanese.js';
import {convertVariants} from '../../../lib/kanji-processor.js';
/**
 * Text processor that converts half-width kana to full-width kana when enabled,
 * and returns the text unchanged when disabled.
 * @type {import('language').TextProcessor<boolean>}
 */
export const convertHalfWidthCharacters = {
    name: 'Convert half width characters to full width',
    // The left-hand side of the example is HALF-WIDTH katakana (U+FF96 U+FF90 U+FF81 U+FF6C U+FF9D).
    // It is written with escapes so it cannot be folded into the full-width form, which would make
    // both sides of the example look identical.
    description: '\uff96\uff90\uff81\uff6c\uff9d → ヨミチャン',
    options: basicTextProcessorOptions,
    process: (str, setting) => (setting ? convertHalfWidthKanaToFullWidth(str) : str),
};
/**
 * Text processor that converts alphabetic (romaji) input to hiragana when enabled,
 * and returns the text unchanged when disabled.
 * @type {import('language').TextProcessor<boolean>}
 */
export const alphabeticToHiragana = {
    name: 'Convert alphabetic characters to hiragana',
    description: 'yomichan → よみちゃん',
    options: basicTextProcessorOptions,
    process: (str, setting) => {
        if (!setting) {
            return str;
        }
        return convertAlphabeticToKana(str);
    },
};
/**
 * Bidirectional preprocessor for alphanumeric width variants.
 *
 * - `'off'`: returns the text unchanged.
 * - `'direct'`: full-width alphanumerics -> normal width.
 * - `'inverse'`: normal-width alphanumerics -> full width.
 * @type {import('language').BidirectionalConversionPreprocessor}
 */
export const alphanumericWidthVariants = {
    name: 'Convert between alphabetic width variants',
    // The left-hand side of the example is "yomitan" in FULLWIDTH Latin letters (U+FF59 U+FF4F U+FF4D U+FF49 U+FF54 U+FF41 U+FF4E).
    // It is written with escapes so the example cannot be dropped by tooling that mishandles fullwidth forms.
    description: '\uff59\uff4f\uff4d\uff49\uff54\uff41\uff4e → yomitan and vice versa',
    options: ['off', 'direct', 'inverse'],
    process: (str, setting) => {
        switch (setting) {
            case 'off':
                return str;
            case 'direct':
                return convertFullWidthAlphanumericToNormal(str);
            case 'inverse':
                return convertAlphanumericToFullWidth(str);
        }
    },
};
/**
 * Bidirectional preprocessor for the two kana scripts.
 *
 * - `'direct'`: hiragana -> katakana.
 * - `'inverse'`: katakana -> hiragana.
 * - `'off'`: returns the text unchanged.
 * @type {import('language').BidirectionalConversionPreprocessor}
 */
export const convertHiraganaToKatakana = {
    name: 'Convert hiragana to katakana',
    description: 'よみちゃん → ヨミチャン and vice versa',
    options: ['off', 'direct', 'inverse'],
    process: (str, setting) => {
        switch (setting) {
            case 'direct':
                return convertHiraganaToKatakanaFunction(str);
            case 'inverse':
                return convertKatakanaToHiraganaFunction(str);
            case 'off':
                return str;
        }
    },
};
/**
 * Text processor that collapses emphatic character sequences.
 * The setting is a pair: the first flag enables collapsing at all, the second is forwarded
 * to the collapsing function as its "full" flag.
 * @type {import('language').TextProcessor<[collapseEmphatic: boolean, collapseEmphaticFull: boolean]>}
 */
export const collapseEmphaticSequences = {
    name: 'Collapse emphatic character sequences',
    description: 'すっっごーーい → すっごーい / すごい',
    options: [[false, false], [true, false], [true, true]],
    process: (str, [collapseEmphatic, collapseEmphaticFull]) => (
        collapseEmphatic ? collapseEmphaticSequencesFunction(str, collapseEmphaticFull) : str
    ),
};
/**
 * Text processor that normalizes combining characters when enabled,
 * and returns the text unchanged when disabled.
 * @type {import('language').TextProcessor<boolean>}
 */
export const normalizeCombiningCharacters = {
    name: 'Normalize combining characters',
    // The left-hand side must be the DECOMPOSED sequence (U+30C8 + combining U+3099) and the right-hand side the
    // precomposed U+30C9. Escapes pin both forms so Unicode normalization of the source cannot merge them.
    description: '\u30c8\u3099 → \u30c9 (U+30C8 U+3099 → U+30C9)',
    options: basicTextProcessorOptions,
    process: (str, setting) => (setting ? normalizeCombiningCharactersFunction(str) : str),
};
/**
 * Text processor that normalizes CJK compatibility characters when enabled,
 * and returns the text unchanged when disabled.
 * @type {import('language').TextProcessor<boolean>}
 */
export const normalizeCJKCompatibilityCharacters = {
    name: 'Normalize CJK Compatibility Characters',
    description: '㌀ → アパート',
    options: basicTextProcessorOptions,
    process: (str, setting) => {
        if (!setting) {
            return str;
        }
        return normalizeCJKCompatibilityCharactersFunction(str);
    },
};
/**
 * Text processor that replaces kanji variants with their modern standard form when enabled,
 * and returns the text unchanged when disabled.
 * @type {import('language').TextProcessor<boolean>}
 */
export const standardizeKanji = {
    name: 'Convert kanji variants to their modern standard form',
    description: '萬 → 万',
    options: basicTextProcessorOptions,
    process: (str, setting) => {
        if (!setting) {
            return str;
        }
        return convertVariants(str);
    },
};

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,149 @@
/*
* Copyright (C) 2024-2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import {HIRAGANA_TO_ROMAJI, ROMAJI_TO_HIRAGANA} from './japanese-kana-romaji-dicts.js';
import {convertHiraganaToKatakana} from './japanese.js';
/**
 * Converts romaji to hiragana.
 * The text is lower-cased first, then every `ROMAJI_TO_HIRAGANA` entry is applied in table order,
 * and finally the sokuon gaps left by the single replacement pass are filled.
 * @param {string} text
 * @returns {string}
 */
export function convertToHiragana(text) {
    const converted = Object.entries(ROMAJI_TO_HIRAGANA).reduce(
        (partial, [romaji, kana]) => partial.replaceAll(romaji, kana),
        text.toLowerCase(),
    );
    return fillSokuonGaps(converted);
}
/**
 * Converts text to kana for use as an IME, keeping a possibly unfinished `n` / `ny`
 * directly behind the text cursor unconverted.
 * @param {string} text
 * @param {number} selectionStart Position of the user's text cursor in `text`.
 * @returns {import('language').KanaIMEOutput} The converted string and the cursor position shifted by the change in length.
 */
export function convertToKanaIME(text, selectionStart) {
    // A single `n` directly behind the text cursor must be hidden from the converter (IME use only):
    // `n` may only become ん once the cursor is at least one character ahead of it.
    // `ny` directly behind the cursor is hidden as well, because the user may be typing `nya` `nyi` `nyu` `nye` `nyo`.
    // Examples (`|` is the user's text cursor):
    // `たn|` does not convert to `たん|`; the `n` is hidden and only `た` is sent to the converter.
    // `n|の` does not convert to `ん|の`; the cursor is not at the end of the line, but the `n` is still directly behind it.
    // `ny|` does not convert to `んy|`; the user may be typing something that starts with `ny`, such as `nya`.
    // `たnt|` converts to `たんt|`; the cursor is one character ahead of the `n`, so nothing is hidden.
    // `nとあ|` converts to `んとあ|`; the cursor is two characters away from the `n`.
    // `なno|` still converts to `なの` rather than `なんお`, because `no` -> `の` is found before `n` -> `ん` and `o` -> `お`.
    // `nn|` still converts to `ん` rather than `んん`, because `nn` -> `ん` is found before `n` -> `ん`.
    // A pasted run of `n` such as `nnnnn|` leaves the last `n` alone and converts to `んんn`.
    const lowered = text.toLowerCase();
    const endsWithLoneN = (
        lowered[selectionStart - 1] === 'n' &&
        lowered.slice(0, selectionStart - 1).replaceAll('nn', '').at(-1) !== 'n'
    );

    // Number of characters directly behind the cursor that are kept away from the converter.
    let hiddenLength = 0;
    if (endsWithLoneN) {
        hiddenLength = 1;
    } else if (lowered.slice(selectionStart - 2, selectionStart) === 'ny') {
        hiddenLength = 2;
    }

    let kanaString;
    if (hiddenLength === 0) {
        kanaString = convertToKana(text);
    } else {
        const hiddenStart = selectionStart - hiddenLength;
        const head = text.slice(0, hiddenStart);
        const hidden = text.slice(hiddenStart, selectionStart);
        const tail = text.slice(selectionStart);
        kanaString = convertToKana(head) + hidden + convertToKana(tail);
    }

    // The cursor moves by the overall change in length.
    const newSelectionStart = selectionStart + (kanaString.length - text.length);
    return {kanaString, newSelectionStart};
}
/**
 * Converts romaji to kana: lower-case romaji becomes hiragana, upper-case romaji becomes katakana.
 * Entries are applied in `ROMAJI_TO_HIRAGANA` table order; afterwards the sokuon gaps are filled.
 * @param {string} text
 * @returns {string}
 */
export function convertToKana(text) {
    let converted = text;
    Object.entries(ROMAJI_TO_HIRAGANA).forEach(([romaji, hiragana]) => {
        converted = converted.replaceAll(romaji, hiragana);
        // Uppercase text converts to katakana
        // (the replacement is upper-cased as well, presumably so that a trailing latin letter such as the `k` in `っk` stays upper case)
        const katakana = convertHiraganaToKatakana(hiragana).toUpperCase();
        converted = converted.replaceAll(romaji.toUpperCase(), katakana);
    });
    return fillSokuonGaps(converted);
}
/**
 * Fills gaps in sokuons that replaceAll using ROMAJI_TO_HIRAGANA will miss due to it not running iteratively.
 * Example: `ttttttttttsu` -> `っっっっっっっっっつ` would become `ttttttttttsu` -> `っtっtっtっtっつ` without filling the gaps.
 * A `っ` + lower-case letter (or `ッ` + upper-case letter) is only rewritten when another sokuon follows it directly.
 * @param {string} text
 * @returns {string}
 */
function fillSokuonGaps(text) {
    const hiraganaFilled = text.replace(/っ[a-z](?=っ)/g, 'っっ');
    const katakanaFilled = hiraganaFilled.replace(/ッ[A-Z](?=ッ)/g, 'ッッ');
    return katakanaFilled;
}
/**
 * Converts kana to romaji.
 * Every `HIRAGANA_TO_ROMAJI` entry is applied in table order, first for the hiragana key
 * and then for its katakana counterpart.
 * @param {string} text
 * @returns {string}
 */
export function convertToRomaji(text) {
    return Object.entries(HIRAGANA_TO_ROMAJI).reduce(
        (partial, [hiragana, romaji]) => (
            partial
                .replaceAll(hiragana, romaji)
                .replaceAll(convertHiraganaToKatakana(hiragana), romaji)
        ),
        text,
    );
}
/**
 * Converts every run of alphabetic characters in the text to hiragana.
 * ASCII and fullwidth letters (either case) are folded to lower-case ASCII, and `-` / fullwidth
 * dash to `-`; each maximal run of such characters is converted as one unit.
 * All other characters are copied through unchanged.
 * @param {string} text
 * @returns {string}
 */
export function convertAlphabeticToKana(text) {
    const LOWER_A = 0x61; // 'a'
    const HYPHEN = 0x2d; // '-'

    let pendingRomaji = '';
    let output = '';

    for (const char of text) {
        const codePoint = /** @type {number} */ (char.codePointAt(0));

        /** @type {number|null} */
        let folded = null;
        if (codePoint >= 0x41 && codePoint <= 0x5a) { // ['A', 'Z']
            folded = codePoint - 0x41 + LOWER_A;
        } else if (codePoint >= 0x61 && codePoint <= 0x7a) { // ['a', 'z']
            folded = codePoint;
        } else if (codePoint >= 0xff21 && codePoint <= 0xff3a) { // fullwidth ['A', 'Z']
            folded = codePoint - 0xff21 + LOWER_A;
        } else if (codePoint >= 0xff41 && codePoint <= 0xff5a) { // fullwidth ['a', 'z']
            folded = codePoint - 0xff41 + LOWER_A;
        } else if (codePoint === HYPHEN || codePoint === 0xff0d) { // '-' or fullwidth dash
            folded = HYPHEN;
        }

        if (folded !== null) {
            pendingRomaji += String.fromCodePoint(folded);
            continue;
        }

        // A non-alphabetic character ends the current run.
        if (pendingRomaji.length > 0) {
            output += convertToHiragana(pendingRomaji);
            pendingRomaji = '';
        }
        output += char;
    }

    if (pendingRomaji.length > 0) {
        output += convertToHiragana(pendingRomaji);
    }
    return output;
}

View File

@@ -0,0 +1,813 @@
/*
* Copyright (C) 2024-2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import {CJK_COMPATIBILITY, CJK_IDEOGRAPH_RANGES, CJK_PUNCTUATION_RANGE, FULLWIDTH_CHARACTER_RANGES, isCodePointInRange, isCodePointInRanges} from '../CJK-util.js';
// Code points of individual kana referenced by name in this module.
const HIRAGANA_SMALL_TSU_CODE_POINT = 0x3063; // っ
const KATAKANA_SMALL_TSU_CODE_POINT = 0x30c3; // ッ
const KATAKANA_SMALL_KA_CODE_POINT = 0x30f5; // ヵ
const KATAKANA_SMALL_KE_CODE_POINT = 0x30f6; // ヶ
const KANA_PROLONGED_SOUND_MARK_CODE_POINT = 0x30fc; // ー
// Inclusive [first, last] code point pairs.
// U+3040..U+309F is the Unicode Hiragana block.
/** @type {import('CJK-util').CodepointRange} */
const HIRAGANA_RANGE = [0x3040, 0x309f];
// U+30A0..U+30FF is the Unicode Katakana block.
/** @type {import('CJK-util').CodepointRange} */
const KATAKANA_RANGE = [0x30a0, 0x30ff];
// ぁ (U+3041) .. ゖ (U+3096); same length as KATAKANA_CONVERSION_RANGE, offset by 0x60.
/** @type {import('CJK-util').CodepointRange} */
const HIRAGANA_CONVERSION_RANGE = [0x3041, 0x3096];
// ァ (U+30A1) .. ヶ (U+30F6)
/** @type {import('CJK-util').CodepointRange} */
const KATAKANA_CONVERSION_RANGE = [0x30a1, 0x30f6];
// Both kana blocks together.
/** @type {import('CJK-util').CodepointRange[]} */
const KANA_RANGES = [HIRAGANA_RANGE, KATAKANA_RANGE];
/**
* Japanese character ranges, roughly ordered in order of expected frequency.
* @type {import('CJK-util').CodepointRange[]}
*/
const JAPANESE_RANGES = [
HIRAGANA_RANGE,
KATAKANA_RANGE,
...CJK_IDEOGRAPH_RANGES,
[0xff66, 0xff9f], // Halfwidth katakana
[0x30fb, 0x30fc], // Katakana punctuation
[0xff61, 0xff65], // Kana punctuation
CJK_PUNCTUATION_RANGE,
...FULLWIDTH_CHARACTER_RANGES,
];
// Small kana (hiragana and katakana); a Set of single characters built by iterating the string.
const SMALL_KANA_SET = new Set('ぁぃぅぇぉゃゅょゎァィゥェォャュョヮ');
/**
 * Lookup table used to widen halfwidth katakana.
 *
 * Each key is a single halfwidth character (U+FF65 to U+FF9D). Each value is a
 * three-character string indexed by the diacritic that follows the key:
 *   - index 0: plain fullwidth form,
 *   - index 1: fullwidth form with dakuten,
 *   - index 2: fullwidth form with handakuten.
 * A '-' marks a combination that does not exist.
 *
 * The keys are written as escape sequences on purpose: halfwidth and fullwidth
 * katakana are easily conflated by editors and by Unicode compatibility
 * normalization, and a fullwidth key would never match halfwidth input.
 * @type {Map<string, string>}
 */
const HALFWIDTH_KATAKANA_MAPPING = new Map([
    ['\uff65', '・--'],
    ['\uff66', 'ヲヺ-'],
    ['\uff67', 'ァ--'],
    ['\uff68', 'ィ--'],
    ['\uff69', 'ゥ--'],
    ['\uff6a', 'ェ--'],
    ['\uff6b', 'ォ--'],
    ['\uff6c', 'ャ--'],
    ['\uff6d', 'ュ--'],
    ['\uff6e', 'ョ--'],
    ['\uff6f', 'ッ--'],
    ['\uff70', 'ー--'],
    ['\uff71', 'ア--'],
    ['\uff72', 'イ--'],
    ['\uff73', 'ウヴ-'],
    ['\uff74', 'エ--'],
    ['\uff75', 'オ--'],
    ['\uff76', 'カガ-'],
    ['\uff77', 'キギ-'],
    ['\uff78', 'クグ-'],
    ['\uff79', 'ケゲ-'],
    ['\uff7a', 'コゴ-'],
    ['\uff7b', 'サザ-'],
    ['\uff7c', 'シジ-'],
    ['\uff7d', 'スズ-'],
    ['\uff7e', 'セゼ-'],
    ['\uff7f', 'ソゾ-'],
    ['\uff80', 'タダ-'],
    ['\uff81', 'チヂ-'],
    ['\uff82', 'ツヅ-'],
    ['\uff83', 'テデ-'],
    ['\uff84', 'トド-'],
    ['\uff85', 'ナ--'],
    ['\uff86', 'ニ--'],
    ['\uff87', 'ヌ--'],
    ['\uff88', 'ネ--'],
    ['\uff89', 'ノ--'],
    ['\uff8a', 'ハバパ'],
    ['\uff8b', 'ヒビピ'],
    ['\uff8c', 'フブプ'],
    ['\uff8d', 'ヘベペ'],
    ['\uff8e', 'ホボポ'],
    ['\uff8f', 'マ--'],
    ['\uff90', 'ミ--'],
    ['\uff91', 'ム--'],
    ['\uff92', 'メ--'],
    ['\uff93', 'モ--'],
    ['\uff94', 'ヤ--'],
    ['\uff95', 'ユ--'],
    ['\uff96', 'ヨ--'],
    ['\uff97', 'ラ--'],
    ['\uff98', 'リ--'],
    ['\uff99', 'ル--'],
    ['\uff9a', 'レ--'],
    ['\uff9b', 'ロ--'],
    ['\uff9c', 'ワ--'],
    ['\uff9d', 'ン--'],
]);
// Groups kana by the vowel they end in. Each value is a string of individual kana characters.
// NOTE(review): the last row maps 'の' and 'ノ' to the empty vowel even though both also appear
// in the 'o' row; because the reverse map below is filled in insertion order, the empty vowel
// wins for these two characters. Possibly 'ん'/'ン' was intended - confirm before changing.
const VOWEL_TO_KANA_MAPPING = new Map([
['a', 'ぁあかがさざただなはばぱまゃやらゎわヵァアカガサザタダナハバパマャヤラヮワヵヷ'],
['i', 'ぃいきぎしじちぢにひびぴみりゐィイキギシジチヂニヒビピミリヰヸ'],
['u', 'ぅうくぐすずっつづぬふぶぷむゅゆるゥウクグスズッツヅヌフブプムュユルヴ'],
['e', 'ぇえけげせぜてでねへべぺめれゑヶェエケゲセゼテデネヘベペメレヱヶヹ'],
['o', 'ぉおこごそぞとどのほぼぽもょよろをォオコゴソゾトドノホボポモョヨロヲヺ'],
['', 'のノ'],
]);
/**
 * Reverse lookup of VOWEL_TO_KANA_MAPPING: kana character -> vowel key.
 * A character listed under several vowels keeps the vowel of the last row that contains it.
 * @type {Map<string, string>}
 */
const KANA_TO_VOWEL_MAPPING = new Map();
for (const [vowel, characters] of VOWEL_TO_KANA_MAPPING) {
for (const character of characters) {
KANA_TO_VOWEL_MAPPING.set(character, vowel);
}
}
// Flat list of triples: base kana, the same kana with dakuten, the same kana with handakuten.
// A '-' in the third slot means the kana has no handakuten form.
const kana = 'うゔ-かが-きぎ-くぐ-けげ-こご-さざ-しじ-すず-せぜ-そぞ-ただ-ちぢ-つづ-てで-とど-はばぱひびぴふぶぷへべぺほぼぽワヷ-ヰヸ-ウヴ-ヱヹ-ヲヺ-カガ-キギ-クグ-ケゲ-コゴ-サザ-シジ-スズ-セゼ-ソゾ-タダ-チヂ-ツヅ-テデ-トド-ハバパヒビピフブプヘベペホボポ';
/**
 * Maps a kana carrying a diacritic to its base character and the kind of diacritic.
 * @type {Map<string, {character: string, type: import('japanese-util').DiacriticType}>}
 */
const DIACRITIC_MAPPING = new Map();
for (let offset = 0; offset < kana.length; offset += 3) {
    const [character, voiced, semiVoiced] = kana.slice(offset, offset + 3);
    DIACRITIC_MAPPING.set(voiced, {character, type: 'dakuten'});
    if (semiVoiced === '-') { continue; }
    DIACRITIC_MAPPING.set(semiVoiced, {character, type: 'handakuten'});
}
/**
 * Picks the hiragana that a prolonged sound mark stands for when it follows the given kana.
 * Note that a preceding 'o' sound is lengthened with 'う', not 'お'.
 * @param {string} previousCharacter The kana immediately before the prolonged sound mark.
 * @returns {?string} The replacement hiragana, or `null` when the character has no usable vowel.
 */
function getProlongedHiragana(previousCharacter) {
    const vowel = KANA_TO_VOWEL_MAPPING.get(previousCharacter);
    if (vowel === 'a') { return 'あ'; }
    if (vowel === 'i') { return 'い'; }
    if (vowel === 'u' || vowel === 'o') { return 'う'; }
    if (vowel === 'e') { return 'え'; }
    return null;
}
/**
 * Builds one furigana segment record.
 * @param {string} text The displayed text of the segment.
 * @param {string} reading The reading shown above the text; empty when none is needed.
 * @returns {import('japanese-util').FuriganaSegment}
 */
function createFuriganaSegment(text, reading) {
    const segment = {text, reading};
    return segment;
}
/**
 * Recursively assigns portions of a reading to the groups of a term, starting at `groupsStart`.
 *
 * Kana groups must match the start of the (normalized) reading; non-kana groups may consume
 * any number of reading characters that is at least as long as the group text, and every
 * candidate length is tried from longest to shortest.
 * @param {string} reading The part of the reading that is still unassigned.
 * @param {string} readingNormalized The same part of the reading after hiragana normalization.
 * @param {import('japanese-util').FuriganaGroup[]} groups All groups of the term.
 * @param {number} groupsStart Index of the first group that still needs a reading.
 * @returns {?(import('japanese-util').FuriganaSegment[])} The segments for the remaining groups,
 *   or `null` when no assignment exists or more than one assignment is possible.
 */
function segmentizeFurigana(reading, readingNormalized, groups, groupsStart) {
const groupCount = groups.length - groupsStart;
if (groupCount <= 0) {
// No groups left: succeed only if the reading was consumed completely
return reading.length === 0 ? [] : null;
}
const group = groups[groupsStart];
const {isKana, text} = group;
const textLength = text.length;
if (isKana) {
const {textNormalized} = group;
if (textNormalized !== null && readingNormalized.startsWith(textNormalized)) {
// The kana group consumes exactly its own length from the reading
const segments = segmentizeFurigana(
reading.substring(textLength),
readingNormalized.substring(textLength),
groups,
groupsStart + 1,
);
if (segments !== null) {
if (reading.startsWith(text)) {
// Text and reading are literally identical here, so no furigana is needed
segments.unshift(createFuriganaSegment(text, ''));
} else {
// They only match after normalization; split into matching/non-matching runs
segments.unshift(...getFuriganaKanaSegments(text, reading));
}
return segments;
}
}
return null;
} else {
let result = null;
// Try every split point, giving this group the longest reading first
for (let i = reading.length; i >= textLength; --i) {
const segments = segmentizeFurigana(
reading.substring(i),
readingNormalized.substring(i),
groups,
groupsStart + 1,
);
if (segments !== null) {
if (result !== null) {
// More than one way to segmentize the tail; mark as ambiguous
return null;
}
const segmentReading = reading.substring(0, i);
segments.unshift(createFuriganaSegment(text, segmentReading));
result = segments;
}
// There is only one way to segmentize the last non-kana group
if (groupCount === 1) {
break;
}
}
return result;
}
}
/**
 * Splits a kana text into runs that either equal the reading character-for-character
 * or differ from it. Matching runs get an empty reading; differing runs get the
 * corresponding slice of the reading.
 * @param {string} text The kana text of the term.
 * @param {string} reading The reading aligned with `text`, index by index.
 * @returns {import('japanese-util').FuriganaSegment[]}
 */
function getFuriganaKanaSegments(text, reading) {
    const segments = [];
    /**
     * @param {number} from
     * @param {number} to
     * @param {boolean} matches
     */
    const pushRun = (from, to, matches) => {
        const runReading = matches ? '' : reading.substring(from, to);
        segments.push(createFuriganaSegment(text.substring(from, to), runReading));
    };
    const end = text.length;
    let runStart = 0;
    let runMatches = (reading[0] === text[0]);
    for (let index = 1; index < end; ++index) {
        const matches = (reading[index] === text[index]);
        if (matches !== runMatches) {
            pushRun(runStart, index, runMatches);
            runMatches = matches;
            runStart = index;
        }
    }
    // The final run always gets emitted, even for an empty text
    pushRun(runStart, end, runMatches);
    return segments;
}
/**
 * Measures the common prefix of two strings in UTF-16 code units, comparing whole
 * code points so that a surrogate pair is never split.
 * @param {string} text1
 * @param {string} text2
 * @returns {number} Length of the shared prefix.
 */
function getStemLength(text1, text2) {
    const limit = Math.min(text1.length, text2.length);
    let length = 0;
    while (length < limit) {
        const codePoint1 = /** @type {number} */ (text1.codePointAt(length));
        const codePoint2 = /** @type {number} */ (text2.codePointAt(length));
        if (codePoint1 !== codePoint2) { break; }
        const width = String.fromCodePoint(codePoint1).length;
        // Don't consume partial UTF16 surrogate characters
        if (length + width > limit) { break; }
        length += width;
    }
    return length;
}
// Character code testing functions
/**
 * Tests whether a code point lies in one of the CJK ideograph ranges
 * (CJK_IDEOGRAPH_RANGES imported from CJK-util).
 * @param {number} codePoint The Unicode code point to test.
 * @returns {boolean} `true` when the code point is inside any of those ranges.
 */
export function isCodePointKanji(codePoint) {
return isCodePointInRanges(codePoint, CJK_IDEOGRAPH_RANGES);
}
/**
 * Tests whether a code point lies in KANA_RANGES, i.e. the hiragana or katakana range
 * defined in this module. Halfwidth katakana is not part of KANA_RANGES.
 * @param {number} codePoint The Unicode code point to test.
 * @returns {boolean} `true` when the code point is inside either range.
 */
export function isCodePointKana(codePoint) {
return isCodePointInRanges(codePoint, KANA_RANGES);
}
/**
 * Tests whether a code point lies in any of JAPANESE_RANGES (kana, CJK ideographs,
 * halfwidth katakana, kana/CJK punctuation and fullwidth characters).
 * @param {number} codePoint The Unicode code point to test.
 * @returns {boolean} `true` when the code point is inside any of those ranges.
 */
export function isCodePointJapanese(codePoint) {
return isCodePointInRanges(codePoint, JAPANESE_RANGES);
}
// String testing functions
/**
 * Checks that a string is non-empty and consists only of characters from KANA_RANGES.
 * @param {string} str The string to inspect.
 * @returns {boolean} `false` for the empty string or when any character is not kana.
 */
export function isStringEntirelyKana(str) {
    if (str.length === 0) { return false; }
    return [...str].every((character) => {
        const codePoint = /** @type {number} */ (character.codePointAt(0));
        return isCodePointInRanges(codePoint, KANA_RANGES);
    });
}
/**
 * Checks whether a string contains at least one character from JAPANESE_RANGES.
 * @param {string} str The string to inspect.
 * @returns {boolean} `false` for the empty string or when no character is Japanese.
 */
export function isStringPartiallyJapanese(str) {
    if (str.length === 0) { return false; }
    return [...str].some((character) => {
        const codePoint = /** @type {number} */ (character.codePointAt(0));
        return isCodePointInRanges(codePoint, JAPANESE_RANGES);
    });
}
// Mora functions
/**
 * Tells whether the mora at `moraIndex` is pronounced with high pitch.
 * @param {number} moraIndex Zero-based index of the mora.
 * @param {number | string} pitchAccentValue Either a pattern string in which 'H' marks a
 *   high mora, or a numeric downstep position (0 meaning no downstep).
 * @returns {boolean}
 */
export function isMoraPitchHigh(moraIndex, pitchAccentValue) {
    if (typeof pitchAccentValue === 'string') {
        return pitchAccentValue[moraIndex] === 'H';
    }
    if (pitchAccentValue === 0) {
        // No downstep: everything after the first mora is high
        return moraIndex > 0;
    }
    if (pitchAccentValue === 1) {
        // Downstep after the first mora: only the first mora is high
        return moraIndex < 1;
    }
    // Otherwise high from the second mora up to (excluding) the downstep position
    return moraIndex > 0 && moraIndex < pitchAccentValue;
}
/**
 * Classifies a pitch accent into one of the named categories.
 * @param {string} text The kana reading; only its mora count is used, to tell 'odaka' from 'nakadaka'.
 * @param {number | string} pitchAccentValue A numeric downstep position, or a pattern string
 *   whose first downstep position is used.
 * @param {boolean} isVerbOrAdjective When set, any positive downstep is reported as 'kifuku'.
 * @returns {?import('japanese-util').PitchCategory} The category, or `null` if none applies.
 */
export function getPitchCategory(text, pitchAccentValue, isVerbOrAdjective) {
    /** @type {number} */
    let downstepPosition;
    if (typeof pitchAccentValue === 'string') {
        [downstepPosition] = getDownstepPositions(pitchAccentValue);
    } else {
        downstepPosition = pitchAccentValue;
    }

    if (downstepPosition === 0) { return 'heiban'; }
    if (isVerbOrAdjective) {
        return downstepPosition > 0 ? 'kifuku' : null;
    }
    if (downstepPosition === 1) { return 'atamadaka'; }
    if (!(downstepPosition > 1)) { return null; }
    return downstepPosition >= getKanaMoraCount(text) ? 'odaka' : 'nakadaka';
}
/**
 * Finds every position in a pitch pattern where an 'H' mora is followed by an 'L' mora.
 * @param {string} pitchString Pattern made of 'H' and 'L' characters, one per mora.
 * @returns {number[]} Indices of the 'L' following each 'H'. When there is no downstep, a
 *   single-element array: `[0]` if the pattern starts with 'L', otherwise `[-1]`.
 */
export function getDownstepPositions(pitchString) {
    const positions = [];
    for (let index = 1; index < pitchString.length; ++index) {
        if (pitchString[index] === 'L' && pitchString[index - 1] === 'H') {
            positions.push(index);
        }
    }
    if (positions.length > 0) { return positions; }
    return [pitchString.startsWith('L') ? 0 : -1];
}
/**
 * Splits kana text into morae. A small kana from SMALL_KANA_SET is merged into the
 * preceding mora; a small kana at the very start forms a mora of its own.
 * @param {string} text
 * @returns {string[]} The morae in order.
 */
export function getKanaMorae(text) {
    /** @type {string[]} */
    const morae = [];
    for (const character of text) {
        const lastIndex = morae.length - 1;
        if (lastIndex >= 0 && SMALL_KANA_SET.has(character)) {
            morae[lastIndex] += character;
            continue;
        }
        morae.push(character);
    }
    return morae;
}
/**
 * Counts the morae of a kana text without building them. Small kana from SMALL_KANA_SET
 * do not add to the count unless they are the first character.
 * @param {string} text
 * @returns {number}
 */
export function getKanaMoraCount(text) {
    let count = 0;
    for (const character of text) {
        if (count === 0 || !SMALL_KANA_SET.has(character)) {
            ++count;
        }
    }
    return count;
}
// Conversion functions
/**
 * Converts katakana in a string to hiragana, leaving every other character untouched.
 * Small ka/ke (ヵ, ヶ) are kept as they are. A prolonged sound mark is replaced by the
 * vowel hiragana implied by the character already written before it, unless
 * `keepProlongedSoundMarks` is set or no replacement is known.
 * @param {string} text
 * @param {boolean} [keepProlongedSoundMarks] When `true`, prolonged sound marks are left as is.
 * @returns {string}
 */
export function convertKatakanaToHiragana(text, keepProlongedSoundMarks = false) {
    const katakanaToHiragana = (HIRAGANA_CONVERSION_RANGE[0] - KATAKANA_CONVERSION_RANGE[0]);
    let converted = '';
    for (const character of text) {
        const codePoint = /** @type {number} */ (character.codePointAt(0));
        let output = character;
        if (codePoint === KATAKANA_SMALL_KA_CODE_POINT || codePoint === KATAKANA_SMALL_KE_CODE_POINT) {
            // Inside the conversion range, but deliberately not converted
        } else if (codePoint === KANA_PROLONGED_SOUND_MARK_CODE_POINT) {
            if (!keepProlongedSoundMarks && converted.length > 0) {
                const vowelKana = getProlongedHiragana(converted[converted.length - 1]);
                if (vowelKana !== null) { output = vowelKana; }
            }
        } else if (isCodePointInRange(codePoint, KATAKANA_CONVERSION_RANGE)) {
            output = String.fromCodePoint(codePoint + katakanaToHiragana);
        }
        converted += output;
    }
    return converted;
}
/**
 * Converts hiragana in a string to katakana by shifting every code point inside
 * HIRAGANA_CONVERSION_RANGE by a constant offset. Other characters are copied unchanged.
 * @param {string} text
 * @returns {string}
 */
export function convertHiraganaToKatakana(text) {
    const hiraganaToKatakana = (KATAKANA_CONVERSION_RANGE[0] - HIRAGANA_CONVERSION_RANGE[0]);
    let converted = '';
    for (const character of text) {
        const codePoint = /** @type {number} */ (character.codePointAt(0));
        converted += isCodePointInRange(codePoint, HIRAGANA_CONVERSION_RANGE) ?
            String.fromCodePoint(codePoint + hiraganaToKatakana) :
            character;
    }
    return converted;
}
/**
 * Replaces ASCII digits and letters with their fullwidth forms. All other characters
 * are copied unchanged.
 * @param {string} text
 * @returns {string}
 */
export function convertAlphanumericToFullWidth(text) {
    // Digits, uppercase and lowercase letters all sit the same distance below
    // their fullwidth forms (U+FF10, U+FF21 and U+FF41 respectively)
    const toFullWidth = 0xff10 - 0x30;
    let converted = '';
    for (const character of text) {
        const codePoint = /** @type {number} */ (character.codePointAt(0));
        const isDigit = codePoint >= 0x30 && codePoint <= 0x39; // '0'..'9'
        const isUpper = codePoint >= 0x41 && codePoint <= 0x5a; // 'A'..'Z'
        const isLower = codePoint >= 0x61 && codePoint <= 0x7a; // 'a'..'z'
        converted += (isDigit || isUpper || isLower) ?
            String.fromCodePoint(codePoint + toFullWidth) :
            character;
    }
    return converted;
}
/**
 * Replaces fullwidth digits and letters with their ASCII forms. All other characters
 * are copied unchanged.
 * @param {string} text
 * @returns {string}
 */
export function convertFullWidthAlphanumericToNormal(text) {
    // Fullwidth digits (U+FF10..U+FF19), uppercase (U+FF21..U+FF3A) and lowercase
    // (U+FF41..U+FF5A) letters all sit the same distance above '0', 'A' and 'a'
    const toAscii = 0xff10 - 0x30;
    let converted = '';
    for (const character of text) {
        const codePoint = /** @type {number} */ (character.codePointAt(0));
        const isDigit = codePoint >= 0xff10 && codePoint <= 0xff19;
        const isUpper = codePoint >= 0xff21 && codePoint <= 0xff3a;
        const isLower = codePoint >= 0xff41 && codePoint <= 0xff5a;
        converted += (isDigit || isUpper || isLower) ?
            String.fromCodePoint(codePoint - toAscii) :
            character;
    }
    return converted;
}
/**
 * Replaces every character found in HALFWIDTH_KATAKANA_MAPPING with its mapped form.
 * When the character is followed by a halfwidth dakuten (U+FF9E) or handakuten (U+FF9F)
 * and a combined form exists, the pair is replaced by that single combined character;
 * otherwise the plain form is used and the mark is processed as a normal character.
 * @param {string} text
 * @returns {string}
 */
export function convertHalfWidthKanaToFullWidth(text) {
    // Working on UTF-16 code units is safe here: every relevant character
    // occupies a single code unit.
    const end = text.length;
    let converted = '';
    let position = 0;
    while (position < end) {
        const source = text[position];
        ++position;
        const variants = HALFWIDTH_KATAKANA_MAPPING.get(source);
        if (typeof variants !== 'string') {
            converted += source;
            continue;
        }
        // `position` now points at the character after `source`
        const markCode = text.charCodeAt(position);
        let variantIndex = 0;
        if (markCode === 0xff9e) { // Dakuten
            variantIndex = 1;
        } else if (markCode === 0xff9f) { // Handakuten
            variantIndex = 2;
        }
        const combined = variants[variantIndex];
        if (variantIndex > 0 && combined !== '-') {
            converted += combined;
            ++position; // The mark has been consumed
        } else {
            // No mark follows, or the combination is invalid
            converted += variants[0];
        }
    }
    return converted;
}
/**
 * Looks up the base character and diacritic kind of a kana that carries a dakuten or handakuten.
 * @param {string} character A single kana character.
 * @returns {?{character: string, type: import('japanese-util').DiacriticType}} A fresh object
 *   describing the base kana and the diacritic, or `null` when the character is not in the table.
 */
export function getKanaDiacriticInfo(character) {
    const entry = DIACRITIC_MAPPING.get(character);
    if (typeof entry === 'undefined') { return null; }
    const {character: baseCharacter, type} = entry;
    return {character: baseCharacter, type};
}
/**
 * Tells whether a combining dakuten may be merged into the given kana.
 * The check is a coarse range test: for speed it also accepts some characters that cannot
 * really take a dakuten but are highly unlikely to be followed by a combining mark.
 * @param {number} codePoint Code point of the character preceding the combining mark.
 * @returns {boolean}
 */
function dakutenAllowed(codePoint) {
    // Hiragana か..と and は..ほ
    const inHiraganaKaToTo = codePoint >= 0x304B && codePoint <= 0x3068;
    const inHiraganaHaToHo = codePoint >= 0x306F && codePoint <= 0x307B;
    // Katakana カ..ト and ハ..ホ
    const inKatakanaKaToTo = codePoint >= 0x30AB && codePoint <= 0x30C8;
    const inKatakanaHaToHo = codePoint >= 0x30CF && codePoint <= 0x30DB;
    return inHiraganaKaToTo || inHiraganaHaToHo || inKatakanaKaToTo || inKatakanaHaToHo;
}
/**
 * Tells whether a combining handakuten may be merged into the given kana.
 * The check is a coarse range test: for speed it also accepts some characters that cannot
 * really take a handakuten but are highly unlikely to be followed by a combining mark.
 * @param {number} codePoint Code point of the character preceding the combining mark.
 * @returns {boolean}
 */
function handakutenAllowed(codePoint) {
    // Hiragana は..ほ
    const inHiraganaHaToHo = codePoint >= 0x306F && codePoint <= 0x307B;
    // Katakana ハ..ホ
    const inKatakanaHaToHo = codePoint >= 0x30CF && codePoint <= 0x30DB;
    return inHiraganaHaToHo || inKatakanaHaToHo;
}
/**
 * Merges combining dakuten (U+3099) and handakuten (U+309A) into the kana before them.
 * The precomposed character is obtained by adding 1 (dakuten) or 2 (handakuten) to the
 * code point of the preceding kana; marks after a character that does not accept them
 * are copied through unchanged.
 * @param {string} text
 * @returns {string}
 */
export function normalizeCombiningCharacters(text) {
    // The text is scanned from the end, so pieces are collected in reverse order
    /** @type {string[]} */
    const reversedPieces = [];
    let index = text.length - 1;
    // Index 0 is handled after the loop: the first character has nothing to combine with
    while (index > 0) {
        const current = text[index];
        const precedingCodePoint = text[index - 1].codePointAt(0);
        let merged = null;
        if (current === '\u3099') {
            if (precedingCodePoint && dakutenAllowed(precedingCodePoint)) {
                merged = String.fromCodePoint(precedingCodePoint + 1);
            }
        } else if (current === '\u309A') {
            if (precedingCodePoint && handakutenAllowed(precedingCodePoint)) {
                merged = String.fromCodePoint(precedingCodePoint + 2);
            }
        }
        if (merged === null) {
            reversedPieces.push(current);
            index -= 1;
        } else {
            // Both the mark and the kana before it have been consumed
            reversedPieces.push(merged);
            index -= 2;
        }
    }
    // index is -1 when the first two characters were combined (or the text is empty)
    if (index === 0) {
        reversedPieces.push(text[0]);
    }
    return reversedPieces.reverse().join('');
}
/**
 * Applies NFKD normalization to every UTF-16 code unit that falls inside the
 * CJK_COMPATIBILITY range; everything else is copied unchanged.
 * @param {string} text
 * @returns {string}
 */
export function normalizeCJKCompatibilityCharacters(text) {
    // split('') yields individual UTF-16 code units, matching index-based access
    return text
        .split('')
        .map((unit) => {
            const codePoint = unit.codePointAt(0);
            const isCompatibility = codePoint && isCodePointInRange(codePoint, CJK_COMPATIBILITY);
            return isCompatibility ? unit.normalize('NFKD') : unit;
        })
        .join('');
}
// Furigana distribution
/**
 * Distributes a reading over a term so that furigana is only attached to the parts
 * that need it. The term is split into alternating kana / non-kana groups, and the
 * reading is then matched against those groups.
 * @param {string} term The written form.
 * @param {string} reading The reading of the whole term.
 * @returns {import('japanese-util').FuriganaSegment[]} The segments in order. When the reading
 *   cannot be distributed unambiguously, a single segment covering the whole term is returned.
 */
export function distributeFurigana(term, reading) {
    if (term === reading) {
        // Nothing to annotate
        return [createFuriganaSegment(term, '')];
    }

    /** @type {import('japanese-util').FuriganaGroup[]} */
    const groups = [];
    for (const character of term) {
        const isKana = isCodePointKana(/** @type {number} */ (character.codePointAt(0)));
        const lastGroup = groups.length > 0 ? groups[groups.length - 1] : null;
        if (lastGroup !== null && lastGroup.isKana === isKana) {
            lastGroup.text += character;
        } else {
            groups.push({isKana, text: character, textNormalized: null});
        }
    }
    // Normalization has to wait until each group's text is complete
    for (const group of groups) {
        if (!group.isKana) { continue; }
        group.textNormalized = convertKatakanaToHiragana(group.text);
    }

    const segments = segmentizeFurigana(reading, convertKatakanaToHiragana(reading), groups, 0);
    // Fall back to one segment for the whole term
    return segments !== null ? segments : [createFuriganaSegment(term, reading)];
}
/**
 * Distributes furigana over an inflected source text, given the term and reading it was
 * matched to. Only the stem shared between the source and the term (or reading) receives
 * furigana; the rest of the source is appended without a reading.
 * @param {string} term The written form of the matched term.
 * @param {string} reading The reading of the matched term.
 * @param {string} source The text as it actually appeared (for example an inflected form).
 * @returns {import('japanese-util').FuriganaSegment[]} Segments whose texts together spell `source`
 *   as far as the stem and the remainder cover it.
 */
export function distributeFuriganaInflected(term, reading, source) {
// All comparisons are done on hiragana-normalized text
const termNormalized = convertKatakanaToHiragana(term);
const readingNormalized = convertKatakanaToHiragana(reading);
const sourceNormalized = convertKatakanaToHiragana(source);
let mainText = term;
let stemLength = getStemLength(termNormalized, sourceNormalized);
// Check if source is derived from the reading instead of the term
const readingStemLength = getStemLength(readingNormalized, sourceNormalized);
if (readingStemLength > 0 && readingStemLength >= stemLength) {
mainText = reading;
stemLength = readingStemLength;
// Use the source's own spelling for the stem part of the reading
reading = `${source.substring(0, stemLength)}${reading.substring(stemLength)}`;
}
const segments = [];
if (stemLength > 0) {
// Use the source's own spelling for the stem part of the main text
mainText = `${source.substring(0, stemLength)}${mainText.substring(stemLength)}`;
const segments2 = distributeFurigana(mainText, reading);
// Copy segments until the stem is covered; `consumed` counts text characters so far
let consumed = 0;
for (const segment of segments2) {
const {text} = segment;
const start = consumed;
consumed += text.length;
if (consumed < stemLength) {
segments.push(segment);
} else if (consumed === stemLength) {
segments.push(segment);
break;
} else {
// This segment extends past the stem: keep only its stem part, without a reading
if (start < stemLength) {
segments.push(createFuriganaSegment(mainText.substring(start, stemLength), ''));
}
break;
}
}
}
if (stemLength < source.length) {
const remainder = source.substring(stemLength);
const segmentCount = segments.length;
if (segmentCount > 0 && segments[segmentCount - 1].reading.length === 0) {
// Append to the last segment if it has an empty reading
segments[segmentCount - 1].text += remainder;
} else {
// Otherwise, create a new segment
segments.push(createFuriganaSegment(remainder, ''));
}
}
return segments;
}
// Miscellaneous
/**
 * Tells whether a code point is one of the characters treated as emphatic:
 * hiragana small tsu, katakana small tsu, or the prolonged sound mark.
 * @param {number} codePoint
 * @returns {boolean}
 */
export function isEmphaticCodePoint(codePoint) {
    switch (codePoint) {
        case HIRAGANA_SMALL_TSU_CODE_POINT:
        case KATAKANA_SMALL_TSU_CODE_POINT:
        case KANA_PROLONGED_SOUND_MARK_CODE_POINT:
            return true;
        default:
            return false;
    }
}
/**
 * Collapses runs of emphatic characters in the interior of a string. Emphatic characters
 * at the very start and end are always kept as they are.
 * @param {string} text
 * @param {boolean} fullCollapse When `true`, interior emphatic characters are removed
 *   entirely; when `false`, each run of the same emphatic character is reduced to one.
 * @returns {string} The collapsed string; the input itself when it consists only of
 *   emphatic characters (or is empty).
 */
export function collapseEmphaticSequences(text, fullCollapse) {
    const length = text.length;

    let prefixEnd = 0;
    while (prefixEnd < length && isEmphaticCodePoint(/** @type {number} */ (text.codePointAt(prefixEnd)))) {
        ++prefixEnd;
    }

    let lastCore = length - 1;
    while (lastCore >= 0 && isEmphaticCodePoint(/** @type {number} */ (text.codePointAt(lastCore)))) {
        --lastCore;
    }

    // Whole string is emphatic
    if (prefixEnd > lastCore) { return text; }

    let collapsed = '';
    let previousEmphatic = -1;
    for (let index = prefixEnd; index <= lastCore; ++index) {
        const unit = text[index];
        const codePoint = /** @type {number} */ (unit.codePointAt(0));
        if (!isEmphaticCodePoint(codePoint)) {
            previousEmphatic = -1;
            collapsed += unit;
            continue;
        }
        const startsNewRun = (previousEmphatic !== codePoint);
        previousEmphatic = codePoint;
        // Only the first character of a run survives, and only in partial mode
        if (startsNewRun && !fullCollapse) {
            collapsed += unit;
        }
    }

    return text.substring(0, prefixEnd) + collapsed + text.substring(lastCore + 1);
}

View File

@@ -0,0 +1,93 @@
/*
* Copyright (C) 2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import {suffixInflection} from '../language-transforms.js';
// Suffixes that the suffix-stripping transform below removes from nouns and adjectives.
const suffixes = [
'ები',
'ებს',
'ებების', // plural suffixes
'მა', // ergative
'ს', // dative
'ის', // genitive
'ით', // instrumental
'ად', // adverbial
'ო', // vocative
// NOTE(review): the remaining entries are unlabelled in the original; presumably
// postposition-like suffixes - confirm with someone who knows Georgian.
'ში',
'ზე',
'შია',
'ზეა',
];
// Stem completion (for consonant endings)
// Each rule appends 'ი' to a stem ending in the listed consonant(s).
const stemCompletionRules = [
suffixInflection('გნ', 'გნი', ['n', 'adj'], ['n', 'adj']),
suffixInflection('ნ', 'ნი', ['n', 'adj'], ['n', 'adj']),
];
// Vowel restoration example (optional, extend as needed)
// The single rule appends 'ა' to a stem ending in 'გ'.
const vowelRestorationRules = [
suffixInflection('გ', 'გა', ['n', 'adj'], ['n', 'adj']),
];
// Language transform descriptor for Georgian: the part-of-speech conditions plus the
// deinflection rule sets built from the module-level suffix and rule lists.
// NOTE(review): `language` is 'kat' while this file is imported from the './ka/' directory;
// confirm which code the consumer of this descriptor expects.
export const georgianTransforms = {
language: 'kat',
// Every condition is flagged as a dictionary form
conditions: {
v: {
name: 'Verb',
isDictionaryForm: true,
},
n: {
name: 'Noun',
isDictionaryForm: true,
subConditions: ['np', 'ns'],
},
np: {
name: 'Noun plural',
isDictionaryForm: true,
},
ns: {
name: 'Noun singular',
isDictionaryForm: true,
},
adj: {
name: 'Adjective',
isDictionaryForm: true,
},
adv: {
name: 'Adverb',
isDictionaryForm: true,
},
},
transforms: {
nounAdjSuffixStripping: {
name: 'noun-adj-suffix-stripping',
description: 'Strip Georgian noun and adjective declension suffixes',
// One rule per suffix: the suffix is replaced by the empty string
rules: suffixes.map((suffix) => suffixInflection(suffix, '', ['n', 'adj'], ['n', 'adj'])),
},
nounAdjStemCompletion: {
name: 'noun-adj-stem-completion',
description: 'Restore nominative suffix -ი for consonant-ending noun/adjective stems',
rules: stemCompletionRules,
},
vowelRestoration: {
name: 'vowel-restoration',
description: 'Restore truncated vowels if applicable',
rules: vowelRestorationRules,
},
},
};

View File

@@ -0,0 +1,38 @@
/*
* Copyright (C) 2024-2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import {Hangul} from '../../../lib/hangul-js.js';
/**
 * Text processor that breaks Hangul text apart into individual jamo by means of the
 * bundled Hangul library and joins the pieces back into one string.
 * @type {import('language').TextProcessor<boolean>}
 */
export const disassembleHangul = {
    name: 'Disassemble Hangul',
    description: 'Disassemble Hangul characters into jamo.',
    // A single option value keeps the processor always on; [false, true] would make it optional
    options: [true],
    process: (str) => {
        const jamo = Hangul.disassemble(str, false);
        return jamo.join('');
    },
};
/**
 * Text processor that assembles a string of jamo into Hangul characters by means of the
 * bundled Hangul library.
 * @type {import('language').TextProcessor<boolean>}
 */
export const reassembleHangul = {
    name: 'Reassemble Hangul',
    description: 'Reassemble Hangul characters from jamo.',
    // A single option value keeps the processor always on; [false, true] would make it optional
    options: [true],
    process: (str) => Hangul.assemble(str),
};

File diff suppressed because it is too large Load Diff

56
vendor/yomitan/js/language/ko/korean.js vendored Normal file
View File

@@ -0,0 +1,56 @@
/*
* Copyright (C) 2024-2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import {CJK_IDEOGRAPH_RANGES, CJK_PUNCTUATION_RANGE, FULLWIDTH_CHARACTER_RANGES, isCodePointInRanges} from '../CJK-util.js';
// Inclusive code point ranges of the Hangul-related Unicode blocks used to recognise Korean text.
/** @type {import('CJK-util').CodepointRange} */
const HANGUL_JAMO_RANGE = [0x1100, 0x11ff];
/** @type {import('CJK-util').CodepointRange} */
const HANGUL_COMPATIBILITY_JAMO_RANGE = [0x3130, 0x318f];
/** @type {import('CJK-util').CodepointRange} */
const HANGUL_SYLLABLES_RANGE = [0xac00, 0xd7af];
/** @type {import('CJK-util').CodepointRange} */
const HANGUL_JAMO_EXTENDED_A_RANGE = [0xa960, 0xa97f];
/** @type {import('CJK-util').CodepointRange} */
const HANGUL_JAMO_EXTENDED_B_RANGE = [0xd7b0, 0xd7ff];
/** @type {import('CJK-util').CodepointRange} */
const HANGUL_JAMO_HALF_WIDTH_RANGE = [0xffa0, 0xffdf];
/**
 * Korean character ranges, roughly ordered in order of expected frequency.
 * Besides the Hangul ranges this includes the shared CJK ideograph, CJK punctuation
 * and fullwidth character ranges imported from CJK-util.
 * @type {import('CJK-util').CodepointRange[]}
 */
const KOREAN_RANGES = [
...CJK_IDEOGRAPH_RANGES,
CJK_PUNCTUATION_RANGE,
...FULLWIDTH_CHARACTER_RANGES,
HANGUL_JAMO_RANGE,
HANGUL_COMPATIBILITY_JAMO_RANGE,
HANGUL_SYLLABLES_RANGE,
HANGUL_JAMO_EXTENDED_A_RANGE,
HANGUL_JAMO_EXTENDED_B_RANGE,
HANGUL_JAMO_HALF_WIDTH_RANGE,
];
/**
 * Tests whether a code point lies in any of KOREAN_RANGES (Hangul blocks plus the shared
 * CJK ideograph, CJK punctuation and fullwidth character ranges).
 * @param {number} codePoint The Unicode code point to test.
 * @returns {boolean} `true` when the code point is inside any of those ranges.
 */
export function isCodePointKorean(codePoint) {
return isCodePointInRanges(codePoint, KOREAN_RANGES);
}

View File

@@ -0,0 +1,39 @@
/*
* Copyright (C) 2024-2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/**
 * Text preprocessor that converts between the ligatures æ/Æ/œ/Œ and their two-letter
 * spellings. 'direct' expands ligatures, 'inverse' contracts the letter pairs, and
 * 'off' returns the input unchanged.
 * @type {import('language').BidirectionalConversionPreprocessor}
 */
export const processDiphtongs = {
    name: 'Convert æ to ae',
    description: 'æ → ae, Æ → AE, œ → oe, Œ → OE',
    options: ['off', 'direct', 'inverse'],
    process: (str, setting) => {
        // Ligature / digraph pairs, applied one after another in this order
        const pairs = [
            ['æ', 'ae'],
            ['Æ', 'AE'],
            ['œ', 'oe'],
            ['Œ', 'OE'],
        ];
        switch (setting) {
            case 'off':
                return str;
            case 'direct':
                return pairs.reduce((text, [ligature, digraph]) => text.split(ligature).join(digraph), str);
            case 'inverse':
                return pairs.reduce((text, [ligature, digraph]) => text.split(digraph).join(ligature), str);
        }
    },
};

View File

@@ -0,0 +1,164 @@
/*
* Copyright (C) 2024-2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import {suffixInflection} from '../language-transforms.js';
// TODO: -ne suffix (estne, nonne)?
// Grammatical conditions referenced by the Latin transform rules.
// Each entry has a display name, is flagged as a dictionary form, and may list
// sub-conditions. Key scheme: n<declension><s|p> for nouns, adj12 / adj3 for adjectives.
const conditions = {
v: {
name: 'Verb',
isDictionaryForm: true,
},
// Nouns grouped by number
n: {
name: 'Noun',
isDictionaryForm: true,
subConditions: ['ns', 'np'],
},
ns: {
name: 'Noun, singular',
isDictionaryForm: true,
subConditions: ['n1s', 'n2s', 'n3s', 'n4s', 'n5s'],
},
np: {
name: 'Noun, plural',
isDictionaryForm: true,
subConditions: ['n1p', 'n2p', 'n3p', 'n4p', 'n5p'],
},
// Nouns grouped by declension
n1: {
name: 'Noun, 1st declension',
isDictionaryForm: true,
subConditions: ['n1s', 'n1p'],
},
n1p: {
name: 'Noun, 1st declension, plural',
isDictionaryForm: true,
},
n1s: {
name: 'Noun, 1st declension, singular',
isDictionaryForm: true,
},
n2: {
name: 'Noun, 2nd declension',
isDictionaryForm: true,
subConditions: ['n2s', 'n2p'],
},
n2p: {
name: 'Noun, 2nd declension, plural',
isDictionaryForm: true,
},
n2s: {
name: 'Noun, 2nd declension, singular',
isDictionaryForm: true,
},
n3: {
name: 'Noun, 3rd declension',
isDictionaryForm: true,
subConditions: ['n3s', 'n3p'],
},
n3p: {
name: 'Noun, 3rd declension, plural',
isDictionaryForm: true,
},
n3s: {
name: 'Noun, 3rd declension, singular',
isDictionaryForm: true,
},
n4: {
name: 'Noun, 4th declension',
isDictionaryForm: true,
subConditions: ['n4s', 'n4p'],
},
n4p: {
name: 'Noun, 4th declension, plural',
isDictionaryForm: true,
},
n4s: {
name: 'Noun, 4th declension, singular',
isDictionaryForm: true,
},
n5: {
name: 'Noun, 5th declension',
isDictionaryForm: true,
subConditions: ['n5s', 'n5p'],
},
n5p: {
name: 'Noun, 5th declension, plural',
isDictionaryForm: true,
},
n5s: {
name: 'Noun, 5th declension, singular',
isDictionaryForm: true,
},
// Adjectives
adj: {
name: 'Adjective',
isDictionaryForm: true,
subConditions: ['adj3', 'adj12'],
},
adj12: {
name: 'Adjective, 1st-2nd declension',
isDictionaryForm: true,
},
adj3: {
name: 'Adjective, 3rd declension',
isDictionaryForm: true,
},
adv: {
name: 'Adverb',
isDictionaryForm: true,
},
};
/** @type {import('language-transformer').LanguageTransformDescriptor<keyof typeof conditions>} */
export const latinTransforms = {
language: 'la',
conditions,
transforms: {
plural: {
name: 'plural',
description: 'Plural declension',
rules: [
suffixInflection('i', 'us', ['n2p'], ['n2s']),
suffixInflection('i', 'us', ['adj12'], ['adj12']),
suffixInflection('e', '', ['n1p'], ['n1s']),
suffixInflection('ae', 'a', ['adj12'], ['adj12']),
suffixInflection('a', 'um', ['adj12'], ['adj12']),
],
},
feminine: {
name: 'feminine',
description: 'Adjective form',
rules: [
suffixInflection('a', 'us', ['adj12'], ['adj12']),
],
},
neuter: {
name: 'neuter',
description: 'Adjective form',
rules: [
suffixInflection('um', 'us', ['adj12'], ['adj12']),
],
},
ablative: {
name: 'ablative',
description: 'Ablative case',
rules: [
suffixInflection('o', 'um', ['n2s'], ['n2s']),
],
},
},
};

View File

@@ -0,0 +1,559 @@
/*
* Copyright (C) 2024-2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import {removeSyriacScriptDiacritics} from './aii/assyrian-neo-aramaic-text-preprocessors.js';
import {
addHamzaBottom,
addHamzaTop,
convertAlifMaqsuraToYaa,
convertHaToTaMarbuta,
normalizeUnicode,
removeArabicScriptDiacritics,
removeTatweel,
} from './ar/arabic-text-preprocessors.js';
import {arabicTransforms} from './ar/arabic-transforms.js';
import {normalizeRadicalCharacters} from './CJK-util.js';
import {eszettPreprocessor} from './de/german-text-preprocessors.js';
import {germanTransforms} from './de/german-transforms.js';
import {removeDoubleAcuteAccents} from './el/modern-greek-processors.js';
import {englishTransforms} from './en/english-transforms.js';
import {esperantoTransforms} from './eo/esperanto-transforms.js';
import {spanishTransforms} from './es/spanish-transforms.js';
import {apostropheVariants} from './fr/french-text-preprocessors.js';
import {frenchTransforms} from './fr/french-transforms.js';
import {irishTransforms} from './ga/irish-transforms.js';
import {convertLatinToGreek} from './grc/ancient-greek-processors.js';
import {ancientGreekTransforms} from './grc/ancient-greek-transforms.js';
import {
alphabeticToHiragana,
alphanumericWidthVariants,
collapseEmphaticSequences,
convertHalfWidthCharacters,
convertHiraganaToKatakana,
normalizeCJKCompatibilityCharacters,
normalizeCombiningCharacters,
standardizeKanji,
} from './ja/japanese-text-preprocessors.js';
import {japaneseTransforms} from './ja/japanese-transforms.js';
import {isStringPartiallyJapanese} from './ja/japanese.js';
import {georgianTransforms} from './ka/georgian-transforms.js';
import {disassembleHangul, reassembleHangul} from './ko/korean-text-processors.js';
import {koreanTransforms} from './ko/korean-transforms.js';
import {processDiphtongs} from './la/latin-text-preprocessors.js';
import {latinTransforms} from './la/latin-transforms.js';
import {removeRussianDiacritics, yoToE} from './ru/russian-text-preprocessors.js';
import {oldIrishTransforms} from './sga/old-irish-transforms.js';
import {removeSerboCroatianAccentMarks} from './sh/serbo-croatian-text-preprocessors.js';
import {albanianTransforms} from './sq/albanian-transforms.js';
import {capitalizeFirstLetter, decapitalize, removeAlphabeticDiacritics} from './text-processors.js';
import {tagalogTransforms} from './tl/tagalog-transforms.js';
import {normalizeDiacritics} from './vi/viet-text-preprocessors.js';
import {convertFinalLetters, convertYiddishLigatures} from './yi/yiddish-text-postprocessors.js';
import {combineYiddishLigatures, removeYiddishDiacritics} from './yi/yiddish-text-preprocessors.js';
import {yiddishTransforms} from './yi/yiddish-transforms.js';
import {isStringPartiallyChinese, normalizePinyin} from './zh/chinese.js';
// Capitalization-related text preprocessors shared by the descriptors below.
// The key order (decapitalize first) is kept because consumers enumerate these entries in order.
const capitalizationPreprocessors = {decapitalize, capitalizeFirstLetter};
/**
 * Descriptors for every supported language.
 * Each entry declares an `iso` code, an `iso639_3` code, a display `name` and an `exampleText`;
 * entries may additionally declare `textPreprocessors`, `textPostprocessors`,
 * `languageTransforms`, `isTextLookupWorthy` and `readingNormalizer`.
 * @type {import('language-descriptors').LanguageDescriptorAny[]}
 */
const languageDescriptors = [
{
iso: 'aii',
iso639_3: 'aii',
name: 'Assyrian Neo-Aramaic',
exampleText: 'ܟܵܬܹܒ݂',
textPreprocessors: {
removeSyriacScriptDiacritics,
},
},
{
iso: 'ar',
iso639_3: 'ara',
name: 'Arabic (MSA)',
exampleText: 'قَرَأَ',
textPreprocessors: {
removeArabicScriptDiacritics,
removeTatweel,
normalizeUnicode,
addHamzaTop,
addHamzaBottom,
convertAlifMaqsuraToYaa,
},
languageTransforms: arabicTransforms,
},
{
iso: 'arz',
iso639_3: 'arz',
name: 'Arabic (Egyptian)',
exampleText: 'قَرَأَ',
textPreprocessors: {
removeArabicScriptDiacritics,
removeTatweel,
normalizeUnicode,
addHamzaTop,
addHamzaBottom,
convertAlifMaqsuraToYaa,
convertHaToTaMarbuta,
},
languageTransforms: arabicTransforms,
},
{
iso: 'bg',
iso639_3: 'bul',
name: 'Bulgarian',
exampleText: 'чета',
textPreprocessors: capitalizationPreprocessors,
},
{
iso: 'cs',
iso639_3: 'ces',
name: 'Czech',
exampleText: 'číst',
textPreprocessors: capitalizationPreprocessors,
},
{
iso: 'da',
iso639_3: 'dan',
name: 'Danish',
exampleText: 'læse',
textPreprocessors: {
...capitalizationPreprocessors,
},
},
{
iso: 'de',
iso639_3: 'deu',
name: 'German',
exampleText: 'lesen',
textPreprocessors: {
...capitalizationPreprocessors,
eszettPreprocessor,
},
languageTransforms: germanTransforms,
},
{
iso: 'el',
iso639_3: 'ell',
name: 'Greek',
exampleText: 'διαβάζω',
textPreprocessors: {
...capitalizationPreprocessors,
removeDoubleAcuteAccents,
},
},
{
iso: 'en',
iso639_3: 'eng',
name: 'English',
exampleText: 'read',
textPreprocessors: capitalizationPreprocessors,
languageTransforms: englishTransforms,
},
{
iso: 'eo',
iso639_3: 'epo',
name: 'Esperanto',
exampleText: 'legi',
textPreprocessors: capitalizationPreprocessors,
languageTransforms: esperantoTransforms,
},
{
iso: 'es',
iso639_3: 'spa',
name: 'Spanish',
exampleText: 'leer',
textPreprocessors: capitalizationPreprocessors,
languageTransforms: spanishTransforms,
},
{
iso: 'et',
iso639_3: 'est',
name: 'Estonian',
exampleText: 'lugema',
textPreprocessors: capitalizationPreprocessors,
},
{
iso: 'fa',
iso639_3: 'fas',
name: 'Persian',
exampleText: 'خواندن',
textPreprocessors: {
removeArabicScriptDiacritics,
},
},
{
iso: 'fi',
iso639_3: 'fin',
name: 'Finnish',
exampleText: 'lukea',
textPreprocessors: capitalizationPreprocessors,
},
{
iso: 'fr',
iso639_3: 'fra',
name: 'French',
exampleText: 'lire',
textPreprocessors: {
...capitalizationPreprocessors,
apostropheVariants,
},
languageTransforms: frenchTransforms,
},
{
iso: 'ga',
iso639_3: 'gle',
name: 'Irish',
exampleText: 'léigh',
textPreprocessors: capitalizationPreprocessors,
languageTransforms: irishTransforms,
},
{
iso: 'grc',
iso639_3: 'grc',
name: 'Ancient Greek',
exampleText: 'γράφω', /* 'to write' */
textPreprocessors: {
...capitalizationPreprocessors,
removeAlphabeticDiacritics,
convertLatinToGreek,
},
languageTransforms: ancientGreekTransforms,
},
{
// no 2 letter iso for hawaiian
iso: 'haw',
iso639_3: 'haw',
name: 'Hawaiian',
exampleText: 'heluhelu',
textPreprocessors: capitalizationPreprocessors,
},
{
iso: 'he',
iso639_3: 'heb',
name: 'Hebrew',
exampleText: 'קריאה',
},
{
iso: 'hi',
iso639_3: 'hin',
name: 'Hindi',
exampleText: 'पढ़ने के लिए',
},
{
iso: 'hu',
iso639_3: 'hun',
name: 'Hungarian',
exampleText: 'olvasni',
textPreprocessors: capitalizationPreprocessors,
},
{
iso: 'id',
iso639_3: 'ind',
name: 'Indonesian',
exampleText: 'baca',
textPreprocessors: {
...capitalizationPreprocessors,
removeAlphabeticDiacritics,
},
},
{
iso: 'it',
iso639_3: 'ita',
name: 'Italian',
exampleText: 'leggere',
textPreprocessors: {
...capitalizationPreprocessors,
removeAlphabeticDiacritics,
},
},
{
iso: 'la',
iso639_3: 'lat',
name: 'Latin',
exampleText: 'legō',
textPreprocessors: {
...capitalizationPreprocessors,
removeAlphabeticDiacritics,
processDiphtongs,
},
languageTransforms: latinTransforms,
},
{
iso: 'lo',
iso639_3: 'lao',
name: 'Lao',
exampleText: 'ອ່ານ',
},
{
iso: 'lv',
iso639_3: 'lav',
name: 'Latvian',
exampleText: 'lasīt',
textPreprocessors: capitalizationPreprocessors,
},
{
iso: 'ja',
iso639_3: 'jpn',
name: 'Japanese',
exampleText: '読め',
isTextLookupWorthy: isStringPartiallyJapanese,
textPreprocessors: {
convertHalfWidthCharacters,
alphabeticToHiragana,
normalizeCombiningCharacters,
normalizeCJKCompatibilityCharacters,
normalizeRadicalCharacters,
alphanumericWidthVariants,
convertHiraganaToKatakana,
collapseEmphaticSequences,
standardizeKanji,
},
languageTransforms: japaneseTransforms,
},
{
iso: 'ka',
iso639_3: 'kat',
name: 'Georgian',
exampleText: 'კითხვა', // Georgian for “read”
languageTransforms: georgianTransforms,
},
{
iso: 'kn',
iso639_3: 'kan',
name: 'Kannada',
exampleText: 'ಓದು',
},
{
iso: 'km',
iso639_3: 'khm',
name: 'Khmer',
exampleText: 'អាន',
},
{
iso: 'ko',
iso639_3: 'kor',
name: 'Korean',
exampleText: '읽어',
textPreprocessors: {
disassembleHangul,
},
textPostprocessors: {
reassembleHangul,
},
languageTransforms: koreanTransforms,
},
{
iso: 'mn',
iso639_3: 'mon',
name: 'Mongolian',
exampleText: 'унших',
textPreprocessors: capitalizationPreprocessors,
},
{
iso: 'mt',
iso639_3: 'mlt',
name: 'Maltese',
exampleText: 'kiteb',
textPreprocessors: capitalizationPreprocessors,
},
{
iso: 'nl',
iso639_3: 'nld',
name: 'Dutch',
exampleText: 'lezen',
textPreprocessors: capitalizationPreprocessors,
},
{
iso: 'no',
iso639_3: 'nor',
name: 'Norwegian',
exampleText: 'lese',
textPreprocessors: {
...capitalizationPreprocessors,
},
},
{
iso: 'pl',
iso639_3: 'pol',
name: 'Polish',
exampleText: 'czytać',
textPreprocessors: capitalizationPreprocessors,
},
{
iso: 'pt',
iso639_3: 'por',
name: 'Portuguese',
exampleText: 'ler',
textPreprocessors: capitalizationPreprocessors,
},
{
iso: 'ro',
iso639_3: 'ron',
name: 'Romanian',
exampleText: 'citi',
textPreprocessors: {
...capitalizationPreprocessors,
removeAlphabeticDiacritics,
},
},
{
iso: 'ru',
iso639_3: 'rus',
name: 'Russian',
exampleText: 'читать',
textPreprocessors: {
...capitalizationPreprocessors,
yoToE,
removeRussianDiacritics,
},
},
{
iso: 'sga',
iso639_3: 'sga',
name: 'Old Irish',
exampleText: 'légaid',
textPreprocessors: {
...capitalizationPreprocessors,
removeAlphabeticDiacritics,
},
languageTransforms: oldIrishTransforms,
},
{
iso: 'sh',
iso639_3: 'hbs',
name: 'Serbo-Croatian',
exampleText: 'čìtati',
textPreprocessors: {
...capitalizationPreprocessors,
removeSerboCroatianAccentMarks,
},
},
{
iso: 'sq',
iso639_3: 'sqi',
name: 'Albanian',
exampleText: 'ndihmoj', /* 'to help' */
textPreprocessors: capitalizationPreprocessors,
languageTransforms: albanianTransforms,
},
{
iso: 'sv',
iso639_3: 'swe',
name: 'Swedish',
exampleText: 'läsa',
textPreprocessors: capitalizationPreprocessors,
},
{
iso: 'th',
iso639_3: 'tha',
name: 'Thai',
exampleText: 'อ่าน',
},
{
iso: 'tl',
iso639_3: 'tgl',
name: 'Tagalog',
exampleText: 'basahin',
textPreprocessors: {
...capitalizationPreprocessors,
removeAlphabeticDiacritics,
},
languageTransforms: tagalogTransforms,
},
{
iso: 'tr',
iso639_3: 'tur',
name: 'Turkish',
exampleText: 'okumak',
textPreprocessors: capitalizationPreprocessors,
},
{
iso: 'tok',
iso639_3: 'tok',
name: 'Toki Pona',
exampleText: 'wile',
textPreprocessors: capitalizationPreprocessors,
},
{
iso: 'uk',
iso639_3: 'ukr',
name: 'Ukrainian',
exampleText: 'читати',
textPreprocessors: capitalizationPreprocessors,
},
{
iso: 'vi',
iso639_3: 'vie',
name: 'Vietnamese',
exampleText: 'đọc',
textPreprocessors: {
...capitalizationPreprocessors,
normalizeDiacritics,
},
},
{
iso: 'cy',
iso639_3: 'cym',
name: 'Welsh',
exampleText: 'ddarllen',
textPreprocessors: capitalizationPreprocessors,
},
{
iso: 'yi',
iso639_3: 'yid',
name: 'Yiddish',
exampleText: 'באַשאַפֿן',
textPreprocessors: {
removeYiddishDiacritics,
combineYiddishLigatures,
},
textPostprocessors: {
convertFinalLetters,
convertYiddishLigatures,
},
languageTransforms: yiddishTransforms,
},
{
iso: 'yue',
iso639_3: 'yue',
name: 'Cantonese',
exampleText: '讀',
textPreprocessors: {
normalizeRadicalCharacters,
},
},
{
iso: 'zh',
iso639_3: 'zho',
name: 'Chinese',
exampleText: '读',
isTextLookupWorthy: isStringPartiallyChinese,
readingNormalizer: normalizePinyin,
textPreprocessors: {
normalizeRadicalCharacters,
},
},
];
/**
 * Lookup table from each descriptor's `iso` code to the descriptor itself.
 * Entries keep the declaration order of `languageDescriptors`.
 * @type {Map<string, import('language-descriptors').LanguageDescriptorAny>}
 */
export const languageDescriptorMap = new Map(
    languageDescriptors.map((languageDescriptor) => [languageDescriptor.iso, languageDescriptor]),
);

View File

@@ -0,0 +1,273 @@
/*
* Copyright (C) 2024-2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import {log} from '../core/log.js';
/**
 * Applies the deinflection rules of one or more transform descriptors to a piece of text.
 *
 * Conditions are encoded as bit flags inside a single integer: every condition without
 * `subConditions` receives its own bit, and a condition with `subConditions` is the
 * bitwise OR of the flags of the conditions it lists.
 */
export class LanguageTransformer {
    constructor() {
        /**
         * Index of the next unused bit for a condition without sub-conditions.
         * @type {number}
         */
        this._nextFlagIndex = 0;
        /** @type {import('language-transformer-internal').Transform[]} */
        this._transforms = [];
        /** @type {Map<string, number>} */
        this._conditionTypeToConditionFlagsMap = new Map();
        /** @type {Map<string, number>} */
        this._partOfSpeechToConditionFlagsMap = new Map();
    }

    /**
     * Resets the transformer to its freshly constructed state.
     */
    clear() {
        this._nextFlagIndex = 0;
        this._transforms = [];
        this._conditionTypeToConditionFlagsMap.clear();
        this._partOfSpeechToConditionFlagsMap.clear();
    }

    /**
     * Registers the conditions and transforms of a descriptor.
     * Instance state is only modified after every rule has been validated, so a
     * descriptor that throws leaves the transformer unchanged.
     * @param {import('language-transformer').LanguageTransformDescriptor} descriptor
     * @throws {Error} If a rule references an unknown condition, if the conditions cannot be
     *   resolved, or if more than 32 flag bits would be required.
     */
    addDescriptor(descriptor) {
        const {conditions, transforms} = descriptor;
        const conditionEntries = Object.entries(conditions);
        const {conditionFlagsMap, nextFlagIndex} = this._getConditionFlagsMap(conditionEntries, this._nextFlagIndex);

        /** @type {import('language-transformer-internal').Transform[]} */
        const transforms2 = [];

        for (const [transformId, transform] of Object.entries(transforms)) {
            const {name, description, rules} = transform;
            /** @type {import('language-transformer-internal').Rule[]} */
            const rules2 = [];
            for (let j = 0, jj = rules.length; j < jj; ++j) {
                const {type, isInflected, deinflect, conditionsIn, conditionsOut} = rules[j];
                const conditionFlagsIn = this._getConditionFlagsStrict(conditionFlagsMap, conditionsIn);
                if (conditionFlagsIn === null) { throw new Error(`Invalid conditionsIn for transform ${transformId}.rules[${j}]`); }
                const conditionFlagsOut = this._getConditionFlagsStrict(conditionFlagsMap, conditionsOut);
                if (conditionFlagsOut === null) { throw new Error(`Invalid conditionsOut for transform ${transformId}.rules[${j}]`); }
                rules2.push({
                    type,
                    isInflected,
                    deinflect,
                    conditionsIn: conditionFlagsIn,
                    conditionsOut: conditionFlagsOut,
                });
            }
            // One alternation over every rule's pattern: if it does not match, no rule of
            // this transform can match, so `transform()` can skip the whole transform.
            const isInflectedTests = rules.map((rule) => rule.isInflected);
            const heuristic = new RegExp(isInflectedTests.map((regExp) => regExp.source).join('|'));
            transforms2.push({id: transformId, name, description, rules: rules2, heuristic});
        }

        this._nextFlagIndex = nextFlagIndex;
        for (const transform of transforms2) {
            this._transforms.push(transform);
        }

        for (const [type, {isDictionaryForm}] of conditionEntries) {
            const flags = conditionFlagsMap.get(type);
            if (typeof flags === 'undefined') { continue; } // This case should never happen
            this._conditionTypeToConditionFlagsMap.set(type, flags);
            if (isDictionaryForm) {
                this._partOfSpeechToConditionFlagsMap.set(type, flags);
            }
        }
    }

    /**
     * Combines the flags of the given parts of speech; only conditions registered with
     * `isDictionaryForm` contribute, anything else counts as `0`.
     * @param {string[]} partsOfSpeech
     * @returns {number}
     */
    getConditionFlagsFromPartsOfSpeech(partsOfSpeech) {
        return this._getConditionFlags(this._partOfSpeechToConditionFlagsMap, partsOfSpeech);
    }

    /**
     * Combines the flags of the given condition types; unknown types count as `0`.
     * @param {string[]} conditionTypes
     * @returns {number}
     */
    getConditionFlagsFromConditionTypes(conditionTypes) {
        return this._getConditionFlags(this._conditionTypeToConditionFlagsMap, conditionTypes);
    }

    /**
     * Returns the flags of a single condition type, or `0` if it is unknown.
     * @param {string} conditionType
     * @returns {number}
     */
    getConditionFlagsFromConditionType(conditionType) {
        return this._getConditionFlags(this._conditionTypeToConditionFlagsMap, [conditionType]);
    }

    /**
     * Produces the source text followed by every text reachable by applying chains of
     * registered rules to it.
     * @param {string} sourceText
     * @returns {import('language-transformer-internal').TransformedText[]}
     */
    transform(sourceText) {
        const results = [LanguageTransformer.createTransformedText(sourceText, 0, [])];
        // `results` doubles as the work queue: entries pushed below are visited by later iterations.
        for (let i = 0; i < results.length; ++i) {
            const {text, conditions, trace} = results[i];
            for (const transform of this._transforms) {
                if (!transform.heuristic.test(text)) { continue; }

                const {id, rules} = transform;
                for (let j = 0, jj = rules.length; j < jj; ++j) {
                    const rule = rules[j];
                    if (!LanguageTransformer.conditionsMatch(conditions, rule.conditionsIn)) { continue; }
                    const {isInflected, deinflect} = rule;
                    // A global or sticky RegExp remembers `lastIndex` between `test()` calls, so a
                    // match on a previous text could make this test start mid-string and miss.
                    // Reset before the test, and again afterwards so no state leaks out.
                    isInflected.lastIndex = 0;
                    const matches = isInflected.test(text);
                    isInflected.lastIndex = 0;
                    if (!matches) { continue; }

                    const isCycle = trace.some((frame) => frame.transform === id && frame.ruleIndex === j && frame.text === text);
                    if (isCycle) {
                        log.warn(new Error(`Cycle detected in transform[${id}] rule[${j}] for text: ${text}\nTrace: ${JSON.stringify(trace)}`));
                        continue;
                    }

                    results.push(LanguageTransformer.createTransformedText(
                        deinflect(text),
                        rule.conditionsOut,
                        this._extendTrace(trace, {transform: id, ruleIndex: j, text}),
                    ));
                }
            }
        }
        return results;
    }

    /**
     * Maps transform ids to their user-facing name (and description, when one exists).
     * Ids that match no registered transform are returned as `{name: id}`.
     * @param {string[]} inflectionRules
     * @returns {import('dictionary').InflectionRuleChain}
     */
    getUserFacingInflectionRules(inflectionRules) {
        return inflectionRules.map((rule) => {
            const fullRule = this._transforms.find((transform) => transform.id === rule);
            if (typeof fullRule === 'undefined') { return {name: rule}; }
            const {name, description} = fullRule;
            return description ? {name, description} : {name};
        });
    }

    /**
     * @param {string} text
     * @param {number} conditions
     * @param {import('language-transformer-internal').Trace} trace
     * @returns {import('language-transformer-internal').TransformedText}
     */
    static createTransformedText(text, conditions, trace) {
        return {text, conditions, trace};
    }

    /**
     * If `currentConditions` is `0`, then `nextConditions` is ignored and `true` is returned.
     * Otherwise, there must be at least one shared condition between `currentConditions` and `nextConditions`.
     * @param {number} currentConditions
     * @param {number} nextConditions
     * @returns {boolean}
     */
    static conditionsMatch(currentConditions, nextConditions) {
        return currentConditions === 0 || (currentConditions & nextConditions) !== 0;
    }

    /**
     * Assigns flags to every condition. Conditions whose `subConditions` are not yet
     * resolved are retried in a later pass, so declaration order does not matter.
     * @param {import('language-transformer').ConditionMapEntries} conditions
     * @param {number} nextFlagIndex
     * @returns {{conditionFlagsMap: Map<string, number>, nextFlagIndex: number}}
     * @throws {Error} If more than 32 flag bits are needed, or if some `subConditions`
     *   can never be resolved (cycle or undeclared condition).
     */
    _getConditionFlagsMap(conditions, nextFlagIndex) {
        /** @type {Map<string, number>} */
        const conditionFlagsMap = new Map();
        /** @type {import('language-transformer').ConditionMapEntries} */
        let targets = conditions;
        while (targets.length > 0) {
            const nextTargets = [];
            for (const target of targets) {
                const [type, condition] = target;
                const {subConditions} = condition;
                let flags = 0;
                if (typeof subConditions === 'undefined') {
                    if (nextFlagIndex >= 32) {
                        // Flags greater than or equal to 32 don't work because JavaScript only supports up to 32-bit integer operations
                        throw new Error('Maximum number of conditions was exceeded');
                    }
                    flags = 1 << nextFlagIndex;
                    ++nextFlagIndex;
                } else {
                    const multiFlags = this._getConditionFlagsStrict(conditionFlagsMap, subConditions);
                    if (multiFlags === null) {
                        nextTargets.push(target);
                        continue;
                    } else {
                        flags = multiFlags;
                    }
                }
                conditionFlagsMap.set(type, flags);
            }
            if (nextTargets.length === targets.length) {
                // Nothing was resolved in this pass, so nothing ever will be: the remaining
                // subConditions form a cycle or name a condition that was never declared.
                const unresolved = nextTargets.map(([type]) => type).join(', ');
                throw new Error(`Unresolvable subConditions (cycle or undeclared condition) for: ${unresolved}`);
            }
            targets = nextTargets;
        }
        return {conditionFlagsMap, nextFlagIndex};
    }

    /**
     * Combines the flags of `conditionTypes`, failing if any of them is unknown.
     * @param {Map<string, number>} conditionFlagsMap
     * @param {string[]} conditionTypes
     * @returns {?number} The combined flags, or `null` if a type is missing from the map.
     */
    _getConditionFlagsStrict(conditionFlagsMap, conditionTypes) {
        let flags = 0;
        for (const conditionType of conditionTypes) {
            const flags2 = conditionFlagsMap.get(conditionType);
            if (typeof flags2 === 'undefined') {
                return null;
            }
            flags |= flags2;
        }
        return flags;
    }

    /**
     * Combines the flags of `conditionTypes`, treating unknown types as `0`.
     * @param {Map<string, number>} conditionFlagsMap
     * @param {string[]} conditionTypes
     * @returns {number}
     */
    _getConditionFlags(conditionFlagsMap, conditionTypes) {
        let flags = 0;
        for (const conditionType of conditionTypes) {
            let flags2 = conditionFlagsMap.get(conditionType);
            if (typeof flags2 === 'undefined') {
                flags2 = 0;
            }
            flags |= flags2;
        }
        return flags;
    }

    /**
     * Builds a new trace with `newFrame` first, followed by copies of the existing frames.
     * The input trace is not modified.
     * @param {import('language-transformer-internal').Trace} trace
     * @param {import('language-transformer-internal').TraceFrame} newFrame
     * @returns {import('language-transformer-internal').Trace}
     */
    _extendTrace(trace, newFrame) {
        const newTrace = [newFrame];
        for (const {transform, ruleIndex, text} of trace) {
            newTrace.push({transform, ruleIndex, text});
        }
        return newTrace;
    }
}

View File

@@ -0,0 +1,75 @@
/*
* Copyright (C) 2024-2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/**
 * Creates a rule that matches text ending in `inflectedSuffix` and deinflects it by
 * swapping that ending for `deinflectedSuffix`.
 *
 * `inflectedSuffix` is inserted into the regular expression as-is (it is not escaped),
 * while the number of characters removed is `inflectedSuffix.length`.
 * @template {string} TCondition
 * @param {string} inflectedSuffix
 * @param {string} deinflectedSuffix
 * @param {TCondition[]} conditionsIn
 * @param {TCondition[]} conditionsOut
 * @returns {import('language-transformer').SuffixRule<TCondition>}
 */
export function suffixInflection(inflectedSuffix, deinflectedSuffix, conditionsIn, conditionsOut) {
    const suffixRegExp = new RegExp(inflectedSuffix + '$');
    return {
        type: 'suffix',
        isInflected: suffixRegExp,
        deinflected: deinflectedSuffix,
        // Compute the cut point explicitly: with an empty suffix, `slice(0, -0)` is
        // `slice(0, 0)` and would discard the entire stem. The clamp keeps the old
        // result (empty stem) for texts shorter than the suffix.
        deinflect: (text) => text.slice(0, Math.max(0, text.length - inflectedSuffix.length)) + deinflectedSuffix,
        conditionsIn,
        conditionsOut,
    };
}
/**
 * Creates a rule that matches text starting with `inflectedPrefix` and deinflects it by
 * swapping that beginning for `deinflectedPrefix`.
 * @template {string} TCondition
 * @param {string} inflectedPrefix
 * @param {string} deinflectedPrefix
 * @param {TCondition[]} conditionsIn
 * @param {TCondition[]} conditionsOut
 * @returns {import('language-transformer').Rule<TCondition>}
 */
export function prefixInflection(inflectedPrefix, deinflectedPrefix, conditionsIn, conditionsOut) {
    const isInflected = new RegExp(`^${inflectedPrefix}`);
    /**
     * @param {string} text
     * @returns {string}
     */
    const deinflect = (text) => `${deinflectedPrefix}${text.slice(inflectedPrefix.length)}`;
    return {type: 'prefix', isInflected, deinflect, conditionsIn, conditionsOut};
}
/**
 * Creates a rule that matches only text equal to `inflectedWord` and deinflects it to
 * `deinflectedWord`, regardless of the text passed to `deinflect`.
 * @template {string} TCondition
 * @param {string} inflectedWord
 * @param {string} deinflectedWord
 * @param {TCondition[]} conditionsIn
 * @param {TCondition[]} conditionsOut
 * @returns {import('language-transformer').Rule<TCondition>}
 */
export function wholeWordInflection(inflectedWord, deinflectedWord, conditionsIn, conditionsOut) {
    const isInflected = new RegExp(`^${inflectedWord}$`);
    const deinflect = () => deinflectedWord;
    return {type: 'wholeWord', isInflected, deinflect, conditionsIn, conditionsOut};
}

93
vendor/yomitan/js/language/languages.js vendored Executable file
View File

@@ -0,0 +1,93 @@
/*
* Copyright (C) 2024-2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import {languageDescriptorMap} from './language-descriptors.js';
/**
 * Lists the `name`, `iso`, `iso639_3` and `exampleText` of every registered language,
 * in the iteration order of `languageDescriptorMap`.
 * @returns {import('language').LanguageSummary[]}
 */
export function getLanguageSummaries() {
    return Array.from(
        languageDescriptorMap.values(),
        ({name, iso, iso639_3, exampleText}) => ({name, iso, iso639_3, exampleText}),
    );
}
/**
 * Collects the reading normalizer of every language that defines one, paired with
 * the language's `iso` code.
 * @returns {import('language').LanguageAndReadingNormalizer[]}
 */
export function getAllLanguageReadingNormalizers() {
    /** @type {import('language').LanguageAndReadingNormalizer[]} */
    const normalizers = [];
    languageDescriptorMap.forEach(({iso, readingNormalizer}) => {
        if (typeof readingNormalizer !== 'undefined') {
            normalizers.push({iso, readingNormalizer});
        }
    });
    return normalizers;
}
/**
 * For every registered language, converts its keyed pre- and postprocessor objects
 * into arrays of `{id, textProcessor}` (the object key becomes the `id`).
 * A language that declares no processors of a kind gets an empty array for it.
 * @returns {import('language').LanguageAndProcessors[]}
 */
export function getAllLanguageTextProcessors() {
    /**
     * @param {Record<string, unknown>} processors
     * @returns {import('language').TextProcessorWithId<unknown>[]}
     */
    const toProcessorsWithId = (processors) => Object.entries(processors).map(([id, processor]) => ({
        id,
        textProcessor: /** @type {import('language').TextProcessor<unknown>} */ (processor),
    }));

    return [...languageDescriptorMap.values()].map(({iso, textPreprocessors = {}, textPostprocessors = {}}) => ({
        iso,
        textPreprocessors: toProcessorsWithId(textPreprocessors),
        textPostprocessors: toProcessorsWithId(textPostprocessors),
    }));
}
/**
 * Decides whether `text` should be looked up for `language`.
 * Unknown languages are never lookup-worthy; a known language without its own
 * `isTextLookupWorthy` predicate accepts every text.
 * @param {string} text
 * @param {string} language
 * @returns {boolean}
 */
export function isTextLookupWorthy(text, language) {
    const languageDescriptor = languageDescriptorMap.get(language);
    if (typeof languageDescriptor === 'undefined') {
        return false;
    }
    if (typeof languageDescriptor.isTextLookupWorthy === 'undefined') {
        return true;
    }
    return languageDescriptor.isTextLookupWorthy(text);
}
/**
 * Collects the transform descriptor of every language that has one, paired with
 * the language's `iso` code.
 * @returns {import('language').LanguageAndTransforms[]}
 */
export function getAllLanguageTransformDescriptors() {
    /** @type {import('language').LanguageAndTransforms[]} */
    const languagesWithTransforms = [];
    languageDescriptorMap.forEach(({iso, languageTransforms}) => {
        if (!languageTransforms) { return; }
        languagesWithTransforms.push({iso, languageTransforms});
    });
    return languagesWithTransforms;
}

View File

@@ -0,0 +1,90 @@
/*
* Copyright (C) 2024-2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import {LanguageTransformer} from './language-transformer.js';
import {getAllLanguageTransformDescriptors} from './languages.js';
/**
 * Holds one `LanguageTransformer` per language and forwards every query to the
 * transformer of the requested language. Queries for a language without a
 * transformer fall back to neutral results (flags `0`, untransformed text,
 * rule ids used as names).
 */
export class MultiLanguageTransformer {
    constructor() {
        /** @type {Map<string, LanguageTransformer>} */
        this._languageTransformers = new Map();
    }

    /**
     * Builds a transformer for every language that declares transforms.
     *
     * Transformers are registered under the language's `iso` code. Several languages
     * may share a single transform descriptor (e.g. 'ar' and 'arz'), and a descriptor
     * has only one `language` field, so keying on `descriptor.language` alone would
     * leave all but one of those languages without a transformer.
     * The descriptor's own `language` value is still registered when no transformer
     * is stored under it yet, so lookups by that key keep working.
     */
    prepare() {
        for (const {iso, languageTransforms: descriptor} of getAllLanguageTransformDescriptors()) {
            const languageTransformer = new LanguageTransformer();
            languageTransformer.addDescriptor(descriptor);
            this._languageTransformers.set(iso, languageTransformer);
            if (!this._languageTransformers.has(descriptor.language)) {
                this._languageTransformers.set(descriptor.language, languageTransformer);
            }
        }
    }

    /**
     * @param {string} language
     * @param {string[]} partsOfSpeech
     * @returns {number} The combined flags, or `0` if the language has no transformer.
     */
    getConditionFlagsFromPartsOfSpeech(language, partsOfSpeech) {
        const languageTransformer = this._languageTransformers.get(language);
        return typeof languageTransformer !== 'undefined' ? languageTransformer.getConditionFlagsFromPartsOfSpeech(partsOfSpeech) : 0;
    }

    /**
     * @param {string} language
     * @param {string[]} conditionTypes
     * @returns {number} The combined flags, or `0` if the language has no transformer.
     */
    getConditionFlagsFromConditionTypes(language, conditionTypes) {
        const languageTransformer = this._languageTransformers.get(language);
        return typeof languageTransformer !== 'undefined' ? languageTransformer.getConditionFlagsFromConditionTypes(conditionTypes) : 0;
    }

    /**
     * @param {string} language
     * @param {string} conditionType
     * @returns {number} The flags, or `0` if the language has no transformer.
     */
    getConditionFlagsFromConditionType(language, conditionType) {
        const languageTransformer = this._languageTransformers.get(language);
        return typeof languageTransformer !== 'undefined' ? languageTransformer.getConditionFlagsFromConditionType(conditionType) : 0;
    }

    /**
     * @param {string} language
     * @param {string} sourceText
     * @returns {import('language-transformer-internal').TransformedText[]} The transformer's results,
     *   or just the untransformed source text if the language has no transformer.
     */
    transform(language, sourceText) {
        const languageTransformer = this._languageTransformers.get(language);
        if (typeof languageTransformer === 'undefined') { return [LanguageTransformer.createTransformedText(sourceText, 0, [])]; }
        return languageTransformer.transform(sourceText);
    }

    /**
     * @param {string} language
     * @param {string[]} inflectionRules
     * @returns {import('dictionary').InflectionRuleChain} The user-facing rules, or each rule id
     *   wrapped as `{name}` if the language has no transformer.
     */
    getUserFacingInflectionRules(language, inflectionRules) {
        const languageTransformer = this._languageTransformers.get(language);
        if (typeof languageTransformer === 'undefined') {
            return inflectionRules.map((rule) => ({name: rule}));
        }
        return languageTransformer.getUserFacingInflectionRules(inflectionRules);
    }
}

View File

@@ -0,0 +1,45 @@
/*
* Copyright (C) 2024-2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import {basicTextProcessorOptions} from '../text-processors.js';
/**
 * Optional preprocessor that deletes every U+0301 (combining acute accent) from the
 * text when enabled, and returns the text untouched when disabled.
 * @type {import('language').TextProcessor<boolean>}
 */
export const removeRussianDiacritics = {
    name: 'Remove diacritics',
    description: 'A\u0301 → A, a\u0301 → a',
    options: basicTextProcessorOptions,
    process: (str, setting) => {
        if (!setting) {
            return str;
        }
        return str.replace(/\u0301/g, '');
    },
};
/**
 * Three-way preprocessor for the letters "ё" and "е":
 * - `'off'` leaves the text untouched,
 * - `'direct'` replaces every "ё"/"Ё" with "е"/"Е",
 * - `'inverse'` replaces every "е"/"Е" with "ё"/"Ё".
 * Any other setting is treated like `'off'`, so the processor always returns a string.
 * @type {import('language').BidirectionalConversionPreprocessor}
 */
export const yoToE = {
    name: 'Convert "ё" to "е"',
    description: 'ё → е, Ё → Е and vice versa',
    options: ['off', 'direct', 'inverse'],
    process: (str, setting) => {
        switch (setting) {
            case 'direct':
                return str.replace(/ё/g, 'е').replace(/Ё/g, 'Е');
            case 'inverse':
                return str.replace(/е/g, 'ё').replace(/Е/g, 'Ё');
            case 'off':
            default:
                // Without a default branch an unexpected setting would return undefined.
                return str;
        }
    },
};

View File

@@ -0,0 +1,209 @@
/*
* Copyright (C) 2024-2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import {prefixInflection, suffixInflection} from '../language-transforms.js';
/** @typedef {keyof typeof conditions} Condition */
/**
 * Creates a rule that replaces every occurrence of `originalOrthography` in the text
 * with `alternateOrthography`.
 *
 * `originalOrthography` is inserted into the regular expression as-is (it is not escaped).
 * @param {boolean} notBeginning When `true`, an occurrence at the very start of the text is
 *   neither matched nor replaced.
 * @param {string} originalOrthography
 * @param {string} alternateOrthography
 * @param {Condition[]} conditionsIn
 * @param {Condition[]} conditionsOut
 * @returns {import('language-transformer').Rule<Condition>}
 */
function tryAlternateOrthography(notBeginning, originalOrthography, alternateOrthography, conditionsIn, conditionsOut) {
    const pattern = notBeginning ? '(?<!^)' + originalOrthography : originalOrthography;
    // `isInflected` is used with `.test()`, which is stateful on a global RegExp: after a
    // match it keeps `lastIndex` and may miss a match in the next text. Expose a
    // flag-less RegExp for testing and keep a separate global one for replacing.
    const isInflected = new RegExp(pattern);
    const replaceRegExp = new RegExp(pattern, 'g');
    return {
        type: 'other',
        isInflected,
        deinflect: (text) => text.replace(replaceRegExp, alternateOrthography),
        conditionsIn,
        conditionsOut,
    };
}
// No conditions are declared for this language: every rule below uses empty
// `conditionsIn` / `conditionsOut` lists.
const conditions = {};

/**
 * Transform descriptor for language 'sga'.
 * Contains spelling-variant rules (alternate orthography, doubled letters, word-internal
 * lenition) and initial-mutation rules (the `[IM]` entries), which rewrite the start of a word.
 * @type {import('language-transformer').LanguageTransformDescriptor<Condition>}
 */
export const oldIrishTransforms = {
    language: 'sga',
    conditions,
    transforms: {
        'nd for nn': {
            name: 'nd for nn',
            description: 'nd for nn',
            rules: [
                suffixInflection('nd', 'nn', [], []),
            ],
        },
        'cg for c': {
            name: 'cg for c',
            description: 'cg for c',
            rules: [
                tryAlternateOrthography(false, 'cg', 'c', [], []),
            ],
        },
        'td for t': {
            name: 'td for t',
            description: 'td for t',
            rules: [
                tryAlternateOrthography(false, 'td', 't', [], []),
            ],
        },
        'pb for p': {
            name: 'pb for p',
            description: 'pb for p',
            rules: [
                tryAlternateOrthography(false, 'pb', 'p', [], []),
            ],
        },
        'ǽ/æ for é': {
            name: 'ǽ/æ for é',
            description: 'ǽ/æ for é',
            rules: [
                tryAlternateOrthography(false, 'ǽ', 'é', [], []),
                tryAlternateOrthography(false, 'æ', 'é', [], []),
            ],
        },
        'doubled vowel': {
            name: 'doubled vowel',
            description: 'Doubled Vowel',
            rules: [
                tryAlternateOrthography(true, 'aa', 'á', [], []),
                tryAlternateOrthography(true, 'ee', 'é', [], []),
                tryAlternateOrthography(true, 'ii', 'í', [], []),
                tryAlternateOrthography(true, 'oo', 'ó', [], []),
                tryAlternateOrthography(true, 'uu', 'ú', [], []),
            ],
        },
        'doubled consonant': {
            name: 'doubled consonant',
            description: 'Doubled Consonant',
            rules: [
                tryAlternateOrthography(true, 'cc', 'c', [], []),
                tryAlternateOrthography(true, 'pp', 'p', [], []),
                tryAlternateOrthography(true, 'tt', 't', [], []),
                tryAlternateOrthography(true, 'gg', 'g', [], []),
                tryAlternateOrthography(true, 'bb', 'b', [], []),
                tryAlternateOrthography(true, 'dd', 'd', [], []),
                tryAlternateOrthography(true, 'rr', 'r', [], []),
                tryAlternateOrthography(true, 'll', 'l', [], []),
                tryAlternateOrthography(true, 'nn', 'n', [], []),
                tryAlternateOrthography(true, 'mm', 'm', [], []),
                tryAlternateOrthography(true, 'ss', 's', [], []),
            ],
        },
        'lenited': {
            name: 'lenited',
            description: 'Non-Beginning Lenition',
            rules: [
                tryAlternateOrthography(true, 'ch', 'c', [], []),
                tryAlternateOrthography(true, 'ph', 'p', [], []),
                tryAlternateOrthography(true, 'th', 't', [], []),
            ],
        },
        'lenited (Middle Irish)': {
            name: 'lenited (Middle Irish)',
            description: 'Non-Beginning Lenition (Middle Irish)',
            rules: [
                tryAlternateOrthography(true, 'gh', 'g', [], []),
                tryAlternateOrthography(true, 'bh', 'b', [], []),
                tryAlternateOrthography(true, 'dh', 'd', [], []),
            ],
        },
        '[IM] nasalized': {
            name: '[IM] nasalized',
            description: 'Nasalized Word',
            rules: [
                prefixInflection('ng', 'g', [], []),
                prefixInflection('mb', 'b', [], []),
                prefixInflection('nd', 'd', [], []),
                prefixInflection('n-', '', [], []),
                prefixInflection('m-', '', [], []),
            ],
        },
        '[IM] nasalized (Middle Irish)': {
            name: '[IM] nasalized (Middle Irish)',
            description: 'Nasalized Word (Middle Irish)',
            rules: [
                prefixInflection('gc', 'c', [], []),
                prefixInflection('bp', 'p', [], []),
                // 'dt' is the nasalized spelling of 't' (parallel to gc → c and bp → p),
                // so it must deinflect to 't', not 'd'.
                prefixInflection('dt', 't', [], []),
            ],
        },
        '[IM] lenited': {
            name: '[IM] lenited',
            description: 'Lenited Word',
            rules: [
                prefixInflection('ch', 'c', [], []),
                prefixInflection('ph', 'p', [], []),
                prefixInflection('th', 't', [], []),
            ],
        },
        '[IM] lenited (Middle Irish)': {
            name: '[IM] lenited (Middle Irish)',
            description: 'Lenited Word (Middle Irish)',
            rules: [
                prefixInflection('gh', 'g', [], []),
                prefixInflection('bh', 'b', [], []),
                prefixInflection('dh', 'd', [], []),
            ],
        },
        '[IM] aspirated': {
            name: '[IM] aspirated',
            description: 'Aspirated Word',
            rules: [
                prefixInflection('ha', 'a', [], []),
                prefixInflection('he', 'e', [], []),
                prefixInflection('hi', 'i', [], []),
                prefixInflection('ho', 'o', [], []),
                prefixInflection('hu', 'u', [], []),
                prefixInflection('h-', '', [], []),
            ],
        },
        '[IM] geminated': {
            name: '[IM] geminated',
            description: 'Geminated Word',
            rules: [
                prefixInflection('cc', 'c', [], []),
                prefixInflection('pp', 'p', [], []),
                prefixInflection('tt', 't', [], []),
                prefixInflection('gg', 'g', [], []),
                prefixInflection('bb', 'b', [], []),
                prefixInflection('dd', 'd', [], []),
                prefixInflection('rr', 'r', [], []),
                prefixInflection('ll', 'l', [], []),
                prefixInflection('nn', 'n', [], []),
                prefixInflection('mm', 'm', [], []),
                prefixInflection('ss', 's', [], []),
                prefixInflection('c-c', 'c', [], []),
                prefixInflection('p-p', 'p', [], []),
                prefixInflection('t-t', 't', [], []),
                prefixInflection('g-g', 'g', [], []),
                prefixInflection('b-b', 'b', [], []),
                prefixInflection('d-d', 'd', [], []),
                prefixInflection('r-r', 'r', [], []),
                prefixInflection('l-l', 'l', [], []),
                prefixInflection('n-n', 'n', [], []),
                prefixInflection('m-m', 'm', [], []),
                prefixInflection('s-s', 's', [], []),
            ],
        },
    },
};

View File

@@ -0,0 +1,31 @@
/*
* Copyright (C) 2024-2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import {basicTextProcessorOptions} from '../text-processors.js';
/**
 * Text processor that strips accent marks from Serbo-Croatian vowels and from `r`.
 *
 * When the setting is enabled the text is decomposed (NFD) so every accent becomes a
 * separate combining code point, and a single combining mark in U+0300–U+036F that
 * directly follows one of `a e i o u r` (either case) is dropped.
 * The result is then recomposed (NFC): NFD also splits letters whose diacritic is
 * not matched by the pattern (e.g. č, ć, š, ž), and without recomposition those would be
 * returned as base letter + combining mark and no longer equal their precomposed spelling.
 * When the setting is disabled the input is returned untouched.
 * @type {import('language').TextProcessor<boolean>}
 */
export const removeSerboCroatianAccentMarks = {
    name: 'Remove diacritics',
    description: 'A\u0301 → A, a\u0301 → a',
    options: basicTextProcessorOptions,
    process: (str, setting) => {
        if (!setting) { return str; }
        return str
            .normalize('NFD')
            .replace(/[aeiourAEIOUR][\u0300-\u036f]/g, (match) => match[0])
            // Put the untouched letter + mark pairs (č, š, ž, ...) back together.
            .normalize('NFC');
    },
};

View File

@@ -0,0 +1,355 @@
/*
* Copyright (C) 2024-2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import {suffixInflection} from '../language-transforms.js';
/** @typedef {keyof typeof conditions} Condition */
/**
 * Builds a suffix rule that does not apply when the suffix is preceded by "j".
 *
 * The rule is the plain `suffixInflection` rule for the same arguments with two fields
 * overridden: `type` becomes 'other', and `isInflected` becomes a pattern that requires
 * at least one character other than "j" immediately before `inflectedSuffix` at the end
 * of the word.
 * @param {string} inflectedSuffix Ending of the inflected word (interpolated into a RegExp as-is).
 * @param {string} deinflectedSuffix Ending passed through to `suffixInflection`.
 * @param {Condition[]} conditionsIn
 * @param {Condition[]} conditionsOut
 * @returns {import('language-transformer').Rule<Condition>}
 */
function conjugationIISuffixInflection(inflectedSuffix, deinflectedSuffix, conditionsIn, conditionsOut) {
    const plainSuffixRule = suffixInflection(inflectedSuffix, deinflectedSuffix, conditionsIn, conditionsOut);
    const suffixNotAfterJ = new RegExp(`.*[^j]${inflectedSuffix}$`);
    return {...plainSuffixRule, type: 'other', isInflected: suffixNotAfterJ};
}
// Word classes used as rule conditions in this file. Every class is flagged as a
// dictionary form; `n` additionally lists `np` and `ns` as its sub-conditions.
const conditions = {
    v: {name: 'Verb', isDictionaryForm: true},
    n: {name: 'Noun', isDictionaryForm: true, subConditions: ['np', 'ns']},
    np: {name: 'Noun plural', isDictionaryForm: true},
    ns: {name: 'Noun singular', isDictionaryForm: true},
    adj: {name: 'Adjective', isDictionaryForm: true},
    adv: {name: 'Adverb', isDictionaryForm: true},
};
/** @type {import('language-transformer').LanguageTransformDescriptor<Condition>} */
export const albanianTransforms = {
language: 'sq',
conditions,
transforms: {
// Nouns
'definite': {
name: 'definite',
description: 'Definite form of a noun',
rules: [
// Masculine
suffixInflection('ku', 'k', [], ['n']),
suffixInflection('gu', 'g', [], ['n']),
suffixInflection('hu', 'h', [], ['n']),
suffixInflection('au', 'a', [], ['n']),
suffixInflection('iu', 'i', [], ['n']),
suffixInflection('eu', 'e', [], ['n']),
suffixInflection('i', 'ë', [], ['n']),
suffixInflection('i', '', [], ['n']),
suffixInflection('ri', '', [], ['n']),
suffixInflection('oi', 'ua', [], ['n']),
// Feminine
suffixInflection('a', 'ë', [], ['n']),
suffixInflection('a', '', [], ['n']),
suffixInflection('ja', 'e', [], ['n']),
],
},
'singular definite accusative': {
name: 'singular definite accusative',
description: 'Singular definite accusative form of a noun',
rules: [
suffixInflection('n', '', [], ['n']),
],
},
'plural': {
name: 'plural',
description: 'Plural form of a noun',
rules: [
suffixInflection('e', '', ['np'], ['ns']),
suffixInflection('t', '', ['np'], ['ns']),
],
},
// Verbs
'present indicative second-person singular': {
name: 'present indicative second-person singular',
description: 'Present indicative second-person singular form of a verb',
rules: [
suffixInflection('on', 'oj', [], ['v']),
suffixInflection('uan', 'uaj', [], ['v']),
suffixInflection('n', 'j', [], ['v']),
suffixInflection('hesh', 'hem', [], ['v']),
],
},
'present indicative third-person singular': {
name: 'present indicative third-person singular',
description: 'Present indicative third-person singular form of a verb',
rules: [
suffixInflection('on', 'oj', [], ['v']),
suffixInflection('uan', 'uaj', [], ['v']),
suffixInflection('n', 'j', [], ['v']),
suffixInflection('het', 'hem', [], ['v']),
],
},
'present indicative first-person plural': {
name: 'present indicative first-person plural',
description: 'Present indicative first-person plural form of a verb',
rules: [
suffixInflection('më', '', [], ['v']),
suffixInflection('im', '', [], ['v']),
suffixInflection('hemi', 'hem', [], ['v']),
],
},
'present indicative second-person plural': {
name: 'present indicative second-person plural',
description: 'Present indicative second-person plural form of a verb',
rules: [
suffixInflection('ni', 'j', [], ['v']),
suffixInflection('ni', '', [], ['v']),
suffixInflection('heni', 'hem', [], ['v']),
],
},
'present indicative third-person plural': {
name: 'present indicative third-person plural',
description: 'Present indicative third-person plural form of a verb',
rules: [
suffixInflection('në', '', [], ['v']),
suffixInflection('in', '', [], ['v']),
suffixInflection('hen', 'hem', [], ['v']),
],
},
'imperfect first-person singular indicative': {
name: 'imperfect first-person singular indicative',
description: 'Imperfect first-person singular indicative form of a verb',
rules: [
suffixInflection('ja', 'j', [], ['v']),
suffixInflection('ja', '', [], ['v']),
suffixInflection('hesha', 'hem', [], ['v']),
],
},
'imperfect second-person singular indicative': {
name: 'imperfect second-person singular indicative',
description: 'Imperfect second-person singular indicative form of a verb',
rules: [
suffixInflection('je', 'j', [], ['v']),
suffixInflection('je', '', [], ['v']),
suffixInflection('heshe', 'hem', [], ['v']),
],
},
'imperfect third-person singular indicative': {
name: 'imperfect third-person singular indicative',
description: 'Imperfect third-person singular indicative form of a verb',
rules: [
suffixInflection('nte', 'j', [], ['v']),
suffixInflection('te', '', [], ['v']),
suffixInflection('hej', 'hem', [], ['v']),
],
},
'imperfect first-person plural indicative': {
name: 'imperfect first-person plural indicative',
description: 'Imperfect first-person plural indicative form of a verb',
rules: [
suffixInflection('nim', 'j', [], ['v']),
suffixInflection('nim', '', [], ['v']),
suffixInflection('heshim', 'hem', [], ['v']),
],
},
'imperfect second-person plural indicative': {
name: 'imperfect second-person plural indicative',
description: 'Imperfect second-person plural indicative form of a verb',
rules: [
suffixInflection('nit', 'j', [], ['v']),
suffixInflection('nit', '', [], ['v']),
suffixInflection('heshit', 'hem', [], ['v']),
],
},
'imperfect third-person plural indicative': {
name: 'imperfect third-person plural indicative',
description: 'Imperfect third-person plural indicative form of a verb',
rules: [
suffixInflection('nin', 'j', [], ['v']),
suffixInflection('nin', '', [], ['v']),
suffixInflection('heshin', 'hem', [], ['v']),
],
},
'aorist first-person singular indicative': {
name: 'aorist first-person singular indicative',
description: 'Aorist first-person singular indicative form of a verb',
rules: [
suffixInflection('ova', 'uaj', [], ['v']),
suffixInflection('va', 'j', [], ['v']),
conjugationIISuffixInflection('a', '', [], ['v']),
],
},
'aorist second-person singular indicative': {
name: 'aorist second-person singular indicative',
description: 'Aorist second-person singular indicative form of a verb',
rules: [
suffixInflection('ove', 'uaj', [], ['v']),
suffixInflection('ve', 'j', [], ['v']),
conjugationIISuffixInflection('e', '', [], ['v']),
],
},
'aorist third-person singular indicative': {
name: 'aorist third-person singular indicative',
description: 'Aorist third-person singular indicative form of a verb',
rules: [
suffixInflection('oi', 'oj', [], ['v']),
suffixInflection('oi', 'uaj', [], ['v']),
suffixInflection('u', 'j', [], ['v']),
conjugationIISuffixInflection('i', '', [], ['v']),
suffixInflection('ye', 'ej', [], ['v']),
],
},
'aorist first-person plural indicative': {
name: 'aorist first-person plural indicative',
description: 'Aorist first-person plural indicative form of a verb',
rules: [
suffixInflection('uam', 'oj', [], ['v']),
suffixInflection('uam', 'uaj', [], ['v']),
suffixInflection('më', 'j', [], ['v']),
conjugationIISuffixInflection('ëm', '', [], ['v']),
],
},
'aorist second-person plural indicative': {
name: 'aorist second-person plural indicative',
description: 'Aorist second-person plural indicative form of a verb',
rules: [
suffixInflection('uat', 'oj', [], ['v']),
suffixInflection('uat', 'uaj', [], ['v']),
suffixInflection('të', 'j', [], ['v']),
conjugationIISuffixInflection('ët', '', [], ['v']),
],
},
'aorist third-person plural indicative': {
name: 'aorist third-person plural indicative',
description: 'Aorist third-person plural indicative form of a verb',
rules: [
suffixInflection('uan', 'oj', [], ['v']),
suffixInflection('uan', 'uaj', [], ['v']),
suffixInflection('në', 'j', [], ['v']),
conjugationIISuffixInflection('ën', '', [], ['v']),
],
},
'imperative second-person singular present': {
name: 'imperative second-person singular present',
description: 'Imperative second-person singular present form of a verb',
rules: [
suffixInflection('o', 'oj', [], ['v']),
suffixInflection('hu', 'hem', [], ['v']),
],
},
'imperative second-person plural present': {
name: 'imperative second-person plural present',
description: 'Imperative second-person plural present form of a verb',
rules: [
suffixInflection('ni', 'j', [], ['v']),
suffixInflection('ni', '', [], ['v']),
suffixInflection('huni', 'hem', [], ['v']),
],
},
'participle': {
name: 'participle',
description: 'Participle form of a verb',
rules: [
suffixInflection('uar', 'oj', [], ['v']),
suffixInflection('ur', '', [], ['v']),
suffixInflection('rë', 'j', [], ['v']),
suffixInflection('yer', 'ej', [], ['v']),
],
},
'mediopassive': {
name: 'mediopassive',
description: 'Mediopassive form of a verb',
rules: [
suffixInflection('hem', 'h', ['v'], ['v']),
suffixInflection('hem', 'j', ['v'], ['v']),
],
},
'optative first-person singular present': {
name: 'optative first-person singular present',
description: 'Optative first-person singular present form of a verb',
rules: [
suffixInflection('fsha', 'j', [], ['v']),
],
},
'optative second-person singular present': {
name: 'optative second-person singular present',
description: 'Optative second-person singular present form of a verb',
rules: [
suffixInflection('fsh', 'j', [], ['v']),
],
},
'optative third-person singular present': {
name: 'optative third-person singular present',
description: 'Optative third-person singular present form of a verb',
rules: [
suffixInflection('ftë', 'j', [], ['v']),
],
},
'optative first-person plural present': {
name: 'optative first-person plural present',
description: 'Optative first-person plural present form of a verb',
rules: [
suffixInflection('fshim', 'j', [], ['v']),
],
},
'optative second-person plural present': {
name: 'optative second-person plural present',
description: 'Optative second-person plural present form of a verb',
rules: [
suffixInflection('fshi', 'j', [], ['v']),
],
},
'optative third-person plural present': {
name: 'optative third-person plural present',
description: 'Optative third-person plural present form of a verb',
rules: [
suffixInflection('fshin', 'j', [], ['v']),
],
},
'nominalization': {
name: 'nominalization',
description: 'Noun form of a verb',
rules: [
suffixInflection('im', 'oj', [], ['v']),
suffixInflection('im', 'ej', [], ['v']),
suffixInflection('je', '', [], ['v']),
],
},
},
};

49
vendor/yomitan/js/language/text-processors.js vendored Executable file
View File

@@ -0,0 +1,49 @@
/*
* Copyright (C) 2024-2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/** @type {import('language').TextProcessorOptions<boolean>} */
export const basicTextProcessorOptions = [false, true];
/**
 * Text processor that lowercases the entire input.
 *
 * With the setting disabled the text is handed back unchanged; with it enabled the
 * result of `String.prototype.toLowerCase` is returned.
 * @type {import('language').TextProcessor<boolean>}
 */
export const decapitalize = {
    name: 'Decapitalize text',
    description: 'CAPITALIZED TEXT → capitalized text',
    options: basicTextProcessorOptions,
    process: (str, setting) => {
        if (!setting) { return str; }
        return str.toLowerCase();
    },
};
/**
 * Text processor that upper-cases the first letter of the input.
 *
 * With the setting disabled the text is returned unchanged. With it enabled, the first
 * code point is passed through `String.prototype.toUpperCase` and the remainder of the
 * string is appended untouched. An empty string yields an empty string.
 * @type {import('language').TextProcessor<boolean>}
 */
export const capitalizeFirstLetter = {
    name: 'Capitalize first letter',
    description: 'lowercase text → Lowercase text',
    options: basicTextProcessorOptions,
    process: (str, setting) => {
        if (!setting) { return str; }
        // String iteration yields whole code points, so a first letter outside the BMP
        // (stored as a surrogate pair) is upper-cased as one character. Taking only the
        // first UTF-16 unit would split the pair and leave such letters lowercase.
        const [firstCodePoint = ''] = str;
        return firstCodePoint.toUpperCase() + str.slice(firstCodePoint.length);
    },
};
/**
 * Text processor that deletes combining diacritics from alphabetic text.
 *
 * With the setting enabled, the input is decomposed with NFD and every code point in
 * U+0300–U+036F is removed; the result is returned as-is, without a recomposition step.
 * With the setting disabled the input is returned unchanged.
 *
 * WARNING: do NOT use this with languages written in Han characters. NFD also replaces
 * CJK compatibility ideographs with their unified counterparts, which is undesirable:
 * - '\u9038'.normalize('NFD') => '\u9038'
 * - '\ufa67'.normalize('NFD') => '\u9038'
 * @type {import('language').TextProcessor<boolean>}
 */
export const removeAlphabeticDiacritics = {
    name: 'Remove Alphabetic Diacritics',
    description: 'ἄήé -> αηe',
    options: basicTextProcessorOptions,
    process: (str, setting) => {
        if (!setting) { return str; }
        const decomposed = str.normalize('NFD');
        return decomposed.replace(/[\u0300-\u036f]/g, '');
    },
};

1711
vendor/yomitan/js/language/text-scanner.js vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,36 @@
/*
* Copyright (C) 2024-2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import {isStringPartiallyJapanese} from './ja/japanese.js';
import {isStringPartiallyChinese} from './zh/chinese.js';
/**
 * Guesses a language code for `text`, falling back to the supplied `language`.
 *
 * If `language` is 'zh' or 'yue' it is returned as given. Otherwise 'ja' is returned when
 * the Japanese check passes, else 'zh' when the Chinese check passes, else `language`
 * itself (which may be `null`). The Japanese check takes priority over the Chinese one.
 * @param {string} text
 * @param {?string} language
 * @returns {?string}
 */
export function getLanguageFromText(text, language) {
    const looksJapanese = isStringPartiallyJapanese(text);
    const looksChinese = isStringPartiallyChinese(text);
    // A caller-specified Chinese variety is never overridden by the heuristics.
    if (language === 'zh' || language === 'yue') { return language; }
    if (looksJapanese) { return 'ja'; }
    return looksChinese ? 'zh' : language;
}

View File

@@ -0,0 +1,718 @@
/*
* Copyright (C) 2024-2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import {prefixInflection, suffixInflection, wholeWordInflection} from '../language-transforms.js';
// Latin consonant letters in both cases ('y' included). The rule builders in this file
// interpolate this string into regex character classes.
const CONSONANTS = 'bcdfghjklmnpqrstvwxyzBCDFGHJKLMNPQRSTVWXYZ';
// Lowercase vowels only (no uppercase counterparts are listed); used by
// prefixInflectionWithRep1 to build its syllable pattern.
const VOWELS = 'aeiou';
/**
 * Builds a rule that treats any word containing a hyphen as inflected and deinflects it
 * by deleting the first hyphen only (later hyphens are left in place).
 * @param {string[]} conditionsIn
 * @param {string[]} conditionsOut
 * @returns {import('language-transformer').Rule}
 */
export function hyphenatedInflection(conditionsIn, conditionsOut) {
    const hyphenPattern = /-/;
    return {
        type: 'prefix',
        isInflected: hyphenPattern,
        deinflect: (text) => {
            // Non-global pattern: exactly one hyphen is removed per call.
            return text.replace(hyphenPattern, '');
        },
        conditionsIn,
        conditionsOut,
    };
}
/**
 * Builds a suffix rule that also turns a "u" in the stem back into "o".
 *
 * A word is considered inflected when it ends in "u", then one or more consonants, then
 * `inflectedSuffix`. Deinflection rewrites that tail to "o" + the same consonants +
 * `deinflectedSuffix`; text that does not match is returned unchanged.
 * NOTE(review): the rule is tagged `type: 'prefix'` although it matches at the end of the
 * word — confirm whether that is intentional.
 * @param {string} inflectedSuffix Interpolated into a RegExp as-is.
 * @param {string} deinflectedSuffix
 * @param {string[]} conditionsIn
 * @param {string[]} conditionsOut
 * @returns {import('language-transformer').Rule}
 */
export function suffixInflectionWithOtoUSoundChange(inflectedSuffix, deinflectedSuffix, conditionsIn, conditionsOut) {
    const consonantRun = `([${CONSONANTS}]+)`;
    const tailPattern = new RegExp(`u${consonantRun}${inflectedSuffix}$`);
    // $1 re-inserts the captured consonant run after the restored "o".
    const restoredTail = `o$1${deinflectedSuffix}`;
    return {
        type: 'prefix',
        isInflected: tailPattern,
        deinflect: (text) => text.replace(tailPattern, restoredTail),
        conditionsIn,
        conditionsOut,
    };
}
/**
 * Builds a prefix rule for words whose first syllable is repeated after the prefix.
 *
 * A word is considered inflected when it starts with `inflectedPrefix` followed by a
 * syllable (zero or more of `consonants`, then one lowercase vowel) that is immediately
 * repeated. Deinflection replaces the prefix and both copies of the syllable with
 * `deinflectedPrefix` plus a single copy; non-matching text is returned unchanged.
 * @param {string} inflectedPrefix Interpolated into a RegExp as-is.
 * @param {string} deinflectedPrefix
 * @param {string[]} conditionsIn
 * @param {string[]} conditionsOut
 * @param {string} consonants Letters allowed at the start of the repeated syllable.
 * @returns {import('language-transformer').Rule}
 */
export function prefixInflectionWithRep1(inflectedPrefix, deinflectedPrefix, conditionsIn, conditionsOut, consonants = CONSONANTS) {
    const syllable = `([${consonants}]*[${VOWELS}])`;
    // Group 1 is the prefix, group 2 the syllable, and \2 demands its repetition.
    const reduplicatedStart = new RegExp(`^(${inflectedPrefix})${syllable}(\\2)`);
    const singleSyllableStart = `${deinflectedPrefix}$2`;
    return {
        type: 'prefix',
        isInflected: reduplicatedStart,
        deinflect: (text) => text.replace(reduplicatedStart, singleSyllableStart),
        conditionsIn,
        conditionsOut,
    };
}
/**
 * Builds a rule for words wrapped in both a prefix and a suffix.
 *
 * A word is considered inflected when it consists of `inflectedPrefix`, one or more `\w`
 * characters, and `inflectedSuffix`. Deinflection cuts `inflectedPrefix.length` characters
 * from the front and `inflectedSuffix.length` characters from the back, then wraps the
 * remaining stem in `deinflectedPrefix` and `deinflectedSuffix`.
 * @param {string} inflectedPrefix Interpolated into a RegExp as-is.
 * @param {string} deinflectedPrefix
 * @param {string} inflectedSuffix Interpolated into a RegExp as-is; may be empty.
 * @param {string} deinflectedSuffix
 * @param {string[]} conditionsIn
 * @param {string[]} conditionsOut
 * @returns {import('language-transformer').Rule}
 */
export function sandwichInflection(inflectedPrefix, deinflectedPrefix, inflectedSuffix, deinflectedSuffix, conditionsIn, conditionsOut) {
    const regex = new RegExp(`^${inflectedPrefix}\\w+${inflectedSuffix}$`);
    return {
        type: 'other',
        isInflected: regex,
        deinflect: (text) => {
            // A negative end index counts from the end of the string, but an empty suffix
            // would give -0, i.e. 0, and slice(start, 0) discards the whole stem.
            const stemEnd = inflectedSuffix.length === 0 ? text.length : -inflectedSuffix.length;
            const stem = text.slice(inflectedPrefix.length, stemEnd);
            return deinflectedPrefix + stem + deinflectedSuffix;
        },
        conditionsIn,
        conditionsOut,
    };
}
/**
 * Builds a prefix-plus-suffix rule that also turns a "u" in the stem back into "o".
 *
 * A word is considered inflected when it consists of `inflectedPrefix`, one or more `\w`
 * characters, a "u", one or more consonants, and `inflectedSuffix`. Deinflection emits
 * `deinflectedPrefix`, the characters before the "u", an "o", the consonants, and
 * `deinflectedSuffix`; non-matching text is returned unchanged.
 * NOTE(review): the rule is tagged `type: 'prefix'` while the plain sandwichInflection
 * uses 'other' — confirm whether that difference is intentional.
 * @param {string} inflectedPrefix Interpolated into a RegExp as-is.
 * @param {string} deinflectedPrefix
 * @param {string} inflectedSuffix Interpolated into a RegExp as-is.
 * @param {string} deinflectedSuffix
 * @param {string[]} conditionsIn
 * @param {string[]} conditionsOut
 * @returns {import('language-transformer').Rule}
 */
export function sandwichInflectionWithOtoUSoundChange(inflectedPrefix, deinflectedPrefix, inflectedSuffix, deinflectedSuffix, conditionsIn, conditionsOut) {
    const stemBeforeU = '(\\w+)';
    const consonantRun = `([${CONSONANTS}]+)`;
    const wrappedPattern = new RegExp(`^${inflectedPrefix}${stemBeforeU}u${consonantRun}${inflectedSuffix}$`);
    // $1 is the stem before the "u"; $2 is the consonant run after it.
    const restoredWord = `${deinflectedPrefix}$1o$2${deinflectedSuffix}`;
    return {
        type: 'prefix',
        isInflected: wrappedPattern,
        deinflect: (text) => text.replace(wrappedPattern, restoredWord),
        conditionsIn,
        conditionsOut,
    };
}
/** @type {import('language-transformer').LanguageTransformDescriptor} */
export const tagalogTransforms = {
language: 'tl',
conditions: {
n: {
name: 'Noun',
isDictionaryForm: true,
subConditions: ['num'],
},
adj: {
name: 'Adjective',
isDictionaryForm: true,
},
num: {
name: 'Numeral',
isDictionaryForm: true,
},
},
transforms: {
'hyphenated': {
name: 'hyphenated',
description: 'hyphenated form of words',
rules: [
hyphenatedInflection([], []),
],
},
'-an': {
name: '-an',
rules: [
suffixInflection('an', '', [], ['n']),
suffixInflection('ran', 'd', [], ['n']),
suffixInflectionWithOtoUSoundChange('an', '', [], ['n']),
suffixInflectionWithOtoUSoundChange('ran', 'd', [], ['n']),
...[...'aeiou'].map((v) => suffixInflection(`${v}han`, `${v}`, [], ['n'])),
...[...'aeiou'].map((v) => suffixInflection(`${v}nan`, `${v}`, [], ['n'])),
suffixInflection('uhan', 'o', [], ['n']),
suffixInflection('unan', 'o', [], ['n']),
],
},
'-in': {
name: '-in',
rules: [
suffixInflection('in', '', [], ['n']),
suffixInflection('rin', 'd', [], ['n']),
suffixInflectionWithOtoUSoundChange('in', '', [], ['n']),
suffixInflectionWithOtoUSoundChange('rin', 'd', [], ['n']),
...[...'aeiou'].map((v) => suffixInflection(`${v}hin`, `${v}`, [], ['n'])),
...[...'aeiou'].map((v) => suffixInflection(`${v}nin`, `${v}`, [], ['n'])),
suffixInflection('uhin', 'o', [], ['n']),
suffixInflection('unin', 'o', [], ['n']),
],
},
'ma-': {
name: 'ma-',
rules: [
prefixInflection('ma', '', [], ['n', 'adj']),
prefixInflection('mar', 'd', [], ['n', 'adj']),
],
},
'pang-': {
name: 'pang-',
rules: [
prefixInflection('pang', '', [], ['n', 'adj']),
...[...'dlrst'].map((v) => prefixInflection(`pan${v}`, `${v}`, [], ['n', 'adj'])),
...[...'bp'].map((v) => prefixInflection(`pam${v}`, `${v}`, [], ['n', 'adj'])),
wholeWordInflection('pangalawa', 'dalawa', [], ['num']),
wholeWordInflection('pangatlo', 'tatlo', [], ['num']),
],
},
'ka-': {
name: 'ka-',
rules: [
prefixInflection('ka', '', [], ['n', 'adj']),
prefixInflection('kar', 'd', [], ['n', 'adj']),
],
},
'kaka-': {
name: 'kaka-',
rules: [
prefixInflection('kaka', '', [], ['n']),
prefixInflection('kakar', 'd', [], ['n']),
prefixInflectionWithRep1('ka', '', [], ['n']),
],
},
'ka-...-an': {
name: 'ka-...-an',
rules: [
sandwichInflection('ka', '', 'an', '', [], ['n']),
sandwichInflection('kar', 'd', 'an', '', [], ['n']),
sandwichInflection('ka', '', 'ran', 'd', [], ['n']),
sandwichInflection('kar', 'd', 'ran', 'd', [], ['n']),
...[...'aeiou'].map((v) => sandwichInflection('ka', '', `${v}han`, `${v}`, [], ['n'])),
...[...'aeiou'].map((v) => sandwichInflection('kar', 'd', `${v}han`, `${v}`, [], ['n'])),
...[...'aeiou'].map((v) => sandwichInflection('ka', '', `${v}nan`, `${v}`, [], ['n'])),
...[...'aeiou'].map((v) => sandwichInflection('kar', 'd', `${v}nan`, `${v}`, [], ['n'])),
sandwichInflection('ka', '', 'uhan', 'o', [], ['n']),
sandwichInflection('kar', 'd', 'uhan', 'o', [], ['n']),
sandwichInflection('ka', '', 'unan', 'o', [], ['n']),
sandwichInflection('kar', 'd', 'unan', 'o', [], ['n']),
sandwichInflectionWithOtoUSoundChange('ka', '', 'an', '', [], ['n']),
sandwichInflectionWithOtoUSoundChange('kar', 'd', 'an', '', [], ['n']),
sandwichInflectionWithOtoUSoundChange('ka', '', 'ran', 'd', [], ['n']),
sandwichInflectionWithOtoUSoundChange('kar', 'd', 'ran', 'd', [], ['n']),
],
},
'mag-': {
name: 'mag-',
rules: [
prefixInflection('mag', '', [], ['n']),
],
},
'mag- + rep1': {
name: 'mag- + rep1',
rules: [
prefixInflectionWithRep1('mag', '', [], ['n']),
],
},
'magka-': {
name: 'magka-',
rules: [
prefixInflection('magka', '', [], ['n', 'adj']),
prefixInflection('magkar', 'd', [], ['n', 'adj']),
],
},
'magkaka-': {
name: 'magkaka-',
rules: [
prefixInflection('magkaka', '', [], ['n', 'adj']),
prefixInflection('magkakar', 'd', [], ['n', 'adj']),
],
},
'mang- + rep1': {
name: 'mang- + rep1',
rules: [
prefixInflectionWithRep1('mang', '', [], ['n']),
prefixInflectionWithRep1('man', '', [], ['n'], 'dlrst'),
prefixInflectionWithRep1('mam', '', [], ['n'], 'bp'),
],
},
'pa-': {
name: 'pa-',
rules: [
prefixInflection('pa', '', [], ['n', 'adj']),
prefixInflection('par', 'd', [], ['n', 'adj']),
],
},
'pa-...-an': {
name: 'pa-...-an',
rules: [
sandwichInflection('pa', '', 'an', '', [], ['n']),
sandwichInflection('par', 'd', 'an', '', [], ['n']),
sandwichInflection('pa', '', 'ran', 'd', [], ['n']),
sandwichInflection('par', 'd', 'ran', 'd', [], ['n']),
...[...'aeiou'].map((v) => sandwichInflection('pa', '', `${v}han`, `${v}`, [], ['n'])),
...[...'aeiou'].map((v) => sandwichInflection('par', 'd', `${v}han`, `${v}`, [], ['n'])),
...[...'aeiou'].map((v) => sandwichInflection('pa', '', `${v}nan`, `${v}`, [], ['n'])),
...[...'aeiou'].map((v) => sandwichInflection('par', 'd', `${v}nan`, `${v}`, [], ['n'])),
sandwichInflection('pa', '', 'uhan', 'o', [], ['n']),
sandwichInflection('par', 'd', 'uhan', 'o', [], ['n']),
sandwichInflection('pa', '', 'unan', 'o', [], ['n']),
sandwichInflection('par', 'd', 'unan', 'o', [], ['n']),
sandwichInflectionWithOtoUSoundChange('pa', '', 'an', '', [], ['n']),
sandwichInflectionWithOtoUSoundChange('par', 'd', 'an', '', [], ['n']),
sandwichInflectionWithOtoUSoundChange('pa', '', 'ran', 'd', [], ['n']),
sandwichInflectionWithOtoUSoundChange('par', 'd', 'ran', 'd', [], ['n']),
],
},
'pag-': {
name: 'pag-',
rules: [
prefixInflection('pag', '', [], ['n']),
],
},
'pag- + rep1': {
name: 'pag- + rep1',
rules: [
prefixInflectionWithRep1('pag', '', [], ['n']),
],
},
'pagka-': {
name: 'pagka-',
rules: [
prefixInflection('pagka', '', [], ['n']),
prefixInflection('pagkar', 'd', [], ['n']),
prefixInflection('pagkaka', '', [], ['n']),
prefixInflection('pagkakar', 'd', [], ['n']),
],
},
'pakiki-': {
name: 'pakiki-',
rules: [
prefixInflection('pakiki', '', [], ['n']),
prefixInflectionWithRep1('pakiki', '', [], ['n']),
prefixInflection('pakikir', 'd', [], ['n']),
],
},
'pakikipag-': {
name: 'pakikipag-',
rules: [
prefixInflection('pakikipag', '', [], ['n']),
],
},
'pang- + rep1': {
name: 'pang- + rep1',
rules: [
prefixInflectionWithRep1('pang', '', [], ['n']),
prefixInflectionWithRep1('pan', '', [], ['n'], 'dlrst'),
prefixInflectionWithRep1('pam', '', [], ['n'], 'bp'),
],
},
'tag-': {
name: 'tag-',
rules: [
prefixInflection('tag', '', [], ['n']),
],
},
'taga-': {
name: 'taga-',
rules: [
prefixInflection('taga', '', [], ['n']),
],
},
'tagapag-': {
name: 'tagapag-',
rules: [
prefixInflection('tagapag', '', [], ['n']),
],
},
'tagapang-': {
name: 'tagapang-',
rules: [
prefixInflection('tagapang', '', [], ['n']),
...[...'dlrst'].map((v) => prefixInflection(`tagapan${v}`, `${v}`, [], ['n'])),
...[...'bp'].map((v) => prefixInflection(`tagapam${v}`, `${v}`, [], ['n'])),
],
},
'i-': {
name: 'i-',
rules: [
prefixInflection('i', '', [], ['n']),
],
},
'ika-': {
name: 'ika-',
rules: [
prefixInflection('ika', '', [], ['n']),
prefixInflection('ikar', 'd', [], ['n']),
wholeWordInflection('ikalawa', 'dalawa', [], ['num']),
wholeWordInflection('ikatlo', 'tatlo', [], ['num']),
],
},
'ipa-': {
name: 'ipa-',
rules: [
prefixInflection('ipa', '', [], ['n']),
],
},
'ipag-': {
name: 'ipag-',
rules: [
prefixInflection('ipag', '', [], ['n']),
],
},
'ipag- + rep1': {
name: 'ipag- + rep1',
rules: [
prefixInflectionWithRep1('ipag', '', [], ['n']),
],
},
'ipang-': {
name: 'ipang-',
rules: [
prefixInflection('ipang', '', [], ['n']),
...[...'dlrst'].map((v) => prefixInflection(`ipan${v}`, `${v}`, [], ['n'])),
...[...'bp'].map((v) => prefixInflection(`ipam${v}`, `${v}`, [], ['n'])),
],
},
'ma-...-an': {
name: 'ma-...-an',
rules: [
sandwichInflection('ma', '', 'an', '', [], ['n']),
sandwichInflection('mar', 'd', 'an', '', [], ['n']),
sandwichInflection('ma', '', 'ran', 'd', [], ['n']),
sandwichInflection('mar', 'd', 'ran', 'd', [], ['n']),
...[...'aeiou'].map((v) => sandwichInflection('ma', '', `${v}han`, `${v}`, [], ['n'])),
...[...'aeiou'].map((v) => sandwichInflection('mar', 'd', `${v}han`, `${v}`, [], ['n'])),
...[...'aeiou'].map((v) => sandwichInflection('ma', '', `${v}nan`, `${v}`, [], ['n'])),
...[...'aeiou'].map((v) => sandwichInflection('mar', 'd', `${v}nan`, `${v}`, [], ['n'])),
sandwichInflection('ma', '', 'uhan', 'o', [], ['n']),
sandwichInflection('mar', 'd', 'uhan', 'o', [], ['n']),
sandwichInflection('ma', '', 'unan', 'o', [], ['n']),
sandwichInflection('mar', 'd', 'unan', 'o', [], ['n']),
sandwichInflectionWithOtoUSoundChange('ma', '', 'an', '', [], ['n']),
sandwichInflectionWithOtoUSoundChange('mar', 'd', 'an', '', [], ['n']),
sandwichInflectionWithOtoUSoundChange('ma', '', 'ran', 'd', [], ['n']),
sandwichInflectionWithOtoUSoundChange('mar', 'd', 'ran', 'd', [], ['n']),
],
},
'mag-...-an': {
name: 'mag-...-an',
rules: [
sandwichInflection('mag', '', 'an', '', [], ['n']),
sandwichInflection('mag', '', 'ran', 'd', [], ['n']),
...[...'aeiou'].map((v) => sandwichInflection('mag', '', `${v}han`, `${v}`, [], ['n'])),
...[...'aeiou'].map((v) => sandwichInflection('mag', '', `${v}nan`, `${v}`, [], ['n'])),
sandwichInflection('mag', '', 'uhan', 'o', [], ['n']),
sandwichInflection('mag', '', 'unan', 'o', [], ['n']),
sandwichInflectionWithOtoUSoundChange('mag', '', 'an', '', [], ['n']),
sandwichInflectionWithOtoUSoundChange('mag', '', 'ran', 'd', [], ['n']),
],
},
'magkanda-': {
name: 'magkanda-',
rules: [
prefixInflection('magkanda', '', [], ['n']),
prefixInflection('magkandar', 'd', [], ['n']),
],
},
'magma-': {
name: 'magma-',
rules: [
prefixInflection('magma', '', [], ['n']),
prefixInflection('magmar', 'd', [], ['n']),
],
},
'magpa-': {
name: 'magpa-',
rules: [
prefixInflection('magpa', '', [], ['n']),
prefixInflection('magpar', 'd', [], ['n']),
],
},
'magpaka-': {
name: 'magpaka-',
rules: [
prefixInflection('magpaka', '', [], ['n']),
prefixInflection('magpakar', 'd', [], ['n']),
],
},
'magsi-': {
name: 'magsi-',
rules: [
prefixInflection('magsi', '', [], ['n']),
prefixInflection('magsipag', '', [], ['n']),
],
},
'makapang-': {
name: 'makapang-',
rules: [
prefixInflection('makapang', '', [], ['n']),
...[...'dlrst'].map((v) => prefixInflection(`makapan${v}`, `${v}`, [], ['n'])),
...[...'bp'].map((v) => prefixInflection(`makapam${v}`, `${v}`, [], ['n'])),
],
},
'makapag-': {
name: 'makapag-',
rules: [
prefixInflection('makapag', '', [], ['n']),
],
},
'maka-': {
name: 'maka-',
rules: [
prefixInflection('maka', '', [], ['n', 'adj']),
prefixInflection('makar', 'd', [], ['n', 'adj']),
],
},
'maki-': {
name: 'maki-',
rules: [
prefixInflection('maki', '', [], ['n']),
prefixInflection('makir', 'd', [], ['n']),
],
},
'makipag-': {
name: 'makipag-',
rules: [
prefixInflection('makipag', '', [], ['n']),
],
},
'makipag-...-an': {
name: 'makipag-...-an',
rules: [
sandwichInflection('makipag', '', 'an', '', [], ['n']),
sandwichInflection('makipag', '', 'ran', 'd', [], ['n']),
...[...'aeiou'].map((v) => sandwichInflection('makipag', '', `${v}han`, `${v}`, [], ['n'])),
...[...'aeiou'].map((v) => sandwichInflection('makipag', '', `${v}nan`, `${v}`, [], ['n'])),
sandwichInflection('makipag', '', 'uhan', 'o', [], ['n']),
sandwichInflection('makipag', '', 'unan', 'o', [], ['n']),
sandwichInflectionWithOtoUSoundChange('makipag', '', 'an', '', [], ['n']),
sandwichInflectionWithOtoUSoundChange('makipag', '', 'ran', 'd', [], ['n']),
],
},
'mang-': {
name: 'mang-',
rules: [
prefixInflection('mang', '', [], ['n']),
...[...'dlrst'].map((v) => prefixInflection(`man${v}`, `${v}`, [], ['n'])),
...[...'bp'].map((v) => prefixInflection(`mam${v}`, `${v}`, [], ['n'])),
],
},
'mapa-': {
name: 'mapa-',
rules: [
prefixInflection('mapa', '', [], ['n']),
prefixInflection('mapar', 'd', [], ['n']),
],
},
'pa-...-in': {
name: 'pa-...-in',
rules: [
sandwichInflection('pa', '', 'in', '', [], ['n']),
sandwichInflection('par', 'd', 'in', '', [], ['n']),
sandwichInflection('pa', '', 'rin', 'd', [], ['n']),
sandwichInflection('par', 'd', 'rin', 'd', [], ['n']),
...[...'aeiou'].map((v) => sandwichInflection('pa', '', `${v}hin`, `${v}`, [], ['n'])),
...[...'aeiou'].map((v) => sandwichInflection('par', 'd', `${v}hin`, `${v}`, [], ['n'])),
...[...'aeiou'].map((v) => sandwichInflection('pa', '', `${v}nin`, `${v}`, [], ['n'])),
...[...'aeiou'].map((v) => sandwichInflection('par', 'd', `${v}nin`, `${v}`, [], ['n'])),
sandwichInflection('pa', '', 'uhin', 'o', [], ['n']),
sandwichInflection('par', 'd', 'uhin', 'o', [], ['n']),
sandwichInflection('pa', '', 'unin', 'o', [], ['n']),
sandwichInflection('par', 'd', 'unin', 'o', [], ['n']),
sandwichInflectionWithOtoUSoundChange('pa', '', 'in', '', [], ['n']),
sandwichInflectionWithOtoUSoundChange('par', 'd', 'in', '', [], ['n']),
sandwichInflectionWithOtoUSoundChange('pa', '', 'rin', 'd', [], ['n']),
sandwichInflectionWithOtoUSoundChange('par', 'd', 'rin', 'd', [], ['n']),
],
},
'pag-...-an': {
name: 'pag-...-an',
rules: [
sandwichInflection('pag', '', 'an', '', [], ['n']),
sandwichInflection('pag', '', 'ran', 'd', [], ['n']),
...[...'aeiou'].map((v) => sandwichInflection('pag', '', `${v}han`, `${v}`, [], ['n'])),
...[...'aeiou'].map((v) => sandwichInflection('pag', '', `${v}nan`, `${v}`, [], ['n'])),
sandwichInflection('pag', '', 'uhan', 'o', [], ['n']),
sandwichInflection('pag', '', 'unan', 'o', [], ['n']),
sandwichInflectionWithOtoUSoundChange('pag', '', 'an', '', [], ['n']),
sandwichInflectionWithOtoUSoundChange('pag', '', 'ran', 'd', [], ['n']),
],
},
'pang-...-an': {
name: 'pang-...-an',
rules: [
sandwichInflection('pang', '', 'an', '', [], ['n']),
sandwichInflection('pang', '', 'ran', 'd', [], ['n']),
...[...'aeiou'].map((v) => sandwichInflection('pang', '', `${v}han`, `${v}`, [], ['n'])),
...[...'aeiou'].map((v) => sandwichInflection('pang', '', `${v}nan`, `${v}`, [], ['n'])),
sandwichInflection('pang', '', 'uhan', 'o', [], ['n']),
sandwichInflection('pang', '', 'unan', 'o', [], ['n']),
sandwichInflectionWithOtoUSoundChange('pang', '', 'an', '', [], ['n']),
sandwichInflectionWithOtoUSoundChange('pang', '', 'ran', 'd', [], ['n']),
...[...'dlrst'].flatMap((v) => [
sandwichInflection(`pan${v}`, `${v}`, 'an', '', [], ['n']),
sandwichInflection(`pan${v}`, `${v}`, 'ran', 'd', [], ['n']),
...[...'aeiou'].map((k) => sandwichInflection(`pan${v}`, `${v}`, `${k}han`, `${k}`, [], ['n'])),
...[...'aeiou'].map((k) => sandwichInflection(`pan${v}`, `${v}`, `${k}nan`, `${k}`, [], ['n'])),
sandwichInflection(`pan${v}`, '', 'uhan', 'o', [], ['n']),
sandwichInflection(`pan${v}`, '', 'unan', 'o', [], ['n']),
sandwichInflectionWithOtoUSoundChange(`pan${v}`, `${v}`, 'an', '', [], ['n']),
sandwichInflectionWithOtoUSoundChange(`pan${v}`, `${v}`, 'ran', 'd', [], ['n']),
]),
...[...'bp'].flatMap((v) => [
sandwichInflection(`pam${v}`, `${v}`, 'an', '', [], ['n']),
sandwichInflection(`pam${v}`, `${v}`, 'ran', 'd', [], ['n']),
...[...'aeiou'].map((k) => sandwichInflection(`pam${v}`, `${v}`, `${k}han`, `${k}`, [], ['n'])),
...[...'aeiou'].map((k) => sandwichInflection(`pam${v}`, `${v}`, `${k}nan`, `${k}`, [], ['n'])),
sandwichInflection(`pam${v}`, '', 'uhan', 'o', [], ['n']),
sandwichInflection(`pam${v}`, '', 'unan', 'o', [], ['n']),
sandwichInflectionWithOtoUSoundChange(`pam${v}`, `${v}`, 'an', '', [], ['n']),
sandwichInflectionWithOtoUSoundChange(`pam${v}`, `${v}`, 'ran', 'd', [], ['n']),
]),
],
},
'pag-...-in': {
name: 'pag-...-in',
rules: [
sandwichInflection('pag', '', 'in', '', [], ['n']),
sandwichInflection('pag', '', 'rin', 'd', [], ['n']),
...[...'aeiou'].map((v) => sandwichInflection('pag', '', `${v}hin`, `${v}`, [], ['n'])),
...[...'aeiou'].map((v) => sandwichInflection('pag', '', `${v}nin`, `${v}`, [], ['n'])),
sandwichInflection('pag', '', 'uhin', 'o', [], ['n']),
sandwichInflection('pag', '', 'unin', 'o', [], ['n']),
sandwichInflectionWithOtoUSoundChange('pag', '', 'in', '', [], ['n']),
sandwichInflectionWithOtoUSoundChange('pag', '', 'rin', 'd', [], ['n']),
],
},
'papang-...-in': {
name: 'papang-...-in',
rules: [
sandwichInflection('papang', '', 'in', '', [], ['n']),
sandwichInflection('papang', '', 'rin', 'd', [], ['n']),
...[...'aeiou'].map((v) => sandwichInflection('papang', '', `${v}hin`, `${v}`, [], ['n'])),
...[...'aeiou'].map((v) => sandwichInflection('papang', '', `${v}nin`, `${v}`, [], ['n'])),
sandwichInflection('papang', '', 'uhin', 'o', [], ['n']),
sandwichInflection('papang', '', 'unin', 'o', [], ['n']),
sandwichInflectionWithOtoUSoundChange('papang', '', 'in', '', [], ['n']),
sandwichInflectionWithOtoUSoundChange('papang', '', 'rin', 'd', [], ['n']),
...[...'dlrst'].flatMap((v) => [
sandwichInflection(`papan${v}`, `${v}`, 'in', '', [], ['n']),
sandwichInflection(`papan${v}`, `${v}`, 'rin', 'd', [], ['n']),
...[...'aeiou'].map((k) => sandwichInflection(`papan${v}`, `${v}`, `${k}hin`, `${k}`, [], ['n'])),
...[...'aeiou'].map((k) => sandwichInflection(`papan${v}`, `${v}`, `${k}nin`, `${k}`, [], ['n'])),
sandwichInflection(`papan${v}`, '', 'uhin', 'o', [], ['n']),
sandwichInflection(`papan${v}`, '', 'unin', 'o', [], ['n']),
sandwichInflectionWithOtoUSoundChange(`papan${v}`, `${v}`, 'in', '', [], ['n']),
sandwichInflectionWithOtoUSoundChange(`papan${v}`, `${v}`, 'rin', 'd', [], ['n']),
]),
...[...'bp'].flatMap((v) => [
sandwichInflection(`papam${v}`, `${v}`, 'in', '', [], ['n']),
sandwichInflection(`papam${v}`, `${v}`, 'rin', 'd', [], ['n']),
...[...'aeiou'].map((k) => sandwichInflection(`papam${v}`, `${v}`, `${k}hin`, `${k}`, [], ['n'])),
...[...'aeiou'].map((k) => sandwichInflection(`papam${v}`, `${v}`, `${k}nin`, `${k}`, [], ['n'])),
sandwichInflection(`papam${v}`, '', 'uhin', 'o', [], ['n']),
sandwichInflection(`papam${v}`, '', 'unin', 'o', [], ['n']),
sandwichInflectionWithOtoUSoundChange(`papam${v}`, `${v}`, 'in', '', [], ['n']),
sandwichInflectionWithOtoUSoundChange(`papam${v}`, `${v}`, 'rin', 'd', [], ['n']),
]),
],
},
'ma-...-in': {
name: 'ma-...-in',
rules: [
sandwichInflection('ma', '', 'in', '', [], ['n', 'adj']),
sandwichInflection('mar', 'd', 'in', '', [], ['n', 'adj']),
sandwichInflection('ma', '', 'rin', 'd', [], ['n', 'adj']),
sandwichInflection('mar', 'd', 'rin', 'd', [], ['n', 'adj']),
...[...'aeiou'].map((v) => sandwichInflection('ma', '', `${v}hin`, `${v}`, [], ['n', 'adj'])),
...[...'aeiou'].map((v) => sandwichInflection('mar', 'd', `${v}hin`, `${v}`, [], ['n', 'adj'])),
...[...'aeiou'].map((v) => sandwichInflection('ma', '', `${v}nin`, `${v}`, [], ['n', 'adj'])),
...[...'aeiou'].map((v) => sandwichInflection('mar', 'd', `${v}nin`, `${v}`, [], ['n', 'adj'])),
sandwichInflection('ma', '', 'uhin', 'o', [], ['n', 'adj']),
sandwichInflection('mar', 'd', 'uhin', 'o', [], ['n', 'adj']),
sandwichInflection('ma', '', 'unin', 'o', [], ['n', 'adj']),
sandwichInflection('mar', 'd', 'unin', 'o', [], ['n', 'adj']),
sandwichInflectionWithOtoUSoundChange('ma', '', 'in', '', [], ['n', 'adj']),
sandwichInflectionWithOtoUSoundChange('mar', 'd', 'in', '', [], ['n', 'adj']),
sandwichInflectionWithOtoUSoundChange('ma', '', 'rin', 'd', [], ['n', 'adj']),
sandwichInflectionWithOtoUSoundChange('mar', 'd', 'rin', 'd', [], ['n', 'adj']),
],
},
'mapag-': {
name: 'mapag-',
rules: [
prefixInflection('mapag', '', [], ['n', 'adj']),
],
},
'naka-': {
name: 'naka-',
rules: [
prefixInflection('naka', '', [], ['n', 'adj']),
prefixInflection('nakar', 'd', [], ['n', 'adj']),
],
},
'nakaka-': {
name: 'nakaka-',
rules: [
prefixInflection('nakaka', '', [], ['n', 'adj']),
prefixInflection('nakakar', 'd', [], ['n', 'adj']),
],
},
'nakakapang-': {
name: 'nakakapang-',
rules: [
prefixInflection('nakakapang', '', [], ['n', 'adj']),
...[...'dlrst'].map((v) => prefixInflection(`nakakapan${v}`, `${v}`, [], ['n', 'adj'])),
...[...'bp'].map((v) => prefixInflection(`nakakapam${v}`, `${v}`, [], ['n', 'adj'])),
],
},
'naka- + rep1': {
name: 'naka- + rep1',
rules: [
prefixInflectionWithRep1('naka', '', [], ['n', 'adj']),
],
},
'nakapang- + rep1': {
name: 'nakapang- + rep1',
rules: [
prefixInflectionWithRep1('nakapang', '', [], ['n', 'adj']),
prefixInflectionWithRep1('nakapan', '', [], ['n', 'adj'], 'dlrst'),
prefixInflectionWithRep1('nakapam', '', [], ['n', 'adj'], 'bp'),
],
},
'pala-': {
name: 'pala-',
rules: [
prefixInflection('pala', '', [], ['n', 'adj']),
prefixInflection('palar', 'd', [], ['n', 'adj']),
],
},
'-ng': {
name: 'ng',
rules: [
suffixInflection('ng', '', [], []),
],
},
},
};

2482
vendor/yomitan/js/language/translator.js vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,58 @@
/*
* Copyright (C) 2024-2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// Capture group matching a single combining tone mark.
const TONE = '([\u0300\u0309\u0303\u0301\u0323])'; // Huyền, hỏi, ngã, sắc, nặng
const COMBINING_BREVE = '\u0306'; // Ă
const COMBINING_CIRCUMFLEX_ACCENT = '\u0302'; // Â
const COMBINING_HORN = '\u031B'; // Ơ
// Combining marks that modify the vowel quality (as opposed to the tone).
const DIACRITICS = `${COMBINING_BREVE}${COMBINING_CIRCUMFLEX_ACCENT}${COMBINING_HORN}`;
// Every pattern is global: the input may contain several syllables, and each one has to be
// normalized (a non-global pattern would only rewrite the first match in the string).
// eslint-disable-next-line no-misleading-character-class
const re1 = new RegExp(`${TONE}([aeiouy${DIACRITICS}]+)`, 'gi');
const re2 = new RegExp(`(?<=[${DIACRITICS}])(.)${TONE}`, 'gi');
const re3 = new RegExp(`(?<=[ae])([iouy])${TONE}`, 'gi');
const re4 = new RegExp(`(?<=[oy])([iuy])${TONE}`, 'gi');
const re5 = new RegExp(`(?<!q)(u)([aeiou])${TONE}`, 'gi');
const re6 = new RegExp(`(?<!g)(i)([aeiouy])${TONE}`, 'gi');
const re7 = new RegExp(`(?<!q)([ou])([aeoy])${TONE}(?!\\w)`, 'gi');
/**
 * Text processor that rewrites the placement of Vietnamese tone marks.
 *
 * The text is decomposed (NFD) so that tone marks become separate combining characters,
 * the marks are moved by a fixed sequence of regex substitutions, and the result is
 * recomposed (NFC). With the `'old'` setting one extra substitution (`re7`) is applied;
 * with `'off'` the input is returned untouched.
 *
 * This function is adapted from https://github.com/enricobarzetti/viet_text_tools/blob/master/viet_text_tools/__init__.py
 * @type {import('language').TextProcessor<'old'|'new'|'off'>}
 */
export const normalizeDiacritics = {
    name: 'Normalize Diacritics',
    description: 'Normalize diacritics and their placements (in either the old style or new style). NFC normalization is used.',
    options: ['old', 'new', 'off'],
    process: (str, setting) => {
        if (setting === 'off') { return str; }
        let result = str.normalize('NFD');
        // Put the tone on the second vowel
        result = result.replace(re1, '$2$1');
        // Put the tone on the vowel with a diacritic
        result = result.replace(re2, '$2$1');
        // For vowels that are not oa, oe, uy put the tone on the penultimate vowel
        result = result.replace(re3, '$2$1');
        result = result.replace(re4, '$2$1');
        result = result.replace(re5, '$1$3$2');
        result = result.replace(re6, '$1$3$2');
        // Old style only: move the tone back onto the first vowel of a trailing oa/oe/uy-like pair
        if (setting === 'old') { result = result.replace(re7, '$1$3$2'); }
        return result.normalize('NFC');
    },
};

View File

@@ -0,0 +1,72 @@
/*
* Copyright (C) 2024-2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/**
 * Maps each letter that has a distinct word-final form to that final form.
 */
const final_letter_map = new Map([
    ['\u05de', '\u05dd'], // mem -> final mem
    ['\u05e0', '\u05df'], // nun -> final nun
    ['\u05e6', '\u05e5'], // tsadek -> final tsadek
    ['\u05e4', '\u05e3'], // pey -> final fey
    ['\u05dB', '\u05da'], // khof -> final khof
]);
/**
 * Precomposed ligature code points (`lig`) paired with their multi-character spelling (`split`).
 */
const ligatures = [
    {lig: '\u05f0', split: '\u05d5' + '\u05d5'}, // double vov -> vov vov
    {lig: '\u05f1', split: '\u05d5' + '\u05d9'}, // vov yud -> vov, yud
    {lig: '\u05f2', split: '\u05d9' + '\u05d9'}, // double yud -> yud yud
    {lig: '\ufb1d', split: '\u05d9' + '\u05b4'}, // yud with khirik -> yud + khirik
    {lig: '\ufb1f', split: '\u05d9' + '\u05d9' + '\u05b7'}, // double yud with pasekh -> yud yud + pasekh
    {lig: '\ufb2e', split: '\u05d0' + '\u05b7'}, // Pasekh alef
    {lig: '\ufb2f', split: '\u05d0' + '\u05b8'}, // Komets alef
];
/**
 * Replaces the last character of the text with its final form when it has one.
 * Only the very last character of the string is inspected; an empty string is returned as is.
 * @type {import('language').TextProcessor<boolean>}
 */
export const convertFinalLetters = {
    name: 'Convert to Final Letters',
    description: 'קויף → קויפֿ',
    options: [true],
    process: (str) => {
        const lastIndex = str.length - 1;
        // charAt() yields '' for an empty string, which is never a key of the map.
        const finalForm = final_letter_map.get(str.charAt(lastIndex));
        return (typeof finalForm === 'undefined') ? str : str.substring(0, lastIndex) + finalForm;
    },
};
/**
 * Converts between ligature code points and their split spellings.
 * - `'off'`: returns the text unchanged.
 * - `'direct'`: replaces every ligature with its split spelling.
 * - `'inverse'`: replaces every split spelling with its ligature.
 * Every occurrence in the text is converted, not only the first one.
 * Any other setting returns the text unchanged.
 * @type {import('language').BidirectionalConversionPreprocessor}
 */
export const convertYiddishLigatures = {
    name: 'Split Ligatures',
    description: 'וו → װ',
    options: ['off', 'direct', 'inverse'],
    process: (str, setting) => {
        let result = str;
        switch (setting) {
            case 'direct':
                for (const {lig, split} of ligatures) {
                    result = result.replaceAll(lig, split);
                }
                return result;
            case 'inverse':
                for (const {lig, split} of ligatures) {
                    result = result.replaceAll(split, lig);
                }
                return result;
            case 'off':
            default:
                return str;
        }
    },
};

View File

@@ -0,0 +1,49 @@
/*
* Copyright (C) 2024-2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/**
 * Precomposed ligature code points (`lig`) paired with their multi-character spelling (`split`).
 */
const ligatures = [
    {lig: '\u05f0', split: '\u05d5' + '\u05d5'}, // double vov <- vov vov
    {lig: '\u05f1', split: '\u05d5' + '\u05d9'}, // vov yud <- vov, yud
    {lig: '\u05f2', split: '\u05d9' + '\u05d9'}, // double yud <- yud yud
    {lig: '\ufb1d', split: '\u05d9' + '\u05b4'}, // yud with khirik <- yud + khirik
    {lig: '\ufb1f', split: '\u05d9' + '\u05d9' + '\u05b7'}, // double yud with pasekh <- yud yud + pasekh
    {lig: '\ufb2e', split: '\u05d0' + '\u05b7'}, // Pasekh alef
    {lig: '\ufb2f', split: '\u05d0' + '\u05b8'}, // Komets alef
];
/**
 * Replaces every split spelling in the text with the corresponding ligature code point.
 * The table is applied in order, and every occurrence is replaced (not only the first one).
 * @type {import('language').TextProcessor<boolean>}
 */
export const combineYiddishLigatures = {
    name: 'Combine Ligatures',
    description: 'וו → װ',
    options: [true],
    process: (str) => {
        let result = str;
        for (const {lig, split} of ligatures) {
            result = result.replaceAll(split, lig);
        }
        return result;
    },
};
/**
 * Drops every UTF-16 code unit in the range U+05B0..U+05C7 from the text.
 * All other characters (including precomposed letters) are kept as they are.
 * @type {import('language').TextProcessor<boolean>}
 */
export const removeYiddishDiacritics = {
    name: 'Remove Diacritics',
    description: 'פאת → פֿאָתּ',
    options: [true],
    process: (str) => {
        const kept = [];
        for (let i = 0; i < str.length; ++i) {
            const unit = str.charCodeAt(i);
            const isDiacritic = unit >= 0x05b0 && unit <= 0x05c7;
            if (!isDiacritic) { kept.push(str.charAt(i)); }
        }
        return kept.join('');
    },
};

View File

@@ -0,0 +1,167 @@
/*
* Copyright (C) 2024-2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import {suffixInflection} from '../language-transforms.js';
/** @typedef {keyof typeof conditions} Condition */
/**
 * Stem letter alternations: `new` is the letter found in the inflected form,
 * `orig` is the letter that is restored when deinflecting.
 */
const mutations = [
    {new: '\u05e2', orig: '\ufb2e'}, // Ayin to pasekh alef
    {new: '\u05e2', orig: '\ufb2f'}, // Ayin to komets alef
    {new: '\u05e2', orig: '\u05D0'}, // Ayin to shumter alef
    {new: '\u05f1', orig: '\u05e2'}, // Vov yud to ayin
    {new: '\u05f2', orig: '\u05f1'}, // Tsvey yudn to Vov yud
    {new: '\u05d9', orig: '\u05d5'}, // Yud to Vov
];
// Matches the last letter of a string that belongs to the set of letters taking part in stem mutations.
const LAST_MUTABLE_LETTER_PATTERN = /[\u05E2\u05F0\u05D0\uFB2E\u05F1\u05D5\u05F2\uFB1D\uFB1F\u05D9\uFB2F](?!.*[\u05E2\u05F0\u05D0\uFB2E\u05F1\u05D5\u05F2\uFB1D\uFB1F\u05D9\uFB2F])/;
/**
 * Builds one suffix rule per entry of `mutations`. Each rule removes `inflectedSuffix`,
 * appends `deinflectedSuffix`, and swaps the last mutable letter of the remaining stem
 * from the mutation's `new` letter back to its `orig` letter. When the last mutable letter
 * of the stem is missing or is not the mutation's `new` letter, the rule yields ''.
 * @param {string} inflectedSuffix
 * @param {string} deinflectedSuffix
 * @param {Condition[]} conditionsIn
 * @param {Condition[]} conditionsOut
 * @returns {import('language-transformer').SuffixRule<Condition>[]}
 */
function umlautMutationSuffixInflection(inflectedSuffix, deinflectedSuffix, conditionsIn, conditionsOut) {
    const isInflected = new RegExp(`${inflectedSuffix}$`);
    const suffixLength = inflectedSuffix.length;
    return mutations.map(({new: mutatedLetter, orig: originalLetter}) => {
        /**
         * @param {string} text
         * @returns {string}
         */
        const deinflect = (text) => {
            const stem = text.slice(0, -suffixLength);
            const hit = LAST_MUTABLE_LETTER_PATTERN.exec(stem);
            if (hit === null || hit[0] !== mutatedLetter) { return ''; }
            const beforeLetter = text.slice(0, hit.index);
            const afterLetter = text.slice(hit.index + 1, -suffixLength);
            return beforeLetter + originalLetter + afterLetter + deinflectedSuffix;
        };
        return {
            type: 'suffix',
            isInflected,
            deinflected: deinflectedSuffix,
            deinflect,
            conditionsIn,
            conditionsOut,
        };
    });
}
// Grammatical conditions referenced by the Yiddish transform rules.
// The keys of this object form the `Condition` type (see the typedef near the top of the file).
// `isDictionaryForm` flags whether the condition is treated as a dictionary form;
// `subConditions` lists the narrower conditions grouped under a broader one.
const conditions = {
// Verbs, grouping the past and present tense conditions.
v: {
name: 'Verb',
isDictionaryForm: true,
subConditions: ['vpast', 'vpresent'],
},
vpast: {
name: 'Verb, past tense',
isDictionaryForm: false,
},
vpresent: {
name: 'Verb, present tense',
isDictionaryForm: true,
},
// Nouns, grouping the plural and singular conditions.
n: {
name: 'Noun',
isDictionaryForm: true,
subConditions: ['np', 'ns'],
},
np: {
name: 'Noun, plural',
isDictionaryForm: false,
},
ns: {
name: 'Noun, singular',
isDictionaryForm: true,
},
// Adjectives and adverbs have no sub-conditions.
adj: {
name: 'Adjective',
isDictionaryForm: true,
},
adv: {
name: 'Adverb',
isDictionaryForm: true,
},
};
/** @type {import('language-transformer').LanguageTransformDescriptor<Condition>} */
export const yiddishTransforms = {
language: 'yi',
conditions,
transforms: {
plural: {
name: 'plural',
description: 'plural form of a noun',
rules: [
suffixInflection('\u05E1', '', ['np'], ['ns']), // -s
suffixInflection('\u05DF', '', ['np'], ['ns']), // -n
suffixInflection('\u05D9\u05DD', '', ['np'], ['ns']), // -im, hebrew
suffixInflection('\u05E2\u05E8', '', ['np'], ['ns']), // -er
suffixInflection('\u05E2\u05DA', '', ['np'], ['ns']), // -ekh
suffixInflection('\u05E2\u05DF', '', ['np'], ['ns']), // -en
suffixInflection('\u05E2\u05E1', '', ['np'], ['ns']), // -es
suffixInflection('\u05D5\u05EA', '', ['np'], ['ns']), // -ot, hebrew
suffixInflection('\u05E0\u05E1', '', ['np'], ['ns']), // -ns
suffixInflection('\u05E2\u05E8\u05E2\u05DF', '', ['np'], ['ns']), // -eren
suffixInflection('\u05E2\u05E0\u05E2\u05E1', '', ['np'], ['ns']), // -enes
suffixInflection('\u05E2\u05E0\u05E1', '', ['np'], ['ns']), // -ens
suffixInflection('\u05E2\u05E8\u05E1', '', ['np'], ['ns']), // -ers
suffixInflection('\u05E1\u05E2\u05E8', '', ['np'], ['ns']), // -ser
],
},
umlaut_plural: {
name: 'umlaut_plural',
description: 'plural form of a umlaut noun',
rules: [
...umlautMutationSuffixInflection('\u05E2\u05E8', '', ['np'], ['ns']), // -er
...umlautMutationSuffixInflection('\u05E2\u05E1', '', ['np'], ['ns']), // -es
...umlautMutationSuffixInflection('\u05D9\u05DD', '', ['np'], ['ns']), // -im
...umlautMutationSuffixInflection('\u05E2\u05DF', '', ['np'], ['ns']), // -en
...umlautMutationSuffixInflection('\u05DF', '', ['np'], ['ns']), // -n
...umlautMutationSuffixInflection('\u05E1', '', ['np'], ['ns']), // -s
...umlautMutationSuffixInflection('\u05E2\u05DA', '', ['np'], ['ns']), // -ekh
...umlautMutationSuffixInflection('\u05E2\u05E8\u05E1', '', ['np'], ['ns']), // -ers
],
},
diminutive: {
name: 'diminutive',
description: 'diminutive form of a noun',
rules: [
suffixInflection('\u05D8\u05E9\u05D9\u05E7', '', ['n'], ['n']), // -tshik
suffixInflection('\u05E7\u05E2', '', ['n'], ['n']), // -ke
suffixInflection('\u05DC', '', ['n'], ['n']), // -l
suffixInflection('\u05E2\u05DC\u05E2', '', ['n'], ['n']), // -ele
],
},
diminutive_and_umlaut: {
name: 'diminutive_and_umlaut',
description: 'diminutive form of a noun with stem umlaut',
rules: [
...umlautMutationSuffixInflection('\u05DC', '', ['n'], ['n']), // -l
...umlautMutationSuffixInflection('\u05E2\u05DC\u05E2', '', ['n'], ['n']), // -ele
],
},
verb_present_singular_to_first_person: {
name: 'verb_present_singular_to_first_person',
description: 'Turn the second and third person singular form to first person',
rules: [
suffixInflection('\u05E1\u05D8', '', ['v'], ['vpresent']), // -st
suffixInflection('\u05D8', '', ['v'], ['vpresent']), // -t
suffixInflection('\u05E0\u05D3\u05D9\u05E7', '', ['v'], ['vpresent']), // -ndik
],
},
verb_present_plural_to_first_person: {
name: 'verb_present_plural_to_first_person',
description: 'Turn the second plural form to first person plural form',
rules: [
suffixInflection('\u05D8\u05E1', '\u05E0', ['v'], ['vpresent']), // -ts
suffixInflection('\u05D8', '\u05E0', ['v'], ['vpresent']), // -t
],
},
},
};

View File

@@ -0,0 +1,75 @@
/*
* Copyright (C) 2024-2025 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
import {CJK_IDEOGRAPH_RANGES, CJK_PUNCTUATION_RANGE, FULLWIDTH_CHARACTER_RANGES, isCodePointInRanges} from '../CJK-util.js';
/** @type {import('CJK-util').CodepointRange} */
const BOPOMOFO_RANGE = [0x3100, 0x312f];
/** @type {import('CJK-util').CodepointRange} */
const BOPOMOFO_EXTENDED_RANGE = [0x31a0, 0x31bf];
/** @type {import('CJK-util').CodepointRange} */
const IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_RANGE = [0x16fe0, 0x16fff];
/** @type {import('CJK-util').CodepointRange} */
const SMALL_FORM_RANGE = [0xfe50, 0xfe6f];
/** @type {import('CJK-util').CodepointRange} */
const VERTICAL_FORM_RANGE = [0xfe10, 0xfe1f];
/**
* Chinese character ranges, roughly ordered in order of expected frequency.
* @type {import('CJK-util').CodepointRange[]}
*/
const CHINESE_RANGES = [
...CJK_IDEOGRAPH_RANGES,
CJK_PUNCTUATION_RANGE,
...FULLWIDTH_CHARACTER_RANGES,
BOPOMOFO_RANGE,
BOPOMOFO_EXTENDED_RANGE,
IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_RANGE,
SMALL_FORM_RANGE,
VERTICAL_FORM_RANGE,
];
/**
 * Reports whether at least one code point of the string lies in `CHINESE_RANGES`.
 * An empty string yields `false`.
 * @param {string} str
 * @returns {boolean}
 */
export function isStringPartiallyChinese(str) {
    if (str.length === 0) { return false; }
    // Spreading iterates by code point, so astral characters are checked as a whole.
    return [...str].some((character) => {
        const codePoint = /** @type {number} */ (character.codePointAt(0));
        return isCodePointInRanges(codePoint, CHINESE_RANGES);
    });
}
/**
 * Reports whether a single code point lies in one of the `CHINESE_RANGES`.
 * @param {number} codePoint
 * @returns {boolean}
 */
export function isCodePointChinese(codePoint) {
    const withinChineseRanges = isCodePointInRanges(codePoint, CHINESE_RANGES);
    return withinChineseRanges;
}
/**
 * Normalizes a pinyin reading: composes it (NFC), lowercases it, and strips whitespace,
 * the characters `・`, `:`, `'`, `-`, and every `//` sequence.
 * @type {import('language').ReadingNormalizer}
 */
export function normalizePinyin(str) {
    const separatorPattern = /[\s・:'-]|\/\//g;
    const composed = str.normalize('NFC');
    const lowered = composed.toLowerCase();
    return lowered.replace(separatorPattern, '');
}