mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-02-28 18:22:42 -08:00
initial commit
This commit is contained in:
138
vendor/yomitan/js/language/CJK-util.js
vendored
Normal file
138
vendor/yomitan/js/language/CJK-util.js
vendored
Normal file
@@ -0,0 +1,138 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import {basicTextProcessorOptions} from './text-processors.js';
|
||||
|
||||
/** @type {import('CJK-util').CodepointRange} */
|
||||
const CJK_UNIFIED_IDEOGRAPHS_RANGE = [0x4e00, 0x9fff];
|
||||
/** @type {import('CJK-util').CodepointRange} */
|
||||
const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE = [0x3400, 0x4dbf];
|
||||
/** @type {import('CJK-util').CodepointRange} */
|
||||
const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE = [0x20000, 0x2a6df];
|
||||
/** @type {import('CJK-util').CodepointRange} */
|
||||
const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE = [0x2a700, 0x2b73f];
|
||||
/** @type {import('CJK-util').CodepointRange} */
|
||||
const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE = [0x2b740, 0x2b81f];
|
||||
/** @type {import('CJK-util').CodepointRange} */
|
||||
const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE = [0x2b820, 0x2ceaf];
|
||||
/** @type {import('CJK-util').CodepointRange} */
|
||||
const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE = [0x2ceb0, 0x2ebef];
|
||||
/** @type {import('CJK-util').CodepointRange} */
|
||||
const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G_RANGE = [0x30000, 0x3134f];
|
||||
/** @type {import('CJK-util').CodepointRange} */
|
||||
const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H_RANGE = [0x31350, 0x323af];
|
||||
/** @type {import('CJK-util').CodepointRange} */
|
||||
const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I_RANGE = [0x2ebf0, 0x2ee5f];
|
||||
/** @type {import('CJK-util').CodepointRange} */
|
||||
const CJK_COMPATIBILITY_IDEOGRAPHS_RANGE = [0xf900, 0xfaff];
|
||||
/** @type {import('CJK-util').CodepointRange} */
|
||||
const CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE = [0x2f800, 0x2fa1f];
|
||||
|
||||
/** @type {import('CJK-util').CodepointRange[]} */
|
||||
export const CJK_IDEOGRAPH_RANGES = [
|
||||
CJK_UNIFIED_IDEOGRAPHS_RANGE,
|
||||
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_RANGE,
|
||||
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_RANGE,
|
||||
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_RANGE,
|
||||
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_RANGE,
|
||||
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_RANGE,
|
||||
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_RANGE,
|
||||
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G_RANGE,
|
||||
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H_RANGE,
|
||||
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I_RANGE,
|
||||
CJK_COMPATIBILITY_IDEOGRAPHS_RANGE,
|
||||
CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_RANGE,
|
||||
];
|
||||
|
||||
/** @type {import('CJK-util').CodepointRange[]} */
|
||||
export const FULLWIDTH_CHARACTER_RANGES = [
|
||||
[0xff10, 0xff19], // Fullwidth numbers
|
||||
[0xff21, 0xff3a], // Fullwidth upper case Latin letters
|
||||
[0xff41, 0xff5a], // Fullwidth lower case Latin letters
|
||||
|
||||
[0xff01, 0xff0f], // Fullwidth punctuation 1
|
||||
[0xff1a, 0xff1f], // Fullwidth punctuation 2
|
||||
[0xff3b, 0xff3f], // Fullwidth punctuation 3
|
||||
[0xff5b, 0xff60], // Fullwidth punctuation 4
|
||||
[0xffe0, 0xffee], // Currency markers
|
||||
];
|
||||
|
||||
/** @type {import('CJK-util').CodepointRange} */
|
||||
export const CJK_PUNCTUATION_RANGE = [0x3000, 0x303f];
|
||||
|
||||
/** @type {import('CJK-util').CodepointRange} */
|
||||
export const CJK_COMPATIBILITY = [0x3300, 0x33ff];
|
||||
|
||||
/**
|
||||
* @param {number} codePoint
|
||||
* @param {import('CJK-util').CodepointRange} range
|
||||
* @returns {boolean}
|
||||
*/
|
||||
export function isCodePointInRange(codePoint, [min, max]) {
|
||||
return (codePoint >= min && codePoint <= max);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {number} codePoint
|
||||
* @param {import('CJK-util').CodepointRange[]} ranges
|
||||
* @returns {boolean}
|
||||
*/
|
||||
export function isCodePointInRanges(codePoint, ranges) {
|
||||
for (const [min, max] of ranges) {
|
||||
if (codePoint >= min && codePoint <= max) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/** @type {import('CJK-util').CodepointRange} */
|
||||
export const KANGXI_RADICALS_RANGE = [0x2f00, 0x2fdf];
|
||||
|
||||
/** @type {import('CJK-util').CodepointRange} */
|
||||
export const CJK_RADICALS_SUPPLEMENT_RANGE = [0x2e80, 0x2eff];
|
||||
|
||||
/** @type {import('CJK-util').CodepointRange} */
|
||||
export const CJK_STROKES_RANGE = [0x31c0, 0x31ef];
|
||||
|
||||
/** @type {import('CJK-util').CodepointRange[]} */
|
||||
export const CJK_RADICALS_RANGES = [
|
||||
KANGXI_RADICALS_RANGE,
|
||||
CJK_RADICALS_SUPPLEMENT_RANGE,
|
||||
CJK_STROKES_RANGE,
|
||||
];
|
||||
|
||||
/**
|
||||
* @param {string} text
|
||||
* @returns {string}
|
||||
*/
|
||||
export function normalizeRadicals(text) {
|
||||
let result = '';
|
||||
for (let i = 0; i < text.length; i++) {
|
||||
const codePoint = text[i].codePointAt(0);
|
||||
result += codePoint && (isCodePointInRanges(codePoint, CJK_RADICALS_RANGES)) ? text[i].normalize('NFKD') : text[i];
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/** @type {import('language').TextProcessor<boolean>} */
|
||||
export const normalizeRadicalCharacters = {
|
||||
name: 'Normalize radical characters',
|
||||
description: '⼀ → 一 (U+2F00 → U+4E00)',
|
||||
options: basicTextProcessorOptions,
|
||||
process: (str, setting) => (setting ? normalizeRadicals(str) : str),
|
||||
};
|
||||
32
vendor/yomitan/js/language/aii/assyrian-neo-aramaic-text-preprocessors.js
vendored
Normal file
32
vendor/yomitan/js/language/aii/assyrian-neo-aramaic-text-preprocessors.js
vendored
Normal file
@@ -0,0 +1,32 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import {basicTextProcessorOptions} from '../text-processors.js';
|
||||
|
||||
const optionalDiacritics = ['\u0303', '\u0304', '\u0307', '\u0308', '\u0323', '\u032E', '\u0330', '\u0331', '\u0730', '\u0731', '\u0732', '\u0733', '\u0734', '\u0735', '\u0736', '\u0737', '\u0738', '\u0739', '\u073A', '\u073B', '\u073C', '\u073D', '\u073E', '\u073F', '\u0740', '\u0741', '\u0742', '\u0743', '\u0744', '\u0745', '\u0746', '\u0747', '\u0748', '\u0749', '\u074A'];
|
||||
|
||||
const diacriticsRegex = new RegExp(`[${optionalDiacritics.join('')}]`, 'g');
|
||||
|
||||
/** @type {import('language').TextProcessor<boolean>} */
|
||||
export const removeSyriacScriptDiacritics = {
|
||||
name: 'Remove diacritics',
|
||||
description: 'ܟܵܬܹܒ݂ ⬅️ ܟܬܒ',
|
||||
options: basicTextProcessorOptions,
|
||||
process: (text, setting) => {
|
||||
return setting ? text.replace(diacriticsRegex, '') : text;
|
||||
},
|
||||
};
|
||||
109
vendor/yomitan/js/language/ar/arabic-text-preprocessors.js
vendored
Normal file
109
vendor/yomitan/js/language/ar/arabic-text-preprocessors.js
vendored
Normal file
@@ -0,0 +1,109 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import {basicTextProcessorOptions} from '../text-processors.js';
|
||||
|
||||
const optionalDiacritics = [
|
||||
'\u0618', // Small Fatha
|
||||
'\u0619', // Small Damma
|
||||
'\u061A', // Small Kasra
|
||||
'\u064B', // Fathatan
|
||||
'\u064C', // Dammatan
|
||||
'\u064D', // Kasratan
|
||||
'\u064E', // Fatha
|
||||
'\u064F', // Damma
|
||||
'\u0650', // Kasra
|
||||
'\u0651', // Shadda
|
||||
'\u0652', // Sukun
|
||||
'\u0653', // Maddah
|
||||
'\u0654', // Hamza Above
|
||||
'\u0655', // Hamza Below
|
||||
'\u0656', // Subscript Alef
|
||||
'\u0670', // Dagger Alef
|
||||
];
|
||||
|
||||
const diacriticsRegex = new RegExp(`[${optionalDiacritics.join('')}]`, 'g');
|
||||
|
||||
/** @type {import('language').TextProcessor<boolean>} */
|
||||
export const removeArabicScriptDiacritics = {
|
||||
name: 'Remove diacritics',
|
||||
description: 'وَلَدَ → ولد',
|
||||
options: basicTextProcessorOptions,
|
||||
process: (text, setting) => {
|
||||
return setting ? text.replace(diacriticsRegex, '') : text;
|
||||
},
|
||||
};
|
||||
|
||||
/** @type {import('language').TextProcessor<boolean>} */
|
||||
export const removeTatweel = {
|
||||
name: 'Remove tatweel characters',
|
||||
description: 'لـكن → لكن',
|
||||
options: basicTextProcessorOptions,
|
||||
process: (text, setting) => {
|
||||
return setting ? text.replaceAll('ـ', '') : text;
|
||||
},
|
||||
};
|
||||
|
||||
/** @type {import('language').TextProcessor<boolean>} */
|
||||
export const normalizeUnicode = {
|
||||
name: 'Normalize unicode',
|
||||
description: 'ﻴ → ي',
|
||||
options: basicTextProcessorOptions,
|
||||
process: (text, setting) => {
|
||||
return setting ? text.normalize('NFKC') : text;
|
||||
},
|
||||
};
|
||||
|
||||
/** @type {import('language').TextProcessor<boolean>} */
|
||||
export const addHamzaTop = {
|
||||
name: 'Add Hamza to top of Alif',
|
||||
description: 'اكبر → أكبر',
|
||||
options: basicTextProcessorOptions,
|
||||
process: (text, setting) => {
|
||||
return setting ? text.replace('ا', 'أ') : text;
|
||||
},
|
||||
};
|
||||
|
||||
/** @type {import('language').TextProcessor<boolean>} */
|
||||
export const addHamzaBottom = {
|
||||
name: 'Add Hamza to bottom of Alif',
|
||||
description: 'اسلام → إسلام',
|
||||
options: basicTextProcessorOptions,
|
||||
process: (text, setting) => {
|
||||
return setting ? text.replace('ا', 'إ') : text;
|
||||
},
|
||||
};
|
||||
|
||||
/** @type {import('language').TextProcessor<boolean>} */
|
||||
export const convertAlifMaqsuraToYaa = {
|
||||
name: 'Convert Alif Maqsura to Yaa',
|
||||
description: 'فى → في',
|
||||
options: basicTextProcessorOptions,
|
||||
process: (text, setting) => {
|
||||
return setting ? text.replace(/ى$/, 'ي') : text;
|
||||
},
|
||||
};
|
||||
|
||||
/** @type {import('language').TextProcessor<boolean>} */
|
||||
export const convertHaToTaMarbuta = {
|
||||
name: 'Convert final Ha to Ta Marbuta',
|
||||
description: 'لغه → لغة',
|
||||
options: basicTextProcessorOptions,
|
||||
process: (text, setting) => {
|
||||
return setting ? text.replace(/ه$/, 'ة') : text;
|
||||
},
|
||||
};
|
||||
834
vendor/yomitan/js/language/ar/arabic-transforms.js
vendored
Normal file
834
vendor/yomitan/js/language/ar/arabic-transforms.js
vendored
Normal file
@@ -0,0 +1,834 @@
|
||||
/*
|
||||
* Copyright (C) 2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import {prefixInflection, suffixInflection} from '../language-transforms.js';
|
||||
|
||||
const arabicLetters = '[\u0620-\u065F\u066E-\u06D3\u06D5\u06EE\u06EF\u06FA-\u06FC\u06FF]';
|
||||
const directObjectPronouns1st = ['ني', 'نا'];
|
||||
const directObjectPronouns2nd = ['ك', 'كما', 'كم', 'كن'];
|
||||
const directObjectPronouns3rd = ['ه', 'ها', 'هما', 'هم', 'هن'];
|
||||
const directObjectPronouns = [...directObjectPronouns1st, ...directObjectPronouns2nd, ...directObjectPronouns3rd];
|
||||
const possessivePronouns = ['ي', 'نا', ...directObjectPronouns2nd, ...directObjectPronouns3rd];
|
||||
const nonAssimilatingPossessivePronouns = ['نا', ...directObjectPronouns2nd, ...directObjectPronouns3rd];
|
||||
|
||||
/**
|
||||
* @param {string} prefix
|
||||
* @param {boolean} includeLiPrefix
|
||||
* @returns {string[]}
|
||||
*/
|
||||
function getImperfectPrefixes(prefix, includeLiPrefix = true) {
|
||||
return [
|
||||
`${prefix}`,
|
||||
`و${prefix}`,
|
||||
`ف${prefix}`,
|
||||
`س${prefix}`,
|
||||
`وس${prefix}`,
|
||||
`فس${prefix}`,
|
||||
...(includeLiPrefix ? [`ل${prefix}`, `ول${prefix}`, `فل${prefix}`] : []),
|
||||
];
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} inflectedPrefix
|
||||
* @param {string} deinflectedPrefix
|
||||
* @param {string} initialStemSegment
|
||||
* @param {Condition[]} conditionsIn
|
||||
* @param {Condition[]} conditionsOut
|
||||
* @returns {import('language-transformer').Rule<Condition>}
|
||||
*/
|
||||
function conditionalPrefixInflection(inflectedPrefix, deinflectedPrefix, initialStemSegment, conditionsIn, conditionsOut) {
|
||||
const prefixRegExp = new RegExp('^' + inflectedPrefix + initialStemSegment);
|
||||
return {
|
||||
type: 'prefix',
|
||||
isInflected: prefixRegExp,
|
||||
deinflect: (text) => deinflectedPrefix + text.slice(inflectedPrefix.length),
|
||||
conditionsIn,
|
||||
conditionsOut,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} inflectedSuffix
|
||||
* @param {string} deinflectedSuffix
|
||||
* @param {string} finalStemSegment
|
||||
* @param {Condition[]} conditionsIn
|
||||
* @param {Condition[]} conditionsOut
|
||||
* @returns {import('language-transformer').SuffixRule<Condition>}
|
||||
*/
|
||||
function conditionalSuffixInflection(inflectedSuffix, deinflectedSuffix, finalStemSegment, conditionsIn, conditionsOut) {
|
||||
const suffixRegExp = new RegExp(finalStemSegment + inflectedSuffix + '$');
|
||||
return {
|
||||
type: 'suffix',
|
||||
isInflected: suffixRegExp,
|
||||
deinflected: deinflectedSuffix,
|
||||
deinflect: (text) => text.slice(0, -inflectedSuffix.length) + deinflectedSuffix,
|
||||
conditionsIn,
|
||||
conditionsOut,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} inflectedPrefix
|
||||
* @param {string} deinflectedPrefix
|
||||
* @param {string} inflectedSuffix
|
||||
* @param {string} deinflectedSuffix
|
||||
* @param {Condition[]} conditionsIn
|
||||
* @param {Condition[]} conditionsOut
|
||||
* @param {object} [options={}]
|
||||
* @param {string} [options.initialStemSegment = '']
|
||||
* @param {string} [options.finalStemSegment = '']
|
||||
* @returns {import('language-transformer').Rule<Condition>}
|
||||
*/
|
||||
function sandwichInflection(
|
||||
inflectedPrefix,
|
||||
deinflectedPrefix,
|
||||
inflectedSuffix,
|
||||
deinflectedSuffix,
|
||||
conditionsIn,
|
||||
conditionsOut,
|
||||
{initialStemSegment = '', finalStemSegment = ''} = {},
|
||||
) {
|
||||
if (!inflectedSuffix && !deinflectedSuffix) {
|
||||
return conditionalPrefixInflection(
|
||||
inflectedPrefix,
|
||||
deinflectedPrefix,
|
||||
initialStemSegment,
|
||||
conditionsIn,
|
||||
conditionsOut,
|
||||
);
|
||||
}
|
||||
if (!inflectedPrefix && !deinflectedPrefix) {
|
||||
return conditionalSuffixInflection(
|
||||
inflectedSuffix,
|
||||
deinflectedSuffix,
|
||||
finalStemSegment,
|
||||
conditionsIn,
|
||||
conditionsOut,
|
||||
);
|
||||
}
|
||||
|
||||
const regex = new RegExp(
|
||||
`^${inflectedPrefix}${initialStemSegment}${arabicLetters}+${finalStemSegment}${inflectedSuffix}$`,
|
||||
);
|
||||
return {
|
||||
type: 'other',
|
||||
isInflected: regex,
|
||||
deinflect: (text) => deinflectedPrefix + text.slice(inflectedPrefix.length, -inflectedSuffix.length) + deinflectedSuffix,
|
||||
conditionsIn,
|
||||
conditionsOut,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} inflectedPrefix
|
||||
* @param {string} deinflectedPrefix
|
||||
* @param {string} inflectedSuffix
|
||||
* @param {string} deinflectedSuffix
|
||||
* @param {object} [options={}]
|
||||
* @param {string} [options.attachedSuffix = inflectedSuffix]
|
||||
* @param {boolean} [options.attachesTo1st = true]
|
||||
* @param {boolean} [options.attachesTo2nd = true]
|
||||
* @param {boolean} [options.includeLiPrefix = true]
|
||||
* @param {string} [options.initialStemSegment = '']
|
||||
* @param {string} [options.finalStemSegment = '']
|
||||
* @returns {import('language-transformer').Rule<Condition>[]}
|
||||
*/
|
||||
function getImperfectRules(
|
||||
inflectedPrefix,
|
||||
deinflectedPrefix,
|
||||
inflectedSuffix,
|
||||
deinflectedSuffix,
|
||||
{
|
||||
attachedSuffix = inflectedSuffix,
|
||||
attachesTo1st = true,
|
||||
attachesTo2nd = true,
|
||||
includeLiPrefix = true,
|
||||
initialStemSegment = '',
|
||||
finalStemSegment = '',
|
||||
} = {},
|
||||
) {
|
||||
const stemSegments = {initialStemSegment, finalStemSegment};
|
||||
const rules = getImperfectPrefixes(inflectedPrefix, includeLiPrefix).flatMap((pre) => [
|
||||
sandwichInflection(pre, deinflectedPrefix, inflectedSuffix, deinflectedSuffix, ['iv_p'], ['iv'], stemSegments),
|
||||
|
||||
// With attached direct object pronouns
|
||||
...(attachesTo1st ?
|
||||
directObjectPronouns1st.map((p) => sandwichInflection(
|
||||
pre,
|
||||
deinflectedPrefix,
|
||||
attachedSuffix + p,
|
||||
deinflectedSuffix,
|
||||
['iv_p'],
|
||||
['iv'],
|
||||
stemSegments,
|
||||
)) :
|
||||
[]),
|
||||
...(attachesTo2nd ?
|
||||
directObjectPronouns2nd.map((p) => sandwichInflection(
|
||||
pre,
|
||||
deinflectedPrefix,
|
||||
attachedSuffix + p,
|
||||
deinflectedSuffix,
|
||||
['iv_p'],
|
||||
['iv'],
|
||||
stemSegments,
|
||||
)) :
|
||||
[]),
|
||||
...directObjectPronouns3rd.map((p) => sandwichInflection(
|
||||
pre,
|
||||
deinflectedPrefix,
|
||||
attachedSuffix + p,
|
||||
deinflectedSuffix,
|
||||
['iv_p'],
|
||||
['iv'],
|
||||
stemSegments,
|
||||
)),
|
||||
]);
|
||||
|
||||
if (!deinflectedPrefix) {
|
||||
const opts = {
|
||||
attachedSuffix,
|
||||
attachesTo1st,
|
||||
attachesTo2nd,
|
||||
includeLiPrefix,
|
||||
initialStemSegment,
|
||||
finalStemSegment,
|
||||
};
|
||||
|
||||
// For Form IV, VII, VIII, IX, X, XI, XII, XIII, XIV, XV verbs
|
||||
rules.push(
|
||||
...getImperfectRules(inflectedPrefix, 'أ', inflectedSuffix, deinflectedSuffix, opts),
|
||||
...getImperfectRules(inflectedPrefix, 'ا', inflectedSuffix, deinflectedSuffix, opts),
|
||||
);
|
||||
}
|
||||
|
||||
return rules;
|
||||
}
|
||||
|
||||
/** @typedef {keyof typeof conditions} Condition */
|
||||
const conditions = {
|
||||
n: {
|
||||
name: 'Noun',
|
||||
isDictionaryForm: true,
|
||||
},
|
||||
n_p: {
|
||||
name: 'Noun with Prefix only',
|
||||
isDictionaryForm: false,
|
||||
subConditions: ['n_wa', 'n_bi', 'n_ka', 'n_li', 'n_al', 'n_bi_al', 'n_ka_al', 'n_lil', 'n_li_al'],
|
||||
},
|
||||
n_def: {
|
||||
name: 'Noun with Definite Prefix',
|
||||
isDictionaryForm: false,
|
||||
subConditions: ['n_al', 'n_bi_al', 'n_ka_al', 'n_lil', 'n_li_al'],
|
||||
},
|
||||
n_indef: {
|
||||
name: 'Noun with Indefinite Prefix',
|
||||
isDictionaryForm: false,
|
||||
subConditions: ['n_wa', 'n_bi', 'n_ka', 'n_li'],
|
||||
},
|
||||
n_nom: {
|
||||
name: 'Nominative Noun with Prefix',
|
||||
isDictionaryForm: false,
|
||||
subConditions: ['n_wa', 'n_li', 'n_al'],
|
||||
},
|
||||
n_nom_indef: {
|
||||
name: 'Nominative Noun with Indefinite Prefix',
|
||||
isDictionaryForm: false,
|
||||
subConditions: ['n_wa', 'n_li'],
|
||||
},
|
||||
n_wa: {
|
||||
name: 'Noun with و Prefix',
|
||||
isDictionaryForm: false,
|
||||
},
|
||||
n_bi: {
|
||||
name: 'Noun with ب Prefix',
|
||||
isDictionaryForm: false,
|
||||
},
|
||||
n_ka: {
|
||||
name: 'Noun with ك Prefix',
|
||||
isDictionaryForm: false,
|
||||
},
|
||||
n_li: {
|
||||
name: 'Noun with ل Prefix',
|
||||
isDictionaryForm: false,
|
||||
},
|
||||
n_al: {
|
||||
name: 'Noun with ال Prefix',
|
||||
isDictionaryForm: false,
|
||||
},
|
||||
n_bi_al: {
|
||||
name: 'Noun with بال Prefix',
|
||||
isDictionaryForm: false,
|
||||
},
|
||||
n_ka_al: {
|
||||
name: 'Noun with كال Prefix',
|
||||
isDictionaryForm: false,
|
||||
},
|
||||
n_lil: {
|
||||
name: 'Noun with لل Prefix',
|
||||
isDictionaryForm: false,
|
||||
},
|
||||
n_li_al: {
|
||||
name: 'Noun with Assimilated لل Prefix',
|
||||
isDictionaryForm: false,
|
||||
},
|
||||
n_s: {
|
||||
name: 'Noun with Suffix',
|
||||
isDictionaryForm: false,
|
||||
},
|
||||
v: {
|
||||
name: 'Verb',
|
||||
isDictionaryForm: true,
|
||||
subConditions: ['pv', 'iv', 'cv'],
|
||||
},
|
||||
pv: {
|
||||
name: 'Perfect Verb (no affixes)',
|
||||
isDictionaryForm: true,
|
||||
},
|
||||
pv_p: {
|
||||
name: 'Perfect Verb with Prefix',
|
||||
isDictionaryForm: false,
|
||||
},
|
||||
pv_s: {
|
||||
name: 'Perfect Verb with Suffix only',
|
||||
isDictionaryForm: false,
|
||||
},
|
||||
iv: {
|
||||
name: 'Imperfect Verb (no affixes)',
|
||||
isDictionaryForm: true,
|
||||
},
|
||||
iv_p: {
|
||||
name: 'Imperfect Verb with Prefix',
|
||||
isDictionaryForm: false,
|
||||
},
|
||||
iv_s: {
|
||||
name: 'Imperfect Verb with Suffix only',
|
||||
isDictionaryForm: false,
|
||||
},
|
||||
cv: {
|
||||
name: 'Command Verb (no affixes)',
|
||||
isDictionaryForm: true,
|
||||
},
|
||||
cv_p: {
|
||||
name: 'Command Verb with Prefix',
|
||||
isDictionaryForm: false,
|
||||
},
|
||||
cv_s: {
|
||||
name: 'Command Verb with Suffix only',
|
||||
isDictionaryForm: false,
|
||||
},
|
||||
};
|
||||
|
||||
/** @type {import('language-transformer').LanguageTransformDescriptor<Condition>} */
|
||||
export const arabicTransforms = {
|
||||
language: 'ar',
|
||||
conditions,
|
||||
transforms: {
|
||||
// Noun
|
||||
'NPref-Wa': {
|
||||
name: 'and',
|
||||
description: 'and (و); and, so (ف)',
|
||||
rules: [
|
||||
prefixInflection('و', '', ['n_wa'], ['n']),
|
||||
prefixInflection('ف', '', ['n_wa'], ['n']),
|
||||
],
|
||||
},
|
||||
'NPref-Bi': {
|
||||
name: 'by, with',
|
||||
description: 'by, with',
|
||||
rules: [
|
||||
prefixInflection('ب', '', ['n_bi'], ['n']),
|
||||
prefixInflection('وب', '', ['n_bi'], ['n']),
|
||||
prefixInflection('فب', '', ['n_bi'], ['n']),
|
||||
],
|
||||
},
|
||||
'NPref-Ka': {
|
||||
name: 'like, such as',
|
||||
description: 'like, such as',
|
||||
rules: [
|
||||
prefixInflection('ك', '', ['n_ka'], ['n']),
|
||||
prefixInflection('وك', '', ['n_ka'], ['n']),
|
||||
prefixInflection('فك', '', ['n_ka'], ['n']),
|
||||
],
|
||||
},
|
||||
'NPref-Li': {
|
||||
name: 'for, to; indeed, truly',
|
||||
description: 'for, to (لِ); indeed, truly (لَ)',
|
||||
rules: [
|
||||
prefixInflection('ل', '', ['n_li'], ['n']),
|
||||
prefixInflection('ول', '', ['n_li'], ['n']),
|
||||
prefixInflection('فل', '', ['n_li'], ['n']),
|
||||
],
|
||||
},
|
||||
'NPref-Al': {
|
||||
name: 'the',
|
||||
description: 'the',
|
||||
rules: [
|
||||
prefixInflection('ال', '', ['n_al'], ['n']),
|
||||
prefixInflection('وال', '', ['n_al'], ['n']),
|
||||
prefixInflection('فال', '', ['n_al'], ['n']),
|
||||
],
|
||||
},
|
||||
'NPref-BiAl': {
|
||||
name: 'by/with + the',
|
||||
description: 'by/with + the',
|
||||
rules: [
|
||||
prefixInflection('بال', '', ['n_bi_al'], ['n']),
|
||||
prefixInflection('وبال', '', ['n_bi_al'], ['n']),
|
||||
prefixInflection('فبال', '', ['n_bi_al'], ['n']),
|
||||
],
|
||||
},
|
||||
'NPref-KaAl': {
|
||||
name: 'like/such as + the',
|
||||
description: 'like/such as + the',
|
||||
rules: [
|
||||
prefixInflection('كال', '', ['n_ka_al'], ['n']),
|
||||
prefixInflection('وكال', '', ['n_ka_al'], ['n']),
|
||||
prefixInflection('فكال', '', ['n_ka_al'], ['n']),
|
||||
],
|
||||
},
|
||||
'NPref-Lil': {
|
||||
name: 'for/to + the',
|
||||
description: 'for/to + the',
|
||||
rules: [
|
||||
conditionalPrefixInflection('لل', '', '(?!ل)', ['n_lil'], ['n']),
|
||||
conditionalPrefixInflection('ولل', '', '(?!ل)', ['n_lil'], ['n']),
|
||||
conditionalPrefixInflection('فلل', '', '(?!ل)', ['n_lil'], ['n']),
|
||||
],
|
||||
},
|
||||
'NPref-LiAl': {
|
||||
name: 'for/to + the',
|
||||
description: 'for/to + the, assimilated with initial ل',
|
||||
rules: [
|
||||
prefixInflection('لل', 'ل', ['n_li_al'], ['n']),
|
||||
prefixInflection('ولل', 'ل', ['n_li_al'], ['n']),
|
||||
prefixInflection('فلل', 'ل', ['n_li_al'], ['n']),
|
||||
],
|
||||
},
|
||||
|
||||
'NSuff-h': {
|
||||
name: 'pos. pron.',
|
||||
description: 'possessive pronoun',
|
||||
rules: [
|
||||
...nonAssimilatingPossessivePronouns.map((p) => suffixInflection(p, '', ['n_s'], ['n_indef', 'n'])),
|
||||
conditionalSuffixInflection('ي', '', '(?<!ي)', ['n_s'], ['n_indef', 'n']),
|
||||
],
|
||||
},
|
||||
'NSuff-ap': {
|
||||
name: 'fem. sg.',
|
||||
description: 'fem. sg.',
|
||||
rules: [
|
||||
suffixInflection('ة', '', ['n_s'], ['n_p', 'n']),
|
||||
],
|
||||
},
|
||||
'NSuff-ath': {
|
||||
name: 'fem. sg. + pos. pron.',
|
||||
description: 'fem. sg. + possessive pronoun',
|
||||
rules: [
|
||||
...possessivePronouns.map((p) => suffixInflection(`ت${p}`, '', ['n_s'], ['n_indef', 'n'])),
|
||||
...possessivePronouns.map((p) => suffixInflection(`ت${p}`, 'ة', ['n_s'], ['n_indef', 'n'])),
|
||||
],
|
||||
},
|
||||
'NSuff-AF': {
|
||||
name: 'acc. indef.',
|
||||
description: 'accusative indefinite (اً)',
|
||||
rules: [
|
||||
suffixInflection('ا', '', ['n_s'], ['n_wa', 'n']),
|
||||
suffixInflection('اً', '', ['n_s'], ['n_wa', 'n']),
|
||||
suffixInflection('ًا', '', ['n_s'], ['n_wa', 'n']),
|
||||
],
|
||||
},
|
||||
'NSuff-An': {
|
||||
name: 'dual',
|
||||
description: 'nominative m. dual',
|
||||
rules: [
|
||||
suffixInflection('ان', '', ['n_s'], ['n_nom', 'n']),
|
||||
suffixInflection('آن', 'أ', ['n_s'], ['n_nom', 'n']),
|
||||
],
|
||||
},
|
||||
'NSuff-Ah': {
|
||||
name: 'dual + pos. pron.',
|
||||
description: 'nominative m. dual + possessive pronoun',
|
||||
rules: [
|
||||
suffixInflection('ا', '', ['n_s'], ['n_nom_indef', 'n']),
|
||||
suffixInflection('آ', 'أ', ['n_s'], ['n_nom_indef', 'n']),
|
||||
...possessivePronouns.map((p) => suffixInflection(`ا${p}`, '', ['n_s'], ['n_nom_indef', 'n'])),
|
||||
...possessivePronouns.map((p) => suffixInflection(`آ${p}`, 'أ', ['n_s'], ['n_nom_indef', 'n'])),
|
||||
],
|
||||
},
|
||||
'NSuff-ayn': {
|
||||
name: 'dual',
|
||||
description: 'accusative/genitive m. dual',
|
||||
rules: [
|
||||
suffixInflection('ين', '', ['n_s'], ['n_p', 'n']),
|
||||
],
|
||||
},
|
||||
'NSuff-ayh': {
|
||||
name: 'dual + pos. pron.',
|
||||
description: 'accusative/genitive m. dual + possessive pronoun',
|
||||
rules: [
|
||||
suffixInflection('ي', '', ['n_s'], ['n_indef', 'n']),
|
||||
...nonAssimilatingPossessivePronouns.map((p) => suffixInflection(`ي${p}`, '', ['n_s'], ['n_indef', 'n'])),
|
||||
],
|
||||
},
|
||||
'NSuff-atAn': {
|
||||
name: 'dual',
|
||||
description: 'nominative f. dual',
|
||||
rules: [
|
||||
suffixInflection('تان', '', ['n_s'], ['n_nom', 'n']),
|
||||
suffixInflection('تان', 'ة', ['n_s'], ['n_nom', 'n']),
|
||||
],
|
||||
},
|
||||
'NSuff-atAh': {
|
||||
name: 'dual + pos. pron.',
|
||||
description: 'nominative f. dual + possessive pronoun',
|
||||
rules: [
|
||||
suffixInflection('تا', '', ['n_s'], ['n_nom_indef', 'n']),
|
||||
suffixInflection('تا', 'ة', ['n_s'], ['n_nom_indef', 'n']),
|
||||
...possessivePronouns.map((p) => suffixInflection(`تا${p}`, '', ['n_s'], ['n_nom_indef', 'n'])),
|
||||
...possessivePronouns.map((p) => suffixInflection(`تا${p}`, 'ة', ['n_s'], ['n_nom_indef', 'n'])),
|
||||
],
|
||||
},
|
||||
'NSuff-tayn': {
|
||||
name: 'dual',
|
||||
description: 'accusative/genitive f. dual',
|
||||
rules: [
|
||||
suffixInflection('تين', '', ['n_s'], ['n_p', 'n']),
|
||||
suffixInflection('تين', 'ة', ['n_s'], ['n_p', 'n']),
|
||||
],
|
||||
},
|
||||
'NSuff-tayh': {
|
||||
name: 'dual + pos. pron.',
|
||||
description: 'accusative/genitive f. dual + possessive pronoun',
|
||||
rules: [
|
||||
suffixInflection('تي', '', ['n_s'], ['n_indef', 'n']),
|
||||
suffixInflection('تي', 'ة', ['n_s'], ['n_indef', 'n']),
|
||||
...nonAssimilatingPossessivePronouns.map((p) => suffixInflection(`تي${p}`, '', ['n_s'], ['n_indef', 'n'])),
|
||||
...nonAssimilatingPossessivePronouns.map((p) => suffixInflection(`تي${p}`, 'ة', ['n_s'], ['n_indef', 'n'])),
|
||||
],
|
||||
},
|
||||
'NSuff-At': {
|
||||
name: 'f. pl.',
|
||||
description: 'sound f. plural',
|
||||
rules: [
|
||||
suffixInflection('ات', '', ['n_s'], ['n_p', 'n']),
|
||||
suffixInflection('ات', 'ة', ['n_s'], ['n_p', 'n']),
|
||||
suffixInflection('آت', 'أ', ['n_s'], ['n_p', 'n']),
|
||||
suffixInflection('آت', 'أة', ['n_s'], ['n_p', 'n']),
|
||||
],
|
||||
},
|
||||
'NSuff-Ath': {
|
||||
name: 'f. pl. + pos. pron.',
|
||||
description: 'sound f. plural + possessive pronoun',
|
||||
rules: [
|
||||
...possessivePronouns.map((p) => suffixInflection(`ات${p}`, '', ['n_s'], ['n_indef', 'n'])),
|
||||
...possessivePronouns.map((p) => suffixInflection(`ات${p}`, 'ة', ['n_s'], ['n_indef', 'n'])),
|
||||
...possessivePronouns.map((p) => suffixInflection(`آت${p}`, 'أ', ['n_s'], ['n_indef', 'n'])),
|
||||
...possessivePronouns.map((p) => suffixInflection(`آت${p}`, 'أة', ['n_s'], ['n_indef', 'n'])),
|
||||
],
|
||||
},
|
||||
'NSuff-wn': {
|
||||
name: 'm. pl.',
|
||||
description: 'nominative sound m. plural',
|
||||
rules: [
|
||||
suffixInflection('ون', '', ['n_s'], ['n_nom', 'n']),
|
||||
],
|
||||
},
|
||||
'NSuff-wh': {
|
||||
name: 'm. pl + pos. pron.',
|
||||
description: 'nominative sound m. plural + possessive pronoun',
|
||||
rules: [
|
||||
suffixInflection('و', '', ['n_s'], ['n_nom_indef', 'n']),
|
||||
...nonAssimilatingPossessivePronouns.map((p) => suffixInflection(`و${p}`, '', ['n_s'], ['n_nom_indef', 'n'])),
|
||||
],
|
||||
},
|
||||
'NSuff-iyn': {
|
||||
name: 'm. pl.',
|
||||
description: 'accusative/genitive sound m. plural',
|
||||
rules: [
|
||||
suffixInflection('ين', '', ['n_s'], ['n_p', 'n']),
|
||||
],
|
||||
},
|
||||
'NSuff-iyh': {
|
||||
name: 'm. pl. + pos. pron.',
|
||||
description: 'accusative/genitive sound m. plural + possessive pronoun',
|
||||
rules: [
|
||||
suffixInflection('ي', '', ['n_s'], ['n_indef', 'n']),
|
||||
...nonAssimilatingPossessivePronouns.map((p) => suffixInflection(`ي${p}`, '', ['n_s'], ['n_indef', 'n'])),
|
||||
],
|
||||
},
|
||||
|
||||
// Perfect Verb
|
||||
'PVPref-Wa': {
|
||||
name: 'and',
|
||||
description: 'and (و); and, so (ف)',
|
||||
rules: [
|
||||
prefixInflection('و', '', ['pv_p'], ['pv_s', 'pv']),
|
||||
prefixInflection('ف', '', ['pv_p'], ['pv_s', 'pv']),
|
||||
],
|
||||
},
|
||||
'PVPref-La': {
|
||||
name: 'would have',
|
||||
description: 'Result clause particle (if ... I would have ...)',
|
||||
rules: [prefixInflection('ل', '', ['pv_p'], ['pv_s', 'pv'])],
|
||||
},
|
||||
|
||||
'PVSuff-ah': {
|
||||
name: 'Perfect Tense',
|
||||
description: 'Perfect Verb + D.O pronoun',
|
||||
rules: directObjectPronouns.map((p) => suffixInflection(p, '', ['pv_s'], ['pv'])),
|
||||
},
|
||||
'PVSuff-n': {
|
||||
name: 'Perfect Tense',
|
||||
description: 'Perfect Verb suffixes assimilating with ن',
|
||||
rules: [
|
||||
// Stem doesn't end in ن
|
||||
conditionalSuffixInflection('ن', '', '(?<!ن)', ['pv_s'], ['pv']),
|
||||
...directObjectPronouns.map((p) => conditionalSuffixInflection(`ن${p}`, '', '(?<!ن)', ['pv_s'], ['pv'])),
|
||||
|
||||
conditionalSuffixInflection('نا', '', '(?<!ن)', ['pv_s'], ['pv']),
|
||||
...directObjectPronouns2nd.map((p) => conditionalSuffixInflection(`نا${p}`, '', '(?<!ن)', ['pv_s'], ['pv'])),
|
||||
...directObjectPronouns3rd.map((p) => conditionalSuffixInflection(`نا${p}`, '', '(?<!ن)', ['pv_s'], ['pv'])),
|
||||
|
||||
// Suffixes assimilated with stems ending in ن
|
||||
...directObjectPronouns.map((p) => suffixInflection(`ن${p}`, 'ن', ['pv_s'], ['pv'])),
|
||||
|
||||
suffixInflection('نا', 'ن', ['pv_s'], ['pv']),
|
||||
...directObjectPronouns2nd.map((p) => suffixInflection(`نا${p}`, 'ن', ['pv_s'], ['pv'])),
|
||||
...directObjectPronouns3rd.map((p) => suffixInflection(`نا${p}`, 'ن', ['pv_s'], ['pv'])),
|
||||
],
|
||||
},
|
||||
'PVSuff-t': {
|
||||
name: 'Perfect Tense',
|
||||
description: 'Perfect Verb suffixes assimilating with ت',
|
||||
rules: [
|
||||
// This can either be 3rd p. f. singular, or 1st/2nd p. singular
|
||||
// The former doesn't assimilate, the latter do, so the below accounts for both
|
||||
suffixInflection('ت', '', ['pv_s'], ['pv']),
|
||||
...directObjectPronouns.map((p) => suffixInflection(`ت${p}`, '', ['pv_s'], ['pv'])),
|
||||
|
||||
// Stem doesn't end in ت
|
||||
conditionalSuffixInflection('تما', '', '(?<!ت)', ['pv_s'], ['pv']),
|
||||
...directObjectPronouns1st.map((p) => conditionalSuffixInflection(`تما${p}`, '', '(?<!ت)', ['pv_s'], ['pv'])),
|
||||
...directObjectPronouns3rd.map((p) => conditionalSuffixInflection(`تما${p}`, '', '(?<!ت)', ['pv_s'], ['pv'])),
|
||||
|
||||
conditionalSuffixInflection('تم', '', '(?<!ت)', ['pv_s'], ['pv']),
|
||||
...directObjectPronouns1st.map((p) => conditionalSuffixInflection(`تمو${p}`, '', '(?<!ت)', ['pv_s'], ['pv'])),
|
||||
...directObjectPronouns3rd.map((p) => conditionalSuffixInflection(`تمو${p}`, '', '(?<!ت)', ['pv_s'], ['pv'])),
|
||||
|
||||
conditionalSuffixInflection('تن', '', '(?<!ت)', ['pv_s'], ['pv']),
|
||||
...directObjectPronouns1st.map((p) => conditionalSuffixInflection(`تن${p}`, '', '(?<!ت)', ['pv_s'], ['pv'])),
|
||||
...directObjectPronouns3rd.map((p) => conditionalSuffixInflection(`تن${p}`, '', '(?<!ت)', ['pv_s'], ['pv'])),
|
||||
|
||||
// Suffixes assimilated with stems ending in ت
|
||||
...directObjectPronouns.map((p) => suffixInflection(`ت${p}`, 'ت', ['pv_s'], ['pv'])),
|
||||
|
||||
suffixInflection('تما', 'ت', ['pv_s'], ['pv']),
|
||||
...directObjectPronouns1st.map((p) => suffixInflection(`تما${p}`, 'ت', ['pv_s'], ['pv'])),
|
||||
...directObjectPronouns3rd.map((p) => suffixInflection(`تما${p}`, 'ت', ['pv_s'], ['pv'])),
|
||||
|
||||
suffixInflection('تم', 'ت', ['pv_s'], ['pv']),
|
||||
...directObjectPronouns1st.map((p) => suffixInflection(`تمو${p}`, 'ت', ['pv_s'], ['pv'])),
|
||||
...directObjectPronouns3rd.map((p) => suffixInflection(`تمو${p}`, 'ت', ['pv_s'], ['pv'])),
|
||||
|
||||
suffixInflection('تن', 'ت', ['pv_s'], ['pv']),
|
||||
...directObjectPronouns1st.map((p) => suffixInflection(`تن${p}`, 'ت', ['pv_s'], ['pv'])),
|
||||
...directObjectPronouns3rd.map((p) => suffixInflection(`تن${p}`, 'ت', ['pv_s'], ['pv'])),
|
||||
],
|
||||
},
|
||||
'PVSuff-at': {
|
||||
name: 'Perfect Tense',
|
||||
description: 'Perfect Verb non-assimilating ت suffixes',
|
||||
rules: [
|
||||
suffixInflection('تا', '', ['pv_s'], ['pv']),
|
||||
...directObjectPronouns.map((p) => suffixInflection(`تا${p}`, '', ['pv_s'], ['pv'])),
|
||||
],
|
||||
},
|
||||
'PVSuff-A': {
|
||||
name: 'Perfect Tense',
|
||||
description: 'Perfect Verb 3rd. m. dual',
|
||||
rules: [
|
||||
suffixInflection('ا', '', ['pv_s'], ['pv']),
|
||||
...directObjectPronouns.map((p) => suffixInflection(`ا${p}`, '', ['pv_s'], ['pv'])),
|
||||
|
||||
// Combines with أ to form آ
|
||||
suffixInflection('آ', 'أ', ['pv_s'], ['pv']),
|
||||
...directObjectPronouns.map((p) => suffixInflection(`آ${p}`, 'أ', ['pv_s'], ['pv'])),
|
||||
],
|
||||
},
|
||||
'PVSuff-uw': {
|
||||
name: 'Perfect Tense',
|
||||
description: 'Perfect Verb 3rd. m. pl.',
|
||||
rules: [
|
||||
suffixInflection('وا', '', ['pv_s'], ['pv']),
|
||||
...directObjectPronouns.map((p) => suffixInflection(`و${p}`, '', ['pv_s'], ['pv'])),
|
||||
],
|
||||
},
|
||||
|
||||
// Imperfect Verb
|
||||
'IVPref-hw': {
|
||||
name: 'Imperfect Tense',
|
||||
description: 'Imperfect Verb 3rd. m. sing.',
|
||||
rules: [...getImperfectRules('ي', '', '', '')],
|
||||
},
|
||||
'IVPref-hy': {
|
||||
name: 'Imperfect Tense',
|
||||
description: 'Imperfect Verb 3rd. f. sing.',
|
||||
rules: [...getImperfectRules('ت', '', '', '')],
|
||||
},
|
||||
'IVPref-hmA': {
|
||||
name: 'Imperfect Tense',
|
||||
description: 'Imperfect Verb 3rd. m. dual',
|
||||
rules: [
|
||||
// Indicative
|
||||
...getImperfectRules('ي', '', 'ان', '', {includeLiPrefix: false}),
|
||||
...getImperfectRules('ي', '', 'آن', 'أ', {includeLiPrefix: false}),
|
||||
// Subjunctive
|
||||
...getImperfectRules('ي', '', 'ا', ''),
|
||||
...getImperfectRules('ي', '', 'آ', 'أ'),
|
||||
],
|
||||
},
|
||||
'IVPref-hmA-ta': {
|
||||
name: 'Imperfect Tense',
|
||||
description: 'Imperfect Verb 3rd. f. dual',
|
||||
rules: [
|
||||
// Indicative
|
||||
...getImperfectRules('ت', '', 'ان', '', {includeLiPrefix: false}),
|
||||
...getImperfectRules('ت', '', 'آن', 'أ', {includeLiPrefix: false}),
|
||||
// Subjunctive
|
||||
...getImperfectRules('ت', '', 'ا', ''),
|
||||
...getImperfectRules('ت', '', 'آ', 'أ'),
|
||||
],
|
||||
},
|
||||
'IVPref-hm': {
|
||||
name: 'Imperfect Tense',
|
||||
description: 'Imperfect Verb 3rd. m. pl.',
|
||||
rules: [
|
||||
// Indicative
|
||||
...getImperfectRules('ي', '', 'ون', '', {includeLiPrefix: false}),
|
||||
// Subjunctive
|
||||
...getImperfectRules('ي', '', 'وا', '', {attachedSuffix: 'و'}),
|
||||
],
|
||||
},
|
||||
'IVPref-hn': {
|
||||
name: 'Imperfect Tense',
|
||||
description: 'Imperfect Verb 3rd. f. pl.',
|
||||
rules: [
|
||||
...getImperfectRules('ي', '', 'ن', '', {finalStemSegment: '(?<!ن)'}),
|
||||
...getImperfectRules('ي', '', 'ن', 'ن'),
|
||||
],
|
||||
},
|
||||
'IVPref-Anta': {
|
||||
name: 'Imperfect Tense',
|
||||
description: 'Imperfect Verb 2nd. m. sing.',
|
||||
rules: [...getImperfectRules('ت', '', '', '', {attachesTo2nd: false})],
|
||||
},
|
||||
'IVPref-Anti': {
|
||||
name: 'Imperfect Tense',
|
||||
description: 'Imperfect Verb 2nd. f. sing.',
|
||||
rules: [
|
||||
...getImperfectRules('ت', '', 'ين', '', {attachesTo2nd: false, includeLiPrefix: false}), // Indicative
|
||||
...getImperfectRules('ت', '', 'ي', '', {attachesTo2nd: false}), // Subjunctive
|
||||
],
|
||||
},
|
||||
'IVPref-AntmA': {
|
||||
name: 'Imperfect Tense',
|
||||
description: 'Imperfect Verb 2nd. dual',
|
||||
rules: [
|
||||
// Indicative
|
||||
...getImperfectRules('ت', '', 'ان', '', {attachesTo2nd: false, includeLiPrefix: false}),
|
||||
...getImperfectRules('ت', '', 'آن', 'أ', {attachesTo2nd: false, includeLiPrefix: false}),
|
||||
// Subjunctive
|
||||
...getImperfectRules('ت', '', 'ا', '', {attachesTo2nd: false}),
|
||||
...getImperfectRules('ت', '', 'آ', 'أ', {attachesTo2nd: false}),
|
||||
],
|
||||
},
|
||||
'IVPref-Antm': {
|
||||
name: 'Imperfect Tense',
|
||||
description: 'Imperfect Verb 2nd. m. pl.',
|
||||
rules: [
|
||||
// Indicative
|
||||
...getImperfectRules('ت', '', 'ون', '', {attachesTo2nd: false, includeLiPrefix: false}),
|
||||
// Subjunctive
|
||||
...getImperfectRules('ت', '', 'وا', '', {attachesTo2nd: false, attachedSuffix: 'و'}),
|
||||
],
|
||||
},
|
||||
'IVPref-Antn': {
|
||||
name: 'Imperfect Tense',
|
||||
description: 'Imperfect Verb 2nd. f. pl.',
|
||||
rules: [
|
||||
...getImperfectRules('ت', '', 'ن', '', {attachesTo2nd: false, finalStemSegment: '(?<!ن)'}),
|
||||
...getImperfectRules('ت', '', 'ن', 'ن', {attachesTo2nd: false}),
|
||||
],
|
||||
},
|
||||
'IVPref-AnA': {
|
||||
name: 'Imperfect Tense',
|
||||
description: 'Imperfect Verb 1st. sing.',
|
||||
rules: [
|
||||
...getImperfectRules('أ', '', '', '', {attachesTo1st: false}),
|
||||
...getImperfectRules('آ', 'أ', '', '', {attachesTo1st: false}),
|
||||
],
|
||||
},
|
||||
'IVPref-nHn': {
|
||||
name: 'Imperfect Tense',
|
||||
description: 'Imperfect Verb 1st. pl.',
|
||||
rules: [...getImperfectRules('ن', '', '', '', {attachesTo1st: false})],
|
||||
},
|
||||
|
||||
// Command Verb
|
||||
'CVPref': {
|
||||
name: 'Imperative',
|
||||
description: 'Command Verb',
|
||||
rules: [
|
||||
prefixInflection('و', '', ['cv_p'], ['cv_s']),
|
||||
prefixInflection('ف', '', ['cv_p'], ['cv_s']),
|
||||
prefixInflection('ا', '', ['cv_p'], ['cv_s', 'cv']),
|
||||
prefixInflection('وا', '', ['cv_p'], ['cv_s', 'cv']),
|
||||
prefixInflection('فا', '', ['cv_p'], ['cv_s', 'cv']),
|
||||
],
|
||||
},
|
||||
'CVSuff': {
|
||||
name: 'Imperative',
|
||||
description: 'Command Verb',
|
||||
rules: [
|
||||
// 2nd. m. sing.
|
||||
...directObjectPronouns1st.map((p) => suffixInflection(p, '', ['cv_s'], ['cv'])),
|
||||
...directObjectPronouns3rd.map((p) => suffixInflection(p, '', ['cv_s'], ['cv'])),
|
||||
|
||||
// 2nd. f. sing
|
||||
suffixInflection('ي', '', ['cv_s'], ['cv']),
|
||||
...directObjectPronouns1st.map((p) => suffixInflection(`ي${p}`, '', ['cv_s'], ['cv'])),
|
||||
...directObjectPronouns3rd.map((p) => suffixInflection(`ي${p}`, '', ['cv_s'], ['cv'])),
|
||||
|
||||
// 2nd. dual
|
||||
suffixInflection('ا', '', ['cv_s'], ['cv']),
|
||||
...directObjectPronouns1st.map((p) => suffixInflection(`ا${p}`, '', ['cv_s'], ['cv'])),
|
||||
...directObjectPronouns3rd.map((p) => suffixInflection(`ا${p}`, '', ['cv_s'], ['cv'])),
|
||||
|
||||
// 2nd. m. pl.
|
||||
suffixInflection('وا', '', ['cv_s'], ['cv']),
|
||||
...directObjectPronouns1st.map((p) => suffixInflection(`و${p}`, '', ['cv_s'], ['cv'])),
|
||||
...directObjectPronouns3rd.map((p) => suffixInflection(`و${p}`, '', ['cv_s'], ['cv'])),
|
||||
|
||||
// 2nd. f. pl.
|
||||
suffixInflection('ن', '', ['cv_s'], ['cv']),
|
||||
...directObjectPronouns1st.map((p) => suffixInflection(`ن${p}`, '', ['cv_s'], ['cv'])),
|
||||
...directObjectPronouns3rd.map((p) => suffixInflection(`ن${p}`, '', ['cv_s'], ['cv'])),
|
||||
],
|
||||
},
|
||||
},
|
||||
};
|
||||
34
vendor/yomitan/js/language/de/german-text-preprocessors.js
vendored
Normal file
34
vendor/yomitan/js/language/de/german-text-preprocessors.js
vendored
Normal file
@@ -0,0 +1,34 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
/** @type {import('language').BidirectionalConversionPreprocessor} */
export const eszettPreprocessor = {
    name: 'Convert "ß" to "ss"',
    description: 'ß → ss, ẞ → SS and vice versa',
    options: ['off', 'direct', 'inverse'],
    process: (str, setting) => {
        if (setting === 'off') {
            return str;
        }
        if (setting === 'direct') {
            // Sharp s → double s. Uppercase form is handled first so the
            // lowercase pass cannot interfere with it.
            return str.replace(/ẞ/g, 'SS').replace(/ß/g, 'ss');
        }
        if (setting === 'inverse') {
            // Double s → sharp s. Note: this also converts native "ss"
            // sequences, matching the original implementation.
            return str.replace(/SS/g, 'ẞ').replace(/ss/g, 'ß');
        }
    },
};
|
||||
176
vendor/yomitan/js/language/de/german-transforms.js
vendored
Normal file
176
vendor/yomitan/js/language/de/german-transforms.js
vendored
Normal file
@@ -0,0 +1,176 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import {prefixInflection, suffixInflection} from '../language-transforms.js';
|
||||
|
||||
/** @typedef {keyof typeof conditions} Condition */
|
||||
|
||||
// https://www.dartmouth.edu/~deutsch/Grammatik/Wortbildung/Separables.html
// Prefixes that detach from German separable verbs when conjugated
// (e.g. "anfangen" → "fängt ... an").
const separablePrefixes = ['ab', 'an', 'auf', 'aus', 'auseinander', 'bei', 'da', 'dabei', 'dar', 'daran', 'dazwischen', 'durch', 'ein', 'empor', 'entgegen', 'entlang', 'entzwei', 'fehl', 'fern', 'fest', 'fort', 'frei', 'gegenüber', 'gleich', 'heim', 'her', 'herab', 'heran', 'herauf', 'heraus', 'herbei', 'herein', 'herüber', 'herum', 'herunter', 'hervor', 'hin', 'hinab', 'hinauf', 'hinaus', 'hinein', 'hinterher', 'hinunter', 'hinweg', 'hinzu', 'hoch', 'los', 'mit', 'nach', 'nebenher', 'nieder', 'statt', 'um', 'vor', 'voran', 'voraus', 'vorbei', 'vorüber', 'vorweg', 'weg', 'weiter', 'wieder', 'zu', 'zurecht', 'zurück', 'zusammen'];
// Regex character-class body covering the German alphabet
// (ASCII letters plus umlauts and both sharp-s forms).
const germanLetters = 'a-zA-ZäöüßÄÖÜẞ';
|
||||
|
||||
/**
 * Builds a deinflection rule for a separable prefix that has moved to the
 * end of the phrase (e.g. "fängt gleich an" → "fängt an").
 * @param {string} prefix
 * @param {Condition[]} conditionsIn
 * @param {Condition[]} conditionsOut
 * @returns {import('language-transformer').Rule<Condition>}
 */
function separatedPrefix(prefix, conditionsIn, conditionsOut) {
    // Verb word, then any interposed text, then the detached prefix at the end.
    const separatedPattern = new RegExp(`^([${germanLetters}]+) .+ ${prefix}$`);
    const rejoin = (term) => term.replace(separatedPattern, `$1 ${prefix}`);
    return {
        type: 'other',
        isInflected: separatedPattern,
        deinflect: rejoin,
        conditionsIn,
        conditionsOut,
    };
}
|
||||
|
||||
// One separated-prefix rule per separable prefix.
const separatedPrefixInflections = separablePrefixes.map((prefix) => separatedPrefix(prefix, [], []));

// "zu"-infinitive forms: "<prefix>zu<stem>" → "<prefix><stem>"
// (e.g. "anzufangen" → "anfangen").
const zuInfinitiveInflections = separablePrefixes.map((prefix) => prefixInflection(`${prefix}zu`, prefix, [], ['v']));
|
||||
|
||||
/**
 * Builds past-participle deinflection rules for regular (weak) verbs:
 * "ge<stem>t" → "<stem>n" / "<stem>en" (e.g. "gemacht" → "machen").
 * @returns {import('language-transformer').Rule<Condition>[]}
 */
function getBasicPastParticiples() {
    const pattern = new RegExp(`^ge([${germanLetters}]+)t$`);
    const rules = [];
    // Infinitives can end in "n" or "en"; emit one candidate rule per ending.
    for (const infinitiveSuffix of ['n', 'en']) {
        rules.push({
            type: 'other',
            isInflected: pattern,
            // Drop the "ge" prefix and final "t", then append the infinitive ending.
            deinflect: (term) => term.replace(pattern, `$1${infinitiveSuffix}`),
            conditionsIn: [],
            conditionsOut: ['vw'],
        });
    }
    return rules;
}
|
||||
|
||||
/**
 * Builds past-participle deinflection rules for separable weak verbs:
 * "<prefix>ge<stem>t" → "<prefix><stem>n" / "<prefix><stem>en"
 * (e.g. "aufgemacht" → "aufmachen").
 * @returns {import('language-transformer').Rule<Condition>[]}
 */
function getSeparablePastParticiples() {
    const prefixDisjunction = separablePrefixes.join('|');
    const separablePastParticiple = new RegExp(`^(${prefixDisjunction})ge([${germanLetters}]+)t$`);
    // Infinitives can end in "n" or "en"; emit one candidate rule per ending.
    const suffixes = ['n', 'en'];
    return suffixes.map((suffix) => ({
        type: 'other',
        isInflected: separablePastParticiple,
        deinflect: (term) => {
            // $1 = separable prefix, $2 = stem; the "ge" infix and final "t" are dropped.
            return term.replace(separablePastParticiple, `$1$2${suffix}`);
        },
        conditionsIn: [],
        conditionsOut: ['vw'],
    }));
}
|
||||
|
||||
// Part-of-speech conditions for German; the short keys are referenced by the
// transform rules' conditionsIn/conditionsOut arrays to chain deinflections.
const conditions = {
    v: {
        name: 'Verb',
        isDictionaryForm: true,
        // Weak and strong verbs also satisfy the generic verb condition.
        subConditions: ['vw', 'vs'],
    },
    vw: {
        name: 'Weak verb',
        isDictionaryForm: true,
    },
    vs: {
        name: 'Strong verb',
        isDictionaryForm: true,
    },
    n: {
        name: 'Noun',
        isDictionaryForm: true,
    },
    adj: {
        name: 'Adjective',
        isDictionaryForm: true,
    },
};
|
||||
|
||||
// Deinflection/derivation table for German, consumed by the language transformer.
/** @type {import('language-transformer').LanguageTransformDescriptor<Condition>} */
export const germanTransforms = {
    language: 'de',
    conditions,
    transforms: {
        'nominalization': {
            name: 'nominalization',
            description: 'Noun formed from a verb',
            rules: [
                suffixInflection('ung', 'en', [], ['v']),
                suffixInflection('lung', 'eln', [], ['v']),
                suffixInflection('rung', 'rn', [], ['v']),
            ],
        },
        '-bar': {
            name: '-bar',
            description: '-able adjective from a verb',
            rules: [
                suffixInflection('bar', 'en', ['adj'], ['v']),
                suffixInflection('bar', 'n', ['adj'], ['v']),
            ],
        },
        'negative': {
            name: 'negative',
            description: 'Negation',
            rules: [
                prefixInflection('un', '', [], ['adj']),
            ],
        },
        'past participle': {
            name: 'past participle',
            // Covers both plain ("gemacht") and separable ("aufgemacht") participles.
            rules: [
                ...getBasicPastParticiples(),
                ...getSeparablePastParticiples(),
            ],
        },
        'separated prefix': {
            name: 'separated prefix',
            rules: [
                ...separatedPrefixInflections,
            ],
        },
        'zu-infinitive': {
            name: 'zu-infinitive',
            rules: [
                ...zuInfinitiveInflections,
            ],
        },
        '-heit': {
            name: '-heit',
            description:
                '1. Converts an adjective into a noun and usually denotes an abstract quality of the adjectival root. ' +
                'It is often equivalent to the English suffixes -ness, -th, -ty, -dom:\n' +
                '\t schön (“beautiful”) + -heit → Schönheit (“beauty”)\n' +
                '\t neu (“new”) + -heit → Neuheit (“novelty”)\n' +
                '2. Converts concrete nouns into abstract nouns:\n' +
                '\t Kind (“child”) + -heit → Kindheit (“childhood”)\n' +
                '\t Christ (“Christian”) + -heit → Christenheit (“Christendom”)\n',
            rules: [
                suffixInflection('heit', '', ['n'], ['adj', 'n']),
                suffixInflection('keit', '', ['n'], ['adj', 'n']),
            ],
        },

    },
};
|
||||
42
vendor/yomitan/js/language/el/modern-greek-processors.js
vendored
Normal file
42
vendor/yomitan/js/language/el/modern-greek-processors.js
vendored
Normal file
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Copyright (C) 2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import {basicTextProcessorOptions} from '../text-processors.js';
|
||||
|
||||
// Text processor that drops every acute accent after the first one in a word
// (see removeDoubleAcuteAccentsImpl below), e.g. πρόσωπό → πρόσωπο.
/** @type {import('language').TextProcessor<boolean>} */
export const removeDoubleAcuteAccents = {
    name: 'Remove double acute accents',
    description: 'πρόσωπό → πρόσωπο',
    options: basicTextProcessorOptions,
    process: (str, setting) => {
        // Only strip accents when the option is enabled; otherwise pass through.
        return setting ? removeDoubleAcuteAccentsImpl(str) : str;
    },
};
|
||||
|
||||
/**
 * Removes every combining acute accent after the first one in a word.
 * The word is decomposed to NFD so accents become separate combining marks,
 * filtered, then recomposed to NFC.
 * @param {string} word
 * @returns {string}
 */
export function removeDoubleAcuteAccentsImpl(word) {
    const ACUTE_ACCENT = '\u0301';
    let keptFirstAccent = false;
    let result = '';
    for (const char of word.normalize('NFD')) {
        if (char === ACUTE_ACCENT) {
            if (keptFirstAccent) {
                // Drop every acute accent beyond the first.
                continue;
            }
            keptFirstAccent = true;
        }
        result += char;
    }
    return result.normalize('NFC');
}
|
||||
292
vendor/yomitan/js/language/en/english-transforms.js
vendored
Normal file
292
vendor/yomitan/js/language/en/english-transforms.js
vendored
Normal file
@@ -0,0 +1,292 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import {prefixInflection, suffixInflection} from '../language-transforms.js';
|
||||
|
||||
/** @typedef {keyof typeof conditions} Condition */
|
||||
|
||||
/**
 * Creates one suffix rule per consonant, matching the doubled consonant
 * followed by the suffix and restoring the single consonant
 * (e.g. 'p' + 'ed' matches "...pped" and deinflects to "...p").
 * @param {string} consonants
 * @param {string} suffix
 * @param {Condition[]} conditionsIn
 * @param {Condition[]} conditionsOut
 * @returns {import('language-transformer').SuffixRule<Condition>[]}
 */
function doubledConsonantInflection(consonants, suffix, conditionsIn, conditionsOut) {
    return [...consonants].map((consonant) => suffixInflection(consonant.repeat(2) + suffix, consonant, conditionsIn, conditionsOut));
}
|
||||
|
||||
// Simple-past ('-ed') rules, including doubled final consonants and
// '-ay' → '-aid' irregulars.
const pastSuffixInflections = [
    suffixInflection('ed', '', ['v'], ['v']), // 'walked'
    suffixInflection('ed', 'e', ['v'], ['v']), // 'hoped'
    suffixInflection('ied', 'y', ['v'], ['v']), // 'tried'
    suffixInflection('cked', 'c', ['v'], ['v']), // 'frolicked'
    ...doubledConsonantInflection('bdgklmnprstz', 'ed', ['v'], ['v']),

    suffixInflection('laid', 'lay', ['v'], ['v']),
    suffixInflection('paid', 'pay', ['v'], ['v']),
    suffixInflection('said', 'say', ['v'], ['v']),
];

// Present-participle ('-ing') rules.
const ingSuffixInflections = [
    suffixInflection('ing', '', ['v'], ['v']), // 'walking'
    suffixInflection('ing', 'e', ['v'], ['v']), // 'driving'
    suffixInflection('ying', 'ie', ['v'], ['v']), // 'lying'
    suffixInflection('cking', 'c', ['v'], ['v']), // 'panicking'
    ...doubledConsonantInflection('bdgklmnprstz', 'ing', ['v'], ['v']),
];

// Third-person singular present ('-s') rules.
const thirdPersonSgPresentSuffixInflections = [
    suffixInflection('s', '', ['v'], ['v']), // 'walks'
    suffixInflection('es', '', ['v'], ['v']), // 'teaches'
    suffixInflection('ies', 'y', ['v'], ['v']), // 'tries'
];
|
||||
|
||||
// Words that can serve as the particle of an English phrasal verb.
const phrasalVerbParticles = ['aboard', 'about', 'above', 'across', 'ahead', 'alongside', 'apart', 'around', 'aside', 'astray', 'away', 'back', 'before', 'behind', 'below', 'beneath', 'besides', 'between', 'beyond', 'by', 'close', 'down', 'east', 'west', 'north', 'south', 'eastward', 'westward', 'northward', 'southward', 'forward', 'backward', 'backwards', 'forwards', 'home', 'in', 'inside', 'instead', 'near', 'off', 'on', 'opposite', 'out', 'outside', 'over', 'overhead', 'past', 'round', 'since', 'through', 'throughout', 'together', 'under', 'underneath', 'up', 'within', 'without'];
// Prepositions that can follow the verb of a phrasal verb.
const phrasalVerbPrepositions = ['aback', 'about', 'above', 'across', 'after', 'against', 'ahead', 'along', 'among', 'apart', 'around', 'as', 'aside', 'at', 'away', 'back', 'before', 'behind', 'below', 'between', 'beyond', 'by', 'down', 'even', 'for', 'forth', 'forward', 'from', 'in', 'into', 'of', 'off', 'on', 'onto', 'open', 'out', 'over', 'past', 'round', 'through', 'to', 'together', 'toward', 'towards', 'under', 'up', 'upon', 'way', 'with', 'without'];

// Regex alternation strings built once and reused by the rules below.
const particlesDisjunction = phrasalVerbParticles.join('|');
const phrasalVerbWordSet = new Set([...phrasalVerbParticles, ...phrasalVerbPrepositions]);
const phrasalVerbWordDisjunction = [...phrasalVerbWordSet].join('|');
/**
 * Deinflects a phrasal verb with an object between verb and particle,
 * collapsing the interposed object to a single space
 * (e.g. "take it apart" → "take apart").
 * @type {import('language-transformer').Rule<Condition>}
 */
const phrasalVerbInterposedObjectRule = {
    type: 'other',
    // Verb word, then one or more words that are NOT phrasal-verb words,
    // then a particle.
    isInflected: new RegExp(`^\\w* (?:(?!\\b(${phrasalVerbWordDisjunction})\\b).)+ (?:${particlesDisjunction})`),
    deinflect: (term) => {
        return term.replace(new RegExp(`(?<=\\w) (?:(?!\\b(${phrasalVerbWordDisjunction})\\b).)+ (?=(?:${particlesDisjunction}))`), ' ');
    },
    conditionsIn: [],
    conditionsOut: ['v_phr'],
};
|
||||
|
||||
/**
 * Builds a rule that deinflects the verb part of a phrasal verb while the
 * particle/preposition stays attached (e.g. "looked up" → "look up").
 * @param {string} inflected - Inflected verb-suffix pattern to match.
 * @param {string} deinflected - Dictionary-form suffix it is replaced with.
 * @returns {import('language-transformer').Rule<Condition>}
 */
function createPhrasalVerbInflection(inflected, deinflected) {
    return {
        type: 'other',
        isInflected: new RegExp(`^\\w*${inflected} (?:${phrasalVerbWordDisjunction})`),
        deinflect: (term) => {
            // NOTE(review): the `(?<=)` lookbehind is empty and therefore a
            // no-op; presumably `(?<=\w)` was intended — confirm upstream.
            return term.replace(new RegExp(`(?<=)${inflected}(?= (?:${phrasalVerbWordDisjunction}))`), deinflected);
        },
        conditionsIn: ['v'],
        conditionsOut: ['v_phr'],
    };
}
|
||||
|
||||
/**
 * Derives phrasal-verb rules from plain suffix rules by reusing each rule's
 * inflected/deinflected suffix pair.
 * @param {import('language-transformer').SuffixRule<Condition>[]} sourceRules
 * @returns {import('language-transformer').Rule<Condition>[]}
 */
function createPhrasalVerbInflectionsFromSuffixInflections(sourceRules) {
    const rules = [];
    for (const {isInflected, deinflected} of sourceRules) {
        // Rules without a deinflected form cannot be converted; skip them.
        if (typeof deinflected === 'undefined') { continue; }
        // Drop the trailing '$' anchor so the suffix can match mid-phrase.
        const inflectedSuffix = isInflected.source.replace('$', '');
        rules.push(createPhrasalVerbInflection(inflectedSuffix, deinflected));
    }
    return rules;
}
|
||||
|
||||
// Part-of-speech conditions for English; the short keys are referenced by the
// transform rules' conditionsIn/conditionsOut arrays to chain deinflections.
const conditions = {
    v: {
        name: 'Verb',
        isDictionaryForm: true,
        // Phrasal verbs also satisfy the generic verb condition.
        subConditions: ['v_phr'],
    },
    v_phr: {
        name: 'Phrasal verb',
        isDictionaryForm: true,
    },
    n: {
        name: 'Noun',
        isDictionaryForm: true,
        // Plural and singular nouns also satisfy the generic noun condition.
        subConditions: ['np', 'ns'],
    },
    np: {
        name: 'Noun plural',
        isDictionaryForm: true,
    },
    ns: {
        name: 'Noun singular',
        isDictionaryForm: true,
    },
    adj: {
        name: 'Adjective',
        isDictionaryForm: true,
    },
    adv: {
        name: 'Adverb',
        isDictionaryForm: true,
    },
};
|
||||
|
||||
// Deinflection/derivation table for English, consumed by the language
// transformer. Each transform maps an inflected surface form back toward a
// dictionary form via the rules' conditionsIn/conditionsOut chaining.
/** @type {import('language-transformer').LanguageTransformDescriptor<Condition>} */
export const englishTransforms = {
    language: 'en',
    conditions,
    transforms: {
        'plural': {
            name: 'plural',
            description: 'Plural form of a noun',
            rules: [
                suffixInflection('s', '', ['np'], ['ns']),
                suffixInflection('es', '', ['np'], ['ns']),
                suffixInflection('ies', 'y', ['np'], ['ns']),
                suffixInflection('ves', 'fe', ['np'], ['ns']),
                suffixInflection('ves', 'f', ['np'], ['ns']),
            ],
        },
        'possessive': {
            name: 'possessive',
            description: 'Possessive form of a noun',
            rules: [
                suffixInflection('\'s', '', ['n'], ['n']),
                suffixInflection('s\'', 's', ['n'], ['n']),
            ],
        },
        'past': {
            name: 'past',
            description: 'Simple past tense of a verb',
            // Plain verbs plus the phrasal-verb variants derived from the same suffixes.
            rules: [
                ...pastSuffixInflections,
                ...createPhrasalVerbInflectionsFromSuffixInflections(pastSuffixInflections),
            ],
        },
        'ing': {
            name: 'ing',
            description: 'Present participle of a verb',
            rules: [
                ...ingSuffixInflections,
                ...createPhrasalVerbInflectionsFromSuffixInflections(ingSuffixInflections),
            ],
        },
        '3rd pers. sing. pres': {
            name: '3rd pers. sing. pres',
            description: 'Third person singular present tense of a verb',
            rules: [
                ...thirdPersonSgPresentSuffixInflections,
                ...createPhrasalVerbInflectionsFromSuffixInflections(thirdPersonSgPresentSuffixInflections),
            ],
        },
        'interposed object': {
            name: 'interposed object',
            description: 'Phrasal verb with interposed object',
            rules: [
                phrasalVerbInterposedObjectRule,
            ],
        },
        'archaic': {
            name: 'archaic',
            description: 'Archaic form of a word',
            rules: [
                suffixInflection('\'d', 'ed', ['v'], ['v']),
            ],
        },
        'adverb': {
            name: 'adverb',
            description: 'Adverb form of an adjective',
            rules: [
                suffixInflection('ly', '', ['adv'], ['adj']), // 'quickly'
                suffixInflection('ily', 'y', ['adv'], ['adj']), // 'happily'
                suffixInflection('ly', 'le', ['adv'], ['adj']), // 'humbly'
            ],
        },
        'comparative': {
            name: 'comparative',
            description: 'Comparative form of an adjective',
            rules: [
                suffixInflection('er', '', ['adj'], ['adj']), // 'faster'
                suffixInflection('er', 'e', ['adj'], ['adj']), // 'nicer'
                suffixInflection('ier', 'y', ['adj'], ['adj']), // 'happier'
                ...doubledConsonantInflection('bdgmnt', 'er', ['adj'], ['adj']),
            ],
        },
        'superlative': {
            name: 'superlative',
            description: 'Superlative form of an adjective',
            rules: [
                suffixInflection('est', '', ['adj'], ['adj']), // 'fastest'
                suffixInflection('est', 'e', ['adj'], ['adj']), // 'nicest'
                suffixInflection('iest', 'y', ['adj'], ['adj']), // 'happiest'
                ...doubledConsonantInflection('bdgmnt', 'est', ['adj'], ['adj']),
            ],
        },
        'dropped g': {
            name: 'dropped g',
            description: 'Dropped g in -ing form of a verb',
            rules: [
                suffixInflection('in\'', 'ing', ['v'], ['v']),
            ],
        },
        '-y': {
            name: '-y',
            description: 'Adjective formed from a verb or noun',
            rules: [
                suffixInflection('y', '', ['adj'], ['n', 'v']), // 'dirty', 'pushy'
                suffixInflection('y', 'e', ['adj'], ['n', 'v']), // 'hazy'
                ...doubledConsonantInflection('glmnprst', 'y', [], ['n', 'v']), // 'baggy', 'saggy'
            ],
        },
        'un-': {
            name: 'un-',
            description: 'Negative form of an adjective, adverb, or verb',
            rules: [
                prefixInflection('un', '', ['adj', 'adv', 'v'], ['adj', 'adv', 'v']),
            ],
        },
        'going-to future': {
            name: 'going-to future',
            description: 'Going-to future tense of a verb',
            rules: [
                prefixInflection('going to ', '', ['v'], ['v']),
            ],
        },
        'will future': {
            name: 'will future',
            description: 'Will-future tense of a verb',
            rules: [
                prefixInflection('will ', '', ['v'], ['v']),
            ],
        },
        'imperative negative': {
            name: 'imperative negative',
            description: 'Negative imperative form of a verb',
            rules: [
                prefixInflection('don\'t ', '', ['v'], ['v']),
                prefixInflection('do not ', '', ['v'], ['v']),
            ],
        },
        '-able': {
            name: '-able',
            description: 'Adjective formed from a verb',
            rules: [
                suffixInflection('able', '', ['v'], ['adj']),
                suffixInflection('able', 'e', ['v'], ['adj']),
                suffixInflection('iable', 'y', ['v'], ['adj']),
                ...doubledConsonantInflection('bdgklmnprstz', 'able', ['v'], ['adj']),
            ],
        },
    },
};
|
||||
316
vendor/yomitan/js/language/eo/esperanto-transforms.js
vendored
Normal file
316
vendor/yomitan/js/language/eo/esperanto-transforms.js
vendored
Normal file
@@ -0,0 +1,316 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import {prefixInflection, suffixInflection} from '../language-transforms.js';
|
||||
|
||||
/**
 * Part-of-speech conditions for the Esperanto transforms below.
 * Each key is a condition id referenced by the inflection rules'
 * conditionsIn/conditionsOut arrays; isDictionaryForm marks conditions
 * that dictionary headwords themselves may carry.
 */
const conditions = {
    n: {
        name: 'Noun',
        isDictionaryForm: true,
    },
    adj: {
        name: 'Adjective',
        isDictionaryForm: true,
    },
    adv: {
        name: 'Adverb',
        isDictionaryForm: true,
    },
    v: {
        name: 'Verb',
        isDictionaryForm: true,
    },
};
|
||||
|
||||
/** @type {import('language-transformer').LanguageTransformDescriptor<keyof typeof conditions>} */
|
||||
export const esperantoTransforms = {
|
||||
language: 'eo',
|
||||
conditions,
|
||||
transforms: {
|
||||
// general inflections
|
||||
'accusative': {
|
||||
name: 'accusative',
|
||||
description: 'Accusative form of a word',
|
||||
rules: [
|
||||
suffixInflection('n', '', [], []),
|
||||
],
|
||||
},
|
||||
'plural': {
|
||||
name: 'plural',
|
||||
description: 'Plural form of a word',
|
||||
rules: [
|
||||
suffixInflection('j', '', [], []),
|
||||
],
|
||||
},
|
||||
'diminutive': {
|
||||
name: 'diminutive',
|
||||
description: 'Diminutive form of a noun',
|
||||
rules: [
|
||||
suffixInflection('eto', 'o', [], ['n']),
|
||||
],
|
||||
},
|
||||
'directional': {
|
||||
name: 'directional',
|
||||
description: [
|
||||
'An adverb in accusative case indicates direction',
|
||||
'kie: where',
|
||||
'kien: to where',
|
||||
].join('\n'),
|
||||
rules: [
|
||||
suffixInflection('en', 'e', [], ['adv']),
|
||||
],
|
||||
},
|
||||
'locational': {
|
||||
name: 'locational',
|
||||
description: [
|
||||
'A noun becoming an adverb indicates location',
|
||||
'surpinto : peak, tip, top',
|
||||
'surpinte: at the peak, at the tip, at the top',
|
||||
'ĉambro : room',
|
||||
'ĉambre: at the room, in the room',
|
||||
].join('\n'),
|
||||
rules: [
|
||||
suffixInflection('e', 'o', [], ['n']),
|
||||
],
|
||||
},
|
||||
'adjectival': {
|
||||
name: 'adjectival',
|
||||
description: 'Adjectival form of a noun',
|
||||
rules: [
|
||||
suffixInflection('a', 'o', [], ['n']),
|
||||
],
|
||||
},
|
||||
'adverbial (adj -> adv)': {
|
||||
name: 'adverbial',
|
||||
description: 'Adverbial form of an adjective',
|
||||
rules: [
|
||||
suffixInflection('e', 'a', [], ['adj']),
|
||||
],
|
||||
},
|
||||
'adverbial (v -> adv)': {
|
||||
name: 'adverbial',
|
||||
description: 'Adverbial form of a verb',
|
||||
rules: [
|
||||
suffixInflection('e', 'i', [], ['v']),
|
||||
],
|
||||
},
|
||||
// suffixes
|
||||
'-ejo (noun)': {
|
||||
name: '-ejo',
|
||||
description: [
|
||||
'Suffix which turns a word into a place designed for that specific thing',
|
||||
'kafo: coffee',
|
||||
'kafejo: café',
|
||||
].join('\n'),
|
||||
rules: [
|
||||
suffixInflection('ejo', 'o', [], ['n']),
|
||||
],
|
||||
},
|
||||
'-ejo (verb)': {
|
||||
name: '-ejo',
|
||||
description: [
|
||||
'Suffix which turns a word into a place designed for that specific action',
|
||||
'kuiri: to cook',
|
||||
'kuirejo: kitchen',
|
||||
].join('\n'),
|
||||
rules: [
|
||||
suffixInflection('ejo', 'i', [], ['v']),
|
||||
],
|
||||
},
|
||||
'-ujo (noun)': {
|
||||
name: '-ujo',
|
||||
description: [
|
||||
'Suffix which turns a word into a box or container for that specific thing',
|
||||
'abelo: a bee',
|
||||
'abelujo: a beehive',
|
||||
'',
|
||||
'Suffix which turns a word into a place where a type of people can be found',
|
||||
'patro: father',
|
||||
'patrujo: fatherland',
|
||||
'',
|
||||
'Suffix which turns a flower or fruit into a plant or tree which the flower or fruit can be found in',
|
||||
'pomo: an apple',
|
||||
'pomujo: an apple tree',
|
||||
].join('\n'),
|
||||
rules: [
|
||||
suffixInflection('ujo', 'o', [], ['n']),
|
||||
],
|
||||
},
|
||||
'-ujo (adjective)': {
|
||||
name: '-ujo',
|
||||
description: [
|
||||
'Suffix which turns a descriptive word into a box or container for that specific type of thing',
|
||||
'frida: cold',
|
||||
'fridujo: a refrigerator',
|
||||
].join('\n'),
|
||||
rules: [
|
||||
suffixInflection('ujo', 'a', [], ['adj']),
|
||||
],
|
||||
},
|
||||
'-ujo (verb)': {
|
||||
name: '-ujo',
|
||||
description: [
|
||||
'Suffix which turns an action into a box or container for that specific type of activity',
|
||||
'lavi: to wash',
|
||||
'lavujo: a sink',
|
||||
].join('\n'),
|
||||
rules: [
|
||||
suffixInflection('ujo', 'i', [], ['v']),
|
||||
],
|
||||
},
|
||||
'-ebla': {
|
||||
name: '-ebla',
|
||||
description: [
|
||||
'Suffix which shows possibility',
|
||||
'kompreni: to understand',
|
||||
'komprenebla: understandable',
|
||||
].join('\n'),
|
||||
rules: [
|
||||
suffixInflection('ebla', 'i', [], ['v']),
|
||||
],
|
||||
},
|
||||
'-ado': {
|
||||
name: '-ado',
|
||||
description: [
|
||||
'Suffix which turns an action into a thing representing the action',
|
||||
'vivi: to live',
|
||||
'vivado: life',
|
||||
'spiri: to breathe',
|
||||
'spirado: respiration',
|
||||
].join('\n'),
|
||||
rules: [
|
||||
suffixInflection('ado', 'i', [], ['v']),
|
||||
],
|
||||
},
|
||||
// prefixes
|
||||
'mal-': {
|
||||
name: 'mal-',
|
||||
description: 'Prefix which turns an action, description, thing, or direction into its opposite meaning',
|
||||
rules: [
|
||||
prefixInflection('mal', '', [], []),
|
||||
],
|
||||
},
|
||||
'kun-': {
|
||||
name: 'kun-',
|
||||
description: [
|
||||
'Prefix meaning to do the action together with other people',
|
||||
'labori: to work',
|
||||
'kunlabori: to collaborate',
|
||||
].join('\n'),
|
||||
rules: [
|
||||
prefixInflection('kun', '', [], []),
|
||||
],
|
||||
},
|
||||
'ekster-': {
|
||||
name: 'ekster-',
|
||||
description: [
|
||||
'Prefix meaning "outside of"',
|
||||
'lando: country',
|
||||
'eksterlando: foreign country',
|
||||
].join('\n'),
|
||||
rules: [
|
||||
prefixInflection('ekster', '', [], []),
|
||||
],
|
||||
},
|
||||
'ek-': {
|
||||
name: 'ek-',
|
||||
description: [
|
||||
'Prefix meaning to begin the action',
|
||||
'kanti: to sing',
|
||||
'ekkanti: to begin to sing',
|
||||
].join('\n'),
|
||||
rules: [
|
||||
prefixInflection('ek', '', [], []),
|
||||
],
|
||||
},
|
||||
'ĵus-': {
|
||||
name: 'ĵus-',
|
||||
description: [
|
||||
'Prefix meaning something is newly or recently done',
|
||||
'vekita: awakened',
|
||||
'ĵusvekita: newly/recently awakened',
|
||||
].join('\n'),
|
||||
rules: [
|
||||
prefixInflection('ĵus', '', [], []),
|
||||
],
|
||||
},
|
||||
'el-': {
|
||||
name: 'el-',
|
||||
description: [
|
||||
'Prefix meaning to do the action in an outward direction',
|
||||
'tiri: to pull',
|
||||
'eltiri: to pull out',
|
||||
'',
|
||||
'Prefix meaning to do the action all the way to completion',
|
||||
'trinki: to drink',
|
||||
'eltrinki: to drink up, to drink all of something',
|
||||
'lerni: to learn',
|
||||
'ellerni: to learn all that you can, to master',
|
||||
'uzi: to use',
|
||||
'eluzi: to use up, wear out',
|
||||
].join('\n'),
|
||||
rules: [
|
||||
prefixInflection('el', '', [], []),
|
||||
],
|
||||
},
|
||||
'dis-': {
|
||||
name: 'dis-',
|
||||
description: [
|
||||
'Prefix meaning "separation, being apart, spreading out"',
|
||||
'ŝvebi: to float',
|
||||
'disŝvebi: to float out/separately',
|
||||
'fali: to fall',
|
||||
'disfali: to fall apart',
|
||||
'doni: to give',
|
||||
'disdoni: to give out, to distribute',
|
||||
'sendo: a thing that is sent',
|
||||
'dissendo: a broadcast',
|
||||
].join('\n'),
|
||||
rules: [
|
||||
prefixInflection('dis', '', [], []),
|
||||
],
|
||||
},
|
||||
'for-': {
|
||||
name: 'for-',
|
||||
description: [
|
||||
'Prefix meaning "movement to a far distance"',
|
||||
'flugi: to fly',
|
||||
'forflugi: to fly away',
|
||||
'',
|
||||
'Prefix meaning "disappearance/annihilation"',
|
||||
'uzi: to use',
|
||||
'foruzi: to use up (until nothing is left)',
|
||||
].join('\n'),
|
||||
rules: [
|
||||
prefixInflection('for', '', [], []),
|
||||
],
|
||||
},
|
||||
'mis-': {
|
||||
name: 'mis-',
|
||||
description: [
|
||||
'Prefix meaning "wrong, erroneous"',
|
||||
'kompreni: to understand',
|
||||
'miskompreni: to misunderstand',
|
||||
'paroli: to speak',
|
||||
'misparoli: to misspeak',
|
||||
].join('\n'),
|
||||
rules: [
|
||||
prefixInflection('mis', '', [], []),
|
||||
],
|
||||
},
|
||||
},
|
||||
};
|
||||
969
vendor/yomitan/js/language/es/spanish-transforms.js
vendored
Normal file
969
vendor/yomitan/js/language/es/spanish-transforms.js
vendored
Normal file
@@ -0,0 +1,969 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import {suffixInflection, wholeWordInflection} from '../language-transforms.js';
|
||||
|
||||
/** @typedef {keyof typeof conditions} Condition */
// Matches a Spanish reflexive pronoun followed by an infinitive
// (e.g. "se lavar" in "ir a se lavar"-style phrases).
// NOTE(review): declared with the /g flag — .test()/.exec() on a global
// regex advance lastIndex statefully between calls; confirm callers either
// reset it or only use String.prototype methods (match/replace).
// Not referenced in this visible portion of the file — presumably consumed
// by the transform logic elsewhere; verify before removing.
const REFLEXIVE_PATTERN = /\b(me|te|se|nos|os)\s+(\w+)(ar|er|ir)\b/g;
|
||||
|
||||
// Plain vowel -> acute-accented vowel. Used by the rules below to restore
// the written accent that Spanish adds or drops when a word's syllable
// count changes (e.g. plural 'canciones' -> singular 'canción').
const ACCENTS = new Map([
    ['a', 'á'],
    ['e', 'é'],
    ['i', 'í'],
    ['o', 'ó'],
    ['u', 'ú'],
]);


/**
 * Returns the acute-accented counterpart of a plain vowel, or the input
 * unchanged when no accented counterpart exists.
 * @param {string} char - A single character (expected to be one lowercase letter).
 * @returns {string} The accented vowel, or `char` as-is.
 */
function addAccent(char) {
    // `??` rather than `||`: fall back only when the Map lookup misses
    // (undefined), never on a falsy-but-present value — states the intent
    // precisely even though current map values are all truthy.
    return ACCENTS.get(char) ?? char;
}
|
||||
|
||||
/**
 * Part-of-speech conditions for the Spanish transforms below.
 * Each key is a condition id referenced by the inflection rules'
 * conditionsIn/conditionsOut arrays. Parent conditions ('n', 'v') declare
 * subConditions so a rule targeting the parent also matches any child;
 * only parents marked isDictionaryForm appear on dictionary headwords.
 */
const conditions = {
    n: {
        name: 'Noun',
        isDictionaryForm: true,
        // Umbrella for singular/plural noun forms.
        subConditions: ['ns', 'np'],
    },
    np: {
        name: 'Noun plural',
        isDictionaryForm: false,
    },
    ns: {
        name: 'Noun singular',
        isDictionaryForm: false,
    },
    v: {
        name: 'Verb',
        isDictionaryForm: true,
        // Umbrella for the three regular conjugation classes.
        subConditions: ['v_ar', 'v_er', 'v_ir'],
    },
    v_ar: {
        name: '-ar verb',
        isDictionaryForm: false,
    },
    v_er: {
        name: '-er verb',
        isDictionaryForm: false,
    },
    v_ir: {
        name: '-ir verb',
        isDictionaryForm: false,
    },
    adj: {
        name: 'Adjective',
        isDictionaryForm: true,
    },
};
|
||||
|
||||
/** @type {import('language-transformer').LanguageTransformDescriptor<keyof typeof conditions>} */
|
||||
export const spanishTransforms = {
|
||||
language: 'es',
|
||||
conditions,
|
||||
transforms: {
|
||||
'plural': {
|
||||
name: 'plural',
|
||||
description: 'Plural form of a noun',
|
||||
rules: [
|
||||
suffixInflection('s', '', ['np'], ['ns']),
|
||||
suffixInflection('es', '', ['np'], ['ns']),
|
||||
suffixInflection('ces', 'z', ['np'], ['ns']), // 'lápices' -> lápiz
|
||||
...[...'aeiou'].map((v) => suffixInflection(`${v}ses`, `${addAccent(v)}s`, ['np'], ['ns'])), // 'autobuses' -> autobús
|
||||
...[...'aeiou'].map((v) => suffixInflection(`${v}nes`, `${addAccent(v)}n`, ['np'], ['ns'])), // 'canciones' -> canción
|
||||
],
|
||||
},
|
||||
'feminine adjective': {
|
||||
name: 'feminine adjective',
|
||||
description: 'feminine form of an adjective',
|
||||
rules: [
|
||||
suffixInflection('a', 'o', ['adj'], ['adj']),
|
||||
suffixInflection('a', '', ['adj'], ['adj']), // encantadora -> encantador, española -> español
|
||||
...[...'aeio'].map((v) => suffixInflection(`${v}na`, `${addAccent(v)}n`, ['adj'], ['adj'])), // dormilona -> dormilón, chiquitina -> chiquitín
|
||||
...[...'aeio'].map((v) => suffixInflection(`${v}sa`, `${addAccent(v)}s`, ['adj'], ['adj'])), // francesa -> francés
|
||||
],
|
||||
},
|
||||
'present indicative': {
|
||||
name: 'present indicative',
|
||||
description: 'Present indicative form of a verb',
|
||||
rules: [
|
||||
// STEM-CHANGING RULES FIRST
|
||||
// e->ie for -ar
|
||||
{
|
||||
type: 'other',
|
||||
isInflected: /ie([a-z]*)(o|as|a|an)$/,
|
||||
deinflect: (term) => term.replace(/ie/, 'e').replace(/(o|as|a|an)$/, 'ar'),
|
||||
conditionsIn: ['v_ar'],
|
||||
conditionsOut: ['v_ar'],
|
||||
},
|
||||
// e->ie for -er
|
||||
{
|
||||
type: 'other',
|
||||
isInflected: /ie([a-z]*)(o|es|e|en)$/,
|
||||
deinflect: (term) => term.replace(/ie/, 'e').replace(/(o|es|e|en)$/, 'er'),
|
||||
conditionsIn: ['v_er'],
|
||||
conditionsOut: ['v_er'],
|
||||
},
|
||||
// e->ie for -ir
|
||||
{
|
||||
type: 'other',
|
||||
isInflected: /ie([a-z]*)(o|es|e|en)$/,
|
||||
deinflect: (term) => term.replace(/ie/, 'e').replace(/(o|es|e|en)$/, 'ir'),
|
||||
conditionsIn: ['v_ir'],
|
||||
conditionsOut: ['v_ir'],
|
||||
},
|
||||
// o->ue for -ar
|
||||
{
|
||||
type: 'other',
|
||||
isInflected: /ue([a-z]*)(o|as|a|an)$/,
|
||||
deinflect: (term) => {
|
||||
// "jugar" (u->ue)
|
||||
if (term.startsWith('jue')) {
|
||||
return term.replace(/ue/, 'u').replace(/(o|as|a|an)$/, 'ar');
|
||||
}
|
||||
return term.replace(/ue/, 'o').replace(/(o|as|a|an)$/, 'ar');
|
||||
},
|
||||
conditionsIn: ['v_ar'],
|
||||
conditionsOut: ['v_ar'],
|
||||
},
|
||||
// o->ue for -er
|
||||
{
|
||||
type: 'other',
|
||||
isInflected: /ue([a-z]*)(o|es|e|en)$/,
|
||||
deinflect: (term) => {
|
||||
// "oler" (o->hue)
|
||||
if (term.startsWith('hue')) {
|
||||
return term.replace(/hue/, 'o').replace(/(o|es|e|en)$/, 'er');
|
||||
}
|
||||
return term.replace(/ue/, 'o').replace(/(o|es|e|en)$/, 'er');
|
||||
},
|
||||
conditionsIn: ['v_er'],
|
||||
conditionsOut: ['v_er'],
|
||||
},
|
||||
// o->ue for -ir
|
||||
{
|
||||
type: 'other',
|
||||
isInflected: /ue([a-z]*)(o|es|e|en)$/,
|
||||
deinflect: (term) => term.replace(/ue/, 'o').replace(/(o|es|e|en)$/, 'ir'),
|
||||
conditionsIn: ['v_ir'],
|
||||
conditionsOut: ['v_ir'],
|
||||
},
|
||||
// e->i for -ir
|
||||
{
|
||||
type: 'other',
|
||||
isInflected: /i([a-z]*)(o|es|e|en)$/,
|
||||
deinflect: (term) => term.replace(/i/, 'e').replace(/(o|es|e|en)$/, 'ir'),
|
||||
conditionsIn: ['v_ir'],
|
||||
conditionsOut: ['v_ir'],
|
||||
},
|
||||
// -ar verbs
|
||||
suffixInflection('o', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('as', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('a', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('amos', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('áis', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('an', 'ar', ['v_ar'], ['v_ar']),
|
||||
// -er verbs
|
||||
suffixInflection('o', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('es', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('e', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('emos', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('éis', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('en', 'er', ['v_er'], ['v_er']),
|
||||
// -ir verbs
|
||||
suffixInflection('o', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('es', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('e', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('imos', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('ís', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('en', 'ir', ['v_ir'], ['v_ir']),
|
||||
// i -> y verbs (incluir, huir, construir...)
|
||||
suffixInflection('uyo', 'uir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('uyes', 'uir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('uye', 'uir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('uyen', 'uir', ['v_ir'], ['v_ir']),
|
||||
// -tener verbs
|
||||
suffixInflection('tengo', 'tener', ['v'], ['v']),
|
||||
suffixInflection('tienes', 'tener', ['v'], ['v']),
|
||||
suffixInflection('tiene', 'tener', ['v'], ['v']),
|
||||
suffixInflection('tenemos', 'tener', ['v'], ['v']),
|
||||
suffixInflection('tenéis', 'tener', ['v'], ['v']),
|
||||
suffixInflection('tienen', 'tener', ['v'], ['v']),
|
||||
// -oír verbs
|
||||
suffixInflection('oigo', 'oír', ['v'], ['v']),
|
||||
suffixInflection('oyes', 'oír', ['v'], ['v']),
|
||||
suffixInflection('oye', 'oír', ['v'], ['v']),
|
||||
suffixInflection('oímos', 'oír', ['v'], ['v']),
|
||||
suffixInflection('oís', 'oír', ['v'], ['v']),
|
||||
suffixInflection('oyen', 'oír', ['v'], ['v']),
|
||||
// -venir verbs
|
||||
suffixInflection('vengo', 'venir', ['v'], ['v']),
|
||||
suffixInflection('vienes', 'venir', ['v'], ['v']),
|
||||
suffixInflection('viene', 'venir', ['v'], ['v']),
|
||||
suffixInflection('venimos', 'venir', ['v'], ['v']),
|
||||
suffixInflection('venís', 'venir', ['v'], ['v']),
|
||||
suffixInflection('vienen', 'venir', ['v'], ['v']),
|
||||
// Verbs with Irregular Yo Forms
|
||||
// -guir, -ger, or -gir verbs
|
||||
suffixInflection('go', 'guir', ['v'], ['v']),
|
||||
suffixInflection('jo', 'ger', ['v'], ['v']),
|
||||
suffixInflection('jo', 'gir', ['v'], ['v']),
|
||||
suffixInflection('aigo', 'aer', ['v'], ['v']),
|
||||
suffixInflection('zco', 'cer', ['v'], ['v']),
|
||||
suffixInflection('zco', 'cir', ['v'], ['v']),
|
||||
suffixInflection('hago', 'hacer', ['v'], ['v']),
|
||||
suffixInflection('pongo', 'poner', ['v'], ['v']),
|
||||
suffixInflection('lgo', 'lir', ['v'], ['v']),
|
||||
suffixInflection('lgo', 'ler', ['v'], ['v']),
|
||||
wholeWordInflection('quepo', 'caber', ['v'], ['v']),
|
||||
wholeWordInflection('doy', 'dar', ['v'], ['v']),
|
||||
wholeWordInflection('sé', 'saber', ['v'], ['v']),
|
||||
wholeWordInflection('veo', 'ver', ['v'], ['v']),
|
||||
// Ser, estar, ir, haber
|
||||
wholeWordInflection('soy', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('eres', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('es', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('somos', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('sois', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('son', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('estoy', 'estar', ['v'], ['v']),
|
||||
wholeWordInflection('estás', 'estar', ['v'], ['v']),
|
||||
wholeWordInflection('está', 'estar', ['v'], ['v']),
|
||||
wholeWordInflection('estamos', 'estar', ['v'], ['v']),
|
||||
wholeWordInflection('estáis', 'estar', ['v'], ['v']),
|
||||
wholeWordInflection('están', 'estar', ['v'], ['v']),
|
||||
wholeWordInflection('voy', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('vas', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('va', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('vamos', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('vais', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('van', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('he', 'haber', ['v'], ['v']),
|
||||
wholeWordInflection('has', 'haber', ['v'], ['v']),
|
||||
wholeWordInflection('ha', 'haber', ['v'], ['v']),
|
||||
wholeWordInflection('hemos', 'haber', ['v'], ['v']),
|
||||
wholeWordInflection('habéis', 'haber', ['v'], ['v']),
|
||||
wholeWordInflection('han', 'haber', ['v'], ['v']),
|
||||
],
|
||||
},
|
||||
'preterite': {
|
||||
name: 'preterite',
|
||||
description: 'Preterite (past) form of a verb',
|
||||
rules: [
|
||||
// e->i for -ir
|
||||
{
|
||||
type: 'other',
|
||||
isInflected: /i([a-z]*)(ió|ieron)$/, // this only happens in 3rd person - singular and plural
|
||||
deinflect: (term) => term.replace(/i/, 'e').replace(/(ió|ieron)$/, 'ir'),
|
||||
conditionsIn: ['v_ir'],
|
||||
conditionsOut: ['v_ir'],
|
||||
},
|
||||
// o->u for -ir
|
||||
{
|
||||
type: 'other',
|
||||
isInflected: /u([a-z]*)(ió|ieron)$/,
|
||||
deinflect: (term) => term.replace(/u/, 'o').replace(/(ió|ieron)$/, 'ir'),
|
||||
conditionsIn: ['v_ir'],
|
||||
conditionsOut: ['v_ir'],
|
||||
},
|
||||
// -ar verbs
|
||||
suffixInflection('é', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('aste', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('ó', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('amos', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('asteis', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('aron', 'ar', ['v_ar'], ['v_ar']),
|
||||
// -er verbs
|
||||
suffixInflection('í', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('iste', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('ió', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('imos', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('isteis', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('ieron', 'er', ['v_er'], ['v_er']),
|
||||
// -ir verbs
|
||||
suffixInflection('í', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('iste', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('ió', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('imos', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('isteis', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('ieron', 'ir', ['v_ir'], ['v_ir']),
|
||||
// -car, -gar, -zar verbs
|
||||
suffixInflection('qué', 'car', ['v'], ['v']),
|
||||
suffixInflection('gué', 'gar', ['v'], ['v']),
|
||||
suffixInflection('cé', 'zar', ['v'], ['v']),
|
||||
// -uir verbs
|
||||
suffixInflection('í', 'uir', ['v'], ['v']),
|
||||
// Verbs with irregular forms
|
||||
wholeWordInflection('fui', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('fuiste', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('fue', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('fuimos', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('fuisteis', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('fueron', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('fui', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('fuiste', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('fue', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('fuimos', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('fuisteis', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('fueron', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('di', 'dar', ['v'], ['v']),
|
||||
wholeWordInflection('diste', 'dar', ['v'], ['v']),
|
||||
wholeWordInflection('dio', 'dar', ['v'], ['v']),
|
||||
wholeWordInflection('dimos', 'dar', ['v'], ['v']),
|
||||
wholeWordInflection('disteis', 'dar', ['v'], ['v']),
|
||||
wholeWordInflection('dieron', 'dar', ['v'], ['v']),
|
||||
suffixInflection('hice', 'hacer', ['v'], ['v']),
|
||||
suffixInflection('hiciste', 'hacer', ['v'], ['v']),
|
||||
suffixInflection('hizo', 'hacer', ['v'], ['v']),
|
||||
suffixInflection('hicimos', 'hacer', ['v'], ['v']),
|
||||
suffixInflection('hicisteis', 'hacer', ['v'], ['v']),
|
||||
suffixInflection('hicieron', 'hacer', ['v'], ['v']),
|
||||
suffixInflection('puse', 'poner', ['v'], ['v']),
|
||||
suffixInflection('pusiste', 'poner', ['v'], ['v']),
|
||||
suffixInflection('puso', 'poner', ['v'], ['v']),
|
||||
suffixInflection('pusimos', 'poner', ['v'], ['v']),
|
||||
suffixInflection('pusisteis', 'poner', ['v'], ['v']),
|
||||
suffixInflection('pusieron', 'poner', ['v'], ['v']),
|
||||
suffixInflection('dije', 'decir', ['v'], ['v']),
|
||||
suffixInflection('dijiste', 'decir', ['v'], ['v']),
|
||||
suffixInflection('dijo', 'decir', ['v'], ['v']),
|
||||
suffixInflection('dijimos', 'decir', ['v'], ['v']),
|
||||
suffixInflection('dijisteis', 'decir', ['v'], ['v']),
|
||||
suffixInflection('dijeron', 'decir', ['v'], ['v']),
|
||||
suffixInflection('vine', 'venir', ['v'], ['v']),
|
||||
suffixInflection('viniste', 'venir', ['v'], ['v']),
|
||||
suffixInflection('vino', 'venir', ['v'], ['v']),
|
||||
suffixInflection('vinimos', 'venir', ['v'], ['v']),
|
||||
suffixInflection('vinisteis', 'venir', ['v'], ['v']),
|
||||
suffixInflection('vinieron', 'venir', ['v'], ['v']),
|
||||
wholeWordInflection('quise', 'querer', ['v'], ['v']),
|
||||
wholeWordInflection('quisiste', 'querer', ['v'], ['v']),
|
||||
wholeWordInflection('quiso', 'querer', ['v'], ['v']),
|
||||
wholeWordInflection('quisimos', 'querer', ['v'], ['v']),
|
||||
wholeWordInflection('quisisteis', 'querer', ['v'], ['v']),
|
||||
wholeWordInflection('quisieron', 'querer', ['v'], ['v']),
|
||||
suffixInflection('tuve', 'tener', ['v'], ['v']),
|
||||
suffixInflection('tuviste', 'tener', ['v'], ['v']),
|
||||
suffixInflection('tuvo', 'tener', ['v'], ['v']),
|
||||
suffixInflection('tuvimos', 'tener', ['v'], ['v']),
|
||||
suffixInflection('tuvisteis', 'tener', ['v'], ['v']),
|
||||
suffixInflection('tuvieron', 'tener', ['v'], ['v']),
|
||||
wholeWordInflection('pude', 'poder', ['v'], ['v']),
|
||||
wholeWordInflection('pudiste', 'poder', ['v'], ['v']),
|
||||
wholeWordInflection('pudo', 'poder', ['v'], ['v']),
|
||||
wholeWordInflection('pudimos', 'poder', ['v'], ['v']),
|
||||
wholeWordInflection('pudisteis', 'poder', ['v'], ['v']),
|
||||
wholeWordInflection('pudieron', 'poder', ['v'], ['v']),
|
||||
wholeWordInflection('supe', 'saber', ['v'], ['v']),
|
||||
wholeWordInflection('supiste', 'saber', ['v'], ['v']),
|
||||
wholeWordInflection('supo', 'saber', ['v'], ['v']),
|
||||
wholeWordInflection('supimos', 'saber', ['v'], ['v']),
|
||||
wholeWordInflection('supisteis', 'saber', ['v'], ['v']),
|
||||
wholeWordInflection('supieron', 'saber', ['v'], ['v']),
|
||||
wholeWordInflection('estuve', 'estar', ['v'], ['v']),
|
||||
wholeWordInflection('estuviste', 'estar', ['v'], ['v']),
|
||||
wholeWordInflection('estuvo', 'estar', ['v'], ['v']),
|
||||
wholeWordInflection('estuvimos', 'estar', ['v'], ['v']),
|
||||
wholeWordInflection('estuvisteis', 'estar', ['v'], ['v']),
|
||||
wholeWordInflection('estuvieron', 'estar', ['v'], ['v']),
|
||||
wholeWordInflection('anduve', 'andar', ['v'], ['v']),
|
||||
wholeWordInflection('anduviste', 'andar', ['v'], ['v']),
|
||||
wholeWordInflection('anduvo', 'andar', ['v'], ['v']),
|
||||
wholeWordInflection('anduvimos', 'andar', ['v'], ['v']),
|
||||
wholeWordInflection('anduvisteis', 'andar', ['v'], ['v']),
|
||||
wholeWordInflection('anduvieron', 'andar', ['v'], ['v']),
|
||||
],
|
||||
},
|
||||
'imperfect': {
|
||||
name: 'imperfect',
|
||||
description: 'Imperfect form of a verb',
|
||||
rules: [
|
||||
// -ar verbs
|
||||
suffixInflection('aba', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('abas', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('aba', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('ábamos', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('abais', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('aban', 'ar', ['v_ar'], ['v_ar']),
|
||||
// -er verbs
|
||||
suffixInflection('ía', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('ías', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('ía', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('íamos', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('íais', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('ían', 'er', ['v_er'], ['v_er']),
|
||||
// -ir verbs
|
||||
suffixInflection('ía', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('ías', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('ía', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('íamos', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('íais', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('ían', 'ir', ['v_ir'], ['v_ir']),
|
||||
// -ir verbs with stem changes
|
||||
suffixInflection('eía', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('eías', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('eía', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('eíamos', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('eíais', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('eían', 'ir', ['v_ir'], ['v_ir']),
|
||||
// irregular verbs ir, ser, ver
|
||||
wholeWordInflection('era', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('eras', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('era', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('éramos', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('erais', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('eran', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('iba', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('ibas', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('iba', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('íbamos', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('ibais', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('iban', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('veía', 'ver', ['v'], ['v']),
|
||||
wholeWordInflection('veías', 'ver', ['v'], ['v']),
|
||||
wholeWordInflection('veía', 'ver', ['v'], ['v']),
|
||||
wholeWordInflection('veíamos', 'ver', ['v'], ['v']),
|
||||
wholeWordInflection('veíais', 'ver', ['v'], ['v']),
|
||||
wholeWordInflection('veían', 'ver', ['v'], ['v']),
|
||||
],
|
||||
},
|
||||
'progressive': {
|
||||
name: 'progressive',
|
||||
description: 'Progressive form of a verb',
|
||||
rules: [
|
||||
// e->i for -ir
|
||||
{
|
||||
type: 'other',
|
||||
isInflected: /i([a-z]*)(iendo)$/,
|
||||
deinflect: (term) => term.replace(/i/, 'e').replace(/(iendo)$/, 'ir'),
|
||||
conditionsIn: ['v_ir'],
|
||||
conditionsOut: ['v_ir'],
|
||||
},
|
||||
// o->u for -er
|
||||
{
|
||||
type: 'other',
|
||||
isInflected: /u([a-z]*)(iendo)$/,
|
||||
deinflect: (term) => term.replace(/u/, 'o').replace(/(iendo)$/, 'er'),
|
||||
conditionsIn: ['v_er'],
|
||||
conditionsOut: ['v_er'],
|
||||
},
|
||||
// o->u for -ir
|
||||
{
|
||||
type: 'other',
|
||||
isInflected: /u([a-z]*)(iendo)$/,
|
||||
deinflect: (term) => term.replace(/u/, 'o').replace(/(iendo)$/, 'ir'),
|
||||
conditionsIn: ['v_ir'],
|
||||
conditionsOut: ['v_ir'],
|
||||
},
|
||||
// regular
|
||||
suffixInflection('ando', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('iendo', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('iendo', 'ir', ['v_ir'], ['v_ir']),
|
||||
// vowel before the ending (-yendo)
|
||||
suffixInflection('ayendo', 'aer', ['v_er'], ['v_er']), // traer -> trayendo, caer -> cayendo
|
||||
suffixInflection('eyendo', 'eer', ['v_er'], ['v_er']), // leer -> leyendo
|
||||
suffixInflection('uyendo', 'uir', ['v_ir'], ['v_ir']), // huir -> huyendo
|
||||
// irregular
|
||||
wholeWordInflection('oyendo', 'oír', ['v'], ['v']),
|
||||
wholeWordInflection('yendo', 'ir', ['v'], ['v']),
|
||||
],
|
||||
},
|
||||
'imperative': {
|
||||
name: 'imperative',
|
||||
description: 'Imperative form of a verb',
|
||||
rules: [
|
||||
{
|
||||
type: 'other',
|
||||
isInflected: /ie([a-z]*)(a|e|en)$/,
|
||||
deinflect: (term) => term.replace(/ie/, 'e').replace(/(a|e|en)$/, 'ar'),
|
||||
conditionsIn: ['v_ar'],
|
||||
conditionsOut: ['v_ar'],
|
||||
},
|
||||
{
|
||||
type: 'other',
|
||||
isInflected: /ie([a-z]*)(e|a|an)$/,
|
||||
deinflect: (term) => term.replace(/ie/, 'e').replace(/(e|a|an)$/, 'er'),
|
||||
conditionsIn: ['v_er'],
|
||||
conditionsOut: ['v_er'],
|
||||
},
|
||||
{
|
||||
type: 'other',
|
||||
isInflected: /ie([a-z]*)(e|a|an)$/,
|
||||
deinflect: (term) => term.replace(/ie/, 'e').replace(/(e|a|an)$/, 'ir'),
|
||||
conditionsIn: ['v_ir'],
|
||||
conditionsOut: ['v_ir'],
|
||||
},
|
||||
{
|
||||
type: 'other',
|
||||
isInflected: /ue([a-z]*)(a|e|en)$/,
|
||||
deinflect: (term) => {
|
||||
if (term.startsWith('jue')) {
|
||||
return term.replace(/ue/, 'u').replace(/(a|ue|uen)$/, 'ar');
|
||||
}
|
||||
return term.replace(/ue/, 'o').replace(/(a|e|en)$/, 'ar');
|
||||
},
|
||||
conditionsIn: ['v_ar'],
|
||||
conditionsOut: ['v_ar'],
|
||||
},
|
||||
{
|
||||
type: 'other',
|
||||
isInflected: /ue([a-z]*)(e|a|an)$/,
|
||||
deinflect: (term) => {
|
||||
if (term.startsWith('hue')) {
|
||||
return term.replace(/hue/, 'o').replace(/(e|a|an)$/, 'er');
|
||||
}
|
||||
return term.replace(/ue/, 'o').replace(/(e|a|an)$/, 'er');
|
||||
},
|
||||
conditionsIn: ['v_er'],
|
||||
conditionsOut: ['v_er'],
|
||||
},
|
||||
{
|
||||
type: 'other',
|
||||
isInflected: /ue([a-z]*)(e|a|an)$/,
|
||||
deinflect: (term) => term.replace(/ue/, 'o').replace(/(e|a|an)$/, 'ir'),
|
||||
conditionsIn: ['v_ir'],
|
||||
conditionsOut: ['v_ir'],
|
||||
},
|
||||
{
|
||||
type: 'other',
|
||||
isInflected: /i([a-z]*)(e|a|an)$/,
|
||||
deinflect: (term) => term.replace(/i/, 'e').replace(/(e|a|an)$/, 'ir'),
|
||||
conditionsIn: ['v_ir'],
|
||||
conditionsOut: ['v_ir'],
|
||||
},
|
||||
// -ar verbs
|
||||
suffixInflection('a', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('emos', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('ad', 'ar', ['v_ar'], ['v_ar']),
|
||||
// -er verbs
|
||||
suffixInflection('e', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('amos', 'ar', ['v_er'], ['v_er']),
|
||||
suffixInflection('ed', 'er', ['v_er'], ['v_er']),
|
||||
// -ir verbs
|
||||
suffixInflection('e', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('amos', 'ar', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('id', 'ir', ['v_ir'], ['v_ir']),
|
||||
// irregular verbs
|
||||
wholeWordInflection('diga', 'decir', ['v'], ['v']),
|
||||
wholeWordInflection('sé', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('ve', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('ten', 'tener', ['v'], ['v']),
|
||||
wholeWordInflection('ven', 'venir', ['v'], ['v']),
|
||||
wholeWordInflection('haz', 'hacer', ['v'], ['v']),
|
||||
wholeWordInflection('di', 'decir', ['v'], ['v']),
|
||||
wholeWordInflection('pon', 'poner', ['v'], ['v']),
|
||||
wholeWordInflection('sal', 'salir', ['v'], ['v']),
|
||||
// negative commands
|
||||
// -ar verbs
|
||||
suffixInflection('es', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('emos', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('éis', 'ar', ['v_ar'], ['v_ar']),
|
||||
// -er verbs
|
||||
suffixInflection('as', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('amos', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('áis', 'er', ['v_er'], ['v_er']),
|
||||
// -ir verbs
|
||||
suffixInflection('as', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('amos', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('áis', 'ir', ['v_ir'], ['v_ir']),
|
||||
],
|
||||
},
|
||||
'conditional': {
|
||||
name: 'conditional',
|
||||
description: 'Conditional form of a verb',
|
||||
rules: [
|
||||
suffixInflection('ía', '', ['v'], ['v']),
|
||||
suffixInflection('ías', '', ['v'], ['v']),
|
||||
suffixInflection('ía', '', ['v'], ['v']),
|
||||
suffixInflection('íamos', '', ['v'], ['v']),
|
||||
suffixInflection('íais', '', ['v'], ['v']),
|
||||
suffixInflection('ían', '', ['v'], ['v']),
|
||||
// irregular verbs
|
||||
wholeWordInflection('diría', 'decir', ['v'], ['v']),
|
||||
wholeWordInflection('dirías', 'decir', ['v'], ['v']),
|
||||
wholeWordInflection('diría', 'decir', ['v'], ['v']),
|
||||
wholeWordInflection('diríamos', 'decir', ['v'], ['v']),
|
||||
wholeWordInflection('diríais', 'decir', ['v'], ['v']),
|
||||
wholeWordInflection('dirían', 'decir', ['v'], ['v']),
|
||||
wholeWordInflection('haría', 'hacer', ['v'], ['v']),
|
||||
wholeWordInflection('harías', 'hacer', ['v'], ['v']),
|
||||
wholeWordInflection('haría', 'hacer', ['v'], ['v']),
|
||||
wholeWordInflection('haríamos', 'hacer', ['v'], ['v']),
|
||||
wholeWordInflection('haríais', 'hacer', ['v'], ['v']),
|
||||
wholeWordInflection('harían', 'hacer', ['v'], ['v']),
|
||||
wholeWordInflection('pondría', 'poner', ['v'], ['v']),
|
||||
wholeWordInflection('pondrías', 'poner', ['v'], ['v']),
|
||||
wholeWordInflection('pondría', 'poner', ['v'], ['v']),
|
||||
wholeWordInflection('pondríamos', 'poner', ['v'], ['v']),
|
||||
wholeWordInflection('pondríais', 'poner', ['v'], ['v']),
|
||||
wholeWordInflection('pondrían', 'poner', ['v'], ['v']),
|
||||
wholeWordInflection('saldría', 'salir', ['v'], ['v']),
|
||||
wholeWordInflection('saldrías', 'salir', ['v'], ['v']),
|
||||
wholeWordInflection('saldría', 'salir', ['v'], ['v']),
|
||||
wholeWordInflection('saldríamos', 'salir', ['v'], ['v']),
|
||||
wholeWordInflection('saldríais', 'salir', ['v'], ['v']),
|
||||
wholeWordInflection('saldrían', 'salir', ['v'], ['v']),
|
||||
wholeWordInflection('tendría', 'tener', ['v'], ['v']),
|
||||
wholeWordInflection('tendrías', 'tener', ['v'], ['v']),
|
||||
wholeWordInflection('tendría', 'tener', ['v'], ['v']),
|
||||
wholeWordInflection('tendríamos', 'tener', ['v'], ['v']),
|
||||
wholeWordInflection('tendríais', 'tener', ['v'], ['v']),
|
||||
wholeWordInflection('tendrían', 'tener', ['v'], ['v']),
|
||||
wholeWordInflection('vendría', 'venir', ['v'], ['v']),
|
||||
wholeWordInflection('vendrías', 'venir', ['v'], ['v']),
|
||||
wholeWordInflection('vendría', 'venir', ['v'], ['v']),
|
||||
wholeWordInflection('vendríamos', 'venir', ['v'], ['v']),
|
||||
wholeWordInflection('vendríais', 'venir', ['v'], ['v']),
|
||||
wholeWordInflection('vendrían', 'venir', ['v'], ['v']),
|
||||
wholeWordInflection('querría', 'querer', ['v'], ['v']),
|
||||
wholeWordInflection('querrías', 'querer', ['v'], ['v']),
|
||||
wholeWordInflection('querría', 'querer', ['v'], ['v']),
|
||||
wholeWordInflection('querríamos', 'querer', ['v'], ['v']),
|
||||
wholeWordInflection('querríais', 'querer', ['v'], ['v']),
|
||||
wholeWordInflection('querrían', 'querer', ['v'], ['v']),
|
||||
wholeWordInflection('podría', 'poder', ['v'], ['v']),
|
||||
wholeWordInflection('podrías', 'poder', ['v'], ['v']),
|
||||
wholeWordInflection('podría', 'poder', ['v'], ['v']),
|
||||
wholeWordInflection('podríamos', 'poder', ['v'], ['v']),
|
||||
wholeWordInflection('podríais', 'poder', ['v'], ['v']),
|
||||
wholeWordInflection('podrían', 'poder', ['v'], ['v']),
|
||||
wholeWordInflection('sabría', 'saber', ['v'], ['v']),
|
||||
wholeWordInflection('sabrías', 'saber', ['v'], ['v']),
|
||||
wholeWordInflection('sabría', 'saber', ['v'], ['v']),
|
||||
wholeWordInflection('sabríamos', 'saber', ['v'], ['v']),
|
||||
wholeWordInflection('sabríais', 'saber', ['v'], ['v']),
|
||||
wholeWordInflection('sabrían', 'saber', ['v'], ['v']),
|
||||
],
|
||||
},
|
||||
'future': {
|
||||
name: 'future',
|
||||
description: 'Future form of a verb',
|
||||
rules: [
|
||||
suffixInflection('é', '', ['v'], ['v']),
|
||||
suffixInflection('ás', '', ['v'], ['v']),
|
||||
suffixInflection('á', '', ['v'], ['v']),
|
||||
suffixInflection('emos', '', ['v'], ['v']),
|
||||
suffixInflection('éis', '', ['v'], ['v']),
|
||||
suffixInflection('án', '', ['v'], ['v']),
|
||||
// irregular verbs
|
||||
suffixInflection('diré', 'decir', ['v'], ['v']),
|
||||
suffixInflection('dirás', 'decir', ['v'], ['v']),
|
||||
suffixInflection('dirá', 'decir', ['v'], ['v']),
|
||||
suffixInflection('diremos', 'decir', ['v'], ['v']),
|
||||
suffixInflection('diréis', 'decir', ['v'], ['v']),
|
||||
suffixInflection('dirán', 'decir', ['v'], ['v']),
|
||||
wholeWordInflection('haré', 'hacer', ['v'], ['v']),
|
||||
wholeWordInflection('harás', 'hacer', ['v'], ['v']),
|
||||
wholeWordInflection('hará', 'hacer', ['v'], ['v']),
|
||||
wholeWordInflection('haremos', 'hacer', ['v'], ['v']),
|
||||
wholeWordInflection('haréis', 'hacer', ['v'], ['v']),
|
||||
wholeWordInflection('harán', 'hacer', ['v'], ['v']),
|
||||
suffixInflection('pondré', 'poner', ['v'], ['v']),
|
||||
suffixInflection('pondrás', 'poner', ['v'], ['v']),
|
||||
suffixInflection('pondrá', 'poner', ['v'], ['v']),
|
||||
suffixInflection('pondremos', 'poner', ['v'], ['v']),
|
||||
suffixInflection('pondréis', 'poner', ['v'], ['v']),
|
||||
suffixInflection('pondrán', 'poner', ['v'], ['v']),
|
||||
wholeWordInflection('saldré', 'salir', ['v'], ['v']),
|
||||
wholeWordInflection('saldrás', 'salir', ['v'], ['v']),
|
||||
wholeWordInflection('saldrá', 'salir', ['v'], ['v']),
|
||||
wholeWordInflection('saldremos', 'salir', ['v'], ['v']),
|
||||
wholeWordInflection('saldréis', 'salir', ['v'], ['v']),
|
||||
wholeWordInflection('saldrán', 'salir', ['v'], ['v']),
|
||||
suffixInflection('tendré', 'tener', ['v'], ['v']),
|
||||
suffixInflection('tendrás', 'tener', ['v'], ['v']),
|
||||
suffixInflection('tendrá', 'tener', ['v'], ['v']),
|
||||
suffixInflection('tendremos', 'tener', ['v'], ['v']),
|
||||
suffixInflection('tendréis', 'tener', ['v'], ['v']),
|
||||
suffixInflection('tendrán', 'tener', ['v'], ['v']),
|
||||
suffixInflection('vendré', 'venir', ['v'], ['v']),
|
||||
suffixInflection('vendrás', 'venir', ['v'], ['v']),
|
||||
suffixInflection('vendrá', 'venir', ['v'], ['v']),
|
||||
suffixInflection('vendremos', 'venir', ['v'], ['v']),
|
||||
suffixInflection('vendréis', 'venir', ['v'], ['v']),
|
||||
suffixInflection('vendrán', 'venir', ['v'], ['v']),
|
||||
],
|
||||
},
|
||||
'present subjunctive': {
|
||||
name: 'present subjunctive',
|
||||
description: 'Present subjunctive form of a verb',
|
||||
rules: [
|
||||
// STEM-CHANGING RULES FIRST
|
||||
// e->ie for -ar
|
||||
{
|
||||
type: 'other',
|
||||
isInflected: /ie([a-z]*)(e|es|e|en)$/,
|
||||
deinflect: (term) => term.replace(/ie/, 'e').replace(/(e|es|e|en)$/, 'ar'),
|
||||
conditionsIn: ['v_ar'],
|
||||
conditionsOut: ['v_ar'],
|
||||
},
|
||||
// e->ie for -er
|
||||
{
|
||||
type: 'other',
|
||||
isInflected: /ie([a-z]*)(a|as|a|an)$/,
|
||||
deinflect: (term) => term.replace(/ie/, 'e').replace(/(a|as|a|an)$/, 'er'),
|
||||
conditionsIn: ['v_er'],
|
||||
conditionsOut: ['v_er'],
|
||||
},
|
||||
// e->ie for -ir
|
||||
{
|
||||
type: 'other',
|
||||
isInflected: /ie([a-z]*)(a|as|a|an)$/,
|
||||
deinflect: (term) => term.replace(/ie/, 'e').replace(/(a|as|a|an)$/, 'ir'),
|
||||
conditionsIn: ['v_ir'],
|
||||
conditionsOut: ['v_ir'],
|
||||
},
|
||||
// o->ue for -ar
|
||||
{
|
||||
type: 'other',
|
||||
isInflected: /ue([a-z]*)(e|es|e|en)$/,
|
||||
deinflect: (term) => {
|
||||
// "jugar" (u->ue)
|
||||
if (term.startsWith('jue')) {
|
||||
return term.replace(/ue/, 'u').replace(/(ue|ues|ue|uen)$/, 'ar');
|
||||
}
|
||||
return term.replace(/ue/, 'o').replace(/(e|es|e|en)$/, 'ar');
|
||||
},
|
||||
conditionsIn: ['v_ar'],
|
||||
conditionsOut: ['v_ar'],
|
||||
},
|
||||
// o->ue for -er
|
||||
{
|
||||
type: 'other',
|
||||
isInflected: /ue([a-z]*)(a|as|a|an)$/,
|
||||
deinflect: (term) => {
|
||||
if (term.startsWith('hue')) {
|
||||
return term.replace(/hue/, 'o').replace(/(a|as|a|an)$/, 'er');
|
||||
}
|
||||
return term.replace(/ue/, 'o').replace(/(a|as|a|an)$/, 'er');
|
||||
},
|
||||
conditionsIn: ['v_er'],
|
||||
conditionsOut: ['v_er'],
|
||||
},
|
||||
// o->ue for -ir
|
||||
{
|
||||
type: 'other',
|
||||
isInflected: /ue([a-z]*)(a|as|a|an)$/,
|
||||
deinflect: (term) => term.replace(/ue/, 'o').replace(/(a|as|a|an)$/, 'ir'),
|
||||
conditionsIn: ['v_ir'],
|
||||
conditionsOut: ['v_ir'],
|
||||
},
|
||||
// e->i for -ir
|
||||
{
|
||||
type: 'other',
|
||||
isInflected: /i([a-z]*)(a|as|a|an)$/,
|
||||
deinflect: (term) => term.replace(/i/, 'e').replace(/(a|as|a|an)$/, 'ir'),
|
||||
conditionsIn: ['v_ir'],
|
||||
conditionsOut: ['v_ir'],
|
||||
},
|
||||
// -ar verbs
|
||||
suffixInflection('e', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('es', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('e', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('emos', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('éis', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('en', 'ar', ['v_ar'], ['v_ar']),
|
||||
// -er verbs
|
||||
suffixInflection('a', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('as', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('a', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('amos', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('áis', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('an', 'er', ['v_er'], ['v_er']),
|
||||
// -ir verbs
|
||||
suffixInflection('a', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('as', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('a', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('amos', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('áis', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('an', 'ir', ['v_ir'], ['v_ir']),
|
||||
// irregular verbs
|
||||
wholeWordInflection('dé', 'dar', ['v'], ['v']),
|
||||
wholeWordInflection('des', 'dar', ['v'], ['v']),
|
||||
wholeWordInflection('dé', 'dar', ['v'], ['v']),
|
||||
wholeWordInflection('demos', 'dar', ['v'], ['v']),
|
||||
wholeWordInflection('deis', 'dar', ['v'], ['v']),
|
||||
wholeWordInflection('den', 'dar', ['v'], ['v']),
|
||||
wholeWordInflection('esté', 'estar', ['v'], ['v']),
|
||||
wholeWordInflection('estés', 'estar', ['v'], ['v']),
|
||||
wholeWordInflection('esté', 'estar', ['v'], ['v']),
|
||||
wholeWordInflection('estemos', 'estar', ['v'], ['v']),
|
||||
wholeWordInflection('estéis', 'estar', ['v'], ['v']),
|
||||
wholeWordInflection('estén', 'estar', ['v'], ['v']),
|
||||
wholeWordInflection('sea', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('seas', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('sea', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('seamos', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('seáis', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('sean', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('vaya', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('vayas', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('vaya', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('vayamos', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('vayáis', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('vayan', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('haya', 'haber', ['v'], ['v']),
|
||||
wholeWordInflection('hayas', 'haber', ['v'], ['v']),
|
||||
wholeWordInflection('haya', 'haber', ['v'], ['v']),
|
||||
wholeWordInflection('hayamos', 'haber', ['v'], ['v']),
|
||||
wholeWordInflection('hayáis', 'haber', ['v'], ['v']),
|
||||
wholeWordInflection('hayan', 'haber', ['v'], ['v']),
|
||||
wholeWordInflection('sepa', 'saber', ['v'], ['v']),
|
||||
wholeWordInflection('sepas', 'saber', ['v'], ['v']),
|
||||
wholeWordInflection('sepa', 'saber', ['v'], ['v']),
|
||||
wholeWordInflection('sepamos', 'saber', ['v'], ['v']),
|
||||
wholeWordInflection('sepáis', 'saber', ['v'], ['v']),
|
||||
wholeWordInflection('sepan', 'saber', ['v'], ['v']),
|
||||
],
|
||||
},
|
||||
'imperfect subjunctive': {
|
||||
name: 'imperfect subjunctive',
|
||||
description: 'Imperfect subjunctive form of a verb',
|
||||
rules: [
|
||||
// -ar verbs
|
||||
suffixInflection('ara', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('ase', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('aras', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('ases', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('ara', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('ase', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('áramos', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('ásemos', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('arais', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('aseis', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('aran', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('asen', 'ar', ['v_ar'], ['v_ar']),
|
||||
// -er verbs
|
||||
suffixInflection('iera', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('iese', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('ieras', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('ieses', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('iera', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('iese', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('iéramos', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('iésemos', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('ierais', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('ieseis', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('ieran', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('iesen', 'er', ['v_er'], ['v_er']),
|
||||
// -ir verbs
|
||||
suffixInflection('iera', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('iese', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('ieras', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('ieses', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('iera', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('iese', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('iéramos', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('iésemos', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('ierais', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('ieseis', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('ieran', 'ir', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('iesen', 'ir', ['v_ir'], ['v_ir']),
|
||||
// irregular verbs
|
||||
wholeWordInflection('fuera', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('fuese', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('fueras', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('fueses', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('fuera', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('fuese', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('fuéramos', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('fuésemos', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('fuerais', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('fueseis', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('fueran', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('fuesen', 'ser', ['v'], ['v']),
|
||||
wholeWordInflection('fuera', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('fuese', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('fueras', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('fueses', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('fuera', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('fuese', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('fuéramos', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('fuésemos', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('fuerais', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('fueseis', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('fueran', 'ir', ['v'], ['v']),
|
||||
wholeWordInflection('fuesen', 'ir', ['v'], ['v']),
|
||||
],
|
||||
},
|
||||
'participle': {
|
||||
name: 'participle',
|
||||
description: 'Participle form of a verb',
|
||||
rules: [
|
||||
// -ar verbs
|
||||
suffixInflection('ado', 'ar', ['adj'], ['v_ar']),
|
||||
// -er verbs
|
||||
suffixInflection('ido', 'er', ['adj'], ['v_er']),
|
||||
// -ir verbs
|
||||
suffixInflection('ido', 'ir', ['adj'], ['v_ir']),
|
||||
// irregular verbs
|
||||
suffixInflection('oído', 'oír', ['adj'], ['v']),
|
||||
wholeWordInflection('dicho', 'decir', ['adj'], ['v']),
|
||||
wholeWordInflection('escrito', 'escribir', ['adj'], ['v']),
|
||||
wholeWordInflection('hecho', 'hacer', ['adj'], ['v']),
|
||||
wholeWordInflection('muerto', 'morir', ['adj'], ['v']),
|
||||
wholeWordInflection('puesto', 'poner', ['adj'], ['v']),
|
||||
wholeWordInflection('roto', 'romper', ['adj'], ['v']),
|
||||
wholeWordInflection('visto', 'ver', ['adj'], ['v']),
|
||||
wholeWordInflection('vuelto', 'volver', ['adj'], ['v']),
|
||||
],
|
||||
},
|
||||
'reflexive': {
|
||||
name: 'reflexive',
|
||||
description: 'Reflexive form of a verb',
|
||||
rules: [
|
||||
suffixInflection('arse', 'ar', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('erse', 'er', ['v_er'], ['v_er']),
|
||||
suffixInflection('irse', 'ir', ['v_ir'], ['v_ir']),
|
||||
],
|
||||
},
|
||||
'pronoun substitution': {
|
||||
name: 'pronoun substitution',
|
||||
description: 'Substituted pronoun of a reflexive verb',
|
||||
rules: [
|
||||
suffixInflection('arme', 'arse', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('arte', 'arse', ['v_ar'], ['v_ar']),
|
||||
suffixInflection('arnos', 'arse', ['v_er'], ['v_er']),
|
||||
suffixInflection('erme', 'erse', ['v_er'], ['v_er']),
|
||||
suffixInflection('erte', 'erse', ['v_er'], ['v_er']),
|
||||
suffixInflection('ernos', 'erse', ['v_er'], ['v_er']),
|
||||
suffixInflection('irme', 'irse', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('irte', 'irse', ['v_ir'], ['v_ir']),
|
||||
suffixInflection('irnos', 'irse', ['v_ir'], ['v_ir']),
|
||||
],
|
||||
},
|
||||
'pronominal': {
|
||||
// me despertar -> despertarse
|
||||
name: 'pronominal',
|
||||
description: 'Pronominal form of a verb',
|
||||
rules: [
|
||||
{
|
||||
type: 'other',
|
||||
isInflected: new RegExp(REFLEXIVE_PATTERN),
|
||||
deinflect: (term) => {
|
||||
return term.replace(REFLEXIVE_PATTERN, (_match, _pronoun, verb, ending) => `${verb}${ending}se`);
|
||||
},
|
||||
conditionsIn: ['v'],
|
||||
conditionsOut: ['v'],
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
};
|
||||
34
vendor/yomitan/js/language/fr/french-text-preprocessors.js
vendored
Normal file
34
vendor/yomitan/js/language/fr/french-text-preprocessors.js
vendored
Normal file
@@ -0,0 +1,34 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
/** @type {import('language').BidirectionalConversionPreprocessor} */
export const apostropheVariants = {
    name: 'Search for apostrophe variants',
    description: '\' → ’ and vice versa',
    options: ['off', 'direct', 'inverse'],
    // 'direct' normalizes the ASCII apostrophe to the typographic one;
    // 'inverse' performs the opposite substitution; 'off' is a no-op.
    process: (str, setting) => {
        switch (setting) {
            case 'direct':
                return str.replaceAll('\'', '’');
            case 'inverse':
                return str.replaceAll('’', '\'');
            case 'off':
                return str;
        }
    },
};
|
||||
3858
vendor/yomitan/js/language/fr/french-transforms.js
vendored
Normal file
3858
vendor/yomitan/js/language/fr/french-transforms.js
vendored
Normal file
File diff suppressed because it is too large
Load Diff
78
vendor/yomitan/js/language/ga/irish-transforms.js
vendored
Normal file
78
vendor/yomitan/js/language/ga/irish-transforms.js
vendored
Normal file
@@ -0,0 +1,78 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import {prefixInflection} from '../language-transforms.js';
|
||||
|
||||
/** @typedef {keyof typeof conditions} Condition */
|
||||
|
||||
// Eclipsis (urú): initial-consonant mutation of Irish nouns. Each pair maps
// the eclipsed prefix back to the unmutated initial consonant.
const eclipsisPrefixInflections = [
    ['mb', 'b'], // 'mbean'
    ['gc', 'c'], // 'gclann'
    ['nd', 'd'], // 'ndul'
    ['bhf', 'f'], // 'bhfear'
    ['ng', 'g'], // 'nGaeilge'
    ['bp', 'p'], // 'bpáiste'
    ['dt', 't'], // 'dtriail'
].map(([eclipsed, base]) => prefixInflection(eclipsed, base, ['n'], ['n']));
|
||||
|
||||
// Part-of-speech conditions for the Irish transforms. A parent condition is
// satisfied by any of its subConditions.
const conditions = {
    v: {
        name: 'Verb',
        isDictionaryForm: true,
        subConditions: ['v_phr'], // phrasal verbs count as verbs
    },
    v_phr: {
        name: 'Phrasal verb',
        isDictionaryForm: true,
    },
    n: {
        name: 'Noun',
        isDictionaryForm: true,
        subConditions: ['np', 'ns'], // plural and singular nouns count as nouns
    },
    np: {
        name: 'Noun plural',
        isDictionaryForm: true,
    },
    ns: {
        name: 'Noun singular',
        isDictionaryForm: true,
    },
    adj: {
        name: 'Adjective',
        isDictionaryForm: true,
    },
    adv: {
        name: 'Adverb',
        isDictionaryForm: true,
    },
};
|
||||
|
||||
/** @type {import('language-transformer').LanguageTransformDescriptor<Condition>} */
export const irishTransforms = {
    language: 'ga',
    conditions,
    transforms: {
        // Currently only the eclipsis mutation is handled; the rule list is
        // built once above and spread in here.
        eclipsis: {
            name: 'eclipsis',
            description: 'eclipsis form of a noun',
            rules: [...eclipsisPrefixInflections],
        },
    },
};
|
||||
106
vendor/yomitan/js/language/grc/ancient-greek-processors.js
vendored
Normal file
106
vendor/yomitan/js/language/grc/ancient-greek-processors.js
vendored
Normal file
@@ -0,0 +1,106 @@
|
||||
/*
|
||||
* Copyright (C) 2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import {basicTextProcessorOptions, removeAlphabeticDiacritics} from '../text-processors.js';
|
||||
|
||||
/** @type {import('language').TextProcessor<boolean>} */
export const convertLatinToGreek = {
    name: 'Convert latin characters to greek',
    description: 'a → α, A → Α, b → β, B → Β, etc.',
    options: basicTextProcessorOptions,
    // Transliterate only when the user has enabled this processor.
    process: (str, setting) => (setting ? latinToGreek(str) : str),
};
|
||||
|
||||
/**
 * Transliterates a romanized string into Greek characters.
 * Diacritics are stripped first, then two-letter sequences (th, ph, ch, ps)
 * are converted, then single letters, and finally a trailing σ is rewritten
 * to the final-sigma form ς.
 * @param {string} latin
 * @returns {string}
 */
export function latinToGreek(latin) {
    // Strip accents so e.g. "á" matches the plain "a" mapping below.
    // (Avoids reassigning the parameter, unlike the previous version.)
    const normalized = removeAlphabeticDiacritics.process(latin, true);

    const singleMap = {
        a: 'α',
        b: 'β',
        g: 'γ',
        d: 'δ',
        e: 'ε',
        z: 'ζ',
        ē: 'η',
        i: 'ι',
        k: 'κ',
        l: 'λ',
        m: 'μ',
        n: 'ν',
        x: 'ξ',
        o: 'ο',
        p: 'π',
        r: 'ρ',
        s: 'σ',
        t: 'τ',
        u: 'υ',
        ō: 'ω',
        A: 'Α',
        B: 'Β',
        G: 'Γ',
        D: 'Δ',
        E: 'Ε',
        Z: 'Ζ',
        Ē: 'Η',
        I: 'Ι',
        K: 'Κ',
        L: 'Λ',
        M: 'Μ',
        N: 'Ν',
        X: 'Ξ',
        O: 'Ο',
        P: 'Π',
        R: 'Ρ',
        S: 'Σ',
        T: 'Τ',
        U: 'Υ',
        Ō: 'Ω',
    };

    const doubleMap = {
        th: 'θ',
        ph: 'φ',
        ch: 'χ',
        ps: 'ψ',
        Th: 'Θ',
        Ph: 'Φ',
        Ch: 'Χ',
        Ps: 'Ψ',
    };

    let result = normalized;

    // Two-letter sequences must be replaced before single letters so that
    // e.g. "th" becomes θ rather than τ followed by an unmapped "h".
    // replaceAll with a plain string replaces every occurrence and avoids
    // constructing a RegExp from each map key (no escaping hazards).
    for (const [sequence, greek] of Object.entries(doubleMap)) {
        result = result.replaceAll(sequence, greek);
    }

    for (const [letter, greek] of Object.entries(singleMap)) {
        result = result.replaceAll(letter, greek);
    }

    // A sigma at the end of the string uses the final-sigma form.
    result = result.replace(/σ$/, 'ς');

    return result;
}
|
||||
207
vendor/yomitan/js/language/grc/ancient-greek-transforms.js
vendored
Normal file
207
vendor/yomitan/js/language/grc/ancient-greek-transforms.js
vendored
Normal file
@@ -0,0 +1,207 @@
|
||||
/*
|
||||
* Copyright (C) 2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import {suffixInflection} from '../language-transforms.js';
|
||||
|
||||
// Part-of-speech conditions referenced by the Ancient Greek transform rules.
const conditions = {
    v: {
        name: 'Verb',
        isDictionaryForm: true,
    },
    n: {
        name: 'Noun',
        isDictionaryForm: true,
    },
    adj: {
        name: 'Adjective',
        isDictionaryForm: true,
    },
};
|
||||
|
||||
/** @type {import('language-transformer').LanguageTransformDescriptor<keyof typeof conditions>} */
|
||||
export const ancientGreekTransforms = {
|
||||
language: 'grc',
|
||||
conditions,
|
||||
transforms: {
|
||||
// inflections
|
||||
// verbs - active voice
|
||||
'2nd person singular present active indicative': {
|
||||
name: '2nd person singular present active indicative',
|
||||
rules: [
|
||||
suffixInflection('εις', 'ω', [], ['v']),
|
||||
suffixInflection('εις', 'εω', [], ['v']),
|
||||
],
|
||||
},
|
||||
'3rd person singular present active indicative': {
|
||||
name: '3rd person singular present active indicative',
|
||||
rules: [
|
||||
suffixInflection('ει', 'ω', [], ['v']),
|
||||
suffixInflection('ει', 'εω', [], ['v']),
|
||||
],
|
||||
},
|
||||
'1st person plural present active indicative': {
|
||||
name: '1st person plural present active indicative',
|
||||
rules: [
|
||||
suffixInflection('ομεν', 'ω', [], ['v']),
|
||||
],
|
||||
},
|
||||
'2nd person plural present active indicative': {
|
||||
name: '2nd person plural present active indicative',
|
||||
rules: [
|
||||
suffixInflection('ετε', 'ω', [], ['v']),
|
||||
],
|
||||
},
|
||||
'3rd person plural present active indicative': {
|
||||
name: '3rd person plural present active indicative',
|
||||
rules: [
|
||||
suffixInflection('ουσι', 'ω', [], ['v']),
|
||||
suffixInflection('ουσιν', 'ω', [], ['v']),
|
||||
],
|
||||
},
|
||||
// verbs - middle voice
|
||||
'2nd person singular present middle indicative': {
|
||||
name: '2nd person singular present middle indicative',
|
||||
rules: [
|
||||
suffixInflection('ῃ', 'ομαι', [], ['v']),
|
||||
suffixInflection('ει', 'ομαι', [], ['v']),
|
||||
],
|
||||
},
|
||||
'3rd person singular present middle indicative': {
|
||||
name: '3rd person singular present middle indicative',
|
||||
rules: [
|
||||
suffixInflection('εται', 'ομαι', [], ['v']),
|
||||
],
|
||||
},
|
||||
'1st person plural present middle indicative': {
|
||||
name: '1st person plural present middle indicative',
|
||||
rules: [
|
||||
suffixInflection('ομεθα', 'ομαι', [], ['v']),
|
||||
],
|
||||
},
|
||||
'2nd person plural present middle indicative': {
|
||||
name: '2nd person plural present middle indicative',
|
||||
rules: [
|
||||
suffixInflection('εσθε', 'ομαι', [], ['v']),
|
||||
],
|
||||
},
|
||||
'3rd person plural present middle indicative': {
|
||||
name: '3rd person plural present middle indicative',
|
||||
rules: [
|
||||
suffixInflection('ονται', 'ομαι', [], ['v']),
|
||||
],
|
||||
},
|
||||
// nouns
|
||||
'genitive singular': {
|
||||
name: 'genitive singular',
|
||||
rules: [
|
||||
suffixInflection('ου', 'ος', [], ['n']),
|
||||
suffixInflection('ας', 'α', [], ['n']),
|
||||
suffixInflection('ου', 'ας', [], ['n']),
|
||||
suffixInflection('ου', 'ον', [], ['n']),
|
||||
suffixInflection('ης', 'η', [], ['n']),
|
||||
],
|
||||
},
|
||||
'dative singular': {
|
||||
name: 'dative singular',
|
||||
rules: [
|
||||
suffixInflection('ω', 'ος', [], ['n']),
|
||||
suffixInflection('α', 'ας', [], ['n']),
|
||||
suffixInflection('ω', 'ον', [], ['n']),
|
||||
],
|
||||
},
|
||||
'accusative singular': {
|
||||
name: 'accusative singular',
|
||||
rules: [
|
||||
suffixInflection('ον', 'ος', [], ['n']),
|
||||
suffixInflection('αν', 'α', [], ['n']),
|
||||
suffixInflection('αν', 'ας', [], ['n']),
|
||||
suffixInflection('ην', 'η', [], ['n']),
|
||||
],
|
||||
},
|
||||
'vocative singular': {
|
||||
name: 'vocative singular',
|
||||
rules: [
|
||||
suffixInflection('ε', 'ος', [], ['n']),
|
||||
suffixInflection('α', 'ας', [], ['n']),
|
||||
suffixInflection('η', 'η', [], ['n']),
|
||||
],
|
||||
},
|
||||
'nominative plural': {
|
||||
name: 'nominative plural',
|
||||
rules: [
|
||||
suffixInflection('οι', 'ος', [], ['n']),
|
||||
suffixInflection('αι', 'α', [], ['n']),
|
||||
suffixInflection('αι', 'ας', [], ['n']),
|
||||
suffixInflection('α', 'ον', [], ['n']),
|
||||
suffixInflection('αι', 'η', [], ['n']),
|
||||
],
|
||||
},
|
||||
'genitive plural': {
|
||||
name: 'genitive plural',
|
||||
rules: [
|
||||
suffixInflection('ων', 'ος', [], ['n']),
|
||||
suffixInflection('ων', 'α', [], ['n']),
|
||||
suffixInflection('ων', 'ας', [], ['n']),
|
||||
suffixInflection('ων', 'ον', [], ['n']),
|
||||
suffixInflection('ων', 'η', [], ['n']),
|
||||
],
|
||||
},
|
||||
'dative plural': {
|
||||
name: 'dative plural',
|
||||
rules: [
|
||||
suffixInflection('οις', 'ος', [], ['n']),
|
||||
suffixInflection('αις', 'α', [], ['n']),
|
||||
suffixInflection('αις', 'ας', [], ['n']),
|
||||
suffixInflection('οις', 'ον', [], ['n']),
|
||||
suffixInflection('αις', 'η', [], ['n']),
|
||||
],
|
||||
},
|
||||
'accusative plural': {
|
||||
name: 'accusative plural',
|
||||
rules: [
|
||||
suffixInflection('ους', 'ος', [], ['n']),
|
||||
suffixInflection('ας', 'α', [], ['n']),
|
||||
suffixInflection('α', 'ον', [], ['n']),
|
||||
suffixInflection('ας', 'η', [], ['n']),
|
||||
],
|
||||
},
|
||||
'vocative plural': {
|
||||
name: 'vocative plural',
|
||||
rules: [
|
||||
suffixInflection('οι', 'ος', [], ['n']),
|
||||
suffixInflection('αι', 'α', [], ['n']),
|
||||
suffixInflection('αι', 'ας', [], ['n']),
|
||||
suffixInflection('α', 'ον', [], ['n']),
|
||||
suffixInflection('αι', 'η', [], ['n']),
|
||||
],
|
||||
},
|
||||
// adjectives
|
||||
'accusative singular masculine': {
|
||||
name: 'accusative singular masculine',
|
||||
rules: [
|
||||
suffixInflection('ον', 'ος', [], ['adj']),
|
||||
],
|
||||
},
|
||||
// word formation
|
||||
'nominalization': {
|
||||
name: 'nominalization',
|
||||
rules: [
|
||||
suffixInflection('ος', 'εω', [], ['v']),
|
||||
],
|
||||
},
|
||||
},
|
||||
};
|
||||
45
vendor/yomitan/js/language/ime-utilities.js
vendored
Normal file
45
vendor/yomitan/js/language/ime-utilities.js
vendored
Normal file
@@ -0,0 +1,45 @@
|
||||
/*
|
||||
* Copyright (C) 2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Mobile browsers report that keyboards are composing even when they are clearly sending normal input.
|
||||
* This conflicts with detection of desktop IME composing which is reported correctly.
|
||||
* If the composing input is a single alphabetic letter, it is almost certainly a mobile keyboard pretending to be composing.
|
||||
* This is not foolproof. For example a Japanese IME could try to convert `えい` to `A` which would show as "fake composing". But this is unlikely.
|
||||
* @param {InputEvent} event
|
||||
* @returns {boolean}
|
||||
*/
|
||||
export function isFakeComposing(event) {
|
||||
return !!event.data?.match(/^[A-Za-z]$/);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {InputEvent} event
|
||||
* @param {string} platform
|
||||
* @param {string} browser
|
||||
* @returns {boolean}
|
||||
*/
|
||||
export function isComposing(event, platform, browser) {
|
||||
// Desktop Composing
|
||||
if (event.isComposing && platform !== 'android') { return true; }
|
||||
|
||||
// Android Composing
|
||||
// eslint-disable-next-line sonarjs/prefer-single-boolean-return
|
||||
if (event.isComposing && !isFakeComposing(event) && platform === 'android' && browser !== 'firefox-mobile') { return true; }
|
||||
|
||||
return false;
|
||||
}
|
||||
618
vendor/yomitan/js/language/ja/japanese-kana-romaji-dicts.js
vendored
Normal file
618
vendor/yomitan/js/language/ja/japanese-kana-romaji-dicts.js
vendored
Normal file
@@ -0,0 +1,618 @@
|
||||
/*
|
||||
* Copyright (C) 2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// Mozc's default Romaji to Hiragana list referenced to create ROMAJI_TO_HIRAGANA
|
||||
// https://github.com/google/mozc/blob/035668c3452fa98ac09462fd2cf556948964aad7/src/data/preedit/romanji-hiragana.tsv
|
||||
export const ROMAJI_TO_HIRAGANA = {
|
||||
// Double letters - these **must** always be matched first or further down matches may cause inserting `っ` from double letters to require extra logic
|
||||
// There **must** be an entry for every accepted double letter
|
||||
// To not disturb further matches, an extra letter must be appended after the `っ`
|
||||
'qq': 'っq',
|
||||
'vv': 'っv',
|
||||
'll': 'っl',
|
||||
'xx': 'っx',
|
||||
'kk': 'っk',
|
||||
'gg': 'っg',
|
||||
'ss': 'っs',
|
||||
'zz': 'っz',
|
||||
'jj': 'っj',
|
||||
'tt': 'っt',
|
||||
'dd': 'っd',
|
||||
'hh': 'っh',
|
||||
'ff': 'っf',
|
||||
'bb': 'っb',
|
||||
'pp': 'っp',
|
||||
'mm': 'っm',
|
||||
'yy': 'っy',
|
||||
'rr': 'っr',
|
||||
'ww': 'っw',
|
||||
'cc': 'っc',
|
||||
|
||||
// Length 4 - longest matches
|
||||
'hwyu': 'ふゅ',
|
||||
'xtsu': 'っ',
|
||||
'ltsu': 'っ',
|
||||
|
||||
// Length 3
|
||||
'vya': 'ゔゃ',
|
||||
'vyi': 'ゔぃ',
|
||||
'vyu': 'ゔゅ',
|
||||
'vye': 'ゔぇ',
|
||||
'vyo': 'ゔょ',
|
||||
'kya': 'きゃ',
|
||||
'kyi': 'きぃ',
|
||||
'kyu': 'きゅ',
|
||||
'kye': 'きぇ',
|
||||
'kyo': 'きょ',
|
||||
'gya': 'ぎゃ',
|
||||
'gyi': 'ぎぃ',
|
||||
'gyu': 'ぎゅ',
|
||||
'gye': 'ぎぇ',
|
||||
'gyo': 'ぎょ',
|
||||
'sya': 'しゃ',
|
||||
'syi': 'しぃ',
|
||||
'syu': 'しゅ',
|
||||
'sye': 'しぇ',
|
||||
'syo': 'しょ',
|
||||
'sha': 'しゃ',
|
||||
'shi': 'し',
|
||||
'shu': 'しゅ',
|
||||
'she': 'しぇ',
|
||||
'sho': 'しょ',
|
||||
'zya': 'じゃ',
|
||||
'zyi': 'じぃ',
|
||||
'zyu': 'じゅ',
|
||||
'zye': 'じぇ',
|
||||
'zyo': 'じょ',
|
||||
'tya': 'ちゃ',
|
||||
'tyi': 'ちぃ',
|
||||
'tyu': 'ちゅ',
|
||||
'tye': 'ちぇ',
|
||||
'tyo': 'ちょ',
|
||||
'cha': 'ちゃ',
|
||||
'chi': 'ち',
|
||||
'chu': 'ちゅ',
|
||||
'che': 'ちぇ',
|
||||
'cho': 'ちょ',
|
||||
'cya': 'ちゃ',
|
||||
'cyi': 'ちぃ',
|
||||
'cyu': 'ちゅ',
|
||||
'cye': 'ちぇ',
|
||||
'cyo': 'ちょ',
|
||||
'dya': 'ぢゃ',
|
||||
'dyi': 'ぢぃ',
|
||||
'dyu': 'ぢゅ',
|
||||
'dye': 'ぢぇ',
|
||||
'dyo': 'ぢょ',
|
||||
'tsa': 'つぁ',
|
||||
'tsi': 'つぃ',
|
||||
'tse': 'つぇ',
|
||||
'tso': 'つぉ',
|
||||
'tha': 'てゃ',
|
||||
'thi': 'てぃ',
|
||||
'thu': 'てゅ',
|
||||
'the': 'てぇ',
|
||||
'tho': 'てょ',
|
||||
'dha': 'でゃ',
|
||||
'dhi': 'でぃ',
|
||||
'dhu': 'でゅ',
|
||||
'dhe': 'でぇ',
|
||||
'dho': 'でょ',
|
||||
'twa': 'とぁ',
|
||||
'twi': 'とぃ',
|
||||
'twu': 'とぅ',
|
||||
'twe': 'とぇ',
|
||||
'two': 'とぉ',
|
||||
'dwa': 'どぁ',
|
||||
'dwi': 'どぃ',
|
||||
'dwu': 'どぅ',
|
||||
'dwe': 'どぇ',
|
||||
'dwo': 'どぉ',
|
||||
'nya': 'にゃ',
|
||||
'nyi': 'にぃ',
|
||||
'nyu': 'にゅ',
|
||||
'nye': 'にぇ',
|
||||
'nyo': 'にょ',
|
||||
'hya': 'ひゃ',
|
||||
'hyi': 'ひぃ',
|
||||
'hyu': 'ひゅ',
|
||||
'hye': 'ひぇ',
|
||||
'hyo': 'ひょ',
|
||||
'bya': 'びゃ',
|
||||
'byi': 'びぃ',
|
||||
'byu': 'びゅ',
|
||||
'bye': 'びぇ',
|
||||
'byo': 'びょ',
|
||||
'pya': 'ぴゃ',
|
||||
'pyi': 'ぴぃ',
|
||||
'pyu': 'ぴゅ',
|
||||
'pye': 'ぴぇ',
|
||||
'pyo': 'ぴょ',
|
||||
'fya': 'ふゃ',
|
||||
'fyu': 'ふゅ',
|
||||
'fyo': 'ふょ',
|
||||
'hwa': 'ふぁ',
|
||||
'hwi': 'ふぃ',
|
||||
'hwe': 'ふぇ',
|
||||
'hwo': 'ふぉ',
|
||||
'mya': 'みゃ',
|
||||
'myi': 'みぃ',
|
||||
'myu': 'みゅ',
|
||||
'mye': 'みぇ',
|
||||
'myo': 'みょ',
|
||||
'rya': 'りゃ',
|
||||
'ryi': 'りぃ',
|
||||
'ryu': 'りゅ',
|
||||
'rye': 'りぇ',
|
||||
'ryo': 'りょ',
|
||||
'lyi': 'ぃ',
|
||||
'xyi': 'ぃ',
|
||||
'lye': 'ぇ',
|
||||
'xye': 'ぇ',
|
||||
'xka': 'ヵ',
|
||||
'xke': 'ヶ',
|
||||
'lka': 'ヵ',
|
||||
'lke': 'ヶ',
|
||||
'kwa': 'くぁ',
|
||||
'kwi': 'くぃ',
|
||||
'kwu': 'くぅ',
|
||||
'kwe': 'くぇ',
|
||||
'kwo': 'くぉ',
|
||||
'gwa': 'ぐぁ',
|
||||
'gwi': 'ぐぃ',
|
||||
'gwu': 'ぐぅ',
|
||||
'gwe': 'ぐぇ',
|
||||
'gwo': 'ぐぉ',
|
||||
'swa': 'すぁ',
|
||||
'swi': 'すぃ',
|
||||
'swu': 'すぅ',
|
||||
'swe': 'すぇ',
|
||||
'swo': 'すぉ',
|
||||
'zwa': 'ずぁ',
|
||||
'zwi': 'ずぃ',
|
||||
'zwu': 'ずぅ',
|
||||
'zwe': 'ずぇ',
|
||||
'zwo': 'ずぉ',
|
||||
'jya': 'じゃ',
|
||||
'jyi': 'じぃ',
|
||||
'jyu': 'じゅ',
|
||||
'jye': 'じぇ',
|
||||
'jyo': 'じょ',
|
||||
'tsu': 'つ',
|
||||
'xtu': 'っ',
|
||||
'ltu': 'っ',
|
||||
'xya': 'ゃ',
|
||||
'lya': 'ゃ',
|
||||
'wyi': 'ゐ',
|
||||
'xyu': 'ゅ',
|
||||
'lyu': 'ゅ',
|
||||
'wye': 'ゑ',
|
||||
'xyo': 'ょ',
|
||||
'lyo': 'ょ',
|
||||
'xwa': 'ゎ',
|
||||
'lwa': 'ゎ',
|
||||
'wha': 'うぁ',
|
||||
'whi': 'うぃ',
|
||||
'whu': 'う',
|
||||
'whe': 'うぇ',
|
||||
'who': 'うぉ',
|
||||
|
||||
// Length 2
|
||||
'nn': 'ん',
|
||||
'n\'': 'ん',
|
||||
'va': 'ゔぁ',
|
||||
'vi': 'ゔぃ',
|
||||
'vu': 'ゔ',
|
||||
've': 'ゔぇ',
|
||||
'vo': 'ゔぉ',
|
||||
'fa': 'ふぁ',
|
||||
'fi': 'ふぃ',
|
||||
'fe': 'ふぇ',
|
||||
'fo': 'ふぉ',
|
||||
'xn': 'ん',
|
||||
'wu': 'う',
|
||||
'xa': 'ぁ',
|
||||
'xi': 'ぃ',
|
||||
'xu': 'ぅ',
|
||||
'xe': 'ぇ',
|
||||
'xo': 'ぉ',
|
||||
'la': 'ぁ',
|
||||
'li': 'ぃ',
|
||||
'lu': 'ぅ',
|
||||
'le': 'ぇ',
|
||||
'lo': 'ぉ',
|
||||
'ye': 'いぇ',
|
||||
'ka': 'か',
|
||||
'ki': 'き',
|
||||
'ku': 'く',
|
||||
'ke': 'け',
|
||||
'ko': 'こ',
|
||||
'ga': 'が',
|
||||
'gi': 'ぎ',
|
||||
'gu': 'ぐ',
|
||||
'ge': 'げ',
|
||||
'go': 'ご',
|
||||
'sa': 'さ',
|
||||
'si': 'し',
|
||||
'su': 'す',
|
||||
'se': 'せ',
|
||||
'so': 'そ',
|
||||
'ca': 'か',
|
||||
'ci': 'し',
|
||||
'cu': 'く',
|
||||
'ce': 'せ',
|
||||
'co': 'こ',
|
||||
'qa': 'くぁ',
|
||||
'qi': 'くぃ',
|
||||
'qu': 'く',
|
||||
'qe': 'くぇ',
|
||||
'qo': 'くぉ',
|
||||
'za': 'ざ',
|
||||
'zi': 'じ',
|
||||
'zu': 'ず',
|
||||
'ze': 'ぜ',
|
||||
'zo': 'ぞ',
|
||||
'ja': 'じゃ',
|
||||
'ji': 'じ',
|
||||
'ju': 'じゅ',
|
||||
'je': 'じぇ',
|
||||
'jo': 'じょ',
|
||||
'ta': 'た',
|
||||
'ti': 'ち',
|
||||
'tu': 'つ',
|
||||
'te': 'て',
|
||||
'to': 'と',
|
||||
'da': 'だ',
|
||||
'di': 'ぢ',
|
||||
'du': 'づ',
|
||||
'de': 'で',
|
||||
'do': 'ど',
|
||||
'na': 'な',
|
||||
'ni': 'に',
|
||||
'nu': 'ぬ',
|
||||
'ne': 'ね',
|
||||
'no': 'の',
|
||||
'ha': 'は',
|
||||
'hi': 'ひ',
|
||||
'hu': 'ふ',
|
||||
'fu': 'ふ',
|
||||
'he': 'へ',
|
||||
'ho': 'ほ',
|
||||
'ba': 'ば',
|
||||
'bi': 'び',
|
||||
'bu': 'ぶ',
|
||||
'be': 'べ',
|
||||
'bo': 'ぼ',
|
||||
'pa': 'ぱ',
|
||||
'pi': 'ぴ',
|
||||
'pu': 'ぷ',
|
||||
'pe': 'ぺ',
|
||||
'po': 'ぽ',
|
||||
'ma': 'ま',
|
||||
'mi': 'み',
|
||||
'mu': 'む',
|
||||
'me': 'め',
|
||||
'mo': 'も',
|
||||
'ya': 'や',
|
||||
'yu': 'ゆ',
|
||||
'yo': 'よ',
|
||||
'ra': 'ら',
|
||||
'ri': 'り',
|
||||
'ru': 'る',
|
||||
're': 'れ',
|
||||
'ro': 'ろ',
|
||||
'wa': 'わ',
|
||||
'wi': 'うぃ',
|
||||
'we': 'うぇ',
|
||||
'wo': 'を',
|
||||
|
||||
// Length 1 - shortest matches
|
||||
'a': 'あ',
|
||||
'i': 'い',
|
||||
'u': 'う',
|
||||
'e': 'え',
|
||||
'o': 'お',
|
||||
|
||||
// Length 1 Special/Symbols
|
||||
'.': '。',
|
||||
',': '、',
|
||||
':': ':',
|
||||
'/': '・',
|
||||
'!': '!',
|
||||
'?': '?',
|
||||
'~': '〜',
|
||||
'-': 'ー',
|
||||
'‘': '「',
|
||||
'’': '」',
|
||||
'“': '『',
|
||||
'”': '』',
|
||||
'[': '[',
|
||||
']': ']',
|
||||
'(': '(',
|
||||
')': ')',
|
||||
'{': '{',
|
||||
'}': '}',
|
||||
' ': ' ',
|
||||
|
||||
// n -> ん is a special case.
|
||||
'n': 'ん',
|
||||
};
|
||||
|
||||
export const HIRAGANA_TO_ROMAJI = {
|
||||
// Length 2
|
||||
'んい': 'n\'i',
|
||||
'ゔぁ': 'va',
|
||||
'ゔぃ': 'vi',
|
||||
'ゔぉ': 'vo',
|
||||
'ゔゃ': 'vya',
|
||||
'ゔゅ': 'vyu',
|
||||
'ゔぇ': 've',
|
||||
'ゔょ': 'vyo',
|
||||
'きゃ': 'kya',
|
||||
'きぃ': 'kyi',
|
||||
'きゅ': 'kyu',
|
||||
'きぇ': 'kye',
|
||||
'きょ': 'kyo',
|
||||
'ぎゃ': 'gya',
|
||||
'ぎぃ': 'gyi',
|
||||
'ぎゅ': 'gyu',
|
||||
'ぎぇ': 'gye',
|
||||
'ぎょ': 'gyo',
|
||||
'しゃ': 'sha',
|
||||
'しぃ': 'syi',
|
||||
'しゅ': 'shu',
|
||||
'しぇ': 'she',
|
||||
'しょ': 'sho',
|
||||
'ちゃ': 'cya',
|
||||
'ちゅ': 'chu',
|
||||
'ちぇ': 'che',
|
||||
'ちょ': 'cho',
|
||||
'ちぃ': 'cyi',
|
||||
'ぢゃ': 'dya',
|
||||
'ぢぃ': 'dyi',
|
||||
'ぢゅ': 'dyu',
|
||||
'ぢぇ': 'dye',
|
||||
'ぢょ': 'dyo',
|
||||
'つぁ': 'tsa',
|
||||
'つぃ': 'tsi',
|
||||
'つぇ': 'tse',
|
||||
'つぉ': 'tso',
|
||||
'てゃ': 'tha',
|
||||
'てぃ': 'thi',
|
||||
'てゅ': 'thu',
|
||||
'てぇ': 'the',
|
||||
'てょ': 'tho',
|
||||
'でゃ': 'dha',
|
||||
'でぃ': 'dhi',
|
||||
'でゅ': 'dhu',
|
||||
'でぇ': 'dhe',
|
||||
'でょ': 'dho',
|
||||
'とぁ': 'twa',
|
||||
'とぃ': 'twi',
|
||||
'とぅ': 'twu',
|
||||
'とぇ': 'twe',
|
||||
'とぉ': 'two',
|
||||
'どぁ': 'dwa',
|
||||
'どぃ': 'dwi',
|
||||
'どぅ': 'dwu',
|
||||
'どぇ': 'dwe',
|
||||
'どぉ': 'dwo',
|
||||
'にゃ': 'nya',
|
||||
'にぃ': 'nyi',
|
||||
'にゅ': 'nyu',
|
||||
'にぇ': 'nye',
|
||||
'にょ': 'nyo',
|
||||
'ひゃ': 'hya',
|
||||
'ひぃ': 'hyi',
|
||||
'ひゅ': 'hyu',
|
||||
'ひぇ': 'hye',
|
||||
'ひょ': 'hyo',
|
||||
'びゃ': 'bya',
|
||||
'びぃ': 'byi',
|
||||
'びゅ': 'byu',
|
||||
'びぇ': 'bye',
|
||||
'びょ': 'byo',
|
||||
'ぴゃ': 'pya',
|
||||
'ぴぃ': 'pyi',
|
||||
'ぴゅ': 'pyu',
|
||||
'ぴぇ': 'pye',
|
||||
'ぴょ': 'pyo',
|
||||
'ふゃ': 'fya',
|
||||
'ふょ': 'fyo',
|
||||
'ふぁ': 'fa',
|
||||
'ふゅ': 'fyu',
|
||||
'ふぃ': 'fi',
|
||||
'ふぇ': 'fe',
|
||||
'ふぉ': 'fo',
|
||||
'みゃ': 'mya',
|
||||
'みぃ': 'myi',
|
||||
'みゅ': 'myu',
|
||||
'みぇ': 'mye',
|
||||
'みょ': 'myo',
|
||||
'りゃ': 'rya',
|
||||
'りぃ': 'ryi',
|
||||
'りゅ': 'ryu',
|
||||
'りぇ': 'rye',
|
||||
'りょ': 'ryo',
|
||||
'くぁ': 'kwa',
|
||||
'くぃ': 'kwi',
|
||||
'くぅ': 'kwu',
|
||||
'くぇ': 'kwe',
|
||||
'くぉ': 'kwo',
|
||||
'ぐぁ': 'gwa',
|
||||
'ぐぃ': 'gwi',
|
||||
'ぐぅ': 'gwu',
|
||||
'ぐぇ': 'gwe',
|
||||
'ぐぉ': 'gwo',
|
||||
'すぁ': 'swa',
|
||||
'すぃ': 'swi',
|
||||
'すぅ': 'swu',
|
||||
'すぇ': 'swe',
|
||||
'すぉ': 'swo',
|
||||
'ずぁ': 'zwa',
|
||||
'ずぃ': 'zwi',
|
||||
'ずぅ': 'zwu',
|
||||
'ずぇ': 'zwe',
|
||||
'ずぉ': 'zwo',
|
||||
'じゃ': 'ja',
|
||||
'じぃ': 'jyi',
|
||||
'じゅ': 'ju',
|
||||
'じぇ': 'je',
|
||||
'じょ': 'jo',
|
||||
'うぁ': 'wha',
|
||||
'うぃ': 'wi',
|
||||
'うぇ': 'we',
|
||||
'うぉ': 'who',
|
||||
'いぇ': 'ye',
|
||||
|
||||
// Length 1
|
||||
'ん': 'n',
|
||||
'あ': 'a',
|
||||
'い': 'i',
|
||||
'う': 'u',
|
||||
'え': 'e',
|
||||
'お': 'o',
|
||||
'ゔ': 'vu',
|
||||
'か': 'ka',
|
||||
'き': 'ki',
|
||||
'く': 'ku',
|
||||
'け': 'ke',
|
||||
'こ': 'ko',
|
||||
'が': 'ga',
|
||||
'ぎ': 'gi',
|
||||
'ぐ': 'gu',
|
||||
'げ': 'ge',
|
||||
'ご': 'go',
|
||||
'さ': 'sa',
|
||||
'し': 'shi',
|
||||
'す': 'su',
|
||||
'せ': 'se',
|
||||
'そ': 'so',
|
||||
'ざ': 'za',
|
||||
'じ': 'ji',
|
||||
'ず': 'zu',
|
||||
'ぜ': 'ze',
|
||||
'ぞ': 'zo',
|
||||
'た': 'ta',
|
||||
'ち': 'chi',
|
||||
'つ': 'tsu',
|
||||
'て': 'te',
|
||||
'と': 'to',
|
||||
'だ': 'da',
|
||||
'ぢ': 'di',
|
||||
'づ': 'du',
|
||||
'で': 'de',
|
||||
'ど': 'do',
|
||||
'な': 'na',
|
||||
'に': 'ni',
|
||||
'ぬ': 'nu',
|
||||
'ね': 'ne',
|
||||
'の': 'no',
|
||||
'は': 'ha',
|
||||
'ひ': 'hi',
|
||||
'ふ': 'fu',
|
||||
'へ': 'he',
|
||||
'ほ': 'ho',
|
||||
'ば': 'ba',
|
||||
'び': 'bi',
|
||||
'ぶ': 'bu',
|
||||
'べ': 'be',
|
||||
'ぼ': 'bo',
|
||||
'ぱ': 'pa',
|
||||
'ぴ': 'pi',
|
||||
'ぷ': 'pu',
|
||||
'ぺ': 'pe',
|
||||
'ぽ': 'po',
|
||||
'ま': 'ma',
|
||||
'み': 'mi',
|
||||
'む': 'mu',
|
||||
'め': 'me',
|
||||
'も': 'mo',
|
||||
'や': 'ya',
|
||||
'ゆ': 'yu',
|
||||
'よ': 'yo',
|
||||
'ら': 'ra',
|
||||
'り': 'ri',
|
||||
'る': 'ru',
|
||||
'れ': 're',
|
||||
'ろ': 'ro',
|
||||
'わ': 'wa',
|
||||
'ゐ': 'wi',
|
||||
'ゑ': 'we',
|
||||
'を': 'wo',
|
||||
|
||||
// Length 1 Special/Symbols
|
||||
'。': '.',
|
||||
'、': ',',
|
||||
':': ':',
|
||||
'・': '/',
|
||||
'!': '!',
|
||||
'?': '?',
|
||||
'〜': '~',
|
||||
'ー': '-',
|
||||
'「': '‘',
|
||||
'」': '’',
|
||||
'『': '“',
|
||||
'』': '”',
|
||||
'[': '[',
|
||||
']': ']',
|
||||
'(': '(',
|
||||
')': ')',
|
||||
'{': '{',
|
||||
'}': '}',
|
||||
' ': ' ',
|
||||
|
||||
// Length 1 Small - Even though these are usually represented with `x` or `l` prepending them, in romaji it makes the most sense to not do that
|
||||
'ゃ': 'ya',
|
||||
'ゅ': 'yu',
|
||||
'ょ': 'yo',
|
||||
'ゎ': 'wa',
|
||||
'ぁ': 'a',
|
||||
'ぃ': 'i',
|
||||
'ぅ': 'u',
|
||||
'ぇ': 'e',
|
||||
'ぉ': 'o',
|
||||
'ヵ': 'ka',
|
||||
'ヶ': 'ke',
|
||||
|
||||
// Double letters - these **must** always be matched last or they will break previous maches
|
||||
'っq': 'qq',
|
||||
'っv': 'vv',
|
||||
'っx': 'xx',
|
||||
'っk': 'kk',
|
||||
'っg': 'gg',
|
||||
'っs': 'ss',
|
||||
'っz': 'zz',
|
||||
'っj': 'jj',
|
||||
'っt': 'tt',
|
||||
'っd': 'dd',
|
||||
'っh': 'hh',
|
||||
'っf': 'ff',
|
||||
'っb': 'bb',
|
||||
'っp': 'pp',
|
||||
'っm': 'mm',
|
||||
'っy': 'yy',
|
||||
'っr': 'rr',
|
||||
'っw': 'ww',
|
||||
'っc': 'cc',
|
||||
|
||||
// `っん` is a special case
|
||||
'っn': 'n',
|
||||
|
||||
// single `っ` is weird, some converters just remove it, some leave the `っ` in kana, some replace with `xtsu` or `ltsu`
|
||||
'っ': '',
|
||||
};
|
||||
118
vendor/yomitan/js/language/ja/japanese-text-preprocessors.js
vendored
Normal file
118
vendor/yomitan/js/language/ja/japanese-text-preprocessors.js
vendored
Normal file
@@ -0,0 +1,118 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import {basicTextProcessorOptions} from '../text-processors.js';
|
||||
import {convertAlphabeticToKana} from './japanese-wanakana.js';
|
||||
import {
|
||||
collapseEmphaticSequences as collapseEmphaticSequencesFunction,
|
||||
convertAlphanumericToFullWidth,
|
||||
convertFullWidthAlphanumericToNormal,
|
||||
convertHalfWidthKanaToFullWidth,
|
||||
convertHiraganaToKatakana as convertHiraganaToKatakanaFunction,
|
||||
convertKatakanaToHiragana as convertKatakanaToHiraganaFunction,
|
||||
normalizeCJKCompatibilityCharacters as normalizeCJKCompatibilityCharactersFunction,
|
||||
normalizeCombiningCharacters as normalizeCombiningCharactersFunction,
|
||||
} from './japanese.js';
|
||||
import {convertVariants} from '../../../lib/kanji-processor.js';
|
||||
|
||||
/** @type {import('language').TextProcessor<boolean>} */
|
||||
export const convertHalfWidthCharacters = {
|
||||
name: 'Convert half width characters to full width',
|
||||
description: 'ヨミチャン → ヨミチャン',
|
||||
options: basicTextProcessorOptions,
|
||||
process: (str, setting) => (setting ? convertHalfWidthKanaToFullWidth(str) : str),
|
||||
};
|
||||
|
||||
/** @type {import('language').TextProcessor<boolean>} */
|
||||
export const alphabeticToHiragana = {
|
||||
name: 'Convert alphabetic characters to hiragana',
|
||||
description: 'yomichan → よみちゃん',
|
||||
options: basicTextProcessorOptions,
|
||||
process: (str, setting) => (setting ? convertAlphabeticToKana(str) : str),
|
||||
};
|
||||
|
||||
/** @type {import('language').BidirectionalConversionPreprocessor} */
|
||||
export const alphanumericWidthVariants = {
|
||||
name: 'Convert between alphabetic width variants',
|
||||
description: 'yomitan → yomitan and vice versa',
|
||||
options: ['off', 'direct', 'inverse'],
|
||||
process: (str, setting) => {
|
||||
switch (setting) {
|
||||
case 'off':
|
||||
return str;
|
||||
case 'direct':
|
||||
return convertFullWidthAlphanumericToNormal(str);
|
||||
case 'inverse':
|
||||
return convertAlphanumericToFullWidth(str);
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
/** @type {import('language').BidirectionalConversionPreprocessor} */
|
||||
export const convertHiraganaToKatakana = {
|
||||
name: 'Convert hiragana to katakana',
|
||||
description: 'よみちゃん → ヨミチャン and vice versa',
|
||||
options: ['off', 'direct', 'inverse'],
|
||||
process: (str, setting) => {
|
||||
switch (setting) {
|
||||
case 'off':
|
||||
return str;
|
||||
case 'direct':
|
||||
return convertHiraganaToKatakanaFunction(str);
|
||||
case 'inverse':
|
||||
return convertKatakanaToHiraganaFunction(str);
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
/** @type {import('language').TextProcessor<[collapseEmphatic: boolean, collapseEmphaticFull: boolean]>} */
|
||||
export const collapseEmphaticSequences = {
|
||||
name: 'Collapse emphatic character sequences',
|
||||
description: 'すっっごーーい → すっごーい / すごい',
|
||||
options: [[false, false], [true, false], [true, true]],
|
||||
process: (str, setting) => {
|
||||
const [collapseEmphatic, collapseEmphaticFull] = setting;
|
||||
if (collapseEmphatic) {
|
||||
str = collapseEmphaticSequencesFunction(str, collapseEmphaticFull);
|
||||
}
|
||||
return str;
|
||||
},
|
||||
};
|
||||
|
||||
/** @type {import('language').TextProcessor<boolean>} */
|
||||
export const normalizeCombiningCharacters = {
|
||||
name: 'Normalize combining characters',
|
||||
description: 'ド → ド (U+30C8 U+3099 → U+30C9)',
|
||||
options: basicTextProcessorOptions,
|
||||
process: (str, setting) => (setting ? normalizeCombiningCharactersFunction(str) : str),
|
||||
};
|
||||
|
||||
/** @type {import('language').TextProcessor<boolean>} */
|
||||
export const normalizeCJKCompatibilityCharacters = {
|
||||
name: 'Normalize CJK Compatibility Characters',
|
||||
description: '㌀ → アパート',
|
||||
options: basicTextProcessorOptions,
|
||||
process: (str, setting) => (setting ? normalizeCJKCompatibilityCharactersFunction(str) : str),
|
||||
};
|
||||
|
||||
/** @type {import('language').TextProcessor<boolean>} */
|
||||
export const standardizeKanji = {
|
||||
name: 'Convert kanji variants to their modern standard form',
|
||||
description: '萬 → 万',
|
||||
options: basicTextProcessorOptions,
|
||||
process: (str, setting) => (setting ? convertVariants(str) : str),
|
||||
};
|
||||
1749
vendor/yomitan/js/language/ja/japanese-transforms.js
vendored
Normal file
1749
vendor/yomitan/js/language/ja/japanese-transforms.js
vendored
Normal file
File diff suppressed because it is too large
Load Diff
149
vendor/yomitan/js/language/ja/japanese-wanakana.js
vendored
Normal file
149
vendor/yomitan/js/language/ja/japanese-wanakana.js
vendored
Normal file
@@ -0,0 +1,149 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import {HIRAGANA_TO_ROMAJI, ROMAJI_TO_HIRAGANA} from './japanese-kana-romaji-dicts.js';
|
||||
import {convertHiraganaToKatakana} from './japanese.js';
|
||||
|
||||
/**
|
||||
* @param {string} text
|
||||
* @returns {string}
|
||||
*/
|
||||
export function convertToHiragana(text) {
|
||||
let newText = text.toLowerCase();
|
||||
for (const [romaji, kana] of Object.entries(ROMAJI_TO_HIRAGANA)) {
|
||||
newText = newText.replaceAll(romaji, kana);
|
||||
}
|
||||
return fillSokuonGaps(newText);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} text
|
||||
* @param {number} selectionStart
|
||||
* @returns {import('language').KanaIMEOutput}
|
||||
*/
|
||||
export function convertToKanaIME(text, selectionStart) {
|
||||
const prevSelectionStart = selectionStart;
|
||||
const prevLength = text.length;
|
||||
let kanaString = '';
|
||||
|
||||
// If the user starts typing a single `n`, hide it from the converter. (This only applies when using the converter as an IME)
|
||||
// The converter must only allow the n to become ん when the user's text cursor is at least one character ahead of it.
|
||||
// If `n` occurs directly behind the user's text cursor, it should be hidden from the converter.
|
||||
// If `ny` occurs directly behind the user's text cursor, it must also be hidden from the converter as the user may be trying to type `nya` `nyi` `nyu` `nye` `nyo`.
|
||||
// Examples (`|` shall be the user's text cursor):
|
||||
// `たn|` does not convert to `たん|`. The `n` should be hidden from the converter and `た` should only be sent.
|
||||
// `n|の` also does not convert to `ん|の`. Even though the cursor is not at the end of the line, the `n` should still be hidden since it is directly behind the user's text cursor.
|
||||
// `ny|` does not convert to `んy|`. The `ny` must be hidden since the user may be trying to type something starting with `ny` such as `nya`.
|
||||
// `たnt|` does convert to `たんt|`. The user's text cursor is one character ahead of the `n` so it does not need to be hidden and can be converted.
|
||||
// `nとあ|` also converts to `んとあ|` The user's text cursor is two characters away from the `n`.
|
||||
// `なno|` will still convert to `なの` instead of `なんお` without issue since the `no` -> `の` conversion will be found before `n` -> `ん` and `o` -> `お`.
|
||||
// `nn|` will still convert to `ん` instead of `んん` since `nn` -> `ん` will be found before `n` -> `ん`.
|
||||
// If the user pastes in a long string of `n` such as `nnnnn|` it should leave the last `n` and convert to `んんn`
|
||||
const textLowered = text.toLowerCase();
|
||||
if (textLowered[prevSelectionStart - 1] === 'n' && textLowered.slice(0, prevSelectionStart - 1).replaceAll('nn', '').at(-1) !== 'n') {
|
||||
const n = text.slice(prevSelectionStart - 1, prevSelectionStart);
|
||||
const beforeN = text.slice(0, prevSelectionStart - 1);
|
||||
const afterN = text.slice(prevSelectionStart);
|
||||
kanaString = convertToKana(beforeN) + n + convertToKana(afterN);
|
||||
} else if (textLowered.slice(prevSelectionStart - 2, prevSelectionStart) === 'ny') {
|
||||
const ny = text.slice(prevSelectionStart - 2, prevSelectionStart);
|
||||
const beforeN = text.slice(0, prevSelectionStart - 2);
|
||||
const afterN = text.slice(prevSelectionStart);
|
||||
kanaString = convertToKana(beforeN) + ny + convertToKana(afterN);
|
||||
} else {
|
||||
kanaString = convertToKana(text);
|
||||
}
|
||||
|
||||
const selectionOffset = kanaString.length - prevLength;
|
||||
|
||||
return {kanaString, newSelectionStart: prevSelectionStart + selectionOffset};
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} text
|
||||
* @returns {string}
|
||||
*/
|
||||
export function convertToKana(text) {
|
||||
let newText = text;
|
||||
for (const [romaji, kana] of Object.entries(ROMAJI_TO_HIRAGANA)) {
|
||||
newText = newText.replaceAll(romaji, kana);
|
||||
// Uppercase text converts to katakana
|
||||
newText = newText.replaceAll(romaji.toUpperCase(), convertHiraganaToKatakana(kana).toUpperCase());
|
||||
}
|
||||
return fillSokuonGaps(newText);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} text
|
||||
* @returns {string}
|
||||
* Fills gaps in sokuons that replaceAll using ROMAJI_TO_HIRAGANA will miss due to it not running iteratively
|
||||
* Example: `ttttttttttsu` -> `っっっっっっっっっつ` would become `ttttttttttsu` -> `っtっtっtっtっつ` without filling the gaps
|
||||
*/
|
||||
function fillSokuonGaps(text) {
|
||||
return text.replaceAll(/っ[a-z](?=っ)/g, 'っっ').replaceAll(/ッ[A-Z](?=ッ)/g, 'ッッ');
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} text
|
||||
* @returns {string}
|
||||
*/
|
||||
export function convertToRomaji(text) {
|
||||
let newText = text;
|
||||
for (const [kana, romaji] of Object.entries(HIRAGANA_TO_ROMAJI)) {
|
||||
newText = newText.replaceAll(kana, romaji);
|
||||
newText = newText.replaceAll(convertHiraganaToKatakana(kana), romaji);
|
||||
}
|
||||
return newText;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} text
|
||||
* @returns {string}
|
||||
*/
|
||||
export function convertAlphabeticToKana(text) {
|
||||
let part = '';
|
||||
let result = '';
|
||||
|
||||
for (const char of text) {
|
||||
// Note: 0x61 is the character code for 'a'
|
||||
let c = /** @type {number} */ (char.codePointAt(0));
|
||||
if (c >= 0x41 && c <= 0x5a) { // ['A', 'Z']
|
||||
c += (0x61 - 0x41);
|
||||
} else if (c >= 0x61 && c <= 0x7a) { // ['a', 'z']
|
||||
// NOP; c += (0x61 - 0x61);
|
||||
} else if (c >= 0xff21 && c <= 0xff3a) { // ['A', 'Z'] fullwidth
|
||||
c += (0x61 - 0xff21);
|
||||
} else if (c >= 0xff41 && c <= 0xff5a) { // ['a', 'z'] fullwidth
|
||||
c += (0x61 - 0xff41);
|
||||
} else if (c === 0x2d || c === 0xff0d) { // '-' or fullwidth dash
|
||||
c = 0x2d; // '-'
|
||||
} else {
|
||||
if (part.length > 0) {
|
||||
result += convertToHiragana(part);
|
||||
part = '';
|
||||
}
|
||||
result += char;
|
||||
continue;
|
||||
}
|
||||
part += String.fromCodePoint(c);
|
||||
}
|
||||
|
||||
if (part.length > 0) {
|
||||
result += convertToHiragana(part);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
813
vendor/yomitan/js/language/ja/japanese.js
vendored
Normal file
813
vendor/yomitan/js/language/ja/japanese.js
vendored
Normal file
@@ -0,0 +1,813 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import {CJK_COMPATIBILITY, CJK_IDEOGRAPH_RANGES, CJK_PUNCTUATION_RANGE, FULLWIDTH_CHARACTER_RANGES, isCodePointInRange, isCodePointInRanges} from '../CJK-util.js';
|
||||
|
||||
|
||||
const HIRAGANA_SMALL_TSU_CODE_POINT = 0x3063;
|
||||
const KATAKANA_SMALL_TSU_CODE_POINT = 0x30c3;
|
||||
const KATAKANA_SMALL_KA_CODE_POINT = 0x30f5;
|
||||
const KATAKANA_SMALL_KE_CODE_POINT = 0x30f6;
|
||||
const KANA_PROLONGED_SOUND_MARK_CODE_POINT = 0x30fc;
|
||||
|
||||
/** @type {import('CJK-util').CodepointRange} */
|
||||
const HIRAGANA_RANGE = [0x3040, 0x309f];
|
||||
/** @type {import('CJK-util').CodepointRange} */
|
||||
const KATAKANA_RANGE = [0x30a0, 0x30ff];
|
||||
|
||||
/** @type {import('CJK-util').CodepointRange} */
|
||||
const HIRAGANA_CONVERSION_RANGE = [0x3041, 0x3096];
|
||||
/** @type {import('CJK-util').CodepointRange} */
|
||||
const KATAKANA_CONVERSION_RANGE = [0x30a1, 0x30f6];
|
||||
|
||||
/** @type {import('CJK-util').CodepointRange[]} */
|
||||
const KANA_RANGES = [HIRAGANA_RANGE, KATAKANA_RANGE];
|
||||
|
||||
/**
|
||||
* Japanese character ranges, roughly ordered in order of expected frequency.
|
||||
* @type {import('CJK-util').CodepointRange[]}
|
||||
*/
|
||||
const JAPANESE_RANGES = [
|
||||
HIRAGANA_RANGE,
|
||||
KATAKANA_RANGE,
|
||||
|
||||
...CJK_IDEOGRAPH_RANGES,
|
||||
|
||||
[0xff66, 0xff9f], // Halfwidth katakana
|
||||
|
||||
[0x30fb, 0x30fc], // Katakana punctuation
|
||||
[0xff61, 0xff65], // Kana punctuation
|
||||
|
||||
CJK_PUNCTUATION_RANGE,
|
||||
...FULLWIDTH_CHARACTER_RANGES,
|
||||
];
|
||||
|
||||
const SMALL_KANA_SET = new Set('ぁぃぅぇぉゃゅょゎァィゥェォャュョヮ');
|
||||
|
||||
const HALFWIDTH_KATAKANA_MAPPING = new Map([
|
||||
['・', '・--'],
|
||||
['ヲ', 'ヲヺ-'],
|
||||
['ァ', 'ァ--'],
|
||||
['ィ', 'ィ--'],
|
||||
['ゥ', 'ゥ--'],
|
||||
['ェ', 'ェ--'],
|
||||
['ォ', 'ォ--'],
|
||||
['ャ', 'ャ--'],
|
||||
['ュ', 'ュ--'],
|
||||
['ョ', 'ョ--'],
|
||||
['ッ', 'ッ--'],
|
||||
['ー', 'ー--'],
|
||||
['ア', 'ア--'],
|
||||
['イ', 'イ--'],
|
||||
['ウ', 'ウヴ-'],
|
||||
['エ', 'エ--'],
|
||||
['オ', 'オ--'],
|
||||
['カ', 'カガ-'],
|
||||
['キ', 'キギ-'],
|
||||
['ク', 'クグ-'],
|
||||
['ケ', 'ケゲ-'],
|
||||
['コ', 'コゴ-'],
|
||||
['サ', 'サザ-'],
|
||||
['シ', 'シジ-'],
|
||||
['ス', 'スズ-'],
|
||||
['セ', 'セゼ-'],
|
||||
['ソ', 'ソゾ-'],
|
||||
['タ', 'タダ-'],
|
||||
['チ', 'チヂ-'],
|
||||
['ツ', 'ツヅ-'],
|
||||
['テ', 'テデ-'],
|
||||
['ト', 'トド-'],
|
||||
['ナ', 'ナ--'],
|
||||
['ニ', 'ニ--'],
|
||||
['ヌ', 'ヌ--'],
|
||||
['ネ', 'ネ--'],
|
||||
['ノ', 'ノ--'],
|
||||
['ハ', 'ハバパ'],
|
||||
['ヒ', 'ヒビピ'],
|
||||
['フ', 'フブプ'],
|
||||
['ヘ', 'ヘベペ'],
|
||||
['ホ', 'ホボポ'],
|
||||
['マ', 'マ--'],
|
||||
['ミ', 'ミ--'],
|
||||
['ム', 'ム--'],
|
||||
['メ', 'メ--'],
|
||||
['モ', 'モ--'],
|
||||
['ヤ', 'ヤ--'],
|
||||
['ユ', 'ユ--'],
|
||||
['ヨ', 'ヨ--'],
|
||||
['ラ', 'ラ--'],
|
||||
['リ', 'リ--'],
|
||||
['ル', 'ル--'],
|
||||
['レ', 'レ--'],
|
||||
['ロ', 'ロ--'],
|
||||
['ワ', 'ワ--'],
|
||||
['ン', 'ン--'],
|
||||
]);
|
||||
|
||||
const VOWEL_TO_KANA_MAPPING = new Map([
|
||||
['a', 'ぁあかがさざただなはばぱまゃやらゎわヵァアカガサザタダナハバパマャヤラヮワヵヷ'],
|
||||
['i', 'ぃいきぎしじちぢにひびぴみりゐィイキギシジチヂニヒビピミリヰヸ'],
|
||||
['u', 'ぅうくぐすずっつづぬふぶぷむゅゆるゥウクグスズッツヅヌフブプムュユルヴ'],
|
||||
['e', 'ぇえけげせぜてでねへべぺめれゑヶェエケゲセゼテデネヘベペメレヱヶヹ'],
|
||||
['o', 'ぉおこごそぞとどのほぼぽもょよろをォオコゴソゾトドノホボポモョヨロヲヺ'],
|
||||
['', 'のノ'],
|
||||
]);
|
||||
|
||||
/** @type {Map<string, string>} */
const KANA_TO_VOWEL_MAPPING = new Map();
// Invert VOWEL_TO_KANA_MAPPING: map each individual kana character back to
// its vowel sound, for prolonged-sound-mark resolution.
for (const [vowel, characters] of VOWEL_TO_KANA_MAPPING) {
    for (const character of characters) {
        KANA_TO_VOWEL_MAPPING.set(character, vowel);
    }
}
|
||||
|
||||
// Packed table of kana diacritic triplets: every 3 characters are
// (base form, dakuten form, handakuten form), with '-' marking
// "no handakuten form exists for this base".
const kana = 'うゔ-かが-きぎ-くぐ-けげ-こご-さざ-しじ-すず-せぜ-そぞ-ただ-ちぢ-つづ-てで-とど-はばぱひびぴふぶぷへべぺほぼぽワヷ-ヰヸ-ウヴ-ヱヹ-ヲヺ-カガ-キギ-クグ-ケゲ-コゴ-サザ-シジ-スズ-セゼ-ソゾ-タダ-チヂ-ツヅ-テデ-トド-ハバパヒビピフブプヘベペホボポ';
/** @type {Map<string, {character: string, type: import('japanese-util').DiacriticType}>} */
const DIACRITIC_MAPPING = new Map();
// Build a reverse lookup from a voiced kana to its base character and
// diacritic type, e.g. 'が' -> {character: 'か', type: 'dakuten'}.
for (let i = 0, ii = kana.length; i < ii; i += 3) {
    const character = kana[i];
    const dakuten = kana[i + 1];
    const handakuten = kana[i + 2];
    DIACRITIC_MAPPING.set(dakuten, {character, type: 'dakuten'});
    if (handakuten !== '-') {
        DIACRITIC_MAPPING.set(handakuten, {character, type: 'handakuten'});
    }
}
|
||||
|
||||
/**
 * Resolves the prolonged sound mark ('ー') to the hiragana vowel that
 * extends the preceding character.
 * @param {string} previousCharacter The kana character preceding the mark.
 * @returns {?string} The hiragana vowel character, or null when the previous
 *   character has no vowel mapping.
 */
function getProlongedHiragana(previousCharacter) {
    const vowel = KANA_TO_VOWEL_MAPPING.get(previousCharacter);
    switch (vowel) {
        case 'a': return 'あ';
        case 'i': return 'い';
        case 'u': return 'う';
        case 'e': return 'え';
        case 'o': return 'う'; // Long 'o' is conventionally written with 'う' (e.g. おう)
        default: return null;
    }
}
|
||||
|
||||
/**
 * Builds a single furigana segment pairing a span of text with its reading.
 * @param {string} text The text span of the segment.
 * @param {string} reading The reading for the span; empty when it matches the text.
 * @returns {import('japanese-util').FuriganaSegment} The new segment object.
 */
function createFuriganaSegment(text, reading) {
    const segment = {text, reading};
    return segment;
}
|
||||
|
||||
/**
 * Recursively aligns a reading against a sequence of kana/non-kana groups,
 * producing furigana segments, or null when no unambiguous alignment exists.
 * @param {string} reading The remaining portion of the reading.
 * @param {string} readingNormalized The remaining reading with katakana folded to hiragana.
 * @param {import('japanese-util').FuriganaGroup[]} groups All groups of the term.
 * @param {number} groupsStart Index of the first group still to be matched.
 * @returns {?(import('japanese-util').FuriganaSegment[])} The segments, or null on failure/ambiguity.
 */
function segmentizeFurigana(reading, readingNormalized, groups, groupsStart) {
    const groupCount = groups.length - groupsStart;
    if (groupCount <= 0) {
        // All groups consumed: success only if the reading is also fully consumed.
        return reading.length === 0 ? [] : null;
    }

    const group = groups[groupsStart];
    const {isKana, text} = group;
    const textLength = text.length;
    if (isKana) {
        // Kana groups must match the reading exactly (after normalization),
        // so they consume a fixed-length prefix of the reading.
        const {textNormalized} = group;
        if (textNormalized !== null && readingNormalized.startsWith(textNormalized)) {
            const segments = segmentizeFurigana(
                reading.substring(textLength),
                readingNormalized.substring(textLength),
                groups,
                groupsStart + 1,
            );
            if (segments !== null) {
                if (reading.startsWith(text)) {
                    // Exact match (same script): no furigana needed on this span.
                    segments.unshift(createFuriganaSegment(text, ''));
                } else {
                    // Matches only after normalization (e.g. katakana vs hiragana);
                    // split into matching/non-matching runs.
                    segments.unshift(...getFuriganaKanaSegments(text, reading));
                }
                return segments;
            }
        }
        return null;
    } else {
        // Non-kana (kanji) groups can consume any reading length >= their text
        // length; try the longest consumption first and backtrack.
        let result = null;
        for (let i = reading.length; i >= textLength; --i) {
            const segments = segmentizeFurigana(
                reading.substring(i),
                readingNormalized.substring(i),
                groups,
                groupsStart + 1,
            );
            if (segments !== null) {
                if (result !== null) {
                    // More than one way to segmentize the tail; mark as ambiguous
                    return null;
                }
                const segmentReading = reading.substring(0, i);
                segments.unshift(createFuriganaSegment(text, segmentReading));
                result = segments;
            }
            // There is only one way to segmentize the last non-kana group
            if (groupCount === 1) {
                break;
            }
        }
        return result;
    }
}
|
||||
|
||||
/**
 * Splits a kana span into alternating runs of characters that do / don't
 * match the reading character-for-character, assigning readings only to
 * the non-matching runs.
 * @param {string} text The kana text span.
 * @param {string} reading The reading, same length as `text`.
 * @returns {import('japanese-util').FuriganaSegment[]} The resulting segments.
 */
function getFuriganaKanaSegments(text, reading) {
    const segments = [];
    const length = text.length;
    let runStart = 0;
    let runMatches = (reading[0] === text[0]);
    for (let i = 1; i < length; ++i) {
        const matches = (reading[i] === text[i]);
        if (matches === runMatches) { continue; }
        // Run boundary: emit the previous run. Matching runs need no reading.
        segments.push(createFuriganaSegment(text.substring(runStart, i), runMatches ? '' : reading.substring(runStart, i)));
        runMatches = matches;
        runStart = i;
    }
    // Emit the final run.
    segments.push(createFuriganaSegment(text.substring(runStart, length), runMatches ? '' : reading.substring(runStart, length)));
    return segments;
}
|
||||
|
||||
/**
 * Computes the length (in UTF-16 code units) of the shared prefix of two
 * strings, never splitting a surrogate pair at the boundary.
 * @param {string} text1 The first string.
 * @param {string} text2 The second string.
 * @returns {number} The length of the common prefix.
 */
function getStemLength(text1, text2) {
    const minLength = Math.min(text1.length, text2.length);
    if (minLength === 0) { return 0; }

    let index = 0;
    while (index < minLength) {
        const codePoint1 = /** @type {number} */ (text1.codePointAt(index));
        const codePoint2 = /** @type {number} */ (text2.codePointAt(index));
        if (codePoint1 !== codePoint2) { break; }
        const nextIndex = index + String.fromCodePoint(codePoint1).length;
        if (nextIndex > minLength) { break; } // Don't consume a partial UTF-16 surrogate pair
        index = nextIndex;
    }
    return index;
}
|
||||
|
||||
|
||||
// Character code testing functions
|
||||
|
||||
/**
 * Determines whether a code point is a CJK ideograph (kanji).
 * @param {number} codePoint The Unicode code point to test.
 * @returns {boolean} `true` if the code point falls in any CJK ideograph range.
 */
export function isCodePointKanji(codePoint) {
    return isCodePointInRanges(codePoint, CJK_IDEOGRAPH_RANGES);
}
|
||||
|
||||
/**
 * Determines whether a code point is hiragana or katakana.
 * @param {number} codePoint The Unicode code point to test.
 * @returns {boolean} `true` if the code point falls in a kana range.
 */
export function isCodePointKana(codePoint) {
    return isCodePointInRanges(codePoint, KANA_RANGES);
}
|
||||
|
||||
/**
 * Determines whether a code point belongs to Japanese text (kana, kanji,
 * halfwidth katakana, kana/CJK punctuation, or fullwidth forms).
 * @param {number} codePoint The Unicode code point to test.
 * @returns {boolean} `true` if the code point falls in any Japanese range.
 */
export function isCodePointJapanese(codePoint) {
    return isCodePointInRanges(codePoint, JAPANESE_RANGES);
}
|
||||
|
||||
|
||||
// String testing functions
|
||||
|
||||
/**
 * Checks whether a string is non-empty and consists exclusively of kana.
 * @param {string} str The string to test.
 * @returns {boolean} `true` when every character is kana; `false` for the empty string.
 */
export function isStringEntirelyKana(str) {
    if (str.length === 0) { return false; }
    for (const character of str) {
        const codePoint = /** @type {number} */ (character.codePointAt(0));
        if (!isCodePointInRanges(codePoint, KANA_RANGES)) { return false; }
    }
    return true;
}
|
||||
|
||||
/**
 * Checks whether a string contains at least one Japanese character.
 * @param {string} str The string to test.
 * @returns {boolean} `true` when any character is Japanese; `false` for the empty string.
 */
export function isStringPartiallyJapanese(str) {
    if (str.length === 0) { return false; }
    for (const character of str) {
        const codePoint = /** @type {number} */ (character.codePointAt(0));
        if (isCodePointInRanges(codePoint, JAPANESE_RANGES)) { return true; }
    }
    return false;
}
|
||||
|
||||
|
||||
// Mora functions
|
||||
|
||||
/**
 * Determines whether the mora at a given index is pronounced with high pitch.
 * @param {number} moraIndex Zero-based index of the mora.
 * @param {number | string} pitchAccentValue Either a downstep position number,
 *   or an explicit pitch pattern string such as 'LHHL'.
 * @returns {boolean} `true` when the mora is high-pitched.
 */
export function isMoraPitchHigh(moraIndex, pitchAccentValue) {
    if (typeof pitchAccentValue === 'string') {
        // Explicit pattern string: 'H' marks a high mora at that position.
        return pitchAccentValue[moraIndex] === 'H';
    }
    // Numeric downstep position.
    if (pitchAccentValue === 0) { return moraIndex > 0; } // heiban: low first mora, high afterwards
    if (pitchAccentValue === 1) { return moraIndex < 1; } // atamadaka: only the first mora is high
    return moraIndex > 0 && moraIndex < pitchAccentValue; // high between first mora and the downstep
}
|
||||
|
||||
/**
 * Classifies a pitch accent into a named category (heiban, kifuku,
 * atamadaka, odaka, nakadaka).
 * @param {string} text The kana text the accent applies to.
 * @param {number | string} pitchAccentValue Downstep position or pitch pattern string.
 * @param {boolean} isVerbOrAdjective Whether the word is a verb or adjective.
 * @returns {?import('japanese-util').PitchCategory} The category, or null when unclassifiable.
 */
export function getPitchCategory(text, pitchAccentValue, isVerbOrAdjective) {
    // Pattern strings are reduced to their first downstep position.
    const downstep = typeof pitchAccentValue === 'string' ? getDownstepPositions(pitchAccentValue)[0] : pitchAccentValue;
    if (downstep === 0) { return 'heiban'; }
    if (isVerbOrAdjective) {
        // Verbs and adjectives are either heiban or kifuku; no other category applies.
        return downstep > 0 ? 'kifuku' : null;
    }
    if (downstep === 1) { return 'atamadaka'; }
    if (downstep > 1) {
        // A downstep on the final mora is odaka; earlier is nakadaka.
        return downstep >= getKanaMoraCount(text) ? 'odaka' : 'nakadaka';
    }
    return null;
}
|
||||
|
||||
/**
 * Finds downstep positions in an explicit pitch pattern string.
 * @param {string} pitchString A pattern of 'H'/'L' characters, one per mora.
 * @returns {number[]} Downstep positions; when no H→L transition exists,
 *   a single sentinel: 0 for heiban-like (starts low), -1 otherwise.
 */
export function getDownstepPositions(pitchString) {
    const downsteps = [];
    for (let i = 1, ii = pitchString.length; i < ii; i++) {
        // A downstep occurs wherever a high mora is immediately followed by a low one.
        if (pitchString[i - 1] === 'H' && pitchString[i] === 'L') {
            downsteps.push(i);
        }
    }
    if (downsteps.length === 0) {
        downsteps.push(pitchString.startsWith('L') ? 0 : -1);
    }
    return downsteps;
}
|
||||
|
||||
/**
 * Splits kana text into morae, attaching small kana to the preceding mora.
 * @param {string} text The kana text to split.
 * @returns {string[]} The list of morae.
 */
export function getKanaMorae(text) {
    /** @type {string[]} */
    const morae = [];
    for (const character of text) {
        // Small kana (ゃ, ュ, etc.) combine with the previous mora rather than
        // forming their own; a leading small kana still starts a new mora.
        if (SMALL_KANA_SET.has(character) && morae.length > 0) {
            morae[morae.length - 1] += character;
        } else {
            morae.push(character);
        }
    }
    return morae;
}
|
||||
|
||||
/**
 * Counts the morae in kana text; small kana do not count as separate morae.
 * @param {string} text The kana text to count.
 * @returns {number} The number of morae.
 */
export function getKanaMoraCount(text) {
    let count = 0;
    for (const character of text) {
        // A small kana merges into the previous mora, so it adds nothing —
        // unless it appears first, in which case it must start a mora.
        const isCombiningSmallKana = SMALL_KANA_SET.has(character) && count > 0;
        if (!isCombiningSmallKana) { ++count; }
    }
    return count;
}
|
||||
|
||||
|
||||
// Conversion functions
|
||||
|
||||
/**
 * Converts katakana characters to hiragana, leaving everything else intact.
 * @param {string} text The text to convert.
 * @param {boolean} [keepProlongedSoundMarks] When `false` (the default), 'ー'
 *   is replaced with the hiragana vowel extending the previous character.
 * @returns {string} The converted text.
 */
export function convertKatakanaToHiragana(text, keepProlongedSoundMarks = false) {
    const offset = HIRAGANA_CONVERSION_RANGE[0] - KATAKANA_CONVERSION_RANGE[0];
    let result = '';
    for (const char of text) {
        const codePoint = /** @type {number} */ (char.codePointAt(0));
        let converted = char;
        if (codePoint === KATAKANA_SMALL_KA_CODE_POINT || codePoint === KATAKANA_SMALL_KE_CODE_POINT) {
            // Small ヵ/ヶ have no hiragana counterparts used in practice; keep as-is
            // even though they fall inside the conversion range below.
        } else if (codePoint === KANA_PROLONGED_SOUND_MARK_CODE_POINT) {
            if (!keepProlongedSoundMarks && result.length > 0) {
                const prolonged = getProlongedHiragana(result[result.length - 1]);
                if (prolonged !== null) { converted = prolonged; }
            }
        } else if (isCodePointInRange(codePoint, KATAKANA_CONVERSION_RANGE)) {
            converted = String.fromCodePoint(codePoint + offset);
        }
        result += converted;
    }
    return result;
}
|
||||
|
||||
/**
 * Converts hiragana characters to katakana, leaving everything else intact.
 * @param {string} text The text to convert.
 * @returns {string} The converted text.
 */
export function convertHiraganaToKatakana(text) {
    const offset = KATAKANA_CONVERSION_RANGE[0] - HIRAGANA_CONVERSION_RANGE[0];
    let result = '';
    for (const char of text) {
        const codePoint = /** @type {number} */ (char.codePointAt(0));
        result += isCodePointInRange(codePoint, HIRAGANA_CONVERSION_RANGE) ?
            String.fromCodePoint(codePoint + offset) :
            char;
    }
    return result;
}
|
||||
|
||||
/**
|
||||
* @param {string} text
|
||||
* @returns {string}
|
||||
*/
|
||||
export function convertAlphanumericToFullWidth(text) {
|
||||
let result = '';
|
||||
for (const char of text) {
|
||||
let c = /** @type {number} */ (char.codePointAt(0));
|
||||
if (c >= 0x30 && c <= 0x39) { // ['0', '9']
|
||||
c += 0xff10 - 0x30; // 0xff10 = '0' full width
|
||||
} else if (c >= 0x41 && c <= 0x5a) { // ['A', 'Z']
|
||||
c += 0xff21 - 0x41; // 0xff21 = 'A' full width
|
||||
} else if (c >= 0x61 && c <= 0x7a) { // ['a', 'z']
|
||||
c += 0xff41 - 0x61; // 0xff41 = 'a' full width
|
||||
}
|
||||
result += String.fromCodePoint(c);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Converts fullwidth digits and letters to their halfwidth ASCII forms.
 * @param {string} text The text to convert.
 * @returns {string} The converted text.
 */
export function convertFullWidthAlphanumericToNormal(text) {
    let result = '';
    // Iterate by code point, consistent with convertAlphanumericToFullWidth,
    // so astral characters are passed through whole instead of being processed
    // as individual surrogate halves. (The conversion ranges are BMP-only, so
    // the output is unchanged; this is a robustness/consistency improvement.)
    for (const char of text) {
        let c = /** @type {number} */ (char.codePointAt(0));
        if (c >= 0xff10 && c <= 0xff19) { // ['0', '9']
            c -= 0xff10 - 0x30; // 0x30 = '0'
        } else if (c >= 0xff21 && c <= 0xff3a) { // ['A', 'Z']
            c -= 0xff21 - 0x41; // 0x41 = 'A'
        } else if (c >= 0xff41 && c <= 0xff5a) { // ['a', 'z']
            c -= 0xff41 - 0x61; // 0x61 = 'a'
        }
        result += String.fromCodePoint(c);
    }
    return result;
}
|
||||
|
||||
/**
 * Converts halfwidth katakana to fullwidth katakana, combining trailing
 * halfwidth dakuten/handakuten marks into single precomposed characters.
 * @param {string} text The text to convert.
 * @returns {string} The converted text.
 */
export function convertHalfWidthKanaToFullWidth(text) {
    let result = '';

    // This function is safe to use charCodeAt instead of codePointAt, since all
    // the relevant characters are represented with a single UTF-16 character code.
    for (let i = 0, ii = text.length; i < ii; ++i) {
        const c = text[i];
        // Each mapping value is a 3-character string: (plain, dakuten, handakuten),
        // with '-' marking a form that does not exist for this character.
        const mapping = HALFWIDTH_KATAKANA_MAPPING.get(c);
        if (typeof mapping !== 'string') {
            // Not a halfwidth katakana character; pass through unchanged.
            result += c;
            continue;
        }

        // Peek at the next character to see if it's a combining sound mark.
        // charCodeAt returns NaN past the end of the string, matching neither case.
        let index = 0;
        switch (text.charCodeAt(i + 1)) {
            case 0xff9e: // Dakuten
                index = 1;
                break;
            case 0xff9f: // Handakuten
                index = 2;
                break;
        }

        let c2 = mapping[index];
        if (index > 0) {
            if (c2 === '-') { // Invalid
                // No voiced form exists; emit the plain character and leave the
                // sound mark to be processed on the next iteration.
                index = 0;
                c2 = mapping[0];
            } else {
                // Consumed the sound mark along with the base character.
                ++i;
            }
        }

        result += c2;
    }

    return result;
}
|
||||
|
||||
/**
 * Looks up the base character and diacritic type for a voiced kana character.
 * @param {string} character A single kana character, e.g. 'が'.
 * @returns {?{character: string, type: import('japanese-util').DiacriticType}}
 *   The base character and diacritic type, or null when the character has no diacritic.
 */
export function getKanaDiacriticInfo(character) {
    const info = DIACRITIC_MAPPING.get(character);
    if (typeof info === 'undefined') { return null; }
    // Return a fresh object so callers cannot mutate the shared mapping entry.
    return {character: info.character, type: info.type};
}
|
||||
|
||||
/**
 * Checks whether a kana code point may combine with a dakuten mark.
 * @param {number} codePoint The code point of the potential base character.
 * @returns {boolean} `true` when a precomposed dakuten form should be produced.
 */
function dakutenAllowed(codePoint) {
    // Hiragana か..と and は..ほ rows, katakana カ..ト and ハ..ホ rows.
    // To reduce processing time some characters which shouldn't have dakuten
    // but are highly unlikely to have a combining character attached are included.
    const inHiragana = (codePoint >= 0x304B && codePoint <= 0x3068) || (codePoint >= 0x306F && codePoint <= 0x307B);
    const inKatakana = (codePoint >= 0x30AB && codePoint <= 0x30C8) || (codePoint >= 0x30CF && codePoint <= 0x30DB);
    return inHiragana || inKatakana;
}
|
||||
|
||||
/**
 * Checks whether a kana code point may combine with a handakuten mark.
 * @param {number} codePoint The code point of the potential base character.
 * @returns {boolean} `true` when a precomposed handakuten form should be produced.
 */
function handakutenAllowed(codePoint) {
    // Hiragana は..ほ row and katakana ハ..ホ row.
    // To reduce processing time some characters which shouldn't have handakuten
    // but are highly unlikely to have a combining character attached are included.
    const inHiragana = codePoint >= 0x306F && codePoint <= 0x307B;
    const inKatakana = codePoint >= 0x30CF && codePoint <= 0x30DB;
    return inHiragana || inKatakana;
}
|
||||
|
||||
/**
 * Replaces combining dakuten (U+3099) and handakuten (U+309A) sequences with
 * their precomposed kana equivalents. Iterates right-to-left so that a
 * consumed pair cannot be re-examined.
 * @param {string} text The text to normalize.
 * @returns {string} The normalized text.
 */
export function normalizeCombiningCharacters(text) {
    let result = '';
    let i = text.length - 1;
    // Ignoring the first character is intentional, it cannot combine with anything
    while (i > 0) {
        if (text[i] === '\u3099') {
            const dakutenCombinee = text[i - 1].codePointAt(0);
            if (dakutenCombinee && dakutenAllowed(dakutenCombinee)) {
                // Precomposed dakuten form is always base code point + 1.
                result = String.fromCodePoint(dakutenCombinee + 1) + result;
                i -= 2;
                continue;
            }
        } else if (text[i] === '\u309A') {
            const handakutenCombinee = text[i - 1].codePointAt(0);
            if (handakutenCombinee && handakutenAllowed(handakutenCombinee)) {
                // Precomposed handakuten form is always base code point + 2.
                result = String.fromCodePoint(handakutenCombinee + 2) + result;
                i -= 2;
                continue;
            }
        }
        result = text[i] + result;
        i--;
    }
    // i === -1 when first two characters are combined
    if (i === 0) {
        result = text[0] + result;
    }
    return result;
}
|
||||
|
||||
/**
 * Replaces CJK compatibility characters with their canonical equivalents
 * via NFKD normalization, leaving all other characters untouched.
 * @param {string} text The text to normalize.
 * @returns {string} The normalized text.
 */
export function normalizeCJKCompatibilityCharacters(text) {
    let result = '';
    for (let i = 0; i < text.length; i++) {
        const current = text[i];
        const codePoint = current.codePointAt(0);
        // Only characters inside the compatibility block are decomposed;
        // applying NFKD globally would also alter unrelated characters.
        const isCompatibility = Boolean(codePoint) && isCodePointInRange(/** @type {number} */ (codePoint), CJK_COMPATIBILITY);
        result += isCompatibility ? current.normalize('NFKD') : current;
    }
    return result;
}
|
||||
|
||||
// Furigana distribution
|
||||
|
||||
/**
 * Distributes a reading across a term, assigning furigana only to the
 * non-kana (kanji) portions where an unambiguous alignment exists.
 * @param {string} term The dictionary term.
 * @param {string} reading The term's reading.
 * @returns {import('japanese-util').FuriganaSegment[]} The furigana segments;
 *   falls back to a single segment covering the whole term when alignment fails.
 */
export function distributeFurigana(term, reading) {
    if (reading === term) {
        // Same
        return [createFuriganaSegment(term, '')];
    }

    // Partition the term into maximal runs of kana / non-kana characters.
    /** @type {import('japanese-util').FuriganaGroup[]} */
    const groups = [];
    /** @type {?import('japanese-util').FuriganaGroup} */
    let groupPre = null;
    let isKanaPre = null;
    for (const c of term) {
        const codePoint = /** @type {number} */ (c.codePointAt(0));
        const isKana = isCodePointKana(codePoint);
        if (isKana === isKanaPre) {
            /** @type {import('japanese-util').FuriganaGroup} */ (groupPre).text += c;
        } else {
            groupPre = {isKana, text: c, textNormalized: null};
            groups.push(groupPre);
            isKanaPre = isKana;
        }
    }
    // Fold katakana to hiragana so kana groups can match readings in either script.
    for (const group of groups) {
        if (group.isKana) {
            group.textNormalized = convertKatakanaToHiragana(group.text);
        }
    }

    const readingNormalized = convertKatakanaToHiragana(reading);
    const segments = segmentizeFurigana(reading, readingNormalized, groups, 0);
    if (segments !== null) {
        return segments;
    }

    // Fallback
    return [createFuriganaSegment(term, reading)];
}
|
||||
|
||||
/**
 * Distributes furigana over an inflected surface form: the shared stem of the
 * term (or reading) and the source text receives furigana via
 * distributeFurigana, and the inflected tail of the source is appended
 * without a reading.
 * @param {string} term The dictionary (uninflected) term.
 * @param {string} reading The term's reading.
 * @param {string} source The inflected surface text actually matched.
 * @returns {import('japanese-util').FuriganaSegment[]} The furigana segments for `source`.
 */
export function distributeFuriganaInflected(term, reading, source) {
    // Normalize all three to hiragana so stems match across scripts.
    const termNormalized = convertKatakanaToHiragana(term);
    const readingNormalized = convertKatakanaToHiragana(reading);
    const sourceNormalized = convertKatakanaToHiragana(source);

    let mainText = term;
    let stemLength = getStemLength(termNormalized, sourceNormalized);

    // Check if source is derived from the reading instead of the term
    const readingStemLength = getStemLength(readingNormalized, sourceNormalized);
    if (readingStemLength > 0 && readingStemLength >= stemLength) {
        mainText = reading;
        stemLength = readingStemLength;
        // Rewrite the reading so its stem uses the source's original script.
        reading = `${source.substring(0, stemLength)}${reading.substring(stemLength)}`;
    }

    const segments = [];
    if (stemLength > 0) {
        // Replace the stem portion of mainText with the source's exact text,
        // then distribute the reading and keep only segments within the stem.
        mainText = `${source.substring(0, stemLength)}${mainText.substring(stemLength)}`;
        const segments2 = distributeFurigana(mainText, reading);
        let consumed = 0;
        for (const segment of segments2) {
            const {text} = segment;
            const start = consumed;
            consumed += text.length;
            if (consumed < stemLength) {
                segments.push(segment);
            } else if (consumed === stemLength) {
                segments.push(segment);
                break;
            } else {
                // Segment straddles the stem boundary: keep only the stem part,
                // with no reading, then stop.
                if (start < stemLength) {
                    segments.push(createFuriganaSegment(mainText.substring(start, stemLength), ''));
                }
                break;
            }
        }
    }

    if (stemLength < source.length) {
        // The inflected ending after the stem never gets furigana.
        const remainder = source.substring(stemLength);
        const segmentCount = segments.length;
        if (segmentCount > 0 && segments[segmentCount - 1].reading.length === 0) {
            // Append to the last segment if it has an empty reading
            segments[segmentCount - 1].text += remainder;
        } else {
            // Otherwise, create a new segment
            segments.push(createFuriganaSegment(remainder, ''));
        }
    }

    return segments;
}
|
||||
|
||||
|
||||
// Miscellaneous
|
||||
|
||||
/**
 * Checks whether a code point is an emphatic kana character: small tsu
 * (hiragana or katakana) or the prolonged sound mark.
 * @param {number} codePoint The code point to test.
 * @returns {boolean} `true` when the code point is emphatic.
 */
export function isEmphaticCodePoint(codePoint) {
    switch (codePoint) {
        case HIRAGANA_SMALL_TSU_CODE_POINT:
        case KATAKANA_SMALL_TSU_CODE_POINT:
        case KANA_PROLONGED_SOUND_MARK_CODE_POINT:
            return true;
        default:
            return false;
    }
}
|
||||
|
||||
/**
 * Collapses runs of emphatic characters (small tsu, prolonged sound mark) in
 * the interior of the text. Leading and trailing emphatic runs are preserved
 * untouched. With `fullCollapse`, interior emphatic characters are removed
 * entirely; otherwise each run is reduced to a single character.
 * @param {string} text The text to process.
 * @param {boolean} fullCollapse Whether to remove interior emphatics completely.
 * @returns {string} The processed text.
 */
export function collapseEmphaticSequences(text, fullCollapse) {
    // Find the first non-emphatic character from the left...
    let left = 0;
    while (left < text.length && isEmphaticCodePoint(/** @type {number} */ (text.codePointAt(left)))) {
        ++left;
    }
    // ...and from the right.
    let right = text.length - 1;
    while (right >= 0 && isEmphaticCodePoint(/** @type {number} */ (text.codePointAt(right)))) {
        --right;
    }
    // Whole string is emphatic
    if (left > right) {
        return text;
    }

    const leadingEmphatics = text.substring(0, left);
    const trailingEmphatics = text.substring(right + 1);
    let middle = '';
    // Tracks the emphatic code point currently being collapsed; -1 when the
    // previous character was not emphatic.
    let currentCollapsedCodePoint = -1;

    for (let i = left; i <= right; ++i) {
        const char = text[i];
        const codePoint = /** @type {number} */ (char.codePointAt(0));
        if (isEmphaticCodePoint(codePoint)) {
            if (currentCollapsedCodePoint !== codePoint) {
                // Start of a new emphatic run: keep one character unless fully collapsing.
                currentCollapsedCodePoint = codePoint;
                if (!fullCollapse) {
                    middle += char;
                    continue;
                }
            }
        } else {
            currentCollapsedCodePoint = -1;
            middle += char;
        }
    }

    return leadingEmphatics + middle + trailingEmphatics;
}
|
||||
93
vendor/yomitan/js/language/ka/georgian-transforms.js
vendored
Normal file
93
vendor/yomitan/js/language/ka/georgian-transforms.js
vendored
Normal file
@@ -0,0 +1,93 @@
|
||||
/*
|
||||
* Copyright (C) 2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import {suffixInflection} from '../language-transforms.js';
|
||||
|
||||
// Declension and postposition suffixes stripped from Georgian nouns and
// adjectives to recover the stem.
const suffixes = [
    'ები',
    'ებს',
    'ებების', // plural suffixes
    'მა', // ergative
    'ს', // dative
    'ის', // genitive
    'ით', // instrumental
    'ად', // adverbial
    'ო', // vocative
    'ში', // locative postposition ("in")
    'ზე', // locative postposition ("on")
    'შია', // "is in"
    'ზეა', // "is on"
];

// Stem completion (for consonant endings)
const stemCompletionRules = [
    suffixInflection('გნ', 'გნი', ['n', 'adj'], ['n', 'adj']),
    suffixInflection('ნ', 'ნი', ['n', 'adj'], ['n', 'adj']),
];

// Vowel restoration example (optional, extend as needed)
const vowelRestorationRules = [
    suffixInflection('გ', 'გა', ['n', 'adj'], ['n', 'adj']),
];

/**
 * Language-transform descriptor for Georgian (ISO 639-3 code 'kat'):
 * part-of-speech conditions plus the deinflection rule sets above.
 */
export const georgianTransforms = {
    language: 'kat',
    conditions: {
        v: {
            name: 'Verb',
            isDictionaryForm: true,
        },
        n: {
            name: 'Noun',
            isDictionaryForm: true,
            subConditions: ['np', 'ns'],
        },
        np: {
            name: 'Noun plural',
            isDictionaryForm: true,
        },
        ns: {
            name: 'Noun singular',
            isDictionaryForm: true,
        },
        adj: {
            name: 'Adjective',
            isDictionaryForm: true,
        },
        adv: {
            name: 'Adverb',
            isDictionaryForm: true,
        },
    },
    transforms: {
        nounAdjSuffixStripping: {
            name: 'noun-adj-suffix-stripping',
            description: 'Strip Georgian noun and adjective declension suffixes',
            rules: suffixes.map((suffix) => suffixInflection(suffix, '', ['n', 'adj'], ['n', 'adj'])),
        },
        nounAdjStemCompletion: {
            name: 'noun-adj-stem-completion',
            description: 'Restore nominative suffix -ი for consonant-ending noun/adjective stems',
            rules: stemCompletionRules,
        },
        vowelRestoration: {
            name: 'vowel-restoration',
            description: 'Restore truncated vowels if applicable',
            rules: vowelRestorationRules,
        },
    },
};
|
||||
38
vendor/yomitan/js/language/ko/korean-text-processors.js
vendored
Normal file
38
vendor/yomitan/js/language/ko/korean-text-processors.js
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import {Hangul} from '../../../lib/hangul-js.js';
|
||||
|
||||
/** @type {import('language').TextProcessor<boolean>} */
export const disassembleHangul = {
    name: 'Disassemble Hangul',
    description: 'Disassemble Hangul characters into jamo.',
    options: [true], // Could probably also be set to [false, true], but this way it is always on
    process: (str) => {
        // NOTE(review): the second argument appears to control result grouping
        // in hangul-js — confirm against the hangul-js documentation.
        return Hangul.disassemble(str, false).join('');
    },
};

/** @type {import('language').TextProcessor<boolean>} */
export const reassembleHangul = {
    name: 'Reassemble Hangul',
    description: 'Reassemble Hangul characters from jamo.',
    options: [true], // Could probably also be set to [false, true], but this way it is always on
    // Inverse of disassembleHangul: recomposes a jamo sequence into syllable blocks.
    process: (str) => {
        return Hangul.assemble(str);
    },
};
|
||||
5040
vendor/yomitan/js/language/ko/korean-transforms.js
vendored
Normal file
5040
vendor/yomitan/js/language/ko/korean-transforms.js
vendored
Normal file
File diff suppressed because it is too large
Load Diff
56
vendor/yomitan/js/language/ko/korean.js
vendored
Normal file
56
vendor/yomitan/js/language/ko/korean.js
vendored
Normal file
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import {CJK_IDEOGRAPH_RANGES, CJK_PUNCTUATION_RANGE, FULLWIDTH_CHARACTER_RANGES, isCodePointInRanges} from '../CJK-util.js';
|
||||
|
||||
/**
 * Hangul script blocks as [min, max] codepoint pairs.
 * @type {import('CJK-util').CodepointRange[]}
 */
const HANGUL_RANGES = [
    [0x1100, 0x11ff], // Hangul Jamo
    [0x3130, 0x318f], // Hangul Compatibility Jamo
    [0xac00, 0xd7af], // Hangul Syllables
    [0xa960, 0xa97f], // Hangul Jamo Extended-A
    [0xd7b0, 0xd7ff], // Hangul Jamo Extended-B
    [0xffa0, 0xffdf], // Halfwidth Hangul forms
];

/**
 * Korean character ranges, roughly ordered in order of expected frequency.
 * @type {import('CJK-util').CodepointRange[]}
 */
const KOREAN_RANGES = [
    ...CJK_IDEOGRAPH_RANGES,
    CJK_PUNCTUATION_RANGE,
    ...FULLWIDTH_CHARACTER_RANGES,
    ...HANGUL_RANGES,
];

/**
 * Determines whether a Unicode code point falls in any Korean-relevant range
 * (Hangul blocks, CJK ideographs, CJK punctuation, or fullwidth forms).
 * @param {number} codePoint
 * @returns {boolean}
 */
export function isCodePointKorean(codePoint) {
    return isCodePointInRanges(codePoint, KOREAN_RANGES);
}
|
||||
39
vendor/yomitan/js/language/la/latin-text-preprocessors.js
vendored
Normal file
39
vendor/yomitan/js/language/la/latin-text-preprocessors.js
vendored
Normal file
@@ -0,0 +1,39 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/**
 * Bidirectional preprocessor converting the Latin ligatures æ/Æ and œ/Œ to
 * their two-letter spellings ('direct') or contracting the two-letter
 * spellings back into ligatures ('inverse').
 * @type {import('language').BidirectionalConversionPreprocessor}
 */
export const processDiphtongs = {
    name: 'Convert æ to ae',
    description: 'æ → ae, Æ → AE, œ → oe, Œ → OE',
    options: ['off', 'direct', 'inverse'],
    process: (str, setting) => {
        switch (setting) {
            case 'direct':
                // Expand ligatures into their two-letter forms.
                return str.replace(/æ/g, 'ae')
                    .replace(/Æ/g, 'AE')
                    .replace(/œ/g, 'oe')
                    .replace(/Œ/g, 'OE');
            case 'inverse':
                // Contract two-letter sequences into ligatures.
                // NOTE(review): this also rewrites coincidental pairs (e.g.
                // "aer" → "ær"); presumably acceptable for lookup — confirm.
                return str.replace(/ae/g, 'æ')
                    .replace(/AE/g, 'Æ')
                    .replace(/oe/g, 'œ')
                    .replace(/OE/g, 'Œ');
            case 'off':
            default:
                // Fix: the original switch had no default, so an unexpected
                // setting value fell through and returned undefined.
                return str;
        }
    },
};
|
||||
164
vendor/yomitan/js/language/la/latin-transforms.js
vendored
Normal file
164
vendor/yomitan/js/language/la/latin-transforms.js
vendored
Normal file
@@ -0,0 +1,164 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import {suffixInflection} from '../language-transforms.js';
|
||||
|
||||
// TODO: -ne suffix (estne, nonne)?
|
||||
|
||||
// Grammatical condition taxonomy for Latin deinflection. Entries with
// `subConditions` act as unions of their children when resolved to bit
// flags; key order is preserved because flag bits are allocated in
// declaration order by the language transformer.
const conditions = {
    v: {
        name: 'Verb',
        isDictionaryForm: true,
    },
    // Nouns: `n` covers all nouns; `ns`/`np` split by number, and the
    // `nXs`/`nXp` leaves further split by declension (X = 1..5).
    n: {
        name: 'Noun',
        isDictionaryForm: true,
        subConditions: ['ns', 'np'],
    },
    ns: {
        name: 'Noun, singular',
        isDictionaryForm: true,
        subConditions: ['n1s', 'n2s', 'n3s', 'n4s', 'n5s'],
    },
    np: {
        name: 'Noun, plural',
        isDictionaryForm: true,
        subConditions: ['n1p', 'n2p', 'n3p', 'n4p', 'n5p'],
    },
    // Per-declension groupings (union of that declension's singular/plural).
    n1: {
        name: 'Noun, 1st declension',
        isDictionaryForm: true,
        subConditions: ['n1s', 'n1p'],
    },
    n1p: {
        name: 'Noun, 1st declension, plural',
        isDictionaryForm: true,
    },
    n1s: {
        name: 'Noun, 1st declension, singular',
        isDictionaryForm: true,
    },
    n2: {
        name: 'Noun, 2nd declension',
        isDictionaryForm: true,
        subConditions: ['n2s', 'n2p'],
    },
    n2p: {
        name: 'Noun, 2nd declension, plural',
        isDictionaryForm: true,
    },
    n2s: {
        name: 'Noun, 2nd declension, singular',
        isDictionaryForm: true,
    },
    n3: {
        name: 'Noun, 3rd declension',
        isDictionaryForm: true,
        subConditions: ['n3s', 'n3p'],
    },
    n3p: {
        name: 'Noun, 3rd declension, plural',
        isDictionaryForm: true,
    },
    n3s: {
        name: 'Noun, 3rd declension, singular',
        isDictionaryForm: true,
    },
    n4: {
        name: 'Noun, 4th declension',
        isDictionaryForm: true,
        subConditions: ['n4s', 'n4p'],
    },
    n4p: {
        name: 'Noun, 4th declension, plural',
        isDictionaryForm: true,
    },
    n4s: {
        name: 'Noun, 4th declension, singular',
        isDictionaryForm: true,
    },
    n5: {
        name: 'Noun, 5th declension',
        isDictionaryForm: true,
        subConditions: ['n5s', 'n5p'],
    },
    n5p: {
        name: 'Noun, 5th declension, plural',
        isDictionaryForm: true,
    },
    n5s: {
        name: 'Noun, 5th declension, singular',
        isDictionaryForm: true,
    },
    // Adjectives, split by declension class (1st/2nd vs 3rd).
    adj: {
        name: 'Adjective',
        isDictionaryForm: true,
        subConditions: ['adj3', 'adj12'],
    },
    adj12: {
        name: 'Adjective, 1st-2nd declension',
        isDictionaryForm: true,
    },
    adj3: {
        name: 'Adjective, 3rd declension',
        isDictionaryForm: true,
    },
    adv: {
        name: 'Adverb',
        isDictionaryForm: true,
    },
};
|
||||
|
||||
/** @type {import('language-transformer').LanguageTransformDescriptor<keyof typeof conditions>} */
export const latinTransforms = {
    language: 'la',
    conditions,
    transforms: {
        // Rule arguments are (inflected suffix, dictionary-form suffix,
        // conditions of the result, conditions of the inflected form) —
        // assumed from suffixInflection's usage here; TODO confirm against
        // language-transforms.js.
        plural: {
            name: 'plural',
            description: 'Plural declension',
            rules: [
                suffixInflection('i', 'us', ['n2p'], ['n2s']), // e.g. servi → servus
                suffixInflection('i', 'us', ['adj12'], ['adj12']),
                suffixInflection('e', '', ['n1p'], ['n1s']), // e.g. puellae → puella
                suffixInflection('ae', 'a', ['adj12'], ['adj12']),
                suffixInflection('a', 'um', ['adj12'], ['adj12']),
            ],
        },
        feminine: {
            name: 'feminine',
            description: 'Adjective form',
            rules: [
                suffixInflection('a', 'us', ['adj12'], ['adj12']), // e.g. bona → bonus
            ],
        },
        neuter: {
            name: 'neuter',
            description: 'Adjective form',
            rules: [
                suffixInflection('um', 'us', ['adj12'], ['adj12']), // e.g. bonum → bonus
            ],
        },
        ablative: {
            name: 'ablative',
            description: 'Ablative case',
            rules: [
                suffixInflection('o', 'um', ['n2s'], ['n2s']), // e.g. bello → bellum
            ],
        },
    },
};
|
||||
559
vendor/yomitan/js/language/language-descriptors.js
vendored
Normal file
559
vendor/yomitan/js/language/language-descriptors.js
vendored
Normal file
@@ -0,0 +1,559 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import {removeSyriacScriptDiacritics} from './aii/assyrian-neo-aramaic-text-preprocessors.js';
|
||||
import {
|
||||
addHamzaBottom,
|
||||
addHamzaTop,
|
||||
convertAlifMaqsuraToYaa,
|
||||
convertHaToTaMarbuta,
|
||||
normalizeUnicode,
|
||||
removeArabicScriptDiacritics,
|
||||
removeTatweel,
|
||||
} from './ar/arabic-text-preprocessors.js';
|
||||
import {arabicTransforms} from './ar/arabic-transforms.js';
|
||||
import {normalizeRadicalCharacters} from './CJK-util.js';
|
||||
import {eszettPreprocessor} from './de/german-text-preprocessors.js';
|
||||
import {germanTransforms} from './de/german-transforms.js';
|
||||
import {removeDoubleAcuteAccents} from './el/modern-greek-processors.js';
|
||||
import {englishTransforms} from './en/english-transforms.js';
|
||||
import {esperantoTransforms} from './eo/esperanto-transforms.js';
|
||||
import {spanishTransforms} from './es/spanish-transforms.js';
|
||||
import {apostropheVariants} from './fr/french-text-preprocessors.js';
|
||||
import {frenchTransforms} from './fr/french-transforms.js';
|
||||
import {irishTransforms} from './ga/irish-transforms.js';
|
||||
import {convertLatinToGreek} from './grc/ancient-greek-processors.js';
|
||||
import {ancientGreekTransforms} from './grc/ancient-greek-transforms.js';
|
||||
import {
|
||||
alphabeticToHiragana,
|
||||
alphanumericWidthVariants,
|
||||
collapseEmphaticSequences,
|
||||
convertHalfWidthCharacters,
|
||||
convertHiraganaToKatakana,
|
||||
normalizeCJKCompatibilityCharacters,
|
||||
normalizeCombiningCharacters,
|
||||
standardizeKanji,
|
||||
} from './ja/japanese-text-preprocessors.js';
|
||||
import {japaneseTransforms} from './ja/japanese-transforms.js';
|
||||
import {isStringPartiallyJapanese} from './ja/japanese.js';
|
||||
import {georgianTransforms} from './ka/georgian-transforms.js';
|
||||
import {disassembleHangul, reassembleHangul} from './ko/korean-text-processors.js';
|
||||
import {koreanTransforms} from './ko/korean-transforms.js';
|
||||
import {processDiphtongs} from './la/latin-text-preprocessors.js';
|
||||
import {latinTransforms} from './la/latin-transforms.js';
|
||||
import {removeRussianDiacritics, yoToE} from './ru/russian-text-preprocessors.js';
|
||||
import {oldIrishTransforms} from './sga/old-irish-transforms.js';
|
||||
import {removeSerboCroatianAccentMarks} from './sh/serbo-croatian-text-preprocessors.js';
|
||||
import {albanianTransforms} from './sq/albanian-transforms.js';
|
||||
import {capitalizeFirstLetter, decapitalize, removeAlphabeticDiacritics} from './text-processors.js';
|
||||
import {tagalogTransforms} from './tl/tagalog-transforms.js';
|
||||
import {normalizeDiacritics} from './vi/viet-text-preprocessors.js';
|
||||
import {convertFinalLetters, convertYiddishLigatures} from './yi/yiddish-text-postprocessors.js';
|
||||
import {combineYiddishLigatures, removeYiddishDiacritics} from './yi/yiddish-text-preprocessors.js';
|
||||
import {yiddishTransforms} from './yi/yiddish-transforms.js';
|
||||
import {isStringPartiallyChinese, normalizePinyin} from './zh/chinese.js';
|
||||
|
||||
// Preprocessor pair shared by most alphabetic languages below: attempts
// lookups with the text decapitalized and with its first letter capitalized.
const capitalizationPreprocessors = {
    decapitalize,
    capitalizeFirstLetter,
};
|
||||
|
||||
/** @type {import('language-descriptors').LanguageDescriptorAny[]} */
|
||||
const languageDescriptors = [
|
||||
{
|
||||
iso: 'aii',
|
||||
iso639_3: 'aii',
|
||||
name: 'Assyrian Neo-Aramaic',
|
||||
exampleText: 'ܟܵܬܹܒ݂',
|
||||
textPreprocessors: {
|
||||
removeSyriacScriptDiacritics,
|
||||
},
|
||||
},
|
||||
{
|
||||
iso: 'ar',
|
||||
iso639_3: 'ara',
|
||||
name: 'Arabic (MSA)',
|
||||
exampleText: 'قَرَأَ',
|
||||
textPreprocessors: {
|
||||
removeArabicScriptDiacritics,
|
||||
removeTatweel,
|
||||
normalizeUnicode,
|
||||
addHamzaTop,
|
||||
addHamzaBottom,
|
||||
convertAlifMaqsuraToYaa,
|
||||
},
|
||||
languageTransforms: arabicTransforms,
|
||||
},
|
||||
{
|
||||
iso: 'arz',
|
||||
iso639_3: 'arz',
|
||||
name: 'Arabic (Egyptian)',
|
||||
exampleText: 'قَرَأَ',
|
||||
textPreprocessors: {
|
||||
removeArabicScriptDiacritics,
|
||||
removeTatweel,
|
||||
normalizeUnicode,
|
||||
addHamzaTop,
|
||||
addHamzaBottom,
|
||||
convertAlifMaqsuraToYaa,
|
||||
convertHaToTaMarbuta,
|
||||
},
|
||||
languageTransforms: arabicTransforms,
|
||||
},
|
||||
{
|
||||
iso: 'bg',
|
||||
iso639_3: 'bul',
|
||||
name: 'Bulgarian',
|
||||
exampleText: 'чета',
|
||||
textPreprocessors: capitalizationPreprocessors,
|
||||
},
|
||||
{
|
||||
iso: 'cs',
|
||||
iso639_3: 'ces',
|
||||
name: 'Czech',
|
||||
exampleText: 'číst',
|
||||
textPreprocessors: capitalizationPreprocessors,
|
||||
},
|
||||
{
|
||||
iso: 'da',
|
||||
iso639_3: 'dan',
|
||||
name: 'Danish',
|
||||
exampleText: 'læse',
|
||||
textPreprocessors: {
|
||||
...capitalizationPreprocessors,
|
||||
},
|
||||
},
|
||||
{
|
||||
iso: 'de',
|
||||
iso639_3: 'deu',
|
||||
name: 'German',
|
||||
exampleText: 'lesen',
|
||||
textPreprocessors: {
|
||||
...capitalizationPreprocessors,
|
||||
eszettPreprocessor,
|
||||
},
|
||||
languageTransforms: germanTransforms,
|
||||
},
|
||||
{
|
||||
iso: 'el',
|
||||
iso639_3: 'ell',
|
||||
name: 'Greek',
|
||||
exampleText: 'διαβάζω',
|
||||
textPreprocessors: {
|
||||
...capitalizationPreprocessors,
|
||||
removeDoubleAcuteAccents,
|
||||
},
|
||||
},
|
||||
{
|
||||
iso: 'en',
|
||||
iso639_3: 'eng',
|
||||
name: 'English',
|
||||
exampleText: 'read',
|
||||
textPreprocessors: capitalizationPreprocessors,
|
||||
languageTransforms: englishTransforms,
|
||||
},
|
||||
{
|
||||
iso: 'eo',
|
||||
iso639_3: 'epo',
|
||||
name: 'Esperanto',
|
||||
exampleText: 'legi',
|
||||
textPreprocessors: capitalizationPreprocessors,
|
||||
languageTransforms: esperantoTransforms,
|
||||
},
|
||||
{
|
||||
iso: 'es',
|
||||
iso639_3: 'spa',
|
||||
name: 'Spanish',
|
||||
exampleText: 'leer',
|
||||
textPreprocessors: capitalizationPreprocessors,
|
||||
languageTransforms: spanishTransforms,
|
||||
},
|
||||
{
|
||||
iso: 'et',
|
||||
iso639_3: 'est',
|
||||
name: 'Estonian',
|
||||
exampleText: 'lugema',
|
||||
textPreprocessors: capitalizationPreprocessors,
|
||||
},
|
||||
{
|
||||
iso: 'fa',
|
||||
iso639_3: 'fas',
|
||||
name: 'Persian',
|
||||
exampleText: 'خواندن',
|
||||
textPreprocessors: {
|
||||
removeArabicScriptDiacritics,
|
||||
},
|
||||
},
|
||||
{
|
||||
iso: 'fi',
|
||||
iso639_3: 'fin',
|
||||
name: 'Finnish',
|
||||
exampleText: 'lukea',
|
||||
textPreprocessors: capitalizationPreprocessors,
|
||||
},
|
||||
{
|
||||
iso: 'fr',
|
||||
iso639_3: 'fra',
|
||||
name: 'French',
|
||||
exampleText: 'lire',
|
||||
textPreprocessors: {
|
||||
...capitalizationPreprocessors,
|
||||
apostropheVariants,
|
||||
},
|
||||
languageTransforms: frenchTransforms,
|
||||
},
|
||||
{
|
||||
iso: 'ga',
|
||||
iso639_3: 'gle',
|
||||
name: 'Irish',
|
||||
exampleText: 'léigh',
|
||||
textPreprocessors: capitalizationPreprocessors,
|
||||
languageTransforms: irishTransforms,
|
||||
},
|
||||
{
|
||||
iso: 'grc',
|
||||
iso639_3: 'grc',
|
||||
name: 'Ancient Greek',
|
||||
exampleText: 'γράφω', /* 'to write' */
|
||||
textPreprocessors: {
|
||||
...capitalizationPreprocessors,
|
||||
removeAlphabeticDiacritics,
|
||||
convertLatinToGreek,
|
||||
},
|
||||
languageTransforms: ancientGreekTransforms,
|
||||
},
|
||||
{
|
||||
// no 2 letter iso for hawaiian
|
||||
iso: 'haw',
|
||||
iso639_3: 'haw',
|
||||
name: 'Hawaiian',
|
||||
exampleText: 'heluhelu',
|
||||
textPreprocessors: capitalizationPreprocessors,
|
||||
},
|
||||
{
|
||||
iso: 'he',
|
||||
iso639_3: 'heb',
|
||||
name: 'Hebrew',
|
||||
exampleText: 'קריאה',
|
||||
},
|
||||
{
|
||||
iso: 'hi',
|
||||
iso639_3: 'hin',
|
||||
name: 'Hindi',
|
||||
exampleText: 'पढ़ने के लिए',
|
||||
},
|
||||
{
|
||||
iso: 'hu',
|
||||
iso639_3: 'hun',
|
||||
name: 'Hungarian',
|
||||
exampleText: 'olvasni',
|
||||
textPreprocessors: capitalizationPreprocessors,
|
||||
},
|
||||
{
|
||||
iso: 'id',
|
||||
iso639_3: 'ind',
|
||||
name: 'Indonesian',
|
||||
exampleText: 'baca',
|
||||
textPreprocessors: {
|
||||
...capitalizationPreprocessors,
|
||||
removeAlphabeticDiacritics,
|
||||
},
|
||||
},
|
||||
{
|
||||
iso: 'it',
|
||||
iso639_3: 'ita',
|
||||
name: 'Italian',
|
||||
exampleText: 'leggere',
|
||||
textPreprocessors: {
|
||||
...capitalizationPreprocessors,
|
||||
removeAlphabeticDiacritics,
|
||||
},
|
||||
},
|
||||
{
|
||||
iso: 'la',
|
||||
iso639_3: 'lat',
|
||||
name: 'Latin',
|
||||
exampleText: 'legō',
|
||||
textPreprocessors: {
|
||||
...capitalizationPreprocessors,
|
||||
removeAlphabeticDiacritics,
|
||||
processDiphtongs,
|
||||
},
|
||||
languageTransforms: latinTransforms,
|
||||
},
|
||||
{
|
||||
iso: 'lo',
|
||||
iso639_3: 'lao',
|
||||
name: 'Lao',
|
||||
exampleText: 'ອ່ານ',
|
||||
},
|
||||
{
|
||||
iso: 'lv',
|
||||
iso639_3: 'lav',
|
||||
name: 'Latvian',
|
||||
exampleText: 'lasīt',
|
||||
textPreprocessors: capitalizationPreprocessors,
|
||||
},
|
||||
{
|
||||
iso: 'ja',
|
||||
iso639_3: 'jpn',
|
||||
name: 'Japanese',
|
||||
exampleText: '読め',
|
||||
isTextLookupWorthy: isStringPartiallyJapanese,
|
||||
textPreprocessors: {
|
||||
convertHalfWidthCharacters,
|
||||
alphabeticToHiragana,
|
||||
normalizeCombiningCharacters,
|
||||
normalizeCJKCompatibilityCharacters,
|
||||
normalizeRadicalCharacters,
|
||||
alphanumericWidthVariants,
|
||||
convertHiraganaToKatakana,
|
||||
collapseEmphaticSequences,
|
||||
standardizeKanji,
|
||||
},
|
||||
languageTransforms: japaneseTransforms,
|
||||
},
|
||||
{
|
||||
iso: 'ka',
|
||||
iso639_3: 'kat',
|
||||
name: 'Georgian',
|
||||
exampleText: 'კითხვა', // Georgian for “read”
|
||||
languageTransforms: georgianTransforms,
|
||||
},
|
||||
{
|
||||
iso: 'kn',
|
||||
iso639_3: 'kan',
|
||||
name: 'Kannada',
|
||||
exampleText: 'ಓದು',
|
||||
},
|
||||
{
|
||||
iso: 'km',
|
||||
iso639_3: 'khm',
|
||||
name: 'Khmer',
|
||||
exampleText: 'អាន',
|
||||
},
|
||||
{
|
||||
iso: 'ko',
|
||||
iso639_3: 'kor',
|
||||
name: 'Korean',
|
||||
exampleText: '읽어',
|
||||
textPreprocessors: {
|
||||
disassembleHangul,
|
||||
},
|
||||
textPostprocessors: {
|
||||
reassembleHangul,
|
||||
},
|
||||
languageTransforms: koreanTransforms,
|
||||
},
|
||||
{
|
||||
iso: 'mn',
|
||||
iso639_3: 'mon',
|
||||
name: 'Mongolian',
|
||||
exampleText: 'унших',
|
||||
textPreprocessors: capitalizationPreprocessors,
|
||||
},
|
||||
{
|
||||
iso: 'mt',
|
||||
iso639_3: 'mlt',
|
||||
name: 'Maltese',
|
||||
exampleText: 'kiteb',
|
||||
textPreprocessors: capitalizationPreprocessors,
|
||||
},
|
||||
{
|
||||
iso: 'nl',
|
||||
iso639_3: 'nld',
|
||||
name: 'Dutch',
|
||||
exampleText: 'lezen',
|
||||
textPreprocessors: capitalizationPreprocessors,
|
||||
},
|
||||
{
|
||||
iso: 'no',
|
||||
iso639_3: 'nor',
|
||||
name: 'Norwegian',
|
||||
exampleText: 'lese',
|
||||
textPreprocessors: {
|
||||
...capitalizationPreprocessors,
|
||||
},
|
||||
},
|
||||
{
|
||||
iso: 'pl',
|
||||
iso639_3: 'pol',
|
||||
name: 'Polish',
|
||||
exampleText: 'czytać',
|
||||
textPreprocessors: capitalizationPreprocessors,
|
||||
},
|
||||
{
|
||||
iso: 'pt',
|
||||
iso639_3: 'por',
|
||||
name: 'Portuguese',
|
||||
exampleText: 'ler',
|
||||
textPreprocessors: capitalizationPreprocessors,
|
||||
},
|
||||
{
|
||||
iso: 'ro',
|
||||
iso639_3: 'ron',
|
||||
name: 'Romanian',
|
||||
exampleText: 'citi',
|
||||
textPreprocessors: {
|
||||
...capitalizationPreprocessors,
|
||||
removeAlphabeticDiacritics,
|
||||
},
|
||||
},
|
||||
{
|
||||
iso: 'ru',
|
||||
iso639_3: 'rus',
|
||||
name: 'Russian',
|
||||
exampleText: 'читать',
|
||||
textPreprocessors: {
|
||||
...capitalizationPreprocessors,
|
||||
yoToE,
|
||||
removeRussianDiacritics,
|
||||
},
|
||||
},
|
||||
{
|
||||
iso: 'sga',
|
||||
iso639_3: 'sga',
|
||||
name: 'Old Irish',
|
||||
exampleText: 'légaid',
|
||||
textPreprocessors: {
|
||||
...capitalizationPreprocessors,
|
||||
removeAlphabeticDiacritics,
|
||||
},
|
||||
languageTransforms: oldIrishTransforms,
|
||||
},
|
||||
{
|
||||
iso: 'sh',
|
||||
iso639_3: 'hbs',
|
||||
name: 'Serbo-Croatian',
|
||||
exampleText: 'čìtati',
|
||||
textPreprocessors: {
|
||||
...capitalizationPreprocessors,
|
||||
removeSerboCroatianAccentMarks,
|
||||
},
|
||||
},
|
||||
{
|
||||
iso: 'sq',
|
||||
iso639_3: 'sqi',
|
||||
name: 'Albanian',
|
||||
exampleText: 'ndihmoj', /* 'to help' */
|
||||
textPreprocessors: capitalizationPreprocessors,
|
||||
languageTransforms: albanianTransforms,
|
||||
},
|
||||
{
|
||||
iso: 'sv',
|
||||
iso639_3: 'swe',
|
||||
name: 'Swedish',
|
||||
exampleText: 'läsa',
|
||||
textPreprocessors: capitalizationPreprocessors,
|
||||
},
|
||||
{
|
||||
iso: 'th',
|
||||
iso639_3: 'tha',
|
||||
name: 'Thai',
|
||||
exampleText: 'อ่าน',
|
||||
},
|
||||
{
|
||||
iso: 'tl',
|
||||
iso639_3: 'tgl',
|
||||
name: 'Tagalog',
|
||||
exampleText: 'basahin',
|
||||
textPreprocessors: {
|
||||
...capitalizationPreprocessors,
|
||||
removeAlphabeticDiacritics,
|
||||
},
|
||||
languageTransforms: tagalogTransforms,
|
||||
},
|
||||
{
|
||||
iso: 'tr',
|
||||
iso639_3: 'tur',
|
||||
name: 'Turkish',
|
||||
exampleText: 'okumak',
|
||||
textPreprocessors: capitalizationPreprocessors,
|
||||
},
|
||||
{
|
||||
iso: 'tok',
|
||||
iso639_3: 'tok',
|
||||
name: 'Toki Pona',
|
||||
exampleText: 'wile',
|
||||
textPreprocessors: capitalizationPreprocessors,
|
||||
},
|
||||
{
|
||||
iso: 'uk',
|
||||
iso639_3: 'ukr',
|
||||
name: 'Ukrainian',
|
||||
exampleText: 'читати',
|
||||
textPreprocessors: capitalizationPreprocessors,
|
||||
},
|
||||
{
|
||||
iso: 'vi',
|
||||
iso639_3: 'vie',
|
||||
name: 'Vietnamese',
|
||||
exampleText: 'đọc',
|
||||
textPreprocessors: {
|
||||
...capitalizationPreprocessors,
|
||||
normalizeDiacritics,
|
||||
},
|
||||
},
|
||||
{
|
||||
iso: 'cy',
|
||||
iso639_3: 'cym',
|
||||
name: 'Welsh',
|
||||
exampleText: 'ddarllen',
|
||||
textPreprocessors: capitalizationPreprocessors,
|
||||
},
|
||||
{
|
||||
iso: 'yi',
|
||||
iso639_3: 'yid',
|
||||
name: 'Yiddish',
|
||||
exampleText: 'באַשאַפֿן',
|
||||
textPreprocessors: {
|
||||
removeYiddishDiacritics,
|
||||
combineYiddishLigatures,
|
||||
},
|
||||
textPostprocessors: {
|
||||
convertFinalLetters,
|
||||
convertYiddishLigatures,
|
||||
},
|
||||
languageTransforms: yiddishTransforms,
|
||||
},
|
||||
{
|
||||
iso: 'yue',
|
||||
iso639_3: 'yue',
|
||||
name: 'Cantonese',
|
||||
exampleText: '讀',
|
||||
textPreprocessors: {
|
||||
normalizeRadicalCharacters,
|
||||
},
|
||||
},
|
||||
{
|
||||
iso: 'zh',
|
||||
iso639_3: 'zho',
|
||||
name: 'Chinese',
|
||||
exampleText: '读',
|
||||
isTextLookupWorthy: isStringPartiallyChinese,
|
||||
readingNormalizer: normalizePinyin,
|
||||
textPreprocessors: {
|
||||
normalizeRadicalCharacters,
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
/**
 * Lookup table from two/three-letter ISO code to its language descriptor.
 * @type {Map<string, import('language-descriptors').LanguageDescriptorAny>}
 */
export const languageDescriptorMap = new Map(
    languageDescriptors.map((descriptor) => [descriptor.iso, descriptor]),
);
|
||||
273
vendor/yomitan/js/language/language-transformer.js
vendored
Normal file
273
vendor/yomitan/js/language/language-transformer.js
vendored
Normal file
@@ -0,0 +1,273 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import {log} from '../core/log.js';
|
||||
|
||||
export class LanguageTransformer {
|
||||
    constructor() {
        // Next unused bit index when allocating condition flags (at most 32,
        // since flags live in a single 32-bit integer).
        /** @type {number} */
        this._nextFlagIndex = 0;
        // Every transform registered via addDescriptor, across all descriptors.
        /** @type {import('language-transformer-internal').Transform[]} */
        this._transforms = [];
        // Condition type (e.g. a part-of-speech tag) -> combined bit flags.
        /** @type {Map<string, number>} */
        this._conditionTypeToConditionFlagsMap = new Map();
        // Dictionary-form condition types only (subset of the map above).
        /** @type {Map<string, number>} */
        this._partOfSpeechToConditionFlagsMap = new Map();
    }
|
||||
|
||||
/** */
|
||||
clear() {
|
||||
this._nextFlagIndex = 0;
|
||||
this._transforms = [];
|
||||
this._conditionTypeToConditionFlagsMap.clear();
|
||||
this._partOfSpeechToConditionFlagsMap.clear();
|
||||
}
|
||||
|
||||
    /**
     * Registers a language's conditions and transforms. Leaf conditions are
     * assigned fresh bit flags (shared counter across all descriptors), each
     * transform's rules have their condition names resolved to flag masks,
     * and a combined heuristic regex is precomputed per transform.
     * @param {import('language-transformer').LanguageTransformDescriptor} descriptor
     * @throws {Error} If a rule references an unknown condition type.
     */
    addDescriptor(descriptor) {
        const {conditions, transforms} = descriptor;
        const conditionEntries = Object.entries(conditions);
        const {conditionFlagsMap, nextFlagIndex} = this._getConditionFlagsMap(conditionEntries, this._nextFlagIndex);

        /** @type {import('language-transformer-internal').Transform[]} */
        const transforms2 = [];

        for (const [transformId, transform] of Object.entries(transforms)) {
            const {name, description, rules} = transform;
            /** @type {import('language-transformer-internal').Rule[]} */
            const rules2 = [];
            for (let j = 0, jj = rules.length; j < jj; ++j) {
                const {type, isInflected, deinflect, conditionsIn, conditionsOut} = rules[j];
                // Resolve condition names to bit masks; fail fast on typos.
                const conditionFlagsIn = this._getConditionFlagsStrict(conditionFlagsMap, conditionsIn);
                if (conditionFlagsIn === null) { throw new Error(`Invalid conditionsIn for transform ${transformId}.rules[${j}]`); }
                const conditionFlagsOut = this._getConditionFlagsStrict(conditionFlagsMap, conditionsOut);
                if (conditionFlagsOut === null) { throw new Error(`Invalid conditionsOut for transform ${transformId}.rules[${j}]`); }
                rules2.push({
                    type,
                    isInflected,
                    deinflect,
                    conditionsIn: conditionFlagsIn,
                    conditionsOut: conditionFlagsOut,
                });
            }
            // Union of all per-rule isInflected patterns: a single cheap test
            // used by transform() to skip transforms that cannot apply.
            const isInflectedTests = rules.map((rule) => rule.isInflected);
            const heuristic = new RegExp(isInflectedTests.map((regExp) => regExp.source).join('|'));
            transforms2.push({id: transformId, name, description, rules: rules2, heuristic});
        }

        // Commit only after all rules resolved, so a throw above leaves the
        // flag counter untouched.
        this._nextFlagIndex = nextFlagIndex;
        for (const transform of transforms2) {
            this._transforms.push(transform);
        }

        for (const [type, {isDictionaryForm}] of conditionEntries) {
            const flags = conditionFlagsMap.get(type);
            if (typeof flags === 'undefined') { continue; } // This case should never happen
            this._conditionTypeToConditionFlagsMap.set(type, flags);
            if (isDictionaryForm) {
                this._partOfSpeechToConditionFlagsMap.set(type, flags);
            }
        }
    }
|
||||
|
||||
    /**
     * Resolves part-of-speech tags (dictionary-form condition types) to their
     * combined condition bit flags. Unknown tags contribute no flags.
     * @param {string[]} partsOfSpeech
     * @returns {number}
     */
    getConditionFlagsFromPartsOfSpeech(partsOfSpeech) {
        return this._getConditionFlags(this._partOfSpeechToConditionFlagsMap, partsOfSpeech);
    }
|
||||
|
||||
    /**
     * Resolves condition type names to their combined condition bit flags.
     * Unknown names contribute no flags.
     * @param {string[]} conditionTypes
     * @returns {number}
     */
    getConditionFlagsFromConditionTypes(conditionTypes) {
        return this._getConditionFlags(this._conditionTypeToConditionFlagsMap, conditionTypes);
    }
|
||||
|
||||
    /**
     * Resolves a single condition type name to its condition bit flags;
     * returns 0 if the name is unknown.
     * @param {string} conditionType
     * @returns {number}
     */
    getConditionFlagsFromConditionType(conditionType) {
        return this._getConditionFlags(this._conditionTypeToConditionFlagsMap, [conditionType]);
    }
|
||||
|
||||
    /**
     * Deinflects `sourceText` by repeatedly applying every registered
     * transform rule, collecting every reachable candidate form. The first
     * result is always the unmodified source text (conditions 0 = any).
     * @param {string} sourceText
     * @returns {import('language-transformer-internal').TransformedText[]}
     */
    transform(sourceText) {
        const results = [LanguageTransformer.createTransformedText(sourceText, 0, [])];
        // `results` grows while being iterated, so newly produced forms are
        // themselves re-transformed until no further rule applies.
        for (let i = 0; i < results.length; ++i) {
            const {text, conditions, trace} = results[i];
            for (const transform of this._transforms) {
                // Cheap combined-regex screen before trying individual rules.
                if (!transform.heuristic.test(text)) { continue; }

                const {id, rules} = transform;
                for (let j = 0, jj = rules.length; j < jj; ++j) {
                    const rule = rules[j];
                    if (!LanguageTransformer.conditionsMatch(conditions, rule.conditionsIn)) { continue; }
                    const {isInflected, deinflect} = rule;
                    if (!isInflected.test(text)) { continue; }

                    // Skip rules that regenerate a previously seen
                    // (transform, rule, text) state, which would never terminate.
                    const isCycle = trace.some((frame) => frame.transform === id && frame.ruleIndex === j && frame.text === text);
                    if (isCycle) {
                        log.warn(new Error(`Cycle detected in transform[${id}] rule[${j}] for text: ${text}\nTrace: ${JSON.stringify(trace)}`));
                        continue;
                    }

                    results.push(LanguageTransformer.createTransformedText(
                        deinflect(text),
                        rule.conditionsOut,
                        this._extendTrace(trace, {transform: id, ruleIndex: j, text}),
                    ));
                }
            }
        }
        return results;
    }
|
||||
|
||||
/**
|
||||
* @param {string[]} inflectionRules
|
||||
* @returns {import('dictionary').InflectionRuleChain}
|
||||
*/
|
||||
getUserFacingInflectionRules(inflectionRules) {
|
||||
return inflectionRules.map((rule) => {
|
||||
const fullRule = this._transforms.find((transform) => transform.id === rule);
|
||||
if (typeof fullRule === 'undefined') { return {name: rule}; }
|
||||
const {name, description} = fullRule;
|
||||
return description ? {name, description} : {name};
|
||||
});
|
||||
}
|
||||
|
||||
    /**
     * Builds a TransformedText record.
     * @param {string} text - The (possibly deinflected) text.
     * @param {number} conditions - Condition bit flags; 0 matches any rule.
     * @param {import('language-transformer-internal').Trace} trace - History of applied rules, most recent first.
     * @returns {import('language-transformer-internal').TransformedText}
     */
    static createTransformedText(text, conditions, trace) {
        return {text, conditions, trace};
    }
|
||||
|
||||
    /**
     * If `currentConditions` is `0`, then `nextConditions` is ignored and `true` is returned.
     * Otherwise, there must be at least one shared condition (bit) between
     * `currentConditions` and `nextConditions`.
     * @param {number} currentConditions
     * @param {number} nextConditions
     * @returns {boolean}
     */
    static conditionsMatch(currentConditions, nextConditions) {
        return currentConditions === 0 || (currentConditions & nextConditions) !== 0;
    }
|
||||
|
||||
    /**
     * Assigns a distinct bit flag to every leaf condition and resolves each
     * compound condition (one with `subConditions`) to the union of its
     * children's flags.
     * @param {import('language-transformer').ConditionMapEntries} conditions
     * @param {number} nextFlagIndex - First unused bit index, shared across descriptors.
     * @returns {{conditionFlagsMap: Map<string, number>, nextFlagIndex: number}}
     * @throws {Error} If more than 32 leaf conditions exist in total, or the
     *   subConditions declarations form a cycle.
     */
    _getConditionFlagsMap(conditions, nextFlagIndex) {
        /** @type {Map<string, number>} */
        const conditionFlagsMap = new Map();
        /** @type {import('language-transformer').ConditionMapEntries} */
        let targets = conditions;
        // Multi-pass resolution: compounds whose children are not yet
        // resolved are deferred to the next pass.
        while (targets.length > 0) {
            const nextTargets = [];
            for (const target of targets) {
                const [type, condition] = target;
                const {subConditions} = condition;
                let flags = 0;
                if (typeof subConditions === 'undefined') {
                    if (nextFlagIndex >= 32) {
                        // Flags greater than or equal to 32 don't work because JavaScript only supports up to 32-bit integer operations
                        throw new Error('Maximum number of conditions was exceeded');
                    }
                    flags = 1 << nextFlagIndex;
                    ++nextFlagIndex;
                } else {
                    const multiFlags = this._getConditionFlagsStrict(conditionFlagsMap, subConditions);
                    if (multiFlags === null) {
                        // A child is unresolved; retry this compound later.
                        nextTargets.push(target);
                        continue;
                    } else {
                        flags = multiFlags;
                    }
                }
                conditionFlagsMap.set(type, flags);
            }
            if (nextTargets.length === targets.length) {
                // No progress was made, so subConditions must be cyclic.
                throw new Error('Maximum number of conditions was exceeded');
            }
            targets = nextTargets;
        }
        return {conditionFlagsMap, nextFlagIndex};
    }
|
||||
|
||||
/**
|
||||
* @param {Map<string, number>} conditionFlagsMap
|
||||
* @param {string[]} conditionTypes
|
||||
* @returns {?number}
|
||||
*/
|
||||
_getConditionFlagsStrict(conditionFlagsMap, conditionTypes) {
|
||||
let flags = 0;
|
||||
for (const conditionType of conditionTypes) {
|
||||
const flags2 = conditionFlagsMap.get(conditionType);
|
||||
if (typeof flags2 === 'undefined') {
|
||||
return null;
|
||||
}
|
||||
flags |= flags2;
|
||||
}
|
||||
return flags;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {Map<string, number>} conditionFlagsMap
|
||||
* @param {string[]} conditionTypes
|
||||
* @returns {number}
|
||||
*/
|
||||
_getConditionFlags(conditionFlagsMap, conditionTypes) {
|
||||
let flags = 0;
|
||||
for (const conditionType of conditionTypes) {
|
||||
let flags2 = conditionFlagsMap.get(conditionType);
|
||||
if (typeof flags2 === 'undefined') {
|
||||
flags2 = 0;
|
||||
}
|
||||
flags |= flags2;
|
||||
}
|
||||
return flags;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {import('language-transformer-internal').Trace} trace
|
||||
* @param {import('language-transformer-internal').TraceFrame} newFrame
|
||||
* @returns {import('language-transformer-internal').Trace}
|
||||
*/
|
||||
_extendTrace(trace, newFrame) {
|
||||
const newTrace = [newFrame];
|
||||
for (const {transform, ruleIndex, text} of trace) {
|
||||
newTrace.push({transform, ruleIndex, text});
|
||||
}
|
||||
return newTrace;
|
||||
}
|
||||
}
|
||||
75
vendor/yomitan/js/language/language-transforms.js
vendored
Normal file
75
vendor/yomitan/js/language/language-transforms.js
vendored
Normal file
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @template {string} TCondition
|
||||
* @param {string} inflectedSuffix
|
||||
* @param {string} deinflectedSuffix
|
||||
* @param {TCondition[]} conditionsIn
|
||||
* @param {TCondition[]} conditionsOut
|
||||
* @returns {import('language-transformer').SuffixRule<TCondition>}
|
||||
*/
|
||||
export function suffixInflection(inflectedSuffix, deinflectedSuffix, conditionsIn, conditionsOut) {
|
||||
const suffixRegExp = new RegExp(inflectedSuffix + '$');
|
||||
return {
|
||||
type: 'suffix',
|
||||
isInflected: suffixRegExp,
|
||||
deinflected: deinflectedSuffix,
|
||||
deinflect: (text) => text.slice(0, -inflectedSuffix.length) + deinflectedSuffix,
|
||||
conditionsIn,
|
||||
conditionsOut,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* @template {string} TCondition
|
||||
* @param {string} inflectedPrefix
|
||||
* @param {string} deinflectedPrefix
|
||||
* @param {TCondition[]} conditionsIn
|
||||
* @param {TCondition[]} conditionsOut
|
||||
* @returns {import('language-transformer').Rule<TCondition>}
|
||||
*/
|
||||
export function prefixInflection(inflectedPrefix, deinflectedPrefix, conditionsIn, conditionsOut) {
|
||||
const prefixRegExp = new RegExp('^' + inflectedPrefix);
|
||||
return {
|
||||
type: 'prefix',
|
||||
isInflected: prefixRegExp,
|
||||
deinflect: (text) => deinflectedPrefix + text.slice(inflectedPrefix.length),
|
||||
conditionsIn,
|
||||
conditionsOut,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* @template {string} TCondition
|
||||
* @param {string} inflectedWord
|
||||
* @param {string} deinflectedWord
|
||||
* @param {TCondition[]} conditionsIn
|
||||
* @param {TCondition[]} conditionsOut
|
||||
* @returns {import('language-transformer').Rule<TCondition>}
|
||||
*/
|
||||
export function wholeWordInflection(inflectedWord, deinflectedWord, conditionsIn, conditionsOut) {
|
||||
const regex = new RegExp('^' + inflectedWord + '$');
|
||||
return {
|
||||
type: 'wholeWord',
|
||||
isInflected: regex,
|
||||
deinflect: () => deinflectedWord,
|
||||
conditionsIn,
|
||||
conditionsOut,
|
||||
};
|
||||
}
|
||||
93
vendor/yomitan/js/language/languages.js
vendored
Executable file
93
vendor/yomitan/js/language/languages.js
vendored
Executable file
@@ -0,0 +1,93 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import {languageDescriptorMap} from './language-descriptors.js';
|
||||
|
||||
/**
|
||||
* @returns {import('language').LanguageSummary[]}
|
||||
*/
|
||||
export function getLanguageSummaries() {
|
||||
const results = [];
|
||||
for (const {name, iso, iso639_3, exampleText} of languageDescriptorMap.values()) {
|
||||
results.push({name, iso, iso639_3, exampleText});
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
|
||||
* @returns {import('language').LanguageAndReadingNormalizer[]}
|
||||
*/
|
||||
export function getAllLanguageReadingNormalizers() {
|
||||
const results = [];
|
||||
for (const {iso, readingNormalizer} of languageDescriptorMap.values()) {
|
||||
if (typeof readingNormalizer === 'undefined') { continue; }
|
||||
results.push({iso, readingNormalizer});
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
|
||||
* @returns {import('language').LanguageAndProcessors[]}
|
||||
* @throws {Error}
|
||||
*/
|
||||
export function getAllLanguageTextProcessors() {
|
||||
const results = [];
|
||||
for (const {iso, textPreprocessors = {}, textPostprocessors = {}} of languageDescriptorMap.values()) {
|
||||
/** @type {import('language').TextProcessorWithId<unknown>[]} */
|
||||
const textPreprocessorsArray = [];
|
||||
for (const [id, textPreprocessor] of Object.entries(textPreprocessors)) {
|
||||
textPreprocessorsArray.push({
|
||||
id,
|
||||
textProcessor: /** @type {import('language').TextProcessor<unknown>} */ (textPreprocessor),
|
||||
});
|
||||
}
|
||||
/** @type {import('language').TextProcessorWithId<unknown>[]} */
|
||||
const textPostprocessorsArray = [];
|
||||
for (const [id, textPostprocessor] of Object.entries(textPostprocessors)) {
|
||||
textPostprocessorsArray.push({
|
||||
id,
|
||||
textProcessor: /** @type {import('language').TextProcessor<unknown>} */ (textPostprocessor),
|
||||
});
|
||||
}
|
||||
results.push({iso, textPreprocessors: textPreprocessorsArray, textPostprocessors: textPostprocessorsArray});
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} text
|
||||
* @param {string} language
|
||||
* @returns {boolean}
|
||||
*/
|
||||
export function isTextLookupWorthy(text, language) {
|
||||
const descriptor = languageDescriptorMap.get(language);
|
||||
if (typeof descriptor === 'undefined') { return false; }
|
||||
return typeof descriptor.isTextLookupWorthy === 'undefined' || descriptor.isTextLookupWorthy(text);
|
||||
}
|
||||
|
||||
/**
|
||||
* @returns {import('language').LanguageAndTransforms[]}
|
||||
*/
|
||||
export function getAllLanguageTransformDescriptors() {
|
||||
const results = [];
|
||||
for (const {iso, languageTransforms} of languageDescriptorMap.values()) {
|
||||
if (languageTransforms) {
|
||||
results.push({iso, languageTransforms});
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
90
vendor/yomitan/js/language/multi-language-transformer.js
vendored
Normal file
90
vendor/yomitan/js/language/multi-language-transformer.js
vendored
Normal file
@@ -0,0 +1,90 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import {LanguageTransformer} from './language-transformer.js';
|
||||
import {getAllLanguageTransformDescriptors} from './languages.js';
|
||||
|
||||
export class MultiLanguageTransformer {
|
||||
constructor() {
|
||||
/** @type {Map<string, LanguageTransformer>} */
|
||||
this._languageTransformers = new Map();
|
||||
}
|
||||
|
||||
/** */
|
||||
prepare() {
|
||||
const languagesWithTransforms = getAllLanguageTransformDescriptors();
|
||||
for (const {languageTransforms: descriptor} of languagesWithTransforms) {
|
||||
const languageTransformer = new LanguageTransformer();
|
||||
languageTransformer.addDescriptor(descriptor);
|
||||
this._languageTransformers.set(descriptor.language, languageTransformer);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} language
|
||||
* @param {string[]} partsOfSpeech
|
||||
* @returns {number}
|
||||
*/
|
||||
getConditionFlagsFromPartsOfSpeech(language, partsOfSpeech) {
|
||||
const languageTransformer = this._languageTransformers.get(language);
|
||||
return typeof languageTransformer !== 'undefined' ? languageTransformer.getConditionFlagsFromPartsOfSpeech(partsOfSpeech) : 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} language
|
||||
* @param {string[]} conditionTypes
|
||||
* @returns {number}
|
||||
*/
|
||||
getConditionFlagsFromConditionTypes(language, conditionTypes) {
|
||||
const languageTransformer = this._languageTransformers.get(language);
|
||||
return typeof languageTransformer !== 'undefined' ? languageTransformer.getConditionFlagsFromConditionTypes(conditionTypes) : 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} language
|
||||
* @param {string} conditionType
|
||||
* @returns {number}
|
||||
*/
|
||||
getConditionFlagsFromConditionType(language, conditionType) {
|
||||
const languageTransformer = this._languageTransformers.get(language);
|
||||
return typeof languageTransformer !== 'undefined' ? languageTransformer.getConditionFlagsFromConditionType(conditionType) : 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} language
|
||||
* @param {string} sourceText
|
||||
* @returns {import('language-transformer-internal').TransformedText[]}
|
||||
*/
|
||||
transform(language, sourceText) {
|
||||
const languageTransformer = this._languageTransformers.get(language);
|
||||
if (typeof languageTransformer === 'undefined') { return [LanguageTransformer.createTransformedText(sourceText, 0, [])]; }
|
||||
return languageTransformer.transform(sourceText);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} language
|
||||
* @param {string[]} inflectionRules
|
||||
* @returns {import('dictionary').InflectionRuleChain}
|
||||
*/
|
||||
getUserFacingInflectionRules(language, inflectionRules) {
|
||||
const languageTransformer = this._languageTransformers.get(language);
|
||||
if (typeof languageTransformer === 'undefined') {
|
||||
return inflectionRules.map((rule) => ({name: rule}));
|
||||
}
|
||||
return languageTransformer.getUserFacingInflectionRules(inflectionRules);
|
||||
}
|
||||
}
|
||||
45
vendor/yomitan/js/language/ru/russian-text-preprocessors.js
vendored
Normal file
45
vendor/yomitan/js/language/ru/russian-text-preprocessors.js
vendored
Normal file
@@ -0,0 +1,45 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import {basicTextProcessorOptions} from '../text-processors.js';
|
||||
|
||||
/** @type {import('language').TextProcessor<boolean>} */
|
||||
export const removeRussianDiacritics = {
|
||||
name: 'Remove diacritics',
|
||||
description: 'A\u0301 → A, a\u0301 → a',
|
||||
options: basicTextProcessorOptions,
|
||||
process: (str, setting) => {
|
||||
return setting ? str.replace(/\u0301/g, '') : str;
|
||||
},
|
||||
};
|
||||
|
||||
/** @type {import('language').BidirectionalConversionPreprocessor} */
|
||||
export const yoToE = {
|
||||
name: 'Convert "ё" to "е"',
|
||||
description: 'ё → е, Ё → Е and vice versa',
|
||||
options: ['off', 'direct', 'inverse'],
|
||||
process: (str, setting) => {
|
||||
switch (setting) {
|
||||
case 'off':
|
||||
return str;
|
||||
case 'direct':
|
||||
return str.replace(/ё/g, 'е').replace(/Ё/g, 'Е');
|
||||
case 'inverse':
|
||||
return str.replace(/е/g, 'ё').replace(/Е/g, 'Ё');
|
||||
}
|
||||
},
|
||||
};
|
||||
209
vendor/yomitan/js/language/sga/old-irish-transforms.js
vendored
Normal file
209
vendor/yomitan/js/language/sga/old-irish-transforms.js
vendored
Normal file
@@ -0,0 +1,209 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import {prefixInflection, suffixInflection} from '../language-transforms.js';
|
||||
|
||||
/** @typedef {keyof typeof conditions} Condition */
|
||||
|
||||
/**
|
||||
* @param {boolean} notBeginning
|
||||
* @param {string} originalOrthography
|
||||
* @param {string} alternateOrthography
|
||||
* @param {Condition[]} conditionsIn
|
||||
* @param {Condition[]} conditionsOut
|
||||
* @returns {import('language-transformer').Rule<Condition>}
|
||||
*/
|
||||
function tryAlternateOrthography(notBeginning, originalOrthography, alternateOrthography, conditionsIn, conditionsOut) {
|
||||
const orthographyRegExp = notBeginning ? new RegExp('(?<!^)' + originalOrthography, 'g') : new RegExp(originalOrthography, 'g');
|
||||
return {
|
||||
type: 'other',
|
||||
isInflected: orthographyRegExp,
|
||||
deinflect: (text) => text.replace(orthographyRegExp, alternateOrthography),
|
||||
conditionsIn,
|
||||
conditionsOut,
|
||||
};
|
||||
}
|
||||
|
||||
// Old Irish declares no grammatical conditions; its rules are purely orthographic.
const conditions = {};
|
||||
|
||||
/** @type {import('language-transformer').LanguageTransformDescriptor<Condition>} */
|
||||
export const oldIrishTransforms = {
|
||||
language: 'sga',
|
||||
conditions,
|
||||
transforms: {
|
||||
'nd for nn': {
|
||||
name: 'nd for nn',
|
||||
description: 'nd for nn',
|
||||
rules: [
|
||||
suffixInflection('nd', 'nn', [], []),
|
||||
],
|
||||
},
|
||||
'cg for c': {
|
||||
name: 'cg for c',
|
||||
description: 'cg for c',
|
||||
rules: [
|
||||
tryAlternateOrthography(false, 'cg', 'c', [], []),
|
||||
],
|
||||
},
|
||||
'td for t': {
|
||||
name: 'td for t',
|
||||
description: 'td for t',
|
||||
rules: [
|
||||
tryAlternateOrthography(false, 'td', 't', [], []),
|
||||
],
|
||||
},
|
||||
'pb for p': {
|
||||
name: 'pb for p',
|
||||
description: 'pb for p',
|
||||
rules: [
|
||||
tryAlternateOrthography(false, 'pb', 'p', [], []),
|
||||
],
|
||||
},
|
||||
'ǽ/æ for é': {
|
||||
name: 'ǽ/æ for é',
|
||||
description: 'ǽ/æ for é',
|
||||
rules: [
|
||||
tryAlternateOrthography(false, 'ǽ', 'é', [], []),
|
||||
tryAlternateOrthography(false, 'æ', 'é', [], []),
|
||||
],
|
||||
},
|
||||
'doubled vowel': {
|
||||
name: 'doubled vowel',
|
||||
description: 'Doubled Vowel',
|
||||
rules: [
|
||||
tryAlternateOrthography(true, 'aa', 'á', [], []),
|
||||
tryAlternateOrthography(true, 'ee', 'é', [], []),
|
||||
tryAlternateOrthography(true, 'ii', 'í', [], []),
|
||||
tryAlternateOrthography(true, 'oo', 'ó', [], []),
|
||||
tryAlternateOrthography(true, 'uu', 'ú', [], []),
|
||||
],
|
||||
},
|
||||
'doubled consonant': {
|
||||
name: 'doubled consonant',
|
||||
description: 'Doubled Consonant',
|
||||
rules: [
|
||||
tryAlternateOrthography(true, 'cc', 'c', [], []),
|
||||
tryAlternateOrthography(true, 'pp', 'p', [], []),
|
||||
tryAlternateOrthography(true, 'tt', 't', [], []),
|
||||
tryAlternateOrthography(true, 'gg', 'g', [], []),
|
||||
tryAlternateOrthography(true, 'bb', 'b', [], []),
|
||||
tryAlternateOrthography(true, 'dd', 'd', [], []),
|
||||
tryAlternateOrthography(true, 'rr', 'r', [], []),
|
||||
tryAlternateOrthography(true, 'll', 'l', [], []),
|
||||
tryAlternateOrthography(true, 'nn', 'n', [], []),
|
||||
tryAlternateOrthography(true, 'mm', 'm', [], []),
|
||||
tryAlternateOrthography(true, 'ss', 's', [], []),
|
||||
],
|
||||
},
|
||||
'lenited': {
|
||||
name: 'lenited',
|
||||
description: 'Non-Beginning Lenition',
|
||||
rules: [
|
||||
tryAlternateOrthography(true, 'ch', 'c', [], []),
|
||||
tryAlternateOrthography(true, 'ph', 'p', [], []),
|
||||
tryAlternateOrthography(true, 'th', 't', [], []),
|
||||
],
|
||||
},
|
||||
'lenited (Middle Irish)': {
|
||||
name: 'lenited (Middle Irish)',
|
||||
description: 'Non-Beginning Lenition (Middle Irish)',
|
||||
rules: [
|
||||
tryAlternateOrthography(true, 'gh', 'g', [], []),
|
||||
tryAlternateOrthography(true, 'bh', 'b', [], []),
|
||||
tryAlternateOrthography(true, 'dh', 'd', [], []),
|
||||
],
|
||||
},
|
||||
'[IM] nasalized': {
|
||||
name: '[IM] nasalized',
|
||||
description: 'Nasalized Word',
|
||||
rules: [
|
||||
prefixInflection('ng', 'g', [], []),
|
||||
prefixInflection('mb', 'b', [], []),
|
||||
prefixInflection('nd', 'd', [], []),
|
||||
prefixInflection('n-', '', [], []),
|
||||
prefixInflection('m-', '', [], []),
|
||||
],
|
||||
},
|
||||
'[IM] nasalized (Middle Irish)': {
|
||||
name: '[IM] nasalized (Middle Irish)',
|
||||
description: 'Nasalized Word (Middle Irish)',
|
||||
rules: [
|
||||
prefixInflection('gc', 'c', [], []),
|
||||
prefixInflection('bp', 'p', [], []),
|
||||
prefixInflection('dt', 'd', [], []),
|
||||
],
|
||||
},
|
||||
'[IM] lenited': {
|
||||
name: '[IM] lenited',
|
||||
description: 'Lenited Word',
|
||||
rules: [
|
||||
prefixInflection('ch', 'c', [], []),
|
||||
prefixInflection('ph', 'p', [], []),
|
||||
prefixInflection('th', 't', [], []),
|
||||
],
|
||||
},
|
||||
'[IM] lenited (Middle Irish)': {
|
||||
name: '[IM] lenited (Middle Irish)',
|
||||
description: 'Lenited Word (Middle Irish)',
|
||||
rules: [
|
||||
prefixInflection('gh', 'g', [], []),
|
||||
prefixInflection('bh', 'b', [], []),
|
||||
prefixInflection('dh', 'd', [], []),
|
||||
],
|
||||
},
|
||||
'[IM] aspirated': {
|
||||
name: '[IM] aspirated',
|
||||
description: 'Aspirated Word',
|
||||
rules: [
|
||||
prefixInflection('ha', 'a', [], []),
|
||||
prefixInflection('he', 'e', [], []),
|
||||
prefixInflection('hi', 'i', [], []),
|
||||
prefixInflection('ho', 'o', [], []),
|
||||
prefixInflection('hu', 'u', [], []),
|
||||
prefixInflection('h-', '', [], []),
|
||||
],
|
||||
},
|
||||
'[IM] geminated': {
|
||||
name: '[IM] geminated',
|
||||
description: 'Geminated Word',
|
||||
rules: [
|
||||
prefixInflection('cc', 'c', [], []),
|
||||
prefixInflection('pp', 'p', [], []),
|
||||
prefixInflection('tt', 't', [], []),
|
||||
prefixInflection('gg', 'g', [], []),
|
||||
prefixInflection('bb', 'b', [], []),
|
||||
prefixInflection('dd', 'd', [], []),
|
||||
prefixInflection('rr', 'r', [], []),
|
||||
prefixInflection('ll', 'l', [], []),
|
||||
prefixInflection('nn', 'n', [], []),
|
||||
prefixInflection('mm', 'm', [], []),
|
||||
prefixInflection('ss', 's', [], []),
|
||||
prefixInflection('c-c', 'c', [], []),
|
||||
prefixInflection('p-p', 'p', [], []),
|
||||
prefixInflection('t-t', 't', [], []),
|
||||
prefixInflection('g-g', 'g', [], []),
|
||||
prefixInflection('b-b', 'b', [], []),
|
||||
prefixInflection('d-d', 'd', [], []),
|
||||
prefixInflection('r-r', 'r', [], []),
|
||||
prefixInflection('l-l', 'l', [], []),
|
||||
prefixInflection('n-n', 'n', [], []),
|
||||
prefixInflection('m-m', 'm', [], []),
|
||||
prefixInflection('s-s', 's', [], []),
|
||||
],
|
||||
},
|
||||
},
|
||||
};
|
||||
31
vendor/yomitan/js/language/sh/serbo-croatian-text-preprocessors.js
vendored
Normal file
31
vendor/yomitan/js/language/sh/serbo-croatian-text-preprocessors.js
vendored
Normal file
@@ -0,0 +1,31 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import {basicTextProcessorOptions} from '../text-processors.js';
|
||||
|
||||
/** @type {import('language').TextProcessor<boolean>} */
|
||||
export const removeSerboCroatianAccentMarks = {
|
||||
name: 'Remove diacritics',
|
||||
description: 'A\u0301 → A, a\u0301 → a',
|
||||
options: basicTextProcessorOptions,
|
||||
process: (str, setting) => (
|
||||
setting ?
|
||||
str.normalize('NFD').replace(/[aeiourAEIOUR][\u0300-\u036f]/g, (match) => match[0]) :
|
||||
str
|
||||
),
|
||||
|
||||
};
|
||||
355
vendor/yomitan/js/language/sq/albanian-transforms.js
vendored
Normal file
355
vendor/yomitan/js/language/sq/albanian-transforms.js
vendored
Normal file
@@ -0,0 +1,355 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import {suffixInflection} from '../language-transforms.js';
|
||||
|
||||
/** @typedef {keyof typeof conditions} Condition */
|
||||
|
||||
/**
|
||||
* @param {string} inflectedSuffix
|
||||
* @param {string} deinflectedSuffix
|
||||
* @param {Condition[]} conditionsIn
|
||||
* @param {Condition[]} conditionsOut
|
||||
* @returns {import('language-transformer').Rule<Condition>}
|
||||
*/
|
||||
function conjugationIISuffixInflection(inflectedSuffix, deinflectedSuffix, conditionsIn, conditionsOut) {
|
||||
return {
|
||||
...suffixInflection(inflectedSuffix, deinflectedSuffix, conditionsIn, conditionsOut),
|
||||
type: 'other',
|
||||
isInflected: new RegExp('.*[^j]' + inflectedSuffix + '$'),
|
||||
};
|
||||
}
|
||||
|
||||
const conditions = {
|
||||
v: {
|
||||
name: 'Verb',
|
||||
isDictionaryForm: true,
|
||||
},
|
||||
n: {
|
||||
name: 'Noun',
|
||||
isDictionaryForm: true,
|
||||
subConditions: ['np', 'ns'],
|
||||
},
|
||||
np: {
|
||||
name: 'Noun plural',
|
||||
isDictionaryForm: true,
|
||||
},
|
||||
ns: {
|
||||
name: 'Noun singular',
|
||||
isDictionaryForm: true,
|
||||
},
|
||||
adj: {
|
||||
name: 'Adjective',
|
||||
isDictionaryForm: true,
|
||||
},
|
||||
adv: {
|
||||
name: 'Adverb',
|
||||
isDictionaryForm: true,
|
||||
},
|
||||
};
|
||||
|
||||
/**
 * Deinflection rule table for Albanian ('sq').
 * Each transform maps an inflected surface form back toward a dictionary form
 * via `suffixInflection(inflectedSuffix, dictionarySuffix, conditionsIn, conditionsOut)`.
 * `conjugationIISuffixInflection` is defined earlier in this file — presumably it
 * restricts the rule to conjugation-II verb stems; confirm against its definition.
 * @type {import('language-transformer').LanguageTransformDescriptor<Condition>}
 */
export const albanianTransforms = {
    language: 'sq',
    conditions,
    transforms: {
        // Nouns
        'definite': {
            name: 'definite',
            description: 'Definite form of a noun',
            rules: [
                // Masculine
                suffixInflection('ku', 'k', [], ['n']),
                suffixInflection('gu', 'g', [], ['n']),
                suffixInflection('hu', 'h', [], ['n']),
                suffixInflection('au', 'a', [], ['n']),
                suffixInflection('iu', 'i', [], ['n']),
                suffixInflection('eu', 'e', [], ['n']),
                suffixInflection('i', 'ë', [], ['n']),
                suffixInflection('i', '', [], ['n']),
                suffixInflection('ri', '', [], ['n']),
                suffixInflection('oi', 'ua', [], ['n']),
                // Feminine
                suffixInflection('a', 'ë', [], ['n']),
                suffixInflection('a', '', [], ['n']),
                suffixInflection('ja', 'e', [], ['n']),
            ],
        },
        'singular definite accusative': {
            name: 'singular definite accusative',
            description: 'Singular definite accusative form of a noun',
            rules: [
                suffixInflection('n', '', [], ['n']),
            ],
        },
        'plural': {
            name: 'plural',
            description: 'Plural form of a noun',
            rules: [
                // Plural forms deinflect toward the singular noun condition.
                suffixInflection('e', '', ['np'], ['ns']),
                suffixInflection('t', '', ['np'], ['ns']),
            ],
        },
        // Verbs
        'present indicative second-person singular': {
            name: 'present indicative second-person singular',
            description: 'Present indicative second-person singular form of a verb',
            rules: [
                suffixInflection('on', 'oj', [], ['v']),
                suffixInflection('uan', 'uaj', [], ['v']),
                suffixInflection('n', 'j', [], ['v']),
                suffixInflection('hesh', 'hem', [], ['v']),
            ],
        },
        'present indicative third-person singular': {
            name: 'present indicative third-person singular',
            description: 'Present indicative third-person singular form of a verb',
            rules: [
                suffixInflection('on', 'oj', [], ['v']),
                suffixInflection('uan', 'uaj', [], ['v']),
                suffixInflection('n', 'j', [], ['v']),
                suffixInflection('het', 'hem', [], ['v']),
            ],
        },
        'present indicative first-person plural': {
            name: 'present indicative first-person plural',
            description: 'Present indicative first-person plural form of a verb',
            rules: [
                suffixInflection('më', '', [], ['v']),
                suffixInflection('im', '', [], ['v']),
                suffixInflection('hemi', 'hem', [], ['v']),
            ],
        },
        'present indicative second-person plural': {
            name: 'present indicative second-person plural',
            description: 'Present indicative second-person plural form of a verb',
            rules: [
                suffixInflection('ni', 'j', [], ['v']),
                suffixInflection('ni', '', [], ['v']),
                suffixInflection('heni', 'hem', [], ['v']),
            ],
        },
        'present indicative third-person plural': {
            name: 'present indicative third-person plural',
            description: 'Present indicative third-person plural form of a verb',
            rules: [
                suffixInflection('në', '', [], ['v']),
                suffixInflection('in', '', [], ['v']),
                suffixInflection('hen', 'hem', [], ['v']),
            ],
        },
        'imperfect first-person singular indicative': {
            name: 'imperfect first-person singular indicative',
            description: 'Imperfect first-person singular indicative form of a verb',
            rules: [
                suffixInflection('ja', 'j', [], ['v']),
                suffixInflection('ja', '', [], ['v']),
                suffixInflection('hesha', 'hem', [], ['v']),
            ],
        },
        'imperfect second-person singular indicative': {
            name: 'imperfect second-person singular indicative',
            description: 'Imperfect second-person singular indicative form of a verb',
            rules: [
                suffixInflection('je', 'j', [], ['v']),
                suffixInflection('je', '', [], ['v']),
                suffixInflection('heshe', 'hem', [], ['v']),
            ],
        },
        'imperfect third-person singular indicative': {
            name: 'imperfect third-person singular indicative',
            description: 'Imperfect third-person singular indicative form of a verb',
            rules: [
                suffixInflection('nte', 'j', [], ['v']),
                suffixInflection('te', '', [], ['v']),
                suffixInflection('hej', 'hem', [], ['v']),
            ],
        },
        'imperfect first-person plural indicative': {
            name: 'imperfect first-person plural indicative',
            description: 'Imperfect first-person plural indicative form of a verb',
            rules: [
                suffixInflection('nim', 'j', [], ['v']),
                suffixInflection('nim', '', [], ['v']),
                suffixInflection('heshim', 'hem', [], ['v']),
            ],
        },
        'imperfect second-person plural indicative': {
            name: 'imperfect second-person plural indicative',
            description: 'Imperfect second-person plural indicative form of a verb',
            rules: [
                suffixInflection('nit', 'j', [], ['v']),
                suffixInflection('nit', '', [], ['v']),
                suffixInflection('heshit', 'hem', [], ['v']),
            ],
        },
        'imperfect third-person plural indicative': {
            name: 'imperfect third-person plural indicative',
            description: 'Imperfect third-person plural indicative form of a verb',
            rules: [
                suffixInflection('nin', 'j', [], ['v']),
                suffixInflection('nin', '', [], ['v']),
                suffixInflection('heshin', 'hem', [], ['v']),
            ],
        },
        'aorist first-person singular indicative': {
            name: 'aorist first-person singular indicative',
            description: 'Aorist first-person singular indicative form of a verb',
            rules: [
                suffixInflection('ova', 'uaj', [], ['v']),
                suffixInflection('va', 'j', [], ['v']),
                conjugationIISuffixInflection('a', '', [], ['v']),
            ],
        },
        'aorist second-person singular indicative': {
            name: 'aorist second-person singular indicative',
            description: 'Aorist second-person singular indicative form of a verb',
            rules: [
                suffixInflection('ove', 'uaj', [], ['v']),
                suffixInflection('ve', 'j', [], ['v']),
                conjugationIISuffixInflection('e', '', [], ['v']),
            ],
        },
        'aorist third-person singular indicative': {
            name: 'aorist third-person singular indicative',
            description: 'Aorist third-person singular indicative form of a verb',
            rules: [
                suffixInflection('oi', 'oj', [], ['v']),
                suffixInflection('oi', 'uaj', [], ['v']),
                suffixInflection('u', 'j', [], ['v']),
                conjugationIISuffixInflection('i', '', [], ['v']),
                suffixInflection('ye', 'ej', [], ['v']),
            ],
        },
        'aorist first-person plural indicative': {
            name: 'aorist first-person plural indicative',
            description: 'Aorist first-person plural indicative form of a verb',
            rules: [
                suffixInflection('uam', 'oj', [], ['v']),
                suffixInflection('uam', 'uaj', [], ['v']),
                suffixInflection('më', 'j', [], ['v']),
                conjugationIISuffixInflection('ëm', '', [], ['v']),
            ],
        },
        'aorist second-person plural indicative': {
            name: 'aorist second-person plural indicative',
            description: 'Aorist second-person plural indicative form of a verb',
            rules: [
                suffixInflection('uat', 'oj', [], ['v']),
                suffixInflection('uat', 'uaj', [], ['v']),
                suffixInflection('të', 'j', [], ['v']),
                conjugationIISuffixInflection('ët', '', [], ['v']),
            ],
        },
        'aorist third-person plural indicative': {
            name: 'aorist third-person plural indicative',
            description: 'Aorist third-person plural indicative form of a verb',
            rules: [
                suffixInflection('uan', 'oj', [], ['v']),
                suffixInflection('uan', 'uaj', [], ['v']),
                suffixInflection('në', 'j', [], ['v']),
                conjugationIISuffixInflection('ën', '', [], ['v']),
            ],
        },
        'imperative second-person singular present': {
            name: 'imperative second-person singular present',
            description: 'Imperative second-person singular present form of a verb',
            rules: [
                suffixInflection('o', 'oj', [], ['v']),
                suffixInflection('hu', 'hem', [], ['v']),
            ],
        },
        'imperative second-person plural present': {
            name: 'imperative second-person plural present',
            description: 'Imperative second-person plural present form of a verb',
            rules: [
                suffixInflection('ni', 'j', [], ['v']),
                suffixInflection('ni', '', [], ['v']),
                suffixInflection('huni', 'hem', [], ['v']),
            ],
        },
        'participle': {
            name: 'participle',
            description: 'Participle form of a verb',
            rules: [
                suffixInflection('uar', 'oj', [], ['v']),
                suffixInflection('ur', '', [], ['v']),
                suffixInflection('rë', 'j', [], ['v']),
                suffixInflection('yer', 'ej', [], ['v']),
            ],
        },
        'mediopassive': {
            name: 'mediopassive',
            description: 'Mediopassive form of a verb',
            rules: [
                // Note: unlike most verb rules, these are restricted to verb input ('v' in conditionsIn).
                suffixInflection('hem', 'h', ['v'], ['v']),
                suffixInflection('hem', 'j', ['v'], ['v']),
            ],
        },
        'optative first-person singular present': {
            name: 'optative first-person singular present',
            description: 'Optative first-person singular present form of a verb',
            rules: [
                suffixInflection('fsha', 'j', [], ['v']),
            ],
        },
        'optative second-person singular present': {
            name: 'optative second-person singular present',
            description: 'Optative second-person singular present form of a verb',
            rules: [
                suffixInflection('fsh', 'j', [], ['v']),
            ],
        },
        'optative third-person singular present': {
            name: 'optative third-person singular present',
            description: 'Optative third-person singular present form of a verb',
            rules: [
                suffixInflection('ftë', 'j', [], ['v']),
            ],
        },
        'optative first-person plural present': {
            name: 'optative first-person plural present',
            description: 'Optative first-person plural present form of a verb',
            rules: [
                suffixInflection('fshim', 'j', [], ['v']),
            ],
        },
        'optative second-person plural present': {
            name: 'optative second-person plural present',
            description: 'Optative second-person plural present form of a verb',
            rules: [
                suffixInflection('fshi', 'j', [], ['v']),
            ],
        },
        'optative third-person plural present': {
            name: 'optative third-person plural present',
            description: 'Optative third-person plural present form of a verb',
            rules: [
                suffixInflection('fshin', 'j', [], ['v']),
            ],
        },
        'nominalization': {
            name: 'nominalization',
            description: 'Noun form of a verb',
            rules: [
                suffixInflection('im', 'oj', [], ['v']),
                suffixInflection('im', 'ej', [], ['v']),
                suffixInflection('je', '', [], ['v']),
            ],
        },
    },
};
|
||||
49
vendor/yomitan/js/language/text-processors.js
vendored
Executable file
49
vendor/yomitan/js/language/text-processors.js
vendored
Executable file
@@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/**
 * Standard option set for boolean on/off text processors:
 * try the text unmodified (false) and processed (true).
 * @type {import('language').TextProcessorOptions<boolean>}
 */
export const basicTextProcessorOptions = [false, true];

/**
 * Lowercases the whole input when the setting is enabled.
 * @type {import('language').TextProcessor<boolean>}
 */
export const decapitalize = {
    name: 'Decapitalize text',
    description: 'CAPITALIZED TEXT → capitalized text',
    options: basicTextProcessorOptions,
    process(str, setting) {
        return setting ? str.toLowerCase() : str;
    },
};

/**
 * Uppercases only the first UTF-16 unit of the input when the setting is enabled.
 * @type {import('language').TextProcessor<boolean>}
 */
export const capitalizeFirstLetter = {
    name: 'Capitalize first letter',
    description: 'lowercase text → Lowercase text',
    options: basicTextProcessorOptions,
    process(str, setting) {
        if (!setting) { return str; }
        return str.charAt(0).toUpperCase() + str.slice(1);
    },
};

/**
 * Strips combining diacritical marks (U+0300–U+036F) after NFD decomposition.
 * WARNING: This should NOT be used with languages that use Han characters,
 * as it can result in undesirable normalization:
 * - '\u9038'.normalize('NFD') => '\u9038' (逸)
 * - '\ufa67'.normalize('NFD') => '\u9038' (逸 => 逸)
 * @type {import('language').TextProcessor<boolean>}
 */
export const removeAlphabeticDiacritics = {
    name: 'Remove Alphabetic Diacritics',
    description: 'ἄήé -> αηe',
    options: basicTextProcessorOptions,
    process(str, setting) {
        if (!setting) { return str; }
        return str.normalize('NFD').replace(/[\u0300-\u036f]/g, '');
    },
};
|
||||
1711
vendor/yomitan/js/language/text-scanner.js
vendored
Normal file
1711
vendor/yomitan/js/language/text-scanner.js
vendored
Normal file
File diff suppressed because it is too large
Load Diff
36
vendor/yomitan/js/language/text-utilities.js
vendored
Normal file
36
vendor/yomitan/js/language/text-utilities.js
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import {isStringPartiallyJapanese} from './ja/japanese.js';
|
||||
import {isStringPartiallyChinese} from './zh/chinese.js';
|
||||
|
||||
/**
 * Returns the language that the string might be by using some heuristic checks.
 * Values returned are ISO codes. `null` is returned if no language can be determined.
 * @param {string} text
 * @param {?string} language
 * @returns {?string}
 */
export function getLanguageFromText(text, language) {
    const looksJapanese = isStringPartiallyJapanese(text);
    const looksChinese = isStringPartiallyChinese(text);
    // Never override an explicitly Chinese selection (Mandarin 'zh' or Cantonese 'yue'),
    // since Han text would otherwise be re-detected as 'ja'/'zh' below.
    const chineseSelected = language === 'zh' || language === 'yue';
    if (!chineseSelected) {
        if (looksJapanese) { return 'ja'; }
        if (looksChinese) { return 'zh'; }
    }
    return language;
}
|
||||
718
vendor/yomitan/js/language/tl/tagalog-transforms.js
vendored
Normal file
718
vendor/yomitan/js/language/tl/tagalog-transforms.js
vendored
Normal file
@@ -0,0 +1,718 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import {prefixInflection, suffixInflection, wholeWordInflection} from '../language-transforms.js';
|
||||
|
||||
// Tagalog consonant letters (lowercase followed by uppercase), used to build
// regex character classes in the inflection helpers below.
const LOWERCASE_CONSONANTS = 'bcdfghjklmnpqrstvwxyz';
const CONSONANTS = LOWERCASE_CONSONANTS + LOWERCASE_CONSONANTS.toUpperCase();
// Vowel letters (lowercase only).
const VOWELS = 'aeiou';
|
||||
|
||||
/**
 * Builds a rule that deinflects a hyphenated word by removing its first hyphen
 * (e.g. 'mag-aral' → 'magaral').
 * @param {string[]} conditionsIn
 * @param {string[]} conditionsOut
 * @returns {import('language-transformer').Rule}
 */
export function hyphenatedInflection(conditionsIn, conditionsOut) {
    const hyphenPattern = /-/;
    return {
        type: 'prefix',
        isInflected: hyphenPattern,
        deinflect(text) {
            // Only the first hyphen is removed (no global flag on the pattern).
            return text.replace(hyphenPattern, '');
        },
        conditionsIn,
        conditionsOut,
    };
}
|
||||
|
||||
/**
 * Builds a suffix-removal rule that also reverses the o→u sound change in the
 * final syllable (e.g. a 'u' before the last consonant cluster is restored to 'o').
 * @param {string} inflectedSuffix
 * @param {string} deinflectedSuffix
 * @param {string[]} conditionsIn
 * @param {string[]} conditionsOut
 * @returns {import('language-transformer').Rule}
 */
export function suffixInflectionWithOtoUSoundChange(inflectedSuffix, deinflectedSuffix, conditionsIn, conditionsOut) {
    const pattern = new RegExp(`u([${CONSONANTS}]+)${inflectedSuffix}$`);
    const replacement = `o$1${deinflectedSuffix}`;
    return {
        // NOTE(review): type is 'prefix' even though this matches a suffix — mirrors
        // the sibling helpers; confirm against the language-transformer rule contract.
        type: 'prefix',
        isInflected: pattern,
        deinflect(text) {
            return text.replace(pattern, replacement);
        },
        conditionsIn,
        conditionsOut,
    };
}
|
||||
|
||||
/**
 * Builds a prefix-removal rule for forms with a reduplicated first syllable:
 * the prefix and one copy of the repeated (consonant*-vowel) syllable are dropped.
 * @param {string} inflectedPrefix
 * @param {string} deinflectedPrefix
 * @param {string[]} conditionsIn
 * @param {string[]} conditionsOut
 * @param {string} consonants - character class of consonants allowed in the syllable onset
 * @returns {import('language-transformer').Rule}
 */
export function prefixInflectionWithRep1(inflectedPrefix, deinflectedPrefix, conditionsIn, conditionsOut, consonants = CONSONANTS) {
    // Group 2 is the first syllable; group 3 backreferences it to require reduplication.
    const pattern = new RegExp(`^(${inflectedPrefix})([${consonants}]*[${VOWELS}])(\\2)`);
    return {
        type: 'prefix',
        isInflected: pattern,
        deinflect(text) {
            return text.replace(pattern, `${deinflectedPrefix}$2`);
        },
        conditionsIn,
        conditionsOut,
    };
}
|
||||
|
||||
/**
 * Builds a circumfix ("sandwich") rule: replaces a prefix/suffix pair around a
 * word-character stem with their dictionary-form counterparts.
 * @param {string} inflectedPrefix
 * @param {string} deinflectedPrefix
 * @param {string} inflectedSuffix
 * @param {string} deinflectedSuffix
 * @param {string[]} conditionsIn
 * @param {string[]} conditionsOut
 * @returns {import('language-transformer').Rule}
 */
export function sandwichInflection(inflectedPrefix, deinflectedPrefix, inflectedSuffix, deinflectedSuffix, conditionsIn, conditionsOut) {
    const pattern = new RegExp(`^${inflectedPrefix}\\w+${inflectedSuffix}$`);
    return {
        type: 'other',
        isInflected: pattern,
        deinflect(text) {
            const stem = text.slice(inflectedPrefix.length, -inflectedSuffix.length);
            return deinflectedPrefix + stem + deinflectedSuffix;
        },
        conditionsIn,
        conditionsOut,
    };
}
|
||||
|
||||
/**
 * Builds a circumfix rule that additionally reverses the o→u sound change in
 * the stem's final syllable while swapping the prefix/suffix pair.
 * @param {string} inflectedPrefix
 * @param {string} deinflectedPrefix
 * @param {string} inflectedSuffix
 * @param {string} deinflectedSuffix
 * @param {string[]} conditionsIn
 * @param {string[]} conditionsOut
 * @returns {import('language-transformer').Rule}
 */
export function sandwichInflectionWithOtoUSoundChange(inflectedPrefix, deinflectedPrefix, inflectedSuffix, deinflectedSuffix, conditionsIn, conditionsOut) {
    const pattern = new RegExp(`^${inflectedPrefix}(\\w+)u([${CONSONANTS}]+)${inflectedSuffix}$`);
    const replacement = `${deinflectedPrefix}$1o$2${deinflectedSuffix}`;
    return {
        // NOTE(review): type is 'prefix' while plain sandwichInflection uses 'other' —
        // mirrors upstream; confirm whether the distinction is intentional.
        type: 'prefix',
        isInflected: pattern,
        deinflect(text) {
            return text.replace(pattern, replacement);
        },
        conditionsIn,
        conditionsOut,
    };
}
|
||||
|
||||
|
||||
/** @type {import('language-transformer').LanguageTransformDescriptor} */
|
||||
export const tagalogTransforms = {
|
||||
language: 'tl',
|
||||
conditions: {
|
||||
n: {
|
||||
name: 'Noun',
|
||||
isDictionaryForm: true,
|
||||
subConditions: ['num'],
|
||||
},
|
||||
adj: {
|
||||
name: 'Adjective',
|
||||
isDictionaryForm: true,
|
||||
},
|
||||
num: {
|
||||
name: 'Numeral',
|
||||
isDictionaryForm: true,
|
||||
},
|
||||
},
|
||||
transforms: {
|
||||
'hyphenated': {
|
||||
name: 'hyphenated',
|
||||
description: 'hyphenated form of words',
|
||||
rules: [
|
||||
hyphenatedInflection([], []),
|
||||
],
|
||||
},
|
||||
'-an': {
|
||||
name: '-an',
|
||||
rules: [
|
||||
suffixInflection('an', '', [], ['n']),
|
||||
suffixInflection('ran', 'd', [], ['n']),
|
||||
suffixInflectionWithOtoUSoundChange('an', '', [], ['n']),
|
||||
suffixInflectionWithOtoUSoundChange('ran', 'd', [], ['n']),
|
||||
...[...'aeiou'].map((v) => suffixInflection(`${v}han`, `${v}`, [], ['n'])),
|
||||
...[...'aeiou'].map((v) => suffixInflection(`${v}nan`, `${v}`, [], ['n'])),
|
||||
suffixInflection('uhan', 'o', [], ['n']),
|
||||
suffixInflection('unan', 'o', [], ['n']),
|
||||
],
|
||||
},
|
||||
'-in': {
|
||||
name: '-in',
|
||||
rules: [
|
||||
suffixInflection('in', '', [], ['n']),
|
||||
suffixInflection('rin', 'd', [], ['n']),
|
||||
suffixInflectionWithOtoUSoundChange('in', '', [], ['n']),
|
||||
suffixInflectionWithOtoUSoundChange('rin', 'd', [], ['n']),
|
||||
...[...'aeiou'].map((v) => suffixInflection(`${v}hin`, `${v}`, [], ['n'])),
|
||||
...[...'aeiou'].map((v) => suffixInflection(`${v}nin`, `${v}`, [], ['n'])),
|
||||
suffixInflection('uhin', 'o', [], ['n']),
|
||||
suffixInflection('unin', 'o', [], ['n']),
|
||||
],
|
||||
},
|
||||
'ma-': {
|
||||
name: 'ma-',
|
||||
rules: [
|
||||
prefixInflection('ma', '', [], ['n', 'adj']),
|
||||
prefixInflection('mar', 'd', [], ['n', 'adj']),
|
||||
],
|
||||
},
|
||||
'pang-': {
|
||||
name: 'pang-',
|
||||
rules: [
|
||||
prefixInflection('pang', '', [], ['n', 'adj']),
|
||||
...[...'dlrst'].map((v) => prefixInflection(`pan${v}`, `${v}`, [], ['n', 'adj'])),
|
||||
...[...'bp'].map((v) => prefixInflection(`pam${v}`, `${v}`, [], ['n', 'adj'])),
|
||||
wholeWordInflection('pangalawa', 'dalawa', [], ['num']),
|
||||
wholeWordInflection('pangatlo', 'tatlo', [], ['num']),
|
||||
],
|
||||
},
|
||||
'ka-': {
|
||||
name: 'ka-',
|
||||
rules: [
|
||||
prefixInflection('ka', '', [], ['n', 'adj']),
|
||||
prefixInflection('kar', 'd', [], ['n', 'adj']),
|
||||
],
|
||||
},
|
||||
'kaka-': {
|
||||
name: 'kaka-',
|
||||
rules: [
|
||||
prefixInflection('kaka', '', [], ['n']),
|
||||
prefixInflection('kakar', 'd', [], ['n']),
|
||||
prefixInflectionWithRep1('ka', '', [], ['n']),
|
||||
],
|
||||
},
|
||||
'ka-...-an': {
|
||||
name: 'ka-...-an',
|
||||
rules: [
|
||||
sandwichInflection('ka', '', 'an', '', [], ['n']),
|
||||
sandwichInflection('kar', 'd', 'an', '', [], ['n']),
|
||||
sandwichInflection('ka', '', 'ran', 'd', [], ['n']),
|
||||
sandwichInflection('kar', 'd', 'ran', 'd', [], ['n']),
|
||||
...[...'aeiou'].map((v) => sandwichInflection('ka', '', `${v}han`, `${v}`, [], ['n'])),
|
||||
...[...'aeiou'].map((v) => sandwichInflection('kar', 'd', `${v}han`, `${v}`, [], ['n'])),
|
||||
...[...'aeiou'].map((v) => sandwichInflection('ka', '', `${v}nan`, `${v}`, [], ['n'])),
|
||||
...[...'aeiou'].map((v) => sandwichInflection('kar', 'd', `${v}nan`, `${v}`, [], ['n'])),
|
||||
sandwichInflection('ka', '', 'uhan', 'o', [], ['n']),
|
||||
sandwichInflection('kar', 'd', 'uhan', 'o', [], ['n']),
|
||||
sandwichInflection('ka', '', 'unan', 'o', [], ['n']),
|
||||
sandwichInflection('kar', 'd', 'unan', 'o', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange('ka', '', 'an', '', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange('kar', 'd', 'an', '', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange('ka', '', 'ran', 'd', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange('kar', 'd', 'ran', 'd', [], ['n']),
|
||||
],
|
||||
},
|
||||
'mag-': {
|
||||
name: 'mag-',
|
||||
rules: [
|
||||
prefixInflection('mag', '', [], ['n']),
|
||||
],
|
||||
},
|
||||
'mag- + rep1': {
|
||||
name: 'mag- + rep1',
|
||||
rules: [
|
||||
prefixInflectionWithRep1('mag', '', [], ['n']),
|
||||
],
|
||||
},
|
||||
'magka-': {
|
||||
name: 'magka-',
|
||||
rules: [
|
||||
prefixInflection('magka', '', [], ['n', 'adj']),
|
||||
prefixInflection('magkar', 'd', [], ['n', 'adj']),
|
||||
],
|
||||
},
|
||||
'magkaka-': {
|
||||
name: 'magkaka-',
|
||||
rules: [
|
||||
prefixInflection('magkaka', '', [], ['n', 'adj']),
|
||||
prefixInflection('magkakar', 'd', [], ['n', 'adj']),
|
||||
],
|
||||
},
|
||||
'mang- + rep1': {
|
||||
name: 'mang- + rep1',
|
||||
rules: [
|
||||
prefixInflectionWithRep1('mang', '', [], ['n']),
|
||||
prefixInflectionWithRep1('man', '', [], ['n'], 'dlrst'),
|
||||
prefixInflectionWithRep1('mam', '', [], ['n'], 'bp'),
|
||||
],
|
||||
},
|
||||
'pa-': {
|
||||
name: 'pa-',
|
||||
rules: [
|
||||
prefixInflection('pa', '', [], ['n', 'adj']),
|
||||
prefixInflection('par', 'd', [], ['n', 'adj']),
|
||||
],
|
||||
},
|
||||
'pa-...-an': {
|
||||
name: 'pa-...-an',
|
||||
rules: [
|
||||
sandwichInflection('pa', '', 'an', '', [], ['n']),
|
||||
sandwichInflection('par', 'd', 'an', '', [], ['n']),
|
||||
sandwichInflection('pa', '', 'ran', 'd', [], ['n']),
|
||||
sandwichInflection('par', 'd', 'ran', 'd', [], ['n']),
|
||||
...[...'aeiou'].map((v) => sandwichInflection('pa', '', `${v}han`, `${v}`, [], ['n'])),
|
||||
...[...'aeiou'].map((v) => sandwichInflection('par', 'd', `${v}han`, `${v}`, [], ['n'])),
|
||||
...[...'aeiou'].map((v) => sandwichInflection('pa', '', `${v}nan`, `${v}`, [], ['n'])),
|
||||
...[...'aeiou'].map((v) => sandwichInflection('par', 'd', `${v}nan`, `${v}`, [], ['n'])),
|
||||
sandwichInflection('pa', '', 'uhan', 'o', [], ['n']),
|
||||
sandwichInflection('par', 'd', 'uhan', 'o', [], ['n']),
|
||||
sandwichInflection('pa', '', 'unan', 'o', [], ['n']),
|
||||
sandwichInflection('par', 'd', 'unan', 'o', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange('pa', '', 'an', '', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange('par', 'd', 'an', '', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange('pa', '', 'ran', 'd', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange('par', 'd', 'ran', 'd', [], ['n']),
|
||||
],
|
||||
},
|
||||
'pag-': {
|
||||
name: 'pag-',
|
||||
rules: [
|
||||
prefixInflection('pag', '', [], ['n']),
|
||||
],
|
||||
},
|
||||
'pag- + rep1': {
|
||||
name: 'pag- + rep1',
|
||||
rules: [
|
||||
prefixInflectionWithRep1('pag', '', [], ['n']),
|
||||
],
|
||||
},
|
||||
'pagka-': {
|
||||
name: 'pagka-',
|
||||
rules: [
|
||||
prefixInflection('pagka', '', [], ['n']),
|
||||
prefixInflection('pagkar', 'd', [], ['n']),
|
||||
prefixInflection('pagkaka', '', [], ['n']),
|
||||
prefixInflection('pagkakar', 'd', [], ['n']),
|
||||
],
|
||||
},
|
||||
'pakiki-': {
|
||||
name: 'pakiki-',
|
||||
rules: [
|
||||
prefixInflection('pakiki', '', [], ['n']),
|
||||
prefixInflectionWithRep1('pakiki', '', [], ['n']),
|
||||
prefixInflection('pakikir', 'd', [], ['n']),
|
||||
],
|
||||
},
|
||||
'pakikipag-': {
|
||||
name: 'pakikipag-',
|
||||
rules: [
|
||||
prefixInflection('pakikipag', '', [], ['n']),
|
||||
],
|
||||
},
|
||||
'pang- + rep1': {
|
||||
name: 'pang- + rep1',
|
||||
rules: [
|
||||
prefixInflectionWithRep1('pang', '', [], ['n']),
|
||||
prefixInflectionWithRep1('pan', '', [], ['n'], 'dlrst'),
|
||||
prefixInflectionWithRep1('pam', '', [], ['n'], 'bp'),
|
||||
],
|
||||
},
|
||||
'tag-': {
|
||||
name: 'tag-',
|
||||
rules: [
|
||||
prefixInflection('tag', '', [], ['n']),
|
||||
],
|
||||
},
|
||||
'taga-': {
|
||||
name: 'taga-',
|
||||
rules: [
|
||||
prefixInflection('taga', '', [], ['n']),
|
||||
],
|
||||
},
|
||||
'tagapag-': {
|
||||
name: 'tagapag-',
|
||||
rules: [
|
||||
prefixInflection('tagapag', '', [], ['n']),
|
||||
],
|
||||
},
|
||||
'tagapang-': {
|
||||
name: 'tagapang-',
|
||||
rules: [
|
||||
prefixInflection('tagapang', '', [], ['n']),
|
||||
...[...'dlrst'].map((v) => prefixInflection(`tagapan${v}`, `${v}`, [], ['n'])),
|
||||
...[...'bp'].map((v) => prefixInflection(`tagapam${v}`, `${v}`, [], ['n'])),
|
||||
],
|
||||
},
|
||||
'i-': {
|
||||
name: 'i-',
|
||||
rules: [
|
||||
prefixInflection('i', '', [], ['n']),
|
||||
],
|
||||
},
|
||||
'ika-': {
|
||||
name: 'ika-',
|
||||
rules: [
|
||||
prefixInflection('ika', '', [], ['n']),
|
||||
prefixInflection('ikar', 'd', [], ['n']),
|
||||
wholeWordInflection('ikalawa', 'dalawa', [], ['num']),
|
||||
wholeWordInflection('ikatlo', 'tatlo', [], ['num']),
|
||||
],
|
||||
},
|
||||
'ipa-': {
|
||||
name: 'ipa-',
|
||||
rules: [
|
||||
prefixInflection('ipa', '', [], ['n']),
|
||||
],
|
||||
},
|
||||
'ipag-': {
|
||||
name: 'ipag-',
|
||||
rules: [
|
||||
prefixInflection('ipag', '', [], ['n']),
|
||||
],
|
||||
},
|
||||
'ipag- + rep1': {
|
||||
name: 'ipag- + rep1',
|
||||
rules: [
|
||||
prefixInflectionWithRep1('ipag', '', [], ['n']),
|
||||
],
|
||||
},
|
||||
'ipang-': {
|
||||
name: 'ipang-',
|
||||
rules: [
|
||||
prefixInflection('ipang', '', [], ['n']),
|
||||
...[...'dlrst'].map((v) => prefixInflection(`ipan${v}`, `${v}`, [], ['n'])),
|
||||
...[...'bp'].map((v) => prefixInflection(`ipam${v}`, `${v}`, [], ['n'])),
|
||||
],
|
||||
},
|
||||
'ma-...-an': {
|
||||
name: 'ma-...-an',
|
||||
rules: [
|
||||
sandwichInflection('ma', '', 'an', '', [], ['n']),
|
||||
sandwichInflection('mar', 'd', 'an', '', [], ['n']),
|
||||
sandwichInflection('ma', '', 'ran', 'd', [], ['n']),
|
||||
sandwichInflection('mar', 'd', 'ran', 'd', [], ['n']),
|
||||
...[...'aeiou'].map((v) => sandwichInflection('ma', '', `${v}han`, `${v}`, [], ['n'])),
|
||||
...[...'aeiou'].map((v) => sandwichInflection('mar', 'd', `${v}han`, `${v}`, [], ['n'])),
|
||||
...[...'aeiou'].map((v) => sandwichInflection('ma', '', `${v}nan`, `${v}`, [], ['n'])),
|
||||
...[...'aeiou'].map((v) => sandwichInflection('mar', 'd', `${v}nan`, `${v}`, [], ['n'])),
|
||||
sandwichInflection('ma', '', 'uhan', 'o', [], ['n']),
|
||||
sandwichInflection('mar', 'd', 'uhan', 'o', [], ['n']),
|
||||
sandwichInflection('ma', '', 'unan', 'o', [], ['n']),
|
||||
sandwichInflection('mar', 'd', 'unan', 'o', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange('ma', '', 'an', '', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange('mar', 'd', 'an', '', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange('ma', '', 'ran', 'd', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange('mar', 'd', 'ran', 'd', [], ['n']),
|
||||
],
|
||||
},
|
||||
'mag-...-an': {
|
||||
name: 'mag-...-an',
|
||||
rules: [
|
||||
sandwichInflection('mag', '', 'an', '', [], ['n']),
|
||||
sandwichInflection('mag', '', 'ran', 'd', [], ['n']),
|
||||
...[...'aeiou'].map((v) => sandwichInflection('mag', '', `${v}han`, `${v}`, [], ['n'])),
|
||||
...[...'aeiou'].map((v) => sandwichInflection('mag', '', `${v}nan`, `${v}`, [], ['n'])),
|
||||
sandwichInflection('mag', '', 'uhan', 'o', [], ['n']),
|
||||
sandwichInflection('mag', '', 'unan', 'o', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange('mag', '', 'an', '', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange('mag', '', 'ran', 'd', [], ['n']),
|
||||
],
|
||||
},
|
||||
'magkanda-': {
|
||||
name: 'magkanda-',
|
||||
rules: [
|
||||
prefixInflection('magkanda', '', [], ['n']),
|
||||
prefixInflection('magkandar', 'd', [], ['n']),
|
||||
],
|
||||
},
|
||||
'magma-': {
|
||||
name: 'magma-',
|
||||
rules: [
|
||||
prefixInflection('magma', '', [], ['n']),
|
||||
prefixInflection('magmar', 'd', [], ['n']),
|
||||
],
|
||||
},
|
||||
'magpa-': {
|
||||
name: 'magpa-',
|
||||
rules: [
|
||||
prefixInflection('magpa', '', [], ['n']),
|
||||
prefixInflection('magpar', 'd', [], ['n']),
|
||||
],
|
||||
},
|
||||
'magpaka-': {
|
||||
name: 'magpaka-',
|
||||
rules: [
|
||||
prefixInflection('magpaka', '', [], ['n']),
|
||||
prefixInflection('magpakar', 'd', [], ['n']),
|
||||
],
|
||||
},
|
||||
'magsi-': {
|
||||
name: 'magsi-',
|
||||
rules: [
|
||||
prefixInflection('magsi', '', [], ['n']),
|
||||
prefixInflection('magsipag', '', [], ['n']),
|
||||
],
|
||||
},
|
||||
'makapang-': {
|
||||
name: 'makapang-',
|
||||
rules: [
|
||||
prefixInflection('makapang', '', [], ['n']),
|
||||
...[...'dlrst'].map((v) => prefixInflection(`makapan${v}`, `${v}`, [], ['n'])),
|
||||
...[...'bp'].map((v) => prefixInflection(`makapam${v}`, `${v}`, [], ['n'])),
|
||||
],
|
||||
},
|
||||
'makapag-': {
|
||||
name: 'makapag-',
|
||||
rules: [
|
||||
prefixInflection('makapag', '', [], ['n']),
|
||||
],
|
||||
},
|
||||
'maka-': {
|
||||
name: 'maka-',
|
||||
rules: [
|
||||
prefixInflection('maka', '', [], ['n', 'adj']),
|
||||
prefixInflection('makar', 'd', [], ['n', 'adj']),
|
||||
],
|
||||
},
|
||||
'maki-': {
|
||||
name: 'maki-',
|
||||
rules: [
|
||||
prefixInflection('maki', '', [], ['n']),
|
||||
prefixInflection('makir', 'd', [], ['n']),
|
||||
],
|
||||
},
|
||||
'makipag-': {
|
||||
name: 'makipag-',
|
||||
rules: [
|
||||
prefixInflection('makipag', '', [], ['n']),
|
||||
],
|
||||
},
|
||||
'makipag-...-an': {
|
||||
name: 'makipag-...-an',
|
||||
rules: [
|
||||
sandwichInflection('makipag', '', 'an', '', [], ['n']),
|
||||
sandwichInflection('makipag', '', 'ran', 'd', [], ['n']),
|
||||
...[...'aeiou'].map((v) => sandwichInflection('makipag', '', `${v}han`, `${v}`, [], ['n'])),
|
||||
...[...'aeiou'].map((v) => sandwichInflection('makipag', '', `${v}nan`, `${v}`, [], ['n'])),
|
||||
sandwichInflection('makipag', '', 'uhan', 'o', [], ['n']),
|
||||
sandwichInflection('makipag', '', 'unan', 'o', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange('makipag', '', 'an', '', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange('makipag', '', 'ran', 'd', [], ['n']),
|
||||
],
|
||||
},
|
||||
'mang-': {
|
||||
name: 'mang-',
|
||||
rules: [
|
||||
prefixInflection('mang', '', [], ['n']),
|
||||
...[...'dlrst'].map((v) => prefixInflection(`man${v}`, `${v}`, [], ['n'])),
|
||||
...[...'bp'].map((v) => prefixInflection(`mam${v}`, `${v}`, [], ['n'])),
|
||||
],
|
||||
},
|
||||
'mapa-': {
|
||||
name: 'mapa-',
|
||||
rules: [
|
||||
prefixInflection('mapa', '', [], ['n']),
|
||||
prefixInflection('mapar', 'd', [], ['n']),
|
||||
],
|
||||
},
|
||||
'pa-...-in': {
|
||||
name: 'pa-...-in',
|
||||
rules: [
|
||||
sandwichInflection('pa', '', 'in', '', [], ['n']),
|
||||
sandwichInflection('par', 'd', 'in', '', [], ['n']),
|
||||
sandwichInflection('pa', '', 'rin', 'd', [], ['n']),
|
||||
sandwichInflection('par', 'd', 'rin', 'd', [], ['n']),
|
||||
...[...'aeiou'].map((v) => sandwichInflection('pa', '', `${v}hin`, `${v}`, [], ['n'])),
|
||||
...[...'aeiou'].map((v) => sandwichInflection('par', 'd', `${v}hin`, `${v}`, [], ['n'])),
|
||||
...[...'aeiou'].map((v) => sandwichInflection('pa', '', `${v}nin`, `${v}`, [], ['n'])),
|
||||
...[...'aeiou'].map((v) => sandwichInflection('par', 'd', `${v}nin`, `${v}`, [], ['n'])),
|
||||
sandwichInflection('pa', '', 'uhin', 'o', [], ['n']),
|
||||
sandwichInflection('par', 'd', 'uhin', 'o', [], ['n']),
|
||||
sandwichInflection('pa', '', 'unin', 'o', [], ['n']),
|
||||
sandwichInflection('par', 'd', 'unin', 'o', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange('pa', '', 'in', '', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange('par', 'd', 'in', '', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange('pa', '', 'rin', 'd', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange('par', 'd', 'rin', 'd', [], ['n']),
|
||||
],
|
||||
},
|
||||
'pag-...-an': {
|
||||
name: 'pag-...-an',
|
||||
rules: [
|
||||
sandwichInflection('pag', '', 'an', '', [], ['n']),
|
||||
sandwichInflection('pag', '', 'ran', 'd', [], ['n']),
|
||||
...[...'aeiou'].map((v) => sandwichInflection('pag', '', `${v}han`, `${v}`, [], ['n'])),
|
||||
...[...'aeiou'].map((v) => sandwichInflection('pag', '', `${v}nan`, `${v}`, [], ['n'])),
|
||||
sandwichInflection('pag', '', 'uhan', 'o', [], ['n']),
|
||||
sandwichInflection('pag', '', 'unan', 'o', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange('pag', '', 'an', '', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange('pag', '', 'ran', 'd', [], ['n']),
|
||||
],
|
||||
},
|
||||
'pang-...-an': {
|
||||
name: 'pang-...-an',
|
||||
rules: [
|
||||
sandwichInflection('pang', '', 'an', '', [], ['n']),
|
||||
sandwichInflection('pang', '', 'ran', 'd', [], ['n']),
|
||||
...[...'aeiou'].map((v) => sandwichInflection('pang', '', `${v}han`, `${v}`, [], ['n'])),
|
||||
...[...'aeiou'].map((v) => sandwichInflection('pang', '', `${v}nan`, `${v}`, [], ['n'])),
|
||||
sandwichInflection('pang', '', 'uhan', 'o', [], ['n']),
|
||||
sandwichInflection('pang', '', 'unan', 'o', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange('pang', '', 'an', '', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange('pang', '', 'ran', 'd', [], ['n']),
|
||||
|
||||
...[...'dlrst'].flatMap((v) => [
|
||||
sandwichInflection(`pan${v}`, `${v}`, 'an', '', [], ['n']),
|
||||
sandwichInflection(`pan${v}`, `${v}`, 'ran', 'd', [], ['n']),
|
||||
...[...'aeiou'].map((k) => sandwichInflection(`pan${v}`, `${v}`, `${k}han`, `${k}`, [], ['n'])),
|
||||
...[...'aeiou'].map((k) => sandwichInflection(`pan${v}`, `${v}`, `${k}nan`, `${k}`, [], ['n'])),
|
||||
sandwichInflection(`pan${v}`, '', 'uhan', 'o', [], ['n']),
|
||||
sandwichInflection(`pan${v}`, '', 'unan', 'o', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange(`pan${v}`, `${v}`, 'an', '', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange(`pan${v}`, `${v}`, 'ran', 'd', [], ['n']),
|
||||
]),
|
||||
...[...'bp'].flatMap((v) => [
|
||||
sandwichInflection(`pam${v}`, `${v}`, 'an', '', [], ['n']),
|
||||
sandwichInflection(`pam${v}`, `${v}`, 'ran', 'd', [], ['n']),
|
||||
...[...'aeiou'].map((k) => sandwichInflection(`pam${v}`, `${v}`, `${k}han`, `${k}`, [], ['n'])),
|
||||
...[...'aeiou'].map((k) => sandwichInflection(`pam${v}`, `${v}`, `${k}nan`, `${k}`, [], ['n'])),
|
||||
sandwichInflection(`pam${v}`, '', 'uhan', 'o', [], ['n']),
|
||||
sandwichInflection(`pam${v}`, '', 'unan', 'o', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange(`pam${v}`, `${v}`, 'an', '', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange(`pam${v}`, `${v}`, 'ran', 'd', [], ['n']),
|
||||
]),
|
||||
],
|
||||
},
|
||||
'pag-...-in': {
|
||||
name: 'pag-...-in',
|
||||
rules: [
|
||||
sandwichInflection('pag', '', 'in', '', [], ['n']),
|
||||
sandwichInflection('pag', '', 'rin', 'd', [], ['n']),
|
||||
...[...'aeiou'].map((v) => sandwichInflection('pag', '', `${v}hin`, `${v}`, [], ['n'])),
|
||||
...[...'aeiou'].map((v) => sandwichInflection('pag', '', `${v}nin`, `${v}`, [], ['n'])),
|
||||
sandwichInflection('pag', '', 'uhin', 'o', [], ['n']),
|
||||
sandwichInflection('pag', '', 'unin', 'o', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange('pag', '', 'in', '', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange('pag', '', 'rin', 'd', [], ['n']),
|
||||
],
|
||||
},
|
||||
'papang-...-in': {
|
||||
name: 'papang-...-in',
|
||||
rules: [
|
||||
sandwichInflection('papang', '', 'in', '', [], ['n']),
|
||||
sandwichInflection('papang', '', 'rin', 'd', [], ['n']),
|
||||
...[...'aeiou'].map((v) => sandwichInflection('papang', '', `${v}hin`, `${v}`, [], ['n'])),
|
||||
...[...'aeiou'].map((v) => sandwichInflection('papang', '', `${v}nin`, `${v}`, [], ['n'])),
|
||||
sandwichInflection('papang', '', 'uhin', 'o', [], ['n']),
|
||||
sandwichInflection('papang', '', 'unin', 'o', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange('papang', '', 'in', '', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange('papang', '', 'rin', 'd', [], ['n']),
|
||||
|
||||
...[...'dlrst'].flatMap((v) => [
|
||||
sandwichInflection(`papan${v}`, `${v}`, 'in', '', [], ['n']),
|
||||
sandwichInflection(`papan${v}`, `${v}`, 'rin', 'd', [], ['n']),
|
||||
...[...'aeiou'].map((k) => sandwichInflection(`papan${v}`, `${v}`, `${k}hin`, `${k}`, [], ['n'])),
|
||||
...[...'aeiou'].map((k) => sandwichInflection(`papan${v}`, `${v}`, `${k}nin`, `${k}`, [], ['n'])),
|
||||
sandwichInflection(`papan${v}`, '', 'uhin', 'o', [], ['n']),
|
||||
sandwichInflection(`papan${v}`, '', 'unin', 'o', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange(`papan${v}`, `${v}`, 'in', '', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange(`papan${v}`, `${v}`, 'rin', 'd', [], ['n']),
|
||||
]),
|
||||
...[...'bp'].flatMap((v) => [
|
||||
sandwichInflection(`papam${v}`, `${v}`, 'in', '', [], ['n']),
|
||||
sandwichInflection(`papam${v}`, `${v}`, 'rin', 'd', [], ['n']),
|
||||
...[...'aeiou'].map((k) => sandwichInflection(`papam${v}`, `${v}`, `${k}hin`, `${k}`, [], ['n'])),
|
||||
...[...'aeiou'].map((k) => sandwichInflection(`papam${v}`, `${v}`, `${k}nin`, `${k}`, [], ['n'])),
|
||||
sandwichInflection(`papam${v}`, '', 'uhin', 'o', [], ['n']),
|
||||
sandwichInflection(`papam${v}`, '', 'unin', 'o', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange(`papam${v}`, `${v}`, 'in', '', [], ['n']),
|
||||
sandwichInflectionWithOtoUSoundChange(`papam${v}`, `${v}`, 'rin', 'd', [], ['n']),
|
||||
]),
|
||||
],
|
||||
},
|
||||
'ma-...-in': {
|
||||
name: 'ma-...-in',
|
||||
rules: [
|
||||
sandwichInflection('ma', '', 'in', '', [], ['n', 'adj']),
|
||||
sandwichInflection('mar', 'd', 'in', '', [], ['n', 'adj']),
|
||||
sandwichInflection('ma', '', 'rin', 'd', [], ['n', 'adj']),
|
||||
sandwichInflection('mar', 'd', 'rin', 'd', [], ['n', 'adj']),
|
||||
...[...'aeiou'].map((v) => sandwichInflection('ma', '', `${v}hin`, `${v}`, [], ['n', 'adj'])),
|
||||
...[...'aeiou'].map((v) => sandwichInflection('mar', 'd', `${v}hin`, `${v}`, [], ['n', 'adj'])),
|
||||
...[...'aeiou'].map((v) => sandwichInflection('ma', '', `${v}nin`, `${v}`, [], ['n', 'adj'])),
|
||||
...[...'aeiou'].map((v) => sandwichInflection('mar', 'd', `${v}nin`, `${v}`, [], ['n', 'adj'])),
|
||||
sandwichInflection('ma', '', 'uhin', 'o', [], ['n', 'adj']),
|
||||
sandwichInflection('mar', 'd', 'uhin', 'o', [], ['n', 'adj']),
|
||||
sandwichInflection('ma', '', 'unin', 'o', [], ['n', 'adj']),
|
||||
sandwichInflection('mar', 'd', 'unin', 'o', [], ['n', 'adj']),
|
||||
sandwichInflectionWithOtoUSoundChange('ma', '', 'in', '', [], ['n', 'adj']),
|
||||
sandwichInflectionWithOtoUSoundChange('mar', 'd', 'in', '', [], ['n', 'adj']),
|
||||
sandwichInflectionWithOtoUSoundChange('ma', '', 'rin', 'd', [], ['n', 'adj']),
|
||||
sandwichInflectionWithOtoUSoundChange('mar', 'd', 'rin', 'd', [], ['n', 'adj']),
|
||||
],
|
||||
},
|
||||
'mapag-': {
|
||||
name: 'mapag-',
|
||||
rules: [
|
||||
prefixInflection('mapag', '', [], ['n', 'adj']),
|
||||
],
|
||||
},
|
||||
'naka-': {
|
||||
name: 'naka-',
|
||||
rules: [
|
||||
prefixInflection('naka', '', [], ['n', 'adj']),
|
||||
prefixInflection('nakar', 'd', [], ['n', 'adj']),
|
||||
],
|
||||
},
|
||||
'nakaka-': {
|
||||
name: 'nakaka-',
|
||||
rules: [
|
||||
prefixInflection('nakaka', '', [], ['n', 'adj']),
|
||||
prefixInflection('nakakar', 'd', [], ['n', 'adj']),
|
||||
],
|
||||
},
|
||||
'nakakapang-': {
|
||||
name: 'nakakapang-',
|
||||
rules: [
|
||||
prefixInflection('nakakapang', '', [], ['n', 'adj']),
|
||||
...[...'dlrst'].map((v) => prefixInflection(`nakakapan${v}`, `${v}`, [], ['n', 'adj'])),
|
||||
...[...'bp'].map((v) => prefixInflection(`nakakapam${v}`, `${v}`, [], ['n', 'adj'])),
|
||||
],
|
||||
},
|
||||
'naka- + rep1': {
|
||||
name: 'naka- + rep1',
|
||||
rules: [
|
||||
prefixInflectionWithRep1('naka', '', [], ['n', 'adj']),
|
||||
],
|
||||
},
|
||||
'nakapang- + rep1': {
|
||||
name: 'nakapang- + rep1',
|
||||
rules: [
|
||||
prefixInflectionWithRep1('nakapang', '', [], ['n', 'adj']),
|
||||
prefixInflectionWithRep1('nakapan', '', [], ['n', 'adj'], 'dlrst'),
|
||||
prefixInflectionWithRep1('nakapam', '', [], ['n', 'adj'], 'bp'),
|
||||
],
|
||||
},
|
||||
'pala-': {
|
||||
name: 'pala-',
|
||||
rules: [
|
||||
prefixInflection('pala', '', [], ['n', 'adj']),
|
||||
prefixInflection('palar', 'd', [], ['n', 'adj']),
|
||||
],
|
||||
},
|
||||
'-ng': {
|
||||
name: 'ng',
|
||||
rules: [
|
||||
suffixInflection('ng', '', [], []),
|
||||
],
|
||||
},
|
||||
},
|
||||
};
|
||||
2482
vendor/yomitan/js/language/translator.js
vendored
Normal file
2482
vendor/yomitan/js/language/translator.js
vendored
Normal file
File diff suppressed because it is too large
Load Diff
58
vendor/yomitan/js/language/vi/viet-text-preprocessors.js
vendored
Normal file
58
vendor/yomitan/js/language/vi/viet-text-preprocessors.js
vendored
Normal file
@@ -0,0 +1,58 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
const TONE = '([\u0300\u0309\u0303\u0301\u0323])'; // Huyền, hỏi, ngã, sắc, nặng
const COMBINING_BREVE = '\u0306'; // Ă
const COMBINING_CIRCUMFLEX_ACCENT = '\u0302'; // Â
const COMBINING_HORN = '\u031B'; // Ơ
const DIACRITICS = `${COMBINING_BREVE}${COMBINING_CIRCUMFLEX_ACCENT}${COMBINING_HORN}`;

// Tone-placement patterns, matched against NFD-decomposed text.
// eslint-disable-next-line no-misleading-character-class
const toneBeforeVowelRun = new RegExp(`${TONE}([aeiouy${DIACRITICS}]+)`, 'i');
const toneAfterDiacriticVowel = new RegExp(`(?<=[${DIACRITICS}])(.)${TONE}`, 'i');
const toneAfterAE = new RegExp(`(?<=[ae])([iouy])${TONE}`, 'i');
const toneAfterOY = new RegExp(`(?<=[oy])([iuy])${TONE}`, 'i');
const toneOnUPair = new RegExp(`(?<!q)(u)([aeiou])${TONE}`, 'i');
const toneOnIPair = new RegExp(`(?<!g)(i)([aeiouy])${TONE}`, 'i');
const oldStyleFinalPair = new RegExp(`(?<!q)([ou])([aeoy])${TONE}(?!\\w)`, 'i');

/**
 * Ordered (pattern, replacement) rewrites shared by both the old and the new
 * placement style. Order matters: later rules assume earlier ones have run.
 * @type {[pattern: RegExp, replacement: string][]}
 */
const sharedPlacementRules = [
    [toneBeforeVowelRun, '$2$1'], // Put the tone on the second vowel
    [toneAfterDiacriticVowel, '$2$1'], // Put the tone on the vowel with a diacritic
    [toneAfterAE, '$2$1'], // For vowels that are not oa, oe, uy put the tone on the penultimate vowel
    [toneAfterOY, '$2$1'],
    [toneOnUPair, '$1$3$2'],
    [toneOnIPair, '$1$3$2'],
];

/**
 * This function is adapted from https://github.com/enricobarzetti/viet_text_tools/blob/master/viet_text_tools/__init__.py
 * @type {import('language').TextProcessor<'old'|'new'|'off'>}
 */
export const normalizeDiacritics = {
    name: 'Normalize Diacritics',
    description: 'Normalize diacritics and their placements (in either the old style or new style). NFC normalization is used.',
    options: ['old', 'new', 'off'],
    process: (str, setting) => {
        if (setting === 'off') { return str; }

        let result = str.normalize('NFD');
        for (const [pattern, replacement] of sharedPlacementRules) {
            result = result.replace(pattern, replacement);
        }
        // The old orthography additionally shifts the tone back in final
        // oa/oe/uy-like vowel pairs.
        if (setting === 'old') { result = result.replace(oldStyleFinalPair, '$1$3$2'); }
        return result.normalize('NFC');
    },
};
|
||||
72
vendor/yomitan/js/language/yi/yiddish-text-postprocessors.js
vendored
Normal file
72
vendor/yomitan/js/language/yi/yiddish-text-postprocessors.js
vendored
Normal file
@@ -0,0 +1,72 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
// Non-final Hebrew letter -> its word-final (sofit) form.
const final_letter_map = new Map([
    ['\u05de', '\u05dd'], // מ to ם
    ['\u05e0', '\u05df'], // נ to ן
    ['\u05e6', '\u05e5'], // צ to ץ
    ['\u05e4', '\u05e3'], // פ to ף
    ['\u05db', '\u05da'], // כ to ך
]);

// Ligature codepoints and the decomposed letter sequences they stand for.
const ligatures = [
    {lig: '\u05f0', split: '\u05d5' + '\u05d5'}, // װ -> וו
    {lig: '\u05f1', split: '\u05d5' + '\u05d9'}, // ױ -> וי
    {lig: '\u05f2', split: '\u05d9' + '\u05d9'}, // ײ -> יי
    {lig: '\ufb1d', split: '\u05d9' + '\u05b4'}, // יִ -> יִ
    {lig: '\ufb1f', split: '\u05d9' + '\u05d9' + '\u05b7'}, // ײַ -> ייַ
    {lig: '\ufb2e', split: '\u05d0' + '\u05b7'}, // Pasekh alef
    {lig: '\ufb2f', split: '\u05d0' + '\u05b8'}, // Komets alef
];

/** @type {import('language').TextProcessor<boolean>} */
export const convertFinalLetters = {
    name: 'Convert to Final Letters',
    description: 'קויף → קויפֿ',
    options: [true],
    process: (str) => {
        // Swap the last letter for its sofit form when one exists.
        // Map.has/.get replace the previous [...keys()].includes() lookup,
        // which re-materialized the key array on every call.
        if (str.length === 0) { return str; }
        const lastIndex = str.length - 1;
        const lastChar = str.charAt(lastIndex);
        return final_letter_map.has(lastChar) ?
            str.substring(0, lastIndex) + final_letter_map.get(lastChar) :
            str;
    },
};
|
||||
|
||||
/**
 * Splits Yiddish ligature codepoints into their component letters
 * ('direct'), or recombines letter pairs into ligatures ('inverse').
 * @type {import('language').BidirectionalConversionPreprocessor}
 */
export const convertYiddishLigatures = {
    name: 'Split Ligatures',
    description: 'וו → װ',
    options: ['off', 'direct', 'inverse'],
    process: (str, setting) => {
        switch (setting) {
            case 'off':
                return str;
            case 'direct':
                // replaceAll: the previous String.replace converted only the
                // FIRST occurrence of each ligature, leaving later ones intact.
                for (const {lig, split} of ligatures) {
                    str = str.replaceAll(lig, split);
                }
                return str;
            case 'inverse':
                for (const {lig, split} of ligatures) {
                    str = str.replaceAll(split, lig);
                }
                return str;
            default:
                // Unknown setting: return the input unchanged instead of
                // falling off the switch and returning undefined.
                return str;
        }
    },
};
|
||||
49
vendor/yomitan/js/language/yi/yiddish-text-preprocessors.js
vendored
Normal file
49
vendor/yomitan/js/language/yi/yiddish-text-preprocessors.js
vendored
Normal file
@@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
// Yiddish ligature codepoints and the plain letter sequences they stand for.
// Order matters for sequential replacement: the two-yud ligature (\u05f2)
// is listed before the pasekh variants that contain the same letter pair.
const ligatures = [
    {lig: '\u05f0', split: '\u05d5' + '\u05d5'}, // װ -> וו
    {lig: '\u05f1', split: '\u05d5' + '\u05d9'}, // ױ -> וי
    {lig: '\u05f2', split: '\u05d9' + '\u05d9'}, // ײ -> יי
    {lig: '\ufb1d', split: '\u05d9' + '\u05b4'}, // יִ -> יִ
    {lig: '\ufb1f', split: '\u05d9' + '\u05d9' + '\u05b7'}, // ײַ -> ייַ
    {lig: '\ufb2e', split: '\u05d0' + '\u05b7'}, // Pasekh alef
    {lig: '\ufb2f', split: '\u05d0' + '\u05b8'}, // Komets alef
];
|
||||
|
||||
/**
 * Recombines decomposed letter pairs into their single ligature codepoints,
 * so lookups match dictionaries that store the ligature forms.
 * @type {import('language').TextProcessor<boolean>}
 */
export const combineYiddishLigatures = {
    name: 'Combine Ligatures',
    description: 'וו → װ',
    options: [true],
    process: (str) => {
        // replaceAll: the previous String.replace combined only the FIRST
        // occurrence of each letter pair, leaving later pairs untouched.
        for (const {lig, split} of ligatures) {
            str = str.replaceAll(split, lig);
        }
        return str;
    },
};
|
||||
|
||||
/**
 * Strips Hebrew points and cantillation-range marks (U+05B0..U+05C7) so
 * unpointed dictionary entries match pointed input text.
 * @type {import('language').TextProcessor<boolean>}
 */
export const removeYiddishDiacritics = {
    name: 'Remove Diacritics',
    description: 'פאת → פֿאָתּ',
    options: [true],
    process: (str) => str.replace(/[\u05B0-\u05C7]/g, ''),
};
|
||||
167
vendor/yomitan/js/language/yi/yiddish-transforms.js
vendored
Normal file
167
vendor/yomitan/js/language/yi/yiddish-transforms.js
vendored
Normal file
@@ -0,0 +1,167 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import {suffixInflection} from '../language-transforms.js';
|
||||
|
||||
/** @typedef {keyof typeof conditions} Condition */
|
||||
|
||||
// Umlaut vowel mutations: `new` is the vowel as it appears in a mutated
// (inflected) stem; `orig` is the vowel restored when deinflecting back to
// the dictionary form.
const mutations = [
    {new: '\u05e2', orig: '\ufb2e'}, // Ayin to pasekh alef
    {new: '\u05e2', orig: '\ufb2f'}, // Ayin to komets alef
    {new: '\u05e2', orig: '\u05D0'}, // Ayin to shumter alef
    {new: '\u05f1', orig: '\u05e2'}, // Vov yud to ayin
    {new: '\u05f2', orig: '\u05f1'}, // Tsvey yudn to Vov yud
    {new: '\u05d9', orig: '\u05d5'}, // Yud to Vov
];
|
||||
|
||||
/**
 * Builds one suffix rule per umlaut mutation: deinflecting both strips the
 * inflected suffix and restores the original (pre-umlaut) stem vowel.
 * @param {string} inflectedSuffix
 * @param {string} deinflectedSuffix
 * @param {Condition[]} conditionsIn
 * @param {Condition[]} conditionsOut
 * @returns {import('language-transformer').SuffixRule<Condition>[]}
 */
function umlautMutationSuffixInflection(inflectedSuffix, deinflectedSuffix, conditionsIn, conditionsOut) {
    const suffixRegExp = new RegExp(inflectedSuffix + '$');
    // Matches the LAST vowel-ish letter of the stem (negative lookahead
    // forbids a later occurrence). Hoisted out of deinflect so it is built
    // once per rule set instead of on every deinflection call; the regex is
    // flag-free, hence stateless and safe to share across closures.
    const lastStemVowelRegExp = /[\u05E2\u05F0\u05D0\uFB2E\u05F1\u05D5\u05F2\uFB1D\uFB1F\u05D9\uFB2F](?!.*[\u05E2\u05F0\u05D0\uFB2E\u05F1\u05D5\u05F2\uFB1D\uFB1F\u05D9\uFB2F])/;
    return mutations.map((mutation) => (
        {
            type: 'suffix',
            isInflected: suffixRegExp,
            deinflected: deinflectedSuffix,
            deinflect: (/** @type {string} */ text) => {
                // Only deinflect when the stem's last vowel is this
                // mutation's surface form; otherwise signal "no match" ('').
                const match = lastStemVowelRegExp.exec(text.slice(0, -inflectedSuffix.length));
                return (match?.[0] !== mutation.new) ? '' : text.slice(0, match.index) + mutation.orig + text.slice(match.index + 1, -inflectedSuffix.length) + deinflectedSuffix;
            },
            conditionsIn,
            conditionsOut,
        }
    ));
}
|
||||
|
||||
// Grammatical conditions used by the Yiddish transforms. `isDictionaryForm`
// marks forms that may appear as dictionary headwords; `subConditions`
// groups specific tenses/numbers under their parent part of speech.
const conditions = {
    v: {
        name: 'Verb',
        isDictionaryForm: true,
        subConditions: ['vpast', 'vpresent'],
    },
    vpast: {
        name: 'Verb, past tense',
        isDictionaryForm: false,
    },
    vpresent: {
        name: 'Verb, present tense',
        isDictionaryForm: true,
    },
    n: {
        name: 'Noun',
        isDictionaryForm: true,
        subConditions: ['np', 'ns'],
    },
    np: {
        name: 'Noun, plural',
        isDictionaryForm: false,
    },
    ns: {
        name: 'Noun, singular',
        isDictionaryForm: true,
    },
    adj: {
        name: 'Adjective',
        isDictionaryForm: true,
    },
    adv: {
        name: 'Adverb',
        isDictionaryForm: true,
    },
};
|
||||
|
||||
/**
 * Deinflection table for Yiddish. Suffixes are written as Hebrew-script
 * escape sequences; the trailing comment on each rule gives its
 * romanization.
 * @type {import('language-transformer').LanguageTransformDescriptor<Condition>}
 */
export const yiddishTransforms = {
    language: 'yi',
    conditions,
    transforms: {
        plural: {
            name: 'plural',
            description: 'plural form of a noun',
            rules: [
                suffixInflection('\u05E1', '', ['np'], ['ns']), // -s
                suffixInflection('\u05DF', '', ['np'], ['ns']), // -n
                suffixInflection('\u05D9\u05DD', '', ['np'], ['ns']), // -im, hebrew
                suffixInflection('\u05E2\u05E8', '', ['np'], ['ns']), // -er
                suffixInflection('\u05E2\u05DA', '', ['np'], ['ns']), // -ekh
                suffixInflection('\u05E2\u05DF', '', ['np'], ['ns']), // -en
                suffixInflection('\u05E2\u05E1', '', ['np'], ['ns']), // -es
                suffixInflection('\u05D5\u05EA', '', ['np'], ['ns']), // -ot, hebrew
                suffixInflection('\u05E0\u05E1', '', ['np'], ['ns']), // -ns
                suffixInflection('\u05E2\u05E8\u05E2\u05DF', '', ['np'], ['ns']), // -eren
                suffixInflection('\u05E2\u05E0\u05E2\u05E1', '', ['np'], ['ns']), // -enes
                suffixInflection('\u05E2\u05E0\u05E1', '', ['np'], ['ns']), // -ens
                suffixInflection('\u05E2\u05E8\u05E1', '', ['np'], ['ns']), // -ers
                suffixInflection('\u05E1\u05E2\u05E8', '', ['np'], ['ns']), // -ser
            ],
        },
        umlaut_plural: {
            name: 'umlaut_plural',
            description: 'plural form of a umlaut noun',
            // Each entry expands to one rule per umlaut mutation (see
            // umlautMutationSuffixInflection above).
            rules: [
                ...umlautMutationSuffixInflection('\u05E2\u05E8', '', ['np'], ['ns']), // -er
                ...umlautMutationSuffixInflection('\u05E2\u05E1', '', ['np'], ['ns']), // -es
                ...umlautMutationSuffixInflection('\u05D9\u05DD', '', ['np'], ['ns']), // -im
                ...umlautMutationSuffixInflection('\u05E2\u05DF', '', ['np'], ['ns']), // -en
                ...umlautMutationSuffixInflection('\u05DF', '', ['np'], ['ns']), // -n
                ...umlautMutationSuffixInflection('\u05E1', '', ['np'], ['ns']), // -s
                ...umlautMutationSuffixInflection('\u05E2\u05DA', '', ['np'], ['ns']), // -ekh
                ...umlautMutationSuffixInflection('\u05E2\u05E8\u05E1', '', ['np'], ['ns']), // -ers
            ],
        },
        diminutive: {
            name: 'diminutive',
            description: 'diminutive form of a noun',
            rules: [
                suffixInflection('\u05D8\u05E9\u05D9\u05E7', '', ['n'], ['n']), // -tshik
                suffixInflection('\u05E7\u05E2', '', ['n'], ['n']), // -ke
                suffixInflection('\u05DC', '', ['n'], ['n']), // -l
                suffixInflection('\u05E2\u05DC\u05E2', '', ['n'], ['n']), // -ele
            ],
        },
        diminutive_and_umlaut: {
            name: 'diminutive_and_umlaut',
            description: 'diminutive form of a noun with stem umlaut',
            rules: [
                ...umlautMutationSuffixInflection('\u05DC', '', ['n'], ['n']), // -l
                ...umlautMutationSuffixInflection('\u05E2\u05DC\u05E2', '', ['n'], ['n']), // -ele
            ],
        },
        verb_present_singular_to_first_person: {
            name: 'verb_present_singular_to_first_person',
            description: 'Turn the second and third person singular form to first person',
            rules: [
                suffixInflection('\u05E1\u05D8', '', ['v'], ['vpresent']), // -st
                suffixInflection('\u05D8', '', ['v'], ['vpresent']), // -t
                suffixInflection('\u05E0\u05D3\u05D9\u05E7', '', ['v'], ['vpresent']), // -ndik
            ],
        },
        verb_present_plural_to_first_person: {
            name: 'verb_present_plural_to_first_person',
            description: 'Turn the second plural form to first person plural form',
            rules: [
                suffixInflection('\u05D8\u05E1', '\u05E0', ['v'], ['vpresent']), // -ts
                suffixInflection('\u05D8', '\u05E0', ['v'], ['vpresent']), // -t
            ],
        },
    },
};
|
||||
75
vendor/yomitan/js/language/zh/chinese.js
vendored
Normal file
75
vendor/yomitan/js/language/zh/chinese.js
vendored
Normal file
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
* Copyright (C) 2024-2025 Yomitan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import {CJK_IDEOGRAPH_RANGES, CJK_PUNCTUATION_RANGE, FULLWIDTH_CHARACTER_RANGES, isCodePointInRanges} from '../CJK-util.js';
|
||||
|
||||
/** @type {import('CJK-util').CodepointRange} */
|
||||
const BOPOMOFO_RANGE = [0x3100, 0x312f];
|
||||
/** @type {import('CJK-util').CodepointRange} */
|
||||
const BOPOMOFO_EXTENDED_RANGE = [0x31a0, 0x31bf];
|
||||
/** @type {import('CJK-util').CodepointRange} */
|
||||
const IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_RANGE = [0x16fe0, 0x16fff];
|
||||
/** @type {import('CJK-util').CodepointRange} */
|
||||
const SMALL_FORM_RANGE = [0xfe50, 0xfe6f];
|
||||
/** @type {import('CJK-util').CodepointRange} */
|
||||
const VERTICAL_FORM_RANGE = [0xfe10, 0xfe1f];
|
||||
|
||||
|
||||
/**
|
||||
* Chinese character ranges, roughly ordered in order of expected frequency.
|
||||
* @type {import('CJK-util').CodepointRange[]}
|
||||
*/
|
||||
const CHINESE_RANGES = [
|
||||
...CJK_IDEOGRAPH_RANGES,
|
||||
CJK_PUNCTUATION_RANGE,
|
||||
|
||||
...FULLWIDTH_CHARACTER_RANGES,
|
||||
|
||||
BOPOMOFO_RANGE,
|
||||
BOPOMOFO_EXTENDED_RANGE,
|
||||
IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_RANGE,
|
||||
SMALL_FORM_RANGE,
|
||||
VERTICAL_FORM_RANGE,
|
||||
];
|
||||
|
||||
|
||||
/**
 * Determines whether at least one character of a string is Chinese.
 * @param {string} str - The string to scan.
 * @returns {boolean} `true` when any code point falls in a Chinese range.
 */
export function isStringPartiallyChinese(str) {
    if (str.length === 0) { return false; }
    // Spread iterates by code point, so surrogate pairs are handled correctly.
    return [...str].some((character) => isCodePointInRanges(/** @type {number} */ (character.codePointAt(0)), CHINESE_RANGES));
}
|
||||
|
||||
/**
 * Determines whether a single code point lies in any of CHINESE_RANGES.
 * @param {number} codePoint
 * @returns {boolean}
 */
export function isCodePointChinese(codePoint) {
    return isCodePointInRanges(codePoint, CHINESE_RANGES);
}
|
||||
|
||||
/**
 * Canonicalizes a pinyin reading for comparison.
 * @type {import('language').ReadingNormalizer}
 */
export function normalizePinyin(str) {
    // Compose (NFC) and lowercase first, then strip separator characters:
    // whitespace, middle dots, colons, apostrophes, hyphens, and "//" markers.
    const lowered = str.normalize('NFC').toLowerCase();
    return lowered.replace(/[\s・:'’-]|\/\//g, '');
}
|
||||
Reference in New Issue
Block a user