mirror of
https://github.com/ksyasuda/SubMiner.git
synced 2026-03-01 06:22:44 -08:00
feat(assets): bundle runtime assets and vendor dependencies
This commit is contained in:
498
vendor/yomitan/js/dictionary/dictionary-data-util.js
vendored
Normal file
498
vendor/yomitan/js/dictionary/dictionary-data-util.js
vendored
Normal file
@@ -0,0 +1,498 @@
|
||||
/*
|
||||
* Copyright (C) 2023-2025 Yomitan Authors
|
||||
* Copyright (C) 2020-2022 Yomichan Authors
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import {getFrequencyHarmonic} from '../data/anki-note-data-creator.js';
|
||||
|
||||
|
||||
/**
 * Groups the tags attached to a term dictionary entry's headwords.
 * Identical tags (same name, category, content, and dictionaries) that occur
 * on multiple headwords are merged into one group listing every headword
 * index they apply to.
 * @param {import('dictionary').TermDictionaryEntry} dictionaryEntry
 * @returns {import('dictionary-data-util').TagGroup[]}
 */
export function groupTermTags(dictionaryEntry) {
    const {headwords} = dictionaryEntry;
    // Deduplication is only meaningful when more than one headword exists.
    const checkDuplicates = (headwords.length > 1);
    /** @type {Map<string, number>} Tag identity key -> index into `groups`. */
    const groupIndexByKey = new Map();
    const groups = [];
    for (let headwordIndex = 0; headwordIndex < headwords.length; ++headwordIndex) {
        for (const tag of headwords[headwordIndex].tags) {
            if (checkDuplicates) {
                const {name, category, content, dictionaries} = tag;
                const key = createMapKey([name, category, content, dictionaries]);
                const existingIndex = groupIndexByKey.get(key);
                if (typeof existingIndex !== 'undefined') {
                    // Tag already grouped: just record this headword on it.
                    groups[existingIndex].headwordIndices.push(headwordIndex);
                    continue;
                }
                groupIndexByKey.set(key, groups.length);
            }
            groups.push({tag, headwordIndices: [headwordIndex]});
        }
    }
    return groups;
}
|
||||
|
||||
/**
 * Groups a term dictionary entry's frequency data by dictionary, merging
 * duplicate (term, reading) pairs and deduplicating identical
 * (frequency, displayValue) pairs, then appends a synthetic "Average"
 * pseudo-dictionary containing the harmonic-mean frequency of each headword.
 * @param {import('dictionary').TermDictionaryEntry} dictionaryEntry
 * @param {import('dictionary-importer').Summary[]} dictionaryInfo
 * @returns {import('dictionary-data-util').DictionaryFrequency<import('dictionary-data-util').TermFrequency>[]}
 */
export function groupTermFrequencies(dictionaryEntry, dictionaryInfo) {
    const {headwords, frequencies: sourceFrequencies} = dictionaryEntry;

    /** @type {import('dictionary-data-util').TermFrequenciesMap1} */
    const map1 = new Map();
    /** @type {Map<string, string>} */
    const aliasMap = new Map();
    for (const {headwordIndex, dictionary, dictionaryAlias, hasReading, frequency, displayValue} of sourceFrequencies) {
        const {term, reading} = headwords[headwordIndex];

        let map2 = map1.get(dictionary);
        if (typeof map2 === 'undefined') {
            map2 = new Map();
            map1.set(dictionary, map2);
            // The alias from the first entry seen for a dictionary wins.
            aliasMap.set(dictionary, dictionaryAlias);
        }

        // A null reading key groups entries whose frequency applies to the term irrespective of reading.
        const readingKey = hasReading ? reading : null;
        const key = createMapKey([term, readingKey]);
        let frequencyData = map2.get(key);
        if (typeof frequencyData === 'undefined') {
            frequencyData = {term, reading: readingKey, values: new Map()};
            map2.set(key, frequencyData);
        }

        // Keying on (frequency, displayValue) deduplicates identical value pairs.
        frequencyData.values.set(createMapKey([frequency, displayValue]), {frequency, displayValue});
    }

    const results = [];

    for (const [dictionary, map2] of map1.entries()) {
        /** @type {import('dictionary-data-util').TermFrequency[]} */
        const frequencies = [];
        const dictionaryAlias = aliasMap.get(dictionary) ?? dictionary;
        for (const {term, reading, values} of map2.values()) {
            const termFrequency = {
                term,
                reading,
                values: [...values.values()],
            };
            frequencies.push(termFrequency);
        }
        const currentDictionaryInfo = dictionaryInfo.find(({title}) => title === dictionary);
        // Guard termMeta with ?. as well: a summary whose counts lack termMeta
        // previously threw here instead of falling back to 0.
        const freqCount = currentDictionaryInfo?.counts?.termMeta?.freq ?? 0;
        results.push({dictionary, frequencies, dictionaryAlias, freqCount});
    }

    // Synthesize an "Average" entry: one harmonic-mean frequency per headword.
    const averageFrequencies = [];
    for (let i = 0; i < dictionaryEntry.headwords.length; i++) {
        const averageFrequency = getFrequencyHarmonic(dictionaryEntry, i);
        averageFrequencies.push({
            term: dictionaryEntry.headwords[i].term,
            reading: dictionaryEntry.headwords[i].reading,
            values: [{
                frequency: averageFrequency,
                displayValue: averageFrequency.toString(),
            }],
        });
    }

    results.push({
        dictionary: 'Average',
        frequencies: averageFrequencies,
        dictionaryAlias: 'Average',
        freqCount: averageFrequencies.length,
    });

    return results;
}
|
||||
|
||||
/**
 * Groups kanji frequency data by dictionary, merging entries for the same
 * character and deduplicating identical (frequency, displayValue) pairs.
 * @param {import('dictionary').KanjiFrequency[]} sourceFrequencies
 * @param {import('dictionary-importer').Summary[]} dictionaryInfo
 * @returns {import('dictionary-data-util').DictionaryFrequency<import('dictionary-data-util').KanjiFrequency>[]}
 */
export function groupKanjiFrequencies(sourceFrequencies, dictionaryInfo) {
    /** @type {import('dictionary-data-util').KanjiFrequenciesMap1} */
    const map1 = new Map();
    /** @type {Map<string, string>} */
    const aliasMap = new Map();
    for (const {dictionary, dictionaryAlias, character, frequency, displayValue} of sourceFrequencies) {
        let map2 = map1.get(dictionary);
        if (typeof map2 === 'undefined') {
            map2 = new Map();
            map1.set(dictionary, map2);
            // The alias from the first entry seen for a dictionary wins.
            aliasMap.set(dictionary, dictionaryAlias);
        }

        let frequencyData = map2.get(character);
        if (typeof frequencyData === 'undefined') {
            frequencyData = {character, values: new Map()};
            map2.set(character, frequencyData);
        }

        // Keying on (frequency, displayValue) deduplicates identical value pairs.
        frequencyData.values.set(createMapKey([frequency, displayValue]), {frequency, displayValue});
    }

    const results = [];
    for (const [dictionary, map2] of map1.entries()) {
        const frequencies = [];
        const dictionaryAlias = aliasMap.get(dictionary) ?? dictionary;
        for (const {character, values} of map2.values()) {
            frequencies.push({
                character,
                values: [...values.values()],
            });
        }
        const currentDictionaryInfo = dictionaryInfo.find(({title}) => title === dictionary);
        // Guard kanjiMeta with ?. as well: a summary whose counts lack kanjiMeta
        // previously threw here instead of falling back to 0.
        const freqCount = currentDictionaryInfo?.counts?.kanjiMeta?.freq ?? 0;
        results.push({dictionary, frequencies, dictionaryAlias, freqCount});
    }
    return results;
}
|
||||
|
||||
/**
 * Groups a term dictionary entry's pronunciations by dictionary, merging
 * equivalent pronunciations that share a reading, and annotates each group
 * with the terms it applies to and which terms/readings are exclusive to it.
 * @param {import('dictionary').TermDictionaryEntry} dictionaryEntry
 * @returns {import('dictionary-data-util').DictionaryGroupedPronunciations[]}
 */
export function getGroupedPronunciations(dictionaryEntry) {
    const {headwords, pronunciations: termPronunciations} = dictionaryEntry;

    // Collect the full universe of terms and readings across all headwords;
    // used below to decide which terms/readings are "exclusive" to a group.
    /** @type {Set<string>} */
    const allTerms = new Set();
    const allReadings = new Set();
    /** @type {Map<string, string>} */
    const aliasMap = new Map();
    for (const {term, reading} of headwords) {
        allTerms.add(term);
        allReadings.add(reading);
    }

    // Phase 1: bucket pronunciations per dictionary, merging equivalent
    // pronunciations (same reading + equivalent pronunciation data) into one
    // group that accumulates the set of terms it applies to.
    /** @type {Map<string, import('dictionary-data-util').GroupedPronunciationInternal[]>} */
    const groupedPronunciationsMap = new Map();
    for (const {headwordIndex, dictionary, dictionaryAlias, pronunciations} of termPronunciations) {
        const {term, reading} = headwords[headwordIndex];
        let dictionaryGroupedPronunciationList = groupedPronunciationsMap.get(dictionary);
        if (typeof dictionaryGroupedPronunciationList === 'undefined') {
            dictionaryGroupedPronunciationList = [];
            groupedPronunciationsMap.set(dictionary, dictionaryGroupedPronunciationList);
            // Alias from the first entry seen for this dictionary wins.
            aliasMap.set(dictionary, dictionaryAlias);
        }
        for (const pronunciation of pronunciations) {
            let groupedPronunciation = findExistingGroupedPronunciation(reading, pronunciation, dictionaryGroupedPronunciationList);
            if (groupedPronunciation === null) {
                groupedPronunciation = {
                    pronunciation,
                    terms: new Set(),
                    reading,
                };
                dictionaryGroupedPronunciationList.push(groupedPronunciation);
            }
            groupedPronunciation.terms.add(term);
        }
    }

    // Phase 2: flatten the buckets into the output shape, computing
    // exclusivity annotations against the full term/reading sets.
    /** @type {import('dictionary-data-util').DictionaryGroupedPronunciations[]} */
    const results2 = [];
    const multipleReadings = (allReadings.size > 1);
    for (const [dictionary, dictionaryGroupedPronunciationList] of groupedPronunciationsMap.entries()) {
        /** @type {import('dictionary-data-util').GroupedPronunciation[]} */
        const pronunciations2 = [];
        const dictionaryAlias = aliasMap.get(dictionary) ?? dictionary;
        for (const groupedPronunciation of dictionaryGroupedPronunciationList) {
            const {pronunciation, terms, reading} = groupedPronunciation;
            // Only mark terms as exclusive when the group does not cover every term.
            const exclusiveTerms = !areSetsEqual(terms, allTerms) ? getSetIntersection(terms, allTerms) : [];
            const exclusiveReadings = [];
            if (multipleReadings) {
                // With more than one reading overall, the group's reading disambiguates it.
                exclusiveReadings.push(reading);
            }
            pronunciations2.push({
                pronunciation,
                terms: [...terms],
                reading,
                exclusiveTerms,
                exclusiveReadings,
            });
        }

        results2.push({dictionary, dictionaryAlias, pronunciations: pronunciations2});
    }
    return results2;
}
|
||||
|
||||
/**
 * Selects the pronunciations matching a given pronunciation type.
 * @template {import('dictionary').PronunciationType} T
 * @param {import('dictionary').Pronunciation[]} pronunciations
 * @param {T} type
 * @returns {import('dictionary').PronunciationGeneric<T>[]} A new array; the input is not modified.
 */
export function getPronunciationsOfType(pronunciations, type) {
    // The cast is type safe: the filter guarantees every element has the requested type.
    return /** @type {import('dictionary').PronunciationGeneric<T>[]} */ (
        pronunciations.filter((pronunciation) => pronunciation.type === type)
    );
}
|
||||
|
||||
/**
 * Classifies a term's overall frequency from the sum of its tag scores:
 * positive totals are 'popular', negative totals are 'rare', zero is 'normal'.
 * @param {import('dictionary').Tag[]|import('anki-templates').Tag[]} termTags
 * @returns {import('dictionary-data-util').TermFrequencyType}
 */
export function getTermFrequency(termTags) {
    const totalScore = termTags.reduce((sum, {score}) => sum + score, 0);
    if (totalScore > 0) { return 'popular'; }
    if (totalScore < 0) { return 'rare'; }
    return 'normal';
}
|
||||
|
||||
/**
 * Computes the list of disambiguating terms/readings for a subset of headwords.
 * Returns the terms (and/or readings) used by the selected headwords when that
 * usage differs from the full set; an empty array when nothing needs
 * disambiguating (at most one term and one reading overall).
 * @param {import('dictionary').TermHeadword[]} headwords
 * @param {number[]} headwordIndices
 * @param {Set<string>} allTermsSet
 * @param {Set<string>} allReadingsSet
 * @returns {string[]}
 */
export function getDisambiguations(headwords, headwordIndices, allTermsSet, allReadingsSet) {
    if (allTermsSet.size <= 1 && allReadingsSet.size <= 1) { return []; }

    /** @type {Set<string>} */
    const terms = new Set();
    /** @type {Set<string>} */
    const readings = new Set();
    for (const index of headwordIndices) {
        const {term, reading} = headwords[index];
        terms.add(term);
        readings.add(reading);
    }

    /** @type {string[]} */
    const disambiguations = [];
    const termsDiffer = !areSetsEqual(terms, allTermsSet);
    const readingsDiffer = !areSetsEqual(readings, allReadingsSet);
    if (termsDiffer) {
        disambiguations.push(...getSetIntersection(terms, allTermsSet));
    }
    if (readingsDiffer) {
        if (termsDiffer) {
            // Avoid listing a reading that was already emitted as a term.
            for (const term of terms) { readings.delete(term); }
        }
        disambiguations.push(...getSetIntersection(readings, allReadingsSet));
    }
    return disambiguations;
}
|
||||
|
||||
/**
 * Determines whether a word-class list represents a verb or i-adjective that
 * is not also usable as a noun. A suru-verb ('vs') that also carries the noun
 * class ('n') is excluded, since it primarily functions as a noun.
 * @param {string[]} wordClasses
 * @returns {boolean}
 */
export function isNonNounVerbOrAdjective(wordClasses) {
    // All classes that count as a verb or adjective, including suru verbs.
    const verbOrAdjectiveClasses = new Set(['v1', 'v5', 'vk', 'vz', 'adj-i', 'vs']);
    let hasVerbOrAdjective = false;
    let hasSuruVerb = false;
    let hasNoun = false;
    for (const wordClass of wordClasses) {
        if (verbOrAdjectiveClasses.has(wordClass)) { hasVerbOrAdjective = true; }
        if (wordClass === 'vs') { hasSuruVerb = true; }
        if (wordClass === 'n') { hasNoun = true; }
    }
    return hasVerbOrAdjective && !(hasSuruVerb && hasNoun);
}
|
||||
|
||||
/**
 * Compares two revision strings, reporting whether `current` is older than `latest`.
 * Dot-separated-integer revisions with the same number of components are
 * compared numerically component-by-component; anything else (pre-release
 * suffixes, differing component counts) falls back to plain string comparison.
 * @param {string} current
 * @param {string} latest
 * @returns {boolean} `true` when `current` is considered older than `latest`.
 */
export function compareRevisions(current, latest) {
    // Dot-separated integers only: "4.7" or "24.1.1.1" match, "1.0.0-alpha" does not.
    const simpleVersionPattern = /^(\d+\.)*\d+$/;
    if (!simpleVersionPattern.test(current) || !simpleVersionPattern.test(latest)) {
        // Not a simple numeric revision; lexicographic comparison is the best available.
        return current < latest;
    }

    const currentParts = current.split('.').map((part) => Number.parseInt(part, 10));
    const latestParts = latest.split('.').map((part) => Number.parseInt(part, 10));

    if (currentParts.length !== latestParts.length) {
        // Component counts differ; numeric comparison is ambiguous, so fall back.
        return current < latest;
    }

    for (let i = 0; i < currentParts.length; ++i) {
        const currentValue = currentParts[i];
        const latestValue = latestParts[i];
        if (currentValue !== latestValue) {
            return currentValue < latestValue;
        }
    }

    // All components equal: not older.
    return false;
}
|
||||
|
||||
// Private
|
||||
|
||||
/**
 * Searches a grouped-pronunciation list for an entry with the same reading
 * and an equivalent pronunciation.
 * @param {string} reading
 * @param {import('dictionary').Pronunciation} pronunciation
 * @param {import('dictionary-data-util').GroupedPronunciationInternal[]} groupedPronunciationList
 * @returns {?import('dictionary-data-util').GroupedPronunciationInternal} The matching group, or `null` when none exists.
 */
function findExistingGroupedPronunciation(reading, pronunciation, groupedPronunciationList) {
    for (const candidate of groupedPronunciationList) {
        if (candidate.reading === reading && arePronunciationsEquivalent(candidate, pronunciation)) {
            return candidate;
        }
    }
    return null;
}
|
||||
|
||||
/**
 * Checks whether an existing grouped pronunciation is equivalent to another
 * pronunciation: same type, equal tag lists, and equal type-specific data
 * (pitch positions or IPA transcription).
 * @param {import('dictionary-data-util').GroupedPronunciationInternal} groupedPronunciation
 * @param {import('dictionary').Pronunciation} pronunciation2
 * @returns {boolean}
 */
function arePronunciationsEquivalent({pronunciation: pronunciation1}, pronunciation2) {
    if (pronunciation1.type !== pronunciation2.type) { return false; }
    if (!areTagListsEqual(pronunciation1.tags, pronunciation2.tags)) { return false; }
    switch (pronunciation1.type) {
        case 'pitch-accent': {
            // Cast is valid: the type equality check above guarantees it.
            const other = /** @type {import('dictionary').PitchAccent} */ (pronunciation2);
            return (
                pronunciation1.positions === other.positions &&
                areArraysEqual(pronunciation1.nasalPositions, other.nasalPositions) &&
                areArraysEqual(pronunciation1.devoicePositions, other.devoicePositions)
            );
        }
        case 'phonetic-transcription': {
            // Cast is valid: the type equality check above guarantees it.
            const other = /** @type {import('dictionary').PhoneticTranscription} */ (pronunciation2);
            return pronunciation1.ipa === other.ipa;
        }
    }
    // Unknown pronunciation type: type and tags already matched.
    return true;
}
|
||||
|
||||
/**
 * Checks two arrays for element-wise strict equality (shallow).
 * @template [T=unknown]
 * @param {T[]} array1
 * @param {T[]} array2
 * @returns {boolean}
 */
function areArraysEqual(array1, array2) {
    return (
        array1.length === array2.length &&
        array1.every((value, index) => value === array2[index])
    );
}
|
||||
|
||||
/**
 * Checks two tag lists for pairwise equality, comparing each tag's name and
 * its dictionaries array.
 * @param {import('dictionary').Tag[]} tagList1
 * @param {import('dictionary').Tag[]} tagList2
 * @returns {boolean}
 */
function areTagListsEqual(tagList1, tagList2) {
    if (tagList1.length !== tagList2.length) { return false; }
    return tagList1.every((tag1, index) => {
        const tag2 = tagList2[index];
        return tag1.name === tag2.name && areArraysEqual(tag1.dictionaries, tag2.dictionaries);
    });
}
|
||||
|
||||
/**
 * Checks whether two sets contain exactly the same values.
 * @template [T=unknown]
 * @param {Set<T>} set1
 * @param {Set<T>} set2
 * @returns {boolean}
 */
function areSetsEqual(set1, set2) {
    // Equal sizes plus full containment of one in the other implies equality.
    return set1.size === set2.size && [...set1].every((value) => set2.has(value));
}
|
||||
|
||||
/**
 * Computes the intersection of two sets as an array, preserving the
 * iteration order of `set1`.
 * @template [T=unknown]
 * @param {Set<T>} set1
 * @param {Set<T>} set2
 * @returns {T[]}
 */
function getSetIntersection(set1, set2) {
    return [...set1].filter((value) => set2.has(value));
}
|
||||
|
||||
/**
 * Serializes an array of values into a string usable as a composite Map key.
 * JSON.stringify gives a deterministic representation for the primitive and
 * array values used by the callers in this file, so equal inputs produce
 * equal keys.
 * @param {unknown[]} array
 * @returns {string}
 */
function createMapKey(array) {
    return JSON.stringify(array);
}
|
||||
Reference in New Issue
Block a user