diff --git a/MANIFEST.in b/MANIFEST.in index 3cd6da4..fdefd2b 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1 @@ -include manga_ocr/assets/*.jpg +include assets/example.jpg diff --git a/README.md b/README.md index 34911e6..b315761 100644 --- a/README.md +++ b/README.md @@ -110,20 +110,22 @@ Here are some cherry-picked examples showing the capability of the model. | image | Manga OCR result | |----------------------|------------------| -| ![](examples/00.jpg) | 素直にあやまるしか | -| ![](examples/01.jpg) | 立川で見た〝穴〟の下の巨大な眼は: | -| ![](examples/02.jpg) | 実戦剣術も一流です | -| ![](examples/03.jpg) | 第30話重苦しい闇の奥で静かに呼吸づきながら | -| ![](examples/04.jpg) | よかったじゃないわよ!何逃げてるのよ!!早くあいつを退治してよ! | -| ![](examples/05.jpg) | ぎゃっ | -| ![](examples/06.jpg) | ピンポーーン | -| ![](examples/07.jpg) | LINK!私達7人の力でガノンの塔の結界をやぶります | -| ![](examples/08.jpg) | ファイアパンチ | -| ![](examples/09.jpg) | 少し黙っている | -| ![](examples/10.jpg) | わかるかな〜? | -| ![](examples/11.jpg) | 警察にも先生にも町中の人達に!! | +| ![](assets/examples/00.jpg) | 素直にあやまるしか | +| ![](assets/examples/01.jpg) | 立川で見た〝穴〟の下の巨大な眼は: | +| ![](assets/examples/02.jpg) | 実戦剣術も一流です | +| ![](assets/examples/03.jpg) | 第30話重苦しい闇の奥で静かに呼吸づきながら | +| ![](assets/examples/04.jpg) | よかったじゃないわよ!何逃げてるのよ!!早くあいつを退治してよ! | +| ![](assets/examples/05.jpg) | ぎゃっ | +| ![](assets/examples/06.jpg) | ピンポーーン | +| ![](assets/examples/07.jpg) | LINK!私達7人の力でガノンの塔の結界をやぶります | +| ![](assets/examples/08.jpg) | ファイアパンチ | +| ![](assets/examples/09.jpg) | 少し黙っている | +| ![](assets/examples/10.jpg) | わかるかな〜? | +| ![](assets/examples/11.jpg) | 警察にも先生にも町中の人達に!! | # Acknowledgments -This project was done with the usage of [Manga109-s](http://www.manga109.org/en/download_s.html) dataset. 
+This project was done with the usage of: +- [Manga109-s](http://www.manga109.org/en/download_s.html) dataset +- [CC-100](https://data.statmt.org/cc-100/) dataset diff --git a/manga_ocr/assets/example.jpg b/assets/example.jpg similarity index 100% rename from manga_ocr/assets/example.jpg rename to assets/example.jpg diff --git a/examples/00.jpg b/assets/examples/00.jpg similarity index 100% rename from examples/00.jpg rename to assets/examples/00.jpg diff --git a/examples/01.jpg b/assets/examples/01.jpg similarity index 100% rename from examples/01.jpg rename to assets/examples/01.jpg diff --git a/examples/02.jpg b/assets/examples/02.jpg similarity index 100% rename from examples/02.jpg rename to assets/examples/02.jpg diff --git a/examples/03.jpg b/assets/examples/03.jpg similarity index 100% rename from examples/03.jpg rename to assets/examples/03.jpg diff --git a/examples/04.jpg b/assets/examples/04.jpg similarity index 100% rename from examples/04.jpg rename to assets/examples/04.jpg diff --git a/examples/05.jpg b/assets/examples/05.jpg similarity index 100% rename from examples/05.jpg rename to assets/examples/05.jpg diff --git a/examples/06.jpg b/assets/examples/06.jpg similarity index 100% rename from examples/06.jpg rename to assets/examples/06.jpg diff --git a/examples/07.jpg b/assets/examples/07.jpg similarity index 100% rename from examples/07.jpg rename to assets/examples/07.jpg diff --git a/examples/08.jpg b/assets/examples/08.jpg similarity index 100% rename from examples/08.jpg rename to assets/examples/08.jpg diff --git a/examples/09.jpg b/assets/examples/09.jpg similarity index 100% rename from examples/09.jpg rename to assets/examples/09.jpg diff --git a/examples/10.jpg b/assets/examples/10.jpg similarity index 100% rename from examples/10.jpg rename to assets/examples/10.jpg diff --git a/examples/11.jpg b/assets/examples/11.jpg similarity index 100% rename from examples/11.jpg rename to assets/examples/11.jpg diff --git 
a/assets/examples/cc-100.jpg b/assets/examples/cc-100.jpg new file mode 100644 index 0000000..5243dac Binary files /dev/null and b/assets/examples/cc-100.jpg differ diff --git a/assets/examples/random.jpg b/assets/examples/random.jpg new file mode 100644 index 0000000..cd585bd Binary files /dev/null and b/assets/examples/random.jpg differ diff --git a/assets/fonts.csv b/assets/fonts.csv new file mode 100644 index 0000000..ba75656 --- /dev/null +++ b/assets/fonts.csv @@ -0,0 +1,3 @@ +font_path,supported_chars,num_chars,label +Noto Sans JP Medium 500.otf,"!""#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~¡¢£¥§©«¬®°±·»¿□▲△▼▽◆◇○◎●◯★☆☉♀♂♠♡♣♥♦♪♭♯、。〃々〆〇〈〉《》「」『』【】〒〔〕〜〝〟〰ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをん゙゚ゝゞ゠ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶ・ー㆑ㇱㇷㇼ一丁七万丈三上下不与丑且丕世丘丙丞両並个中串丸丹主丼乂乃久么之乍乎乏乖乗乘乙九乞也乱乳乾亀了予争事二于云互五井亘亙些亜亞亡亢交亥亦亨享京亭亮人什仁仄仇今介仍仏仔仕他仗付仙代令以仮仰仲件价任份企伊伍伎伏伐休会伝伯伴伶伸伺似伽佃但佇位低住佐佑体何佗余佚佛作佞佟你佩佬佰佳併佼使侂侃來侈例侍侏侑侗侘供依侠価侭侮侯侵侶便俀係促俄俊俎俑俗俘俚保俟俠信俣修俯俱俳俵俶俸俺倅倉個倍們倒倖候倚借倡倣値倦倧倩倪倫倭倶倹偃假偈偉偏偕做停健偲側偵偶偽傀傅傍傑傕傘備催傭傲傳債傷傾僅僉僊働像僑僕僖僚僥僧僭僻儀儁儂億儋儒儔儘儚償儡優儲儺儼兀允元兄充兆兇先光克兌免兎児兒兗党兜入內全兩兪八公六兮共兴兵其具典兼冀内円冉冊册再冏冑冒冕冗写冠冤冥冨冪冬冰冲决冴冶冷冼凄准凉凋凌凍凛凜凝几凡処凧凪凰凱凶凸凹出函刀刁刃刄分切刈刊刎刑列初判別利刪到刳制刷券刹刺刻剃剄則削剋剌前剖剛剝剣剤剥剪副剰剱割創剽劃劇劈劉劍劔力功加劣助努劫劭励労効劾勁勃勅勇勉勒動勗勘務勛勝募勢勣勤勧勲勳勺勾勿匁匂包匈匍匐匕化北匙匝匠匡匣匪匯匱匹区医匿區十千升午卉半卍卑卒卓協南単博卜卞占卡卦卬卯印危即却卵卷卸卿厄厘厙厚厝原厠厥厦厨厩厭厲厳去参參又叉及友双反収叔取受叙叛叟叡叢口古句叩只叫召叭可台叱史右叶号司吃各合吉吊吋同名后吏吐向吒君吝吟吠否含吳吶吸吹吻吼吽吾呀呂呆呈呉告呑呟周呪呰味呵呻呼命咀咄咆咋和咎咒咢咤咥咨咩咫咬咲咳咸咽哀品哈哉員哥哨哩哭哮哲哺唄唆唇唐唖唯唱唸唹唾啄商問啓啖啜啤啦啼喀善喆喇喉喊喋喘喙喚喜喝喧喩喪喫喬喰営嗄嗅嗇嗚嗜嗟嗣嗽嘆嘉嘎嘔嘗嘘嘩嘯嘱嘲嘴噂噌噛器噲噴噺嚆嚇嚢嚥嚮嚴嚶嚼囁囂囃囎囚四回因団园囮困囲図固国囿圀圃圉國圏園圓圖團土圧在圩圭地圳圻址坂均坊坎坏坐坑坡坤坦坪坳垂型垓垠垢垣埃埈埋城埒埔埕埜域埠埣埴埵執培基埼堀堂堅堆堕堡堤堪堯堰報場堵堺塀塁塊塑塔塗塘塙塚塞塢塩填塵塹塾境墅墉墓増墜墟墨墩墳墺墻墾壁壇壊壌壕壙壜壢壩士壬壮声壱売壷壹壺壽変夏夔夕外夙多夛夜夢大天太夫夭央失夷夾奄奇奈奉奎奏奐契奔奕套奘奚奠奢奥奧奨奪奭奮女奴奸好如妃妄妊妍妓妖妙妤妥妨妬妲妹妻妾姆姉始姐姑姓委姚姜姥姦姨姪姫姶姻姿威娃娑娘娜娟娠娥娩娯娶娼婁婆婉婕婚婢婦婷婿媒媚媛媽嫁嫉嫌嫡嫦嫩嬉嬌嬛嬢嬪嬬嬰嬲嬴嬶孁子孔孕字存孚孜孝孟季孤学孩孫孵學孺宅宇守安宋完宍宏宕宗官宙定宛宜宝実客宣室宥宦宮宰害宴宵家宸容宿寂寄寅密寇富寒寓寔寛寝察寡實寧寨審寫寬寮寳寵寶寸寺対寿封専射将將專尉尊尋對導小少尓尖尚尤尭就尸尹尺尻尼尽尾尿局屁居屈届屋屍屎屏屑屓展属屠層履屬屯山岌岐岑岔岡岩岫岬岱岳岷岸峅峙峠峡峨峪峯峰峴島峻崇崋崎崑崔崖崗崙崧崩嵆嵊嵌嵐嵜嵩嵬嵯嶋嶌嶝嶠嶷嶺嶼嶽巌巍巒巖川州巡巣工左巧巨巫差己已巳巴
巷巻巽巾市布帆希帖帙帚帛帝帥師席帯帰帳帶帷常帽幀幄幅幇幌幔幕幟幡幢幣干平年并幸幹幺幻幼幽幾广庁広庄庇床序底店庚府庠度座庫庭庵庶康庸庾廂廃廆廈廉廊廓廖廙廞廟廠廣廩廬廳延廷建廻廼廿弁弄弈弉弊弋弌式弐弑弓弔引弖弗弘弛弟弥弦弧弩弯弱張強弼弾彅彊彌彎当彗彙彝彡形彤彦彧彩彪彫彬彭彰影彷役彼彿往征徂径待徊律後徐徒従得徘徙從徠御徧徨復循徭微徳徴德徹徽心必忌忍志忘忙応忠快忱念忻忽怒怖怜思怠怡急性怨怪怯恂恆恋恍恐恒恕恢恣恤恥恨恩恪恫恬恭息恰恵悉悌悍悔悛悝悟悠患悦悧悩悪悲悳悴悶悸悼情惇惑惚惜惟惠惡惣惧惨惰想惹惺愁愈愉愍意愔愕愚愛感愴愷愼愾愿慄慇慈慊態慌慎慕慟慢慣慧慨慮慰慶慾憂憊憎憐憑憔憚憤憧憩憫憬憮憲憶憺憾懃懇應懊懋懐懣懲懸懺懼懿戀戈戊戌戍戎成我戒戔或戚戛戟戡戦戩截戮戯戰戴戸戻房所扁扇扈扉手才扎打払托扮扱扶批扼承技抄抉把抑抒投抗折抜択披抱抵抹押抽担拉拌拍拐拒拓拔拗拘拙招拝拠拡括拭拮拯拱拳拵拶拷拼拾拿持挂指按挑挖挙挟挨挫振挹挺挽挾挿捉捌捏捐捕捗捜捧捨捩据捲捷捺捻掃授掌掏排掖掘掛掟掠採探掣接控推掩措掬掲掴掻掾揃揄揆揉描提揖揚換握揮援揶揺損搗搦搬搭携搾摂摔摘摠摩摯摸摺撃撈撒撓撚撞撤撥撫播撮撰撲撹撻撼擁擂擅操擢擦擬擱擲擾攀攘攝攣攪攫攬支收攷攸改攻放政故效敎敏救敖敗教敞敢散敦敬数敲整敵敷數斂斃文斉斌斎斐斑斗料斛斜斟斡斤斥斧斫斬断斯新方於施旁旅旋旌族旒旗旛无既日旦旧旨早旬旭旱旺旻昀昂昆昇昉昊昌明昏易昔昕星映春昧昨昭是昱昴昵昶昼晁時晃晄晋晏晒晟晤晦晧晨晩普景晰晴晶智暁暇暈暉暎暐暑暖暗暘暢暦暫暮暲暴暹曁曄曇曉曖曙曜曝曠曰曲曳更曷書曹曺曼曽曾替最會月有朋服朐朔朕朗望朝期朦朧木未末本札朮朱朴朶机朽杁杉李杏材村杓杖杙杜杞束条杢杣来杭杮杯杰東杲杳杵杷杼松板枇枉枋析枓枕林枚果枝枠枡枢枯枳架枷枹柄柊柏某柑染柔柘柚柞柩柯柱柳柴柵査柾柿栂栃栄栓栖栗栞校栢栩株栴核根格栽桀桁桂桃框案桐桑桓桔桙桜桝桟桧桴桶桿梁梃梅梓梔梗條梟梠梢梧梨梭梯械梱梳梵梶棄棋棍棒棕棗棘棚棟棠棣森棲棹棺椀椅椋植椎椏椒椙椚椛検椰椴椿楊楓楔楕楚楠楡楢楨楫業楮楯楳極楷楸楼楽概榊榎榑榔榕榛榜榧榮榴槃槇槊構槌槍槎槐槓様槙槨槻槽槿樂樅樊樋樒樓樗標樟模樣権横樫樵樹樺樽橇橈橋橘橙機橡橿檀檄檎檗檜檢檣檬檮檳檸檻櫂櫃櫓櫚櫛櫟櫨櫻欄欅權欒欠次欣欧欲欺欽款歆歌歎歓歙止正此步武歩歪歯歳歴歸死歿殆殉殊残殖殘殤殲殴段殷殺殻殼殿毀毅毋母毎毒毓比毖毗毘毛毫毬毯氈氏氐民気氣水氷永氾汀汁求汎汐汕汗汚汜汝江池汪汰汲汴汶決汽汾沁沂沃沅沈沌沐沓沔沖沙沛没沢沪沫沮沱河沸油治沼沽沾沿況泄泉泊泌泓法泗泛泠泡波泣泥注泪泮泰泳洄洋洌洒洗洙洛洞津洩洪洮洱洲洵洸活洽派流浄浅浙浚浜浣浦浩浪浬浮浴海浸涂涅涇消涌涎涙涛涜涪涯液涵涸涼涿淀淄淅淆淇淋淑淘淝淞淡淤淦淨淪淫淮深淳淵混淸淹淺添清渇済渉渋渓渕渙渚減渝渟渠渡渣渤渥渦温渫測渭港游渺渾湊湍湖湘湛湟湣湧湫湯湾湿満源準溜溝溟溢溥溪溯溶溺滄滅滇滉滋滎滑滓滔滕滝滞滬滲滴滷滸滾滿漁漂漆漉漏漑演漕漠漢漣漫漬漱漲漳漸漿潁潅潔潘潜潟潤潭潮潰潴潼澁澂澄澈澎澗澤澧澪澱澳澹激濁濂濃濊濘濟濠濡濤濫濬濮濯濰濱濵濾瀉瀋瀏瀑瀕瀘瀚瀛瀝瀞瀟瀧瀬瀾灌灘灞灣灤火灯灰灸灼災炅炉炊炎炒炙炫炬炭炮炯炳炸点為烈烏烙烟烹烽焉焔焙焚無焦焰然焼煉煌煎煒煕煖煙煜煤煥照煨煩煬煮煽熈熊熏熔熕熙熟熨熱熹熾燁燃燈燎燐燕燗營燥燦燧燭燮燵燻燼燾燿爆爛爨爪爬爲爵父爺爻爽爾牆片版牌牒牘牙牛牝牟牡牢牧物牲特牽犀犁犂犍犠犢犬犯状狂狄狆狐狗狙狛狡狩独狭狸狼狽猊猗猛猜猟猥猩猪猫献猴猶猷猾猿獄獅獏獠獣獨獪獰獲獺玄率玉王玖玩玫玲玻珀珂珈珉珊珍珠珥珪班珸現球琅理琉琛琢琥琦琨琪琬琮琰琲琳琴琵琶琺琿瑀瑁瑋瑕瑗瑙瑚瑛瑜瑞瑟瑠瑣瑤瑩瑪瑯瑰瑳瑶瑾璃璇璋璐璜璞璟璠璧璩環璽璿瓊瓌瓏瓔瓘瓚瓜瓠瓢瓦瓶瓷甄甌甑甕甘甚甜生產産甥甦用甫甯田由甲申男甸町画界畏畑畔留畚畜畝畠畢畤略畦番畫異畳當畷畸畿疆疇疋疎疏疑疝疣疫疱疲疵疸疹疼疽疾病症痍痒痔痕痘痙痛痢痣痩痰痴痺瘍瘡瘢瘤瘴瘻療癇癌癒癖癩癪癬癲癸発登發白百的皆皇皋皎皐皓皖皝皮皴皺皿盂盃盆盈益盒盗盛盞盟監盤盥盧盪目盲直相盾省眈眉看県眞真眠眩眷眸眺眼着睡睢督睦睨睫睺睾睿瞑瞞瞥瞬瞭瞰瞳瞻瞼瞽瞿矗矛矜矢矣知矧矩短矮矯石砂砌砒研砕砥砦砧砲破砺砿硝硫硬硯碁碇碌碍碑碓碕碗碣碧碩碭確碼碾磁磊磋磐磔磧磨磯磾礁礎礒礙礦礪礫礬示礼礽社祀祁祇祈祉祐祓祖祗祚祜祝神祟祠祢祥票祭祷祺禁禄禅禊禍禎福禕禦禧禪禮禰禹禺禽禾禿秀私秉秋科秒秘租秣秤秦秩称移稀稈程稍税稔稗稙稚稜稟稠種稲稷稻稼稽稿穀穂穆積穎穏穢穣穫穴究穹空穿突窃窄窈窒窓窘窟窠窩窪窮窯窺竃竄竇竈立站竜竟章竣童竪端竴競竹竺竿笄笈笊笏笑笘笙笛笞笠笥符第笹筆筈等筋筌筍筏筐筑筒答策筝筠筥筧筬筮筰筵筺箆箇箋箍箏箒箔箕算箙箚管箪箭箱箴箸節篁範篆篇築篋篝篠篤篥篩篪篭篳簀簑簒簗簡簪簫簸簾簿籃籌籍籐籔籟籠籤籬米籾粁粂粃粉粋粍粒粕粗粘粛粟粤粥粧粲粽精糀糊糎糖糜糞糟糠糧糸糺系糾紀紂約紅紆
紇紊紋納紐純紗紘紙級紛素紡索紫紬紮累細紳紹紺終絃組絅絆経結絞絡絢絣給絨統絲絳絵絶絹綏經継続綜綝綠綫綬維綰綱網綴綵綸綺綻綽綾綿緊緋総緑緒緘線緝緞締編緩緬緯練緻縁縄縅縉縊縋縒縛縝縞縢縣縦縫縮縯縵縷縹縺總績繁繆繇繊繋繍織繕繚繞繪繭繰繹繼纂續纏纒纓缶罐网罔罕罘罠罪罫置罰署罵罷罹羂羅羆羊羋羌美羚羞羣群羨義羯羲羹羽翁翅翊翌翎習翔翟翠翡翦翫翰翳翹翻翼耀老考者耆而耐耕耗耘耳耶耽耿聊聖聘聚聞聟聡聯聰聲聳聴聶職聾肄肆肇肉肋肌肖肘肛肜肝股肢肥肩肪肯肱育肴肺胃胆背胎胖胚胝胞胡胤胥胱胴胸能脂脅脆脇脈脊脚脛脩脱脳脹脾腋腎腐腑腓腔腕腫腰腱腸腹腺腿膀膂膊膏膚膜膝膠膣膨膳膵膺膾膿臀臂臆臈臍臓臘臙臚臣臥臧臨自臭至致臺臻臼舁舂舅與興舊舌舍舎舐舒舗舘舛舜舞舟舩航舫般舳舵舶舷船艀艇艘艙艤艦艮良艱色艶艸艾芋芍芎芒芙芝芥芦芬芭芮芯花芳芷芸芹芻芽芾苅苑苓苔苗苛苞苟若苦苧苫英苴苺苻茂范茄茅茉茎茗茘茜茨茫茯茱茲茶茸茹荀荃草荊荏荒荘荷荻荼莆莇莉莊莎莒莘莚莞莢莫莱莽菁菅菊菌菓菖菘菜菟菩菫華菰菱菲菴萄萇萊萌萍萎萠萩萬萱萸萼落葆葉著葛葡董葦葫葬葭葯葱葵葺蒋蒐蒔蒙蒜蒟蒯蒲蒴蒸蒹蒻蒼蒿蓁蓄蓉蓋蓑蓬蓮蓼蔀蔑蔓蔗蔚蔡蔣蔦蔬蔭蔵蔽蕁蕃蕉蕊蕎蕗蕙蕤蕨蕩蕪蕭蕷蕾薀薁薄薇薈薊薔薗薙薛薦薨薩薪薫薬薭薮薯藁藉藍藏藝藤藥藩藪藷藺藻蘂蘄蘆蘇蘊蘋蘚蘭蘿虎虐虔處虚虜虞號虢虫虹虻蚊蚕蚤蚩蛆蛇蛉蛋蛍蛎蛙蛛蛟蛤蛭蛮蛯蛸蛹蛾蜀蜂蜃蜆蜉蜘蜚蜜蜥蜴蜷蜻蝉蝋蝎蝕蝗蝙蝠蝣蝦蝮蝶蝸蝿螂融螢螳螺蟄蟇蟠蟲蟷蟹蟻蟾蠅蠍蠕蠡蠢蠣蠱血衆行衍衒術街衙衛衝衞衡衢衣表衫衰衷衾衿袁袂袈袋袍袖袞袢被袰袱袴袷袿裁裂裃装裏裒裔裕補裝裟裡裳裴裵裸製裾褄複褌褐褒褚褥褪褶褸褻襄襖襞襟襤襦襲襴襷西要覆覇覈見規視覗覚覧親覯観覺覽觀角觚觜解触言訂訃計訊訌討訓託記訛訝訟訢訣訥訪設許訳訴訶診註証詁詈詐詔評詛詞詠詡詢詣試詧詩詫詭詮詰話該詳詵詹誄誅誇誉誌認誑誓誕誘語誠誡誣誤誥誦誨說説読誰課誹誼誾調談請諌諍諏諒論諜諝諡諦諧諫諭諮諱諳諶諷諸諺諾謀謁謂謄謎謐謔謗謙講謝謡謨謬謳謹證譏識譙譚譜警譬議譲譴護譽讀讃變讎讐讒讓讖谷谺谿豆豉豊豎豐豚象豪豫豬豳豹豺貂貉貊貌貘貝貞負財貢貧貨販貪貫責貯貰貳貴貶買貸費貼貽貿賀賁賂賃賄資賈賊賎賑賓賛賜賞賠賢賣賤賦質賭購賽贄贅贈贋贍贔贖贛赤赦赧赫赭走赳赴起超越趙趣趨足趾跆跋跎跏跗跛距跡跨跪路跳践踊踏踞踪踰踵蹂蹄蹉蹊蹋蹙蹟蹠蹲蹴蹶躁躅躇躊躍躑躓躙身躬躯躰躱躾軀車軋軌軍軒軛軟転軫軸軻軼軽軾較載輌輓輔輛輜輝輦輩輪輯輳輸輻輿轄轅轆轍轟轡轢车辛辜辞辟辣辦辨辭辮辯辰辱農辷辺辻込辿迂迄迅迎运近返迢迥迦迩迪迫迭述迴迷迹追退送逃逅逆逍透逐逓途逖逗這通逝逞速造逡逢連逮週進逵逸逹逼遁遂遅遇遊運遍過遐道達違遙遜遠遡遣遥適遭遮遵遷選遹遺遼遽避邀邁邂邃還邇邈邉邊邏邑邕邙邠邢那邦邨邪邯邱邳邵邸邽邾郁郃郅郊郎郗郛郝郞郡郢郤部郭郯郵郷都鄂鄄鄒鄔鄖鄙鄢鄧鄭鄯鄰鄱鄲鄴酈酉酊酋酌配酎酒酔酘酛酢酩酪酬酵酷酸醂醇醉醍醐醒醗醜醤醪醫醸采釈釉釋里重野量釐金釗釘釜針釣釦釧釵鈍鈎鈑鈔鈕鈞鈴鈷鈺鈿鉄鉅鉈鉉鉋鉗鉛鉞鉢鉤鉦鉱鉾銀銃銅銈銑銓銕銘銚銛銜銭鋏鋒鋤鋪鋭鋲鋳鋸鋺鋼錆錐錕錘錚錠錢錣錦錨錫錬錮錯録鍋鍍鍔鍛鍬鍮鍵鍼鍾鎌鎔鎖鎗鎚鎧鎬鎭鎮鎰鎹鏃鏑鏞鏡鏢鐐鐔鐘鐙鐡鐵鐸鑁鑑鑒鑓鑚鑢鑫鑰鑲鑼鑽鑿長长門閂閃閉開閏閑閒間閔閖閘関閣閤閥閨閩閬閭閲閻閼閾闇闊闍闐闓闕闖闘關闡闢闥阜阪阮阯防阻阿陀陂附陋陌降限陘陛陝陞陟院陣除陥陪陰陳陵陶陷陸険陽隅隆隈隊隋階随隔隕隗隘隙際障隠隣隧隨隴隷隻隼雀雁雄雅集雇雉雌雍雎雑雒雕雖雙雛雜雞離難雨雪雫雰雲零雷雹電需霄霆震霊霍霑霓霖霜霞霧霰露霸霹霽靂靄靈靏青靖静靚靜非靡面革靫靭靱靳靴靺靼鞄鞅鞆鞋鞍鞏鞘鞜鞠鞨鞬鞭鞮鞴韃韋韓韜韮音韶韻響頁頂頃項順須頊頌預頑頒頓頗領頚頠頡頤頬頭頴頷頸頻頼頽顆題額顎顒顓顔顕顗願顛類顥顧顯顰風颪颯飄飛飜食飡飢飫飯飲飴飼飽飾餃餅餉養餌餐餓餘餞餡館饅饉饋饌饒饗首馗香馥馨馬馮馳馴駁駄駅駆駈駐駒駕駙駝駢駱駿騎騏騒験騙騨騫騭騰騸驀驃驍驕驚驛驟驢驤驥驩驪骨骸髄髏髑體高髙髠髢髣髦髪髭髯髴髷髻鬆鬘鬚鬢鬣鬨鬬鬱鬲鬼魁魂魃魄魅魍魎魏魑魔魚魯鮎鮑鮒鮓鮨鮪鮫鮭鮮鯉鯏鯖鯛鯨鯰鯱鯵鰊鰍鰐鰒鰓鰕鰭鰯鰹鰺鰻鱈鱒鱗鳥鳧鳩鳰鳳鳴鳶鴇鴈鴉鴎鴛鴦鴨鴫鴬鴻鵄鵜鵝鵞鵠鵡鵬鵯鵰鵲鵺鶉鶏鶚鶯鶴鶻鷗鷦鷯鷲鷹鷺鸕鸚鸞鹵鹸鹹鹽鹿麁麒麓麗麝麟麥麦麩麵麹麺麻麾麿黃黄黌黍黎黑黒黔默黙黛點鼈鼎鼓鼠鼬鼻鼾齊齋齎齟齢齧齬齮齲龍龐龔龕龗龙龜﨑﨟",5234,regular +SawarabiMincho Regular 
400.ttf,"!""#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~¡¢£¥§©«¬®°±·»¿□▲△▼▽◆◇○◎●◯★☆☉♀♂♠♡♣♥♦♪♭♯、。〃〈〉《》「」『』【】〒〔〕〜〝〟ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをん゙゚ゝゞァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶ・ー一丁七万丈三上下不与丑且世丘丙丞両並中串丸丹主乃久之乍乎乏乗乙九乞也乱乳乾亀了予争事二云互五井亘亙些亜亡交亥亦亨享京亭亮人什仁仇今介仏仔仕他付仙代令以仮仰仲件任企伊伍伎伏伐休会伝伯伴伶伸伺似伽佃但位低住佐佑体何余佚作佳併佼使侃例侍供依侠価侭侮侯侵侶便係促俄俊俗保信俣修俳俵俸俺倉個倍倒候借値倫倶倹偉偏停健偲側偵偶偽傍傑傘備催傭債傷傾働像僑僕僚僧儀億儒償優儲元兄充兆先光克免兎児党兜入全八公六共兵其具典兼内円冊再冒冗写冠冥冬冴冶冷凄准凍凝凡処凧凪凶出函刀刃分切刈刊刑列初判別利到制刷券刺刻則削前剖剛剣剤剥副剰割創劇力功加劣助努劫励労効劾勅勇勉動勘務勝募勢勤勧勲勾勿匂包化北匙匠匡匪匹区医匿十千升午半卑卒卓協南単博占卦卯印危即却卵卸卿厄厘厚原厭厳去参又叉及友双反収叔取受叙叛口古句叩只叫召可台叱史右叶号司吃各合吉吊吋同名后吏吐向君吟吠否含吸吹吻吾呂呆呈呉告呑周呪味呼命咋和咲咳咽哀品哉員哨哩哲唄唇唐唯唱唾啄商問啓善喋喚喜喝喧喪喫営嗣嘆嘉嘘嘱器噴囚四回因団困囲図固国園土圧在圭地坂均坊坑坡坤坦坪垂型垢垣埋城域埴執培基埼堀堂堅堆堕堤堪報場塗塙塚塩填塵塾境墓増墨墳壊壌士壬壮声壱売変夏夕外多夜夢大天太夫央失奇奈奉奏契奥奪奮女奴奸好如妃妊妓妖妙妥妨妬妹妻姉始姐姑姓委姫姶姿威娃娘娠娯娼婦媒嫌嫡嬢子孔字存孝季孤学孫宅宇守安宋完宍宏宕宗官宙定宛宜宝実客宣室宥宮宰害宵家容宿寂寄寅密富寒寛寝察寧審寸寺対寿封専射尊尋導小少尖尚就尺尻尼尽尾尿局居屈届屋屍展属層山岐岡岩岬岳岸峰島崎嵩嶋川州巣工左巧巨差己巳巴巻巾市布帆希帖帝帥師席帯帰帳帷常帽幅幕幡幣干平年幸幹幻幼幽幾庁広庄床序底店府度座庫庭庶康庸廃延廷建廿弁弊式弐弓引弘弟弥弦弧弱張強弾当形彦彩彫彰影役彼往征径待律後徐徒従得御復微徳徴心必忌忍志忘忙応忠快念怒怖怜思怠急性怨怪怯恐恒恨恩息恰恵悔悟患悦悩悪悲悼情惑惜惟惣想惹愉意愚感態慌慎慕慢慣慮憂憎憐憤憧憩憲憶懇懐懲戊戎成我戒或戚戟戦戸戻房所扇扉手才打払托扱扶批承技抄把抑投抗折抜択披抱抵抹押抽担拉拍拐拒拓拘拙招拝拠拡括拭拾持指挑挙挟振挽挿捉捕捗捜捧捨据捻掃授掌排掘掛掟掠採探接控推掩措掲掴描提揚換揮援揺損携摂摘摩撃撞撲操擾支改攻放政故敏救敗教敢散敦敬数整敵文斉斎斑斗料斜斤斥斬断斯新方於施旅旋族旗既日旦旧旨早旬旺昆昇昌明易昔星映春昧昨昭是昼時晩普景晴智暁暑暖暗暦暮暴曜曲更書替最月有朋服朗望朝期木未末本札朱朴机朽杉李杏材村杓杖杜束条杢来杭杯東杵杷松板枇析枕林枚果枝枠枢枯架柄柊柏某柑染柔柘柚柱柴査柿栄栖栗校栢株核根格栽桁桂桃案桐桑桓桔桜桟桧桶梅梢梨械棄棒棟森棺椅椋植椎椒椙検楊業楯極楽概構様槙標模権横樹橋機欠次欣欧欲欺欽歌歓止正此武歩歪歯歳歴死殆殉殊残殖殴段殺殿母毎毒比毛氏民気水氷永氾汀汁求汎汗汚汝江池汰汲決汽沈沌沖沙没沢沫河沸油治沼沿況泉泊泌法泡波泣泥注泰泳洋洗洛洞津洪洲活派流浄浅浜浦浩浪浬浮浴海浸消涌涯液涼淀淋淑淡淫淮深淳淵混添清渇済渉渋渚減渡渦温測港湖湛湧湯湿満源準溝溶溺滅滑滝滞滴漁漂漏演漢漫漬漸潔潜潮澄激濁濡濯瀬火灯灰灸灼災炉炊炎炭点為烈烏焔無焦然焼煉煙煤照煩煮煽熊熟熱燃燈爆爪父爽片版牛牝牡牢牧物牲特犠犬犯状狂狆狐狗狙狛狩独狭狸狼猛猟猫献獄獣獲率玉王玩玲玻珂珍珠班現球理琉琢琳環瓜甘生産甥用田由甲申男町画界畏畑留畜略番異疎疑疲疹疾病症痔痛痩痰痴癌癖発登白百的皆皇皮皿盃盆益盗盛盟監盤目直相省眉看県真眠眺眼着睡督睦瞬瞳矛矢知矩短石砂研砕砧砲破硬碇碍碓碩確磨礁礎示礼社祈祉祐祖祝神祢祥票祭禁禄禅禍禎福禾秀私秋科秒秘租秤秦秩称移程税稚種稲稿穀穂積穏穴究空突窃窒窓窟窮窺立竜章童竪端競竹竺竿笑笛笠符第笹筆筈等筋筑筒答策箇箔算管箱節範築篤簡簿籍米粂粉粋粍粒粕粗粘粛粧精糊糖糞糧糸系糾紀約紅紋納純紗紙級紛素紡索紫累細紳紹紺終組経結絞絡絢給統絵絶絹継続綬維綱網綴綻綿緊総緑緒線締編緩練縁縄縦縮績繁繍織繰缶罪罫置罰署罵羊美群義羽翌習翼老考者而耐耕耗耳耶耽聞聴職肉肋肌肖肘肛肝股肢肥肩肪肯育肺胃胆背胎胞胡胴胸能脂脅脇脈脚脱脳腎腐腔腕腫腰腸腹腺膜臆臓臣臨
自臭至致臼興舌舎舗舘舞舟航般舵舶船艦良色芋芙芝芦芭芯花芳芸芹芽苑苔苗苛若苦苫英茂茄茜茨茶草荏荒荘荷荻菅菊菌菓菖菜華萩落葉著蒔蒸蒼蔦蔵蕉薄薪薬藍藤藩藻蘇虎虐虚虜虫虹虻蚊蚕蚤蛆蛇蛍蛙蛛蛤蛭蛮蛸蛾蜂蜘蝉蝋蝎蝕蝮蝿蟻血衆行術街衛衝衣表衰衷衿袈袋袖被袷裁裂装裏裕補裟裡裸製複褐襲西要見規視覚覧親観角解触言訂計訊討訓託記訟訣訪設許訳訴診註証詐詔評詞詣試詩詫詰話該詳誇誉誌認誓誕誘語誠誤説読誰課調談請諌論諦諭諸諾謀謎謙講謝謡謹識譜警議譲護讃谷豆豊豚象豹貝貞負財貢貧貨販貫責貯貰貴買貸費貼貿賀賂賃賄資賊賑賛賜賞賠賢質賭購贈贋赤走赴起超越趣足距跡路跳践踊踏躍身躯車軋軌軍軒軟転軸軽較載輔輝輩輪輸辛辞辰辱農辺辻込辿迂迄迅迎近返迫述迷追退送逃逆透逐途這通逝速造逢連逮週進逸遂遅遇遊運遍過道達違遠遣適選遺避邑邦邪邸郁郊郎郡部郭郵郷都酉酌配酎酒酔酢酪酬酵酷酸醜采釈里重野量金釘針釣釧鈍鈎鈴鈷鉄鉛鉢鉦鉱銀銃銅銑銘銚銭鋭鋼錆錐錠錦錬錯録鍋鎌鎖鎧鎮鏑鏡鑑長門閃閉開閏閑間関閣阜阪防阻阿陀附降限陛院陣除陥陪陰陳陸険陽隅隆隈隊階隔際障隠隣隻隼雀雁雄雅集雇雌雑離難雨雪雫雰雲雷電需震霜青靖静非面革靴鞄鞘韓音頁頂頃項順須預頑領頭頼題額顔願類顧風飛食飢飯飲飴飼飽飾養餌館首香馬馳馴駄駅駆駈駐駒駕駿騎騒験騨骨骸高髪髭鬼魅魔魚鮎鮒鮪鮫鮭鮮鯉鯖鯛鯨鯵鰻鱈鳥鳳鳴鳶鴨鶴鹿麗麦麺麻黄黒黙黛鼎鼓鼠鼻齢龍",2564,regular diff --git a/assets/len_to_p.csv b/assets/len_to_p.csv new file mode 100644 index 0000000..c11df9d --- /dev/null +++ b/assets/len_to_p.csv @@ -0,0 +1,251 @@ +len,p +1,0.014734972701616804 +2,0.05048222747773489 +3,0.05624961536094529 +4,0.05972235654062228 +5,0.05244278768803355 +6,0.05518581363248727 +7,0.046578690556781516 +8,0.04875025276280738 +9,0.04442471185039959 +10,0.04181356215327536 +11,0.040160713186745564 +12,0.041162972666449804 +13,0.03785727473339019 +14,0.03527250028573187 +15,0.03326798132632338 +16,0.0307271656277749 +17,0.028151182929938547 +18,0.025794993977651372 +19,0.024731192249193356 +20,0.021856290057410126 +21,0.021135366572008825 +22,0.019113264112956403 +23,0.017073578154260045 +24,0.015992192926158093 +25,0.013952506967461734 +26,0.012572202245412905 +27,0.011288606771405713 +28,0.009758842302383443 +29,0.008993960067872309 +30,0.008176327334429372 +31,0.0072356101034788955 +32,0.006919107109888081 +33,0.005978389878937605 +34,0.004712377904574347 +35,0.00467721090528648 +36,0.004220039914544191 +37,0.003463949429855024 +38,0.003358448431991419 +39,0.003059528938044539 +40,0.00263752494659012 +41,0.0021891457056697995 +42,0.002364980702109141 +43,0.002013310709230458 +44,0.0019341849608327545 +45,0.0013099707234730928 +46,0.0013363459729389942 +47,0.001204469725609488 +48,0.0011341357270337517 +49,0.0008967584818406407 +50,0.000914341981484575 
+51,0.000914341981484575 +52,0.0007736739843331018 +53,0.0006505894868255629 +54,0.0006681729864694971 +55,0.0005011297398521228 +56,0.0005714637384278593 +57,0.00044837924092032037 +58,0.000395628741988518 +59,0.00031650299359081436 +60,0.00031650299359081436 +61,0.0002813359943029461 +62,0.00026375249465901196 +63,0.0002725442444809791 +64,0.00020221024590524252 +65,0.00032529474341278143 +66,0.00023737724519311078 +67,0.00023737724519311078 +68,0.00022858549537114374 +69,0.00020221024590524252 +70,0.00012308449750753894 +71,0.00010550099786360479 +72,8.791749821967066e-05 +73,0.00012308449750753894 +74,0.00011429274768557187 +75,7.912574839770359e-05 +76,3.516699928786826e-05 +77,7.033399857573652e-05 +78,8.791749821967066e-05 +79,3.516699928786826e-05 +80,2.6375249465901198e-05 +81,6.154224875376947e-05 +82,0.00011429274768557187 +83,7.033399857573652e-05 +84,5.2750498931802396e-05 +85,4.395874910983533e-05 +86,3.516699928786826e-05 +87,8.791749821967066e-05 +88,6.154224875376947e-05 +89,1.758349964393413e-05 +90,1.758349964393413e-05 +91,1.758349964393413e-05 +92,8.791749821967065e-06 +93,3.516699928786826e-05 +94,2.6375249465901198e-05 +95,2.6375249465901198e-05 +96,1.758349964393413e-05 +97,1.758349964393413e-05 +98,4.395874910983533e-05 +99,4.395874910983533e-05 +100,8.791749821967065e-06 +101,8.791749821967065e-06 +102,2.6375249465901198e-05 +103,2.6375249465901198e-05 +104,8.791749821967065e-06 +105,8.791749821967065e-06 +106,1.758349964393413e-05 +107,1.758349964393413e-05 +108,8.791749821967065e-06 +109,8.791749821967065e-06 +110,8.791749821967065e-06 +111,8.791749821967065e-06 +112,8.791749821967065e-06 +113,8.791749821967065e-06 +114,3.516699928786826e-05 +115,2.6375249465901198e-05 +116,2.6375249465901198e-05 +117,2.6375249465901198e-05 +118,8.791749821967065e-06 +119,8.791749821967065e-06 +120,8.791749821967065e-06 +121,8.791749821967065e-06 +122,1.758349964393413e-05 +123,8.791749821967065e-06 +124,8.791749821967065e-06 +125,8.791749821967065e-06 
+126,1.758349964393413e-05 +127,1.758349964393413e-05 +128,1.758349964393413e-05 +129,1.758349964393413e-05 +130,1.758349964393413e-05 +131,8.791749821967065e-06 +132,1.758349964393413e-05 +133,8.791749821967065e-06 +134,8.791749821967065e-06 +135,8.791749821967065e-06 +136,8.791749821967065e-06 +137,8.791749821967065e-06 +138,8.791749821967065e-06 +139,8.791749821967065e-06 +140,8.791749821967065e-06 +141,8.791749821967065e-06 +142,8.791749821967065e-06 +143,8.791749821967065e-06 +144,8.791749821967065e-06 +145,8.791749821967065e-06 +146,8.791749821967065e-06 +147,8.791749821967065e-06 +148,8.791749821967065e-06 +149,8.791749821967065e-06 +150,8.791749821967065e-06 +151,8.791749821967065e-06 +152,8.791749821967065e-06 +153,8.791749821967065e-06 +154,8.791749821967065e-06 +155,8.791749821967065e-06 +156,8.791749821967065e-06 +157,8.791749821967065e-06 +158,8.791749821967065e-06 +159,8.791749821967065e-06 +160,8.791749821967065e-06 +161,8.791749821967065e-06 +162,8.791749821967065e-06 +163,8.791749821967065e-06 +164,8.791749821967065e-06 +165,8.791749821967065e-06 +166,8.791749821967065e-06 +167,8.791749821967065e-06 +168,8.791749821967065e-06 +169,8.791749821967065e-06 +170,8.791749821967065e-06 +171,8.791749821967065e-06 +172,8.791749821967065e-06 +173,8.791749821967065e-06 +174,8.791749821967065e-06 +175,8.791749821967065e-06 +176,8.791749821967065e-06 +177,8.791749821967065e-06 +178,8.791749821967065e-06 +179,8.791749821967065e-06 +180,8.791749821967065e-06 +181,8.791749821967065e-06 +182,8.791749821967065e-06 +183,8.791749821967065e-06 +184,8.791749821967065e-06 +185,8.791749821967065e-06 +186,8.791749821967065e-06 +187,8.791749821967065e-06 +188,8.791749821967065e-06 +189,8.791749821967065e-06 +190,8.791749821967065e-06 +191,8.791749821967065e-06 +192,8.791749821967065e-06 +193,8.791749821967065e-06 +194,8.791749821967065e-06 +195,8.791749821967065e-06 +196,8.791749821967065e-06 +197,8.791749821967065e-06 +198,8.791749821967065e-06 +199,8.791749821967065e-06 
+200,8.791749821967065e-06 +201,8.791749821967065e-06 +202,8.791749821967065e-06 +203,8.791749821967065e-06 +204,8.791749821967065e-06 +205,8.791749821967065e-06 +206,8.791749821967065e-06 +207,8.791749821967065e-06 +208,8.791749821967065e-06 +209,8.791749821967065e-06 +210,8.791749821967065e-06 +211,8.791749821967065e-06 +212,8.791749821967065e-06 +213,8.791749821967065e-06 +214,8.791749821967065e-06 +215,8.791749821967065e-06 +216,8.791749821967065e-06 +217,8.791749821967065e-06 +218,8.791749821967065e-06 +219,8.791749821967065e-06 +220,8.791749821967065e-06 +221,8.791749821967065e-06 +222,8.791749821967065e-06 +223,8.791749821967065e-06 +224,8.791749821967065e-06 +225,8.791749821967065e-06 +226,8.791749821967065e-06 +227,8.791749821967065e-06 +228,8.791749821967065e-06 +229,8.791749821967065e-06 +230,8.791749821967065e-06 +231,8.791749821967065e-06 +232,8.791749821967065e-06 +233,8.791749821967065e-06 +234,8.791749821967065e-06 +235,8.791749821967065e-06 +236,8.791749821967065e-06 +237,8.791749821967065e-06 +238,8.791749821967065e-06 +239,8.791749821967065e-06 +240,8.791749821967065e-06 +241,8.791749821967065e-06 +242,8.791749821967065e-06 +243,8.791749821967065e-06 +244,8.791749821967065e-06 +245,8.791749821967065e-06 +246,8.791749821967065e-06 +247,8.791749821967065e-06 +248,8.791749821967065e-06 +249,8.791749821967065e-06 +250,8.791749821967065e-06 diff --git a/assets/lines_example.csv b/assets/lines_example.csv new file mode 100644 index 0000000..2ef32dc --- /dev/null +++ b/assets/lines_example.csv @@ -0,0 +1,6 @@ +source,id,line +cc-100,cc-100_446088,発展を遂げた貨幣経済に対して、後戻りする形の改革が、民衆に受け入れられるはずもありません。 +cc-100,cc-100_446387,東京都渋谷区本町1丁目4−14 ホームヘルパー(パート:茂原) +cc-100,cc-100_446430,同時に、発表しあう場を増やしたいです。まず、自分の考えを発表するためには、しっかりと自分の考えを持っていなくてはいけません。そのために、ますますノートの必要性を感じることでしょう。また、質問や意見に答えることで、考えが深まります。友達の意見を聞くことが、より理解を深めることを実感してほしいです。 +cc-100,cc-100_446493,※特典の数に限りがございますので、対象商品はお早めにお買い求めください。特典は無くなり次第終了となります。 
+cc-100,cc-100_446543,ハリウッドスターってもっと豪華な生活を送っているのかと思えば、キアヌ・リーブスってかなり質素なんですね。 diff --git a/assets/vocab.csv b/assets/vocab.csv new file mode 100644 index 0000000..331a497 --- /dev/null +++ b/assets/vocab.csv @@ -0,0 +1,5451 @@ +id,char +15,! +16,"""" +17,# +18,$ +19,% +20,& +21,' +22,( +23,) +24,* +25,+ +26,"," +27,- +28,. +29,/ +30,0 +31,1 +32,2 +33,3 +34,4 +35,5 +36,6 +37,7 +38,8 +39,9 +40,: +41,; +42,< +43,= +44,> +45,? +46,@ +47,A +48,B +49,C +50,D +51,E +52,F +53,G +54,H +55,I +56,J +57,K +58,L +59,M +60,N +61,O +62,P +63,Q +64,R +65,S +66,T +67,U +68,V +69,W +70,X +71,Y +72,Z +73,[ +74,\ +75,] +76,^ +77,_ +78,` +79,a +80,b +81,c +82,d +83,e +84,f +85,g +86,h +87,i +88,j +89,k +90,l +91,m +92,n +93,o +94,p +95,q +96,r +97,s +98,t +99,u +100,v +101,w +102,x +103,y +104,z +105,{ +106,| +107,} +108,~ +109,¡ +110,¢ +111,£ +112,¥ +113,§ +114,© +115,« +116,¬ +117,® +118,° +119,± +120,· +121,» +122,¿ +800,□ +801,▲ +802,△ +803,▼ +804,▽ +805,◆ +806,◇ +807,○ +808,◎ +809,● +810,◯ +811,★ +812,☆ +813,☉ +814,♀ +815,♂ +816,♠ +817,♡ +818,♣ +819,♥ +820,♦ +821,♪ +822,♭ +823,♯ +824,✩ +825,⟨ +826,⟩ +827,ⴰ +828,、 +829,。 +830,〃 +831,々 +832,〆 +833,〇 +834,〈 +835,〉 +836,《 +837,》 +838,「 +839,」 +840,『 +841,』 +842,【 +843,】 +844,〒 +845,〔 +846,〕 +847,〜 +848,〝 +849,〟 +850,〰 +851,ぁ +852,あ +853,ぃ +854,い +855,ぅ +856,う +857,ぇ +858,え +859,ぉ +860,お +861,か +862,が +863,き +864,ぎ +865,く +866,ぐ +867,け +868,げ +869,こ +870,ご +871,さ +872,ざ +873,し +874,じ +875,す +876,ず +877,せ +878,ぜ +879,そ +880,ぞ +881,た +882,だ +883,ち +884,ぢ +885,っ +886,つ +887,づ +888,て +889,で +890,と +891,ど +892,な +893,に +894,ぬ +895,ね +896,の +897,は +898,ば +899,ぱ +900,ひ +901,び +902,ぴ +903,ふ +904,ぶ +905,ぷ +906,へ +907,べ +908,ぺ +909,ほ +910,ぼ +911,ぽ +912,ま +913,み +914,む +915,め +916,も +917,ゃ +918,や +919,ゅ +920,ゆ +921,ょ +922,よ +923,ら +924,り +925,る +926,れ +927,ろ +928,ゎ +929,わ +930,ゐ +931,ゑ +932,を +933,ん +934,゙ +935,゚ +936,ゝ +937,ゞ +938,゠ +939,ァ +940,ア +941,ィ +942,イ +943,ゥ +944,ウ +945,ェ +946,エ +947,ォ +948,オ +949,カ +950,ガ +951,キ +952,ギ +953,ク +954,グ 
+955,ケ +956,ゲ +957,コ +958,ゴ +959,サ +960,ザ +961,シ +962,ジ +963,ス +964,ズ +965,セ +966,ゼ +967,ソ +968,ゾ +969,タ +970,ダ +971,チ +972,ヂ +973,ッ +974,ツ +975,ヅ +976,テ +977,デ +978,ト +979,ド +980,ナ +981,ニ +982,ヌ +983,ネ +984,ノ +985,ハ +986,バ +987,パ +988,ヒ +989,ビ +990,ピ +991,フ +992,ブ +993,プ +994,ヘ +995,ベ +996,ペ +997,ホ +998,ボ +999,ポ +1000,マ +1001,ミ +1002,ム +1003,メ +1004,モ +1005,ャ +1006,ヤ +1007,ュ +1008,ユ +1009,ョ +1010,ヨ +1011,ラ +1012,リ +1013,ル +1014,レ +1015,ロ +1016,ヮ +1017,ワ +1018,ヰ +1019,ヱ +1020,ヲ +1021,ン +1022,ヴ +1023,ヵ +1024,ヶ +1025,・ +1026,ー +1027,㆑ +1028,ㇱ +1029,ㇷ +1030,ㇼ +1031,一 +1032,丁 +1033,七 +1034,万 +1035,丈 +1036,三 +1037,上 +1038,下 +1039,不 +1040,与 +1041,丑 +1042,且 +1043,丕 +1044,世 +1045,丘 +1046,丙 +1047,东 +1048,丞 +1049,両 +1050,並 +1051,个 +1052,中 +1053,串 +1054,丸 +1055,丹 +1056,主 +1057,丼 +1058,乂 +1059,乃 +1060,久 +1061,么 +1062,之 +1063,乍 +1064,乎 +1065,乏 +1066,乐 +1067,乖 +1068,乗 +1069,乘 +1070,乙 +1071,九 +1072,乞 +1073,也 +1074,乭 +1075,乱 +1076,乳 +1077,乾 +1078,亀 +1079,了 +1080,予 +1081,争 +1082,事 +1083,二 +1084,于 +1085,云 +1086,互 +1087,五 +1088,井 +1089,亘 +1090,亙 +1091,些 +1092,亜 +1093,亞 +1094,亡 +1095,亢 +1096,交 +1097,亥 +1098,亦 +1099,亨 +1100,享 +1101,京 +1102,亭 +1103,亮 +1104,人 +1105,什 +1106,仁 +1107,仄 +1108,仇 +1109,今 +1110,介 +1111,仍 +1112,仏 +1113,仔 +1114,仕 +1115,他 +1116,仗 +1117,付 +1118,仙 +1119,代 +1120,令 +1121,以 +1122,仮 +1123,仰 +1124,仲 +1125,件 +1126,价 +1127,任 +1128,份 +1129,企 +1130,伊 +1131,伍 +1132,伎 +1133,伏 +1134,伐 +1135,休 +1136,会 +1137,伝 +1138,伯 +1139,伴 +1140,伶 +1141,伸 +1142,伺 +1143,似 +1144,伽 +1145,佃 +1146,但 +1147,佇 +1148,位 +1149,低 +1150,住 +1151,佐 +1152,佑 +1153,体 +1154,何 +1155,佗 +1156,余 +1157,佚 +1158,佛 +1159,作 +1160,佞 +1161,佟 +1162,你 +1163,佩 +1164,佬 +1165,佰 +1166,佳 +1167,併 +1168,佼 +1169,使 +1170,侂 +1171,侃 +1172,來 +1173,侈 +1174,例 +1175,侍 +1176,侏 +1177,侑 +1178,侗 +1179,侘 +1180,供 +1181,依 +1182,侠 +1183,価 +1184,侭 +1185,侮 +1186,侯 +1187,侵 +1188,侶 +1189,便 +1190,俀 +1191,係 +1192,促 +1193,俄 +1194,俊 +1195,俎 +1196,俑 +1197,俗 +1198,俘 +1199,俚 +1200,保 +1201,俟 +1202,俠 +1203,信 +1204,俣 +1205,修 +1206,俯 +1207,俱 +1208,俳 +1209,俵 
+1210,俶 +1211,俸 +1212,俺 +1213,倅 +1214,倉 +1215,個 +1216,倍 +1217,們 +1218,倒 +1219,倖 +1220,候 +1221,倚 +1222,借 +1223,倡 +1224,倣 +1225,値 +1226,倦 +1227,倧 +1228,倩 +1229,倪 +1230,倫 +1231,倭 +1232,倶 +1233,倹 +1234,偃 +1235,假 +1236,偈 +1237,偉 +1238,偏 +1239,偕 +1240,做 +1241,停 +1242,健 +1243,偲 +1244,側 +1245,偵 +1246,偶 +1247,偽 +1248,傀 +1249,傅 +1250,傍 +1251,傑 +1252,傕 +1253,傘 +1254,備 +1255,催 +1256,傭 +1257,傲 +1258,傳 +1259,債 +1260,傷 +1261,傾 +1262,僅 +1263,僉 +1264,僊 +1265,働 +1266,像 +1267,僑 +1268,僕 +1269,僖 +1270,僚 +1271,僥 +1272,僧 +1273,僭 +1274,僻 +1275,儀 +1276,儁 +1277,儂 +1278,億 +1279,儋 +1280,儒 +1281,儔 +1282,儘 +1283,儚 +1284,償 +1285,儡 +1286,優 +1287,儲 +1288,儺 +1289,儼 +1290,兀 +1291,允 +1292,元 +1293,兄 +1294,充 +1295,兆 +1296,兇 +1297,先 +1298,光 +1299,克 +1300,兌 +1301,免 +1302,兎 +1303,児 +1304,兒 +1305,兗 +1306,党 +1307,兜 +1308,入 +1309,內 +1310,全 +1311,兩 +1312,兪 +1313,八 +1314,公 +1315,六 +1316,兮 +1317,共 +1318,兴 +1319,兵 +1320,其 +1321,具 +1322,典 +1323,兼 +1324,冀 +1325,内 +1326,円 +1327,冉 +1328,冊 +1329,册 +1330,再 +1331,冏 +1332,冑 +1333,冒 +1334,冕 +1335,冗 +1336,写 +1337,军 +1338,冠 +1339,冤 +1340,冥 +1341,冨 +1342,冪 +1343,冬 +1344,冰 +1345,冲 +1346,决 +1347,冴 +1348,冶 +1349,冷 +1350,冼 +1351,凄 +1352,准 +1353,凉 +1354,凋 +1355,凌 +1356,凍 +1357,凛 +1358,凜 +1359,凝 +1360,几 +1361,凡 +1362,処 +1363,凧 +1364,凪 +1365,凰 +1366,凱 +1367,凶 +1368,凸 +1369,凹 +1370,出 +1371,函 +1372,刀 +1373,刁 +1374,刃 +1375,刄 +1376,分 +1377,切 +1378,刈 +1379,刊 +1380,刎 +1381,刑 +1382,列 +1383,初 +1384,判 +1385,別 +1386,利 +1387,刪 +1388,到 +1389,刳 +1390,制 +1391,刷 +1392,券 +1393,刹 +1394,刺 +1395,刻 +1396,剃 +1397,剄 +1398,則 +1399,削 +1400,剋 +1401,剌 +1402,前 +1403,剖 +1404,剛 +1405,剝 +1406,剣 +1407,剤 +1408,剥 +1409,剪 +1410,副 +1411,剰 +1412,剱 +1413,割 +1414,創 +1415,剽 +1416,劃 +1417,劇 +1418,劈 +1419,劉 +1420,劍 +1421,劔 +1422,力 +1423,功 +1424,加 +1425,劣 +1426,动 +1427,助 +1428,努 +1429,劫 +1430,劭 +1431,励 +1432,労 +1433,効 +1434,劾 +1435,勁 +1436,勃 +1437,勅 +1438,勇 +1439,勉 +1440,勒 +1441,動 +1442,勗 +1443,勘 +1444,務 +1445,勛 +1446,勝 +1447,募 +1448,勢 +1449,勣 +1450,勤 +1451,勧 +1452,勲 +1453,勳 +1454,勺 +1455,勾 +1456,勿 +1457,匁 +1458,匂 +1459,包 
+1460,匈 +1461,匍 +1462,匐 +1463,匕 +1464,化 +1465,北 +1466,匙 +1467,匝 +1468,匠 +1469,匡 +1470,匣 +1471,匪 +1472,匯 +1473,匱 +1474,匹 +1475,区 +1476,医 +1477,匿 +1478,區 +1479,十 +1480,千 +1481,升 +1482,午 +1483,卉 +1484,半 +1485,卍 +1486,华 +1487,卑 +1488,卒 +1489,卓 +1490,協 +1491,南 +1492,単 +1493,博 +1494,卜 +1495,卞 +1496,占 +1497,卡 +1498,卦 +1499,卬 +1500,卯 +1501,印 +1502,危 +1503,即 +1504,却 +1505,卵 +1506,卷 +1507,卸 +1508,卿 +1509,厄 +1510,厘 +1511,厙 +1512,厚 +1513,厝 +1514,原 +1515,厠 +1516,厥 +1517,厦 +1518,厨 +1519,厩 +1520,厭 +1521,厲 +1522,厳 +1523,去 +1524,参 +1525,參 +1526,又 +1527,叉 +1528,及 +1529,友 +1530,双 +1531,反 +1532,収 +1533,叔 +1534,取 +1535,受 +1536,叙 +1537,叛 +1538,叟 +1539,叡 +1540,叢 +1541,口 +1542,古 +1543,句 +1544,叩 +1545,只 +1546,叫 +1547,召 +1548,叭 +1549,可 +1550,台 +1551,叱 +1552,史 +1553,右 +1554,叶 +1555,号 +1556,司 +1557,吃 +1558,各 +1559,合 +1560,吉 +1561,吊 +1562,吋 +1563,同 +1564,名 +1565,后 +1566,吏 +1567,吐 +1568,向 +1569,吒 +1570,君 +1571,吝 +1572,吟 +1573,吠 +1574,否 +1575,含 +1576,吳 +1577,吶 +1578,吸 +1579,吹 +1580,吻 +1581,吼 +1582,吽 +1583,吾 +1584,呀 +1585,呂 +1586,呆 +1587,呈 +1588,呉 +1589,告 +1590,呑 +1591,呟 +1592,周 +1593,呪 +1594,呰 +1595,味 +1596,呵 +1597,呻 +1598,呼 +1599,命 +1600,咀 +1601,咄 +1602,咆 +1603,咋 +1604,和 +1605,咎 +1606,咒 +1607,咢 +1608,咤 +1609,咥 +1610,咨 +1611,咩 +1612,咫 +1613,咬 +1614,咲 +1615,咳 +1616,咸 +1617,咽 +1618,哀 +1619,品 +1620,哈 +1621,哉 +1622,員 +1623,哥 +1624,哨 +1625,哩 +1626,哭 +1627,哮 +1628,哲 +1629,哺 +1630,唄 +1631,唆 +1632,唇 +1633,唐 +1634,唖 +1635,唯 +1636,唱 +1637,唸 +1638,唹 +1639,唾 +1640,啄 +1641,商 +1642,問 +1643,啓 +1644,啖 +1645,啜 +1646,啤 +1647,啦 +1648,啼 +1649,喀 +1650,善 +1651,喆 +1652,喇 +1653,喉 +1654,喊 +1655,喋 +1656,喘 +1657,喙 +1658,喚 +1659,喜 +1660,喝 +1661,喧 +1662,喩 +1663,喪 +1664,喫 +1665,喬 +1666,喰 +1667,営 +1668,嗄 +1669,嗅 +1670,嗇 +1671,嗚 +1672,嗜 +1673,嗟 +1674,嗣 +1675,嗽 +1676,嘆 +1677,嘉 +1678,嘎 +1679,嘔 +1680,嘗 +1681,嘘 +1682,嘩 +1683,嘯 +1684,嘱 +1685,嘲 +1686,嘴 +1687,噂 +1688,噌 +1689,噛 +1690,器 +1691,噲 +1692,噴 +1693,噺 +1694,嚆 +1695,嚇 +1696,嚢 +1697,嚥 +1698,嚮 +1699,嚴 +1700,嚶 +1701,嚼 +1702,囁 +1703,囂 +1704,囃 +1705,囎 +1706,囚 +1707,四 +1708,回 +1709,因 
+1710,团 +1711,団 +1712,园 +1713,囮 +1714,困 +1715,囲 +1716,図 +1717,围 +1718,固 +1719,国 +1720,囿 +1721,圀 +1722,圃 +1723,圉 +1724,國 +1725,圏 +1726,園 +1727,圓 +1728,圖 +1729,團 +1730,土 +1731,圧 +1732,在 +1733,圩 +1734,圭 +1735,地 +1736,圳 +1737,圻 +1738,址 +1739,坂 +1740,均 +1741,坊 +1742,坎 +1743,坏 +1744,坐 +1745,坑 +1746,坡 +1747,坤 +1748,坦 +1749,坪 +1750,坳 +1751,垂 +1752,型 +1753,垓 +1754,垠 +1755,垢 +1756,垣 +1757,埃 +1758,埈 +1759,埋 +1760,城 +1761,埒 +1762,埔 +1763,埕 +1764,埜 +1765,域 +1766,埠 +1767,埣 +1768,埴 +1769,埵 +1770,執 +1771,培 +1772,基 +1773,埼 +1774,堀 +1775,堂 +1776,堅 +1777,堆 +1778,堕 +1779,堡 +1780,堤 +1781,堪 +1782,堯 +1783,堰 +1784,報 +1785,場 +1786,堵 +1787,堺 +1788,塀 +1789,塁 +1790,塊 +1791,塑 +1792,塔 +1793,塗 +1794,塘 +1795,塙 +1796,塚 +1797,塞 +1798,塢 +1799,塩 +1800,填 +1801,塵 +1802,塹 +1803,塾 +1804,境 +1805,墅 +1806,墉 +1807,墓 +1808,増 +1809,墜 +1810,墟 +1811,墨 +1812,墩 +1813,墳 +1814,墺 +1815,墻 +1816,墾 +1817,壁 +1818,壇 +1819,壊 +1820,壌 +1821,壕 +1822,壙 +1823,壜 +1824,壢 +1825,壩 +1826,士 +1827,壬 +1828,壮 +1829,声 +1830,壱 +1831,売 +1832,壷 +1833,壹 +1834,壺 +1835,壽 +1836,変 +1837,夏 +1838,夔 +1839,夕 +1840,外 +1841,夙 +1842,多 +1843,夛 +1844,夜 +1845,夢 +1846,大 +1847,天 +1848,太 +1849,夫 +1850,夭 +1851,央 +1852,失 +1853,夷 +1854,夾 +1855,奄 +1856,奇 +1857,奈 +1858,奉 +1859,奎 +1860,奏 +1861,奐 +1862,契 +1863,奔 +1864,奕 +1865,套 +1866,奘 +1867,奚 +1868,奠 +1869,奢 +1870,奥 +1871,奧 +1872,奨 +1873,奪 +1874,奭 +1875,奮 +1876,女 +1877,奴 +1878,奸 +1879,好 +1880,如 +1881,妃 +1882,妄 +1883,妊 +1884,妍 +1885,妓 +1886,妖 +1887,妙 +1888,妤 +1889,妥 +1890,妨 +1891,妬 +1892,妲 +1893,妹 +1894,妻 +1895,妾 +1896,姆 +1897,姉 +1898,始 +1899,姐 +1900,姑 +1901,姓 +1902,委 +1903,姚 +1904,姜 +1905,姥 +1906,姦 +1907,姨 +1908,姪 +1909,姫 +1910,姶 +1911,姻 +1912,姿 +1913,威 +1914,娃 +1915,娑 +1916,娘 +1917,娜 +1918,娟 +1919,娠 +1920,娥 +1921,娩 +1922,娯 +1923,娶 +1924,娼 +1925,婁 +1926,婆 +1927,婉 +1928,婕 +1929,婚 +1930,婢 +1931,婦 +1932,婷 +1933,婿 +1934,媒 +1935,媚 +1936,媛 +1937,媯 +1938,媽 +1939,嫁 +1940,嫉 +1941,嫌 +1942,嫡 +1943,嫦 +1944,嫩 +1945,嬉 +1946,嬌 +1947,嬛 +1948,嬢 +1949,嬪 +1950,嬬 +1951,嬰 +1952,嬲 +1953,嬴 +1954,嬶 +1955,孁 +1956,子 +1957,孔 +1958,孕 +1959,字 
+1960,存 +1961,孚 +1962,孜 +1963,孝 +1964,孟 +1965,季 +1966,孤 +1967,学 +1968,孩 +1969,孫 +1970,孵 +1971,學 +1972,孺 +1973,宅 +1974,宇 +1975,守 +1976,安 +1977,宋 +1978,完 +1979,宍 +1980,宏 +1981,宕 +1982,宗 +1983,官 +1984,宙 +1985,定 +1986,宛 +1987,宜 +1988,宝 +1989,実 +1990,客 +1991,宣 +1992,室 +1993,宥 +1994,宦 +1995,宮 +1996,宰 +1997,害 +1998,宴 +1999,宵 +2000,家 +2001,宸 +2002,容 +2003,宿 +2004,寂 +2005,寄 +2006,寅 +2007,密 +2008,寇 +2009,富 +2010,寒 +2011,寓 +2012,寔 +2013,寛 +2014,寝 +2015,察 +2016,寡 +2017,實 +2018,寧 +2019,寨 +2020,審 +2021,寫 +2022,寬 +2023,寮 +2024,寳 +2025,寵 +2026,寶 +2027,寸 +2028,对 +2029,寺 +2030,対 +2031,寿 +2032,封 +2033,専 +2034,射 +2035,将 +2036,將 +2037,專 +2038,尉 +2039,尊 +2040,尋 +2041,對 +2042,導 +2043,小 +2044,少 +2045,尓 +2046,尖 +2047,尚 +2048,尤 +2049,尭 +2050,就 +2051,尸 +2052,尹 +2053,尺 +2054,尻 +2055,尼 +2056,尽 +2057,尾 +2058,尿 +2059,局 +2060,屁 +2061,居 +2062,屈 +2063,届 +2064,屋 +2065,屍 +2066,屎 +2067,屏 +2068,屑 +2069,屓 +2070,展 +2071,属 +2072,屠 +2073,層 +2074,履 +2075,屬 +2076,屯 +2077,山 +2078,岌 +2079,岐 +2080,岑 +2081,岔 +2082,岡 +2083,岩 +2084,岫 +2085,岬 +2086,岱 +2087,岳 +2088,岷 +2089,岸 +2090,峅 +2091,峙 +2092,峠 +2093,峡 +2094,峨 +2095,峪 +2096,峯 +2097,峰 +2098,峴 +2099,島 +2100,峻 +2101,崇 +2102,崋 +2103,崎 +2104,崑 +2105,崔 +2106,崖 +2107,崗 +2108,崙 +2109,崧 +2110,崩 +2111,嵆 +2112,嵊 +2113,嵌 +2114,嵐 +2115,嵜 +2116,嵩 +2117,嵬 +2118,嵯 +2119,嶋 +2120,嶌 +2121,嶝 +2122,嶠 +2123,嶷 +2124,嶺 +2125,嶼 +2126,嶽 +2127,巌 +2128,巍 +2129,巒 +2130,巖 +2131,川 +2132,州 +2133,巡 +2134,巣 +2135,工 +2136,左 +2137,巧 +2138,巨 +2139,巫 +2140,差 +2141,己 +2142,已 +2143,巳 +2144,巴 +2145,巷 +2146,巻 +2147,巽 +2148,巾 +2149,市 +2150,布 +2151,帆 +2152,希 +2153,帖 +2154,帙 +2155,帚 +2156,帛 +2157,帝 +2158,帥 +2159,師 +2160,席 +2161,帯 +2162,帰 +2163,帳 +2164,帶 +2165,帷 +2166,常 +2167,帽 +2168,幀 +2169,幄 +2170,幅 +2171,幇 +2172,幌 +2173,幔 +2174,幕 +2175,幟 +2176,幡 +2177,幢 +2178,幣 +2179,干 +2180,平 +2181,年 +2182,并 +2183,幸 +2184,幹 +2185,幺 +2186,幻 +2187,幼 +2188,幽 +2189,幾 +2190,广 +2191,庁 +2192,広 +2193,庄 +2194,庇 +2195,床 +2196,序 +2197,底 +2198,店 +2199,庚 +2200,府 +2201,庠 +2202,度 +2203,座 +2204,庫 +2205,庭 +2206,庵 +2207,庶 +2208,康 +2209,庸 
+2210,庾 +2211,廂 +2212,廃 +2213,廆 +2214,廈 +2215,廉 +2216,廊 +2217,廓 +2218,廖 +2219,廙 +2220,廞 +2221,廟 +2222,廠 +2223,廣 +2224,廩 +2225,廬 +2226,廳 +2227,延 +2228,廷 +2229,建 +2230,廻 +2231,廼 +2232,廿 +2233,弁 +2234,弄 +2235,弈 +2236,弉 +2237,弊 +2238,弋 +2239,弌 +2240,式 +2241,弐 +2242,弑 +2243,弓 +2244,弔 +2245,引 +2246,弖 +2247,弗 +2248,弘 +2249,弛 +2250,弟 +2251,张 +2252,弥 +2253,弦 +2254,弧 +2255,弩 +2256,弯 +2257,弱 +2258,張 +2259,強 +2260,弼 +2261,弾 +2262,彅 +2263,彊 +2264,彌 +2265,彎 +2266,当 +2267,彗 +2268,彙 +2269,彝 +2270,彡 +2271,形 +2272,彤 +2273,彦 +2274,彧 +2275,彩 +2276,彪 +2277,彫 +2278,彬 +2279,彭 +2280,彰 +2281,影 +2282,彷 +2283,役 +2284,彼 +2285,彿 +2286,往 +2287,征 +2288,徂 +2289,径 +2290,待 +2291,徊 +2292,律 +2293,後 +2294,徐 +2295,徒 +2296,従 +2297,得 +2298,徘 +2299,徙 +2300,從 +2301,徠 +2302,御 +2303,徧 +2304,徨 +2305,復 +2306,循 +2307,徭 +2308,微 +2309,徳 +2310,徴 +2311,德 +2312,徹 +2313,徽 +2314,心 +2315,必 +2316,忌 +2317,忍 +2318,志 +2319,忘 +2320,忙 +2321,応 +2322,忠 +2323,快 +2324,忱 +2325,念 +2326,忻 +2327,忽 +2328,怒 +2329,怖 +2330,怜 +2331,思 +2332,怠 +2333,怡 +2334,急 +2335,性 +2336,怨 +2337,怪 +2338,怯 +2339,恂 +2340,恆 +2341,恋 +2342,恍 +2343,恐 +2344,恒 +2345,恕 +2346,恢 +2347,恣 +2348,恤 +2349,恥 +2350,恨 +2351,恩 +2352,恪 +2353,恫 +2354,恬 +2355,恭 +2356,息 +2357,恰 +2358,恵 +2359,悉 +2360,悌 +2361,悍 +2362,悔 +2363,悛 +2364,悝 +2365,悟 +2366,悠 +2367,患 +2368,悦 +2369,悧 +2370,悩 +2371,悪 +2372,悲 +2373,悳 +2374,悴 +2375,悶 +2376,悸 +2377,悼 +2378,情 +2379,惇 +2380,惑 +2381,惚 +2382,惜 +2383,惟 +2384,惠 +2385,惡 +2386,惣 +2387,惧 +2388,惨 +2389,惰 +2390,想 +2391,惹 +2392,惺 +2393,愁 +2394,愈 +2395,愉 +2396,愍 +2397,意 +2398,愔 +2399,愕 +2400,愚 +2401,愛 +2402,感 +2403,愴 +2404,愷 +2405,愼 +2406,愾 +2407,愿 +2408,慄 +2409,慇 +2410,慈 +2411,慊 +2412,態 +2413,慌 +2414,慎 +2415,慕 +2416,慟 +2417,慢 +2418,慣 +2419,慧 +2420,慨 +2421,慮 +2422,慰 +2423,慶 +2424,慾 +2425,憂 +2426,憊 +2427,憎 +2428,憐 +2429,憑 +2430,憔 +2431,憚 +2432,憤 +2433,憧 +2434,憩 +2435,憫 +2436,憬 +2437,憮 +2438,憲 +2439,憶 +2440,憺 +2441,憾 +2442,懃 +2443,懇 +2444,應 +2445,懊 +2446,懋 +2447,懐 +2448,懣 +2449,懲 +2450,懸 +2451,懺 +2452,懼 +2453,懿 +2454,戀 +2455,戈 +2456,戊 +2457,戌 +2458,戍 +2459,戎 
+2460,成 +2461,我 +2462,戒 +2463,戔 +2464,或 +2465,战 +2466,戚 +2467,戛 +2468,戟 +2469,戡 +2470,戦 +2471,戩 +2472,截 +2473,戮 +2474,戯 +2475,戰 +2476,戴 +2477,戸 +2478,戻 +2479,房 +2480,所 +2481,扁 +2482,扇 +2483,扈 +2484,扉 +2485,手 +2486,才 +2487,扎 +2488,打 +2489,払 +2490,托 +2491,扮 +2492,扱 +2493,扶 +2494,批 +2495,扼 +2496,承 +2497,技 +2498,抄 +2499,抉 +2500,把 +2501,抑 +2502,抒 +2503,投 +2504,抗 +2505,折 +2506,抜 +2507,択 +2508,披 +2509,抱 +2510,抵 +2511,抹 +2512,押 +2513,抽 +2514,担 +2515,拉 +2516,拌 +2517,拍 +2518,拐 +2519,拒 +2520,拓 +2521,拔 +2522,拗 +2523,拘 +2524,拙 +2525,招 +2526,拝 +2527,拠 +2528,拡 +2529,括 +2530,拭 +2531,拮 +2532,拯 +2533,拱 +2534,拳 +2535,拵 +2536,拶 +2537,拷 +2538,拼 +2539,拾 +2540,拿 +2541,持 +2542,挂 +2543,指 +2544,按 +2545,挑 +2546,挖 +2547,挙 +2548,挟 +2549,挨 +2550,挫 +2551,振 +2552,挹 +2553,挺 +2554,挽 +2555,挾 +2556,挿 +2557,捉 +2558,捌 +2559,捏 +2560,捐 +2561,捕 +2562,捗 +2563,捜 +2564,捧 +2565,捨 +2566,捩 +2567,据 +2568,捲 +2569,捷 +2570,捺 +2571,捻 +2572,掃 +2573,授 +2574,掌 +2575,掏 +2576,排 +2577,掖 +2578,掘 +2579,掛 +2580,掟 +2581,掠 +2582,採 +2583,探 +2584,掣 +2585,接 +2586,控 +2587,推 +2588,掩 +2589,措 +2590,掬 +2591,掲 +2592,掴 +2593,掻 +2594,掾 +2595,揃 +2596,揄 +2597,揆 +2598,揉 +2599,描 +2600,提 +2601,揖 +2602,揚 +2603,換 +2604,握 +2605,揮 +2606,援 +2607,揶 +2608,揺 +2609,損 +2610,搗 +2611,搦 +2612,搬 +2613,搭 +2614,携 +2615,搾 +2616,摂 +2617,摔 +2618,摘 +2619,摠 +2620,摩 +2621,摯 +2622,摸 +2623,摺 +2624,撃 +2625,撈 +2626,撒 +2627,撓 +2628,撚 +2629,撞 +2630,撤 +2631,撥 +2632,撫 +2633,播 +2634,撮 +2635,撰 +2636,撲 +2637,撹 +2638,撻 +2639,撼 +2640,擁 +2641,擂 +2642,擅 +2643,操 +2644,擢 +2645,擦 +2646,擬 +2647,擱 +2648,擲 +2649,擾 +2650,攀 +2651,攘 +2652,攝 +2653,攣 +2654,攪 +2655,攫 +2656,攬 +2657,支 +2658,收 +2659,攷 +2660,攸 +2661,改 +2662,攻 +2663,放 +2664,政 +2665,故 +2666,效 +2667,敎 +2668,敏 +2669,救 +2670,敖 +2671,敗 +2672,教 +2673,敞 +2674,敢 +2675,散 +2676,敦 +2677,敬 +2678,数 +2679,敲 +2680,整 +2681,敵 +2682,敷 +2683,數 +2684,斂 +2685,斃 +2686,文 +2687,斉 +2688,斌 +2689,斎 +2690,斐 +2691,斑 +2692,斗 +2693,料 +2694,斛 +2695,斜 +2696,斟 +2697,斡 +2698,斤 +2699,斥 +2700,斧 +2701,斫 +2702,斬 +2703,断 +2704,斯 +2705,新 +2706,方 +2707,於 +2708,施 +2709,旁 
+2710,旅 +2711,旋 +2712,旌 +2713,族 +2714,旒 +2715,旗 +2716,旛 +2717,无 +2718,既 +2719,日 +2720,旦 +2721,旧 +2722,旨 +2723,早 +2724,旬 +2725,旭 +2726,旱 +2727,旺 +2728,旻 +2729,昀 +2730,昂 +2731,昆 +2732,昇 +2733,昉 +2734,昊 +2735,昌 +2736,明 +2737,昏 +2738,易 +2739,昔 +2740,昕 +2741,星 +2742,映 +2743,春 +2744,昧 +2745,昨 +2746,昭 +2747,是 +2748,昱 +2749,昴 +2750,昵 +2751,昶 +2752,昼 +2753,晁 +2754,時 +2755,晃 +2756,晄 +2757,晋 +2758,晏 +2759,晒 +2760,晟 +2761,晤 +2762,晦 +2763,晧 +2764,晨 +2765,晩 +2766,普 +2767,景 +2768,晰 +2769,晴 +2770,晶 +2771,智 +2772,暁 +2773,暇 +2774,暈 +2775,暉 +2776,暎 +2777,暐 +2778,暑 +2779,暖 +2780,暗 +2781,暘 +2782,暢 +2783,暦 +2784,暫 +2785,暮 +2786,暲 +2787,暴 +2788,暹 +2789,曁 +2790,曄 +2791,曇 +2792,曉 +2793,曖 +2794,曙 +2795,曜 +2796,曝 +2797,曠 +2798,曰 +2799,曲 +2800,曳 +2801,更 +2802,曷 +2803,書 +2804,曹 +2805,曺 +2806,曼 +2807,曽 +2808,曾 +2809,替 +2810,最 +2811,會 +2812,月 +2813,有 +2814,朋 +2815,服 +2816,朐 +2817,朔 +2818,朕 +2819,朗 +2820,望 +2821,朝 +2822,期 +2823,朦 +2824,朧 +2825,木 +2826,未 +2827,末 +2828,本 +2829,札 +2830,朮 +2831,朱 +2832,朴 +2833,朶 +2834,机 +2835,朽 +2836,杁 +2837,杉 +2838,李 +2839,杏 +2840,材 +2841,村 +2842,杓 +2843,杖 +2844,杙 +2845,杜 +2846,杞 +2847,束 +2848,条 +2849,杢 +2850,杣 +2851,来 +2852,杭 +2853,杮 +2854,杯 +2855,杰 +2856,東 +2857,杲 +2858,杳 +2859,杵 +2860,杷 +2861,杼 +2862,松 +2863,板 +2864,枇 +2865,枉 +2866,枋 +2867,析 +2868,枓 +2869,枕 +2870,林 +2871,枚 +2872,果 +2873,枝 +2874,枠 +2875,枡 +2876,枢 +2877,枯 +2878,枳 +2879,架 +2880,枷 +2881,枹 +2882,柄 +2883,柊 +2884,柏 +2885,某 +2886,柑 +2887,染 +2888,柔 +2889,柘 +2890,柚 +2891,柞 +2892,柩 +2893,柯 +2894,柱 +2895,柳 +2896,柴 +2897,柵 +2898,査 +2899,柾 +2900,柿 +2901,栂 +2902,栃 +2903,栄 +2904,栓 +2905,栖 +2906,栗 +2907,栞 +2908,校 +2909,栢 +2910,栩 +2911,株 +2912,栴 +2913,核 +2914,根 +2915,格 +2916,栽 +2917,桀 +2918,桁 +2919,桂 +2920,桃 +2921,框 +2922,案 +2923,桐 +2924,桑 +2925,桓 +2926,桔 +2927,桙 +2928,桜 +2929,桝 +2930,桟 +2931,桧 +2932,桴 +2933,桶 +2934,桿 +2935,梁 +2936,梃 +2937,梅 +2938,梓 +2939,梔 +2940,梗 +2941,條 +2942,梟 +2943,梠 +2944,梢 +2945,梧 +2946,梨 +2947,梭 +2948,梯 +2949,械 +2950,梱 +2951,梳 +2952,梵 +2953,梶 +2954,棄 +2955,棋 +2956,棍 +2957,棒 +2958,棕 +2959,棗 
+2960,棘 +2961,棚 +2962,棟 +2963,棠 +2964,棣 +2965,森 +2966,棲 +2967,棹 +2968,棺 +2969,椀 +2970,椅 +2971,椋 +2972,植 +2973,椎 +2974,椏 +2975,椒 +2976,椙 +2977,椚 +2978,椛 +2979,検 +2980,椰 +2981,椴 +2982,椿 +2983,楊 +2984,楓 +2985,楔 +2986,楕 +2987,楚 +2988,楠 +2989,楡 +2990,楢 +2991,楨 +2992,楫 +2993,業 +2994,楮 +2995,楯 +2996,楳 +2997,極 +2998,楷 +2999,楸 +3000,楼 +3001,楽 +3002,概 +3003,榊 +3004,榎 +3005,榑 +3006,榔 +3007,榕 +3008,榛 +3009,榜 +3010,榧 +3011,榮 +3012,榴 +3013,槃 +3014,槇 +3015,槊 +3016,構 +3017,槌 +3018,槍 +3019,槎 +3020,槐 +3021,槓 +3022,様 +3023,槙 +3024,槨 +3025,槻 +3026,槽 +3027,槿 +3028,樂 +3029,樅 +3030,樊 +3031,樋 +3032,樒 +3033,樓 +3034,樗 +3035,標 +3036,樟 +3037,模 +3038,樣 +3039,権 +3040,横 +3041,樫 +3042,樵 +3043,樹 +3044,樺 +3045,樽 +3046,橇 +3047,橈 +3048,橋 +3049,橘 +3050,橙 +3051,機 +3052,橡 +3053,橿 +3054,檀 +3055,檄 +3056,檎 +3057,檗 +3058,檜 +3059,檢 +3060,檣 +3061,檬 +3062,檮 +3063,檳 +3064,檸 +3065,檻 +3066,櫂 +3067,櫃 +3068,櫓 +3069,櫚 +3070,櫛 +3071,櫟 +3072,櫨 +3073,櫻 +3074,欄 +3075,欅 +3076,權 +3077,欒 +3078,欠 +3079,次 +3080,欣 +3081,欧 +3082,欲 +3083,欺 +3084,欽 +3085,款 +3086,歆 +3087,歌 +3088,歎 +3089,歓 +3090,歙 +3091,止 +3092,正 +3093,此 +3094,步 +3095,武 +3096,歩 +3097,歪 +3098,歯 +3099,歳 +3100,歴 +3101,歸 +3102,死 +3103,歿 +3104,殆 +3105,殉 +3106,殊 +3107,残 +3108,殖 +3109,殘 +3110,殤 +3111,殲 +3112,殴 +3113,段 +3114,殷 +3115,殺 +3116,殻 +3117,殼 +3118,殿 +3119,毀 +3120,毅 +3121,毋 +3122,母 +3123,毎 +3124,毐 +3125,毒 +3126,毓 +3127,比 +3128,毖 +3129,毗 +3130,毘 +3131,毛 +3132,毫 +3133,毬 +3134,毯 +3135,氈 +3136,氏 +3137,氐 +3138,民 +3139,気 +3140,氣 +3141,水 +3142,氷 +3143,永 +3144,氾 +3145,汀 +3146,汁 +3147,求 +3148,汎 +3149,汐 +3150,汕 +3151,汗 +3152,汚 +3153,汜 +3154,汝 +3155,江 +3156,池 +3157,汪 +3158,汰 +3159,汲 +3160,汴 +3161,汶 +3162,決 +3163,汽 +3164,汾 +3165,沁 +3166,沂 +3167,沃 +3168,沅 +3169,沈 +3170,沌 +3171,沐 +3172,沓 +3173,沔 +3174,沖 +3175,沙 +3176,沛 +3177,没 +3178,沢 +3179,沪 +3180,沫 +3181,沮 +3182,沱 +3183,河 +3184,沸 +3185,油 +3186,治 +3187,沼 +3188,沽 +3189,沾 +3190,沿 +3191,況 +3192,泄 +3193,泉 +3194,泊 +3195,泌 +3196,泓 +3197,法 +3198,泗 +3199,泛 +3200,泠 +3201,泡 +3202,波 +3203,泣 +3204,泥 +3205,注 +3206,泪 +3207,泮 +3208,泰 +3209,泳 
+3210,洄 +3211,洋 +3212,洌 +3213,洒 +3214,洗 +3215,洙 +3216,洛 +3217,洞 +3218,津 +3219,洩 +3220,洪 +3221,洮 +3222,洱 +3223,洲 +3224,洵 +3225,洸 +3226,活 +3227,洽 +3228,派 +3229,流 +3230,浄 +3231,浅 +3232,浙 +3233,浚 +3234,浜 +3235,浣 +3236,浦 +3237,浩 +3238,浪 +3239,浬 +3240,浮 +3241,浴 +3242,海 +3243,浸 +3244,涂 +3245,涅 +3246,涇 +3247,消 +3248,涌 +3249,涎 +3250,涙 +3251,涛 +3252,涜 +3253,涪 +3254,涯 +3255,液 +3256,涵 +3257,涸 +3258,涼 +3259,涿 +3260,淀 +3261,淄 +3262,淅 +3263,淆 +3264,淇 +3265,淋 +3266,淑 +3267,淘 +3268,淝 +3269,淞 +3270,淡 +3271,淤 +3272,淦 +3273,淨 +3274,淪 +3275,淫 +3276,淮 +3277,深 +3278,淳 +3279,淵 +3280,混 +3281,淸 +3282,淹 +3283,淺 +3284,添 +3285,清 +3286,渇 +3287,済 +3288,渉 +3289,渋 +3290,渓 +3291,渕 +3292,渙 +3293,渚 +3294,減 +3295,渝 +3296,渟 +3297,渠 +3298,渡 +3299,渣 +3300,渤 +3301,渥 +3302,渦 +3303,温 +3304,渫 +3305,測 +3306,渭 +3307,港 +3308,游 +3309,渺 +3310,渾 +3311,湊 +3312,湍 +3313,湖 +3314,湘 +3315,湛 +3316,湟 +3317,湣 +3318,湧 +3319,湫 +3320,湯 +3321,湾 +3322,湿 +3323,満 +3324,源 +3325,準 +3326,溜 +3327,溝 +3328,溟 +3329,溢 +3330,溥 +3331,溪 +3332,溯 +3333,溶 +3334,溺 +3335,滄 +3336,滅 +3337,滇 +3338,滉 +3339,滋 +3340,滎 +3341,滑 +3342,滓 +3343,滔 +3344,滕 +3345,滝 +3346,滞 +3347,滬 +3348,滲 +3349,滴 +3350,滷 +3351,滸 +3352,滾 +3353,滿 +3354,漁 +3355,漂 +3356,漆 +3357,漉 +3358,漏 +3359,漑 +3360,演 +3361,漕 +3362,漠 +3363,漢 +3364,漣 +3365,漫 +3366,漬 +3367,漱 +3368,漲 +3369,漳 +3370,漸 +3371,漿 +3372,潁 +3373,潅 +3374,潔 +3375,潘 +3376,潜 +3377,潟 +3378,潤 +3379,潭 +3380,潮 +3381,潰 +3382,潴 +3383,潼 +3384,澁 +3385,澂 +3386,澄 +3387,澈 +3388,澎 +3389,澗 +3390,澤 +3391,澧 +3392,澪 +3393,澱 +3394,澳 +3395,澹 +3396,激 +3397,濁 +3398,濂 +3399,濃 +3400,濊 +3401,濘 +3402,濟 +3403,濠 +3404,濡 +3405,濤 +3406,濫 +3407,濬 +3408,濮 +3409,濯 +3410,濰 +3411,濱 +3412,濵 +3413,濾 +3414,瀉 +3415,瀋 +3416,瀏 +3417,瀑 +3418,瀕 +3419,瀘 +3420,瀚 +3421,瀛 +3422,瀝 +3423,瀞 +3424,瀟 +3425,瀧 +3426,瀬 +3427,瀾 +3428,灌 +3429,灘 +3430,灞 +3431,灣 +3432,灤 +3433,火 +3434,灯 +3435,灰 +3436,灸 +3437,灼 +3438,災 +3439,炅 +3440,炉 +3441,炊 +3442,炎 +3443,炒 +3444,炙 +3445,炫 +3446,炬 +3447,炭 +3448,炮 +3449,炯 +3450,炳 +3451,炸 +3452,点 +3453,為 +3454,烈 +3455,烏 +3456,烙 +3457,烟 +3458,烹 +3459,烽 
+3460,焉 +3461,焔 +3462,焙 +3463,焚 +3464,無 +3465,焦 +3466,焰 +3467,然 +3468,焼 +3469,煉 +3470,煌 +3471,煎 +3472,煒 +3473,煕 +3474,煖 +3475,煙 +3476,煜 +3477,煤 +3478,煥 +3479,照 +3480,煨 +3481,煩 +3482,煬 +3483,煮 +3484,煽 +3485,熈 +3486,熊 +3487,熏 +3488,熔 +3489,熕 +3490,熙 +3491,熟 +3492,熨 +3493,熱 +3494,熹 +3495,熾 +3496,燁 +3497,燃 +3498,燈 +3499,燎 +3500,燐 +3501,燕 +3502,燗 +3503,營 +3504,燥 +3505,燦 +3506,燧 +3507,燭 +3508,燮 +3509,燵 +3510,燻 +3511,燼 +3512,燾 +3513,燿 +3514,爆 +3515,爛 +3516,爨 +3517,爪 +3518,爬 +3519,爲 +3520,爵 +3521,父 +3522,爺 +3523,爻 +3524,爽 +3525,爾 +3526,牆 +3527,片 +3528,版 +3529,牌 +3530,牒 +3531,牘 +3532,牙 +3533,牛 +3534,牝 +3535,牟 +3536,牡 +3537,牢 +3538,牧 +3539,物 +3540,牲 +3541,特 +3542,牽 +3543,犀 +3544,犁 +3545,犂 +3546,犍 +3547,犠 +3548,犢 +3549,犬 +3550,犯 +3551,状 +3552,狂 +3553,狄 +3554,狆 +3555,狐 +3556,狗 +3557,狙 +3558,狛 +3559,狡 +3560,狩 +3561,独 +3562,狭 +3563,狸 +3564,狼 +3565,狽 +3566,猊 +3567,猗 +3568,猛 +3569,猜 +3570,猟 +3571,猥 +3572,猩 +3573,猪 +3574,猫 +3575,献 +3576,猴 +3577,猶 +3578,猷 +3579,猾 +3580,猿 +3581,獄 +3582,獅 +3583,獏 +3584,獠 +3585,獣 +3586,獨 +3587,獪 +3588,獰 +3589,獲 +3590,獺 +3591,玄 +3592,率 +3593,玉 +3594,王 +3595,玖 +3596,玩 +3597,玫 +3598,玲 +3599,玻 +3600,珀 +3601,珂 +3602,珈 +3603,珉 +3604,珊 +3605,珍 +3606,珠 +3607,珥 +3608,珪 +3609,班 +3610,珸 +3611,現 +3612,球 +3613,琅 +3614,理 +3615,琉 +3616,琛 +3617,琢 +3618,琥 +3619,琦 +3620,琨 +3621,琪 +3622,琬 +3623,琮 +3624,琰 +3625,琲 +3626,琳 +3627,琴 +3628,琵 +3629,琶 +3630,琺 +3631,琿 +3632,瑀 +3633,瑁 +3634,瑋 +3635,瑕 +3636,瑗 +3637,瑙 +3638,瑚 +3639,瑛 +3640,瑜 +3641,瑞 +3642,瑟 +3643,瑠 +3644,瑣 +3645,瑤 +3646,瑩 +3647,瑪 +3648,瑯 +3649,瑰 +3650,瑳 +3651,瑶 +3652,瑾 +3653,璃 +3654,璇 +3655,璋 +3656,璐 +3657,璜 +3658,璞 +3659,璟 +3660,璠 +3661,璧 +3662,璩 +3663,環 +3664,璽 +3665,璿 +3666,瓊 +3667,瓌 +3668,瓏 +3669,瓔 +3670,瓘 +3671,瓚 +3672,瓜 +3673,瓠 +3674,瓢 +3675,瓦 +3676,瓶 +3677,瓷 +3678,甄 +3679,甌 +3680,甑 +3681,甕 +3682,甘 +3683,甚 +3684,甜 +3685,生 +3686,產 +3687,産 +3688,甥 +3689,甦 +3690,用 +3691,甫 +3692,甯 +3693,田 +3694,由 +3695,甲 +3696,申 +3697,男 +3698,甸 +3699,町 +3700,画 +3701,界 +3702,畏 +3703,畑 +3704,畔 +3705,留 +3706,畚 +3707,畜 +3708,畝 +3709,畠 
+3710,畢 +3711,畤 +3712,略 +3713,畦 +3714,番 +3715,畫 +3716,異 +3717,畳 +3718,當 +3719,畷 +3720,畸 +3721,畿 +3722,疆 +3723,疇 +3724,疋 +3725,疎 +3726,疏 +3727,疑 +3728,疝 +3729,疣 +3730,疫 +3731,疱 +3732,疲 +3733,疵 +3734,疸 +3735,疹 +3736,疼 +3737,疽 +3738,疾 +3739,病 +3740,症 +3741,痍 +3742,痒 +3743,痔 +3744,痕 +3745,痘 +3746,痙 +3747,痛 +3748,痢 +3749,痣 +3750,痩 +3751,痰 +3752,痴 +3753,痺 +3754,瘍 +3755,瘡 +3756,瘢 +3757,瘤 +3758,瘴 +3759,瘻 +3760,療 +3761,癇 +3762,癌 +3763,癒 +3764,癖 +3765,癩 +3766,癪 +3767,癬 +3768,癲 +3769,癸 +3770,発 +3771,登 +3772,發 +3773,白 +3774,百 +3775,的 +3776,皆 +3777,皇 +3778,皋 +3779,皎 +3780,皐 +3781,皓 +3782,皖 +3783,皝 +3784,皮 +3785,皴 +3786,皺 +3787,皿 +3788,盂 +3789,盃 +3790,盆 +3791,盈 +3792,益 +3793,盒 +3794,盗 +3795,盛 +3796,盞 +3797,盟 +3798,監 +3799,盤 +3800,盥 +3801,盧 +3802,盪 +3803,目 +3804,盲 +3805,直 +3806,相 +3807,盾 +3808,省 +3809,眈 +3810,眉 +3811,看 +3812,県 +3813,眞 +3814,真 +3815,眠 +3816,眩 +3817,眷 +3818,眸 +3819,眺 +3820,眼 +3821,着 +3822,睡 +3823,睢 +3824,督 +3825,睦 +3826,睨 +3827,睫 +3828,睺 +3829,睾 +3830,睿 +3831,瞑 +3832,瞞 +3833,瞥 +3834,瞬 +3835,瞭 +3836,瞰 +3837,瞳 +3838,瞻 +3839,瞼 +3840,瞽 +3841,瞿 +3842,矗 +3843,矛 +3844,矜 +3845,矢 +3846,矣 +3847,知 +3848,矧 +3849,矩 +3850,短 +3851,矮 +3852,矯 +3853,石 +3854,砂 +3855,砌 +3856,砒 +3857,研 +3858,砕 +3859,砥 +3860,砦 +3861,砧 +3862,砲 +3863,破 +3864,砺 +3865,砿 +3866,硝 +3867,硫 +3868,硬 +3869,硯 +3870,碁 +3871,碇 +3872,碌 +3873,碍 +3874,碑 +3875,碓 +3876,碕 +3877,碗 +3878,碣 +3879,碧 +3880,碩 +3881,碭 +3882,確 +3883,碼 +3884,碾 +3885,磁 +3886,磊 +3887,磋 +3888,磐 +3889,磔 +3890,磧 +3891,磨 +3892,磯 +3893,磾 +3894,礁 +3895,礎 +3896,礒 +3897,礙 +3898,礦 +3899,礪 +3900,礫 +3901,礬 +3902,示 +3903,礼 +3904,礽 +3905,社 +3906,祀 +3907,祁 +3908,祇 +3909,祈 +3910,祉 +3911,祐 +3912,祓 +3913,祖 +3914,祗 +3915,祚 +3916,祜 +3917,祝 +3918,神 +3919,祟 +3920,祠 +3921,祢 +3922,祥 +3923,票 +3924,祭 +3925,祷 +3926,祺 +3927,禁 +3928,禄 +3929,禅 +3930,禊 +3931,禍 +3932,禎 +3933,福 +3934,禕 +3935,禦 +3936,禧 +3937,禪 +3938,禮 +3939,禰 +3940,禹 +3941,禺 +3942,禽 +3943,禾 +3944,禿 +3945,秀 +3946,私 +3947,秉 +3948,秋 +3949,科 +3950,秒 +3951,秘 +3952,租 +3953,秣 +3954,秤 +3955,秦 +3956,秩 +3957,称 +3958,移 +3959,稀 
+3960,稈 +3961,程 +3962,稍 +3963,税 +3964,稔 +3965,稗 +3966,稙 +3967,稚 +3968,稜 +3969,稟 +3970,稠 +3971,種 +3972,稲 +3973,稷 +3974,稻 +3975,稼 +3976,稽 +3977,稿 +3978,穀 +3979,穂 +3980,穆 +3981,積 +3982,穎 +3983,穏 +3984,穢 +3985,穣 +3986,穫 +3987,穴 +3988,究 +3989,穹 +3990,空 +3991,穿 +3992,突 +3993,窃 +3994,窄 +3995,窈 +3996,窒 +3997,窓 +3998,窘 +3999,窟 +4000,窠 +4001,窩 +4002,窪 +4003,窮 +4004,窯 +4005,窺 +4006,竃 +4007,竄 +4008,竇 +4009,竈 +4010,立 +4011,站 +4012,竜 +4013,竟 +4014,章 +4015,竣 +4016,童 +4017,竪 +4018,端 +4019,竴 +4020,競 +4021,竹 +4022,竺 +4023,竿 +4024,笄 +4025,笈 +4026,笊 +4027,笏 +4028,笑 +4029,笘 +4030,笙 +4031,笛 +4032,笞 +4033,笠 +4034,笥 +4035,符 +4036,第 +4037,笹 +4038,筆 +4039,筈 +4040,等 +4041,筋 +4042,筌 +4043,筍 +4044,筏 +4045,筐 +4046,筑 +4047,筒 +4048,答 +4049,策 +4050,筝 +4051,筠 +4052,筥 +4053,筧 +4054,筬 +4055,筮 +4056,筰 +4057,筵 +4058,筺 +4059,箆 +4060,箇 +4061,箋 +4062,箍 +4063,箏 +4064,箒 +4065,箔 +4066,箕 +4067,算 +4068,箙 +4069,箚 +4070,管 +4071,箪 +4072,箭 +4073,箱 +4074,箴 +4075,箸 +4076,節 +4077,篁 +4078,範 +4079,篆 +4080,篇 +4081,築 +4082,篋 +4083,篝 +4084,篠 +4085,篤 +4086,篥 +4087,篩 +4088,篪 +4089,篭 +4090,篳 +4091,簀 +4092,簑 +4093,簒 +4094,簗 +4095,簡 +4096,簪 +4097,簫 +4098,簸 +4099,簾 +4100,簿 +4101,籃 +4102,籌 +4103,籍 +4104,籐 +4105,籔 +4106,籟 +4107,籠 +4108,籤 +4109,籬 +4110,米 +4111,籾 +4112,粁 +4113,粂 +4114,粃 +4115,粉 +4116,粋 +4117,粍 +4118,粒 +4119,粕 +4120,粗 +4121,粘 +4122,粛 +4123,粟 +4124,粤 +4125,粥 +4126,粧 +4127,粲 +4128,粽 +4129,精 +4130,糀 +4131,糊 +4132,糎 +4133,糖 +4134,糜 +4135,糞 +4136,糟 +4137,糠 +4138,糧 +4139,糸 +4140,糺 +4141,系 +4142,糾 +4143,紀 +4144,紂 +4145,約 +4146,紅 +4147,紆 +4148,紇 +4149,紊 +4150,紋 +4151,納 +4152,紐 +4153,純 +4154,紗 +4155,紘 +4156,紙 +4157,級 +4158,紛 +4159,素 +4160,紡 +4161,索 +4162,紫 +4163,紬 +4164,紮 +4165,累 +4166,細 +4167,紳 +4168,紹 +4169,紺 +4170,終 +4171,絃 +4172,組 +4173,絅 +4174,絆 +4175,経 +4176,結 +4177,絞 +4178,絡 +4179,絢 +4180,絣 +4181,給 +4182,絨 +4183,統 +4184,絲 +4185,絳 +4186,絵 +4187,絶 +4188,絹 +4189,綏 +4190,經 +4191,継 +4192,続 +4193,綜 +4194,綝 +4195,綠 +4196,綫 +4197,綬 +4198,維 +4199,綰 +4200,綱 +4201,網 +4202,綴 +4203,綵 +4204,綸 +4205,綺 +4206,綻 +4207,綽 +4208,綾 +4209,綿 
+4210,緊 +4211,緋 +4212,総 +4213,緑 +4214,緒 +4215,緘 +4216,線 +4217,緝 +4218,緞 +4219,締 +4220,編 +4221,緩 +4222,緬 +4223,緯 +4224,練 +4225,緻 +4226,縁 +4227,縄 +4228,縅 +4229,縉 +4230,縊 +4231,縋 +4232,縒 +4233,縛 +4234,縝 +4235,縞 +4236,縢 +4237,縣 +4238,縦 +4239,縫 +4240,縮 +4241,縯 +4242,縵 +4243,縷 +4244,縹 +4245,縺 +4246,總 +4247,績 +4248,繁 +4249,繆 +4250,繇 +4251,繊 +4252,繋 +4253,繍 +4254,織 +4255,繕 +4256,繚 +4257,繞 +4258,繪 +4259,繭 +4260,繰 +4261,繹 +4262,繼 +4263,纂 +4264,續 +4265,纏 +4266,纒 +4267,纓 +4268,红 +4269,级 +4270,线 +4271,缶 +4272,罐 +4273,网 +4274,罔 +4275,罕 +4276,罘 +4277,罠 +4278,罪 +4279,罫 +4280,置 +4281,罰 +4282,署 +4283,罵 +4284,罷 +4285,罹 +4286,羂 +4287,羅 +4288,羆 +4289,羊 +4290,羋 +4291,羌 +4292,美 +4293,羚 +4294,羞 +4295,羣 +4296,群 +4297,羨 +4298,義 +4299,羯 +4300,羲 +4301,羹 +4302,羽 +4303,翁 +4304,翅 +4305,翊 +4306,翌 +4307,翎 +4308,習 +4309,翔 +4310,翟 +4311,翠 +4312,翡 +4313,翦 +4314,翫 +4315,翰 +4316,翳 +4317,翹 +4318,翻 +4319,翼 +4320,耀 +4321,老 +4322,考 +4323,者 +4324,耆 +4325,而 +4326,耐 +4327,耕 +4328,耗 +4329,耘 +4330,耳 +4331,耶 +4332,耽 +4333,耿 +4334,聊 +4335,聖 +4336,聘 +4337,聚 +4338,聞 +4339,聟 +4340,聡 +4341,聯 +4342,聰 +4343,聲 +4344,聳 +4345,聴 +4346,聶 +4347,職 +4348,聾 +4349,肄 +4350,肆 +4351,肇 +4352,肉 +4353,肋 +4354,肌 +4355,肖 +4356,肘 +4357,肛 +4358,肜 +4359,肝 +4360,股 +4361,肢 +4362,肥 +4363,肩 +4364,肪 +4365,肯 +4366,肱 +4367,育 +4368,肴 +4369,肺 +4370,胃 +4371,胆 +4372,背 +4373,胎 +4374,胖 +4375,胚 +4376,胝 +4377,胞 +4378,胡 +4379,胤 +4380,胥 +4381,胱 +4382,胴 +4383,胸 +4384,能 +4385,脂 +4386,脅 +4387,脆 +4388,脇 +4389,脈 +4390,脊 +4391,脚 +4392,脛 +4393,脩 +4394,脱 +4395,脳 +4396,脹 +4397,脾 +4398,腋 +4399,腎 +4400,腐 +4401,腑 +4402,腓 +4403,腔 +4404,腕 +4405,腫 +4406,腰 +4407,腱 +4408,腸 +4409,腹 +4410,腺 +4411,腿 +4412,膀 +4413,膂 +4414,膊 +4415,膏 +4416,膚 +4417,膜 +4418,膝 +4419,膠 +4420,膣 +4421,膨 +4422,膳 +4423,膵 +4424,膺 +4425,膾 +4426,膿 +4427,臀 +4428,臂 +4429,臆 +4430,臈 +4431,臍 +4432,臓 +4433,臘 +4434,臙 +4435,臚 +4436,臣 +4437,臥 +4438,臧 +4439,臨 +4440,自 +4441,臭 +4442,至 +4443,致 +4444,臺 +4445,臻 +4446,臼 +4447,舁 +4448,舂 +4449,舅 +4450,與 +4451,興 +4452,舊 +4453,舌 +4454,舍 +4455,舎 +4456,舐 +4457,舒 +4458,舗 +4459,舘 
+4460,舛 +4461,舜 +4462,舞 +4463,舟 +4464,舩 +4465,航 +4466,舫 +4467,般 +4468,舳 +4469,舵 +4470,舶 +4471,舷 +4472,船 +4473,艀 +4474,艇 +4475,艘 +4476,艙 +4477,艤 +4478,艦 +4479,艮 +4480,良 +4481,艱 +4482,色 +4483,艶 +4484,艸 +4485,艾 +4486,芋 +4487,芍 +4488,芎 +4489,芒 +4490,芙 +4491,芝 +4492,芥 +4493,芦 +4494,芬 +4495,芭 +4496,芮 +4497,芯 +4498,花 +4499,芳 +4500,芷 +4501,芸 +4502,芹 +4503,芻 +4504,芽 +4505,芾 +4506,苅 +4507,苑 +4508,苓 +4509,苔 +4510,苗 +4511,苛 +4512,苞 +4513,苟 +4514,若 +4515,苦 +4516,苧 +4517,苫 +4518,英 +4519,苴 +4520,苺 +4521,苻 +4522,茂 +4523,范 +4524,茄 +4525,茅 +4526,茉 +4527,茎 +4528,茗 +4529,茘 +4530,茜 +4531,茨 +4532,茫 +4533,茯 +4534,茱 +4535,茲 +4536,茶 +4537,茸 +4538,茹 +4539,荀 +4540,荃 +4541,草 +4542,荊 +4543,荏 +4544,荒 +4545,荘 +4546,荷 +4547,荻 +4548,荼 +4549,莆 +4550,莇 +4551,莉 +4552,莊 +4553,莎 +4554,莒 +4555,莘 +4556,莚 +4557,莞 +4558,莢 +4559,莫 +4560,莱 +4561,莽 +4562,菁 +4563,菅 +4564,菊 +4565,菌 +4566,菓 +4567,菖 +4568,菘 +4569,菜 +4570,菟 +4571,菩 +4572,菫 +4573,華 +4574,菰 +4575,菱 +4576,菲 +4577,菴 +4578,萄 +4579,萇 +4580,萊 +4581,萌 +4582,萍 +4583,萎 +4584,萠 +4585,萩 +4586,萬 +4587,萱 +4588,萸 +4589,萼 +4590,落 +4591,葆 +4592,葉 +4593,著 +4594,葛 +4595,葡 +4596,董 +4597,葦 +4598,葫 +4599,葬 +4600,葭 +4601,葯 +4602,葱 +4603,葵 +4604,葺 +4605,蒋 +4606,蒐 +4607,蒔 +4608,蒙 +4609,蒜 +4610,蒟 +4611,蒯 +4612,蒲 +4613,蒴 +4614,蒸 +4615,蒹 +4616,蒻 +4617,蒼 +4618,蒿 +4619,蓁 +4620,蓄 +4621,蓉 +4622,蓋 +4623,蓑 +4624,蓬 +4625,蓮 +4626,蓼 +4627,蔀 +4628,蔑 +4629,蔓 +4630,蔗 +4631,蔚 +4632,蔡 +4633,蔣 +4634,蔦 +4635,蔬 +4636,蔭 +4637,蔵 +4638,蔽 +4639,蕁 +4640,蕃 +4641,蕉 +4642,蕊 +4643,蕎 +4644,蕗 +4645,蕙 +4646,蕤 +4647,蕨 +4648,蕩 +4649,蕪 +4650,蕭 +4651,蕷 +4652,蕾 +4653,薀 +4654,薁 +4655,薄 +4656,薇 +4657,薈 +4658,薊 +4659,薔 +4660,薗 +4661,薙 +4662,薛 +4663,薦 +4664,薨 +4665,薩 +4666,薪 +4667,薫 +4668,薬 +4669,薭 +4670,薮 +4671,薯 +4672,藁 +4673,藉 +4674,藍 +4675,藏 +4676,藝 +4677,藤 +4678,藥 +4679,藩 +4680,藪 +4681,藷 +4682,藺 +4683,藻 +4684,蘂 +4685,蘄 +4686,蘆 +4687,蘇 +4688,蘊 +4689,蘋 +4690,蘚 +4691,蘭 +4692,蘿 +4693,虎 +4694,虐 +4695,虔 +4696,處 +4697,虚 +4698,虜 +4699,虞 +4700,號 +4701,虢 +4702,虫 +4703,虹 +4704,虻 +4705,蚊 +4706,蚕 +4707,蚤 +4708,蚩 +4709,蛆 
+4710,蛇 +4711,蛉 +4712,蛋 +4713,蛍 +4714,蛎 +4715,蛙 +4716,蛛 +4717,蛟 +4718,蛤 +4719,蛭 +4720,蛮 +4721,蛯 +4722,蛸 +4723,蛹 +4724,蛾 +4725,蜀 +4726,蜂 +4727,蜃 +4728,蜆 +4729,蜉 +4730,蜘 +4731,蜚 +4732,蜜 +4733,蜥 +4734,蜴 +4735,蜷 +4736,蜻 +4737,蝉 +4738,蝋 +4739,蝎 +4740,蝕 +4741,蝗 +4742,蝙 +4743,蝠 +4744,蝣 +4745,蝦 +4746,蝮 +4747,蝶 +4748,蝸 +4749,蝿 +4750,螂 +4751,融 +4752,螢 +4753,螳 +4754,螺 +4755,蟄 +4756,蟇 +4757,蟠 +4758,蟲 +4759,蟷 +4760,蟹 +4761,蟻 +4762,蟾 +4763,蠅 +4764,蠍 +4765,蠕 +4766,蠡 +4767,蠢 +4768,蠣 +4769,蠱 +4770,血 +4771,衆 +4772,行 +4773,衍 +4774,衒 +4775,術 +4776,街 +4777,衙 +4778,衛 +4779,衝 +4780,衞 +4781,衡 +4782,衢 +4783,衣 +4784,表 +4785,衫 +4786,衰 +4787,衷 +4788,衾 +4789,衿 +4790,袁 +4791,袂 +4792,袈 +4793,袋 +4794,袍 +4795,袖 +4796,袞 +4797,袢 +4798,被 +4799,袰 +4800,袱 +4801,袴 +4802,袷 +4803,袿 +4804,裁 +4805,裂 +4806,裃 +4807,装 +4808,裏 +4809,裒 +4810,裔 +4811,裕 +4812,補 +4813,裝 +4814,裟 +4815,裡 +4816,裳 +4817,裴 +4818,裵 +4819,裸 +4820,製 +4821,裾 +4822,褄 +4823,複 +4824,褌 +4825,褐 +4826,褒 +4827,褚 +4828,褥 +4829,褪 +4830,褶 +4831,褸 +4832,褻 +4833,襄 +4834,襖 +4835,襞 +4836,襟 +4837,襤 +4838,襦 +4839,襲 +4840,襴 +4841,襷 +4842,西 +4843,要 +4844,覆 +4845,覇 +4846,覈 +4847,見 +4848,規 +4849,視 +4850,覗 +4851,覚 +4852,覧 +4853,親 +4854,覯 +4855,観 +4856,覺 +4857,覽 +4858,觀 +4859,视 +4860,角 +4861,觚 +4862,觜 +4863,解 +4864,触 +4865,言 +4866,訂 +4867,訃 +4868,計 +4869,訊 +4870,訌 +4871,討 +4872,訓 +4873,託 +4874,記 +4875,訛 +4876,訝 +4877,訟 +4878,訢 +4879,訣 +4880,訥 +4881,訪 +4882,設 +4883,許 +4884,訳 +4885,訴 +4886,訶 +4887,診 +4888,註 +4889,証 +4890,詁 +4891,詈 +4892,詐 +4893,詔 +4894,評 +4895,詛 +4896,詞 +4897,詠 +4898,詡 +4899,詢 +4900,詣 +4901,試 +4902,詧 +4903,詩 +4904,詫 +4905,詭 +4906,詮 +4907,詰 +4908,話 +4909,該 +4910,詳 +4911,詵 +4912,詹 +4913,誄 +4914,誅 +4915,誇 +4916,誉 +4917,誌 +4918,認 +4919,誑 +4920,誓 +4921,誕 +4922,誘 +4923,語 +4924,誠 +4925,誡 +4926,誣 +4927,誤 +4928,誥 +4929,誦 +4930,誨 +4931,說 +4932,説 +4933,読 +4934,誰 +4935,課 +4936,誹 +4937,誼 +4938,誾 +4939,調 +4940,談 +4941,請 +4942,諌 +4943,諍 +4944,諏 +4945,諒 +4946,論 +4947,諜 +4948,諝 +4949,諡 +4950,諦 +4951,諧 +4952,諫 +4953,諭 +4954,諮 +4955,諱 +4956,諳 +4957,諶 +4958,諷 +4959,諸 
+4960,諺 +4961,諾 +4962,謀 +4963,謁 +4964,謂 +4965,謄 +4966,謎 +4967,謐 +4968,謔 +4969,謗 +4970,謙 +4971,講 +4972,謝 +4973,謡 +4974,謨 +4975,謬 +4976,謳 +4977,謹 +4978,證 +4979,譏 +4980,識 +4981,譙 +4982,譚 +4983,譜 +4984,警 +4985,譬 +4986,議 +4987,譲 +4988,譴 +4989,護 +4990,譽 +4991,讀 +4992,讃 +4993,變 +4994,讎 +4995,讐 +4996,讒 +4997,讓 +4998,讖 +4999,谷 +5000,谺 +5001,谿 +5002,豆 +5003,豉 +5004,豊 +5005,豎 +5006,豐 +5007,豚 +5008,象 +5009,豪 +5010,豫 +5011,豬 +5012,豳 +5013,豹 +5014,豺 +5015,貂 +5016,貉 +5017,貊 +5018,貌 +5019,貘 +5020,貝 +5021,貞 +5022,負 +5023,財 +5024,貢 +5025,貧 +5026,貨 +5027,販 +5028,貪 +5029,貫 +5030,責 +5031,貯 +5032,貰 +5033,貳 +5034,貴 +5035,貶 +5036,買 +5037,貸 +5038,費 +5039,貼 +5040,貽 +5041,貿 +5042,賀 +5043,賁 +5044,賂 +5045,賃 +5046,賄 +5047,資 +5048,賈 +5049,賊 +5050,賎 +5051,賑 +5052,賓 +5053,賛 +5054,賜 +5055,賞 +5056,賠 +5057,賢 +5058,賣 +5059,賤 +5060,賦 +5061,質 +5062,賭 +5063,購 +5064,賽 +5065,贄 +5066,贅 +5067,贈 +5068,贋 +5069,贍 +5070,贔 +5071,贖 +5072,贛 +5073,赛 +5074,赤 +5075,赦 +5076,赧 +5077,赫 +5078,赭 +5079,走 +5080,赳 +5081,赴 +5082,起 +5083,超 +5084,越 +5085,趙 +5086,趣 +5087,趨 +5088,足 +5089,趾 +5090,跆 +5091,跋 +5092,跎 +5093,跏 +5094,跗 +5095,跛 +5096,距 +5097,跡 +5098,跨 +5099,跪 +5100,路 +5101,跳 +5102,践 +5103,踊 +5104,踏 +5105,踞 +5106,踪 +5107,踰 +5108,踵 +5109,蹂 +5110,蹄 +5111,蹉 +5112,蹊 +5113,蹋 +5114,蹙 +5115,蹟 +5116,蹠 +5117,蹲 +5118,蹴 +5119,蹶 +5120,躁 +5121,躅 +5122,躇 +5123,躊 +5124,躍 +5125,躑 +5126,躓 +5127,躙 +5128,身 +5129,躬 +5130,躯 +5131,躰 +5132,躱 +5133,躾 +5134,軀 +5135,車 +5136,軋 +5137,軌 +5138,軍 +5139,軒 +5140,軕 +5141,軛 +5142,軟 +5143,転 +5144,軫 +5145,軸 +5146,軻 +5147,軼 +5148,軽 +5149,軾 +5150,較 +5151,載 +5152,輌 +5153,輓 +5154,輔 +5155,輛 +5156,輜 +5157,輝 +5158,輦 +5159,輩 +5160,輪 +5161,輯 +5162,輳 +5163,輸 +5164,輻 +5165,輿 +5166,轄 +5167,轅 +5168,轆 +5169,轍 +5170,轟 +5171,轡 +5172,轢 +5173,车 +5174,辛 +5175,辜 +5176,辞 +5177,辟 +5178,辣 +5179,辦 +5180,辨 +5181,辭 +5182,辮 +5183,辯 +5184,辰 +5185,辱 +5186,農 +5187,辷 +5188,辺 +5189,辻 +5190,込 +5191,辿 +5192,迂 +5193,迄 +5194,迅 +5195,迎 +5196,运 +5197,近 +5198,返 +5199,迢 +5200,迥 +5201,迦 +5202,迩 +5203,迪 +5204,迫 +5205,迭 +5206,述 +5207,迴 +5208,迷 +5209,迹 
+5210,追 +5211,退 +5212,送 +5213,逃 +5214,逅 +5215,逆 +5216,逍 +5217,透 +5218,逐 +5219,逓 +5220,途 +5221,逖 +5222,逗 +5223,這 +5224,通 +5225,逝 +5226,逞 +5227,速 +5228,造 +5229,逡 +5230,逢 +5231,連 +5232,逮 +5233,週 +5234,進 +5235,逵 +5236,逸 +5237,逹 +5238,逼 +5239,遁 +5240,遂 +5241,遅 +5242,遇 +5243,遊 +5244,運 +5245,遍 +5246,過 +5247,遐 +5248,道 +5249,達 +5250,違 +5251,遙 +5252,遜 +5253,遠 +5254,遡 +5255,遣 +5256,遥 +5257,適 +5258,遭 +5259,遮 +5260,遵 +5261,遷 +5262,選 +5263,遹 +5264,遺 +5265,遼 +5266,遽 +5267,避 +5268,邀 +5269,邁 +5270,邂 +5271,邃 +5272,還 +5273,邇 +5274,邈 +5275,邉 +5276,邊 +5277,邏 +5278,邑 +5279,邕 +5280,邙 +5281,邠 +5282,邢 +5283,那 +5284,邦 +5285,邨 +5286,邪 +5287,邯 +5288,邱 +5289,邳 +5290,邵 +5291,邸 +5292,邽 +5293,邾 +5294,郁 +5295,郃 +5296,郅 +5297,郊 +5298,郎 +5299,郗 +5300,郛 +5301,郝 +5302,郞 +5303,郡 +5304,郢 +5305,郤 +5306,部 +5307,郭 +5308,郯 +5309,郵 +5310,郷 +5311,都 +5312,鄂 +5313,鄄 +5314,鄒 +5315,鄔 +5316,鄖 +5317,鄙 +5318,鄢 +5319,鄧 +5320,鄭 +5321,鄯 +5322,鄰 +5323,鄱 +5324,鄲 +5325,鄴 +5326,酈 +5327,酉 +5328,酊 +5329,酋 +5330,酌 +5331,配 +5332,酎 +5333,酒 +5334,酔 +5335,酘 +5336,酛 +5337,酢 +5338,酩 +5339,酪 +5340,酬 +5341,酵 +5342,酷 +5343,酸 +5344,醂 +5345,醇 +5346,醉 +5347,醍 +5348,醐 +5349,醒 +5350,醗 +5351,醜 +5352,醤 +5353,醪 +5354,醫 +5355,醸 +5356,采 +5357,釈 +5358,釉 +5359,釋 +5360,里 +5361,重 +5362,野 +5363,量 +5364,釐 +5365,金 +5366,釗 +5367,釘 +5368,釜 +5369,針 +5370,釣 +5371,釦 +5372,釧 +5373,釵 +5374,鈍 +5375,鈎 +5376,鈑 +5377,鈔 +5378,鈕 +5379,鈞 +5380,鈴 +5381,鈷 +5382,鈺 +5383,鈿 +5384,鉄 +5385,鉅 +5386,鉈 +5387,鉉 +5388,鉋 +5389,鉗 +5390,鉛 +5391,鉞 +5392,鉢 +5393,鉤 +5394,鉦 +5395,鉱 +5396,鉾 +5397,銀 +5398,銃 +5399,銅 +5400,銈 +5401,銑 +5402,銓 +5403,銕 +5404,銘 +5405,銚 +5406,銛 +5407,銜 +5408,銭 +5409,鋏 +5410,鋒 +5411,鋤 +5412,鋪 +5413,鋭 +5414,鋲 +5415,鋳 +5416,鋸 +5417,鋺 +5418,鋼 +5419,錆 +5420,錐 +5421,錕 +5422,錘 +5423,錚 +5424,錠 +5425,錢 +5426,錣 +5427,錦 +5428,錨 +5429,錫 +5430,錬 +5431,錮 +5432,錯 +5433,録 +5434,鍋 +5435,鍍 +5436,鍔 +5437,鍛 +5438,鍬 +5439,鍮 +5440,鍵 +5441,鍼 +5442,鍾 +5443,鎌 +5444,鎔 +5445,鎖 +5446,鎗 +5447,鎚 +5448,鎧 +5449,鎬 +5450,鎭 +5451,鎮 +5452,鎰 +5453,鎹 +5454,鏃 +5455,鏑 +5456,鏞 +5457,鏡 +5458,鏢 +5459,鐐 
+5460,鐔 +5461,鐘 +5462,鐙 +5463,鐡 +5464,鐵 +5465,鐸 +5466,鑁 +5467,鑑 +5468,鑒 +5469,鑓 +5470,鑚 +5471,鑢 +5472,鑫 +5473,鑰 +5474,鑲 +5475,鑼 +5476,鑽 +5477,鑿 +5478,铁 +5479,長 +5480,长 +5481,門 +5482,閂 +5483,閃 +5484,閉 +5485,開 +5486,閏 +5487,閑 +5488,閒 +5489,間 +5490,閔 +5491,閖 +5492,閘 +5493,関 +5494,閣 +5495,閤 +5496,閥 +5497,閨 +5498,閩 +5499,閬 +5500,閭 +5501,閲 +5502,閻 +5503,閼 +5504,閾 +5505,闇 +5506,闊 +5507,闍 +5508,闐 +5509,闓 +5510,闕 +5511,闖 +5512,闘 +5513,關 +5514,闡 +5515,闢 +5516,闥 +5517,阜 +5518,阪 +5519,阮 +5520,阯 +5521,防 +5522,阻 +5523,阿 +5524,陀 +5525,陂 +5526,附 +5527,陋 +5528,陌 +5529,降 +5530,限 +5531,陕 +5532,陘 +5533,陛 +5534,陝 +5535,陞 +5536,陟 +5537,院 +5538,陣 +5539,除 +5540,陥 +5541,陪 +5542,陰 +5543,陳 +5544,陵 +5545,陶 +5546,陷 +5547,陸 +5548,険 +5549,陽 +5550,隅 +5551,隆 +5552,隈 +5553,隊 +5554,隋 +5555,階 +5556,随 +5557,隔 +5558,隕 +5559,隗 +5560,隘 +5561,隙 +5562,際 +5563,障 +5564,隠 +5565,隣 +5566,隧 +5567,隨 +5568,隴 +5569,隷 +5570,隻 +5571,隼 +5572,雀 +5573,雁 +5574,雄 +5575,雅 +5576,集 +5577,雇 +5578,雉 +5579,雌 +5580,雍 +5581,雎 +5582,雑 +5583,雒 +5584,雕 +5585,雖 +5586,雙 +5587,雛 +5588,雜 +5589,雞 +5590,離 +5591,難 +5592,雨 +5593,雪 +5594,雫 +5595,雰 +5596,雲 +5597,零 +5598,雷 +5599,雹 +5600,電 +5601,需 +5602,霄 +5603,霆 +5604,震 +5605,霊 +5606,霍 +5607,霑 +5608,霓 +5609,霖 +5610,霜 +5611,霞 +5612,霧 +5613,霰 +5614,露 +5615,霸 +5616,霹 +5617,霽 +5618,靂 +5619,靄 +5620,靈 +5621,靏 +5622,青 +5623,靖 +5624,静 +5625,靚 +5626,靜 +5627,非 +5628,靡 +5629,面 +5630,革 +5631,靫 +5632,靭 +5633,靱 +5634,靳 +5635,靴 +5636,靺 +5637,靼 +5638,鞄 +5639,鞅 +5640,鞆 +5641,鞋 +5642,鞍 +5643,鞏 +5644,鞘 +5645,鞜 +5646,鞠 +5647,鞨 +5648,鞬 +5649,鞭 +5650,鞮 +5651,鞴 +5652,韃 +5653,韋 +5654,韓 +5655,韜 +5656,韮 +5657,音 +5658,韶 +5659,韻 +5660,響 +5661,頁 +5662,頂 +5663,頃 +5664,項 +5665,順 +5666,須 +5667,頊 +5668,頌 +5669,預 +5670,頑 +5671,頒 +5672,頓 +5673,頗 +5674,領 +5675,頚 +5676,頠 +5677,頡 +5678,頤 +5679,頬 +5680,頭 +5681,頴 +5682,頷 +5683,頸 +5684,頻 +5685,頼 +5686,頽 +5687,顆 +5688,題 +5689,額 +5690,顎 +5691,顒 +5692,顓 +5693,顔 +5694,顕 +5695,顗 +5696,願 +5697,顛 +5698,類 +5699,顥 +5700,顧 +5701,顯 +5702,顰 +5703,風 +5704,颪 +5705,颯 +5706,飄 +5707,飛 +5708,飜 +5709,食 
+5710,飡 +5711,飢 +5712,飫 +5713,飯 +5714,飲 +5715,飴 +5716,飼 +5717,飽 +5718,飾 +5719,餃 +5720,餅 +5721,餉 +5722,養 +5723,餌 +5724,餐 +5725,餓 +5726,餘 +5727,餞 +5728,餡 +5729,館 +5730,饅 +5731,饉 +5732,饋 +5733,饌 +5734,饒 +5735,饗 +5736,首 +5737,馗 +5738,香 +5739,馥 +5740,馨 +5741,馬 +5742,馮 +5743,馳 +5744,馴 +5745,駁 +5746,駄 +5747,駅 +5748,駆 +5749,駈 +5750,駐 +5751,駒 +5752,駕 +5753,駙 +5754,駝 +5755,駢 +5756,駱 +5757,駿 +5758,騎 +5759,騏 +5760,騒 +5761,験 +5762,騙 +5763,騨 +5764,騫 +5765,騭 +5766,騰 +5767,騸 +5768,驀 +5769,驃 +5770,驍 +5771,驕 +5772,驚 +5773,驛 +5774,驟 +5775,驢 +5776,驤 +5777,驥 +5778,驩 +5779,驪 +5780,骨 +5781,骸 +5782,髄 +5783,髏 +5784,髑 +5785,體 +5786,高 +5787,髙 +5788,髠 +5789,髢 +5790,髣 +5791,髦 +5792,髪 +5793,髭 +5794,髯 +5795,髴 +5796,髷 +5797,髻 +5798,鬆 +5799,鬘 +5800,鬚 +5801,鬢 +5802,鬣 +5803,鬨 +5804,鬬 +5805,鬱 +5806,鬲 +5807,鬼 +5808,魁 +5809,魂 +5810,魃 +5811,魄 +5812,魅 +5813,魍 +5814,魎 +5815,魏 +5816,魑 +5817,魔 +5818,魚 +5819,魯 +5820,鮎 +5821,鮑 +5822,鮒 +5823,鮓 +5824,鮨 +5825,鮪 +5826,鮫 +5827,鮭 +5828,鮮 +5829,鯉 +5830,鯏 +5831,鯖 +5832,鯛 +5833,鯨 +5834,鯰 +5835,鯱 +5836,鯵 +5837,鰊 +5838,鰍 +5839,鰐 +5840,鰒 +5841,鰓 +5842,鰕 +5843,鰭 +5844,鰯 +5845,鰹 +5846,鰺 +5847,鰻 +5848,鱈 +5849,鱒 +5850,鱗 +5851,鳥 +5852,鳧 +5853,鳩 +5854,鳰 +5855,鳳 +5856,鳴 +5857,鳶 +5858,鴇 +5859,鴈 +5860,鴉 +5861,鴎 +5862,鴛 +5863,鴦 +5864,鴨 +5865,鴫 +5866,鴬 +5867,鴻 +5868,鵄 +5869,鵜 +5870,鵝 +5871,鵞 +5872,鵠 +5873,鵡 +5874,鵬 +5875,鵯 +5876,鵰 +5877,鵲 +5878,鵺 +5879,鶉 +5880,鶏 +5881,鶚 +5882,鶯 +5883,鶴 +5884,鶻 +5885,鷗 +5886,鷦 +5887,鷯 +5888,鷲 +5889,鷹 +5890,鷺 +5891,鸕 +5892,鸚 +5893,鸞 +5894,鹵 +5895,鹸 +5896,鹹 +5897,鹽 +5898,鹿 +5899,麁 +5900,麒 +5901,麓 +5902,麗 +5903,麝 +5904,麟 +5905,麥 +5906,麦 +5907,麩 +5908,麵 +5909,麹 +5910,麺 +5911,麻 +5912,麾 +5913,麿 +5914,黃 +5915,黄 +5916,黌 +5917,黍 +5918,黎 +5919,黑 +5920,黒 +5921,黔 +5922,默 +5923,黙 +5924,黛 +5925,點 +5926,鼈 +5927,鼎 +5928,鼓 +5929,鼠 +5930,鼬 +5931,鼻 +5932,鼾 +5933,齊 +5934,齋 +5935,齎 +5936,齟 +5937,齢 +5938,齧 +5939,齬 +5940,齮 +5941,齲 +5942,龍 +5943,龐 +5944,龔 +5945,龕 +5946,龗 +5947,龙 +5948,龜 +5949,가 +5950,간 +5951,강 +5952,개 +5953,거 +5954,건 +5955,검 +5956,경 +5957,계 +5958,고 +5959,곡 
+5960,공 +5961,과 +5962,관 +5963,광 +5964,교 +5965,구 +5966,국 +5967,군 +5968,권 +5969,규 +5970,그 +5971,글 +5972,금 +5973,기 +5974,길 +5975,김 +5976,나 +5977,낙 +5978,남 +5979,내 +5980,년 +5981,노 +5982,는 +5983,니 +5984,다 +5985,단 +5986,당 +5987,대 +5988,더 +5989,도 +5990,독 +5991,동 +5992,드 +5993,들 +5994,디 +5995,라 +5996,랑 +5997,래 +5998,레 +5999,력 +6000,로 +6001,르 +6002,리 +6003,립 +6004,마 +6005,만 +6006,말 +6007,면 +6008,명 +6009,몬 +6010,무 +6011,문 +6012,물 +6013,미 +6014,민 +6015,바 +6016,박 +6017,반 +6018,방 +6019,배 +6020,버 +6021,법 +6022,베 +6023,병 +6024,보 +6025,부 +6026,북 +6027,비 +6028,빠 +6029,사 +6030,산 +6031,삼 +6032,상 +6033,새 +6034,서 +6035,석 +6036,선 +6037,성 +6038,세 +6039,소 +6040,송 +6041,수 +6042,순 +6043,스 +6044,습 +6045,승 +6046,시 +6047,식 +6048,신 +6049,씨 +6050,아 +6051,안 +6052,야 +6053,약 +6054,양 +6055,어 +6056,언 +6057,에 +6058,여 +6059,역 +6060,연 +6061,영 +6062,오 +6063,온 +6064,와 +6065,완 +6066,요 +6067,용 +6068,우 +6069,운 +6070,울 +6071,원 +6072,위 +6073,유 +6074,윤 +6075,으 +6076,은 +6077,을 +6078,음 +6079,의 +6080,이 +6081,인 +6082,일 +6083,자 +6084,장 +6085,재 +6086,전 +6087,점 +6088,정 +6089,제 +6090,조 +6091,종 +6092,주 +6093,준 +6094,중 +6095,지 +6096,진 +6097,집 +6098,차 +6099,찬 +6100,천 +6101,철 +6102,총 +6103,추 +6104,츠 +6105,카 +6106,코 +6107,크 +6108,타 +6109,태 +6110,터 +6111,통 +6112,트 +6113,파 +6114,평 +6115,포 +6116,표 +6117,프 +6118,피 +6119,하 +6120,학 +6121,한 +6122,함 +6123,합 +6124,항 +6125,해 +6126,행 +6127,허 +6128,혁 +6129,현 +6130,협 +6131,호 +6132,홍 +6133,화 +6134,환 +6135,황 +6136,회 +6137,훈 +6138,휘 +6139,희 +6140,﨑 +6141,﨟 diff --git a/manga_ocr/ocr.py b/manga_ocr/ocr.py index b13dced..0ce5b50 100644 --- a/manga_ocr/ocr.py +++ b/manga_ocr/ocr.py @@ -21,7 +21,7 @@ class MangaOcr: else: logger.info('Using CPU') - self(Path(__file__).parent / 'assets/example.jpg') + self(Path(__file__).parent.parent / 'assets/example.jpg') logger.info('OCR ready') diff --git a/manga_ocr_dev/README.md b/manga_ocr_dev/README.md new file mode 100644 index 0000000..062b795 --- /dev/null +++ b/manga_ocr_dev/README.md @@ -0,0 +1,98 @@ +# Project structure + +``` +assets/ # assets 
(see description below) +manga_ocr/ # release code (inference only) +manga_ocr_dev/ # development code + env.py # global constants + data/ # data preprocessing + synthetic_data_generator/ # generation of synthetic image-text pairs + training/ # model training +``` + +## assets + +### fonts.csv +csv with columns: +- font_path: path to font file, relative to `FONTS_ROOT` +- supported_chars: string of characters supported by this font +- num_chars: number of supported characters +- label: common/regular/special (used to sample regular fonts more often than special) + +List of fonts with metadata used by the synthetic data generator. +The provided file is just an example; you have to generate a similar file for your own set of fonts, +using the `manga_ocr_dev/synthetic_data_generator/scan_fonts.py` script. +Note that `label` will be filled with `regular` by default. You have to label your special fonts manually. + +### lines_example.csv +csv with columns: +- source: source of text +- id: unique id of the line +- line: line from language corpus + +Example of a csv used for synthetic data generation. + +### len_to_p.csv +csv with columns: +- len: length of text +- p: probability of text of this length occurring in manga + +Used by the synthetic data generator to more-or-less match the natural distribution of text lengths. +Computed based on the Manga109-s dataset. + +### vocab.csv +List of all characters supported by the tokenizer. + +# Training OCR + +`env.py` contains global constants used across the repo. Set your paths to data etc. there. + +1. Download the [Manga109-s](http://www.manga109.org/en/download_s.html) dataset. +2. Set `MANGA109_ROOT`, so that your directory structure looks like this: + ``` + MANGA109_ROOT/ + Manga109s_released_2021_02_28/ + annotations/ + annotations.v2018.05.31/ + images/ + books.txt + readme.txt + ``` +3. Preprocess Manga109-s with `data/process_manga109s.py` +4. Optionally generate synthetic data (see below) +5. 
Train with `manga_ocr_dev/training/train.py` +# Synthetic data generation + +Generated data is split into packages (named `0000`, `0001` etc.) for easier management of a large dataset. +Each package is assumed to have a similar data distribution, so that a properly balanced dataset +can be built from any subset of packages. + +The data generation pipeline assumes the following directory structure: + +``` +/ + img/ # generated images (output from generation pipeline) + 0000/ + 0001/ + ... + lines/ # lines from corpus (input to generation pipeline) + 0000.csv + 0001.csv + ... + meta/ # metadata (output from generation pipeline) + 0000.csv + 0001.csv + ... +``` + +To use a language corpus for data generation, `lines/*.csv` files must be provided. +For a small example of such a file see `assets/lines_example.csv`. + +To generate synthetic data: +1. Generate backgrounds with `data/generate_backgrounds.py`. +2. Put your fonts in `FONTS_ROOT`. +3. Generate font metadata with `synthetic_data_generator/scan_fonts.py`. +4. Optionally manually label your fonts with `common/regular/special` labels. +5. Provide the `lines/*.csv` files inside the `lines/` directory of the structure shown above. +6. Run `synthetic_data_generator/run_generate.py` for each package. 
def find_rectangle(mask, y, x, aspect_ratio_range=(0.33, 3.0)):
    """
    Grow a rectangle around the seed point (y, x) until it is blocked on every side.

    Each of the four edges is moved outwards by one pixel per iteration. An edge is frozen as soon as
    it reaches the border of `mask`, or the row/column it moved onto contains a truthy mask entry within
    the current extent of the rectangle. Growth also stops early, returning the current extent,
    once the rectangle's width / height ratio leaves `aspect_ratio_range`.

    :param mask: 2D array; truthy entries mark pixels that block the rectangle
    :param y: row of the seed point
    :param x: column of the seed point
    :param aspect_ratio_range: (min, max) allowed width / height ratio
    :return: tuple (ymin, ymax, xmin, xmax)
    """
    last_row = mask.shape[0] - 1
    last_col = mask.shape[1] - 1

    ymin_ = ymax_ = y
    xmin_ = xmax_ = x

    ymin = ymax = xmin = xmax = None

    while True:
        # Candidates are clamped to the array bounds: a seed lying on the border must freeze that
        # edge immediately instead of stepping outside (negative indices would wrap around).
        if ymin is None:
            ymin_ = max(ymin_ - 1, 0)
            if ymin_ == 0 or mask[ymin_, xmin_:xmax_].any():
                ymin = ymin_

        if ymax is None:
            ymax_ = min(ymax_ + 1, last_row)
            if ymax_ == last_row or mask[ymax_, xmin_:xmax_].any():
                ymax = ymax_

        if xmin is None:
            xmin_ = max(xmin_ - 1, 0)
            if xmin_ == 0 or mask[ymin_:ymax_, xmin_].any():
                xmin = xmin_

        if xmax is None:
            xmax_ = min(xmax_ + 1, last_col)
            if xmax_ == last_col or mask[ymin_:ymax_, xmax_].any():
                xmax = xmax_

        h = ymax_ - ymin_
        w = xmax_ - xmin_
        if h > 1 and w > 1:
            ratio = w / h
            if ratio < aspect_ratio_range[0] or ratio > aspect_ratio_range[1]:
                # too elongated: stop here with whatever extent has been reached
                return ymin_, ymax_, xmin_, xmax_

        if None not in (ymin, ymax, xmin, xmax):
            return ymin, ymax, xmin, xmax
def get_books():
    """
    Build a table of Manga109-s books with the locations of their annotations and images.

    Book titles are read from `books.txt` in the release directory under `MANGA109_ROOT`;
    the annotation file path and the image directory path are derived from each title.

    :return: DataFrame with columns `book`, `annotations` and `images`
    """
    release_dir = MANGA109_ROOT / 'Manga109s_released_2021_02_28'
    titles = (release_dir / 'books.txt').read_text().splitlines()

    annotation_paths = []
    image_dirs = []
    for title in titles:
        annotation_paths.append(str(release_dir / 'annotations' / f'{title}.xml'))
        image_dirs.append(str(release_dir / 'images' / title))

    return pd.DataFrame({
        'book': titles,
        'annotations': annotation_paths,
        'images': image_dirs,
    })
def export_crops():
    """
    Export Manga109-s text annotations to `data.csv` and save a crop of every text box.

    For each `text` element of each annotated page one row is written with the book, page and
    bounding box information, a random train/test split label (10% test) and the path of the crop.
    `page_path` and `crop_path` are stored relative to `MANGA109_ROOT`, using forward slashes.
    Crops are cut from the page image with a 10 px margin (clipped to the page) and saved as PNG
    files in `MANGA109_ROOT / 'crops'`.

    :raises FileNotFoundError: if a page image cannot be read
    """
    crops_root = MANGA109_ROOT / 'crops'
    crops_root.mkdir(parents=True, exist_ok=True)
    margin = 10

    books = get_books()

    data = []
    for book in tqdm(books.itertuples(), total=len(books)):
        root = ET.parse(book.annotations).getroot()
        for page in root.findall('./pages/page'):
            page_index = int(page.attrib['index'])
            page_path = str(Path(book.images) / f'{page_index:03d}.jpg')
            page_width = int(page.attrib['width'])
            page_height = int(page.attrib['height'])
            for text in page.findall('./text'):
                data.append({
                    'book': book.book,
                    'page_index': page_index,
                    'page_path': page_path,
                    'page_width': page_width,
                    'page_height': page_height,
                    'id': text.attrib['id'],
                    'text': text.text,
                    'xmin': int(text.attrib['xmin']),
                    'ymin': int(text.attrib['ymin']),
                    'xmax': int(text.attrib['xmax']),
                    'ymax': int(text.attrib['ymax']),
                })
    data = pd.DataFrame(data)

    n_test = int(0.1 * len(data))
    data['split'] = 'train'
    data.loc[data.sample(n_test).index, 'split'] = 'test'

    # Build the relative crop path directly with '/' instead of joining with a Windows-only
    # backslash and re-splitting it, which gave wrong paths on POSIX systems.
    data['crop_path'] = crops_root.name + '/' + data.id + '.png'

    # keep the last four components (release dir / images / book / page file)
    data.page_path = data.page_path.apply(lambda x: '/'.join(Path(x).parts[-4:]))
    data.to_csv(MANGA109_ROOT / 'data.csv', index=False)

    for page_path, boxes in tqdm(data.groupby('page_path'), total=data.page_path.nunique()):
        img = cv2.imread(str(MANGA109_ROOT / page_path))
        if img is None:
            # cv2.imread signals failure by returning None instead of raising
            raise FileNotFoundError(f'Could not read page image: {MANGA109_ROOT / page_path}')

        for box in boxes.itertuples():
            xmin = max(box.xmin - margin, 0)
            xmax = min(box.xmax + margin, img.shape[1])
            ymin = max(box.ymin - margin, 0)
            ymax = min(box.ymax + margin, img.shape[0])
            crop = img[ymin:ymax, xmin:xmax]
            # same file name as recorded in crop_path (with_suffix would alter ids containing a dot)
            out_path = crops_root / f'{box.id}.png'
            cv2.imwrite(str(out_path), crop)
+ +Features: +- using either text from corpus or random text +- text overlaid on background images +- drawing text bubbles +- various fonts and font styles +- variety of text layouts: + - vertical and horizontal text + - multi-line text + - [furigana](https://en.wikipedia.org/wiki/Furigana) (added randomly) + - [tate chū yoko](https://www.w3.org/International/articles/vertical-text/#tcy) + + +Text rendering is done using [html2image](https://github.com/vgalin/html2image), +which is a wrapper around the headless mode of the Chrome/Chromium browser. +It's not a very elegant solution, and it is very slow, but it only needs to be run once, +and when parallelized, processing time is manageable (~17 min per 10000 images on a 16-thread machine). + +The upside of this approach is that the quite complex problem of typesetting and text rendering +(especially when dealing with both horizontal and vertical text) is offloaded to +the browser engine, keeping the codebase relatively simple and extensible. + +The high-level generation pipeline is as follows: +1. Preprocess text (truncate and/or split into lines, add random furigana). +2. Render text on a transparent background, using the HTML engine. +3. Select a background image from the backgrounds dataset. +4. Overlay the text on the background, optionally drawing a bubble around the text. 
def process(self, text=None, override_css_params=None):
    """
    Generate image, text pair. Use source text if provided, otherwise generate random text.

    :param text: source text; if None, random text is generated from the characters of the chosen font
    :param override_css_params: optional dict of CSS parameters overriding the random ones;
        a `font_path` entry fixes the font. The dict passed in is not modified.
    :return: tuple (img, text_gt, params) - rendered image, ground truth text with lines separated
        by a newline, and the parameters returned by the renderer
    """

    # work on a copy so that the caller's dict is never mutated
    override_css_params = dict(override_css_params or {})

    if text is None:
        # if using random text, choose font first,
        # and then generate text using only characters supported by that font
        if 'font_path' not in override_css_params:
            override_css_params['font_path'] = self.get_random_font()
        vocab = self.font_map[override_css_params['font_path']]

        words = self.get_random_words(vocab)

    else:
        # NOTE(review): the source showed a no-op `replace(' ', ' ')`; presumably the first
        # argument was an ideographic space (U+3000) lost to whitespace normalisation - confirm
        text = text.replace('\u3000', ' ')
        text = text.replace('…', '...')
        words = self.split_into_words(text)

    lines = self.words_to_lines(words)
    text_gt = '\n'.join(lines)

    if 'font_path' not in override_css_params:
        override_css_params['font_path'] = self.get_random_font(text_gt)

    font_path = override_css_params.get('font_path')
    vocab = self.font_map.get(font_path) if font_path else None

    if vocab is not None:
        # remove unsupported characters
        lines = [''.join(c for c in line if c in vocab) for line in lines]
        text_gt = '\n'.join(lines)
    # else: font without metadata - nothing to filter against, keep the text as is

    if np.random.random() < 0.5:
        word_prob = np.random.choice([0.33, 1.0], p=[0.3, 0.7])

        lines = [self.add_random_furigana(line, word_prob, vocab) for line in lines]

    img, params = self.renderer.render(lines, override_css_params)
    return img, text_gt, params
def is_font_supporting_text(self, font_path, text):
    """
    Check whether every non-whitespace character of `text` is supported by the given font.

    :param font_path: font path used as key of `self.font_map`
    :param text: text to check
    :return: True if all non-whitespace characters are in the font's character set;
        False otherwise, or if the font has no entry in `self.font_map`
    """
    chars = self.font_map.get(font_path)
    if chars is None:
        # a font without a character set cannot be assumed to support anything
        return False
    return all(c.isspace() or c in chars for c in text)
def get_random_font(self, text=None):
    """
    Sample a font, first drawing a font label according to `self.font_p`.

    :param text: if given, prefer fonts of the drawn label that support all characters of this text
    :return: `font_path` value of the sampled row of `self.fonts_df`, as str
    """
    label = np.random.choice(self.font_labels, p=self.font_p)
    df = self.fonts_df[self.fonts_df.label == label]

    if text is None:
        return str(df.sample(1).iloc[0].font_path)

    valid_mask = df.font_path.apply(lambda x: self.is_font_supporting_text(x, text))
    if not valid_mask.any():
        # if text contains characters not supported by any font, just pick some of the more capable fonts
        valid_mask = (df.num_chars >= 4000)
        if not valid_mask.any():
            # no large font with this label either: fall back to the most capable one(s)
            valid_mask = (df.num_chars == df.num_chars.max())

    # font_path is returned unchanged in both branches, so that it can be used as a key of font_map
    return str(df[valid_mask].sample(1).iloc[0].font_path)
@staticmethod
def get_random_css_params():
    """
    Draw a random set of CSS parameters for text rendering.

    Font size, line height and colors are fixed; writing direction, text orientation and the
    outline variant (stroke, shadow or none) are sampled randomly.

    :return: dict of parameters; `text_orientation`, `stroke_*` and `shadow_*` keys are present
        only when the corresponding variant was drawn
    """
    params = {
        'font_size': 48,
        'vertical': bool(np.random.rand() < 0.7),
        'line_height': 0.5,
        'background_color': 'transparent',
        'text_color': 'black',
    }

    if np.random.rand() < 0.7:
        params['text_orientation'] = 'upright'

    stroke_variant = np.random.choice(['stroke', 'shadow', 'none'], p=[0.8, 0.15, 0.05])
    if stroke_variant == 'stroke':
        # plain ints rather than numpy scalars, so the values behave like ordinary numbers downstream
        params['stroke_size'] = int(np.random.choice([1, 2, 3, 4, 8]))
        params['stroke_color'] = 'white'
    elif stroke_variant == 'shadow':
        params['shadow_size'] = int(np.random.choice([2, 5, 10]))
        # no trailing comma here: it would turn the color into a 1-tuple
        params['shadow_color'] = 'white' if np.random.rand() < 0.8 else 'black'

    return params
+ thickness = np.random.choice([1, 2, 3]) + alpha = np.random.randint(60, 100) + sigma = np.random.randint(10, 15) + + ymin = m0 - int(min(img.shape[:2]) * np.random.uniform(0.07, 0.12)) + ymax = img.shape[0] - m0 + int(min(img.shape[:2]) * np.random.uniform(0.07, 0.12)) + xmin = m0 - int(min(img.shape[:2]) * np.random.uniform(0.07, 0.12)) + xmax = img.shape[1] - m0 + int(min(img.shape[:2]) * np.random.uniform(0.07, 0.12)) + + bubble_fill_color = (255, 255, 255, 255) + bubble_contour_color = (0, 0, 0, 255) + bubble = np.zeros((img.shape[0], img.shape[1], 4), dtype=np.uint8) + bubble = rounded_rectangle(bubble, (xmin, ymin), (xmax, ymax), radius=radius, color=bubble_fill_color, + thickness=-1) + bubble = rounded_rectangle(bubble, (xmin, ymin), (xmax, ymax), radius=radius, color=bubble_contour_color, + thickness=thickness) + + t = [ + A.ElasticTransform(alpha=alpha, sigma=sigma, alpha_affine=0, p=0.8), + ] + bubble = A.Compose(t)(image=bubble)['image'] + + background = blend(bubble, background) + + img = blend(img, background) + + ymin = m0 - int(min(img.shape[:2]) * np.random.uniform(0.01, 0.2)) + ymax = img.shape[0] - m0 + int(min(img.shape[:2]) * np.random.uniform(0.01, 0.2)) + xmin = m0 - int(min(img.shape[:2]) * np.random.uniform(0.01, 0.2)) + xmax = img.shape[1] - m0 + int(min(img.shape[:2]) * np.random.uniform(0.01, 0.2)) + img = img[ymin:ymax, xmin:xmax] + return img + + def lines_to_html(self, lines): + lines_str = '\n'.join(['

' + line + '

def crop_by_alpha(img, margin):
    """
    Crop an image to the bounding box of its non-transparent pixels and pad it with a margin.

    :param img: array indexed as [row, column, channel]; channel 3 is used as alpha
    :param margin: number of zero-valued pixels added on each side after cropping
    :return: cropped and padded image
    :raises ValueError: if the image has no pixel with alpha > 0
    """
    y, x = np.where(img[:, :, 3] > 0)
    if y.size == 0:
        raise ValueError('Cannot crop by alpha: image is fully transparent')

    # slice ends are exclusive, so +1 keeps the last non-transparent row and column
    img = img[y.min():y.max() + 1, x.min():x.max() + 1]
    img = np.pad(img, ((margin, margin), (margin, margin), (0, 0)))
    return img
corner_radius, p3[1]), color, abs(thickness), line_type) + cv2.line(src, (p4[0], p4[1] - corner_radius), (p1[0], p1[1] + corner_radius), color, abs(thickness), line_type) + + # draw arcs + cv2.ellipse(src, (p1[0] + corner_radius, p1[1] + corner_radius), (corner_radius, corner_radius), 180.0, 0, 90, + color, thickness, line_type) + cv2.ellipse(src, (p2[0] - corner_radius, p2[1] + corner_radius), (corner_radius, corner_radius), 270.0, 0, 90, + color, thickness, line_type) + cv2.ellipse(src, (p3[0] - corner_radius, p3[1] - corner_radius), (corner_radius, corner_radius), 0.0, 0, 90, color, + thickness, line_type) + cv2.ellipse(src, (p4[0] + corner_radius, p4[1] - corner_radius), (corner_radius, corner_radius), 90.0, 0, 90, color, + thickness, line_type) + + return src + + +def get_css( + font_size, + font_path, + vertical=True, + background_color='white', + text_color='black', + shadow_size=0, + shadow_color='black', + stroke_size=0, + stroke_color='black', + letter_spacing=None, + line_height=0.5, + text_orientation=None, +): + styles = [ + f"background-color: {background_color};", + f"font-size: {font_size}px;", + f"color: {text_color};", + "font-family: custom;", + f"line-height: {line_height};", + "margin: 20px;", + ] + + if text_orientation: + styles.append(f"text-orientation: {text_orientation};") + + if vertical: + styles.append("writing-mode: vertical-rl;") + + if shadow_size > 0: + styles.append(f"text-shadow: 0 0 {shadow_size}px {shadow_color};") + + if stroke_size > 0: + # stroke is simulated by shadow overlaid multiple times + styles.extend([ + f"text-shadow: " + ','.join([f"0 0 {stroke_size}px {stroke_color}"] * 10 * stroke_size) + ";", + "-webkit-font-smoothing: antialiased;", + ]) + + if letter_spacing: + styles.append(f"letter-spacing: {letter_spacing}em;") + + font_path = font_path.replace('\\', '/') + + styles_str = '\n'.join(styles) + css = "" + css += '\n@font-face {\nfont-family: custom;\nsrc: url("' + font_path + '");\n}\n' + css += "body {\n" + 
def run(package=0, n_random=1000, n_limit=None, max_workers=16):
    """
    Generate one package of synthetic data: images in `img/<package>/` and metadata in `meta/<package>.csv`.

    Samples for which generation failed (the worker `f` returned None) are reported and left out
    of the metadata.

    :param package: number of data package to generate
    :param n_random: how many samples with random text to generate
    :param n_limit: limit number of generated samples (for debugging)
    :param max_workers: max number of workers
    """

    package = f'{package:04d}'
    lines = pd.read_csv(DATA_SYNTHETIC_ROOT / f'lines/{package}.csv')
    random_lines = pd.DataFrame({
        'source': 'random',
        'id': [f'random_{package}_{i}' for i in range(n_random)],
        'line': None
    })
    lines = pd.concat([lines, random_lines], ignore_index=True)
    if n_limit:
        # never ask for more samples than there are lines
        lines = lines.sample(min(n_limit, len(lines)))
    args = [(i, *values) for i, values in enumerate(lines.values)]

    # the worker function reads the output directory from this module-level variable
    global OUT_DIR
    OUT_DIR = DATA_SYNTHETIC_ROOT / 'img' / package
    OUT_DIR.mkdir(parents=True, exist_ok=True)

    results = thread_map(f, args, max_workers=max_workers, desc=f'Processing package {package}')

    # failed samples come back as None and cannot be turned into metadata rows
    data = [row for row in results if row is not None]
    n_failed = len(results) - len(data)
    if n_failed:
        print(f'{n_failed} of {len(results)} samples failed and were skipped')

    data = pd.DataFrame(data, columns=['source', 'id', 'text', 'vertical', 'font_path'])
    meta_path = DATA_SYNTHETIC_ROOT / f'meta/{package}.csv'
    meta_path.parent.mkdir(parents=True, exist_ok=True)
    data.to_csv(meta_path, index=False)
def get_background_df(background_dir):
    """
    List background images together with their sizes, as encoded in the file names.

    The last four underscore-separated parts of each file stem are read as `ymin, ymax, xmin, xmax`.
    Entries whose name does not follow this pattern, or which describe an empty box, are skipped.

    :param background_dir: directory (Path) containing background images
    :return: DataFrame with columns `path`, `h`, `w` and `ratio` (w / h)
    """
    rows = []
    # sorted, so that the row order does not depend on the file system
    for path in sorted(background_dir.iterdir()):
        try:
            ymin, ymax, xmin, xmax = [int(v) for v in path.stem.split('_')[-4:]]
        except ValueError:
            # not a background crop file (wrong number of parts or non-integer parts)
            continue

        h = ymax - ymin
        w = xmax - xmin
        if h <= 0 or w <= 0:
            continue

        rows.append({
            'path': str(path),
            'h': h,
            'w': w,
            'ratio': w / h,
        })

    # explicit columns keep the frame usable even when no background was found
    return pd.DataFrame(rows, columns=['path', 'h', 'w', 'ratio'])
def get_font_meta():
    """
    Load font metadata from `fonts.csv` in the assets directory.

    Fonts without any recorded supported characters are dropped, so that every font in the
    returned table also has an entry in the returned map.

    :return: tuple (df, font_map) - font table with `font_path` prefixed with `FONTS_ROOT`,
        and a dict mapping each `font_path` to the set of characters supported by that font
    """
    # read supported_chars as text, so that a charset made of digits only is not parsed as a number
    df = pd.read_csv(ASSETS_PATH / 'fonts.csv', dtype={'supported_chars': str})
    # a font with no supported characters cannot render any text
    df = df.dropna(subset=['supported_chars'])
    df.font_path = df.font_path.apply(lambda x: str(FONTS_ROOT / x))
    font_map = {row.font_path: set(row.supported_chars) for row in df.itertuples()}
    return df, font_map
@staticmethod
def read_image(processor, path, transform=None):
    """
    Read an image from disk, apply a transform and convert it to model input.

    :param processor: callable applied as `processor(img, return_tensors="pt")`; its `pixel_values` are returned
    :param path: path to the image file
    :param transform: transform called as `transform(image=img)['image']`; defaults to grayscale conversion
    :return: `pixel_values` of the processor output with singleton dimensions squeezed out
    :raises FileNotFoundError: if the image cannot be read
    """
    img = cv2.imread(str(path))
    if img is None:
        # cv2.imread returns None instead of raising when the file is missing or unreadable
        raise FileNotFoundError(f'Could not read image: {path}')

    if transform is None:
        transform = A.ToGray(always_apply=True)

    img = transform(image=img)['image']

    pixel_values = processor(img, return_tensors="pt").pixel_values
    return pixel_values.squeeze()
if __name__ == '__main__':
    # Manual visual check: fetch sample 0 twenty times with augmentation
    # enabled, print the decoded label and display the resulting image.
    from manga_ocr_dev.training.get_model import get_processor
    from manga_ocr_dev.training.utils import tensor_to_image

    processor = get_processor('facebook/deit-tiny-patch16-224',
                              'cl-tohoku/bert-base-japanese-char-v2')
    dataset = MangaDataset(processor, 'train', 300, augment=True)

    for attempt in range(20):
        item = dataset[0]
        preview = tensor_to_image(item['pixel_values'])

        # -100 marks ignored label positions; restore the pad id so the
        # sequence can be decoded.
        label_ids = item['labels']
        label_ids[label_ids == -100] = processor.tokenizer.pad_token_id
        decoded = processor.decode(label_ids, skip_special_tokens=True)
        text = ''.join(decoded.split())

        print(f'{attempt}:\n{text}\n')
        plt.imshow(preview)
        plt.show()
def get_processor(encoder_name, decoder_name):
    """Build a processor from pretrained encoder and decoder components.

    Args:
        encoder_name: Identifier passed to
            ``AutoFeatureExtractor.from_pretrained``.
        decoder_name: Identifier passed to ``AutoTokenizer.from_pretrained``.

    Returns:
        A ``TrOCRProcessorCustom`` wrapping the loaded feature extractor and
        tokenizer.
    """
    return TrOCRProcessorCustom(
        AutoFeatureExtractor.from_pretrained(encoder_name),
        AutoTokenizer.from_pretrained(decoder_name),
    )
class Metrics:
    """Compute character error rate and exact-match accuracy for evaluation."""

    def __init__(self, processor):
        """Load the ``cer`` metric and keep the processor used for decoding.

        Args:
            processor: Object exposing ``batch_decode`` and
                ``tokenizer.pad_token_id``.
        """
        self.cer_metric = load_metric("cer")
        self.processor = processor

    def compute_metrics(self, pred):
        """Decode predictions and labels and score them.

        Args:
            pred: Object with ``label_ids`` and ``predictions`` arrays of
                token ids. Neither array is modified.

        Returns:
            Dict with ``'cer'`` (the value returned by the CER metric, or NaN
            when its computation raised) and ``'accuracy'`` (fraction of
            predictions equal to their label after whitespace removal).
        """
        pad_token_id = self.processor.tokenizer.pad_token_id

        # -100 is the ignore marker used for padded label positions and is not
        # a decodable token id. np.where builds new arrays, so the caller's
        # data is left untouched. Predictions get the same guard in case they
        # were padded with the same marker.
        label_ids = np.where(pred.label_ids == -100, pad_token_id, pred.label_ids)
        pred_ids = np.where(pred.predictions == -100, pad_token_id, pred.predictions)

        pred_str = self.processor.batch_decode(pred_ids, skip_special_tokens=True)
        label_str = self.processor.batch_decode(label_ids, skip_special_tokens=True)

        # Drop all whitespace so only the characters themselves are compared.
        pred_str = np.array([''.join(text.split()) for text in pred_str])
        label_str = np.array([''.join(text.split()) for text in label_str])

        results = {}
        try:
            results['cer'] = self.cer_metric.compute(predictions=pred_str, references=label_str)
        except Exception as e:
            # Best effort: keep evaluation going, but report NaN rather than 0,
            # because a CER of 0 would read as a perfect score.
            print(e)
            print(pred_str)
            print(label_str)
            results['cer'] = float('nan')
        results['accuracy'] = (pred_str == label_str).mean()

        return results
def run(
        run_name='debug',
        encoder_name='facebook/deit-tiny-patch16-224',
        decoder_name='cl-tohoku/bert-base-japanese-char-v2',
        max_len=300,
        num_decoder_layers=2,
        batch_size=64,
        num_epochs=8,
        fp16=True,
):
    """Build the model and datasets, train with ``Seq2SeqTrainer``, log to wandb.

    Args:
        run_name: Forwarded to the training arguments as ``run_name``.
        encoder_name: Encoder identifier passed to ``get_model``.
        decoder_name: Decoder identifier passed to ``get_model``.
        max_len: Passed to ``get_model`` and to both datasets.
        num_decoder_layers: Passed to ``get_model``.
        batch_size: Per-device batch size for training and evaluation.
        num_epochs: Number of training epochs.
        fp16: Sets both ``fp16`` and ``fp16_full_eval``.
    """
    wandb.login()

    model, processor = get_model(encoder_name, decoder_name, max_len, num_decoder_layers)

    # keep package 0 for validation
    # NOTE(review): skip_packages semantics are defined in MangaDataset — confirm there
    train_dataset = MangaDataset(processor, 'train', max_len, augment=True, skip_packages=[0])
    eval_dataset = MangaDataset(processor, 'test', max_len, augment=False, skip_packages=range(1, 9999))

    metrics = Metrics(processor)

    # Checkpointing and evaluation share the same step interval.
    step_interval = 20000
    trainer_options = dict(
        output_dir=TRAIN_ROOT,
        run_name=run_name,
        num_train_epochs=num_epochs,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        fp16=fp16,
        fp16_full_eval=fp16,
        predict_with_generate=True,
        evaluation_strategy='steps',
        eval_steps=step_interval,
        save_strategy='steps',
        save_steps=step_interval,
        logging_steps=10,
        dataloader_num_workers=16,
    )
    training_args = Seq2SeqTrainingArguments(**trainer_options)

    # The feature extractor is handed over through the trainer's ``tokenizer`` slot.
    trainer = Seq2SeqTrainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        data_collator=default_data_collator,
        compute_metrics=metrics.compute_metrics,
        tokenizer=processor.feature_extractor,
    )
    trainer.train()

    wandb.finish()
def tensor_to_image(img):
    """Convert a 3-D tensor into a ``uint8`` image array.

    Values are rescaled so that -1 maps to 0 and 1 maps to 255; anything
    outside that range is clipped. The first axis is moved to the end
    (presumably channels-first input becoming height x width x channels).

    Args:
        img: Tensor with three dimensions. It may live on any device and may
            require grad.

    Returns:
        ``numpy.ndarray`` of dtype ``uint8`` with axes reordered as (1, 2, 0).
    """
    # detach() lets tensors that track gradients be converted; without it
    # Tensor.numpy() raises for them.
    array = img.detach().cpu().numpy()
    array = (array + 1) / 2 * 255
    return array.clip(0, 255).astype(np.uint8).transpose(1, 2, 0)