@@ -11,6 +11,7 @@ numpy
|
|||||||
opencv-python
|
opencv-python
|
||||||
pandas
|
pandas
|
||||||
Pillow
|
Pillow
|
||||||
|
pytest
|
||||||
scikit-image
|
scikit-image
|
||||||
scikit-learn
|
scikit-learn
|
||||||
scipy
|
scipy
|
||||||
|
|||||||
1
setup.py
@@ -26,6 +26,7 @@ setup(
|
|||||||
"numpy",
|
"numpy",
|
||||||
"Pillow",
|
"Pillow",
|
||||||
"pyperclip",
|
"pyperclip",
|
||||||
|
"sentencepiece",
|
||||||
"torch>=1.0",
|
"torch>=1.0",
|
||||||
"transformers>=4.12.5",
|
"transformers>=4.12.5",
|
||||||
"unidic_lite",
|
"unidic_lite",
|
||||||
|
|||||||
0
tests/__init__.py
Normal file
50
tests/data/expected_results.json
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"filename": "00.jpg",
|
||||||
|
"result": "素直にあやまるしか"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"filename": "01.jpg",
|
||||||
|
"result": "立川で見た〝穴〟の下の巨大な眼は:"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"filename": "02.jpg",
|
||||||
|
"result": "実戦剣術も一流です"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"filename": "03.jpg",
|
||||||
|
"result": "第30話重苦しい闇の奥で静かに呼吸づきながら"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"filename": "04.jpg",
|
||||||
|
"result": "きのうハンパーヶとって、ゴメン!!!"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"filename": "05.jpg",
|
||||||
|
"result": "ぎゃっ"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"filename": "06.jpg",
|
||||||
|
"result": "ピンポーーン"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"filename": "07.jpg",
|
||||||
|
"result": "LINK!私達7人の力でガノンの塔の結界をやぶります"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"filename": "08.jpg",
|
||||||
|
"result": "ファイアパンチ"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"filename": "09.jpg",
|
||||||
|
"result": "少し黙っている"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"filename": "10.jpg",
|
||||||
|
"result": "わかるかな〜?"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"filename": "11.jpg",
|
||||||
|
"result": "警察にも先生にも町中の人達に!!"
|
||||||
|
}
|
||||||
|
]
|
||||||
BIN
tests/data/images/00.jpg
Normal file
|
After Width: | Height: | Size: 9.2 KiB |
BIN
tests/data/images/01.jpg
Normal file
|
After Width: | Height: | Size: 34 KiB |
BIN
tests/data/images/02.jpg
Normal file
|
After Width: | Height: | Size: 2.8 KiB |
BIN
tests/data/images/03.jpg
Normal file
|
After Width: | Height: | Size: 18 KiB |
BIN
tests/data/images/04.jpg
Normal file
|
After Width: | Height: | Size: 89 KiB |
BIN
tests/data/images/05.jpg
Normal file
|
After Width: | Height: | Size: 3.8 KiB |
BIN
tests/data/images/06.jpg
Normal file
|
After Width: | Height: | Size: 9.2 KiB |
BIN
tests/data/images/07.jpg
Normal file
|
After Width: | Height: | Size: 15 KiB |
BIN
tests/data/images/08.jpg
Normal file
|
After Width: | Height: | Size: 6.9 KiB |
BIN
tests/data/images/09.jpg
Normal file
|
After Width: | Height: | Size: 6.2 KiB |
BIN
tests/data/images/10.jpg
Normal file
|
After Width: | Height: | Size: 3.4 KiB |
BIN
tests/data/images/11.jpg
Normal file
|
After Width: | Height: | Size: 15 KiB |
25
tests/generate_expected_results.py
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
from manga_ocr import MangaOcr
|
||||||
|
|
||||||
|
TEST_DATA_ROOT = Path(__file__).parent / 'data'
|
||||||
|
|
||||||
|
|
||||||
|
def generate_expected_results():
    """Regenerate the expected-results fixture from the current OCR output.

    Runs ``MangaOcr`` over every entry of ``TEST_DATA_ROOT / 'images'`` in
    sorted path order and writes one ``{'filename', 'result'}`` record per
    entry to ``TEST_DATA_ROOT / 'expected_results.json'`` as UTF-8 JSON.
    """
    ocr = MangaOcr()
    image_paths = sorted((TEST_DATA_ROOT / 'images').iterdir())

    # One record per image; tqdm only wraps the iteration for progress output.
    records = [
        {'filename': image_path.name, 'result': ocr(image_path)}
        for image_path in tqdm(image_paths)
    ]

    # ensure_ascii=False keeps the recognised text readable in the fixture.
    serialized = json.dumps(records, ensure_ascii=False, indent=2)
    output_path = TEST_DATA_ROOT / 'expected_results.json'
    output_path.write_text(serialized, encoding='utf-8')


if __name__ == '__main__':
    generate_expected_results()
|
||||||
16
tests/test_ocr.py
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from manga_ocr import MangaOcr
|
||||||
|
|
||||||
|
TEST_DATA_ROOT = Path(__file__).parent / 'data'
|
||||||
|
|
||||||
|
|
||||||
|
def test_ocr():
    """Check ``MangaOcr`` output against the stored expected results.

    Every image listed in ``data/expected_results.json`` is run through the
    model and compared with its recorded text. Mismatches are collected
    instead of failing on the first one, so a single run reports every
    failing image by filename.
    """
    mocr = MangaOcr()

    expected_results = json.loads((TEST_DATA_ROOT / 'expected_results.json').read_text(encoding='utf-8'))

    # An empty fixture would make the loop vacuous and let the test pass
    # without checking anything.
    assert expected_results, 'expected_results.json contains no test cases'

    mismatches = []
    for item in expected_results:
        result = mocr(TEST_DATA_ROOT / 'images' / item['filename'])
        if result != item['result']:
            mismatches.append(
                '{}: expected {!r}, got {!r}'.format(item['filename'], item['result'], result)
            )

    assert not mismatches, 'OCR output differs from expected results:\n' + '\n'.join(mismatches)
|
||||||