diff --git a/manga_ocr_dev/requirements.txt b/manga_ocr_dev/requirements.txt index ff07cf5..5f05d09 100644 --- a/manga_ocr_dev/requirements.txt +++ b/manga_ocr_dev/requirements.txt @@ -11,6 +11,7 @@ numpy opencv-python pandas Pillow +pytest scikit-image scikit-learn scipy diff --git a/setup.py b/setup.py index 3a87b84..5ad5007 100644 --- a/setup.py +++ b/setup.py @@ -26,6 +26,7 @@ setup( "numpy", "Pillow", "pyperclip", + "sentencepiece", "torch>=1.0", "transformers>=4.12.5", "unidic_lite", diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/expected_results.json b/tests/data/expected_results.json new file mode 100644 index 0000000..3930795 --- /dev/null +++ b/tests/data/expected_results.json @@ -0,0 +1,50 @@ +[ + { + "filename": "00.jpg", + "result": "素直にあやまるしか" + }, + { + "filename": "01.jpg", + "result": "立川で見た〝穴〟の下の巨大な眼は:" + }, + { + "filename": "02.jpg", + "result": "実戦剣術も一流です" + }, + { + "filename": "03.jpg", + "result": "第30話重苦しい闇の奥で静かに呼吸づきながら" + }, + { + "filename": "04.jpg", + "result": "きのうハンパーヶとって、ゴメン!!!" + }, + { + "filename": "05.jpg", + "result": "ぎゃっ" + }, + { + "filename": "06.jpg", + "result": "ピンポーーン" + }, + { + "filename": "07.jpg", + "result": "LINK!私達7人の力でガノンの塔の結界をやぶります" + }, + { + "filename": "08.jpg", + "result": "ファイアパンチ" + }, + { + "filename": "09.jpg", + "result": "少し黙っている" + }, + { + "filename": "10.jpg", + "result": "わかるかな〜?" + }, + { + "filename": "11.jpg", + "result": "警察にも先生にも町中の人達に!!" 
+ } +] \ No newline at end of file diff --git a/tests/data/images/00.jpg b/tests/data/images/00.jpg new file mode 100644 index 0000000..faef4b4 Binary files /dev/null and b/tests/data/images/00.jpg differ diff --git a/tests/data/images/01.jpg b/tests/data/images/01.jpg new file mode 100644 index 0000000..0bd3c27 Binary files /dev/null and b/tests/data/images/01.jpg differ diff --git a/tests/data/images/02.jpg b/tests/data/images/02.jpg new file mode 100644 index 0000000..9ed906a Binary files /dev/null and b/tests/data/images/02.jpg differ diff --git a/tests/data/images/03.jpg b/tests/data/images/03.jpg new file mode 100644 index 0000000..65f4c1a Binary files /dev/null and b/tests/data/images/03.jpg differ diff --git a/tests/data/images/04.jpg b/tests/data/images/04.jpg new file mode 100644 index 0000000..8241abb Binary files /dev/null and b/tests/data/images/04.jpg differ diff --git a/tests/data/images/05.jpg b/tests/data/images/05.jpg new file mode 100644 index 0000000..c202c7e Binary files /dev/null and b/tests/data/images/05.jpg differ diff --git a/tests/data/images/06.jpg b/tests/data/images/06.jpg new file mode 100644 index 0000000..34cd7b8 Binary files /dev/null and b/tests/data/images/06.jpg differ diff --git a/tests/data/images/07.jpg b/tests/data/images/07.jpg new file mode 100644 index 0000000..91048e0 Binary files /dev/null and b/tests/data/images/07.jpg differ diff --git a/tests/data/images/08.jpg b/tests/data/images/08.jpg new file mode 100644 index 0000000..95ce304 Binary files /dev/null and b/tests/data/images/08.jpg differ diff --git a/tests/data/images/09.jpg b/tests/data/images/09.jpg new file mode 100644 index 0000000..91537a2 Binary files /dev/null and b/tests/data/images/09.jpg differ diff --git a/tests/data/images/10.jpg b/tests/data/images/10.jpg new file mode 100644 index 0000000..2ed92cb Binary files /dev/null and b/tests/data/images/10.jpg differ diff --git a/tests/data/images/11.jpg b/tests/data/images/11.jpg new file mode 100644 index 
0000000..e51e5e0 Binary files /dev/null and b/tests/data/images/11.jpg differ
diff --git a/tests/generate_expected_results.py b/tests/generate_expected_results.py
new file mode 100644
index 0000000..fa7c27a
--- /dev/null
+++ b/tests/generate_expected_results.py
@@ -0,0 +1,31 @@
+import json
+from pathlib import Path
+
+from tqdm import tqdm
+
+from manga_ocr import MangaOcr
+
+TEST_DATA_ROOT = Path(__file__).parent / 'data'
+
+
+def generate_expected_results():
+    """Run MangaOcr on every entry of the test image directory and save the output.
+
+    Entries are processed in sorted path order. The results are written to
+    ``expected_results.json`` as a JSON list of objects with ``filename`` and
+    ``result`` keys; the model output is recorded as-is, without validation.
+    """
+    mocr = MangaOcr()
+
+    results = []
+
+    for path in tqdm(sorted((TEST_DATA_ROOT / 'images').iterdir())):
+        result = mocr(path)
+        results.append({'filename': path.name, 'result': result})
+
+    (TEST_DATA_ROOT / 'expected_results.json').write_text(json.dumps(results, ensure_ascii=False, indent=2),
+                                                          encoding='utf-8')
+
+
+if __name__ == '__main__':
+    generate_expected_results()
diff --git a/tests/test_ocr.py b/tests/test_ocr.py
new file mode 100644
index 0000000..ed7e89e
--- /dev/null
+++ b/tests/test_ocr.py
@@ -0,0 +1,27 @@
+import json
+from pathlib import Path
+
+from manga_ocr import MangaOcr
+
+TEST_DATA_ROOT = Path(__file__).parent / 'data'
+
+
+def test_ocr():
+    """Check that MangaOcr reproduces the stored result for every test image.
+
+    Every image listed in ``expected_results.json`` is processed before the
+    test asserts, so a single run reports all mismatching files instead of
+    stopping at the first one.
+    """
+    mocr = MangaOcr()
+
+    expected_results = json.loads((TEST_DATA_ROOT / 'expected_results.json').read_text(encoding='utf-8'))
+    assert expected_results, 'expected_results.json contains no entries'
+
+    mismatches = []
+    for item in expected_results:
+        result = mocr(TEST_DATA_ROOT / 'images' / item['filename'])
+        if result != item['result']:
+            mismatches.append('{}: expected {!r}, got {!r}'.format(item['filename'], item['result'], result))
+
+    assert not mismatches, 'OCR output differs from expected results:\n' + '\n'.join(mismatches)