Add manual language support to engines, make text fullwidth only for JP and CN, replace RapidOCR support with latest 3.4.0/PPv5
This commit is contained in:
128
owocr/ocr.py
128
owocr/ocr.py
@@ -52,8 +52,8 @@ except ImportError:
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from rapidocr_onnxruntime import RapidOCR as ROCR
|
from rapidocr import RapidOCR as ROCR
|
||||||
import urllib.request
|
from rapidocr import EngineType, LangDet, LangRec, ModelType, OCRVersion
|
||||||
except ImportError:
|
except ImportError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@@ -85,6 +85,8 @@ try:
|
|||||||
except:
|
except:
|
||||||
optimized_png_encode = False
|
optimized_png_encode = False
|
||||||
|
|
||||||
|
cj_regex = re.compile(r'[\u3041-\u3096\u30A1-\u30FA\u4E00-\u9FFF]')
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class BoundingBox:
|
class BoundingBox:
|
||||||
@@ -135,10 +137,15 @@ def empty_post_process(text):
|
|||||||
return text
|
return text
|
||||||
|
|
||||||
def post_process(text):
|
def post_process(text):
|
||||||
text = ' '.join([''.join(i.split()) for i in text.splitlines()])
|
is_cj_text = cj_regex.search(text)
|
||||||
|
if is_cj_text:
|
||||||
|
text = ' '.join([''.join(i.split()) for i in text.splitlines()])
|
||||||
|
else:
|
||||||
|
text = ' '.join([re.sub(r'\s+', ' ', i).strip() for i in text.splitlines()])
|
||||||
text = text.replace('…', '...')
|
text = text.replace('…', '...')
|
||||||
text = re.sub('[・.]{2,}', lambda x: (x.end() - x.start()) * '.', text)
|
text = re.sub('[・.]{2,}', lambda x: (x.end() - x.start()) * '.', text)
|
||||||
text = jaconv.h2z(text, ascii=True, digit=True)
|
if is_cj_text:
|
||||||
|
text = jaconv.h2z(text, ascii=True, digit=True)
|
||||||
return text
|
return text
|
||||||
|
|
||||||
def input_to_pil_image(img):
|
def input_to_pil_image(img):
|
||||||
@@ -203,6 +210,7 @@ class MangaOcr:
|
|||||||
readable_name = 'Manga OCR'
|
readable_name = 'Manga OCR'
|
||||||
key = 'm'
|
key = 'm'
|
||||||
available = False
|
available = False
|
||||||
|
manual_language = False
|
||||||
coordinate_support = False
|
coordinate_support = False
|
||||||
|
|
||||||
def __init__(self, config={'pretrained_model_name_or_path':'kha-white/manga-ocr-base','force_cpu': False}):
|
def __init__(self, config={'pretrained_model_name_or_path':'kha-white/manga-ocr-base','force_cpu': False}):
|
||||||
@@ -234,6 +242,7 @@ class GoogleVision:
|
|||||||
readable_name = 'Google Vision'
|
readable_name = 'Google Vision'
|
||||||
key = 'g'
|
key = 'g'
|
||||||
available = False
|
available = False
|
||||||
|
manual_language = False
|
||||||
coordinate_support = False
|
coordinate_support = False
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
@@ -279,6 +288,7 @@ class GoogleLens:
|
|||||||
readable_name = 'Google Lens'
|
readable_name = 'Google Lens'
|
||||||
key = 'l'
|
key = 'l'
|
||||||
available = False
|
available = False
|
||||||
|
manual_language = False
|
||||||
coordinate_support = True
|
coordinate_support = True
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
@@ -424,6 +434,7 @@ class GoogleLensWeb:
|
|||||||
readable_name = 'Google Lens (web)'
|
readable_name = 'Google Lens (web)'
|
||||||
key = 'k'
|
key = 'k'
|
||||||
available = False
|
available = False
|
||||||
|
manual_language = False
|
||||||
coordinate_support = False
|
coordinate_support = False
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
@@ -520,6 +531,7 @@ class Bing:
|
|||||||
readable_name = 'Bing'
|
readable_name = 'Bing'
|
||||||
key = 'b'
|
key = 'b'
|
||||||
available = False
|
available = False
|
||||||
|
manual_language = False
|
||||||
coordinate_support = True
|
coordinate_support = True
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
@@ -697,15 +709,17 @@ class AppleVision:
|
|||||||
readable_name = 'Apple Vision'
|
readable_name = 'Apple Vision'
|
||||||
key = 'a'
|
key = 'a'
|
||||||
available = False
|
available = False
|
||||||
|
manual_language = True
|
||||||
coordinate_support = False
|
coordinate_support = False
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, language='ja'):
|
||||||
if sys.platform != 'darwin':
|
if sys.platform != 'darwin':
|
||||||
logger.warning('Apple Vision is not supported on non-macOS platforms!')
|
logger.warning('Apple Vision is not supported on non-macOS platforms!')
|
||||||
elif int(platform.mac_ver()[0].split('.')[0]) < 13:
|
elif int(platform.mac_ver()[0].split('.')[0]) < 13:
|
||||||
logger.warning('Apple Vision is not supported on macOS older than Ventura/13.0!')
|
logger.warning('Apple Vision is not supported on macOS older than Ventura/13.0!')
|
||||||
else:
|
else:
|
||||||
self.available = True
|
self.available = True
|
||||||
|
self.language = [language, 'en']
|
||||||
logger.info('Apple Vision ready')
|
logger.info('Apple Vision ready')
|
||||||
|
|
||||||
def __call__(self, img):
|
def __call__(self, img):
|
||||||
@@ -719,7 +733,7 @@ class AppleVision:
|
|||||||
req.setRevision_(Vision.VNRecognizeTextRequestRevision3)
|
req.setRevision_(Vision.VNRecognizeTextRequestRevision3)
|
||||||
req.setRecognitionLevel_(Vision.VNRequestTextRecognitionLevelAccurate)
|
req.setRecognitionLevel_(Vision.VNRequestTextRecognitionLevelAccurate)
|
||||||
req.setUsesLanguageCorrection_(True)
|
req.setUsesLanguageCorrection_(True)
|
||||||
req.setRecognitionLanguages_(['ja','en'])
|
req.setRecognitionLanguages_(self.language)
|
||||||
|
|
||||||
handler = Vision.VNImageRequestHandler.alloc().initWithData_options_(
|
handler = Vision.VNImageRequestHandler.alloc().initWithData_options_(
|
||||||
self._preprocess(img), None
|
self._preprocess(img), None
|
||||||
@@ -746,9 +760,10 @@ class AppleLiveText:
|
|||||||
readable_name = 'Apple Live Text'
|
readable_name = 'Apple Live Text'
|
||||||
key = 'd'
|
key = 'd'
|
||||||
available = False
|
available = False
|
||||||
|
manual_language = True
|
||||||
coordinate_support = True
|
coordinate_support = True
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, language='ja'):
|
||||||
if sys.platform != 'darwin':
|
if sys.platform != 'darwin':
|
||||||
logger.warning('Apple Live Text is not supported on non-macOS platforms!')
|
logger.warning('Apple Live Text is not supported on non-macOS platforms!')
|
||||||
elif int(platform.mac_ver()[0].split('.')[0]) < 13:
|
elif int(platform.mac_ver()[0].split('.')[0]) < 13:
|
||||||
@@ -785,6 +800,7 @@ class AppleLiveText:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
self.language = [language, 'en']
|
||||||
self.available = True
|
self.available = True
|
||||||
logger.info('Apple Live Text ready')
|
logger.info('Apple Live Text ready')
|
||||||
|
|
||||||
@@ -798,7 +814,7 @@ class AppleLiveText:
|
|||||||
with objc.autorelease_pool():
|
with objc.autorelease_pool():
|
||||||
analyzer = self.VKCImageAnalyzer.alloc().init()
|
analyzer = self.VKCImageAnalyzer.alloc().init()
|
||||||
req = self.VKCImageAnalyzerRequest.alloc().initWithImage_requestType_(self._preprocess(img), 1) #VKAnalysisTypeText
|
req = self.VKCImageAnalyzerRequest.alloc().initWithImage_requestType_(self._preprocess(img), 1) #VKAnalysisTypeText
|
||||||
req.setLocales_(['ja','en'])
|
req.setLocales_(self.language)
|
||||||
analyzer.processRequest_progressHandler_completionHandler_(req, lambda progress: None, self._process)
|
analyzer.processRequest_progressHandler_completionHandler_(req, lambda progress: None, self._process)
|
||||||
|
|
||||||
CFRunLoopRunInMode(kCFRunLoopDefaultMode, 10.0, False)
|
CFRunLoopRunInMode(kCFRunLoopDefaultMode, 10.0, False)
|
||||||
@@ -883,20 +899,23 @@ class WinRTOCR:
|
|||||||
readable_name = 'WinRT OCR'
|
readable_name = 'WinRT OCR'
|
||||||
key = 'w'
|
key = 'w'
|
||||||
available = False
|
available = False
|
||||||
|
manual_language = True
|
||||||
coordinate_support = False
|
coordinate_support = False
|
||||||
|
|
||||||
def __init__(self, config={}):
|
def __init__(self, config={}, language='ja'):
|
||||||
if sys.platform == 'win32':
|
if sys.platform == 'win32':
|
||||||
if int(platform.release()) < 10:
|
if int(platform.release()) < 10:
|
||||||
logger.warning('WinRT OCR is not supported on Windows older than 10!')
|
logger.warning('WinRT OCR is not supported on Windows older than 10!')
|
||||||
elif 'winocr' not in sys.modules:
|
elif 'winocr' not in sys.modules:
|
||||||
logger.warning('winocr not available, WinRT OCR will not work!')
|
logger.warning('winocr not available, WinRT OCR will not work!')
|
||||||
else:
|
else:
|
||||||
|
self.language = language
|
||||||
self.available = True
|
self.available = True
|
||||||
logger.info('WinRT OCR ready')
|
logger.info('WinRT OCR ready')
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
self.url = config['url']
|
self.url = config['url']
|
||||||
|
self.language = language
|
||||||
self.available = True
|
self.available = True
|
||||||
logger.info('WinRT OCR ready')
|
logger.info('WinRT OCR ready')
|
||||||
except:
|
except:
|
||||||
@@ -908,9 +927,9 @@ class WinRTOCR:
|
|||||||
return (False, 'Invalid image provided')
|
return (False, 'Invalid image provided')
|
||||||
|
|
||||||
if sys.platform == 'win32':
|
if sys.platform == 'win32':
|
||||||
res = winocr.recognize_pil_sync(img, lang='ja')['text']
|
res = winocr.recognize_pil_sync(img, lang=self.language)['text']
|
||||||
else:
|
else:
|
||||||
params = {'lang': 'ja'}
|
params = {'lang': self.language}
|
||||||
try:
|
try:
|
||||||
res = requests.post(self.url, params=params, data=self._preprocess(img), timeout=3)
|
res = requests.post(self.url, params=params, data=self._preprocess(img), timeout=3)
|
||||||
except requests.exceptions.Timeout:
|
except requests.exceptions.Timeout:
|
||||||
@@ -937,6 +956,7 @@ class OneOCR:
|
|||||||
readable_name = 'OneOCR'
|
readable_name = 'OneOCR'
|
||||||
key = 'z'
|
key = 'z'
|
||||||
available = False
|
available = False
|
||||||
|
manual_language = False
|
||||||
coordinate_support = True
|
coordinate_support = True
|
||||||
|
|
||||||
def __init__(self, config={}):
|
def __init__(self, config={}):
|
||||||
@@ -1058,6 +1078,7 @@ class AzureImageAnalysis:
|
|||||||
readable_name = 'Azure Image Analysis'
|
readable_name = 'Azure Image Analysis'
|
||||||
key = 'v'
|
key = 'v'
|
||||||
available = False
|
available = False
|
||||||
|
manual_language = False
|
||||||
coordinate_support = False
|
coordinate_support = False
|
||||||
|
|
||||||
def __init__(self, config={}):
|
def __init__(self, config={}):
|
||||||
@@ -1112,15 +1133,16 @@ class EasyOCR:
|
|||||||
readable_name = 'EasyOCR'
|
readable_name = 'EasyOCR'
|
||||||
key = 'e'
|
key = 'e'
|
||||||
available = False
|
available = False
|
||||||
|
manual_language = True
|
||||||
coordinate_support = False
|
coordinate_support = False
|
||||||
|
|
||||||
def __init__(self, config={'gpu': True}):
|
def __init__(self, config={'gpu': True}, language='ja'):
|
||||||
if 'easyocr' not in sys.modules:
|
if 'easyocr' not in sys.modules:
|
||||||
logger.warning('easyocr not available, EasyOCR will not work!')
|
logger.warning('easyocr not available, EasyOCR will not work!')
|
||||||
else:
|
else:
|
||||||
logger.info('Loading EasyOCR model')
|
logger.info('Loading EasyOCR model')
|
||||||
logging.getLogger('easyocr.easyocr').setLevel(logging.ERROR)
|
logging.getLogger('easyocr.easyocr').setLevel(logging.ERROR)
|
||||||
self.model = easyocr.Reader(['ja','en'], gpu=config['gpu'])
|
self.model = easyocr.Reader([language,'en'], gpu=config['gpu'])
|
||||||
self.available = True
|
self.available = True
|
||||||
logger.info('EasyOCR ready')
|
logger.info('EasyOCR ready')
|
||||||
|
|
||||||
@@ -1148,40 +1170,56 @@ class RapidOCR:
|
|||||||
readable_name = 'RapidOCR'
|
readable_name = 'RapidOCR'
|
||||||
key = 'r'
|
key = 'r'
|
||||||
available = False
|
available = False
|
||||||
|
manual_language = True
|
||||||
coordinate_support = False
|
coordinate_support = False
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, config={'high_accuracy_detection': False, 'high_accuracy_recognition': True}, language='ja'):
|
||||||
if 'rapidocr_onnxruntime' not in sys.modules:
|
if 'rapidocr' not in sys.modules:
|
||||||
logger.warning('rapidocr_onnxruntime not available, RapidOCR will not work!')
|
logger.warning('rapidocr not available, RapidOCR will not work!')
|
||||||
else:
|
else:
|
||||||
rapidocr_model_file = os.path.join(os.path.expanduser('~'),'.cache','rapidocr_japan_PP-OCRv4_rec_infer.onnx')
|
|
||||||
if not os.path.isfile(rapidocr_model_file):
|
|
||||||
logger.info('Downloading RapidOCR model ' + rapidocr_model_file)
|
|
||||||
try:
|
|
||||||
cache_folder = os.path.join(os.path.expanduser('~'),'.cache')
|
|
||||||
if not os.path.isdir(cache_folder):
|
|
||||||
os.makedirs(cache_folder)
|
|
||||||
urllib.request.urlretrieve('https://github.com/AuroraWright/owocr/raw/master/rapidocr_japan_PP-OCRv4_rec_infer.onnx', rapidocr_model_file)
|
|
||||||
except:
|
|
||||||
logger.warning('Download failed. RapidOCR will not work!')
|
|
||||||
return
|
|
||||||
|
|
||||||
logger.info('Loading RapidOCR model')
|
logger.info('Loading RapidOCR model')
|
||||||
self.model = ROCR(rec_model_path=rapidocr_model_file)
|
lang_det, lang_rec = self.language_to_model_language(language)
|
||||||
|
self.model = ROCR(params={
|
||||||
|
'Det.engine_type': EngineType.ONNXRUNTIME,
|
||||||
|
'Det.lang_type': lang_det,
|
||||||
|
'Det.model_type': ModelType.SERVER if config['high_accuracy_detection'] else ModelType.MOBILE,
|
||||||
|
'Det.ocr_version': OCRVersion.PPOCRV5,
|
||||||
|
'Rec.engine_type': EngineType.ONNXRUNTIME,
|
||||||
|
'Rec.lang_type': lang_rec,
|
||||||
|
'Rec.model_type': ModelType.SERVER if config['high_accuracy_recognition'] else ModelType.MOBILE,
|
||||||
|
'Rec.ocr_version': OCRVersion.PPOCRV5,
|
||||||
|
'Global.log_level': 'error'
|
||||||
|
})
|
||||||
logging.getLogger().setLevel(logging.ERROR)
|
logging.getLogger().setLevel(logging.ERROR)
|
||||||
self.available = True
|
self.available = True
|
||||||
logger.info('RapidOCR ready')
|
logger.info('RapidOCR ready')
|
||||||
|
|
||||||
|
def language_to_model_language(self, language):
    """Choose the RapidOCR detection/recognition language pair for a language code.

    Args:
        language: Two-letter language code (for example ``'ja'`` or ``'ko'``).

    Returns:
        A ``(LangDet, LangRec)`` tuple. ``'ja'`` and ``'zh'`` both map to the
        ``CH`` detection and recognition models; every other code uses
        ``LangDet.MULTI`` with a recognizer picked from the table below,
        falling back to ``LangRec.LATIN`` when the code is not listed.
    """
    # Japanese is routed to the same CH models as Chinese.
    if language in ('ja', 'zh'):
        return LangDet.CH, LangRec.CH

    # Codes with a dedicated recognition model; detection stays on MULTI.
    recognizers = {
        'ko': LangRec.KOREAN,
        'ru': LangRec.ESLAV,
        'el': LangRec.EL,
        'th': LangRec.TH,
    }
    return LangDet.MULTI, recognizers.get(language, LangRec.LATIN)
|
||||||
|
|
||||||
def __call__(self, img):
|
def __call__(self, img):
|
||||||
img, is_path = input_to_pil_image(img)
|
img, is_path = input_to_pil_image(img)
|
||||||
if not img:
|
if not img:
|
||||||
return (False, 'Invalid image provided')
|
return (False, 'Invalid image provided')
|
||||||
|
|
||||||
res = ''
|
res = ''
|
||||||
read_results, elapsed = self.model(self._preprocess(img))
|
read_results = self.model(self._preprocess(img))
|
||||||
if read_results:
|
if read_results:
|
||||||
for read_result in read_results:
|
for read_result in read_results.txts:
|
||||||
res += read_result[1] + '\n'
|
res += read_result + '\n'
|
||||||
|
|
||||||
x = (True, res)
|
x = (True, res)
|
||||||
|
|
||||||
@@ -1197,17 +1235,38 @@ class OCRSpace:
|
|||||||
readable_name = 'OCRSpace'
|
readable_name = 'OCRSpace'
|
||||||
key = 'o'
|
key = 'o'
|
||||||
available = False
|
available = False
|
||||||
|
manual_language = True
|
||||||
coordinate_support = False
|
coordinate_support = False
|
||||||
|
|
||||||
def __init__(self, config={}):
|
def __init__(self, config={}, language='ja'):
|
||||||
try:
|
try:
|
||||||
self.api_key = config['api_key']
|
self.api_key = config['api_key']
|
||||||
self.max_byte_size = config.get('file_size_limit', 1000000)
|
self.max_byte_size = config.get('file_size_limit', 1000000)
|
||||||
|
self.engine_version = config.get('engine_version', 2)
|
||||||
|
self.language = self.language_to_model_language(language)
|
||||||
self.available = True
|
self.available = True
|
||||||
logger.info('OCRSpace ready')
|
logger.info('OCRSpace ready')
|
||||||
except:
|
except:
|
||||||
logger.warning('Error reading API key from config, OCRSpace will not work!')
|
logger.warning('Error reading API key from config, OCRSpace will not work!')
|
||||||
|
|
||||||
|
def language_to_model_language(self, language):
    """Translate a two-letter language code into the code sent to OCRSpace.

    Args:
        language: Two-letter language code (for example ``'ja'`` or ``'ko'``).

    Returns:
        The three-letter code used as the request's ``language`` value, or
        ``'auto'`` when ``language`` has no explicit mapping.
    """
    # Single lookup table instead of a mixed if/if/elif chain.
    codes = {
        'ja': 'jpn',
        'zh': 'chs',
        'ko': 'kor',
        'ar': 'ara',
        'ru': 'rus',
        'el': 'gre',
        'th': 'tha',
    }
    return codes.get(language, 'auto')
|
||||||
|
|
||||||
def __call__(self, img):
|
def __call__(self, img):
|
||||||
img, is_path = input_to_pil_image(img)
|
img, is_path = input_to_pil_image(img)
|
||||||
if not img:
|
if not img:
|
||||||
@@ -1219,7 +1278,8 @@ class OCRSpace:
|
|||||||
|
|
||||||
data = {
|
data = {
|
||||||
'apikey': self.api_key,
|
'apikey': self.api_key,
|
||||||
'language': 'jpn'
|
'language': self.language,
|
||||||
|
'OCREngine': str(self.engine_version)
|
||||||
}
|
}
|
||||||
files = {'file': ('image.' + img_extension, img_bytes, 'image/' + img_extension)}
|
files = {'file': ('image.' + img_extension, img_bytes, 'image/' + img_extension)}
|
||||||
|
|
||||||
|
|||||||
10
owocr/run.py
10
owocr/run.py
@@ -917,9 +917,15 @@ def run():
|
|||||||
for _,engine_class in sorted(inspect.getmembers(sys.modules[__name__], lambda x: hasattr(x, '__module__') and x.__module__ and __package__ + '.ocr' in x.__module__ and inspect.isclass(x) and hasattr(x, 'name'))):
|
for _,engine_class in sorted(inspect.getmembers(sys.modules[__name__], lambda x: hasattr(x, '__module__') and x.__module__ and __package__ + '.ocr' in x.__module__ and inspect.isclass(x) and hasattr(x, 'name'))):
|
||||||
if len(config_engines) == 0 or engine_class.name in config_engines:
|
if len(config_engines) == 0 or engine_class.name in config_engines:
|
||||||
if config.get_engine(engine_class.name) == None:
|
if config.get_engine(engine_class.name) == None:
|
||||||
engine_instance = engine_class()
|
if engine_class.manual_language:
|
||||||
|
engine_instance = engine_class(language=config.get_general('language'))
|
||||||
|
else:
|
||||||
|
engine_instance = engine_class()
|
||||||
else:
|
else:
|
||||||
engine_instance = engine_class(config.get_engine(engine_class.name))
|
if engine_class.manual_language:
|
||||||
|
engine_instance = engine_class(config=config.get_engine(engine_class.name), language=config.get_general('language'))
|
||||||
|
else:
|
||||||
|
engine_instance = engine_class(config=config.get_engine(engine_class.name))
|
||||||
|
|
||||||
if engine_instance.available:
|
if engine_instance.available:
|
||||||
engine_instances.append(engine_instance)
|
engine_instances.append(engine_instance)
|
||||||
|
|||||||
@@ -50,3 +50,7 @@
|
|||||||
;gpu = True
|
;gpu = True
|
||||||
;[ocrspace]
|
;[ocrspace]
|
||||||
;api_key = api_key_here
|
;api_key = api_key_here
|
||||||
|
;engine_version = 2
|
||||||
|
;[rapidocr]
|
||||||
|
;high_accuracy_detection = False
|
||||||
|
;high_accuracy_recognition = True
|
||||||
@@ -48,9 +48,8 @@ easyocr = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
rapidocr = [
|
rapidocr = [
|
||||||
"rapidocr",
|
"rapidocr>=3.4.0",
|
||||||
"onnxruntime",
|
"onnxruntime"
|
||||||
"rapidocr_onnxruntime"
|
|
||||||
]
|
]
|
||||||
|
|
||||||
mangaocr = [
|
mangaocr = [
|
||||||
|
|||||||
Binary file not shown.
Reference in New Issue
Block a user