diff --git a/owocr/run.py b/owocr/run.py index 0b65031..df213f9 100644 --- a/owocr/run.py +++ b/owocr/run.py @@ -302,10 +302,11 @@ class RequestHandler(socketserver.BaseRequestHandler): class TextFiltering: accurate_filtering = False - def __init__(self, language='ja'): + def __init__(self): from pysbd import Segmenter - self.segmenter = Segmenter(language=language, clean=True) - self.regex = self.get_regex(language) + self.language = config.get_general('language') + self.segmenter = Segmenter(language=self.language, clean=True) + self.regex = self.get_regex() try: from transformers import pipeline, AutoTokenizer @@ -330,22 +331,22 @@ class TextFiltering: import langid self.classify = langid.classify - def get_regex(self, language): - if language == 'ja': + def get_regex(self): + if self.language == 'ja': return re.compile(r'[\u3041-\u3096\u30A1-\u30FA\u4E00-\u9FFF]') - elif language == 'zh': + elif self.language == 'zh': return re.compile(r'[\u4E00-\u9FFF]') - elif language == 'ko': + elif self.language == 'ko': return re.compile(r'[\uAC00-\uD7AF]') - elif language == 'ar': + elif self.language == 'ar': return re.compile(r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]') - elif language == 'ru': + elif self.language == 'ru': return re.compile(r'[\u0400-\u04FF\u0500-\u052F\u2DE0-\u2DFF\uA640-\uA69F\u1C80-\u1C8F]') - elif language == 'el': + elif self.language == 'el': return re.compile(r'[\u0370-\u03FF\u1F00-\u1FFF]') - elif language == 'he': + elif self.language == 'he': return re.compile(r'[\u0590-\u05FF\uFB1D-\uFB4F]') - elif language == 'th': + elif self.language == 'th': return re.compile(r'[\u0E00-\u0E7F]') else: # Latin Extended regex for many European languages/English @@ -937,7 +938,6 @@ def run(): terminated = False paused = config.get_general('pause_at_startup') auto_pause = config.get_general('auto_pause') - language = config.get_general('language') output_format = config.get_general('output_format') clipboard_thread = None websocket_server_thread = None @@ -982,7 +982,7 @@ def run(): screenshot_event = threading.Event() screenshot_thread = ScreenshotThread(screen_capture_on_combo) screenshot_thread.start() - filtering = TextFiltering(language=language) + filtering = TextFiltering() read_from_readable.append('screen capture') if 'websocket' in (read_from, read_from_secondary): read_from_readable.append('websocket')