Push nitpick from a while back
This commit is contained in:
owocr/run.py — 28 lines changed (14 additions, 14 deletions)
@@ -302,10 +302,11 @@ class RequestHandler(socketserver.BaseRequestHandler):
 class TextFiltering:
 accurate_filtering = False
 
-def __init__(self, language='ja'):
+def __init__(self):
 from pysbd import Segmenter
-self.segmenter = Segmenter(language=language, clean=True)
-self.regex = self.get_regex(language)
+self.language = config.get_general('language')
+self.segmenter = Segmenter(language=self.language, clean=True)
+self.regex = self.get_regex()
 
 try:
 from transformers import pipeline, AutoTokenizer
@@ -330,22 +331,22 @@ class TextFiltering:
 import langid
 self.classify = langid.classify
 
-def get_regex(self, language):
-if language == 'ja':
+def get_regex(self):
+if self.language == 'ja':
 return re.compile(r'[\u3041-\u3096\u30A1-\u30FA\u4E00-\u9FFF]')
-elif language == 'zh':
+elif self.language == 'zh':
 return re.compile(r'[\u4E00-\u9FFF]')
-elif language == 'ko':
+elif self.language == 'ko':
 return re.compile(r'[\uAC00-\uD7AF]')
-elif language == 'ar':
+elif self.language == 'ar':
 return re.compile(r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]')
-elif language == 'ru':
+elif self.language == 'ru':
 return re.compile(r'[\u0400-\u04FF\u0500-\u052F\u2DE0-\u2DFF\uA640-\uA69F\u1C80-\u1C8F]')
-elif language == 'el':
+elif self.language == 'el':
 return re.compile(r'[\u0370-\u03FF\u1F00-\u1FFF]')
-elif language == 'he':
+elif self.language == 'he':
 return re.compile(r'[\u0590-\u05FF\uFB1D-\uFB4F]')
-elif language == 'th':
+elif self.language == 'th':
 return re.compile(r'[\u0E00-\u0E7F]')
 else:
 # Latin Extended regex for many European languages/English
@@ -937,7 +938,6 @@ def run():
 terminated = False
 paused = config.get_general('pause_at_startup')
 auto_pause = config.get_general('auto_pause')
-language = config.get_general('language')
 output_format = config.get_general('output_format')
 clipboard_thread = None
 websocket_server_thread = None
@@ -982,7 +982,7 @@ def run():
 screenshot_event = threading.Event()
 screenshot_thread = ScreenshotThread(screen_capture_on_combo)
 screenshot_thread.start()
-filtering = TextFiltering(language=language)
+filtering = TextFiltering()
 read_from_readable.append('screen capture')
 if 'websocket' in (read_from, read_from_secondary):
 read_from_readable.append('websocket')
Reference in New Issue
Block a user