diff --git a/README.md b/README.md
index b519b0e..c1ace75 100644
--- a/README.md
+++ b/README.md
@@ -38,7 +38,6 @@ Additionally:
## Cloud providers
- Google Lens: Google Vision in disguise (no need for API keys!), install with `pip install owocr[lens]` ("l" key)
-- Google Lens (web): alternative version of Lens (Google webpage version). Results should be the same but it's much slower. Install with `pip install owocr[lensweb]` ("k" key)
- Bing: Azure in disguise (no need for API keys!) ("b" key)
- Google Vision: install with `pip install owocr[gvision]`, you also need a service account .json file named google_vision.json in `user directory/.config/` ("g" key)
- Azure Image Analysis: install with `pip install owocr[azure]`, you also need to specify an api key and an endpoint in the config file ("v" key)
diff --git a/owocr/config.py b/owocr/config.py
index 9d82564..d9f8dd2 100644
--- a/owocr/config.py
+++ b/owocr/config.py
@@ -25,7 +25,7 @@ parser.add_argument('-rs', '--read_from_secondary', type=str, default=argparse.S
parser.add_argument('-w', '--write_to', type=str, default=argparse.SUPPRESS,
help='Where to save recognized texts to. Can be either "clipboard", "websocket", or a path to a text file.')
parser.add_argument('-e', '--engine', type=str, default=argparse.SUPPRESS,
- help='OCR engine to use. Available: "mangaocr", "glens", "glensweb", "bing", "gvision", "avision", "alivetext", "azure", "winrtocr", "oneocr", "easyocr", "rapidocr", "ocrspace".')
+ help='OCR engine to use. Available: "mangaocr", "glens", "bing", "gvision", "avision", "alivetext", "azure", "winrtocr", "oneocr", "easyocr", "rapidocr", "ocrspace".')
parser.add_argument('-es', '--engine_secondary', type=str, default=argparse.SUPPRESS,
help='OCR engine to use for two-pass processing.')
parser.add_argument('-p', '--pause_at_startup', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS,
@@ -62,6 +62,10 @@ parser.add_argument('-l', '--language', type=str, default=argparse.SUPPRESS,
help='Two letter language code for filtering screencapture OCR results. Ex. "ja" for Japanese, "zh" for Chinese, "ko" for Korean, "ar" for Arabic, "ru" for Russian, "el" for Greek, "he" for Hebrew, "th" for Thai. Any other value will use Latin Extended (for most European languages and English).')
parser.add_argument('-of', '--output_format', type=str, default=argparse.SUPPRESS,
help='The output format for OCR results. Can be "text" (default) or "json" (to include coordinates).')
+parser.add_argument('-wp', '--websocket_port', type=int, default=argparse.SUPPRESS,
+                    help='Websocket port to use when reading from or writing to a websocket.')
+parser.add_argument('-ds', '--delay_seconds', type=float, default=argparse.SUPPRESS,
+ help='Delay (in seconds) between checks when reading from clipboard (on macOS/Linux) or a directory.')
parser.add_argument('-v', '--verbosity', type=int, default=argparse.SUPPRESS,
help='Terminal window verbosity. Can be -2 (all recognized text is shown whole, default), -1 (only timestamps are shown), 0 (nothing is shown but errors), or larger than 0 to cut displayed text to that many characters.')
parser.add_argument('--uwu', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS, help=argparse.SUPPRESS)
diff --git a/owocr/ocr.py b/owocr/ocr.py
index 0ed01df..5c52079 100644
--- a/owocr/ocr.py
+++ b/owocr/ocr.py
@@ -67,11 +67,6 @@ try:
except ImportError:
pass
-try:
- import pyjson5
-except ImportError:
- pass
-
try:
import betterproto
from .lens_betterproto import *
@@ -224,53 +219,146 @@ def quad_to_bounding_box(x1, y1, x2, y2, x3, y3, x4, y4, img_width=None, img_hei
rotation_z=angle
)
-def merge_bounding_boxes(ocr_element_list):
- all_corners = []
+def merge_bounding_boxes(ocr_element_list, rotated=False):
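+    # Merge the bounding boxes of several OCR elements into one box. With
+    # rotated=False the result is axis-aligned; with rotated=True it is the
+    # minimum-area rotated rectangle enclosing all element corners.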
+ def _get_all_corners(ocr_element_list):
+ corners = []
+ for element in ocr_element_list:
+ bbox = element.bounding_box
+ angle = bbox.rotation_z or 0.0
+ hw, hh = bbox.width / 2.0, bbox.height / 2.0
+ cx, cy = bbox.center_x, bbox.center_y
- for element in ocr_element_list:
- bbox = element.bounding_box
- angle = bbox.rotation_z
- hw = bbox.width / 2
- hh = bbox.height / 2
+ # Local corner offsets
+ local = np.array([[-hw, -hh], [hw, -hh], [hw, hh], [-hw, hh]])
- if not angle:
- corners = [
- (bbox.center_x - hw, bbox.center_y - hh), # Top-left
- (bbox.center_x + hw, bbox.center_y - hh), # Top-right
- (bbox.center_x + hw, bbox.center_y + hh), # Bottom-right
- (bbox.center_x - hw, bbox.center_y + hh) # Bottom-left
- ]
- all_corners.extend(corners)
- else:
- local_corners = [
- (-hw, -hh), # Top-left
- ( hw, -hh), # Top-right
- ( hw, hh), # Bottom-right
- (-hw, hh) # Bottom-left
- ]
+ if abs(angle) < 1e-12:
+ corners.append(local + [cx, cy])
+ else:
+ # Rotation matrix
+ cos_a, sin_a = np.cos(angle), np.sin(angle)
+ rot = np.array([[cos_a, -sin_a], [sin_a, cos_a]])
+ corners.append(local @ rot.T + [cx, cy])
+
+ return np.vstack(corners) if corners else np.empty((0, 2))
- # Rotate and translate corners
- cos_angle = cos(angle)
- sin_angle = sin(angle)
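+    # Convex hull via Andrew's monotone chain; collinear points are dropped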
+ def _convex_hull(points):
+ if len(points) <= 3:
+ return points
- for x_local, y_local in local_corners:
- x_rotated = x_local * cos_angle - y_local * sin_angle
- y_rotated = x_local * sin_angle + y_local * cos_angle
- x_global = bbox.center_x + x_rotated
- y_global = bbox.center_y + y_rotated
- all_corners.append((x_global, y_global))
+ pts = np.unique(points, axis=0)
+ pts = pts[np.lexsort((pts[:, 1], pts[:, 0]))]
- xs, ys = zip(*all_corners)
- min_x, max_x = min(xs), max(xs)
- min_y, max_y = min(ys), max(ys)
+ if len(pts) <= 1:
+ return pts
+
+ def cross(o, a, b):
+ return (a[0] - o[0]) * (b[1] - o[1]) - (a[1] - o[1]) * (b[0] - o[0])
+
+ lower, upper = [], []
+ for p in pts:
+ while len(lower) >= 2 and cross(lower[-2], lower[-1], p) <= 0:
+ lower.pop()
+ lower.append(p)
+ for p in pts[::-1]:
+ while len(upper) >= 2 and cross(upper[-2], upper[-1], p) <= 0:
+ upper.pop()
+ upper.append(p)
+
+ return np.array(lower[:-1] + upper[:-1])
+
+ all_corners = _get_all_corners(ocr_element_list)
+
+ # Axis-aligned case
+ if not rotated:
+ min_pt, max_pt = all_corners.min(axis=0), all_corners.max(axis=0)
+ center = (min_pt + max_pt) / 2
+ size = max_pt - min_pt
+ return BoundingBox(
+ center_x=center[0],
+ center_y=center[1],
+ width=size[0],
+ height=size[1]
+ )
+
+ hull = _convex_hull(all_corners)
+ m = len(hull)
+
+ # Trivial cases
+ if m == 1:
+ return BoundingBox(
+ center_x=hull[0, 0],
+ center_y=hull[0, 1],
+ width=0.0,
+ height=0.0,
+ rotation_z=0.0
+ )
+
+ if m == 2:
+ diff = hull[1] - hull[0]
+ length = np.linalg.norm(diff)
+ center = hull.mean(axis=0)
+ return BoundingBox(
+ center_x=center[0],
+ center_y=center[1],
+ width=length,
+ height=0.0,
+ rotation_z=np.arctan2(diff[1], diff[0])
+ )
+
+ # Test each edge orientation
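+    # (a minimum-area enclosing rectangle of a convex polygon is always
+    # aligned with one of the polygon's edges)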
+ edges = np.roll(hull, -1, axis=0) - hull
+ edge_lengths = np.linalg.norm(edges, axis=1)
+ valid = edge_lengths > 1e-12
+
+ if not valid.any():
+ # Fallback to axis-aligned
+ min_pt, max_pt = all_corners.min(axis=0), all_corners.max(axis=0)
+ center = (min_pt + max_pt) / 2
+ size = max_pt - min_pt
+ return BoundingBox(
+ center_x=center[0],
+ center_y=center[1],
+ width=size[0],
+ height=size[1]
+ )
+
+ angles = np.arctan2(edges[valid, 1], edges[valid, 0])
+    best_area = np.inf
+
+    for angle in angles:
+ # Rotation matrix (rotate by -angle)
+ cos_a, sin_a = np.cos(angle), np.sin(angle)
+ rot = np.array([[cos_a, sin_a], [-sin_a, cos_a]])
+        rotated_hull = hull @ rot.T
+
+        min_pt, max_pt = rotated_hull.min(axis=0), rotated_hull.max(axis=0)
+ area = np.prod(max_pt - min_pt)
+
+ if area < best_area:
+            best_area = area
+ best_bounds = (min_pt, max_pt, angle)
+
+ min_pt, max_pt, angle = best_bounds
+ width, height = max_pt - min_pt
+ center_rot = (min_pt + max_pt) / 2
+
+ # Rotate center back to global coordinates
+ cos_a, sin_a = np.cos(angle), np.sin(angle)
+ rot_back = np.array([[cos_a, -sin_a], [sin_a, cos_a]])
+ center = rot_back @ center_rot
+
+ # Normalize angle to [-π, π]
+ angle = np.mod(angle + np.pi, 2 * np.pi) - np.pi
return BoundingBox(
- center_x=(min_x + max_x) / 2,
- center_y=(min_y + max_y) / 2,
- width=max_x - min_x,
- height=max_y - min_y
+ center_x=center[0],
+ center_y=center[1],
+ width=width,
+ height=height,
+ rotation_z=angle
)
+
class MangaOcr:
name = 'mangaocr'
readable_name = 'Manga OCR'
@@ -312,7 +400,7 @@ class GoogleVision:
available = False
local = False
manual_language = False
- coordinate_support = False
+ coordinate_support = True
threading_support = True
def __init__(self):
@@ -336,20 +424,103 @@ class GoogleVision:
image_bytes = self._preprocess(img)
image = vision.Image(content=image_bytes)
+
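+        # document_text_detection returns the full page/block/paragraph/word
+        # hierarchy, which is needed to build coordinate-aware results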
try:
- response = self.client.text_detection(image=image)
+ response = self.client.document_text_detection(image=image)
except ServiceUnavailable:
return (False, 'Connection error!')
- except:
+    except Exception:
return (False, 'Unknown error!')
- texts = response.text_annotations
- res = texts[0].description if len(texts) > 0 else ''
- x = (True, res)
+
+ ocr_result = self._to_generic_result(response.full_text_annotation, img.width, img.height)
+ x = (True, ocr_result)
if is_path:
img.close()
return x
+ def _to_generic_result(self, full_text_annotation, img_width, img_height):
+ paragraphs = []
+
+ if full_text_annotation:
+ for page in full_text_annotation.pages:
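+                # Only use pages whose reported size matches the submitted image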
+ if page.width == img_width and page.height == img_height:
+ for block in page.blocks:
+ for google_paragraph in block.paragraphs:
+ p_bbox = self._convert_bbox(google_paragraph.bounding_box, img_width, img_height)
+ lines = self._create_lines_from_google_paragraph(google_paragraph, img_width, img_height)
+ paragraph = Paragraph(bounding_box=p_bbox, lines=lines)
+ paragraphs.append(paragraph)
+
+ return OcrResult(
+ image_properties=ImageProperties(width=img_width, height=img_height),
+ paragraphs=paragraphs
+ )
+
+ def _create_lines_from_google_paragraph(self, google_paragraph, img_width, img_height):
+ lines = []
+ words = []
+ for google_word in google_paragraph.words:
+ word = self._create_word_from_google_word(google_word, img_width, img_height)
+ words.append(word)
+ if word.separator == '\n':
+ l_bbox = merge_bounding_boxes(words, True)
+ line = Line(bounding_box=l_bbox, words=words)
+ lines.append(line)
+ words = []
+
+        # Flush any trailing words that were not terminated by a line break
+        if words:
+            l_bbox = merge_bounding_boxes(words, True)
+            lines.append(Line(bounding_box=l_bbox, words=words))
+
+        return lines
+
+ def _create_word_from_google_word(self, google_word, img_width, img_height):
+ w_bbox = self._convert_bbox(google_word.bounding_box, img_width, img_height)
+
+ w_separator = ''
+ w_text_parts = []
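+        # A break on the word's last symbol becomes its separator; breaks on
+        # earlier symbols are folded into the word text itself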
+ for i, symbol in enumerate(google_word.symbols):
+ separator = None
+ if hasattr(symbol, 'property') and hasattr(symbol.property, 'detected_break'):
+ detected_break = symbol.property.detected_break
+ detected_separator = self._break_type_to_char(detected_break.type_)
+ if i == len(google_word.symbols) - 1:
+ w_separator = detected_separator
+ else:
+ separator = detected_separator
+ symbol_text = symbol.text
+ w_text_parts.append(symbol_text)
+ if separator:
+ w_text_parts.append(separator)
+ word_text = ''.join(w_text_parts)
+
+ return Word(
+ text=word_text,
+ bounding_box=w_bbox,
+ separator=w_separator
+ )
+
+ def _break_type_to_char(self, break_type):
+ if break_type == vision.TextAnnotation.DetectedBreak.BreakType.SPACE:
+ return ' '
+ elif break_type == vision.TextAnnotation.DetectedBreak.BreakType.SURE_SPACE:
+ return ' '
+ elif break_type == vision.TextAnnotation.DetectedBreak.BreakType.EOL_SURE_SPACE:
+ return '\n'
+ elif break_type == vision.TextAnnotation.DetectedBreak.BreakType.HYPHEN:
+ return '-'
+ elif break_type == vision.TextAnnotation.DetectedBreak.BreakType.LINE_BREAK:
+ return '\n'
+ return ''
+
+ def _convert_bbox(self, quad, img_width, img_height):
+ vertices = quad.vertices
+
+ return quad_to_bounding_box(
+ vertices[0].x, vertices[0].y,
+ vertices[1].x, vertices[1].y,
+ vertices[2].x, vertices[2].y,
+ vertices[3].x, vertices[3].y,
+ img_width, img_height
+ )
+
def _preprocess(self, img):
return pil_image_to_bytes(img)
@@ -501,104 +672,6 @@ class GoogleLens:
return (pil_image_to_bytes(img), img.width, img.height)
-class GoogleLensWeb:
- name = 'glensweb'
- readable_name = 'Google Lens (web)'
- key = 'k'
- available = False
- local = False
- manual_language = False
- coordinate_support = False
- threading_support = True
-
- def __init__(self):
- if 'pyjson5' not in sys.modules:
- logger.warning('pyjson5 not available, Google Lens (web) will not work!')
- else:
- self.requests_session = requests.Session()
- self.available = True
- logger.info('Google Lens (web) ready')
-
- def __call__(self, img):
- img, is_path = input_to_pil_image(img)
- if not img:
- return (False, 'Invalid image provided')
-
- url = 'https://lens.google.com/v3/upload'
- files = {'encoded_image': ('image.png', self._preprocess(img), 'image/png')}
- headers = {
- 'Host': 'lens.google.com',
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:136.0) Gecko/20100101 Firefox/136.0',
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
- 'Accept-Language': 'ja-JP;q=0.6,ja;q=0.5',
- 'Accept-Encoding': 'gzip, deflate, br, zstd',
- 'Referer': 'https://www.google.com/',
- 'Origin': 'https://www.google.com',
- 'Alt-Used': 'lens.google.com',
- 'Connection': 'keep-alive',
- 'Upgrade-Insecure-Requests': '1',
- 'Sec-Fetch-Dest': 'document',
- 'Sec-Fetch-Mode': 'navigate',
- 'Sec-Fetch-Site': 'same-site',
- 'Priority': 'u=0, i',
- 'TE': 'trailers'
- }
- cookies = {'SOCS': 'CAESEwgDEgk0ODE3Nzk3MjQaAmVuIAEaBgiA_LyaBg'}
-
- try:
- res = self.requests_session.post(url, files=files, headers=headers, cookies=cookies, timeout=20, allow_redirects=False)
- except requests.exceptions.Timeout:
- return (False, 'Request timeout!')
- except requests.exceptions.ConnectionError:
- return (False, 'Connection error!')
-
- if res.status_code != 303:
- return (False, 'Unknown error!')
-
- redirect_url = res.headers.get('Location')
- if not redirect_url:
- return (False, 'Error getting redirect URL!')
-
- parsed_url = urlparse(redirect_url)
- query_params = parse_qs(parsed_url.query)
-
- if ('vsrid' not in query_params) or ('gsessionid' not in query_params):
- return (False, 'Unknown error!')
-
- try:
- res = self.requests_session.get(f"https://lens.google.com/qfmetadata?vsrid={query_params['vsrid'][0]}&gsessionid={query_params['gsessionid'][0]}", timeout=20)
- except requests.exceptions.Timeout:
- return (False, 'Request timeout!')
- except requests.exceptions.ConnectionError:
- return (False, 'Connection error!')
-
- if (len(res.text.splitlines()) != 3):
- return (False, 'Unknown error!')
-
- lens_object = pyjson5.loads(res.text.splitlines()[2])
-
- res = []
- text = lens_object[0][2][0][0]
- for paragraph in text:
- for line in paragraph[1]:
- for word in line[0]:
- res.append(word[1] + word[2])
-
- x = (True, res)
-
- if is_path:
- img.close()
- return x
-
- def _preprocess(self, img):
- if img.width * img.height > 3000000:
- aspect_ratio = img.width / img.height
- new_w = int(sqrt(3000000 * aspect_ratio))
- new_h = int(new_w / aspect_ratio)
- img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
-
- return pil_image_to_bytes(img)
-
class Bing:
name = 'bing'
readable_name = 'Bing'
@@ -1131,9 +1204,9 @@ class OneOCR:
def _convert_bbox(self, rect, img_width, img_height):
return quad_to_bounding_box(
- rect['x1'], rect['y1'],
- rect['x2'], rect['y2'],
- rect['x3'], rect['y3'],
+ rect['x1'], rect['y1'],
+ rect['x2'], rect['y2'],
+ rect['x3'], rect['y3'],
rect['x4'], rect['y4'],
img_width, img_height
)
@@ -1234,7 +1307,7 @@ class AzureImageAnalysis:
available = False
local = False
manual_language = False
- coordinate_support = False
+ coordinate_support = True
threading_support = True
def __init__(self, config={}):
@@ -1261,20 +1334,55 @@ class AzureImageAnalysis:
except:
return (False, 'Unknown error!')
- res = []
- if read_result.read:
- for block in read_result.read.blocks:
- for line in block.lines:
- res.append(line.text)
- else:
- return (False, 'Unknown error!')
-
- x = (True, res)
+ ocr_result = self._to_generic_result(read_result, img.width, img.height)
+ x = (True, ocr_result)
if is_path:
img.close()
return x
+ def _to_generic_result(self, read_result, img_width, img_height):
+ paragraphs = []
+ if read_result.read:
+ for block in read_result.read.blocks:
+ lines = []
+ for azure_line in block.lines:
+ l_bbox = self._convert_bbox(azure_line.bounding_polygon, img_width, img_height)
+
+ words = []
+ for azure_word in azure_line.words:
+ w_bbox = self._convert_bbox(azure_word.bounding_polygon, img_width, img_height)
+ word = Word(
+ text=azure_word.text,
+ bounding_box=w_bbox
+ )
+ words.append(word)
+
+ line = Line(
+ bounding_box=l_bbox,
+ words=words,
+ text=azure_line.text
+ )
+ lines.append(line)
+
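+                # Blocks expose no polygon of their own, so derive the
+                # paragraph box by merging its lines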
+ p_bbox = merge_bounding_boxes(lines)
+ paragraph = Paragraph(bounding_box=p_bbox, lines=lines)
+ paragraphs.append(paragraph)
+
+ return OcrResult(
+ image_properties=ImageProperties(width=img_width, height=img_height),
+ paragraphs=paragraphs
+ )
+
+ def _convert_bbox(self, rect, img_width, img_height):
+ return quad_to_bounding_box(
+ rect[0]['x'], rect[0]['y'],
+ rect[1]['x'], rect[1]['y'],
+ rect[2]['x'], rect[2]['y'],
+ rect[3]['x'], rect[3]['y'],
+ img_width, img_height
+ )
+
def _preprocess(self, img):
min_pixel_size = 50
max_pixel_size = 10000
@@ -1461,7 +1569,7 @@ class OCRSpace:
available = False
local = False
manual_language = True
- coordinate_support = False
+ coordinate_support = True
threading_support = True
def __init__(self, config={}, language='ja'):
@@ -1498,14 +1606,16 @@ class OCRSpace:
if not img:
return (False, 'Invalid image provided')
- img_bytes, img_extension, _ = self._preprocess(img)
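+    # Keep the original dimensions around; _preprocess may downscale the
+    # image to stay under the size limit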
+ og_img_width, og_img_height = img.size
+ img_bytes, img_extension, img_size = self._preprocess(img)
if not img_bytes:
return (False, 'Image is too big!')
data = {
'apikey': self.api_key,
'language': self.language,
- 'OCREngine': str(self.engine_version)
+ 'OCREngine': str(self.engine_version),
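+        # Request the text overlay so the response includes word coordinates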
+ 'isOverlayRequired': 'True'
}
files = {'file': ('image.' + img_extension, img_bytes, 'image/' + img_extension)}
@@ -1526,12 +1636,57 @@ class OCRSpace:
if res['IsErroredOnProcessing']:
return (False, res['ErrorMessage'])
- res = res['ParsedResults'][0]['ParsedText']
- x = (True, res)
+ img_width, img_height = img_size
+ ocr_result = self._to_generic_result(res, img_width, img_height, og_img_width, og_img_height)
+ x = (True, ocr_result)
if is_path:
img.close()
return x
+ def _to_generic_result(self, api_result, img_width, img_height, og_img_width, og_img_height):
+ parsed_result = api_result['ParsedResults'][0]
+ text_overlay = parsed_result.get('TextOverlay', {})
+
+ image_props = ImageProperties(width=og_img_width, height=og_img_height)
+ ocr_result = OcrResult(image_properties=image_props)
+
+ lines_data = text_overlay.get('Lines', [])
+
+ lines = []
+ for line_data in lines_data:
+ words = []
+ for word_data in line_data.get('Words', []):
+ w_bbox = self._convert_bbox(word_data, img_width, img_height)
+ words.append(Word(text=word_data['WordText'], bounding_box=w_bbox))
+
+ l_bbox = merge_bounding_boxes(words)
+ lines.append(Line(bounding_box=l_bbox, words=words))
+
+ if lines:
+ p_bbox = merge_bounding_boxes(lines)
+ paragraph = Paragraph(bounding_box=p_bbox, lines=lines)
+ ocr_result.paragraphs = [paragraph]
+ else:
+ ocr_result.paragraphs = []
+
+ return ocr_result
+
+ def _convert_bbox(self, word_data, img_width, img_height):
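+        # Overlay coordinates are absolute pixels in the (possibly resized)
+        # uploaded image; dividing by its dimensions gives relative
+        # coordinates that are also valid at the original size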
+ left = word_data['Left'] / img_width
+ top = word_data['Top'] / img_height
+ width = word_data['Width'] / img_width
+ height = word_data['Height'] / img_height
+
+ center_x = left + width / 2
+ center_y = top + height / 2
+
+ return BoundingBox(
+ center_x=center_x,
+ center_y=center_y,
+ width=width,
+ height=height
+ )
+
def _preprocess(self, img):
return limit_image_size(img, self.max_byte_size)
diff --git a/owocr/run.py b/owocr/run.py
index d0f14c7..6e3a943 100644
--- a/owocr/run.py
+++ b/owocr/run.py
@@ -434,7 +434,7 @@ class TextFiltering:
self.frame_stabilization_timestamp = time.time()
return 0, 0, None
- def _find_changed_lines_impl(self, current_result, previous_result, next_result = None):
+ def _find_changed_lines_impl(self, current_result, previous_result, next_result=None):
if not current_result:
return None
@@ -1341,8 +1341,6 @@ class OutputResult:
else:
output_string = self._post_process(result_data_text, False)
log_message = output_string
- if output_format == 'json':
- logger.opt(colors=True).warning(f"<{engine_color}>{engine_name}{engine_color}> does not support JSON output. Falling back to text.")
if verbosity != 0:
if verbosity < -1:
@@ -1494,6 +1492,7 @@ def run():
global engine_instances
global engine_keys
+ output_format = config.get_general('output_format')
engine_instances = []
config_engines = []
engine_keys = []
@@ -1506,6 +1505,11 @@ def run():
for _,engine_class in sorted(inspect.getmembers(sys.modules[__name__], lambda x: hasattr(x, '__module__') and x.__module__ and __package__ + '.ocr' in x.__module__ and inspect.isclass(x) and hasattr(x, 'name'))):
if len(config_engines) == 0 or engine_class.name in config_engines:
+
+ if output_format == 'json' and not engine_class.coordinate_support:
+ logger.warning(f"Skipping {engine_class.readable_name} as it does not support JSON output.")
+ continue
+
if config.get_engine(engine_class.name) == None:
if engine_class.manual_language:
engine_instance = engine_class(language=config.get_general('language'))
@@ -1545,7 +1549,6 @@ def run():
read_from_path = None
read_from_readable = []
write_to = config.get_general('write_to')
- output_format = config.get_general('output_format')
terminated = threading.Event()
paused = threading.Event()
if config.get_general('pause_at_startup'):
diff --git a/owocr_config.ini b/owocr_config.ini
index d484e7b..eda9f55 100644
--- a/owocr_config.ini
+++ b/owocr_config.ini
@@ -11,9 +11,8 @@
;a path to a text file.
;write_to = clipboard
-;OCR engine to use. Available: "mangaocr", "glens", "glensweb", "bing",
-;"gvision", "avision", "alivetext", "azure", "winrtocr", "oneocr", "easyocr",
-;"rapidocr", "ocrspace".
+;OCR engine to use. Available: "mangaocr", "glens", "bing", "gvision",
+;"avision", "alivetext", "azure", "winrtocr", "oneocr", "easyocr",
+;"rapidocr", "ocrspace".
;engine =
;OCR engine to use for two-pass processing.
@@ -30,15 +29,18 @@
;delete_images = False
;Available:
-;avision,alivetext,bing,glens,glensweb,gvision,azure,mangaocr,winrtocr,oneocr,easyocr,rapidocr,ocrspace
-;engines = avision,alivetext,bing,glens,glensweb,gvision,azure,mangaocr,winrtocr,oneocr,easyocr,rapidocr,ocrspace
+;avision,alivetext,bing,glens,gvision,azure,mangaocr,winrtocr,oneocr,easyocr,rapidocr,ocrspace
+;engines = avision,alivetext,bing,glens,gvision,azure,mangaocr,winrtocr,oneocr,easyocr,rapidocr,ocrspace
;logger_format = {time:HH:mm:ss.SSS} | {message}
;engine_color = cyan
+;Delay (in seconds) between checks when reading from clipboard (on macOS/Linux)
+;or a directory.
;delay_secs = 0.5
+;Websocket port to use when reading from or writing to a websocket.
;websocket_port = 7331
;Show an operating system notification with the detected text. Will be ignored
diff --git a/pyproject.toml b/pyproject.toml
index c40deb1..6a7af46 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -61,10 +61,6 @@ lens = [
"betterproto==2.0.0b7"
]
-lensweb = [
- "pyjson5"
-]
-
gvision = [
"google-cloud-vision"
]