From 4e497ac3c32758dc8c4bd2488b0a9d6bfdad2875 Mon Sep 17 00:00:00 2001 From: AuroraWright Date: Sun, 30 Nov 2025 02:43:05 +0100 Subject: [PATCH] Switch from requests to curl_cffi, enable Chrome impersonation for Lens and Bing (fixes Bing instability). Add custom line/paragraph separator feature --- owocr/config.py | 23 ++++++++++----- owocr/ocr.py | 74 ++++++++++++++++++++---------------------------- owocr/run.py | 23 +++++++++------ owocr_config.ini | 22 +++++++++----- pyproject.toml | 4 +-- 5 files changed, 78 insertions(+), 68 deletions(-) diff --git a/owocr/config.py b/owocr/config.py index 170fc49..f55a880 100644 --- a/owocr/config.py +++ b/owocr/config.py @@ -37,9 +37,9 @@ parser.add_argument('-n', '--notifications', type=str2bool, nargs='?', const=Tru parser.add_argument('-a', '--auto_pause', type=float, default=argparse.SUPPRESS, help='Automatically pause the program after the specified amount of seconds since the last successful text recognition. 0 to disable.') parser.add_argument('-cp', '--combo_pause', type=str, default=argparse.SUPPRESS, - help='Combo to wait on for pausing the program. As an example: "++p". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key') + help='Combo to wait on for pausing the program. As an example: ++p. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key') parser.add_argument('-cs', '--combo_engine_switch', type=str, default=argparse.SUPPRESS, - help='Combo to wait on for switching the OCR engine. As an example: "++a". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key') + help='Combo to wait on for switching the OCR engine. As an example: ++a. 
The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key') parser.add_argument('-sa', '--screen_capture_area', type=str, default=argparse.SUPPRESS, help='Area to target when reading with screen capture. Can be either empty (automatic selector), a set of coordinates (x,y,width,height), "screen_N" (captures a whole screen, where N is the screen number starting from 1) or a window name (the first matching window title will be used).') parser.add_argument('-swa', '--screen_capture_window_area', type=str, default=argparse.SUPPRESS, @@ -53,17 +53,21 @@ parser.add_argument('-sf', '--screen_capture_frame_stabilization', type=float, d parser.add_argument('-sl', '--screen_capture_line_recovery', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS, help='When reading with screen capture and frame stabilization is on, try to recover missed lines from unstable frames. Can lead to increased glitches.') parser.add_argument('-sr', '--screen_capture_regex_filter', type=str, default=argparse.SUPPRESS, - help='When reading with screen capture, regex to filter unwanted text from the output. Example value: "▶|♥|・" to remove either of those characters.') + help='When reading with screen capture, regex to filter unwanted text from the output. Example value: ▶|♥|・ to remove either of those characters.') parser.add_argument('-sc', '--screen_capture_combo', type=str, default=argparse.SUPPRESS, - help='When reading with screen capture, combo to wait on for taking a screenshot. If periodic screenshots are also enabled, any screenshot taken this way bypasses the filtering. Example value: "++s". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key') + help='When reading with screen capture, combo to wait on for taking a screenshot. If periodic screenshots are also enabled, any screenshot taken this way bypasses the filtering. Example value: ++s. 
The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key') parser.add_argument('-scc', '--coordinate_selector_combo', type=str, default=argparse.SUPPRESS, - help='When reading with screen capture, combo to wait on for invoking the coordinate picker to change the screen/window area. Example value: "++c". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key') + help='When reading with screen capture, combo to wait on for invoking the coordinate picker to change the screen/window area. Example value: ++c. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key') parser.add_argument('-l', '--language', type=str, default=argparse.SUPPRESS, help='Two letter language code to use for some engines and for filtering screen capture OCR results. Ex. "ja" for Japanese, "zh" for Chinese, "ko" for Korean, "ar" for Arabic, "ru" for Russian, "el" for Greek, "he" for Hebrew, "th" for Thai. Any other value will use Latin Extended (for most European languages and English).') parser.add_argument('-j', '--join_lines', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS, - help='Display lines in the text output without a space between them.') + help='Display lines in the text output without spaces/separators between them.') parser.add_argument('-jp', '--join_paragraphs', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS, - help='Display paragraphs in the text output without a space between them.') + help='Display paragraphs in the text output without spaces/separators between them.') +parser.add_argument('-ls', '--line_separator', type=str, default=argparse.SUPPRESS, + help='Custom line separator to use. Supports Python escape characters like \\n for newlines.') +parser.add_argument('-ps', '--paragraph_separator', type=str, default=argparse.SUPPRESS, + help='Custom paragraph separator to use. 
Supports Python escape characters like \\n for newlines.') parser.add_argument('-rt', '--reorder_text', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS, help='Regroup and reorder text instead of using paragraphs/order provided by the OCR engine.') parser.add_argument('-f', '--furigana_filter', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS, @@ -109,6 +113,9 @@ class Config: 'screen_capture_line_recovery': True, 'screen_capture_regex_filter': '', 'join_lines': False, + 'join_paragraphs': False, + 'line_separator': ' ', + 'paragraph_separator': ' ', 'reorder_text': True, 'furigana_filter': True, 'screen_capture_combo': '', @@ -136,6 +143,8 @@ class Config: return float(value) except ValueError: pass + if (value.startswith('"') and value.endswith('"')) or (value.startswith("'") and value.endswith("'")): + value = value[1:-1] return value def __init__(self): diff --git a/owocr/ocr.py b/owocr/ocr.py index 5ef986a..2ecb13c 100644 --- a/owocr/ocr.py +++ b/owocr/ocr.py @@ -17,7 +17,7 @@ import jaconv import numpy as np from PIL import Image from loguru import logger -import requests +import curl_cffi try: from manga_ocr import MangaOcr as MOCR @@ -907,19 +907,15 @@ class GoogleLens: 'Connection': 'keep-alive', 'Content-Type': 'application/x-protobuf', 'X-Goog-Api-Key': 'AIzaSyDr2UxVnv_U85AbhhY8XSHSIavUW0DC-sY', - 'Sec-Fetch-Site': 'none', 'Sec-Fetch-Mode': 'no-cors', - 'Sec-Fetch-Dest': 'empty', - 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36', - 'Accept-Encoding': 'gzip, deflate, br, zstd', - 'Accept-Language': 'ja-JP;q=0.6,ja;q=0.5' + 'Sec-Fetch-Dest': 'empty' } try: - res = requests.post('https://lensfrontend-pa.googleapis.com/v1/crupload', data=payload, headers=headers, timeout=20) - except requests.exceptions.Timeout: + res = curl_cffi.post('https://lensfrontend-pa.googleapis.com/v1/crupload', data=payload, headers=headers, impersonate='chrome', 
timeout=20) + except curl_cffi.requests.exceptions.Timeout: return (False, 'Request timeout!') - except requests.exceptions.ConnectionError: + except curl_cffi.requests.exceptions.ConnectionError: return (False, 'Connection error!') if res.status_code != 200: @@ -964,7 +960,7 @@ class Bing: ) def __init__(self): - self.requests_session = requests.Session() + self.requests_session = curl_cffi.Session() self.available = True logger.info('Bing ready') @@ -1033,25 +1029,20 @@ class Bing: upload_url = 'https://www.bing.com/images/search?view=detailv2&iss=sbiupload' upload_headers = { - 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', - 'accept-language': 'ja-JP;q=0.6,ja;q=0.5', - 'cache-control': 'max-age=0', - 'origin': 'https://www.bing.com', - 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:136.0) Gecko/20100101 Firefox/136.0', - } - files = { - 'imgurl': (None, ''), - 'cbir': (None, 'sbi'), - 'imageBin': (None, img_bytes) + 'origin': 'https://www.bing.com' } + mp = curl_cffi.CurlMime() + mp.addpart(name='imgurl', data='') + mp.addpart(name='cbir', data='sbi') + mp.addpart(name='imageBin', data=img_bytes) for _ in range(2): api_host = urlparse(upload_url).netloc try: - res = self.requests_session.post(upload_url, headers=upload_headers, files=files, timeout=20, allow_redirects=False) - except requests.exceptions.Timeout: + res = self.requests_session.post(upload_url, headers=upload_headers, multipart=mp, allow_redirects=False, impersonate='chrome', timeout=20) + except curl_cffi.requests.exceptions.Timeout: return (False, 'Request timeout!') - except requests.exceptions.ConnectionError: + except curl_cffi.requests.exceptions.ConnectionError: return (False, 'Connection error!') if res.status_code != 302: @@ -1074,25 +1065,21 @@ class Bing: api_url = f'https://{api_host}/images/api/custom/knowledge' api_headers = { - 'accept': '*/*', - 'accept-language': 
'ja-JP;q=0.6,ja;q=0.5', 'origin': 'https://www.bing.com', - 'referer': f'https://www.bing.com/images/search?view=detailV2&insightstoken={image_insights_token}', - 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:136.0) Gecko/20100101 Firefox/136.0', + 'referer': f'https://www.bing.com/images/search?view=detailV2&insightstoken={image_insights_token}' } api_data_json = { 'imageInfo': {'imageInsightsToken': image_insights_token, 'source': 'Url'}, 'knowledgeRequest': {'invokedSkills': ['OCR'], 'index': 1} } - files = { - 'knowledgeRequest': (None, json.dumps(api_data_json), 'application/json') - } + mp2 = curl_cffi.CurlMime() + mp2.addpart(name='knowledgeRequest', content_type='application/json', data=json.dumps(api_data_json)) try: - res = self.requests_session.post(api_url, headers=api_headers, files=files, timeout=20) - except requests.exceptions.Timeout: + res = self.requests_session.post(api_url, headers=api_headers, multipart=mp2, impersonate='chrome', timeout=20) + except curl_cffi.requests.exceptions.Timeout: return (False, 'Request timeout!') - except requests.exceptions.ConnectionError: + except curl_cffi.requests.exceptions.ConnectionError: return (False, 'Connection error!') if res.status_code != 200: @@ -1461,10 +1448,10 @@ class WinRTOCR: else: params = {'lang': self.language} try: - res = requests.post(self.url, params=params, data=self._preprocess(img), timeout=3) - except requests.exceptions.Timeout: + res = curl_cffi.post(self.url, params=params, data=self._preprocess(img), timeout=3) + except curl_cffi.requests.exceptions.Timeout: return (False, 'Request timeout!') - except requests.exceptions.ConnectionError: + except curl_cffi.requests.exceptions.ConnectionError: return (False, 'Connection error!') if res.status_code != 200: @@ -1578,10 +1565,10 @@ class OneOCR: else: img_processed, img_width, img_height = self._preprocess_notwindows(img) try: - res = requests.post(self.url, data=img_processed, timeout=3) - except 
requests.exceptions.Timeout: + res = curl_cffi.post(self.url, data=img_processed, timeout=3) + except curl_cffi.requests.exceptions.Timeout: return (False, 'Request timeout!') - except requests.exceptions.ConnectionError: + except curl_cffi.requests.exceptions.ConnectionError: return (False, 'Connection error!') if res.status_code != 200: @@ -2111,13 +2098,14 @@ class OCRSpace: 'OCREngine': str(self.engine_version), 'isOverlayRequired': 'True' } - files = {'file': ('image.' + img_extension, img_bytes, 'image/' + img_extension)} + mp = curl_cffi.CurlMime() + mp.addpart(name='file', filename=f'image.{img_extension}', content_type=f'image/{img_extension}', data=img_bytes) try: - res = requests.post('https://api.ocr.space/parse/image', data=data, files=files, timeout=20) - except requests.exceptions.Timeout: + res = curl_cffi.post('https://api.ocr.space/parse/image', data=data, multipart=mp, timeout=20) + except curl_cffi.requests.exceptions.Timeout: return (False, 'Request timeout!') - except requests.exceptions.ConnectionError: + except curl_cffi.requests.exceptions.ConnectionError: return (False, 'Connection error!') if res.status_code != 200: diff --git a/owocr/run.py b/owocr/run.py index 6de4e43..1ebdbff 100644 --- a/owocr/run.py +++ b/owocr/run.py @@ -1796,28 +1796,33 @@ class OutputResult: self.verbosity = config.get_general('verbosity') self.notifications = config.get_general('notifications') self.reorder_text = config.get_general('reorder_text') - self.line_separator = '' if config.get_general('join_lines') else ' ' - self.paragraph_separator = '' if config.get_general('join_paragraphs') else ' ' + self.line_separator = '' if config.get_general('join_lines') else config.get_general('line_separator').encode().decode('unicode_escape') + self.paragraph_separator = '' if config.get_general('join_paragraphs') else config.get_general('paragraph_separator').encode().decode('unicode_escape') self.write_to = config.get_general('write_to') self.filtering = 
TextFiltering() self.second_pass_thread = SecondPassThread() def _post_process(self, text, strip_spaces): - lines = [] + line_separator = '' if strip_spaces else self.line_separator + paragraphs = [] + + current_paragraph = [] for line in text: if line == '\n': - lines.append(self.paragraph_separator) + if current_paragraph: + paragraph = line_separator.join(current_paragraph) + paragraphs.append(paragraph) + current_paragraph = [] continue line = line.replace('…', '...') line = re.sub('[・.]{2,}', lambda x: (x.end() - x.start()) * '.', line) is_cj_text = self.filtering.cj_regex.search(line) if is_cj_text: - lines.append(jaconv.h2z(''.join(line.split()), ascii=True, digit=True)) + current_paragraph.append(jaconv.h2z(''.join(line.split()), ascii=True, digit=True)) else: - lines.append(line.strip()) - line_separator = '' if strip_spaces else self.line_separator - text = line_separator.join(lines) - text = re.sub(r'\s+', ' ', text).strip() + current_paragraph.append(re.sub(r'\s+', ' ', line).strip()) + + text = self.paragraph_separator.join(paragraphs) return text def _extract_lines_from_result(self, result_data): diff --git a/owocr_config.ini b/owocr_config.ini index 7a617bc..6618554 100644 --- a/owocr_config.ini +++ b/owocr_config.ini @@ -51,13 +51,13 @@ ;when reading with screen capture and periodic screenshots. ;notifications = False -;Combo to wait on for pausing the program. As an example: "++p". +;Combo to wait on for pausing the program. As an example: ++p. ;The list of keys can be found here: ;https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key ;combo_pause = ;Combo to wait on for switching the OCR engine. As an example: -;"++a". The list of keys can be found here: +;++a. 
The list of keys can be found here: ;https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key ;combo_engine_switch = @@ -92,15 +92,23 @@ ;screen_capture_line_recovery = True ;When reading with screen capture, regex to filter unwanted text from the output. -;Example value: "▶|♥|・" to remove either of those characters. +;Example value: ▶|♥|・ to remove either of those characters. ;screen_capture_regex_filter = -;Display lines in the text output without a space between them. +;Display lines in the text output without spaces/separators between them. ;join_lines = False -;Display paragraphs in the text output without a space between them. +;Display paragraphs in the text output without spaces/separators between them. ;join_paragraphs = False +;Custom line separator to use. Supports Python escape characters like \n for +;newlines. +;line_separator = " " + +;Custom paragraph separator to use. Supports Python escape characters like \n for +;newlines. +;paragraph_separator = " " + ;Regroup and reorder text instead of using paragraphs/order provided by the OCR ;engine. ;reorder_text = True @@ -110,14 +118,14 @@ ;When reading with screen capture, combo to wait on for taking a screenshot. ;If periodic screenshots are also enabled, any screenshot taken this way -;bypasses the filtering. Example value: "++s". The list of keys +;bypasses the filtering. Example value: ++s. The list of keys ;can be found here: ;https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key ;screen_capture_combo = ;When reading with screen capture, combo to wait on for invoking the ;coordinate picker to change the screen/window area. Example value: -;"++c". The list of keys can be found here: +;++c. 
The list of keys can be found here: ;https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key ;coordinate_selector_combo = diff --git a/pyproject.toml b/pyproject.toml index c408b11..ce7ffcd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "owocr" -version = "1.20" +version = "1.20.1" description = "Japanese OCR" readme = "README.md" requires-python = ">=3.11" @@ -26,7 +26,7 @@ dependencies = [ "desktop-notifier>=6.1.0", "mss>=10.1.0", "psutil", - "requests", + "curl_cffi", "pywin32;platform_system=='Windows'", "pyobjc;platform_system=='Darwin'" ]