Switch from requests to curl_cffi, enable Chrome impersonation for Lens and Bing (fixes Bing instability). Add custom line/paragraph separator feature

This commit is contained in:
AuroraWright
2025-11-30 02:43:05 +01:00
parent 9d833d4338
commit 4e497ac3c3
5 changed files with 78 additions and 68 deletions

View File

@@ -37,9 +37,9 @@ parser.add_argument('-n', '--notifications', type=str2bool, nargs='?', const=Tru
parser.add_argument('-a', '--auto_pause', type=float, default=argparse.SUPPRESS,
help='Automatically pause the program after the specified amount of seconds since the last successful text recognition. 0 to disable.')
parser.add_argument('-cp', '--combo_pause', type=str, default=argparse.SUPPRESS,
help='Combo to wait on for pausing the program. As an example: "<ctrl>+<shift>+p". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
help='Combo to wait on for pausing the program. As an example: <ctrl>+<shift>+p. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
parser.add_argument('-cs', '--combo_engine_switch', type=str, default=argparse.SUPPRESS,
help='Combo to wait on for switching the OCR engine. As an example: "<ctrl>+<shift>+a". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
help='Combo to wait on for switching the OCR engine. As an example: <ctrl>+<shift>+a. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
parser.add_argument('-sa', '--screen_capture_area', type=str, default=argparse.SUPPRESS,
help='Area to target when reading with screen capture. Can be either empty (automatic selector), a set of coordinates (x,y,width,height), "screen_N" (captures a whole screen, where N is the screen number starting from 1) or a window name (the first matching window title will be used).')
parser.add_argument('-swa', '--screen_capture_window_area', type=str, default=argparse.SUPPRESS,
@@ -53,17 +53,21 @@ parser.add_argument('-sf', '--screen_capture_frame_stabilization', type=float, d
parser.add_argument('-sl', '--screen_capture_line_recovery', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS,
help='When reading with screen capture and frame stabilization is on, try to recover missed lines from unstable frames. Can lead to increased glitches.')
parser.add_argument('-sr', '--screen_capture_regex_filter', type=str, default=argparse.SUPPRESS,
help='When reading with screen capture, regex to filter unwanted text from the output. Example value: "▶|♥|・" to remove either of those characters.')
help='When reading with screen capture, regex to filter unwanted text from the output. Example value: ▶|♥|・ to remove either of those characters.')
parser.add_argument('-sc', '--screen_capture_combo', type=str, default=argparse.SUPPRESS,
help='When reading with screen capture, combo to wait on for taking a screenshot. If periodic screenshots are also enabled, any screenshot taken this way bypasses the filtering. Example value: "<ctrl>+<shift>+s". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
help='When reading with screen capture, combo to wait on for taking a screenshot. If periodic screenshots are also enabled, any screenshot taken this way bypasses the filtering. Example value: <ctrl>+<shift>+s. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
parser.add_argument('-scc', '--coordinate_selector_combo', type=str, default=argparse.SUPPRESS,
help='When reading with screen capture, combo to wait on for invoking the coordinate picker to change the screen/window area. Example value: "<ctrl>+<shift>+c". The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
help='When reading with screen capture, combo to wait on for invoking the coordinate picker to change the screen/window area. Example value: <ctrl>+<shift>+c. The list of keys can be found here: https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key')
parser.add_argument('-l', '--language', type=str, default=argparse.SUPPRESS,
help='Two letter language code to use for some engines and for filtering screen capture OCR results. Ex. "ja" for Japanese, "zh" for Chinese, "ko" for Korean, "ar" for Arabic, "ru" for Russian, "el" for Greek, "he" for Hebrew, "th" for Thai. Any other value will use Latin Extended (for most European languages and English).')
parser.add_argument('-j', '--join_lines', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS,
help='Display lines in the text output without a space between them.')
help='Display lines in the text output without spaces/separators between them.')
parser.add_argument('-jp', '--join_paragraphs', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS,
help='Display paragraphs in the text output without a space between them.')
help='Display paragraphs in the text output without spaces/separators between them.')
parser.add_argument('-ls', '--line_separator', type=str, default=argparse.SUPPRESS,
help='Custom line separator to use. Supports Python escape characters like \\n for newlines.')
parser.add_argument('-ps', '--paragraph_separator', type=str, default=argparse.SUPPRESS,
help='Custom paragraph separator to use. Supports Python escape characters like \\n for newlines.')
parser.add_argument('-rt', '--reorder_text', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS,
help='Regroup and reorder text instead of using paragraphs/order provided by the OCR engine.')
parser.add_argument('-f', '--furigana_filter', type=str2bool, nargs='?', const=True, default=argparse.SUPPRESS,
@@ -109,6 +113,9 @@ class Config:
'screen_capture_line_recovery': True,
'screen_capture_regex_filter': '',
'join_lines': False,
'join_paragraphs': False,
'line_separator': ' ',
'paragraph_separator': ' ',
'reorder_text': True,
'furigana_filter': True,
'screen_capture_combo': '',
@@ -136,6 +143,8 @@ class Config:
return float(value)
except ValueError:
pass
if (value.startswith('"') and value.endswith('"')) or (value.startswith("'") and value.endswith("'")):
value = value[1:-1]
return value
def __init__(self):

View File

@@ -17,7 +17,7 @@ import jaconv
import numpy as np
from PIL import Image
from loguru import logger
import requests
import curl_cffi
try:
from manga_ocr import MangaOcr as MOCR
@@ -907,19 +907,15 @@ class GoogleLens:
'Connection': 'keep-alive',
'Content-Type': 'application/x-protobuf',
'X-Goog-Api-Key': 'AIzaSyDr2UxVnv_U85AbhhY8XSHSIavUW0DC-sY',
'Sec-Fetch-Site': 'none',
'Sec-Fetch-Mode': 'no-cors',
'Sec-Fetch-Dest': 'empty',
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
'Accept-Encoding': 'gzip, deflate, br, zstd',
'Accept-Language': 'ja-JP;q=0.6,ja;q=0.5'
'Sec-Fetch-Dest': 'empty'
}
try:
res = requests.post('https://lensfrontend-pa.googleapis.com/v1/crupload', data=payload, headers=headers, timeout=20)
except requests.exceptions.Timeout:
res = curl_cffi.post('https://lensfrontend-pa.googleapis.com/v1/crupload', data=payload, headers=headers, impersonate='chrome', timeout=20)
except curl_cffi.requests.exceptions.Timeout:
return (False, 'Request timeout!')
except requests.exceptions.ConnectionError:
except curl_cffi.requests.exceptions.ConnectionError:
return (False, 'Connection error!')
if res.status_code != 200:
@@ -964,7 +960,7 @@ class Bing:
)
def __init__(self):
self.requests_session = requests.Session()
self.requests_session = curl_cffi.Session()
self.available = True
logger.info('Bing ready')
@@ -1033,25 +1029,20 @@ class Bing:
upload_url = 'https://www.bing.com/images/search?view=detailv2&iss=sbiupload'
upload_headers = {
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'accept-language': 'ja-JP;q=0.6,ja;q=0.5',
'cache-control': 'max-age=0',
'origin': 'https://www.bing.com',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:136.0) Gecko/20100101 Firefox/136.0',
}
files = {
'imgurl': (None, ''),
'cbir': (None, 'sbi'),
'imageBin': (None, img_bytes)
'origin': 'https://www.bing.com'
}
mp = curl_cffi.CurlMime()
mp.addpart(name='imgurl', data='')
mp.addpart(name='cbir', data='sbi')
mp.addpart(name='imageBin', data=img_bytes)
for _ in range(2):
api_host = urlparse(upload_url).netloc
try:
res = self.requests_session.post(upload_url, headers=upload_headers, files=files, timeout=20, allow_redirects=False)
except requests.exceptions.Timeout:
res = self.requests_session.post(upload_url, headers=upload_headers, multipart=mp, allow_redirects=False, impersonate='chrome', timeout=20)
except curl_cffi.requests.exceptions.Timeout:
return (False, 'Request timeout!')
except requests.exceptions.ConnectionError:
except curl_cffi.requests.exceptions.ConnectionError:
return (False, 'Connection error!')
if res.status_code != 302:
@@ -1074,25 +1065,21 @@ class Bing:
api_url = f'https://{api_host}/images/api/custom/knowledge'
api_headers = {
'accept': '*/*',
'accept-language': 'ja-JP;q=0.6,ja;q=0.5',
'origin': 'https://www.bing.com',
'referer': f'https://www.bing.com/images/search?view=detailV2&insightstoken={image_insights_token}',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:136.0) Gecko/20100101 Firefox/136.0',
'referer': f'https://www.bing.com/images/search?view=detailV2&insightstoken={image_insights_token}'
}
api_data_json = {
'imageInfo': {'imageInsightsToken': image_insights_token, 'source': 'Url'},
'knowledgeRequest': {'invokedSkills': ['OCR'], 'index': 1}
}
files = {
'knowledgeRequest': (None, json.dumps(api_data_json), 'application/json')
}
mp2 = curl_cffi.CurlMime()
mp2.addpart(name='knowledgeRequest', content_type='application/json', data=json.dumps(api_data_json))
try:
res = self.requests_session.post(api_url, headers=api_headers, files=files, timeout=20)
except requests.exceptions.Timeout:
res = self.requests_session.post(api_url, headers=api_headers, multipart=mp2, impersonate='chrome', timeout=20)
except curl_cffi.requests.exceptions.Timeout:
return (False, 'Request timeout!')
except requests.exceptions.ConnectionError:
except curl_cffi.requests.exceptions.ConnectionError:
return (False, 'Connection error!')
if res.status_code != 200:
@@ -1461,10 +1448,10 @@ class WinRTOCR:
else:
params = {'lang': self.language}
try:
res = requests.post(self.url, params=params, data=self._preprocess(img), timeout=3)
except requests.exceptions.Timeout:
res = curl_cffi.post(self.url, params=params, data=self._preprocess(img), timeout=3)
except curl_cffi.requests.exceptions.Timeout:
return (False, 'Request timeout!')
except requests.exceptions.ConnectionError:
except curl_cffi.requests.exceptions.ConnectionError:
return (False, 'Connection error!')
if res.status_code != 200:
@@ -1578,10 +1565,10 @@ class OneOCR:
else:
img_processed, img_width, img_height = self._preprocess_notwindows(img)
try:
res = requests.post(self.url, data=img_processed, timeout=3)
except requests.exceptions.Timeout:
res = curl_cffi.post(self.url, data=img_processed, timeout=3)
except curl_cffi.requests.exceptions.Timeout:
return (False, 'Request timeout!')
except requests.exceptions.ConnectionError:
except curl_cffi.requests.exceptions.ConnectionError:
return (False, 'Connection error!')
if res.status_code != 200:
@@ -2111,13 +2098,14 @@ class OCRSpace:
'OCREngine': str(self.engine_version),
'isOverlayRequired': 'True'
}
files = {'file': ('image.' + img_extension, img_bytes, 'image/' + img_extension)}
mp = curl_cffi.CurlMime()
mp.addpart(name='file', filename=f'image.{img_extension}', content_type=f'image/{img_extension}', data=img_bytes)
try:
res = requests.post('https://api.ocr.space/parse/image', data=data, files=files, timeout=20)
except requests.exceptions.Timeout:
res = curl_cffi.post('https://api.ocr.space/parse/image', data=data, multipart=mp, timeout=20)
except curl_cffi.requests.exceptions.Timeout:
return (False, 'Request timeout!')
except requests.exceptions.ConnectionError:
except curl_cffi.requests.exceptions.ConnectionError:
return (False, 'Connection error!')
if res.status_code != 200:

View File

@@ -1796,28 +1796,33 @@ class OutputResult:
self.verbosity = config.get_general('verbosity')
self.notifications = config.get_general('notifications')
self.reorder_text = config.get_general('reorder_text')
self.line_separator = '' if config.get_general('join_lines') else ' '
self.paragraph_separator = '' if config.get_general('join_paragraphs') else ' '
self.line_separator = '' if config.get_general('join_lines') else config.get_general('line_separator').encode().decode('unicode_escape')
self.paragraph_separator = '' if config.get_general('join_paragraphs') else config.get_general('paragraph_separator').encode().decode('unicode_escape')
self.write_to = config.get_general('write_to')
self.filtering = TextFiltering()
self.second_pass_thread = SecondPassThread()
def _post_process(self, text, strip_spaces):
lines = []
line_separator = '' if strip_spaces else self.line_separator
paragraphs = []
current_paragraph = []
for line in text:
if line == '\n':
lines.append(self.paragraph_separator)
if current_paragraph:
paragraph = line_separator.join(current_paragraph)
paragraphs.append(paragraph)
current_paragraph = []
continue
line = line.replace('…', '...')
line = re.sub('[・.]{2,}', lambda x: (x.end() - x.start()) * '.', line)
is_cj_text = self.filtering.cj_regex.search(line)
if is_cj_text:
lines.append(jaconv.h2z(''.join(line.split()), ascii=True, digit=True))
current_paragraph.append(jaconv.h2z(''.join(line.split()), ascii=True, digit=True))
else:
lines.append(line.strip())
line_separator = '' if strip_spaces else self.line_separator
text = line_separator.join(lines)
text = re.sub(r'\s+', ' ', text).strip()
current_paragraph.append(re.sub(r'\s+', ' ', line).strip())
text = self.paragraph_separator.join(paragraphs)
return text
def _extract_lines_from_result(self, result_data):

View File

@@ -51,13 +51,13 @@
;when reading with screen capture and periodic screenshots.
;notifications = False
;Combo to wait on for pausing the program. As an example: "<ctrl>+<shift>+p".
;Combo to wait on for pausing the program. As an example: <ctrl>+<shift>+p.
;The list of keys can be found here:
;https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
;combo_pause =
;Combo to wait on for switching the OCR engine. As an example:
;"<ctrl>+<shift>+a". The list of keys can be found here:
;<ctrl>+<shift>+a. The list of keys can be found here:
;https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
;combo_engine_switch =
@@ -92,15 +92,23 @@
;screen_capture_line_recovery = True
;When reading with screen capture, regex to filter unwanted text from the output.
;Example value: "▶|♥|・" to remove either of those characters.
;Example value: ▶|♥|・ to remove either of those characters.
;screen_capture_regex_filter =
;Display lines in the text output without a space between them.
;Display lines in the text output without spaces/separators between them.
;join_lines = False
;Display paragraphs in the text output without a space between them.
;Display paragraphs in the text output without spaces/separators between them.
;join_paragraphs = False
;Custom line separator to use. Supports Python escape characters like \n for
;newlines.
;line_separator = " "
;Custom paragraph separator to use. Supports Python escape characters like \n
;for newlines.
;paragraph_separator = " "
;Regroup and reorder text instead of using paragraphs/order provided by the OCR
;engine.
;reorder_text = True
@@ -110,14 +118,14 @@
;When reading with screen capture, combo to wait on for taking a screenshot.
;If periodic screenshots are also enabled, any screenshot taken this way
;bypasses the filtering. Example value: "<ctrl>+<shift>+s". The list of keys
;bypasses the filtering. Example value: <ctrl>+<shift>+s. The list of keys
;can be found here:
;https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
;screen_capture_combo =
;When reading with screen capture, combo to wait on for invoking the
;coordinate picker to change the screen/window area. Example value:
;"<ctrl>+<shift>+c". The list of keys can be found here:
;<ctrl>+<shift>+c. The list of keys can be found here:
;https://pynput.readthedocs.io/en/latest/keyboard.html#pynput.keyboard.Key
;coordinate_selector_combo =

View File

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "owocr"
version = "1.20"
version = "1.20.1"
description = "Japanese OCR"
readme = "README.md"
requires-python = ">=3.11"
@@ -26,7 +26,7 @@ dependencies = [
"desktop-notifier>=6.1.0",
"mss>=10.1.0",
"psutil",
"requests",
"curl_cffi",
"pywin32;platform_system=='Windows'",
"pyobjc;platform_system=='Darwin'"
]