forked from 0ssamaak0/CLIPPyX
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathOCR.py
More file actions
48 lines (41 loc) · 1.33 KB
/
Copy pathOCR.py
File metadata and controls
48 lines (41 loc) · 1.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import torch
from doctr.models import ocr_predictor
import matplotlib.pyplot as plt
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Build the pretrained docTR OCR predictor once at import time so that every
# call to the OCR helper reuses the same loaded weights. The first name is
# the text-detection architecture, the second the text-recognition one.
model = ocr_predictor(
    "db_mobilenet_v3_large",
    "crnn_mobilenet_v3_large",
    pretrained=True,
)
model.to(device)
def apply_OCR(image_path, OCR_threshold=0.5):
    """
    Applies Optical Character Recognition (OCR) on an image and returns the recognized text.

    Args:
        image_path (str): The path to the image file.
        OCR_threshold (float, optional): A word is kept only if its confidence
            is strictly greater than this value. Defaults to 0.5.

    Returns:
        str or None: The kept words joined by single spaces. None is returned
        when the image cannot be read, when the OCR result cannot be walked,
        or when the joined text is shorter than 3 characters or contains no
        alphabetic character.
    """
    try:
        image = plt.imread(image_path)
    except Exception:
        # Deliberately best-effort: an unreadable or missing image is treated
        # the same as an image with no text.
        return None

    if image.ndim == 2:
        # Grayscale image (H, W): replicate the single channel so the model
        # receives an (H, W, 3) array.
        # NOTE(review): docTR predictors are expected to reject non-3-D pages;
        # confirm against the installed docTR version.
        image = image[..., None].repeat(3, axis=-1)
    elif image.ndim == 3 and image.shape[-1] == 4:
        # RGBA image: drop the alpha channel. The rank check keeps a 2-D image
        # that happens to be 4 pixels wide from being cropped by mistake.
        image = image[..., :3]

    results = model([image])
    blocks = results.pages[0].blocks

    try:
        words = [
            word.value
            for block in blocks
            for line in block.lines
            for word in line.words
            if word.confidence > OCR_threshold
        ]
    except Exception:
        # Keep the original "no text on failure" contract, but no longer
        # swallow KeyboardInterrupt / SystemExit as a bare ``except`` did.
        return None

    text = " ".join(words)

    # Discard results that are too short or contain no letters at all; this
    # also covers the empty string.
    if len(text) < 3 or not any(char.isalpha() for char in text):
        return None
    return text