forked from RandomwalkDev/ai-ml-task
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodel.py
More file actions
99 lines (81 loc) · 3.48 KB
/
Copy pathmodel.py
File metadata and controls
99 lines (81 loc) · 3.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import cv2
import numpy as np
from skimage import exposure
import easyocr
from collections import defaultdict
import json
import re
#initialize OCR_reader
OCR_reader = easyocr.Reader(['en'])
# Preparing the image
def preprocess_image(image):
image = cv2.imread(image)
gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
cv2.imwrite('grayscale_image.jpg', gray_image)
denoised_image = cv2.fastNlMeansDenoising(gray_image, h=30)
cv2.imwrite('denoised_image.jpg', denoised_image)
thresholded_image = cv2.adaptiveThreshold(
denoised_image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
cv2.imwrite('thresholded_image.jpg', thresholded_image)
contrast_enhanced_image = exposure.equalize_adapthist(thresholded_image, clip_limit=0.03)
sharpening_kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
sharpened_image = cv2.filter2D(contrast_enhanced_image, -1, sharpening_kernel)
sharpened_image = (sharpened_image * 255).astype(np.uint8)
cv2.imwrite('sharpened_image.jpg', sharpened_image)
return sharpened_image
def extract_text_regions(image):
ocr_results = OCR_reader.readtext(image, detail=1, paragraph=True)
extracted_texts = []
for result in ocr_results:
if len(result) == 3:
bbox, text, _ = result
elif len(result) == 2:
bbox, text = result
else:
continue
x_min = int(min([point[0] for point in bbox]))
y_min = int(min([point[1] for point in bbox]))
x_max = int(max([point[0] for point in bbox]))
y_max = int(max([point[1] for point in bbox]))
text_height = y_max - y_min
y_center = (y_min + y_max) // 2
extracted_texts.append((text, y_center, text_height))
print(f"Detected Text: '{text}', Y-Center: {y_center}, Height: {text_height}")
return extracted_texts
def refine_headers_and_content(extracted_texts, header_height_threshold=80):
organized_data = defaultdict(list)
current_header = None
extracted_texts.sort(key=lambda x: x[1])
for text, y_center, height in extracted_texts:
words = text.split()
if re.match(r'^[A-Z][a-z]+', words[0]) or height > header_height_threshold:
current_header = words[0]
content_words = words[1:] if len(words) > 1 else []
if current_header:
organized_data[current_header] = []
if content_words:
organized_data[current_header].extend(content_words)
elif current_header:
organized_data[current_header].extend(words)
organized_data = {
header: ', '.join(re.sub(r'[;]+', '', word) for word in content)
for header, content in organized_data.items()
}
return organized_data
def save_to_json(data, output_path):
with open(output_path, 'w') as json_file:
json.dump(data, json_file, indent=4)
# main function
def main(image, output_file, header_height_threshold=100):
preprocessed_image = preprocess_image(image)
extracted_texts = extract_text_regions(preprocessed_image)
organized_data = refine_headers_and_content(extracted_texts, header_height_threshold)
save_to_json(organized_data, output_file)
print("Text extraction and organization completed successfully.")
return organized_data
#function calling
if __name__ == "__main__":
image = 'sample.jpeg'
output_file = 'extracted_text.json'
organized_data = main(image, output_file)
print(json.dumps(organized_data, indent=4))