-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtext_detection.py
117 lines (87 loc) · 3.59 KB
/
text_detection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
from imutils.object_detection import non_max_suppression
import numpy as np
import argparse
import pytesseract
import cv2
# Command-line interface. The parsed options are exposed as the module-level
# dict `args`, keyed by: image, east, min_confidence, width, height, padding.
ap = argparse.ArgumentParser()
# The image and model paths have no sensible default; marking them required
# makes argparse report a clear usage error instead of letting `None` reach
# OpenCV further down the script.
ap.add_argument('-i', '--image', type=str, required=True,
                help='path to input image')
ap.add_argument('-e', '--east', type=str, required=True,
                help='path to input EAST text detector')
ap.add_argument('-c', '--min-confidence', type=float, default=0.5,
                help='minimum probability required to inspect a region')
ap.add_argument('-w', '--width', type=int, default=320,
                help='resized image width (should be a multiple of 32)')
ap.add_argument('-t', '--height', type=int, default=320,
                help='resized image height (should be a multiple of 32)')
ap.add_argument("-p", "--padding", type=float, default=0.0,
                help="amount of padding to add to each border of ROI")
args = vars(ap.parse_args())
def decode_predictions(scores, geometry, min_confidence=None):
    """Turn the detector's score and geometry maps into candidate boxes.

    Args:
        scores: 4-D array read as ``scores[0, 0, y, x]``; its last two axes
            define the grid of rows and columns that is scanned.
        geometry: 4-D array read as ``geometry[0, c, y, x]`` for ``c`` in
            0..4. Channels 0-3 are distances that are summed pairwise into
            the box height (0 + 2) and width (1 + 3); channel 4 is the
            rotation angle passed to ``np.cos`` / ``np.sin``.
        min_confidence: cells whose score is below this value are skipped.
            When ``None`` (the default) the module-level
            ``args['min_confidence']`` command-line option is used.

    Returns:
        A ``(rects, confidences)`` pair of equal-length lists: ``rects``
        holds ``(start_x, start_y, end_x, end_y)`` integer tuples in the
        coordinate space of the resized network input, and ``confidences``
        holds the matching score for each rectangle.
    """
    if min_confidence is None:
        # Fall back to the CLI option so existing two-argument calls behave
        # exactly as before.
        min_confidence = args['min_confidence']

    (num_rows, num_cols) = scores.shape[2:4]
    rects = []
    confidences = []

    for y in range(0, num_rows):
        # Slice out the current row of every map once, outside the x loop.
        scores_data = scores[0, 0, y]
        x_data_0 = geometry[0, 0, y]
        x_data_1 = geometry[0, 1, y]
        x_data_2 = geometry[0, 2, y]
        x_data_3 = geometry[0, 3, y]
        angles_data = geometry[0, 4, y]

        for x in range(0, num_cols):
            if scores_data[x] < min_confidence:
                continue

            # Map the grid cell back to input-image coordinates; the factor
            # of 4 is presumably the network's output stride (TODO confirm).
            (offset_x, offset_y) = (x * 4.0, y * 4.0)

            angle = angles_data[x]
            cos = np.cos(angle)
            sin = np.sin(angle)

            h = x_data_0[x] + x_data_2[x]
            w = x_data_1[x] + x_data_3[x]

            # The bottom-right corner is derived from the rotated distances;
            # the top-left corner then follows from the box size.
            end_x = int(offset_x + (cos * x_data_1[x]) + (sin * x_data_2[x]))
            end_y = int(offset_y - (sin * x_data_1[x]) + (cos * x_data_2[x]))
            start_x = int(end_x - w)
            start_y = int(end_y - h)

            rects.append((start_x, start_y, end_x, end_y))
            confidences.append(scores_data[x])

    return rects, confidences
# --- Load the input image -------------------------------------------------
image = cv2.imread(args['image'])
if image is None:
    # cv2.imread signals a missing or undecodable file by returning None
    # instead of raising; fail here with a readable message.
    raise SystemExit(
        "[ERROR] could not read input image: {}".format(args['image']))
orig = image.copy()
(orig_h, orig_w) = image.shape[:2]

# Ratios used later to scale boxes from the resized image back to the
# original image.
(new_w, new_h) = (args['width'], args['height'])
r_w = orig_w / float(new_w)
r_h = orig_h / float(new_h)

image = cv2.resize(image, (new_w, new_h))
(H, W) = image.shape[:2]

# --- Run the EAST text detector --------------------------------------------
# Output layers requested from the network; they are unpacked below as
# (scores, geometry) in this order.
layer_names = [
    "feature_fusion/Conv_7/Sigmoid",
    'feature_fusion/concat_3']

print("[INFO] loading EAST text detector")
net = cv2.dnn.readNet(args['east'])

blob = cv2.dnn.blobFromImage(image, 1.0, (W, H), (123.68, 116.78, 103.94),
                             swapRB=True, crop=False)
net.setInput(blob)
(scores, geometry) = net.forward(layer_names)

# Decode the raw maps into boxes, then drop overlapping candidates.
(rects, confidences) = decode_predictions(scores, geometry)
boxes = non_max_suppression(np.array(rects), probs=confidences)

# --- OCR every detected region ---------------------------------------------
# Tesseract options are the same for every region, so build them once.
config = ("-l eng --oem 1 --psm 7")
results = []

for (x1, y1, x2, y2) in boxes:
    # Scale the box from resized-image coordinates to the original image.
    x1 = int(x1 * r_w)
    y1 = int(y1 * r_h)
    x2 = int(x2 * r_w)
    y2 = int(y2 * r_h)

    # Padding is expressed as a fraction of the box size.
    dx = int((x2 - x1) * args['padding'])
    dy = int((y2 - y1) * args['padding'])

    # Apply the padding and clamp the box to the image bounds.
    # NOTE(review): the far edge is padded by twice the amount of the near
    # edge (dx * 2 / dy * 2); kept as-is to preserve existing output, but
    # confirm whether symmetric padding was intended.
    x1 = max(0, x1 - dx)
    y1 = max(0, y1 - dy)
    x2 = min(orig_w, x2 + (dx * 2))
    y2 = min(orig_h, y2 + (dy * 2))

    roi = orig[y1:y2, x1:x2]
    if roi.size == 0:
        # A box that collapses after clamping has no pixels to recognise.
        continue

    text = pytesseract.image_to_string(roi, config=config)
    results.append(((x1, y1, x2, y2), text))

# --- Report and display the results -----------------------------------------
# Order the results by the box's left edge, then by its top edge.
results = sorted(results, key=lambda r: (r[0][0], r[0][1]))

for ((x1, y1, x2, y2), text) in results:
    print("OCR text")
    print("========")
    print("{}\n".format(text))

    # Drop non-ASCII characters before handing the string to cv2.putText.
    text = "".join([c if ord(c) < 128 else "" for c in text]).strip()

    # Draw on a fresh copy so each window shows a single detection.
    output = orig.copy()
    # Colour and thickness are separate arguments: red box, 2 px thick.
    cv2.rectangle(output, (x1, y1), (x2, y2), (0, 0, 255), 2)
    cv2.putText(output, text, (x1, y1 - 20), cv2.FONT_HERSHEY_SIMPLEX, 1.2,
                (0, 0, 255), 3)

    cv2.imshow("text detection", output)
    cv2.waitKey(0)