Browse Source

Improve text filtering with OpenCV

main
Ashton Charbonneau 3 years ago
parent
commit
d9c9fb0e72
1 changed files with 31 additions and 18 deletions
  1. 31
    18
      rok-reader.py

+ 31
- 18
rok-reader.py View File

1
 import argparse
1
 import argparse
2
+import cv2
2
 import glob
3
 import glob
3
 import os
4
 import os
4
 import multiprocessing
5
 import multiprocessing
6
+import numpy
5
 from PIL import Image, ImageFilter
7
 from PIL import Image, ImageFilter
6
-from PIL.ImageOps import autocontrast, invert, grayscale, contain
8
+from PIL.ImageOps import autocontrast, invert, grayscale, contain, expand
7
 import pytesseract
9
 import pytesseract
8
 import signal
10
 import signal
9
 import sys
11
 import sys
10
 
12
 
13
+
11
 # ----
14
 # ----
12
 # Program
15
 # Program
13
 # ----
16
 # ----
71
 
74
 
72
 KILLS_TARGETS = [
75
 KILLS_TARGETS = [
73
     ("Kill Points", (1418, 312), (1694, 352), True, False, 0),
76
     ("Kill Points", (1418, 312), (1694, 352), True, False, 0),
74
-    ("T1 Kills", (1321, 637), (1538, 684), True, False, 0),
77
+    ("T1 Kills", (1325, 637), (1538, 684), True, False, 0),
75
     ("T1 Kill Points", (1986, 637), (2212, 684), True, False, 0),
78
     ("T1 Kill Points", (1986, 637), (2212, 684), True, False, 0),
76
-    ("T2 Kills", (1321, 702), (1538, 755), True, False, 0),
79
+    ("T2 Kills", (1325, 702), (1538, 755), True, False, 0),
77
     ("T2 Kill Points", (1986, 702), (2212, 755), True, False, 0),
80
     ("T2 Kill Points", (1986, 702), (2212, 755), True, False, 0),
78
-    ("T3 Kills", (1321, 770), (1538, 824), True, False, 0),
81
+    ("T3 Kills", (1325, 770), (1538, 824), True, False, 0),
79
     ("T3 Kill Points", (1986, 770), (2212, 824), True, False, 0),
82
     ("T3 Kill Points", (1986, 770), (2212, 824), True, False, 0),
80
-    ("T4 Kills", (1321, 847), (1538, 897), True, False, 0),
83
+    ("T4 Kills", (1325, 847), (1538, 897), True, False, 0),
81
     ("T4 Kill Points", (1986, 847), (2212, 897), True, False, 0),
84
     ("T4 Kill Points", (1986, 847), (2212, 897), True, False, 0),
82
-    ("T5 Kills", (1321, 918), (1538, 968), True, False, 0),
85
+    ("T5 Kills", (1325, 918), (1538, 968), True, False, 0),
83
     ("T5 Kill Points", (1986, 918), (2212, 968), True, False, 0),
86
     ("T5 Kill Points", (1986, 918), (2212, 968), True, False, 0),
84
-    ("Previous Kills", (1626, 985), (2228, 1039), False, False, 0)
87
+    ("Previous Kills", (1626, 985), (2228, 1039), False, False, -385)
85
 ]
88
 ]
86
 
89
 
87
 # ----
90
 # ----
132
 
135
 
133
 # Read text from a section of an image using Tesseract
136
 # Read text from a section of an image using Tesseract
134
 def read_string_from_image(rgbImage, box, is_number, inv, bonusRightTrim, debugFilePath):
137
 def read_string_from_image(rgbImage, box, is_number, inv, bonusRightTrim, debugFilePath):
135
-        # Crop to correct dimentions
138
+        # Crop to correct dimensions
136
         rgbImage = rgbImage.crop((box.x, box.y, box.x2, box.y2))
139
         rgbImage = rgbImage.crop((box.x, box.y, box.x2, box.y2))
137
 
140
 
138
         # Invert if flagged
141
         # Invert if flagged
139
         if inv: rgbImage = invert(rgbImage)
142
         if inv: rgbImage = invert(rgbImage)
140
 
143
 
141
-        # Apply filters
142
-        rgbImage = grayscale(rgbImage)
143
-        rgbImage = autocontrast(rgbImage, cutoff=(0, 75))
144
+        # Apply Pillow filters to cut off artifacts
145
+        rgbImage = autocontrast(rgbImage, cutoff=(0, 50))
146
+
147
+        # Convert to OpenCV
148
+        npImage=numpy.array(rgbImage)
149
+
150
+        # Set colors to grayscale
151
+        npImage=cv2.cvtColor(npImage, cv2.COLOR_BGR2GRAY)
152
+
153
+        # Apply OpenCV Filters
154
+        npImage = cv2.medianBlur(npImage, 3)
155
+        _, npImage = cv2.threshold(npImage, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
156
+        # npImage = cv2.adaptiveThreshold(npImage, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
157
+
158
+        # Convert to Pillow
159
+        rgbImage = Image.fromarray(npImage)
144
 
160
 
145
         # Crop to content
161
         # Crop to content
146
-        bbox = autocontrast(invert(rgbImage), cutoff=(0, 90)).getbbox()
162
+        bbox = invert(rgbImage).getbbox()
147
         if bbox: rgbImage = rgbImage.crop((bbox[0], bbox[1], bbox[2] + bonusRightTrim, bbox[3]))
163
         if bbox: rgbImage = rgbImage.crop((bbox[0], bbox[1], bbox[2] + bonusRightTrim, bbox[3]))
148
-
149
-        # Resize and sharpen
150
-        rgbImage = contain(rgbImage, (800, 800), method=1)
151
-        rgbImage = rgbImage.filter(ImageFilter.EDGE_ENHANCE_MORE)
152
-        rgbImage = rgbImage.filter(ImageFilter.SHARPEN)
164
+        if bbox: rgbImage = rgbImage.crop(invert(rgbImage).getbbox())
165
+        rgbImage = expand(rgbImage, border=10, fill=255)
153
 
166
 
154
         if arguments.debug:
167
         if arguments.debug:
155
             rgbImage.save(debugFilePath)
168
             rgbImage.save(debugFilePath)
205
     # Get all previously scraped data # TODO: limit to filenames only
218
     # Get all previously scraped data # TODO: limit to filenames only
206
     alreadyScraped = ""
219
     alreadyScraped = ""
207
     for outputPath in [OUTPUT_PATH_PROFILE, OUTPUT_PATH_MOREINFO, OUTPUT_PATH_KILLS]:
220
     for outputPath in [OUTPUT_PATH_PROFILE, OUTPUT_PATH_MOREINFO, OUTPUT_PATH_KILLS]:
208
-        with open(projectFolder + outputPath, "r+", newline='', encoding='utf-8') as outputFile:
221
+        with open(projectFolder + outputPath, "w+", newline='', encoding='utf-8') as outputFile:
209
             alreadyScraped = alreadyScraped + outputFile.read()
222
             alreadyScraped = alreadyScraped + outputFile.read()
210
 
223
 
211
     # Mark as duplicates
224
     # Mark as duplicates

Loading…
Cancel
Save