Procházet zdrojové kódy

Improve text filtering with OpenCV

main
Ashton Charbonneau před 2 roky
rodič
revize
d9c9fb0e72
Změněn 1 soubor: 31 přidání a 18 odebrání
  1. 31
    18
      rok-reader.py

+ 31
- 18
rok-reader.py Zobrazit soubor

@@ -1,13 +1,16 @@
1 1
 import argparse
2
+import cv2
2 3
 import glob
3 4
 import os
4 5
 import multiprocessing
6
+import numpy
5 7
 from PIL import Image, ImageFilter
6
-from PIL.ImageOps import autocontrast, invert, grayscale, contain
8
+from PIL.ImageOps import autocontrast, invert, grayscale, contain, expand
7 9
 import pytesseract
8 10
 import signal
9 11
 import sys
10 12
 
13
+
11 14
 # ----
12 15
 # Program
13 16
 # ----
@@ -71,17 +74,17 @@ MOREINFO_TARGETS = [
71 74
 
72 75
 KILLS_TARGETS = [
73 76
     ("Kill Points", (1418, 312), (1694, 352), True, False, 0),
74
-    ("T1 Kills", (1321, 637), (1538, 684), True, False, 0),
77
+    ("T1 Kills", (1325, 637), (1538, 684), True, False, 0),
75 78
     ("T1 Kill Points", (1986, 637), (2212, 684), True, False, 0),
76
-    ("T2 Kills", (1321, 702), (1538, 755), True, False, 0),
79
+    ("T2 Kills", (1325, 702), (1538, 755), True, False, 0),
77 80
     ("T2 Kill Points", (1986, 702), (2212, 755), True, False, 0),
78
-    ("T3 Kills", (1321, 770), (1538, 824), True, False, 0),
81
+    ("T3 Kills", (1325, 770), (1538, 824), True, False, 0),
79 82
     ("T3 Kill Points", (1986, 770), (2212, 824), True, False, 0),
80
-    ("T4 Kills", (1321, 847), (1538, 897), True, False, 0),
83
+    ("T4 Kills", (1325, 847), (1538, 897), True, False, 0),
81 84
     ("T4 Kill Points", (1986, 847), (2212, 897), True, False, 0),
82
-    ("T5 Kills", (1321, 918), (1538, 968), True, False, 0),
85
+    ("T5 Kills", (1325, 918), (1538, 968), True, False, 0),
83 86
     ("T5 Kill Points", (1986, 918), (2212, 968), True, False, 0),
84
-    ("Previous Kills", (1626, 985), (2228, 1039), False, False, 0)
87
+    ("Previous Kills", (1626, 985), (2228, 1039), False, False, -385)
85 88
 ]
86 89
 
87 90
 # ----
@@ -132,24 +135,34 @@ def read_file(fileTuple):
132 135
 
133 136
 # Read text from a section of an image using Tesseract
134 137
 def read_string_from_image(rgbImage, box, is_number, inv, bonusRightTrim, debugFilePath):
135
-        # Crop to correct dimentions
138
+        # Crop to correct dimensions
136 139
         rgbImage = rgbImage.crop((box.x, box.y, box.x2, box.y2))
137 140
 
138 141
         # Invert if flagged
139 142
         if inv: rgbImage = invert(rgbImage)
140 143
 
141
-        # Apply filters
142
-        rgbImage = grayscale(rgbImage)
143
-        rgbImage = autocontrast(rgbImage, cutoff=(0, 75))
144
+        # Apply Pillow filters to cut off artifacts
145
+        rgbImage = autocontrast(rgbImage, cutoff=(0, 50))
146
+
147
+        # Convert to OpenCV
148
+        npImage=numpy.array(rgbImage)
149
+
150
+        # Set colors to grayscale
151
+        npImage=cv2.cvtColor(npImage, cv2.COLOR_BGR2GRAY)
152
+
153
+        # Apply OpenCV Filters
154
+        npImage = cv2.medianBlur(npImage, 3)
155
+        _, npImage = cv2.threshold(npImage, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
156
+        # npImage = cv2.adaptiveThreshold(npImage, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
157
+
158
+        # Convert to Pillow
159
+        rgbImage = Image.fromarray(npImage)
144 160
 
145 161
         # Crop to content
146
-        bbox = autocontrast(invert(rgbImage), cutoff=(0, 90)).getbbox()
162
+        bbox = invert(rgbImage).getbbox()
147 163
         if bbox: rgbImage = rgbImage.crop((bbox[0], bbox[1], bbox[2] + bonusRightTrim, bbox[3]))
148
-
149
-        # Resize and sharpen
150
-        rgbImage = contain(rgbImage, (800, 800), method=1)
151
-        rgbImage = rgbImage.filter(ImageFilter.EDGE_ENHANCE_MORE)
152
-        rgbImage = rgbImage.filter(ImageFilter.SHARPEN)
164
+        if bbox: rgbImage = rgbImage.crop(invert(rgbImage).getbbox())
165
+        rgbImage = expand(rgbImage, border=10, fill=255)
153 166
 
154 167
         if arguments.debug:
155 168
             rgbImage.save(debugFilePath)
@@ -205,7 +218,7 @@ if __name__ == '__main__':
205 218
     # Get all previously scraped data # TODO: limit to filenames only
206 219
     alreadyScraped = ""
207 220
     for outputPath in [OUTPUT_PATH_PROFILE, OUTPUT_PATH_MOREINFO, OUTPUT_PATH_KILLS]:
208
-        with open(projectFolder + outputPath, "r+", newline='', encoding='utf-8') as outputFile:
221
+        with open(projectFolder + outputPath, "w+", newline='', encoding='utf-8') as outputFile:
209 222
             alreadyScraped = alreadyScraped + outputFile.read()
210 223
 
211 224
     # Mark as duplicates

Načítá se…
Zrušit
Uložit