瀏覽代碼

Add basic file reader

Reads data from scraped screenshots. Supports profile, more info, and
more info with expanded kill screenshots. Only supports 1440x2560
screenshots. Breaks easily with shifted text; run with --debug to
validate that the bounding boxes are correct.
main
Ashton Charbonneau 3 年之前
父節點
當前提交
76285e8d2e
共有 1 個檔案被更改,包括 182 行新增0 行删除
  1. 182
    0
      rok-reader.py

+ 182
- 0
rok-reader.py 查看文件

@@ -0,0 +1,182 @@
1
+import argparse
2
+import glob
3
+import os
4
+from PIL import Image, ImageFilter
5
+from PIL.ImageOps import autocontrast, invert, grayscale, contain
6
+import pytesseract
7
+
8
+# ----
9
+# Program
10
+# ----
11
+
12
# Program name and version. Neither is referenced elsewhere in this script as
# shown; they are kept as metadata.
PROGRAM_NAME = "RoK Reader"
PROGRAM_VERSION = "0.3"
# Passed to argparse as the parser description.
PROGRAM_DESCRIPTION = "This program reads data from Rise of Kingdoms screenshots. It currently supports three user profile screenshots."
15
+
16
+# ----
17
+# Classes
18
+# ----
19
+
20
class Box:
    """Axis-aligned rectangle given by two corner points.

    The four values are handed to ``Image.crop`` in the order
    ``(x, y, x2, y2)``, i.e. top-left corner followed by bottom-right corner.

    Args:
        x: Horizontal coordinate of the top-left corner.
        y: Vertical coordinate of the top-left corner.
        x2: Horizontal coordinate of the bottom-right corner.
        y2: Vertical coordinate of the bottom-right corner.
    """

    def __init__(self, x, y, x2, y2):
        self.x = x
        self.y = y
        self.x2 = x2
        self.y2 = y2

    def __repr__(self):
        # Readable representation so boxes can be inspected while validating
        # bounding boxes.
        return (
            f"{type(self).__name__}"
            f"(x={self.x!r}, y={self.y!r}, x2={self.x2!r}, y2={self.y2!r})"
        )
26
+
27
class RelativeBox:
    """Rectangle described by an offset plus a size.

    Not referenced elsewhere in this script as shown.
    NOTE(review): presumably the offset is measured from some anchor point
    found in the image -- confirm before relying on it.

    Args:
        x_distance: Horizontal offset.
        y_distance: Vertical offset.
        width: Width of the rectangle.
        height: Height of the rectangle.
    """

    def __init__(self, x_distance, y_distance, width, height):
        self.x_distance = x_distance
        self.y_distance = y_distance
        self.width = width
        self.height = height

    def __repr__(self):
        # Readable representation for debugging output.
        return (
            f"{type(self).__name__}"
            f"(x_distance={self.x_distance!r}, y_distance={self.y_distance!r}, "
            f"width={self.width!r}, height={self.height!r})"
        )
33
+
34
+# ----
35
+# Files
36
+# ----
37
+
38
# Output file names, one per screenshot type. They are created inside the
# project folder. Rows are written tab-separated even though the extension
# is .csv.
OUTPUT_PATH_PROFILE = "output-profile.csv"
OUTPUT_PATH_MOREINFO = "output-more.csv"
OUTPUT_PATH_KILLS = "output-kills.csv"
41
+
42
+# ----
43
+# Coordinates
44
+# ----
45
+
46
# Each target is a 6-tuple:
#   (Name, Top Left, Bottom Right, Number, Invert, BonusRightTrim)
#
#   Name           - label printed in verbose mode
#   Top Left       - (x, y) of the region's top-left corner, in pixels
#   Bottom Right   - (x, y) of the region's bottom-right corner, in pixels
#   Number         - True to OCR with a digits-and-comma whitelist
#   Invert         - True to invert the region's colours before filtering
#   BonusRightTrim - pixels added to the right edge of the detected text
#                    bounding box (negative values trim it)
#
# The order of the entries is the column order of the output row.

# Regions read from screenshots whose file name contains "profile".
PROFILE_TARGETS = [
    ("ID", (1246, 375), (1445, 430), True, True, -10),
    ("Power", (1435, 585), (1733, 634), True, True, 0),
    ("Kill Points", (1806, 585), (2112, 633), True, True, 0),
    ("Alliance", (1025, 584), (1427, 637), False, True, 0),
    ("Civilization", (1884, 420), (2132, 486), False, True, 0),
]

# Regions read from screenshots whose file name contains "more".
MOREINFO_TARGETS = [
    ("Power", (1305, 223), (1540, 274), True, True, 0),
    ("Kill Points", (1931, 222), (2188, 276), True, True, 0),
    ("Highest Power", (1815, 416), (2105, 483), True, True, 0),
    ("Victories", (1815, 515), (2105, 580), True, True, 0),
    ("Defeats", (1815, 613), (2105, 675), True, True, 0),
    ("Dead", (1815, 710), (2105, 771), True, True, 0),
    ("Scout Times", (1815, 806), (2105, 871), True, True, 0),
    ("Resources Gathered", (1815, 980), (2105, 1047), True, True, 0),
    ("Resource Assistance", (1815, 1077), (2105, 1144), True, True, 0),
    ("Alliance Help Times", (1815, 1174), (2105, 1238), True, True, 0),
]

# Regions read from screenshots whose file name contains "kills".
KILLS_TARGETS = [
    ("Kill Points", (1418, 312), (1694, 352), True, False, 0),
    ("T1 Kills", (1321, 637), (1538, 684), True, False, 0),
    ("T1 Kill Points", (1986, 637), (2212, 684), True, False, 0),
    ("T2 Kills", (1321, 702), (1538, 755), True, False, 0),
    ("T2 Kill Points", (1986, 702), (2212, 755), True, False, 0),
    ("T3 Kills", (1321, 770), (1538, 824), True, False, 0),
    ("T3 Kill Points", (1986, 770), (2212, 824), True, False, 0),
    ("T4 Kills", (1321, 847), (1538, 897), True, False, 0),
    ("T4 Kill Points", (1986, 847), (2212, 897), True, False, 0),
    ("T5 Kills", (1321, 918), (1538, 968), True, False, 0),
    ("T5 Kill Points", (1986, 918), (2212, 968), True, False, 0),
    ("Previous Kills", (1626, 985), (2228, 1039), False, False, 0),
]
83
+
84
+# ----
85
+# Functions
86
+# ----
87
+
88
# Characters deleted from numeric OCR output: line breaks, tabs, spaces and
# the "." / "," separators.
_NUMBER_CLEANUP = str.maketrans("", "", "\n\r\t .,")
# For free text, line breaks and tabs become single spaces and carriage
# returns are dropped.
_TEXT_CLEANUP = str.maketrans({"\n": " ", "\t": " ", "\r": None})


# Read text from a section of an image using Tesseract
def read_string_from_image(file, box, is_number, inv, bonusRightTrim, debugFilePath):
    """Crop one region out of a screenshot, clean it up and OCR it.

    Args:
        file: Path (or file object) of the screenshot, as accepted by
            ``Image.open``.
        box: Object with ``x``, ``y``, ``x2`` and ``y2`` attributes giving the
            region to read.
        is_number: When true, Tesseract is restricted to digits and commas and
            every separator/whitespace character is removed from the result.
        inv: When true, the region's colours are inverted before filtering.
        bonusRightTrim: Pixels added to the right edge of the detected text
            bounding box; a negative value trims it.
        debugFilePath: Where the pre-processed region is saved when the
            module-level ``arguments.debug`` flag is set.

    Returns:
        The recognised text as a single-line string.
    """
    with Image.open(file) as screenshot:
        # Crop to correct dimensions
        region = screenshot.crop((box.x, box.y, box.x2, box.y2))

        # Flatten onto a white background using the alpha band as the mask.
        # Converting to RGBA first guarantees that band exists, so screenshots
        # saved without transparency no longer fail on split()[3].
        region = region.convert("RGBA")
        processed = Image.new("RGB", region.size, (255, 255, 255))
        processed.paste(region, mask=region.split()[3])

        # Invert if flagged
        if inv:
            processed = invert(processed)

        # Apply filters
        processed = grayscale(processed)
        processed = autocontrast(processed, cutoff=(0, 75))

        # Tighten the crop around the text: getbbox() on the inverted,
        # contrast-stretched copy gives the bounding box of its non-zero pixels.
        bbox = autocontrast(invert(processed), cutoff=(0, 90)).getbbox()
        if bbox:
            left, upper, right, lower = bbox
            # Keep at least one pixel of width so a negative trim on a narrow
            # box cannot produce an empty or inverted crop rectangle.
            right = max(left + 1, right + bonusRightTrim)
            processed = processed.crop((left, upper, right, lower))

        # Resize to fit within 800x800 while keeping the aspect ratio
        # (Image.LANCZOS is the resampling filter the original selected as 1).
        processed = contain(processed, (800, 800), method=Image.LANCZOS)
        processed = processed.filter(ImageFilter.EDGE_ENHANCE_MORE)
        processed = processed.filter(ImageFilter.SHARPEN)

        if arguments.debug:
            processed.save(debugFilePath)

        if is_number:
            raw = pytesseract.image_to_string(processed, config="--psm 6 -c tessedit_char_whitelist=0123456789,")
            return raw.strip().translate(_NUMBER_CLEANUP)

        raw = pytesseract.image_to_string(processed, config="--psm 6")
        return raw.strip().translate(_TEXT_CLEANUP)
117
+
118
+# ----
119
+# Arguments
120
+# ----
121
+
122
parser = argparse.ArgumentParser(description = PROGRAM_DESCRIPTION)
parser.add_argument("-p", "--project", help = "project name", required = True)
parser.add_argument("-f", "--file", help = "file name (globs accepted)", required = True)
# NOTE(review): --output is parsed but not used anywhere in this script as shown.
parser.add_argument("-o", "--output", help = "output file")
parser.add_argument("-v", "--verbose", help = "be verbose", default = False, action = "store_true")
parser.add_argument("--debug", help = "save debug images", default = False, action = "store_true")
# Parsed at module level because read_string_from_image reads arguments.debug.
arguments = parser.parse_args()

# ----
# Program
# ----

def _targets_for(filename):
    """Return ``(targets, output file name)`` for a screenshot file name.

    The screenshot type is chosen by substring, checked in the order
    "profile", "more", "kills".

    Raises:
        SystemExit: If the file name contains none of the known types.
    """
    if "profile" in filename:
        return PROFILE_TARGETS, OUTPUT_PATH_PROFILE
    if "more" in filename:
        return MOREINFO_TARGETS, OUTPUT_PATH_MOREINFO
    if "kills" in filename:
        return KILLS_TARGETS, OUTPUT_PATH_KILLS
    # SystemExit is what sys.exit() raises; raising it directly avoids the
    # missing `import sys`.
    raise SystemExit("File name doesn't contain type")


def _load_scraped_names(output_path):
    """Return the set of screenshot names already recorded in ``output_path``.

    The name is the first tab-separated column of each row. A missing file
    yields an empty set.
    """
    try:
        with open(output_path, newline='', encoding='utf-8') as existing:
            return {line.split("\t", 1)[0].rstrip("\r\n") for line in existing}
    except FileNotFoundError:
        return set()


def main():
    """Scrape every screenshot matched by --file into the project's output files."""
    # Create project folder
    project_folder = os.path.join("output", arguments.project)
    os.makedirs(project_folder, exist_ok=True)

    debug_folder = "debug"
    if arguments.debug:
        os.makedirs(debug_folder, exist_ok=True)

    # Get files
    screenshots_to_read = glob.glob(arguments.file, recursive=True)
    screenshot_count = len(screenshots_to_read)
    if not screenshots_to_read:
        raise SystemExit("No files found.")

    # Scrape
    if arguments.verbose: print("Scraping", screenshot_count, "files")

    # Output path -> names already recorded there. Each output file is read
    # once instead of once per screenshot.
    scraped_by_output = {}

    for position, file in enumerate(screenshots_to_read, start=1):
        filename = os.path.basename(file)

        if arguments.verbose: print(position, "/", screenshot_count, ": ", filename, sep="")

        targets, output = _targets_for(filename)
        output_path = os.path.join(project_folder, output)
        if output_path not in scraped_by_output:
            scraped_by_output[output_path] = _load_scraped_names(output_path)
        already_scraped = scraped_by_output[output_path]

        # Debug runs never write rows, so they always re-process the file to
        # regenerate the debug images.
        if filename in already_scraped and not arguments.debug:
            if arguments.verbose: print("  ", "already scraped.")
            continue

        values = []
        for target_index, target in enumerate(targets):
            name, top_left, bottom_right, is_number, inv, bonus_right_trim = target
            debug_file = os.path.splitext(filename)[0] + "_" + str(target_index) + ".png"
            string = read_string_from_image(
                file,
                Box(top_left[0], top_left[1], bottom_right[0], bottom_right[1]),
                is_number,
                inv,
                bonus_right_trim,
                os.path.join(debug_folder, debug_file),
            )
            if arguments.verbose: print("  ", name, ": ", string, sep="")
            values.append(string)

        if not arguments.debug:
            # Write the whole row in one call so a failed OCR never leaves a
            # partial line behind.
            with open(output_path, "a", newline='', encoding='utf-8') as output_file:
                output_file.write("\t".join([filename] + values) + "\n")
            already_scraped.add(filename)


if __name__ == '__main__':
    main()

Loading…
取消
儲存