123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182 |
import argparse
import glob
import os
import sys

from PIL import Image, ImageFilter
from PIL.ImageOps import autocontrast, invert, grayscale, contain
import pytesseract
-
- # ----
- # Program
- # ----
-
# Program metadata. PROGRAM_DESCRIPTION is passed to argparse as the --help
# description; PROGRAM_NAME and PROGRAM_VERSION are not referenced in the
# portion of the file reviewed here.
PROGRAM_NAME = "RoK Reader"
PROGRAM_VERSION = "0.3"
PROGRAM_DESCRIPTION = "This program reads data from Rise of Kingdoms screenshots. It currently supports three user profile screenshots."
-
- # ----
- # Classes
- # ----
-
class Box:
    """Rectangle described by two corner points.

    ``(x, y)`` and ``(x2, y2)`` are stored exactly as given.
    ``read_string_from_image`` hands them to ``Image.crop`` in the order
    ``(x, y, x2, y2)``, i.e. as the left, upper, right and lower edges.
    """

    def __init__(self, x, y, x2, y2):
        self.x = x
        self.y = y
        self.x2 = x2
        self.y2 = y2

    def __repr__(self):
        # Readable form for debugging/verbose output instead of the default
        # "<Box object at 0x...>".
        return (
            f"{type(self).__name__}"
            f"(x={self.x!r}, y={self.y!r}, x2={self.x2!r}, y2={self.y2!r})"
        )
-
class RelativeBox:
    """Rectangle described by an offset and a size.

    The four values are stored exactly as given. The class is not used in
    the portion of the file reviewed here.
    NOTE(review): presumably the distances are measured from some anchor
    point in the screenshot -- confirm against the intended caller.
    """

    def __init__(self, x_distance, y_distance, width, height):
        self.x_distance = x_distance
        self.y_distance = y_distance
        self.width = width
        self.height = height

    def __repr__(self):
        # Readable form for debugging instead of the default object address.
        return (
            f"{type(self).__name__}"
            f"(x_distance={self.x_distance!r}, y_distance={self.y_distance!r}, "
            f"width={self.width!r}, height={self.height!r})"
        )
-
- # ----
- # Files
- # ----
-
# Result file names, one per screenshot type. The main script appends them to
# the project folder ("output/<project>/") and opens the files in append mode.
# NOTE: despite the ".csv" extension the rows are written tab-separated.
OUTPUT_PATH_PROFILE = "output-profile.csv"
OUTPUT_PATH_MOREINFO = "output-more.csv"
OUTPUT_PATH_KILLS = "output-kills.csv"
-
- # ----
- # Coordinates
- # ----
-
# Each target is a 6-tuple:
#   (Name, Top Left, Bottom Right, Number, Invert, BonusRightTrim)
#
#   Name           - label printed in verbose output for the field
#   Top Left       - (x, y) of the crop box's upper-left corner
#   Bottom Right   - (x, y) of the crop box's lower-right corner
#   Number         - True: OCR is restricted to digits and commas, and
#                    separators/whitespace are removed from the result
#   Invert         - True: the cropped region is colour-inverted before the
#                    contrast filters are applied
#   BonusRightTrim - added to the right edge of the detected content box
#                    (a negative value trims pixels off the right side)
#
# Coordinates are absolute pixel positions inside the screenshot.
# NOTE(review): this presumably requires every screenshot to share one
# resolution -- confirm.

PROFILE_TARGETS = [
    ("ID", (1246, 375), (1445, 430), True, True, -10),
    ("Power", (1435, 585), (1733, 634), True, True, 0),
    ("Kill Points", (1806, 585), (2112, 633), True, True, 0),
    ("Alliance", (1025, 584), (1427, 637), False, True, 0),
    ("Civilization", (1884, 420), (2132, 486), False, True, 0)
]

MOREINFO_TARGETS = [
    ("Power", (1305, 223), (1540, 274), True, True, 0),
    ("Kill Points", (1931, 222), (2188, 276), True, True, 0),
    ("Highest Power", (1815, 416), (2105, 483), True, True, 0),
    ("Victories", (1815, 515), (2105, 580), True, True, 0),
    ("Defeats", (1815, 613), (2105, 675), True, True, 0),
    ("Dead", (1815, 710), (2105, 771), True, True, 0),
    ("Scout Times", (1815, 806), (2105, 871), True, True, 0),
    ("Resources Gathered", (1815, 980), (2105, 1047), True, True, 0),
    ("Resource Assistance", (1815, 1077), (2105, 1144), True, True, 0),
    ("Alliance Help Times", (1815, 1174), (2105, 1238), True, True, 0)
]

KILLS_TARGETS = [
    ("Kill Points", (1418, 312), (1694, 352), True, False, 0),
    ("T1 Kills", (1321, 637), (1538, 684), True, False, 0),
    ("T1 Kill Points", (1986, 637), (2212, 684), True, False, 0),
    ("T2 Kills", (1321, 702), (1538, 755), True, False, 0),
    ("T2 Kill Points", (1986, 702), (2212, 755), True, False, 0),
    ("T3 Kills", (1321, 770), (1538, 824), True, False, 0),
    ("T3 Kill Points", (1986, 770), (2212, 824), True, False, 0),
    ("T4 Kills", (1321, 847), (1538, 897), True, False, 0),
    ("T4 Kill Points", (1986, 847), (2212, 897), True, False, 0),
    ("T5 Kills", (1321, 918), (1538, 968), True, False, 0),
    ("T5 Kill Points", (1986, 918), (2212, 968), True, False, 0),
    ("Previous Kills", (1626, 985), (2228, 1039), False, False, 0)
]
-
- # ----
- # Functions
- # ----
-
# Translation tables used to tidy raw Tesseract output.
# Numbers: drop line breaks, tabs, spaces and thousands separators entirely.
_NUMBER_CLEANUP = str.maketrans("", "", "\n\r\t .,")
# Text: turn line breaks and tabs into single spaces, drop carriage returns.
_TEXT_CLEANUP = str.maketrans({"\n": " ", "\t": " ", "\r": None})


# Read text from a section of an image using Tesseract
def read_string_from_image(file, box, is_number, inv, bonusRightTrim, debugFilePath):
    """Crop one field out of a screenshot, clean it up and OCR it.

    Args:
        file: Path (or file object) accepted by ``Image.open``.
        box: Object with ``x``, ``y``, ``x2``, ``y2`` attributes giving the
            left, upper, right and lower edges of the region to read.
        is_number: When true, Tesseract is limited to digits and commas and
            every separator/whitespace character is removed from the result.
        inv: When true, the cropped region is colour-inverted before the
            contrast filters run.
        bonusRightTrim: Added to the right edge of the detected content
            bounding box; negative values trim pixels off the right.
        debugFilePath: Where the pre-processed crop is saved when the
            module-level ``arguments.debug`` flag is set.

    Returns:
        The recognised text with surrounding whitespace stripped.
    """
    with Image.open(file) as image:
        # Crop to correct dimensions. Converting to RGBA guarantees an alpha
        # band exists, so screenshots saved without transparency (plain RGB
        # PNG, JPEG) no longer fail when the alpha mask is taken below.
        region = image.crop((box.x, box.y, box.x2, box.y2)).convert("RGBA")

    # Switch to RGB mode by flattening the crop onto a white background
    rgbimage = Image.new("RGB", region.size, (255, 255, 255))
    rgbimage.paste(region, mask=region.split()[3])

    # Invert if flagged
    if inv:
        rgbimage = invert(rgbimage)

    # Apply filters
    rgbimage = grayscale(rgbimage)
    # cutoff=(low, high): percentage of the histogram clipped at each end
    rgbimage = autocontrast(rgbimage, cutoff=(0, 75))

    # Find the bounding box of the content on a harder-thresholded, inverted
    # copy and crop the working image down to it.
    bbox = autocontrast(invert(rgbimage), cutoff=(0, 90)).getbbox()
    if bbox:
        left, upper, right, lower = bbox
        # Keep at least one pixel column so a large negative trim cannot
        # produce a crop box whose right edge lies left of its left edge.
        right = max(left + 1, right + bonusRightTrim)
        rgbimage = rgbimage.crop((left, upper, right, lower))

    # Scale to fit 800x800 while keeping the aspect ratio; 1 is Pillow's
    # LANCZOS resampling constant.
    rgbimage = contain(rgbimage, (800, 800), method=1)
    rgbimage = rgbimage.filter(ImageFilter.EDGE_ENHANCE_MORE)
    rgbimage = rgbimage.filter(ImageFilter.SHARPEN)

    # `arguments` is the module-level argparse namespace.
    if arguments.debug:
        rgbimage.save(debugFilePath)

    if is_number:
        raw = pytesseract.image_to_string(
            rgbimage,
            config="--psm 6 -c tessedit_char_whitelist=0123456789,",
        )
        return raw.strip().translate(_NUMBER_CLEANUP)

    raw = pytesseract.image_to_string(rgbimage, config="--psm 6")
    return raw.strip().translate(_TEXT_CLEANUP)
-
- # ----
- # Arguments
- # ----
-
# Command-line interface. Parsing happens at module level because
# read_string_from_image and the main script both read the resulting
# `arguments` namespace as a global.
parser = argparse.ArgumentParser(description=PROGRAM_DESCRIPTION)
parser.add_argument(
    "-p", "--project",
    required=True,
    help="project name",
)
parser.add_argument(
    "-f", "--file",
    required=True,
    help="file name (globs accepted)",
)
# NOTE(review): --output is accepted but not read anywhere in the code
# reviewed here; output file names come from the OUTPUT_PATH_* constants.
parser.add_argument(
    "-o", "--output",
    help="output file",
)
parser.add_argument(
    "-v", "--verbose",
    action="store_true",
    default=False,
    help="be verbose",
)
parser.add_argument(
    "--debug",
    action="store_true",
    default=False,
    help="save debug images",
)
arguments = parser.parse_args()
-
- # ----
- # Program
- # ----
-
if __name__ == '__main__':
    # Create project folder
    projectFolder = "output/" + arguments.project + "/"
    os.makedirs(projectFolder, exist_ok=True)

    # The debug folder is only needed (and only created) with --debug
    debugFolder = "debug" + "/"
    if arguments.debug:
        os.makedirs(debugFolder, exist_ok=True)

    # Get files
    screenshots_to_read = glob.glob(arguments.file, recursive=True)
    screenshot_count = len(screenshots_to_read)
    if screenshot_count < 1:
        sys.exit("No files found.")

    # Scrape
    if arguments.verbose:
        print("Scraping", screenshot_count, "files")
    for position, file in enumerate(screenshots_to_read, start=1):
        filename = os.path.basename(file)

        if arguments.verbose:
            print(position, "/", screenshot_count, ": ", filename, sep="")

        # The screenshot type is encoded in the file name and selects both
        # the regions to read and the file the row is appended to.
        if "profile" in filename:
            targets = PROFILE_TARGETS
            output = OUTPUT_PATH_PROFILE
        elif "more" in filename:
            targets = MOREINFO_TARGETS
            output = OUTPUT_PATH_MOREINFO
        elif "kills" in filename:
            targets = KILLS_TARGETS
            output = OUTPUT_PATH_KILLS
        else:
            sys.exit("File name doesn't contain type")

        # TODO: bad time complexity (the whole output file is re-read for
        # every screenshot)
        with open(projectFolder + output, "a+", newline='', encoding='utf-8') as output_file:
            # "a+" opens the stream positioned at end-of-file; rewind first,
            # otherwise read() returns "" and the duplicate check never fires.
            # Writes still go to the end of the file because of append mode.
            output_file.seek(0)
            if filename in output_file.read():
                if arguments.verbose:
                    print(" ", "already scraped.")
                continue

            values = []
            for target_index, target in enumerate(targets):
                name, top_left, bottom_right, is_number, inv, bonus_right_trim = target
                debugFile = os.path.splitext(filename)[0] + "_" + str(target_index) + ".png"
                string = read_string_from_image(
                    file,
                    Box(top_left[0], top_left[1], bottom_right[0], bottom_right[1]),
                    is_number,
                    inv,
                    bonus_right_trim,
                    debugFolder + debugFile,
                )
                if arguments.verbose:
                    print(" ", name, ": ", string, sep="")
                values.append(string)

            # In debug mode only the debug images are produced; no row is
            # recorded. The row is written in one call so that a failure
            # part-way through a screenshot cannot leave a partial line.
            if not arguments.debug:
                output_file.write("\t".join([filename] + values) + "\n")
|