"""RoK Reader: OCR fixed regions of Rise of Kingdoms screenshots.

Each screenshot is classified by a keyword in its file name ("profile",
"more" or "kills"), a fixed list of pixel regions is read with Tesseract,
and one tab-separated row per screenshot is appended to the matching
output file inside ``output/<project>/``.
"""

import argparse
import glob
import os
import sys

from PIL import Image, ImageFilter
from PIL.ImageOps import autocontrast, invert, grayscale, contain
import pytesseract

# ----
# Program
# ----
PROGRAM_NAME = "RoK Reader"
PROGRAM_VERSION = "0.3"
PROGRAM_DESCRIPTION = "This program reads data from Rise of Kingdoms screenshots. It currently supports three user profile screenshots."


# ----
# Classes
# ----
class Box:
    """Rectangle given by its top-left (x, y) and bottom-right (x2, y2) corners."""

    def __init__(self, x, y, x2, y2):
        self.x = x
        self.y = y
        self.x2 = x2
        self.y2 = y2


class RelativeBox:
    """Rectangle given as an offset (x_distance, y_distance) plus a size.

    NOTE(review): not referenced anywhere in this file; kept for callers
    that may import it.
    """

    def __init__(self, x_distance, y_distance, width, height):
        self.x_distance = x_distance
        self.y_distance = y_distance
        self.width = width
        self.height = height


# ----
# Files
# ----
OUTPUT_PATH_PROFILE = "output-profile.csv"
OUTPUT_PATH_MOREINFO = "output-more.csv"
OUTPUT_PATH_KILLS = "output-kills.csv"

# ----
# Coordinates
# ----
# Name, Top Left, Bottom Right, Number, Invert, BonusRightTrim
PROFILE_TARGETS = [
    ("ID", (1246, 375), (1445, 430), True, True, -10),
    ("Power", (1435, 585), (1733, 634), True, True, 0),
    ("Kill Points", (1806, 585), (2112, 633), True, True, 0),
    ("Alliance", (1025, 584), (1427, 637), False, True, 0),
    ("Civilization", (1884, 420), (2132, 486), False, True, 0),
]

MOREINFO_TARGETS = [
    ("Power", (1305, 223), (1540, 274), True, True, 0),
    ("Kill Points", (1931, 222), (2188, 276), True, True, 0),
    ("Highest Power", (1815, 416), (2105, 483), True, True, 0),
    ("Victories", (1815, 515), (2105, 580), True, True, 0),
    ("Defeats", (1815, 613), (2105, 675), True, True, 0),
    ("Dead", (1815, 710), (2105, 771), True, True, 0),
    ("Scout Times", (1815, 806), (2105, 871), True, True, 0),
    ("Resources Gathered", (1815, 980), (2105, 1047), True, True, 0),
    ("Resource Assistance", (1815, 1077), (2105, 1144), True, True, 0),
    ("Alliance Help Times", (1815, 1174), (2105, 1238), True, True, 0),
]

KILLS_TARGETS = [
    ("Kill Points", (1418, 312), (1694, 352), True, False, 0),
    ("T1 Kills", (1321, 637), (1538, 684), True, False, 0),
    ("T1 Kill Points", (1986, 637), (2212, 684), True, False, 0),
    ("T2 Kills", (1321, 702), (1538, 755), True, False, 0),
    ("T2 Kill Points", (1986, 702), (2212, 755), True, False, 0),
    ("T3 Kills", (1321, 770), (1538, 824), True, False, 0),
    ("T3 Kill Points", (1986, 770), (2212, 824), True, False, 0),
    ("T4 Kills", (1321, 847), (1538, 897), True, False, 0),
    ("T4 Kill Points", (1986, 847), (2212, 897), True, False, 0),
    ("T5 Kills", (1321, 918), (1538, 968), True, False, 0),
    ("T5 Kill Points", (1986, 918), (2212, 968), True, False, 0),
    ("Previous Kills", (1626, 985), (2228, 1039), False, False, 0),
]

# File-name keyword -> (targets, output file). Checked in this order, so a
# name containing several keywords resolves to the first match.
_SCREENSHOT_TYPES = (
    ("profile", PROFILE_TARGETS, OUTPUT_PATH_PROFILE),
    ("more", MOREINFO_TARGETS, OUTPUT_PATH_MOREINFO),
    ("kills", KILLS_TARGETS, OUTPUT_PATH_KILLS),
)

# Characters dropped from numeric OCR results (line breaks, blanks and
# thousands separators).
_NUMBER_NOISE = str.maketrans("", "", "\n\r\t .,")
# Line breaks and tabs inside text OCR results are flattened so a value can
# never break the tab-separated row it is written into.
_TEXT_WHITESPACE = str.maketrans({"\n": " ", "\r": None, "\t": " "})


# ----
# Functions
# ----
def read_string_from_image(file, box, is_number, inv, bonusRightTrim, debugFilePath):
    """Read text from a section of an image using Tesseract.

    Args:
        file: Path of the screenshot to open.
        box: Object with ``x``, ``y``, ``x2``, ``y2`` attributes (see
            ``Box``) giving the region to crop, in pixels.
        is_number: If true, restrict Tesseract to digits and commas and
            strip every separator/blank from the result.
        inv: If true, invert the colours of the cropped region before
            filtering.
        bonusRightTrim: Pixel offset added to the right edge of the detected
            content box (negative values trim the right side).
        debugFilePath: Where the pre-processed image is saved when the
            ``--debug`` command line flag is set.

    Returns:
        The recognised text as a single-line string.

    Note:
        Reads the module-level ``arguments`` namespace to decide whether
        the debug image is saved.
    """
    with Image.open(file) as image:
        # Crop to correct dimensions. Converting to RGBA guarantees an alpha
        # band exists, so screenshots saved without transparency (plain RGB)
        # no longer fail on ``split()[3]``.
        region = image.crop((box.x, box.y, box.x2, box.y2)).convert("RGBA")

        # Flatten onto a white RGB background, using alpha as the paste mask
        flattened = Image.new("RGB", region.size, (255, 255, 255))
        flattened.paste(region, mask=region.split()[3])

        # Invert if flagged
        if inv:
            flattened = invert(flattened)

        # Apply filters
        prepared = grayscale(flattened)
        prepared = autocontrast(prepared, cutoff=(0, 75))

        # Trim margins: bounding box of the non-zero area of a harshly
        # contrast-stretched inverted copy (presumably the glyphs).
        content_box = autocontrast(invert(prepared), cutoff=(0, 90)).getbbox()
        if content_box:
            left, upper, right, lower = content_box
            prepared = prepared.crop((left, upper, right + bonusRightTrim, lower))

        # method=1 is Pillow's LANCZOS resampling filter
        prepared = contain(prepared, (800, 800), method=1)
        prepared = prepared.filter(ImageFilter.EDGE_ENHANCE_MORE)
        prepared = prepared.filter(ImageFilter.SHARPEN)

        if arguments.debug:
            prepared.save(debugFilePath)

        if is_number:
            raw = pytesseract.image_to_string(
                prepared,
                config="--psm 6 -c tessedit_char_whitelist=0123456789,",
            )
            return raw.strip().translate(_NUMBER_NOISE)

        raw = pytesseract.image_to_string(prepared, config="--psm 6")
        return raw.strip().translate(_TEXT_WHITESPACE)


def _classify_screenshot(filename):
    """Return ``(targets, output_name)`` for *filename*, or ``None`` if unknown."""
    for keyword, targets, output_name in _SCREENSHOT_TYPES:
        if keyword in filename:
            return targets, output_name
    return None


def _load_scraped_names(path):
    """Return the set of file names (first column) already present in *path*."""
    try:
        with open(path, newline='', encoding='utf-8') as existing:
            return {
                line.rstrip("\r\n").split("\t", 1)[0]
                for line in existing
                if line.strip()
            }
    except FileNotFoundError:
        return set()


# ----
# Arguments
# ----
# Parsed at module level because read_string_from_image() reads the
# resulting ``arguments`` global.
parser = argparse.ArgumentParser(description=PROGRAM_DESCRIPTION)
parser.add_argument("-p", "--project", help="project name", required=True)
parser.add_argument("-f", "--file", help="file name (globs accepted)", required=True)
# NOTE(review): --output is accepted but not used anywhere in this file.
parser.add_argument("-o", "--output", help="output file")
parser.add_argument("-v", "--verbose", help="be verbose", default=False, action="store_true")
parser.add_argument("--debug", help="save debug images", default=False, action="store_true")
arguments = parser.parse_args()


# ----
# Program
# ----
def main():
    """Scrape every screenshot matched by ``--file`` into the project folder.

    In ``--debug`` mode the pre-processed crops are saved under ``debug/``
    and nothing is written to the output files.
    """
    # Create project folder
    project_folder = os.path.join("output", arguments.project)
    os.makedirs(project_folder, exist_ok=True)

    debug_folder = "debug"
    if arguments.debug:
        os.makedirs(debug_folder, exist_ok=True)

    # Get files
    screenshots_to_read = glob.glob(arguments.file, recursive=True)
    screenshot_count = len(screenshots_to_read)
    if screenshot_count < 1:
        sys.exit("No files found.")

    # Scrape
    if arguments.verbose:
        print("Scraping", screenshot_count, "files")

    # Output path -> names already written there. Each output file is read
    # once instead of once per screenshot.
    scraped_by_output = {}

    for position, file in enumerate(screenshots_to_read, start=1):
        filename = os.path.basename(file)
        if arguments.verbose:
            print(position, "/", screenshot_count, ": ", filename, sep="")

        classified = _classify_screenshot(filename)
        if classified is None:
            sys.exit("File name doesn't contain type")
        targets, output_name = classified

        output_path = os.path.join(project_folder, output_name)
        if output_path not in scraped_by_output:
            scraped_by_output[output_path] = _load_scraped_names(output_path)
        already_scraped = scraped_by_output[output_path]

        if filename in already_scraped:
            if arguments.verbose:
                print(" ", "already scraped.")
            continue

        stem = os.path.splitext(filename)[0]
        values = []
        for target_index, target in enumerate(targets):
            name, top_left, bottom_right, is_number, inv, right_trim = target
            debug_path = os.path.join(debug_folder, stem + "_" + str(target_index) + ".png")
            value = read_string_from_image(
                file,
                Box(*top_left, *bottom_right),
                is_number,
                inv,
                right_trim,
                debug_path,
            )
            if arguments.verbose:
                print(" ", name, ": ", value, sep="")
            values.append(value)

        if not arguments.debug:
            # Write the row in one go so a failure part-way through a
            # screenshot cannot leave a truncated line behind.
            with open(output_path, "a", newline='', encoding='utf-8') as output_file:
                output_file.write("\t".join([filename] + values) + "\n")
            already_scraped.add(filename)


if __name__ == '__main__':
    main()