123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253 |
- import argparse
- import cv2
- import glob
- import os
- import multiprocessing
- import numpy
- from PIL import Image, ImageFilter
- from PIL.ImageOps import autocontrast, invert, grayscale, contain, expand
- import pytesseract
- import signal
- import sys
-
-
- # ----
- # Program
- # ----
-
# Program identity. PROGRAM_DESCRIPTION is the text passed to argparse as the
# command-line description.
PROGRAM_NAME = "RoK Reader"
PROGRAM_VERSION = "0.3"
PROGRAM_DESCRIPTION = (
    "This program reads data from Rise of Kingdoms screenshots. "
    "It currently supports three user profile screenshots."
)
-
- # ----
- # Classes
- # ----
-
class Box:
    """Rectangle described by two corner points.

    The four values are stored exactly as given. In this file they are passed
    to ``Image.crop`` as the (left, upper, right, lower) pixel bounds of the
    screenshot region to read.
    """

    def __init__(self, x, y, x2, y2):
        # First corner.
        self.x, self.y = x, y
        # Opposite corner.
        self.x2, self.y2 = x2, y2
-
class RelativeBox:
    """Rectangle described by an offset pair plus a size.

    Plain value holder: the four constructor arguments are stored unchanged.
    NOTE(review): nothing in the visible file uses this class, so what the
    distances are measured from is not established here - confirm before use.
    """

    def __init__(self, x_distance, y_distance, width, height):
        # Offset components.
        self.x_distance, self.y_distance = x_distance, y_distance
        # Size components.
        self.width, self.height = width, height
-
- # ----
- # Files
- # ----
-
# Output file names, one per screenshot type. The main script prefixes each
# with the project folder before opening it.
OUTPUT_PATH_KILLS = "output-kills.csv"        # files whose name contains "kills"
OUTPUT_PATH_MOREINFO = "output-more.csv"      # files whose name contains "more"
OUTPUT_PATH_PROFILE = "output-profile.csv"    # files whose name contains "profile"
-
- # ----
- # Coordinates
- # ----
-
- # Name, Top Left, Bottom Right, Number, Invert, BonusRightTrim
-
# OCR regions for "profile" screenshots. Each entry is
#   (name, (left, top), (right, bottom), is_number, invert, bonus_right_trim)
# and is unpacked positionally by read_file().
PROFILE_TARGETS = [
    ("ID",           (1246, 375), (1445, 430), True,  True, -10),
    ("Power",        (1435, 585), (1733, 634), True,  True, 0),
    ("Kill Points",  (1806, 585), (2112, 633), True,  True, 0),
    ("Alliance",     (1025, 584), (1427, 637), False, True, 0),
    ("Civilization", (1884, 420), (2132, 486), False, True, 0),
]
-
# OCR regions for "more" (more info) screenshots. Each entry is
#   (name, (left, top), (right, bottom), is_number, invert, bonus_right_trim)
# and is unpacked positionally by read_file().
MOREINFO_TARGETS = [
    ("Power",               (1305, 223),  (1540, 274),  True, True, 0),
    ("Kill Points",         (1931, 222),  (2188, 276),  True, True, 0),
    ("Highest Power",       (1815, 416),  (2105, 483),  True, True, 0),
    ("Victories",           (1815, 515),  (2105, 580),  True, True, 0),
    ("Defeats",             (1815, 613),  (2105, 675),  True, True, 0),
    ("Dead",                (1815, 710),  (2105, 771),  True, True, 0),
    ("Scout Times",         (1815, 806),  (2105, 871),  True, True, 0),
    ("Resources Gathered",  (1815, 980),  (2105, 1047), True, True, 0),
    ("Resource Assistance", (1815, 1077), (2105, 1144), True, True, 0),
    ("Alliance Help Times", (1815, 1174), (2105, 1238), True, True, 0),
]
-
# OCR regions for "kills" screenshots. Each entry is
#   (name, (left, top), (right, bottom), is_number, invert, bonus_right_trim)
# and is unpacked positionally by read_file(). None of these regions is
# inverted, and "Previous Kills" is read as free text with 385 px trimmed
# from the right of the detected content.
KILLS_TARGETS = [
    ("Kill Points",    (1418, 312), (1694, 352),  True,  False, 0),
    ("T1 Kills",       (1325, 637), (1538, 684),  True,  False, 0),
    ("T1 Kill Points", (1986, 637), (2212, 684),  True,  False, 0),
    ("T2 Kills",       (1325, 702), (1538, 755),  True,  False, 0),
    ("T2 Kill Points", (1986, 702), (2212, 755),  True,  False, 0),
    ("T3 Kills",       (1325, 770), (1538, 824),  True,  False, 0),
    ("T3 Kill Points", (1986, 770), (2212, 824),  True,  False, 0),
    ("T4 Kills",       (1325, 847), (1538, 897),  True,  False, 0),
    ("T4 Kill Points", (1986, 847), (2212, 897),  True,  False, 0),
    ("T5 Kills",       (1325, 918), (1538, 968),  True,  False, 0),
    ("T5 Kill Points", (1986, 918), (2212, 968),  True,  False, 0),
    ("Previous Kills", (1626, 985), (2228, 1039), False, False, -385),
]
-
- # ----
- # Functions
- # ----
-
- # Read an image file
def read_file(fileTuple):
    """Scrape one screenshot and build its tab-separated output row.

    Intended to run in a pool worker. Reads the module-level ``arguments``
    (parsed command-line options) and ``debugFolder`` globals.

    Args:
        fileTuple: ``(fileNumber, path, isDuplicate)`` as built by the main
            script.

    Returns:
        ``(fileNumber, filename, debugOutput, outputPath, outputLine,
        isDuplicate)``. ``debugOutput`` and ``outputLine`` are empty strings
        when the file is a duplicate and ``--debug`` is off, because the
        image is not read at all in that case.

    Raises:
        ValueError: if the file name contains none of "profile", "more" or
            "kills", so the screenshot type cannot be determined.
    """
    fileNumber, file, isDuplicate = fileTuple[:3]
    filename = os.path.basename(file)

    # The screenshot type is taken from the file name.
    if "profile" in filename:
        targets = PROFILE_TARGETS
        outputPath = OUTPUT_PATH_PROFILE
    elif "more" in filename:
        targets = MOREINFO_TARGETS
        outputPath = OUTPUT_PATH_MOREINFO
    elif "kills" in filename:
        targets = KILLS_TARGETS
        outputPath = OUTPUT_PATH_KILLS
    else:
        # Raise a normal exception rather than calling sys.exit(): SystemExit
        # inside a pool worker kills the worker without reporting a result,
        # which leaves the parent blocked in imap() forever. A ValueError is
        # pickled back and re-raised in the parent instead.
        raise ValueError("File name doesn't contain type: " + filename)

    # Duplicates are skipped unless debug images were requested.
    if isDuplicate and not arguments.debug:
        return (fileNumber, filename, "", outputPath, "", isDuplicate)

    # Flatten the screenshot onto a white RGB canvas. Converting to RGBA
    # first guarantees an alpha band exists, so images saved without
    # transparency no longer fail on split()[3].
    with Image.open(file) as image:
        rgbaImage = image.convert("RGBA")
    rgbImage = Image.new("RGB", rgbaImage.size, (255, 255, 255))
    rgbImage.paste(rgbaImage, mask=rgbaImage.split()[3])

    # Read every target region of this screenshot type.
    baseName = os.path.splitext(filename)[0]
    strings = []
    debugLines = []
    for i, target in enumerate(targets):
        name, topLeft, bottomRight, isNumber, inv, bonusRightTrim = target
        debugFile = baseName + "_" + str(i) + ".png"
        string = read_string_from_image(
            rgbImage,
            Box(topLeft[0], topLeft[1], bottomRight[0], bottomRight[1]),
            isNumber,
            inv,
            bonusRightTrim,
            debugFolder + debugFile,
        )
        strings.append(string)
        debugLines.append(" " + name + ": " + string + "\n")

    # Row layout: filename, then one column per target, tab-separated.
    outputLine = filename + "\t" + "\t".join(strings)
    debugOutput = "".join(debugLines)
    return (fileNumber, filename, debugOutput, outputPath, outputLine, isDuplicate)
-
- # Read text from a section of an image using Tesseract
def read_string_from_image(rgbImage, box, is_number, inv, bonusRightTrim, debugFilePath):
    """Read the text inside one rectangular region of a screenshot.

    The region is cropped, optionally inverted, contrast-stretched, converted
    to a black-on-white binary image, cropped to its content and padded, then
    passed to Tesseract. Reads the module-level ``arguments`` global to decide
    whether to save the preprocessed image.

    Args:
        rgbImage: Pillow image of the whole screenshot (RGB, as produced by
            read_file()).
        box: object with ``x``, ``y``, ``x2``, ``y2`` pixel bounds to crop.
        is_number: when true, Tesseract is restricted to digits and commas
            and all separators and whitespace are removed from the result.
        inv: when true, colours are inverted before filtering.
        bonusRightTrim: pixels added to the right edge of the detected
            content box; negative values trim content from the right.
        debugFilePath: where the preprocessed region is saved when
            ``arguments.debug`` is set.

    Returns:
        The recognised text as a single line without tabs.
    """
    # Crop to the requested region.
    region = rgbImage.crop((box.x, box.y, box.x2, box.y2))

    # Invert if flagged.
    if inv:
        region = invert(region)

    # Stretch contrast, clipping the top of the histogram to cut off artifacts.
    region = autocontrast(region, cutoff=(0, 50))

    # numpy.array() on a Pillow RGB image yields channels in R, G, B order, so
    # the matching conversion is RGB2GRAY. The previous BGR2GRAY swapped the
    # red and blue luminance weights.
    gray = cv2.cvtColor(numpy.array(region), cv2.COLOR_RGB2GRAY)

    # Remove speckle, then binarise with an Otsu-selected threshold.
    gray = cv2.medianBlur(gray, 3)
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Back to Pillow.
    region = Image.fromarray(binary)

    # Crop to content: getbbox() finds non-zero pixels, so invert first to
    # make the dark text the non-zero part.
    bbox = invert(region).getbbox()
    if bbox:
        left, upper, right, lower = bbox
        # Apply the right-hand trim, but never let the box collapse or flip;
        # an inverted box is not a valid crop rectangle.
        right = max(left + 1, right + bonusRightTrim)
        region = region.crop((left, upper, right, lower))
        # Tighten again after trimming. getbbox() returns None when nothing
        # dark is left, in which case the region is kept as is.
        tightBox = invert(region).getbbox()
        if tightBox:
            region = region.crop(tightBox)

    # White margin around the text for Tesseract.
    region = expand(region, border=10, fill=255)

    if arguments.debug:
        region.save(debugFilePath)

    if is_number:
        config = "--psm 6 -c tessedit_char_whitelist=0123456789,"
    else:
        config = "--psm 6"
    text = pytesseract.image_to_string(region, config=config).strip()

    if is_number:
        # Drop line breaks, tabs, spaces and thousands separators in one pass.
        return text.translate(str.maketrans("", "", "\n\r\t .,"))
    # Free text: keep it on one line and tab-free so it fits one output column.
    return text.replace('\n', ' ').replace('\r', '').replace('\t', ' ')
-
- # Write to output file
def write_file():
    """Placeholder for writing to the output file; currently does nothing."""
    return None
-
- # Initialize child processes (ignore SIGINT)
def mpInitializer():
    """Pool initializer: make the calling process ignore SIGINT."""
    ignoreHandler = signal.SIG_IGN
    signal.signal(signal.SIGINT, ignoreHandler)
-
- # ----
- # Arguments
- # ----
-
# Parsed at import time rather than under the __main__ guard:
# read_file() and read_string_from_image() read the module-level `arguments`.
# NOTE(review): --output is accepted but not read anywhere in the visible file.
parser = argparse.ArgumentParser(description=PROGRAM_DESCRIPTION)
parser.add_argument("-p", "--project", required=True, help="project name")
parser.add_argument("-f", "--file", required=True, help="file name (globs accepted)")
parser.add_argument("-o", "--output", help="output file")
parser.add_argument("-v", "--verbose", action="store_true", default=False, help="be verbose")
parser.add_argument("--debug", action="store_true", default=False, help="save debug images")
arguments = parser.parse_args()
-
- # ----
- # Program
- # ----
-
- # TODO: remove globals
# Kept at module level (outside the __main__ guard) because read_file()
# reads it when building debug image paths.
debugFolder = "debug" + "/"

if __name__ == '__main__':
    # Create project folder
    projectFolder = "output/" + arguments.project + "/"
    os.makedirs(projectFolder, exist_ok=True)

    # Create debug folder
    if arguments.debug:
        os.makedirs(debugFolder, exist_ok=True)

    # Get files to read
    screenshots_to_read = glob.glob(arguments.file, recursive=True)
    screenshot_count = len(screenshots_to_read)
    if screenshot_count < 1:
        sys.exit("No files found.")

    # Collect the file names already present in the output files. Each row
    # starts with the screenshot's file name followed by a tab.
    alreadyScraped = set()
    for outputPath in [OUTPUT_PATH_PROFILE, OUTPUT_PATH_MOREINFO, OUTPUT_PATH_KILLS]:
        # "a+" creates a missing file without truncating an existing one
        # ("w+" wiped all previous results before they could be read).
        # Append mode starts at the end of the file, so rewind before reading.
        with open(projectFolder + outputPath, "a+", newline='', encoding='utf-8') as outputFile:
            outputFile.seek(0)
            for line in outputFile:
                alreadyScraped.add(line.split("\t", 1)[0])

    # Mark as duplicates: (index, path, isDuplicate). The file name must match
    # a recorded name exactly, not merely appear somewhere in the old output.
    screenshots_to_read = [
        (i, file, os.path.basename(file) in alreadyScraped)
        for i, file in enumerate(screenshots_to_read)
    ]

    # Scrape
    if arguments.verbose:
        print("Scraping", screenshot_count, "files")

    cpuCount = multiprocessing.cpu_count()
    mpPool = multiprocessing.Pool(cpuCount, initializer=mpInitializer)

    try:
        # imap() yields results in submission order.
        for fileNumber, filename, debugOutput, outputPath, outputLine, isDuplicate in mpPool.imap(read_file, screenshots_to_read):
            if arguments.verbose:
                print(fileNumber + 1, "/", screenshot_count, ": ", filename, sep="")

            if isDuplicate and not arguments.debug:
                if arguments.verbose:
                    print(" ", "already scraped.")
                continue

            if arguments.verbose:
                print(debugOutput)

            # Debug runs only produce debug images; they never write results.
            if not arguments.debug:
                with open(projectFolder + outputPath, "a", newline='', encoding='utf-8') as outputFile:
                    outputFile.write(outputLine + "\n")
    except KeyboardInterrupt:
        print("Exiting...")
    finally:
        # Runs on success, error and Ctrl-C alike, so one terminate() suffices.
        mpPool.terminate()
        mpPool.join()
|