# ashton / rok-stat

import argparse
import cv2
import glob
import os
import multiprocessing
import numpy
from PIL import Image, ImageFilter
from PIL.ImageOps import autocontrast, invert, grayscale, contain, expand
import pytesseract
import signal
import sys


# ----
# Program
# ----

# Program identity; PROGRAM_DESCRIPTION is the text handed to argparse for
# the --help output.
PROGRAM_NAME = "RoK Reader"
PROGRAM_VERSION = "0.3"
PROGRAM_DESCRIPTION = (
    "This program reads data from Rise of Kingdoms screenshots. "
    "It currently supports three user profile screenshots."
)

# ----
# Classes
# ----

class Box:
    """Rectangle described by two opposite corners.

    read_file() builds one from a target's top-left and bottom-right points,
    and read_string_from_image() passes (x, y, x2, y2) to Image.crop().
    """

    def __init__(self, x, y, x2, y2):
        # First corner.
        self.x, self.y = x, y
        # Opposite corner.
        self.x2, self.y2 = x2, y2

class RelativeBox:
    """Rectangle described by an offset pair plus a size.

    Plain value holder; not referenced by the rest of the visible code.
    """

    def __init__(self, x_distance, y_distance, width, height):
        # Offset of the box.
        self.x_distance, self.y_distance = x_distance, y_distance
        # Size of the box.
        self.width, self.height = width, height

# ----
# Files
# ----

# Output file names, one per screenshot type. read_file() picks one from the
# screenshot's file name and the main section joins it onto the project
# folder ("output/<project>/"). Despite the .csv extension, rows are written
# tab-separated: file name first, then one column per target.
OUTPUT_PATH_PROFILE = "output-profile.csv"
OUTPUT_PATH_MOREINFO = "output-more.csv"
OUTPUT_PATH_KILLS = "output-kills.csv"

# ----
# Coordinates
# ----

# Name, Top Left, Bottom Right, Number, Invert, BonusRightTrim

# Fields read from screenshots whose file name contains "profile" (see
# read_file). Each entry is
#   (name, top-left (x, y), bottom-right (x, y), is_number, invert, bonusRightTrim)
# where
#   - the two corners are pixel coordinates passed straight to Image.crop();
#   - is_number restricts OCR to digits/commas and strips separators;
#   - invert flips the crop's colours before thresholding;
#   - bonusRightTrim is added to the right edge of the detected text box, so
#     a negative value cuts that many pixels off the right-hand side.
# NOTE(review): the coordinates presumably match one fixed screenshot
# resolution - confirm before feeding differently sized images.
PROFILE_TARGETS = [
    ("ID", (1246, 375), (1445, 430), True, True, -10),
    ("Power", (1435, 585), (1733, 634), True, True, 0),
    ("Kill Points", (1806, 585), (2112, 633), True, True, 0),
    ("Alliance", (1025, 584), (1427, 637), False, True, 0),
    ("Civilization", (1884, 420), (2132, 486), False, True, 0)
]

# Fields read from screenshots whose file name contains "more" (see
# read_file). Each entry is
#   (name, top-left (x, y), bottom-right (x, y), is_number, invert, bonusRightTrim);
# every field here is numeric, colour-inverted before OCR, and uses no extra
# right-hand trim. The order of the entries is the column order of the
# output row.
MOREINFO_TARGETS = [
    ("Power", (1305, 223), (1540, 274), True, True, 0),
    ("Kill Points", (1931, 222), (2188, 276), True, True, 0),
    ("Highest Power", (1815, 416), (2105, 483), True, True, 0),
    ("Victories", (1815, 515), (2105, 580), True, True, 0),
    ("Defeats", (1815, 613), (2105, 675), True, True, 0),
    ("Dead", (1815, 710), (2105, 771), True, True, 0),
    ("Scout Times", (1815, 806), (2105, 871), True, True, 0),
    ("Resources Gathered", (1815, 980), (2105, 1047), True, True, 0),
    ("Resource Assistance", (1815, 1077), (2105, 1144), True, True, 0),
    ("Alliance Help Times", (1815, 1174), (2105, 1238), True, True, 0)
]

# Fields read from screenshots whose file name contains "kills" (see
# read_file). Each entry is
#   (name, top-left (x, y), bottom-right (x, y), is_number, invert, bonusRightTrim).
# None of these crops is colour-inverted. "Previous Kills" is read as free
# text and has 385 pixels cut off the right-hand side of its detected
# content box before OCR.
KILLS_TARGETS = [
    ("Kill Points", (1418, 312), (1694, 352), True, False, 0),
    ("T1 Kills", (1325, 637), (1538, 684), True, False, 0),
    ("T1 Kill Points", (1986, 637), (2212, 684), True, False, 0),
    ("T2 Kills", (1325, 702), (1538, 755), True, False, 0),
    ("T2 Kill Points", (1986, 702), (2212, 755), True, False, 0),
    ("T3 Kills", (1325, 770), (1538, 824), True, False, 0),
    ("T3 Kill Points", (1986, 770), (2212, 824), True, False, 0),
    ("T4 Kills", (1325, 847), (1538, 897), True, False, 0),
    ("T4 Kill Points", (1986, 847), (2212, 897), True, False, 0),
    ("T5 Kills", (1325, 918), (1538, 968), True, False, 0),
    ("T5 Kill Points", (1986, 918), (2212, 968), True, False, 0),
    ("Previous Kills", (1626, 985), (2228, 1039), False, False, -385)
]

# ----
# Functions
# ----

# Read an image file
def read_file(fileTuple):
    """OCR every configured field of one screenshot.

    Called in a worker process through Pool.imap().

    Args:
        fileTuple: (fileNumber, file, isDuplicate) - the screenshot's index
            in this run, its path, and whether its name was found in the
            existing output files.

    Returns:
        (fileNumber, filename, debugOutput, outputPath, outputLine,
        isDuplicate). outputLine is the file name followed by one
        tab-separated value per target; debugOutput is a "  name: value"
        line per target. Both are "" for a duplicate that is skipped.

    Raises:
        ValueError: the file name contains none of "profile", "more" or
            "kills", so no target table can be chosen.
    """
    fileNumber, file, isDuplicate = fileTuple[:3]
    filename = os.path.basename(file)

    # The screenshot type is encoded in the file name.
    if "profile" in filename:
        targets = PROFILE_TARGETS
        outputPath = OUTPUT_PATH_PROFILE
    elif "more" in filename:
        targets = MOREINFO_TARGETS
        outputPath = OUTPUT_PATH_MOREINFO
    elif "kills" in filename:
        targets = KILLS_TARGETS
        outputPath = OUTPUT_PATH_KILLS
    else:
        # Raise rather than sys.exit(): SystemExit is not caught by the pool's
        # worker loop, so the worker would die without reporting a result and
        # the parent's imap() would wait for it forever.
        raise ValueError("File name doesn't contain type: " + filename)

    # Duplicates are only re-read in debug mode (to regenerate debug images).
    if isDuplicate and not arguments.debug:
        return (fileNumber, filename, "", outputPath, "", isDuplicate)

    # Flatten the screenshot onto a white background. Converting to RGBA first
    # guarantees an alpha band, so images saved without transparency no
    # longer fail on split()[3]; the with-block closes the file even on error.
    with Image.open(file) as image:
        rgbaImage = image.convert("RGBA")
    rgbImage = Image.new("RGB", rgbaImage.size, (255, 255, 255))
    rgbImage.paste(rgbaImage, mask=rgbaImage.split()[3])

    # Get data
    baseName = os.path.splitext(filename)[0]
    values = []
    for i, (_, topLeft, bottomRight, isNumber, inv, bonusRightTrim) in enumerate(targets):
        debugFile = baseName + "_" + str(i) + ".png"
        box = Box(topLeft[0], topLeft[1], bottomRight[0], bottomRight[1])
        values.append(read_string_from_image(rgbImage, box, isNumber, inv, bonusRightTrim, debugFolder + debugFile))

    debugOutput = "".join(
        "  " + target[0] + ": " + value + "\n"
        for target, value in zip(targets, values)
    )
    outputLine = filename + "\t" + "\t".join(values)
    return (fileNumber, filename, debugOutput, outputPath, outputLine, isDuplicate)

# Read text from a section of an image using Tesseract
def read_string_from_image(rgbImage, box, is_number, inv, bonusRightTrim, debugFilePath):
    """Crop one field out of a screenshot, clean it up and OCR it.

    Args:
        rgbImage: Pillow image of the whole screenshot; it is not modified.
        box: object with x, y, x2, y2 attributes giving the crop rectangle.
        is_number: when true, OCR is limited to digits and commas and every
            separator/whitespace character is removed from the result.
        inv: when true, the crop's colours are inverted before filtering.
        bonusRightTrim: added to the right edge of the detected content box;
            a negative value cuts that many pixels off the right-hand side.
        debugFilePath: where the processed crop is saved when the --debug
            flag is set.

    Returns:
        The recognised text as a single line ("" when nothing is read).
    """
    # Crop to correct dimensions
    fieldImage = rgbImage.crop((box.x, box.y, box.x2, box.y2))

    # Invert if flagged
    if inv:
        fieldImage = invert(fieldImage)

    # Apply Pillow filters to cut off artifacts
    fieldImage = autocontrast(fieldImage, cutoff=(0, 50))

    # Convert to OpenCV and reduce to grayscale.
    # NOTE(review): numpy.array() of a Pillow RGB image is RGB-ordered, so
    # COLOR_BGR2GRAY applies the red and blue weights swapped. Kept as-is to
    # avoid changing OCR results; consider COLOR_RGB2GRAY after re-testing.
    npImage = numpy.array(fieldImage)
    npImage = cv2.cvtColor(npImage, cv2.COLOR_BGR2GRAY)

    # Apply OpenCV filters: de-noise, then binarise with Otsu's threshold.
    npImage = cv2.medianBlur(npImage, 3)
    _, npImage = cv2.threshold(npImage, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Convert to Pillow
    fieldImage = Image.fromarray(npImage)

    # Crop to content (getbbox() finds non-zero pixels, hence the invert).
    bbox = invert(fieldImage).getbbox()
    if bbox:
        left, top, right, bottom = bbox
        right += bonusRightTrim
        if right > left:
            fieldImage = fieldImage.crop((left, top, right, bottom))
            # Trimming can expose new blank margins; tighten once more.
            trimmedBox = invert(fieldImage).getbbox()
            if trimmedBox:
                fieldImage = fieldImage.crop(trimmedBox)
        else:
            # The trim is wider than the content. Image.crop() rejects a box
            # whose right edge lies left of its left edge, so treat the field
            # as empty instead of crashing the worker.
            fieldImage = Image.new(fieldImage.mode, (1, 1), 255)
    fieldImage = expand(fieldImage, border=10, fill=255)

    if arguments.debug:
        fieldImage.save(debugFilePath)

    if is_number:
        text = pytesseract.image_to_string(fieldImage, config="--psm 6 -c tessedit_char_whitelist=0123456789,")
        # Drop line breaks, separators and spaces so only the digits remain.
        return text.strip().translate(str.maketrans("", "", "\n\r.,\t "))

    text = pytesseract.image_to_string(fieldImage, config="--psm 6")
    # Collapse the result onto one line.
    return text.strip().translate(str.maketrans({"\n": " ", "\r": None, "\t": " "}))

# Write to output file
def write_file():
    """Placeholder for writing output; currently does nothing and returns None."""
    return None

# Initialize child processes (ignore SIGINT)
def mpInitializer():
    """Pool initializer: make the worker process ignore SIGINT.

    The main section passes this as the pool's initializer; it handles
    KeyboardInterrupt itself and terminates the pool.
    """
    interrupt = signal.SIGINT
    signal.signal(interrupt, signal.SIG_IGN)

# ----
# Arguments
# ----

# Command-line interface. Parsed at module level because read_file() and
# read_string_from_image() read the resulting `arguments` global.
# --output is accepted but not referenced elsewhere in the visible code.
parser = argparse.ArgumentParser(description=PROGRAM_DESCRIPTION)
parser.add_argument("-p", "--project", required=True, help="project name")
parser.add_argument("-f", "--file", required=True, help="file name (globs accepted)")
parser.add_argument("-o", "--output", help="output file")
# store_true flags default to False.
parser.add_argument("-v", "--verbose", action="store_true", help="be verbose")
parser.add_argument("--debug", action="store_true", help="save debug images")
arguments = parser.parse_args()

# ----
# Program
# ----

# TODO: remove globals
# Folder that receives the per-field debug crops; read_file() reads this
# global when building debug file paths.
debugFolder = "debug" + "/"


def load_scraped_filenames(outputPaths):
    """Return the set of screenshot file names already present in the outputs.

    Every output row starts with the screenshot's file name followed by a
    tab, so only the first column of each line is collected. Files are opened
    in "a+" mode: a missing file is created empty and existing rows are left
    untouched ("w+" would truncate them).

    Args:
        outputPaths: iterable of output file paths to scan.

    Returns:
        A set of file names (first column of every non-empty row).
    """
    scraped = set()
    for path in outputPaths:
        with open(path, "a+", newline='', encoding='utf-8') as outputFile:
            # "a+" may position the stream at the end; rewind before reading.
            outputFile.seek(0)
            for line in outputFile:
                name = line.split("\t", 1)[0].rstrip("\r\n")
                if name:
                    scraped.add(name)
    return scraped


if __name__ == '__main__':
    # Create project folder
    projectFolder = "output/" + arguments.project + "/"
    os.makedirs(projectFolder, exist_ok=True)

    # Create debug folder
    if arguments.debug:
        os.makedirs(debugFolder, exist_ok=True)

    # Get files to read
    screenshots_to_read = glob.glob(arguments.file, recursive=True)
    screenshot_count = len(screenshots_to_read)
    if screenshot_count < 1: sys.exit("No files found.")

    # Get the file names of all previously scraped screenshots
    alreadyScraped = load_scraped_filenames([
        projectFolder + outputPath
        for outputPath in (OUTPUT_PATH_PROFILE, OUTPUT_PATH_MOREINFO, OUTPUT_PATH_KILLS)
    ])

    # Mark as duplicates: (index, path, isDuplicate), matched on the exact
    # file name so "1_profile.png" no longer matches "11_profile.png".
    screenshots_to_read = [
        (i, file, os.path.basename(file) in alreadyScraped)
        for i, file in enumerate(screenshots_to_read)
    ]

    # Scrape
    if arguments.verbose: print("Scraping", screenshot_count, "files")

    cpuCount = multiprocessing.cpu_count()
    mpPool = multiprocessing.Pool(cpuCount, initializer=mpInitializer)

    try:
        # imap() yields results in submission order.
        for result in mpPool.imap(read_file, screenshots_to_read):
            fileNumber, filename, debugOutput, outputPath, outputLine, isDuplicate = result
            if arguments.verbose: print(fileNumber + 1, "/", screenshot_count, ": ", filename, sep="")
            if isDuplicate and not arguments.debug:
                if arguments.verbose: print("  ", "already scraped.")
                continue
            if arguments.verbose: print(debugOutput)
            # Debug runs only produce debug images; they never append rows.
            if not arguments.debug:
                with open(projectFolder + outputPath, "a+", newline='', encoding='utf-8') as outputFile:
                    outputFile.write(outputLine + "\n")
    except KeyboardInterrupt:
        print("Exiting...")
    finally:
        # Runs on success, error and Ctrl+C alike.
        mpPool.terminate()
        mpPool.join()