Scripts used to grab or read stats in RoK
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

rok-reader.py 9.3KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254
import argparse
import glob
import multiprocessing
import os
import signal
import sys

import cv2
import numpy
import pytesseract
from PIL import Image, ImageDraw, ImageFilter
from PIL.ImageOps import autocontrast, contain, expand, grayscale, invert
  12. # ----
  13. # Program
  14. # ----
  15. PROGRAM_NAME = "RoK Reader"
  16. PROGRAM_VERSION = "0.3"
  17. PROGRAM_DESCRIPTION = "This program reads data from Rise of Kingdoms screenshots. It currently supports three user profile screenshots."
  18. # ----
  19. # Classes
  20. # ----
  21. class Box:
  22. def __init__(self, x, y, x2, y2):
  23. self.x = x
  24. self.y = y
  25. self.x2 = x2
  26. self.y2 = y2
  27. class RelativeBox:
  28. def __init__(self, x_distance, y_distance, width, height):
  29. self.x_distance = x_distance
  30. self.y_distance = y_distance
  31. self.width = width
  32. self.height = height
  33. # ----
  34. # Files
  35. # ----
  36. OUTPUT_PATH_PROFILE = "output-profile.csv"
  37. OUTPUT_PATH_MOREINFO = "output-more.csv"
  38. OUTPUT_PATH_KILLS = "output-kills.csv"
  39. # ----
  40. # Coordinates
  41. # ----
  42. # Name, Top Left, Bottom Right, IsNumber, Invert, BonusRightTrim
  43. PROFILE_TARGETS = [
  44. ("ID", (1242, 375), (1445, 430), True, True, -10),
  45. ("Power", (1435, 585), (1733, 634), True, True, 0),
  46. ("Kill Points", (1806, 585), (2112, 633), True, True, 0),
  47. ("Alliance", (1025, 584), (1427, 637), False, True, 0),
  48. ("Civilization", (1884, 420), (2132, 486), False, True, 0)
  49. ]
  50. MOREINFO_TARGETS = [
  51. ("Power", (1305, 223), (1540, 274), True, True, 0),
  52. ("Kill Points", (1933, 222), (2188, 276), True, True, 0),
  53. ("Highest Power", (1815, 416), (2105, 483), True, True, 0),
  54. ("Victories", (1815, 515), (2105, 580), True, True, 0),
  55. ("Defeats", (1815, 613), (2105, 675), True, True, 0),
  56. ("Dead", (1815, 710), (2105, 771), True, True, 0),
  57. ("Scout Times", (1815, 806), (2105, 871), True, True, 0),
  58. ("Resources Gathered", (1815, 980), (2105, 1047), True, True, 0),
  59. ("Resource Assistance", (1815, 1077), (2105, 1144), True, True, 0),
  60. ("Alliance Help Times", (1815, 1174), (2105, 1238), True, True, 0)
  61. ]
  62. KILLS_TARGETS = [
  63. ("Kill Points", (1418, 312), (1694, 352), True, False, 0),
  64. ("T1 Kills", (1323, 637), (1538, 684), True, False, 0),
  65. ("T1 Kill Points", (1986, 637), (2212, 684), True, False, 0),
  66. ("T2 Kills", (1323, 702), (1538, 755), True, False, 0),
  67. ("T2 Kill Points", (1986, 702), (2212, 755), True, False, 0),
  68. ("T3 Kills", (1323, 770), (1538, 824), True, False, 0),
  69. ("T3 Kill Points", (1986, 770), (2212, 824), True, False, 0),
  70. ("T4 Kills", (1323, 847), (1538, 897), True, False, 0),
  71. ("T4 Kill Points", (1986, 847), (2212, 897), True, False, 0),
  72. ("T5 Kills", (1323, 918), (1538, 968), True, False, 0),
  73. ("T5 Kill Points", (1986, 918), (2212, 968), True, False, 0),
  74. ("Previous Kills", (1626, 985), (2228, 1039), True, False, -385)
  75. ]
  76. # ----
  77. # Functions
  78. # ----
  79. # Read an image file
  80. def read_file(fileTuple):
  81. fileNumber = fileTuple[0]
  82. file = fileTuple[1]
  83. isDuplicate = fileTuple[2]
  84. filename = os.path.basename(file)
  85. if "profile" in filename:
  86. targets = PROFILE_TARGETS
  87. outputPath = OUTPUT_PATH_PROFILE
  88. elif "more" in filename:
  89. targets = MOREINFO_TARGETS
  90. outputPath = OUTPUT_PATH_MOREINFO
  91. elif "kills" in filename:
  92. targets = KILLS_TARGETS
  93. outputPath = OUTPUT_PATH_KILLS
  94. else:
  95. sys.exit("File name doesn't contain type") # TODO: fix
  96. if not isDuplicate or arguments.debug:
  97. # Open image and swap to RGB
  98. image = Image.open(file)
  99. rgbImage = Image.new("RGB", image.size, (255, 255, 255))
  100. rgbImage.paste(image, mask = image.split()[3])
  101. if arguments.debug:
  102. debugImage = Image.new("RGB", rgbImage.size, (255, 255, 255))
  103. debugImage.paste(rgbImage, mask = image.split()[3])
  104. draw = ImageDraw.Draw(debugImage)
  105. image.close()
  106. # Get data
  107. outputLine = filename + "\t"
  108. debugOutput = ""
  109. for i, target in enumerate(targets):
  110. debugFile = os.path.splitext(filename)[0] + "_" + str(i) + ".png"
  111. string = read_string_from_image(rgbImage, Box(target[1][0], target[1][1], target[2][0], target[2][1]), target[3], target[4], target[5], debugFolder + debugFile)
  112. debugOutput = debugOutput + " " + target[0] + ": " + string + "\n"
  113. if i:
  114. outputLine = outputLine + "\t"
  115. outputLine = outputLine + string
  116. if arguments.debug: draw.rectangle([target[1][0], target[1][1], target[2][0], target[2][1]], outline="rgb(255,0,0)")
  117. if arguments.debug: debugImage.save(debugFolder + os.path.splitext(filename)[0] + "_debug" + ".png")
  118. return (fileNumber, filename, debugOutput, outputPath, outputLine, isDuplicate)
  119. else:
  120. return (fileNumber, filename, "", outputPath, "", isDuplicate)
  121. # Read text from a section of an image using Tesseract
  122. def read_string_from_image(rgbImage, box, is_number, inv, bonusRightTrim, debugFilePath):
  123. # Crop to correct dimensions and invert if flagged
  124. rgbImage = rgbImage.crop((box.x, box.y, box.x2, box.y2))
  125. if inv: rgbImage = invert(rgbImage)
  126. # Apply Pillow filters to cut off artifacts
  127. rgbImage = autocontrast(rgbImage, cutoff=(0, 50))
  128. # Convert to OpenCV and swap to grayscale
  129. npImage = numpy.array(rgbImage)
  130. npImage = cv2.cvtColor(npImage, cv2.COLOR_BGR2GRAY)
  131. # Apply OpenCV Filters
  132. npImage = cv2.medianBlur(npImage, 3)
  133. _, npImage = cv2.threshold(npImage, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
  134. # npImage = cv2.adaptiveThreshold(npImage, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
  135. # Convert back to Pillow and crop to content
  136. rgbImage = Image.fromarray(npImage)
  137. bbox = invert(rgbImage).getbbox()
  138. if bbox: rgbImage = rgbImage.crop((bbox[0], bbox[1], bbox[2] + bonusRightTrim, bbox[3]))
  139. if bbox: rgbImage = rgbImage.crop(invert(rgbImage).getbbox())
  140. rgbImage = expand(rgbImage, border=10, fill=255)
  141. # Save debug image
  142. if arguments.debug: rgbImage.save(debugFilePath)
  143. # Run Tesseract (depends whether it is a number or not)
  144. if is_number:
  145. return pytesseract.image_to_string(rgbImage, config="--psm 6 -c tessedit_char_whitelist=0123456789,").strip().replace('\n', ' ').replace('\r', '').replace('.', '').replace(',', '').replace('\t', ' ').replace(' ', '')
  146. else:
  147. return pytesseract.image_to_string(rgbImage, config="--psm 6").strip().replace('\n', ' ').replace('\r', '').replace('\t', ' ')
  148. # Write to output file
  149. def write_file():
  150. return
  151. # Initialize child processes (ignore SIGINT)
  152. def mpInitializer():
  153. signal.signal(signal.SIGINT, signal.SIG_IGN)
  154. # ----
  155. # Arguments
  156. # ----
  157. parser = argparse.ArgumentParser(description = PROGRAM_DESCRIPTION)
  158. parser.add_argument("-p", "--project", help = "project name", required = True)
  159. parser.add_argument("-f", "--file", help = "file name (globs accepted)", required = True)
  160. parser.add_argument("-o", "--output", help = "output file")
  161. parser.add_argument("-v", "--verbose", help = "be verbose", default = False, action = "store_true")
  162. parser.add_argument("--debug", help = "save debug images", default = False, action = "store_true")
  163. arguments = parser.parse_args()
  164. # ----
  165. # Program
  166. # ----
  167. # TODO: remove globals
  168. debugFolder = "debug" + "/"
  169. if __name__ == '__main__':
  170. # Create project folder
  171. projectFolder = "output/" + arguments.project + "/"
  172. if not os.path.exists(projectFolder):
  173. os.makedirs(projectFolder)
  174. # Create debug folder
  175. if arguments.debug:
  176. if not os.path.exists(debugFolder):
  177. os.makedirs(debugFolder)
  178. # Get files to read
  179. screenshots_to_read = glob.glob(arguments.file, recursive=True)
  180. screenshot_count = len(screenshots_to_read)
  181. if screenshot_count < 1: sys.exit("No files found.")
  182. # Get all previously scraped data # TODO: limit to filenames only
  183. alreadyScraped = ""
  184. for outputPath in [OUTPUT_PATH_PROFILE, OUTPUT_PATH_MOREINFO, OUTPUT_PATH_KILLS]:
  185. with open(projectFolder + outputPath, "w+", newline='', encoding='utf-8') as outputFile:
  186. alreadyScraped = alreadyScraped + outputFile.read()
  187. # Mark as duplicates
  188. for i, file in enumerate(screenshots_to_read):
  189. if os.path.basename(file) in alreadyScraped:
  190. screenshots_to_read[i] = (i, file, True)
  191. else:
  192. screenshots_to_read[i] = (i, file, False)
  193. # Scrape
  194. if arguments.verbose: print("Scraping", screenshot_count, "files")
  195. cpuCount = multiprocessing.cpu_count()
  196. mpPool = multiprocessing.Pool(cpuCount, initializer=mpInitializer)
  197. try:
  198. # Returns: (fileNumber, filename, debugOutput, outputPath, outputLine, isDuplicate)
  199. for result in mpPool.imap(read_file, screenshots_to_read):
  200. if arguments.verbose: print(result[0]+1, "/", screenshot_count, ": ", result[1], sep="")
  201. if result[5] and not arguments.debug:
  202. if arguments.verbose: print(" ", "already scraped.")
  203. else:
  204. if arguments.verbose: print(result[2])
  205. if not arguments.debug:
  206. with open(projectFolder + result[3], "a+", newline='', encoding='utf-8') as outputFile:
  207. outputFile.write(result[4] + "\n")
  208. except KeyboardInterrupt:
  209. print("Exiting...")
  210. mpPool.terminate()
  211. finally:
  212. mpPool.terminate()
  213. mpPool.join()