Scripts used to grab or read stats in RoK
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

rok-reader.py 8.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253
  1. import argparse
  2. import cv2
  3. import glob
  4. import os
  5. import multiprocessing
  6. import numpy
  7. from PIL import Image, ImageFilter
  8. from PIL.ImageOps import autocontrast, invert, grayscale, contain, expand
  9. import pytesseract
  10. import signal
  11. import sys
  12. # ----
  13. # Program
  14. # ----
  15. PROGRAM_NAME = "RoK Reader"
  16. PROGRAM_VERSION = "0.3"
  17. PROGRAM_DESCRIPTION = "This program reads data from Rise of Kingdoms screenshots. It currently supports three user profile screenshots."
  18. # ----
  19. # Classes
  20. # ----
  21. class Box:
  22. def __init__(self, x, y, x2, y2):
  23. self.x = x
  24. self.y = y
  25. self.x2 = x2
  26. self.y2 = y2
  27. class RelativeBox:
  28. def __init__(self, x_distance, y_distance, width, height):
  29. self.x_distance = x_distance
  30. self.y_distance = y_distance
  31. self.width = width
  32. self.height = height
  33. # ----
  34. # Files
  35. # ----
  36. OUTPUT_PATH_PROFILE = "output-profile.csv"
  37. OUTPUT_PATH_MOREINFO = "output-more.csv"
  38. OUTPUT_PATH_KILLS = "output-kills.csv"
  39. # ----
  40. # Coordinates
  41. # ----
  42. # Name, Top Left, Bottom Right, Number, Invert, BonusRightTrim
  43. PROFILE_TARGETS = [
  44. ("ID", (1246, 375), (1445, 430), True, True, -10),
  45. ("Power", (1435, 585), (1733, 634), True, True, 0),
  46. ("Kill Points", (1806, 585), (2112, 633), True, True, 0),
  47. ("Alliance", (1025, 584), (1427, 637), False, True, 0),
  48. ("Civilization", (1884, 420), (2132, 486), False, True, 0)
  49. ]
  50. MOREINFO_TARGETS = [
  51. ("Power", (1305, 223), (1540, 274), True, True, 0),
  52. ("Kill Points", (1931, 222), (2188, 276), True, True, 0),
  53. ("Highest Power", (1815, 416), (2105, 483), True, True, 0),
  54. ("Victories", (1815, 515), (2105, 580), True, True, 0),
  55. ("Defeats", (1815, 613), (2105, 675), True, True, 0),
  56. ("Dead", (1815, 710), (2105, 771), True, True, 0),
  57. ("Scout Times", (1815, 806), (2105, 871), True, True, 0),
  58. ("Resources Gathered", (1815, 980), (2105, 1047), True, True, 0),
  59. ("Resource Assistance", (1815, 1077), (2105, 1144), True, True, 0),
  60. ("Alliance Help Times", (1815, 1174), (2105, 1238), True, True, 0)
  61. ]
  62. KILLS_TARGETS = [
  63. ("Kill Points", (1418, 312), (1694, 352), True, False, 0),
  64. ("T1 Kills", (1325, 637), (1538, 684), True, False, 0),
  65. ("T1 Kill Points", (1986, 637), (2212, 684), True, False, 0),
  66. ("T2 Kills", (1325, 702), (1538, 755), True, False, 0),
  67. ("T2 Kill Points", (1986, 702), (2212, 755), True, False, 0),
  68. ("T3 Kills", (1325, 770), (1538, 824), True, False, 0),
  69. ("T3 Kill Points", (1986, 770), (2212, 824), True, False, 0),
  70. ("T4 Kills", (1325, 847), (1538, 897), True, False, 0),
  71. ("T4 Kill Points", (1986, 847), (2212, 897), True, False, 0),
  72. ("T5 Kills", (1325, 918), (1538, 968), True, False, 0),
  73. ("T5 Kill Points", (1986, 918), (2212, 968), True, False, 0),
  74. ("Previous Kills", (1626, 985), (2228, 1039), False, False, -385)
  75. ]
  76. # ----
  77. # Functions
  78. # ----
  79. # Read an image file
  80. def read_file(fileTuple):
  81. fileNumber = fileTuple[0]
  82. file = fileTuple[1]
  83. isDuplicate = fileTuple[2]
  84. filename = os.path.basename(file)
  85. if "profile" in filename:
  86. targets = PROFILE_TARGETS
  87. outputPath = OUTPUT_PATH_PROFILE
  88. elif "more" in filename:
  89. targets = MOREINFO_TARGETS
  90. outputPath = OUTPUT_PATH_MOREINFO
  91. elif "kills" in filename:
  92. targets = KILLS_TARGETS
  93. outputPath = OUTPUT_PATH_KILLS
  94. else:
  95. sys.exit("File name doesn't contain type") # TODO: fix
  96. if not isDuplicate or arguments.debug:
  97. # Open image and swap to RGB
  98. image = Image.open(file)
  99. rgbImage = Image.new("RGB", image.size, (255, 255, 255))
  100. rgbImage.paste(image, mask = image.split()[3])
  101. image.close()
  102. # Get data
  103. outputLine = filename + "\t"
  104. debugOutput = ""
  105. for i, target in enumerate(targets):
  106. debugFile = os.path.splitext(filename)[0] + "_" + str(i) + ".png"
  107. string = read_string_from_image(rgbImage, Box(target[1][0], target[1][1], target[2][0], target[2][1]), target[3], target[4], target[5], debugFolder + debugFile)
  108. debugOutput = debugOutput + " " + target[0] + ": " + string + "\n"
  109. if i:
  110. outputLine = outputLine + "\t"
  111. outputLine = outputLine + string
  112. return (fileNumber, filename, debugOutput, outputPath, outputLine, isDuplicate)
  113. else:
  114. return (fileNumber, filename, "", outputPath, "", isDuplicate)
  115. # Read text from a section of an image using Tesseract
  116. def read_string_from_image(rgbImage, box, is_number, inv, bonusRightTrim, debugFilePath):
  117. # Crop to correct dimensions
  118. rgbImage = rgbImage.crop((box.x, box.y, box.x2, box.y2))
  119. # Invert if flagged
  120. if inv: rgbImage = invert(rgbImage)
  121. # Apply Pillow filters to cut off artifacts
  122. rgbImage = autocontrast(rgbImage, cutoff=(0, 50))
  123. # Convert to OpenCV
  124. npImage=numpy.array(rgbImage)
  125. # Set colors to grayscale
  126. npImage=cv2.cvtColor(npImage, cv2.COLOR_BGR2GRAY)
  127. # Apply OpenCV Filters
  128. npImage = cv2.medianBlur(npImage, 3)
  129. _, npImage = cv2.threshold(npImage, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
  130. # npImage = cv2.adaptiveThreshold(npImage, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
  131. # Convert to Pillow
  132. rgbImage = Image.fromarray(npImage)
  133. # Crop to content
  134. bbox = invert(rgbImage).getbbox()
  135. if bbox: rgbImage = rgbImage.crop((bbox[0], bbox[1], bbox[2] + bonusRightTrim, bbox[3]))
  136. if bbox: rgbImage = rgbImage.crop(invert(rgbImage).getbbox())
  137. rgbImage = expand(rgbImage, border=10, fill=255)
  138. if arguments.debug:
  139. rgbImage.save(debugFilePath)
  140. if is_number:
  141. return pytesseract.image_to_string(rgbImage, config="--psm 6 -c tessedit_char_whitelist=0123456789,").strip().replace('\n', ' ').replace('\r', '').replace('.', '').replace(',', '').replace('\t', ' ').replace(' ', '')
  142. else:
  143. return pytesseract.image_to_string(rgbImage, config="--psm 6").strip().replace('\n', ' ').replace('\r', '').replace('\t', ' ')
  144. # Write to output file
  145. def write_file():
  146. return
  147. # Initialize child processes (ignore SIGINT)
  148. def mpInitializer():
  149. signal.signal(signal.SIGINT, signal.SIG_IGN)
  150. # ----
  151. # Arguments
  152. # ----
  153. parser = argparse.ArgumentParser(description = PROGRAM_DESCRIPTION)
  154. parser.add_argument("-p", "--project", help = "project name", required = True)
  155. parser.add_argument("-f", "--file", help = "file name (globs accepted)", required = True)
  156. parser.add_argument("-o", "--output", help = "output file")
  157. parser.add_argument("-v", "--verbose", help = "be verbose", default = False, action = "store_true")
  158. parser.add_argument("--debug", help = "save debug images", default = False, action = "store_true")
  159. arguments = parser.parse_args()
  160. # ----
  161. # Program
  162. # ----
  163. # TODO: remove globals
  164. debugFolder = "debug" + "/"
  165. if __name__ == '__main__':
  166. # Create project folder
  167. projectFolder = "output/" + arguments.project + "/"
  168. if not os.path.exists(projectFolder):
  169. os.makedirs(projectFolder)
  170. # Create debug folder
  171. if arguments.debug:
  172. if not os.path.exists(debugFolder):
  173. os.makedirs(debugFolder)
  174. # Get files to read
  175. screenshots_to_read = glob.glob(arguments.file, recursive=True)
  176. screenshot_count = len(screenshots_to_read)
  177. if screenshot_count < 1: sys.exit("No files found.")
  178. # Get all previously scraped data # TODO: limit to filenames only
  179. alreadyScraped = ""
  180. for outputPath in [OUTPUT_PATH_PROFILE, OUTPUT_PATH_MOREINFO, OUTPUT_PATH_KILLS]:
  181. with open(projectFolder + outputPath, "w+", newline='', encoding='utf-8') as outputFile:
  182. alreadyScraped = alreadyScraped + outputFile.read()
  183. # Mark as duplicates
  184. for i, file in enumerate(screenshots_to_read):
  185. if os.path.basename(file) in alreadyScraped:
  186. screenshots_to_read[i] = (i, file, True)
  187. else:
  188. screenshots_to_read[i] = (i, file, False)
  189. # Scrape
  190. if arguments.verbose: print("Scraping", screenshot_count, "files")
  191. cpuCount = multiprocessing.cpu_count()
  192. mpPool = multiprocessing.Pool(cpuCount, initializer=mpInitializer)
  193. try:
  194. # Returns: (fileNumber, filename, debugOutput, outputPath, outputLine, isDuplicate)
  195. for result in mpPool.imap(read_file, screenshots_to_read):
  196. if arguments.verbose: print(result[0]+1, "/", screenshot_count, ": ", result[1], sep="")
  197. if result[5] and not arguments.debug:
  198. if arguments.verbose: print(" ", "already scraped.")
  199. else:
  200. if arguments.verbose: print(result[2])
  201. if not arguments.debug:
  202. with open(projectFolder + result[3], "a+", newline='', encoding='utf-8') as outputFile:
  203. outputFile.write(result[4] + "\n")
  204. except KeyboardInterrupt:
  205. print("Exiting...")
  206. mpPool.terminate()
  207. finally:
  208. mpPool.terminate()
  209. mpPool.join()