Scripts used to grab or read stats in Rise of Kingdoms (RoK)
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

rok-reader.py 8.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240
  1. import argparse
  2. import glob
  3. import os
  4. import multiprocessing
  5. from PIL import Image, ImageFilter
  6. from PIL.ImageOps import autocontrast, invert, grayscale, contain
  7. import pytesseract
  8. import signal
  9. import sys
  10. # ----
  11. # Program
  12. # ----
# Display name of the tool (not referenced elsewhere in the visible source).
PROGRAM_NAME = "RoK Reader"
# Version string of the tool (not referenced elsewhere in the visible source).
PROGRAM_VERSION = "0.3"
# Passed to argparse as the program description.
PROGRAM_DESCRIPTION = "This program reads data from Rise of Kingdoms screenshots. It currently supports three user profile screenshots."
  16. # ----
  17. # Classes
  18. # ----
  19. class Box:
  20. def __init__(self, x, y, x2, y2):
  21. self.x = x
  22. self.y = y
  23. self.x2 = x2
  24. self.y2 = y2
  25. class RelativeBox:
  26. def __init__(self, x_distance, y_distance, width, height):
  27. self.x_distance = x_distance
  28. self.y_distance = y_distance
  29. self.width = width
  30. self.height = height
  31. # ----
  32. # Files
  33. # ----
# Output file names, one per screenshot type. They are appended to the
# project folder ("output/<project>/") by the main script.
# Results for screenshots whose file name contains "profile".
OUTPUT_PATH_PROFILE = "output-profile.csv"
# Results for screenshots whose file name contains "more".
OUTPUT_PATH_MOREINFO = "output-more.csv"
# Results for screenshots whose file name contains "kills".
OUTPUT_PATH_KILLS = "output-kills.csv"
  37. # ----
  38. # Coordinates
  39. # ----
# Name, Top Left, Bottom Right, Number, Invert, BonusRightTrim
#   Name           - label used in the verbose/debug output
#   Top Left       - (x, y) pixel of the crop's first corner
#   Bottom Right   - (x, y) pixel of the crop's second corner
#   Number         - True: OCR restricted to digits and commas
#   Invert         - True: the crop is inverted before filtering
#   BonusRightTrim - added to the right edge of the detected content box
#                    (negative values trim pixels off the right)
# Fields read from screenshots whose file name contains "profile".
PROFILE_TARGETS = [
    ("ID", (1246, 375), (1445, 430), True, True, -10),
    ("Power", (1435, 585), (1733, 634), True, True, 0),
    ("Kill Points", (1806, 585), (2112, 633), True, True, 0),
    ("Alliance", (1025, 584), (1427, 637), False, True, 0),
    ("Civilization", (1884, 420), (2132, 486), False, True, 0)
]
# Fields read from screenshots whose file name contains "more".
# Each entry: (name, (left, top), (right, bottom), is_number, invert, bonus_right_trim)
MOREINFO_TARGETS = [
    ("Power", (1305, 223), (1540, 274), True, True, 0),
    ("Kill Points", (1931, 222), (2188, 276), True, True, 0),
    ("Highest Power", (1815, 416), (2105, 483), True, True, 0),
    ("Victories", (1815, 515), (2105, 580), True, True, 0),
    ("Defeats", (1815, 613), (2105, 675), True, True, 0),
    ("Dead", (1815, 710), (2105, 771), True, True, 0),
    ("Scout Times", (1815, 806), (2105, 871), True, True, 0),
    ("Resources Gathered", (1815, 980), (2105, 1047), True, True, 0),
    ("Resource Assistance", (1815, 1077), (2105, 1144), True, True, 0),
    ("Alliance Help Times", (1815, 1174), (2105, 1238), True, True, 0)
]
# Fields read from screenshots whose file name contains "kills".
# Each entry: (name, (left, top), (right, bottom), is_number, invert, bonus_right_trim)
# Unlike the other two tables, no entry here sets the invert flag.
KILLS_TARGETS = [
    ("Kill Points", (1418, 312), (1694, 352), True, False, 0),
    ("T1 Kills", (1321, 637), (1538, 684), True, False, 0),
    ("T1 Kill Points", (1986, 637), (2212, 684), True, False, 0),
    ("T2 Kills", (1321, 702), (1538, 755), True, False, 0),
    ("T2 Kill Points", (1986, 702), (2212, 755), True, False, 0),
    ("T3 Kills", (1321, 770), (1538, 824), True, False, 0),
    ("T3 Kill Points", (1986, 770), (2212, 824), True, False, 0),
    ("T4 Kills", (1321, 847), (1538, 897), True, False, 0),
    ("T4 Kill Points", (1986, 847), (2212, 897), True, False, 0),
    ("T5 Kills", (1321, 918), (1538, 968), True, False, 0),
    ("T5 Kill Points", (1986, 918), (2212, 968), True, False, 0),
    # The only free-text (non-number) field in this table.
    ("Previous Kills", (1626, 985), (2228, 1039), False, False, 0)
]
  74. # ----
  75. # Functions
  76. # ----
  77. # Read an image file
  78. def read_file(fileTuple):
  79. fileNumber = fileTuple[0]
  80. file = fileTuple[1]
  81. isDuplicate = fileTuple[2]
  82. filename = os.path.basename(file)
  83. if "profile" in filename:
  84. targets = PROFILE_TARGETS
  85. outputPath = OUTPUT_PATH_PROFILE
  86. elif "more" in filename:
  87. targets = MOREINFO_TARGETS
  88. outputPath = OUTPUT_PATH_MOREINFO
  89. elif "kills" in filename:
  90. targets = KILLS_TARGETS
  91. outputPath = OUTPUT_PATH_KILLS
  92. else:
  93. sys.exit("File name doesn't contain type") # TODO: fix
  94. if not isDuplicate or arguments.debug:
  95. # Open image and swap to RGB
  96. image = Image.open(file)
  97. rgbImage = Image.new("RGB", image.size, (255, 255, 255))
  98. rgbImage.paste(image, mask = image.split()[3])
  99. image.close()
  100. # Get data
  101. outputLine = filename + "\t"
  102. debugOutput = ""
  103. for i, target in enumerate(targets):
  104. debugFile = os.path.splitext(filename)[0] + "_" + str(i) + ".png"
  105. string = read_string_from_image(rgbImage, Box(target[1][0], target[1][1], target[2][0], target[2][1]), target[3], target[4], target[5], debugFolder + debugFile)
  106. debugOutput = debugOutput + " " + target[0] + ": " + string + "\n"
  107. if i:
  108. outputLine = outputLine + "\t"
  109. outputLine = outputLine + string
  110. return (fileNumber, filename, debugOutput, outputPath, outputLine, isDuplicate)
  111. else:
  112. return (fileNumber, filename, "", outputPath, "", isDuplicate)
  113. # Read text from a section of an image using Tesseract
  114. def read_string_from_image(rgbImage, box, is_number, inv, bonusRightTrim, debugFilePath):
  115. # Crop to correct dimentions
  116. rgbImage = rgbImage.crop((box.x, box.y, box.x2, box.y2))
  117. # Invert if flagged
  118. if inv: rgbImage = invert(rgbImage)
  119. # Apply filters
  120. rgbImage = grayscale(rgbImage)
  121. rgbImage = autocontrast(rgbImage, cutoff=(0, 75))
  122. # Crop to content
  123. bbox = autocontrast(invert(rgbImage), cutoff=(0, 90)).getbbox()
  124. if bbox: rgbImage = rgbImage.crop((bbox[0], bbox[1], bbox[2] + bonusRightTrim, bbox[3]))
  125. # Resize and sharpen
  126. rgbImage = contain(rgbImage, (800, 800), method=1)
  127. rgbImage = rgbImage.filter(ImageFilter.EDGE_ENHANCE_MORE)
  128. rgbImage = rgbImage.filter(ImageFilter.SHARPEN)
  129. if arguments.debug:
  130. rgbImage.save(debugFilePath)
  131. if is_number:
  132. return pytesseract.image_to_string(rgbImage, config="--psm 6 -c tessedit_char_whitelist=0123456789,").strip().replace('\n', ' ').replace('\r', '').replace('.', '').replace(',', '').replace('\t', ' ').replace(' ', '')
  133. else:
  134. return pytesseract.image_to_string(rgbImage, config="--psm 6").strip().replace('\n', ' ').replace('\r', '').replace('\t', ' ')
  135. # Write to output file
  136. def write_file():
  137. return
  138. # Initialize child processes (ignore SIGINT)
  139. def mpInitializer():
  140. signal.signal(signal.SIGINT, signal.SIG_IGN)
  141. # ----
  142. # Arguments
  143. # ----
  144. parser = argparse.ArgumentParser(description = PROGRAM_DESCRIPTION)
  145. parser.add_argument("-p", "--project", help = "project name", required = True)
  146. parser.add_argument("-f", "--file", help = "file name (globs accepted)", required = True)
  147. parser.add_argument("-o", "--output", help = "output file")
  148. parser.add_argument("-v", "--verbose", help = "be verbose", default = False, action = "store_true")
  149. parser.add_argument("--debug", help = "save debug images", default = False, action = "store_true")
  150. arguments = parser.parse_args()
  151. # ----
  152. # Program
  153. # ----
  154. # TODO: remove globals
  155. debugFolder = "debug" + "/"
  156. if __name__ == '__main__':
  157. # Create project folder
  158. projectFolder = "output/" + arguments.project + "/"
  159. if not os.path.exists(projectFolder):
  160. os.makedirs(projectFolder)
  161. # Create debug folder
  162. if arguments.debug:
  163. if not os.path.exists(debugFolder):
  164. os.makedirs(debugFolder)
  165. # Get files to read
  166. screenshots_to_read = glob.glob(arguments.file, recursive=True)
  167. screenshot_count = len(screenshots_to_read)
  168. if screenshot_count < 1: sys.exit("No files found.")
  169. # Get all previously scraped data # TODO: limit to filenames only
  170. alreadyScraped = ""
  171. for outputPath in [OUTPUT_PATH_PROFILE, OUTPUT_PATH_MOREINFO, OUTPUT_PATH_KILLS]:
  172. with open(projectFolder + outputPath, "r+", newline='', encoding='utf-8') as outputFile:
  173. alreadyScraped = alreadyScraped + outputFile.read()
  174. # Mark as duplicates
  175. for i, file in enumerate(screenshots_to_read):
  176. if os.path.basename(file) in alreadyScraped:
  177. screenshots_to_read[i] = (i, file, True)
  178. else:
  179. screenshots_to_read[i] = (i, file, False)
  180. # Scrape
  181. if arguments.verbose: print("Scraping", screenshot_count, "files")
  182. cpuCount = multiprocessing.cpu_count()
  183. mpPool = multiprocessing.Pool(cpuCount, initializer=mpInitializer)
  184. try:
  185. # Returns: (fileNumber, filename, debugOutput, outputPath, outputLine, isDuplicate)
  186. for result in mpPool.imap(read_file, screenshots_to_read):
  187. if arguments.verbose: print(result[0]+1, "/", screenshot_count, ": ", result[1], sep="")
  188. if result[5] and not arguments.debug:
  189. if arguments.verbose: print(" ", "already scraped.")
  190. else:
  191. if arguments.verbose: print(result[2])
  192. if not arguments.debug:
  193. with open(projectFolder + result[3], "a+", newline='', encoding='utf-8') as outputFile:
  194. outputFile.write(result[4] + "\n")
  195. except KeyboardInterrupt:
  196. print("Exiting...")
  197. mpPool.terminate()
  198. finally:
  199. mpPool.terminate()
  200. mpPool.join()