nba_playoffs_game_updater.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325
  1. #!/usr/bin/env python3
  2. import gspread
  3. import json
  4. import random
  5. from os import sys
  6. from oauth2client.service_account import ServiceAccountCredentials
  7. import datetime
  8. import time
  9. from nba_api.stats.static import players
  10. from nba_api.stats.endpoints import playergamelog
  11. import timeout_decorator
  12. import urllib
  13. try:
  14. from nba_api.library.debug.debug import DEBUG_STORAGE
  15. except ImportError:
  16. DEBUG_STORAGE = False
# Google Sheets document key passed to getWorksheet(); exactly one assignment
# below is left uncommented to select the active spreadsheet.
#spreadsheet_key = '1QBQvGSMesox1gwjpaoK-0-p3n-c4I_L73PWCggjdayM' # 2019 Official
#spreadsheet_key = '14pHOScaGXvN83iCca6_5p6QoViYvo223cIJD9nnl7TI' # 2019 Test
#spreadsheet_key = '1n2qAxDhy3B-a20cn92H340GoPeKQE8fpztPlzKpGw80' # 2020 Test
spreadsheet_key = '1ajlHmH-dUzwkVfD-4NgpkK8ni3I3UuUFcdefF_DUOyg' # 2020 Official
# Service-account key file handed to getWorksheet() for authorization.
json_keyfile = 'NBA Playoffs Game-1f9a46f0715c.json'
# Either the literal string 'today' or a datetime.date; consumed by setDates().
day = 'today' # today, else:
#day = datetime.date(2020, 8, 19) # set date manually
# Drawn once at import time, so every pause in getStats() uses this same value.
nba_cooldown = random.gammavariate(alpha=9, beta=0.4) # don't hammer the NBA API
# cellsToUpdate() writes these into consecutive columns starting at
# zero-based column index 3.
stats=['PTS', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'WL'] # stats appear in this order
# Browser-like request headers for stats.nba.com.
# NOTE(review): not referenced by any code visible in this file - confirm
# whether it is still needed.
STATS_HEADERS = {
    'Host': 'stats.nba.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) Gecko/20100101 Firefox/72.0',
    'Accept': 'application/json, text/plain, */*',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate, br',
    'x-nba-stats-origin': 'stats',
    'x-nba-stats-token': 'true',
    'Connection': 'keep-alive',
    'Referer': 'https://stats.nba.com/',
    'Pragma': 'no-cache',
    'Cache-Control': 'no-cache',
}
# Plain-text proxy list (one entry per line) downloaded by buildProxyList().
proxy_url="https://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list-raw.txt"
# Proxies that buildProxyList() places ahead of the downloaded ones; currently empty.
proxies = [
]
  42. ###############################################################################
  43. def buildProxyList(proxies=[], raw_text_url="https://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list-raw.txt"):
  44. good_proxy_list = []
  45. proxy_list = []
  46. r = urllib.request.urlopen(raw_text_url)
  47. for line in r:
  48. line = line.decode("utf-8")
  49. line = line.strip()
  50. proxy_list.append(line)
  51. random.shuffle(proxy_list)
  52. proxy_list = proxies + proxy_list
  53. return proxy_list, good_proxy_list
  54. """
  55. Returns a worksheet instance
  56. """
  57. def getWorksheet(spreadsheet_key, json_keyfile):
  58. try:
  59. scope = ['https://spreadsheets.google.com/feeds',
  60. 'https://www.googleapis.com/auth/drive']
  61. credentials = ServiceAccountCredentials.from_json_keyfile_name(json_keyfile, scope)
  62. gc = gspread.authorize(credentials)
  63. spreadsheet = gc.open_by_key(spreadsheet_key)
  64. worksheet = spreadsheet.get_worksheet(0)
  65. except Exception as e:
  66. f"Exception: {str(e)}"
  67. f"Could not retrieve worksheet!"
  68. f"Check your API key, credentials, or network!"
  69. raise(e)
  70. return worksheet
  71. """
  72. Returns a list of lists containing the values of all cells in the worksheet by row
  73. """
  74. def getAllValues(worksheet):
  75. return worksheet.get_all_values()
  76. """
  77. Create various date variables based on "today's" day
  78. """
  79. def setDates(day):
  80. if day == 'today':
  81. # in case games go past midnight
  82. date = datetime.datetime.now() - datetime.timedelta(hours=3)
  83. date = date.date()
  84. else:
  85. date = day
  86. url_date = date.strftime('%m/%d/%Y')
  87. year = date.year
  88. season = f"{format(str(year - 1))}-{str(year)[2:]}"
  89. return url_date, season, date
  90. """
  91. Determines the number of players in the pool
  92. """
  93. def getNumberOfParticipants(all_values):
  94. count=0
  95. for row_num, row in enumerate(all_values):
  96. if row[0] != "" and row_num >= 4 and count == 0:
  97. start=row_num
  98. count+=1
  99. elif row[0] != "" and row_num >= 4 and count == 1:
  100. end=row_num
  101. break
  102. num_participants = end - start
  103. return num_participants
  104. """
  105. Determines the active day's first and last rows
  106. """
  107. def getFirstRowLastRow(all_values, num_participants, current_date):
  108. first_row = None
  109. last_row = None
  110. for row_num, row in enumerate(all_values, start=1):
  111. date=row[0]
  112. if date != "" and row_num >= 4:
  113. day = datetime.datetime.strptime('{} {}'.format(date,
  114. str(current_date.year)),
  115. '%A, %B %d %Y')
  116. if day.date() == current_date:
  117. first_row = row_num
  118. last_row = first_row + num_participants - 1
  119. break
  120. return first_row, last_row
  121. """
  122. Rudimentary way to reduce player name errors
  123. """
  124. def cleanFirstNameLastName(player):
  125. first_name_last_name = player.split()
  126. first_name = first_name_last_name[0]
  127. first_name = first_name.replace('.', '')
  128. # New nickname for T.J. Warren should be "The Outlier"
  129. if first_name == "TJ":
  130. first_name = "T.J."
  131. elif first_name == "Donavan":
  132. first_name = "Donovan"
  133. last_name = first_name_last_name[1]
  134. player_clean = first_name + ' ' + last_name
  135. return player_clean
  136. """
  137. Create a unique list of players that have been selected today
  138. Also, append misspelled players to batch_update_list to autofix on next push if we can
  139. """
  140. def cleanPlayers(all_values, first_row, last_row, batch_update_list):
  141. players_unique = []
  142. for row_num, row in enumerate(all_values, start=1):
  143. if first_row <= row_num <= last_row:
  144. player = row[2]
  145. if player[-7:] != "-FIX!!!" and player != "":
  146. if len(players.find_players_by_full_name(player)) > 0:
  147. players_unique.append(player)
  148. else:
  149. player_clean = cleanFirstNameLastName(player)
  150. if len(players.find_players_by_full_name(player_clean)) > 0:
  151. all_values[row_num - 1][2] = player_clean
  152. batch_update_list.append({'range': f'{indexToLetter(2)}{row_num}', 'values': [[player_clean]]})
  153. players_unique.append(player_clean)
  154. else:
  155. print("Player:", player, "not found, please fix name!")
  156. players_unique = list(dict.fromkeys(players_unique))
  157. return players_unique, batch_update_list, all_values
  158. """
  159. Pull player's gamelog from stats.nba.com based on the url_date and player_id
  160. """
  161. #@timeout_decorator.timeout(30)
  162. def getStats(players_unique, url_date, season, proxy_list=[], good_proxy_list=[]):
  163. stats_dict = {}
  164. for player in players_unique:
  165. player_info = players.find_players_by_full_name(player)
  166. player_id = player_info[0].get('id')
  167. while True:
  168. # Move working proxies to the front of the list
  169. if len(good_proxy_list) > 0:
  170. proxy_list = good_proxy_list + proxy_list
  171. # Remove duplicate proxies
  172. proxy_list = list(dict.fromkeys(proxy_list))
  173. # Use the first proxy in the list
  174. request_proxy = proxy_list[0]
  175. try:
  176. print('Proxy:', request_proxy)
  177. player_game_log = playergamelog.PlayerGameLog( player_id=player_id,
  178. proxy=request_proxy,
  179. season=season,
  180. timeout=10,
  181. league_id_nullable='00',
  182. season_type_all_star='Playoffs',
  183. date_from_nullable=url_date,
  184. date_to_nullable=url_date,
  185. )
  186. print('Success!')
  187. if request_proxy not in good_proxy_list:
  188. good_proxy_list.append(request_proxy)
  189. player_game_log_dict = player_game_log.get_dict()
  190. if DEBUG_STORAGE is False:
  191. time.sleep(nba_cooldown)
  192. break
  193. except OSError as e:
  194. print(e)
  195. if request_proxy in good_proxy_list:
  196. good_proxy_list.remove(request_proxy)
  197. else:
  198. print('Proxy refused, removing', request_proxy)
  199. proxy_list.remove(request_proxy)
  200. continue
  201. except Exception as e:
  202. print(e)
  203. print('Could not connect to the NBA API, sleeping for 30 seconds')
  204. time.sleep(30)
  205. player_game_log_results = player_game_log_dict.get('resultSets')[0]
  206. player_game_log_headers = player_game_log_results.get('headers')
  207. # if player has no stats for this day, list will be empty
  208. if len(player_game_log_results.get('rowSet')) < 1:
  209. player_stats_dict = None
  210. else:
  211. player_game_log_stats = player_game_log_results.get('rowSet')[0]
  212. player_stats_dict = dict(zip(player_game_log_headers, player_game_log_stats))
  213. stats_dict[player] = player_stats_dict
  214. return stats_dict, good_proxy_list
  215. """
  216. Append stat cells that have changes to batch_update_list
  217. Also append player cells that need fixing to batch_update_list
  218. """
  219. def cellsToUpdate(all_values, first_row, last_row, stats_dict, stats, batch_update_list):
  220. for row_num, row in enumerate(all_values, start=1):
  221. if first_row <= row_num <= last_row:
  222. player_name = row[2]
  223. if player_name[-7:] != "-FIX!!!" and player_name in stats_dict.keys():
  224. if stats_dict[player_name] is not None:
  225. player_stats = stats_dict[player_name]
  226. if player_stats == "Fix!":
  227. batch_update_list.append({'range': f'{indexToLetter(2)}{row_num}', 'values': [[f'{player_name}-FIX!!!']]})
  228. continue
  229. for col_num, stat in enumerate(stats, start=3):
  230. pass
  231. #print(player_name, player_stats[stat])
  232. #print(player_name, f'{indexToLetter(col_num)}{row_num}', str(row[col_num]), f',', player_stats[stat])
  233. if str(player_stats[stat]) != str(row[col_num]) and player_stats[stat] is not None:
  234. #print('Update:', row_num, col_num, player_name, f'{indexToLetter(col_num)}{row_num}', str(row[col_num]), player_stats[stat])
  235. batch_update_list.append({'range': f'{indexToLetter(col_num)}{row_num}', 'values': [[f'{player_stats[stat]}']]}.copy())
  236. return batch_update_list
  237. """
  238. Convert zero-indexed column number to the appropriate column letter (A=0, B=1, C=2...)
  239. """
  240. def indexToLetter(index):
  241. return chr(ord('@')+int(index)+1)
  242. """
  243. Push changes to Google Sheet
  244. """
  245. def batchUpdate(batch_update_list):
  246. if len(batch_update_list) > 1:
  247. worksheet.batch_update(batch_update_list, value_input_option="USER_ENTERED")
  248. else:
  249. print('No update needed, sleeping for 1 minute')
  250. time.sleep(60)
  251. if __name__ == "__main__":
  252. # Use a combination of our good proxies with some fetched from the internet for variation
  253. proxy_list, good_proxy_list = buildProxyList(proxies=proxies, raw_text_url=proxy_url)
  254. while True:
  255. try:
  256. batch_update_list = []
  257. worksheet = getWorksheet(spreadsheet_key, json_keyfile)
  258. url_date, season, date = setDates(day)
  259. print("Date: " + str(date))
  260. all_values = getAllValues(worksheet)
  261. num_participants = getNumberOfParticipants(all_values)
  262. first_row, last_row = getFirstRowLastRow(all_values, num_participants, date)
  263. if first_row is None:
  264. print("No games today! Pausing for 1000 seconds...")
  265. time.sleep(1000)
  266. continue
  267. players_unique, batch_update_list, all_values = cleanPlayers(all_values, first_row, last_row, batch_update_list)
  268. stats_dict, good_proxy_list = getStats(players_unique, url_date, season, proxy_list=proxy_list, good_proxy_list=good_proxy_list)
  269. batch_update_list = cellsToUpdate(all_values, first_row, last_row, stats_dict, stats, batch_update_list)
  270. if len(batch_update_list) > 1:
  271. print(batch_update_list)
  272. batchUpdate(batch_update_list)
  273. except Exception as e:
  274. print(e)
  275. print('Sleeping for 10 seconds')
  276. time.sleep(10)
  277. continue