@@ -1,229 +1,325 @@
import datetime
import json
import random
import time
import urllib
import urllib.request
from os import sys

import gspread
import timeout_decorator
from nba_api.stats.endpoints import playergamelog
from nba_api.stats.static import players
from oauth2client.service_account import ServiceAccountCredentials

# DEBUG_STORAGE only exists in debug builds of nba_api.
# NOTE(review): the fallback value is reconstructed from the
# `DEBUG_STORAGE is False` check in getStats -- confirm.
try:
    from nba_api.library.debug.debug import DEBUG_STORAGE
except ImportError:
    DEBUG_STORAGE = False
# --- Module configuration ---------------------------------------------------

# Google Sheet to update and the service-account key file used to log in.
spreadsheet_key = '1ajlHmH-dUzwkVfD-4NgpkK8ni3I3UuUFcdefF_DUOyg'
json_keyfile = 'NBA Playoffs Game-1f9a46f0715c.json'

# 'today', or an object with .strftime()/.year (e.g. datetime.date); see setDates.
day = 'today'

# Pause (seconds) between successful stats.nba.com requests.  Drawn once at
# import time, so every request in this run uses the same delay.
nba_cooldown = random.gammavariate(alpha=9, beta=0.4)

# Game-log fields copied into the sheet, in column order starting at column D.
stats = ['PTS', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'WL']

# Browser-like request headers for stats.nba.com.
# NOTE(review): the variable name and the enclosing braces were lost in the
# reviewed text; `headers` is a reconstruction -- confirm the real name.
headers = {
    'Host': 'stats.nba.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) Gecko/20100101 Firefox/72.0',
    'Accept': 'application/json, text/plain, */*',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate, br',
    'x-nba-stats-origin': 'stats',
    'x-nba-stats-token': 'true',
    'Connection': 'keep-alive',
    'Referer': 'https://stats.nba.com/',
    'Pragma': 'no-cache',
    'Cache-Control': 'no-cache',
}

# Proxies to try before the downloaded list.
# NOTE(review): the list body was lost in the reviewed text; restore any
# entries that belong here.
proxies = []

# Raw text list of proxies, one "host:port" per line.
# NOTE(review): referenced by the __main__ block but not defined in the
# reviewed text; this mirrors buildProxyList's default URL -- confirm.
proxy_url = "https://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list-raw.txt"
def buildProxyList(proxies=None, raw_text_url="https://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list-raw.txt"):
    """Build the ordered list of proxies to try.

    Args:
        proxies: Optional proxies that are placed, in the given order, ahead
            of the downloaded ones.  ``None`` means no preferred proxies.
        raw_text_url: URL of a plain-text proxy list, one entry per line.

    Returns:
        A ``(proxy_list, good_proxy_list)`` tuple.  ``proxy_list`` is the
        preferred proxies followed by the downloaded proxies in random order;
        ``good_proxy_list`` is a new empty list.

    Raises:
        urllib.error.URLError: If the proxy list cannot be downloaded.
    """
    # A None default avoids sharing one list object between calls.
    preferred = list(proxies) if proxies else []
    good_proxy_list = []

    # The context manager closes the HTTP response even if decoding fails.
    with urllib.request.urlopen(raw_text_url) as response:
        fetched = [raw_line.decode("utf-8").strip() for raw_line in response]

    # Blank lines are not proxies; keeping them would waste a request each.
    proxy_list = [entry for entry in fetched if entry]
    random.shuffle(proxy_list)

    return preferred + proxy_list, good_proxy_list
def getWorksheet(spreadsheet_key, json_keyfile):
    """Return the first worksheet of the given Google Sheet.

    Args:
        spreadsheet_key: Key of the spreadsheet to open.
        json_keyfile: Path to the service-account JSON key file.

    Returns:
        The worksheet at index 0 of the spreadsheet.

    Raises:
        Exception: Any error raised while authenticating or opening the sheet
            is reported on stdout and then re-raised unchanged.
    """
    scope = ['https://spreadsheets.google.com/feeds',
             'https://www.googleapis.com/auth/drive']
    try:
        credentials = ServiceAccountCredentials.from_json_keyfile_name(json_keyfile, scope)
        gc = gspread.authorize(credentials)
        spreadsheet = gc.open_by_key(spreadsheet_key)
        worksheet = spreadsheet.get_worksheet(0)
    except Exception as e:
        # These messages were bare string expressions before, so nothing was
        # ever shown to the operator.
        print(f"Exception: {e}")
        print("Could not retrieve worksheet!")
        print("Check your API key, credentials, or network!")
        # A bare raise keeps the original traceback.
        raise
    return worksheet
def getAllValues(worksheet):
    """Return the values of all cells in the worksheet as a list of rows.

    Args:
        worksheet: Object exposing ``get_all_values()`` (the worksheet
            returned by getWorksheet).

    Returns:
        Whatever ``worksheet.get_all_values()`` returns; the rest of this
        script treats it as a list of row lists.
    """
    return worksheet.get_all_values()
def setDates(day):
    """Derive the date values used by the rest of the script.

    Args:
        day: The string ``'today'``, or an object providing ``strftime`` and
            ``year`` (e.g. ``datetime.date``) that is used as-is.

    Returns:
        A ``(url_date, season, date)`` tuple: the date formatted as
        ``MM/DD/YYYY``, the season label ``"<year-1>-<last two digits of
        year>"`` (e.g. ``"2019-20"``), and the date itself.
    """
    if day == 'today':
        # The clock is shifted back three hours before taking the date.
        # NOTE(review): presumably so games ending after midnight still count
        # for the previous day -- confirm.
        shifted_now = datetime.datetime.now() - datetime.timedelta(hours=3)
        date = shifted_now.date()
    else:
        date = day
    url_date = date.strftime('%m/%d/%Y')
    year = date.year
    # The season is always labelled by the year before `date` and `date`'s year.
    season = f"{year - 1}-{str(year)[2:]}"
    return url_date, season, date
def getNumberOfParticipants(all_values):
    """Return the number of rows in one day's block, i.e. the pool size.

    The size is the distance between the first two rows, at zero-based index
    4 or later, whose first column is non-empty.

    Args:
        all_values: Sheet contents as a list of rows (lists of strings).

    Returns:
        The number of rows between the first two such rows.  If only one is
        found, the number of rows from it to the end of ``all_values``.  If
        none is found, 0.
    """
    # NOTE(review): this threshold is zero-based (sheet row 5), while
    # getFirstRowLastRow applies `>= 4` to one-based row numbers (sheet
    # row 4).  Kept as-is; confirm which one is intended.
    first_data_index = 4

    start = None
    for row_num, row in enumerate(all_values):
        if row_num < first_data_index or not row or row[0] == "":
            continue
        if start is None:
            start = row_num
        else:
            return row_num - start

    if start is None:
        # No date rows at all; this used to fail on an unbound local.
        return 0
    # Only one day block exists, so it runs to the end of the data.
    # NOTE(review): assumes no unrelated rows follow the last block.
    return len(all_values) - start
def getFirstRowLastRow(all_values, num_participants, current_date):
    """Find the one-based first and last sheet rows of the active day.

    Args:
        all_values: Sheet contents as a list of rows (lists of strings).
        num_participants: Number of rows in one day's block.
        current_date: ``datetime.date`` to look for.

    Returns:
        ``(first_row, last_row)``, both inclusive, or ``(None, None)`` when no
        row from sheet row 4 onward carries ``current_date``.

    Raises:
        ValueError: If a non-empty first-column cell from row 4 onward is not
            in ``'<Weekday>, <Month> <day>'`` form.
    """
    first_row = None
    last_row = None
    for row_num, row in enumerate(all_values, start=1):
        label = row[0]
        if label == "" or row_num < 4:
            continue
        # Cells hold no year, so the year of current_date is appended.
        parsed = datetime.datetime.strptime(f'{label} {current_date.year}',
                                            '%A, %B %d %Y')
        if parsed.date() == current_date:
            # No break: if a date is listed twice, the last match wins.
            first_row = row_num
            last_row = first_row + num_participants - 1
    return first_row, last_row
def cleanFirstNameLastName(player):
    """Rudimentary clean-up of a player name to reduce lookup failures.

    Periods are removed from the first word, two known spellings are fixed
    (``TJ`` -> ``T.J.``, ``Donavan`` -> ``Donovan``), and the result is joined
    with the second word.  Any further words are dropped.

    Args:
        player: Raw player name as typed in the sheet.

    Returns:
        ``"<first> <second>"``; just the cleaned first word when the name has
        one word; ``""`` when the name is empty or whitespace.
    """
    corrections = {"TJ": "T.J.", "Donavan": "Donovan"}

    words = player.split()
    if not words:
        # Used to raise IndexError on an empty cell.
        return ""

    first_name = words[0].replace('.', '')
    first_name = corrections.get(first_name, first_name)

    if len(words) == 1:
        # Single-word names used to raise IndexError.
        return first_name
    return first_name + ' ' + words[1]
def cleanPlayers(all_values, first_row, last_row, batch_update_list):
    """Collect the unique, resolvable players picked on the active day.

    Names that only resolve after cleanFirstNameLastName are corrected in
    ``all_values`` and queued in ``batch_update_list``, so the sheet is fixed
    on the next push.

    Args:
        all_values: Sheet contents as a list of rows; column C (index 2)
            holds the player name.  Modified in place for corrected names.
        first_row: One-based first row of the active day (inclusive).
        last_row: One-based last row of the active day (inclusive).
        batch_update_list: Pending cell updates; appended to in place.

    Returns:
        ``(players_unique, batch_update_list, all_values)`` where
        ``players_unique`` keeps first-seen order without duplicates.
    """
    players_unique = []
    # last_row is inclusive.  The previous `< last_row` test skipped the last
    # participant's pick.
    todays_rows = all_values[max(first_row - 1, 0):last_row]
    for row_num, row in enumerate(todays_rows, start=max(first_row, 1)):
        player = row[2]
        if player == "" or player.endswith("-FIX!!!"):
            continue

        if players.find_players_by_full_name(player):
            players_unique.append(player)
            continue

        try:
            player_clean = cleanFirstNameLastName(player)
        except IndexError:
            # The name could not be split into first and last; treat it as
            # unresolved rather than aborting the whole pass.
            player_clean = None

        if player_clean and players.find_players_by_full_name(player_clean):
            all_values[row_num - 1][2] = player_clean
            batch_update_list.append({'range': f'{indexToLetter(2)}{row_num}', 'values': [[player_clean]]})
            players_unique.append(player_clean)
        else:
            print("Player:", player, "not found, please fix name!")

    # dict.fromkeys removes duplicates and keeps first-seen order.
    players_unique = list(dict.fromkeys(players_unique))
    return players_unique, batch_update_list, all_values
def getStats(players_unique, url_date, season, proxy_list=None, good_proxy_list=None):
    """Fetch each player's playoff game log for one date from stats.nba.com.

    Requests go through the first proxy in the list.  A proxy that succeeds
    is remembered in ``good_proxy_list`` and tried first afterwards.  A proxy
    that fails with ``OSError`` is dropped: first from the good list, then
    from the proxy list if it fails again.

    Args:
        players_unique: Player names that resolve through
            ``players.find_players_by_full_name``.
        url_date: Date string used as both start and end of the query range.
        season: Season label passed to the endpoint.
        proxy_list: Candidate proxies, tried in order.  May have entries
            removed in place.
        good_proxy_list: Proxies known to work.  Updated in place.

    Returns:
        ``(stats_dict, good_proxy_list)``.  ``stats_dict`` maps each player
        name to a header->value dict for the first returned game row, or to
        ``None`` when no row came back.

    Raises:
        IndexError: When no proxy is left to try.
    """
    # None defaults avoid sharing one list object between calls.
    proxy_list = [] if proxy_list is None else proxy_list
    good_proxy_list = [] if good_proxy_list is None else good_proxy_list

    stats_dict = {}
    for player in players_unique:
        player_info = players.find_players_by_full_name(player)
        player_id = player_info[0].get('id')

        while True:
            if good_proxy_list:
                # Known-good proxies go first; dict.fromkeys removes
                # duplicates and keeps order.
                proxy_list = list(dict.fromkeys(good_proxy_list + proxy_list))
            if not proxy_list:
                # Same exception type as the former bare proxy_list[0], but
                # with a message that explains it.
                raise IndexError('No usable proxies left for stats.nba.com requests')
            request_proxy = proxy_list[0]

            try:
                print('Proxy:', request_proxy)
                player_game_log = playergamelog.PlayerGameLog(
                    player_id=player_id,
                    proxy=request_proxy,
                    season=season,
                    timeout=10,
                    league_id_nullable='00',
                    season_type_all_star='Playoffs',
                    date_from_nullable=url_date,
                    date_to_nullable=url_date,
                )
                print('Success!')
                if request_proxy not in good_proxy_list:
                    good_proxy_list.append(request_proxy)
                player_game_log_dict = player_game_log.get_dict()
                if DEBUG_STORAGE is False:
                    time.sleep(nba_cooldown)
                break
            except OSError as e:
                print(e)
                if request_proxy in good_proxy_list:
                    # First failure of a good proxy: demote it only.
                    good_proxy_list.remove(request_proxy)
                else:
                    print('Proxy refused, removing', request_proxy)
                    proxy_list.remove(request_proxy)
                continue
            except Exception as e:
                # Anything else keeps the proxy and retries after a pause.
                print(e)
                print('Could not connect to the NBA API, sleeping for 30 seconds')
                time.sleep(30)

        player_game_log_results = player_game_log_dict.get('resultSets')[0]
        player_game_log_headers = player_game_log_results.get('headers')
        game_rows = player_game_log_results.get('rowSet')
        if len(game_rows) < 1:
            player_stats_dict = None
        else:
            player_stats_dict = dict(zip(player_game_log_headers, game_rows[0]))
        stats_dict[player] = player_stats_dict

    return stats_dict, good_proxy_list
def cellsToUpdate(all_values, first_row, last_row, stats_dict, stats, batch_update_list):
    """Queue an update for every stat cell whose value has changed.

    Player cells whose stats entry equals ``"Fix!"`` are instead queued to be
    renamed with a ``-FIX!!!`` suffix.

    Args:
        all_values: Sheet contents as a list of rows; column C (index 2)
            holds the player name and the stat columns start at index 3.
        first_row: One-based first row of the active day (inclusive).
        last_row: One-based last row of the active day (inclusive).
        stats_dict: Player name -> stats dict, ``None`` or ``"Fix!"``.
        stats: Stat keys in sheet column order.
        batch_update_list: Pending cell updates; appended to in place.

    Returns:
        ``batch_update_list`` with the new updates appended.
    """
    for row_num, row in enumerate(all_values, start=1):
        if not first_row <= row_num <= last_row:
            continue

        player_name = row[2]
        if player_name.endswith("-FIX!!!"):
            continue
        # Unknown players and players with no game both map to None here.
        player_stats = stats_dict.get(player_name)
        if player_stats is None:
            continue

        if player_stats == "Fix!":
            batch_update_list.append({'range': f'{indexToLetter(2)}{row_num}', 'values': [[f'{player_name}-FIX!!!']]})
            continue

        for col_num, stat in enumerate(stats, start=3):
            # .get(): a stat missing from the game log is skipped, not fatal.
            new_value = player_stats.get(stat)
            if new_value is not None and str(new_value) != str(row[col_num]):
                batch_update_list.append({'range': f'{indexToLetter(col_num)}{row_num}', 'values': [[f'{new_value}']]})
    return batch_update_list
def indexToLetter(index):
    """Convert a zero-based column index to its spreadsheet column letters.

    ``0 -> 'A'``, ``1 -> 'B'``, ..., ``25 -> 'Z'``, ``26 -> 'AA'``, and so on.

    Args:
        index: Zero-based column index; anything ``int()`` accepts.

    Returns:
        The column label as an upper-case string.

    Raises:
        ValueError: If ``index`` is negative.
    """
    number = int(index) + 1
    if number < 1:
        raise ValueError(f"column index must be >= 0, got {index!r}")

    letters = ""
    # Column labels are bijective base 26: there is no zero digit, so 1 is
    # subtracted before each division.
    while number > 0:
        number, remainder = divmod(number - 1, 26)
        letters = chr(ord('A') + remainder) + letters
    return letters
def batchUpdate(batch_update_list):
    """Push the queued cell updates to the Google Sheet.

    Uses the module-level ``worksheet`` set by the ``__main__`` loop.  When
    nothing is queued, prints a notice and pauses for 30 seconds.

    Args:
        batch_update_list: List of ``{'range': ..., 'values': ...}`` dicts.
    """
    # The previous `len(...) > 1` test never pushed a single pending update.
    if batch_update_list:
        worksheet.batch_update(batch_update_list, value_input_option="USER_ENTERED")
    else:
        print('No update needed')
        # NOTE(review): indentation was lost in the reviewed text; the pause
        # is placed in the no-update branch, as in the earlier version of
        # this script -- confirm.
        time.sleep(30)
if __name__ == "__main__":
    # Poll forever: read the sheet, fetch today's stats, push changed cells.
    proxy_list, good_proxy_list = buildProxyList(proxies=proxies, raw_text_url=proxy_url)
    while True:
        # NOTE(review): the `try:` line and the sleep/continue statements were
        # lost in the reviewed text.  They are reconstructed from the
        # `except` clause and the printed messages -- confirm.
        try:
            batch_update_list = []
            worksheet = getWorksheet(spreadsheet_key, json_keyfile)
            url_date, season, date = setDates(day)
            print("Date: " + str(date))

            all_values = getAllValues(worksheet)
            num_participants = getNumberOfParticipants(all_values)
            first_row, last_row = getFirstRowLastRow(all_values, num_participants, date)
            if first_row is None:
                print("No games today! Pausing for 1000 seconds...")
                time.sleep(1000)
                continue

            players_unique, batch_update_list, all_values = cleanPlayers(all_values, first_row, last_row, batch_update_list)
            stats_dict, good_proxy_list = getStats(players_unique, url_date, season, proxy_list=proxy_list, good_proxy_list=good_proxy_list)
            batch_update_list = cellsToUpdate(all_values, first_row, last_row, stats_dict, stats, batch_update_list)

            # Show any pending update, including a single one.
            if batch_update_list:
                print(batch_update_list)
            batchUpdate(batch_update_list)
        except Exception as e:
            # Top-level boundary: report, back off, and try again.
            print(e)
            print('Sleeping for 10 seconds')
            time.sleep(10)