#!/usr/bin/env python3
"""Keep an NBA playoff pool Google Sheet in sync with stats.nba.com.

The script loops forever: it opens the first worksheet of the configured
spreadsheet, finds the block of rows for the active day, looks up each
picked player's game log for that day through the NBA stats API (via a
rotating list of HTTP proxies) and pushes every changed cell back to the
sheet in one batch update.
"""
import datetime
import json
import random
import sys
import time
import urllib.request

import gspread
import timeout_decorator
from nba_api.stats.endpoints import playergamelog
from nba_api.stats.static import players
from oauth2client.service_account import ServiceAccountCredentials

try:
    from nba_api.library.debug.debug import DEBUG_STORAGE
except ImportError:
    DEBUG_STORAGE = False

#spreadsheet_key = '1QBQvGSMesox1gwjpaoK-0-p3n-c4I_L73PWCggjdayM' # 2019 Official
#spreadsheet_key = '14pHOScaGXvN83iCca6_5p6QoViYvo223cIJD9nnl7TI' # 2019 Test
#spreadsheet_key = '1n2qAxDhy3B-a20cn92H340GoPeKQE8fpztPlzKpGw80' # 2020 Test
spreadsheet_key = '1ajlHmH-dUzwkVfD-4NgpkK8ni3I3UuUFcdefF_DUOyg' # 2020 Official
json_keyfile = 'NBA Playoffs Game-1f9a46f0715c.json'

day = 'today' # today, else:
#day = datetime.date(2020, 8, 19) # set date manually

# Don't hammer the NBA API.
# NOTE: sampled once at import time, so every request in a run waits the
# same number of seconds.
nba_cooldown = random.gammavariate(alpha=9, beta=0.4)

# Stats appear in the sheet in this order, starting at column D (index 3).
stats = ['PTS', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'WL']

STATS_HEADERS = {
    'Host': 'stats.nba.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) Gecko/20100101 Firefox/72.0',
    'Accept': 'application/json, text/plain, */*',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate, br',
    'x-nba-stats-origin': 'stats',
    'x-nba-stats-token': 'true',
    'Connection': 'keep-alive',
    'Referer': 'https://stats.nba.com/',
    'Pragma': 'no-cache',
    'Cache-Control': 'no-cache',
}

proxy_url = "https://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list-raw.txt"
proxies = []

# Marker appended to a player cell that needs manual correction.
_FIX_SUFFIX = "-FIX!!!"

# First-name spellings that are rewritten before retrying a player lookup.
_FIRST_NAME_FIXES = {
    'TJ': 'T.J.',  # New nickname for T.J. Warren should be "The Outlier"
    'Donavan': 'Donovan',
}

###############################################################################


def buildProxyList(proxies=None, raw_text_url="https://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list-raw.txt"):
    """Download a proxy list and combine it with the caller's own proxies.

    Args:
        proxies: Optional list of proxy strings to try first. Not modified.
        raw_text_url: URL of a plain-text document with one proxy per line.

    Returns:
        A ``(proxy_list, good_proxy_list)`` tuple. ``proxy_list`` holds
        ``proxies`` followed by the downloaded entries in random order;
        ``good_proxy_list`` is a new empty list for proxies that later
        prove to work.
    """
    fetched = []
    # The context manager closes the HTTP response; the timeout keeps a
    # dead host from hanging start-up forever.
    with urllib.request.urlopen(raw_text_url, timeout=30) as response:
        for raw_line in response:
            line = raw_line.decode("utf-8").strip()
            if line:  # ignore blank lines instead of queuing empty proxies
                fetched.append(line)
    random.shuffle(fetched)
    proxy_list = list(proxies or []) + fetched
    good_proxy_list = []
    return proxy_list, good_proxy_list


def getWorksheet(spreadsheet_key, json_keyfile):
    """Return the first worksheet of the spreadsheet.

    Args:
        spreadsheet_key: Key of the Google spreadsheet to open.
        json_keyfile: Path to the service-account JSON key file.

    Raises:
        Exception: Whatever the credential or gspread call raised, re-raised
            after printing a short diagnostic.
    """
    try:
        scope = ['https://spreadsheets.google.com/feeds',
                 'https://www.googleapis.com/auth/drive']
        credentials = ServiceAccountCredentials.from_json_keyfile_name(json_keyfile, scope)
        gc = gspread.authorize(credentials)
        spreadsheet = gc.open_by_key(spreadsheet_key)
        worksheet = spreadsheet.get_worksheet(0)
    except Exception as e:
        print(f"Exception: {str(e)}")
        print("Could not retrieve worksheet!")
        print("Check your API key, credentials, or network!")
        raise
    return worksheet


def getAllValues(worksheet):
    """Return the values of all cells in the worksheet as a list of rows."""
    return worksheet.get_all_values()


def setDates(day):
    """Create the date variables for the active day.

    Args:
        day: The string ``'today'`` or a ``datetime.date``.

    Returns:
        A ``(url_date, season, date)`` tuple: the date as ``MM/DD/YYYY``,
        the season string built from the previous and current year
        (e.g. ``'2019-20'`` for 2020), and the date object itself.
    """
    if day == 'today':
        # In case games go past midnight, the "day" rolls over 3 hours late.
        shifted_now = datetime.datetime.now() - datetime.timedelta(hours=3)
        date = shifted_now.date()
    else:
        date = day
    url_date = date.strftime('%m/%d/%Y')
    season = f"{date.year - 1}-{str(date.year)[2:]}"
    return url_date, season, date


def getNumberOfParticipants(all_values):
    """Determine the number of players in the pool.

    The count is the distance between the first two rows (zero-based index
    4 or later) that have a value in column A.

    Args:
        all_values: Sheet contents as a list of rows.

    Returns:
        The number of rows in one day's block. When only one such row
        exists, the block is assumed to run to the end of the sheet.

    Raises:
        ValueError: If no row at index 4 or later has a value in column A.
    """
    marker_rows = [
        row_num
        for row_num, row in enumerate(all_values)
        if row_num >= 4 and row and row[0] != ""
    ]
    if not marker_rows:
        raise ValueError("No dated rows found; cannot determine the number of participants")
    start = marker_rows[0]
    end = marker_rows[1] if len(marker_rows) > 1 else len(all_values)
    return end - start


def getFirstRowLastRow(all_values, num_participants, current_date):
    """Determine the active day's first and last rows (1-based).

    Column A is expected to hold dates such as ``'Monday, August 17'``;
    the year of ``current_date`` is appended before parsing.

    Args:
        all_values: Sheet contents as a list of rows.
        num_participants: Number of rows in one day's block.
        current_date: The ``datetime.date`` to look for.

    Returns:
        ``(first_row, last_row)``, or ``(None, None)`` when no row matches.
    """
    for row_num, row in enumerate(all_values, start=1):
        if row_num < 4 or not row or row[0] == "":
            continue
        try:
            row_date = datetime.datetime.strptime(
                '{} {}'.format(row[0], str(current_date.year)),
                '%A, %B %d %Y',
            ).date()
        except ValueError:
            # A note or typo in column A should not abort the whole scan.
            print("Skipping row", row_num, "- unrecognised date:", row[0])
            continue
        if row_date == current_date:
            return row_num, row_num + num_participants - 1
    return None, None


def cleanFirstNameLastName(player):
    """Rudimentary way to reduce player name errors.

    Strips dots from the first name, applies the known spelling fixes and
    returns ``'<first> <second word>'``; any further words are dropped.
    A one-word name yields just the cleaned first name and an empty or
    blank name yields ``''``.
    """
    name_parts = player.split()
    if not name_parts:
        return ''
    first_name = name_parts[0].replace('.', '')
    first_name = _FIRST_NAME_FIXES.get(first_name, first_name)
    if len(name_parts) == 1:
        return first_name
    return first_name + ' ' + name_parts[1]


def cleanPlayers(all_values, first_row, last_row, batch_update_list):
    """Create a unique list of the players that have been selected today.

    Names that only resolve after cleaning are rewritten in ``all_values``
    and queued in ``batch_update_list`` so the sheet is fixed on the next
    push. Blank cells and cells already marked ``-FIX!!!`` are ignored.

    Args:
        all_values: Sheet contents as a list of rows; modified in place.
        first_row: First row (1-based) of the active day.
        last_row: Last row (1-based) of the active day.
        batch_update_list: Pending cell updates; appended to in place.

    Returns:
        ``(players_unique, batch_update_list, all_values)``.
    """
    players_unique = []
    for row_num, row in enumerate(all_values, start=1):
        if not first_row <= row_num <= last_row:
            continue
        player = row[2]
        # A whitespace-only cell would match arbitrary players in the
        # regex-based name search, so treat it like an empty cell.
        if not player.strip() or player.endswith(_FIX_SUFFIX):
            continue
        if players.find_players_by_full_name(player):
            players_unique.append(player)
            continue
        player_clean = cleanFirstNameLastName(player)
        if player_clean and players.find_players_by_full_name(player_clean):
            all_values[row_num - 1][2] = player_clean
            batch_update_list.append({
                'range': f'{indexToLetter(2)}{row_num}',
                'values': [[player_clean]],
            })
            players_unique.append(player_clean)
        else:
            print("Player:", player, "not found, please fix name!")
    # dict.fromkeys removes duplicates while keeping first-seen order.
    players_unique = list(dict.fromkeys(players_unique))
    return players_unique, batch_update_list, all_values


#@timeout_decorator.timeout(30)
def getStats(players_unique, url_date, season, proxy_list=None, good_proxy_list=None):
    """Pull each player's playoff game log for ``url_date`` from stats.nba.com.

    Requests go through the first proxy in the list. A proxy that works is
    remembered in ``good_proxy_list`` and tried first from then on. A proxy
    that fails with ``OSError`` first loses its "good" status and, on the
    next failure, is dropped from the list used for the rest of this call.

    Args:
        players_unique: Player names that resolve via the nba_api lookup.
        url_date: Date string used as both ends of the date filter.
        season: Season string such as ``'2019-20'``.
        proxy_list: Candidate proxies. The caller's list is not modified.
        good_proxy_list: Proxies known to work; updated in place when given.

    Returns:
        ``(stats_dict, good_proxy_list)`` where ``stats_dict`` maps each
        player name to a header-to-value dict, or to ``None`` when the
        player has no game on that date.

    Raises:
        RuntimeError: If every proxy has been removed.
    """
    proxy_list = [] if proxy_list is None else list(proxy_list)
    if good_proxy_list is None:
        good_proxy_list = []

    stats_dict = {}
    for player in players_unique:
        player_info = players.find_players_by_full_name(player)
        player_id = player_info[0].get('id')
        while True:
            # Move working proxies to the front and drop duplicates;
            # dict.fromkeys keeps the first occurrence of each entry.
            proxy_list = list(dict.fromkeys(good_proxy_list + proxy_list))
            if not proxy_list:
                raise RuntimeError('No proxies left to reach the NBA API')
            request_proxy = proxy_list[0]
            try:
                print('Proxy:', request_proxy)
                player_game_log = playergamelog.PlayerGameLog(
                    player_id=player_id,
                    proxy=request_proxy,
                    season=season,
                    timeout=10,
                    league_id_nullable='00',
                    season_type_all_star='Playoffs',
                    date_from_nullable=url_date,
                    date_to_nullable=url_date,
                )
                print('Success!')
                if request_proxy not in good_proxy_list:
                    good_proxy_list.append(request_proxy)
                player_game_log_dict = player_game_log.get_dict()
                if DEBUG_STORAGE is False:
                    time.sleep(nba_cooldown)
                break
            except OSError as e:
                print(e)
                if request_proxy in good_proxy_list:
                    # Demote first; it is removed only if it fails again.
                    good_proxy_list.remove(request_proxy)
                else:
                    print('Proxy refused, removing', request_proxy)
                    proxy_list.remove(request_proxy)
                continue
            except Exception as e:
                print(e)
                print('Could not connect to the NBA API, sleeping for 30 seconds')
                time.sleep(30)

        result_set = player_game_log_dict.get('resultSets')[0]
        headers = result_set.get('headers')
        rows = result_set.get('rowSet')
        # If the player has no stats for this day, the row set is empty.
        if len(rows) < 1:
            stats_dict[player] = None
        else:
            stats_dict[player] = dict(zip(headers, rows[0]))
    return stats_dict, good_proxy_list


def cellsToUpdate(all_values, first_row, last_row, stats_dict, stats, batch_update_list):
    """Append stat cells that have changed to ``batch_update_list``.

    Also appends player cells that need fixing: a ``stats_dict`` value of
    ``"Fix!"`` marks the player cell with the ``-FIX!!!`` suffix.

    Args:
        all_values: Sheet contents as a list of rows.
        first_row: First row (1-based) of the active day.
        last_row: Last row (1-based) of the active day.
        stats_dict: Player name mapped to a stats dict, ``"Fix!"`` or ``None``.
        stats: Stat keys in sheet column order, starting at column index 3.
        batch_update_list: Pending cell updates; appended to in place.

    Returns:
        The same ``batch_update_list``.
    """
    for row_num, row in enumerate(all_values, start=1):
        if not first_row <= row_num <= last_row:
            continue
        player_name = row[2]
        if player_name.endswith(_FIX_SUFFIX):
            continue
        # Unknown players and players without a game both come back as None.
        player_stats = stats_dict.get(player_name)
        if player_stats is None:
            continue
        if player_stats == "Fix!":
            batch_update_list.append({
                'range': f'{indexToLetter(2)}{row_num}',
                'values': [[f'{player_name}-FIX!!!']],
            })
            continue
        for col_num, stat in enumerate(stats, start=3):
            new_value = player_stats[stat]
            if new_value is not None and str(new_value) != str(row[col_num]):
                batch_update_list.append({
                    'range': f'{indexToLetter(col_num)}{row_num}',
                    'values': [[f'{new_value}']],
                })
    return batch_update_list


def indexToLetter(index):
    """Convert a zero-indexed column number to its column letter(s).

    ``0 -> 'A'``, ``25 -> 'Z'``, ``26 -> 'AA'``, ``27 -> 'AB'`` and so on.

    Raises:
        ValueError: If ``index`` is negative.
    """
    number = int(index) + 1
    if number < 1:
        raise ValueError(f"Column index must be non-negative, got {index}")
    letters = ''
    while number > 0:
        # Column names are bijective base-26: there is no zero digit.
        number, remainder = divmod(number - 1, 26)
        letters = chr(ord('A') + remainder) + letters
    return letters


def batchUpdate(batch_update_list, worksheet=None):
    """Push the pending changes to the Google Sheet.

    With nothing to push, prints a message and sleeps for one minute.

    Args:
        batch_update_list: Cell updates in gspread ``batch_update`` format.
        worksheet: Worksheet to update. When omitted, a module-level
            ``worksheet`` variable is used, as in earlier versions.

    Raises:
        RuntimeError: If there are updates but no worksheet is available.
    """
    if not batch_update_list:
        print('No update needed, sleeping for 1 minute')
        time.sleep(60)
        return
    if worksheet is None:
        worksheet = globals().get('worksheet')
    if worksheet is None:
        raise RuntimeError('No worksheet available for the batch update')
    worksheet.batch_update(batch_update_list, value_input_option="USER_ENTERED")


def main():
    """Run the fetch-and-update loop until interrupted."""
    # Use a combination of our good proxies with some fetched from the
    # internet for variation.
    proxy_list, good_proxy_list = buildProxyList(proxies=proxies, raw_text_url=proxy_url)
    while True:
        try:
            batch_update_list = []
            worksheet = getWorksheet(spreadsheet_key, json_keyfile)
            url_date, season, date = setDates(day)
            print("Date: " + str(date))
            all_values = getAllValues(worksheet)
            num_participants = getNumberOfParticipants(all_values)
            first_row, last_row = getFirstRowLastRow(all_values, num_participants, date)
            if first_row is None:
                print("No games today! Pausing for 1000 seconds...")
                time.sleep(1000)
                continue
            players_unique, batch_update_list, all_values = cleanPlayers(
                all_values, first_row, last_row, batch_update_list)
            stats_dict, good_proxy_list = getStats(
                players_unique, url_date, season,
                proxy_list=proxy_list, good_proxy_list=good_proxy_list)
            batch_update_list = cellsToUpdate(
                all_values, first_row, last_row, stats_dict, stats, batch_update_list)
            if batch_update_list:
                print(batch_update_list)
            batchUpdate(batch_update_list, worksheet=worksheet)
        except Exception as e:
            # Top-level boundary: report, back off briefly and try again.
            print(e)
            print('Sleeping for 10 seconds')
            time.sleep(10)
            continue


if __name__ == "__main__":
    main()