#!/usr/bin/env python3
import datetime
import json
import random
import sys
import time
import urllib.request

import gspread
import timeout_decorator
from oauth2client.service_account import ServiceAccountCredentials
from nba_api.stats.static import players
from nba_api.stats.endpoints import playergamelog

try:
    from nba_api.library.debug.debug import DEBUG_STORAGE
except ImportError:
    DEBUG_STORAGE = False

# Spreadsheet keys from previous seasons, kept for reference:
#spreadsheet_key = '1QBQvGSMesox1gwjpaoK-0-p3n-c4I_L73PWCggjdayM' # 2019 Official
#spreadsheet_key = '14pHOScaGXvN83iCca6_5p6QoViYvo223cIJD9nnl7TI' # 2019 Test
#spreadsheet_key = '1n2qAxDhy3B-a20cn92H340GoPeKQE8fpztPlzKpGw80' # 2020 Test
#spreadsheet_key = '1ajlHmH-dUzwkVfD-4NgpkK8ni3I3UuUFcdefF_DUOyg' # 2020 Official
#spreadsheet_key = '1FgoBfPw4Vhi89rcYgxnAxLTeJ84GLXD1r39K0DXqxq4' # 2021 Official
#spreadsheet_key = '1Gt4J1nNOv1E3-gikopSJQCD7nAqx7DEYxUqsZNac9_Y' # 2022 Official
#spreadsheet_key = '1taZ97_PKEGYdSnMAeraPAbFR2U_7-P1GamzIdhlVXjc' # 2023 Test
spreadsheet_key = '10qAZWyRhwo4Hb4_5X0Q4LFFSudGn2okJNLojwGAg0yI' # 2023 Official
json_keyfile = 'NBA Playoffs Game-1f9a46f0715c.json'
day = 'today' # 'today', or set a specific date manually below
#day = datetime.date(2022, 5, 7) # set date manually
nba_cooldown = random.gammavariate(alpha=9, beta=0.4) # pause between NBA API calls (mean ~3.6 s) so we don't hammer the API
stats = ['PTS', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'WL'] # stats appear in this order
STATS_HEADERS = {
    'Host': 'stats.nba.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) Gecko/20100101 Firefox/72.0',
    'Accept': 'application/json, text/plain, */*',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate, br',
    'x-nba-stats-origin': 'stats',
    'x-nba-stats-token': 'true',
    'Connection': 'keep-alive',
    'Referer': 'https://stats.nba.com/',
    'Pragma': 'no-cache',
    'Cache-Control': 'no-cache',
}
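# Note: STATS_HEADERS is defined but never passed to the API calls below. As an
# illustrative (unverified against your installed nba_api version) option, the
# endpoint constructors accept a headers argument if stats.nba.com starts
# rejecting the default requests, e.g.:
#   playergamelog.PlayerGameLog(player_id=player_id, headers=STATS_HEADERS, ...)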
proxy_url = "https://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list-raw.txt"
# proxies = [
#     "206.127.88.18:80",
#     "47.112.104.235:80",
#     "60.169.201.13:53315",
#     "14.207.10.155:8080",
#     "113.100.209.10:3128",
#     "113.100.209.145:3128",
#     "69.162.65.42:5836",
#     "46.18.210.88:5836",
#     "203.150.160.95:8080",
#     "117.252.68.173:8080",
#     "113.100.209.140:3128",
#     "218.66.253.146:8800",
#     "186.159.5.161:8080",
#     "181.224.161.132:999",
#     "103.28.90.154:5836",
#     "113.100.209.116:3128",
#     "45.231.29.45:8080",
#     "202.169.255.3:8181",
#     "103.52.144.242:8080",
#     "181.78.12.164:999",
#     "117.67.77.188:4216",
#     "183.89.63.159:8080",
#     "88.150.220.130:3128",
#     "168.195.204.168:8080",
#     "116.0.2.162:52076",
#     "58.52.115.99:4216",
#     "182.23.52.114:6060",
#     "116.0.3.238:8080",
#     "175.6.66.48:3128",
#     "180.183.246.110:8080",
#     "220.249.149.69:9999",
#     "103.221.254.102:48146",
#     "110.77.242.14:8080",
#     "198.98.59.87:8080",
#     "209.91.216.168:8080",
#     "177.8.170.62:8080",
#     "218.66.253.144:10200",
#     "113.161.58.255:8080",
#     "180.244.73.12:8080",
#     "190.2.210.98:8080",
#     "82.114.71.98:8080",
#     "190.103.28.161:999",
#     "181.119.69.89:3128",
#     "190.120.249.246:999",
#     "82.114.115.194:1256",
#     "218.66.253.146:10084",
#     "103.146.68.255:8080",
# ]
proxies = []
###############################################################################

def buildProxyList(proxies=[], raw_text_url="https://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list-raw.txt"):
    """
    Fetch a plain-text proxy list, shuffle it, and prepend any seeded proxies.
    Returns the combined proxy list and an (initially empty) good-proxy list.
    """
    good_proxy_list = []
    proxy_list = []
    r = urllib.request.urlopen(raw_text_url)
    for line in r:
        line = line.decode("utf-8").strip()
        proxy_list.append(line)
    random.shuffle(proxy_list)
    proxy_list = proxies + proxy_list  # seeded proxies stay at the front
    return proxy_list, good_proxy_list
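# Illustrative usage (hypothetical seed proxy), showing the return shape:
#   proxy_list, good_proxy_list = buildProxyList(proxies=["10.0.0.1:8080"])
#   proxy_list[0]    -> "10.0.0.1:8080"   (seeded entries come first)
#   good_proxy_list  -> []                (filled in later by getStats)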
- """
- Returns a worksheet instance
- """
- def getWorksheet(spreadsheet_key, json_keyfile):
- try:
- scope = ['https://spreadsheets.google.com/feeds',
- 'https://www.googleapis.com/auth/drive']
- credentials = ServiceAccountCredentials.from_json_keyfile_name(json_keyfile, scope)
- gc = gspread.authorize(credentials)
- spreadsheet = gc.open_by_key(spreadsheet_key)
- worksheet = spreadsheet.get_worksheet(0)
- except Exception as e:
- print(f"Exception: {str(e)}")
- print("Could not retrieve worksheet!")
- print("Check your API key, credentials, or network!")
- raise(e)
- return worksheet
- """
- Returns a list of lists containing the values of all cells in the worksheet by row
- """
- def getAllValues(worksheet):
- return worksheet.get_all_values()
- """
- Create various date variables based on "today's" day
- """
- def setDates(day):
- if day == 'today':
- # in case games go past midnight
- date = datetime.datetime.now() - datetime.timedelta(hours=3)
- date = date.date()
- else:
- date = day
- url_date = date.strftime('%m/%d/%Y')
- year = date.year
- season = f"{format(str(year - 1))}-{str(year)[2:]}"
- return url_date, season, date
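# Worked example: run at 2023-05-08 01:30 local time, the 3-hour offset yields
# date = datetime.date(2023, 5, 7), url_date = '05/07/2023', season = '2022-23'.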
- """
- Determines the number of players in the pool
- """
- def getNumberOfParticipants(all_values):
- count=0
- for row_num, row in enumerate(all_values):
- if row[0] != "" and row_num >= 3 and count == 0:
- start=row_num
- count+=1
- elif row[0] != "" and row_num >= 3 and count == 1:
- end=row_num
- break
- num_participants = end - start
- return num_participants
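# Example: if the first two dates in column A sit at zero-based row indices 3
# and 9, each day spans 9 - 3 = 6 participant rows, so this returns 6.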
- """
- Determines the active day's first and last rows
- """
- def getFirstRowLastRow(all_values, num_participants, current_date):
- first_row = None
- last_row = None
- for row_num, row in enumerate(all_values, start=1):
- date=row[0]
- if date != "" and row_num >= 4:
- day = datetime.datetime.strptime('{} {}'.format(date,
- str(current_date.year)),
- '%A, %B %d %Y')
- if day.date() == current_date:
- first_row = row_num
- last_row = first_row + num_participants - 1
- break
- return first_row, last_row
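# Example: with current_date = datetime.date(2023, 5, 7), a column-A cell
# reading "Sunday, May 7" parses as "Sunday, May 7 2023" and marks that row as
# first_row; last_row is then first_row + num_participants - 1.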
- """
- Rudimentary way to reduce player name errors
- """
- def cleanFirstNameLastName(player):
- first_name_last_name = player.split()
- first_name = first_name_last_name[0]
- first_name = first_name.replace('.', '')
- # New nickname for T.J. Warren should be "The Outlier"
- if first_name == "TJ":
- first_name = "T.J."
- elif first_name == "Donavan":
- first_name = "Donovan"
- last_name = first_name_last_name[1]
- player_clean = first_name + ' ' + last_name
- return player_clean
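# Examples: "TJ Warren" -> "T.J. Warren", "Donavan Mitchell" -> "Donovan Mitchell".
# Only the first two tokens are kept, so suffixes (e.g. "Gary Payton II") are
# dropped before the nba_api name lookup.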
- """
- Create a unique list of players that have been selected today
- Also, append misspelled players to batch_update_list to autofix on next push if we can
- """
- def cleanPlayers(all_values, first_row, last_row, batch_update_list):
- players_unique = []
- for row_num, row in enumerate(all_values, start=1):
- if first_row <= row_num <= last_row:
- player = row[2]
- if player[-7:] != "-FIX!!!" and player != "":
- if len(players.find_players_by_full_name(player)) > 0:
- players_unique.append(player)
- else:
- player_clean = cleanFirstNameLastName(player)
- if len(players.find_players_by_full_name(player_clean)) > 0:
- all_values[row_num - 1][2] = player_clean
- batch_update_list.append({'range': f'{indexToLetter(2)}{row_num}', 'values': [[player_clean]]})
- players_unique.append(player_clean)
- else:
- print(f"Player: {player} not found, please fix name!")
- players_unique = list(dict.fromkeys(players_unique))
- return players_unique, batch_update_list, all_values
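# Example of an autofix entry appended here: a corrected name in column C of
# row 12 becomes {'range': 'C12', 'values': [['Donovan Mitchell']]}.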
- """
- Pull player's gamelog from stats.nba.com based on the url_date and player_id
- """
- #@timeout_decorator.timeout(30)
- def getStats(players_unique, url_date, season, proxy_list=[], good_proxy_list=[]):
- stats_dict = {}
- for player in players_unique:
- player_info = players.find_players_by_full_name(player)
- player_id = player_info[0].get('id')
- print(f'Retrieving stats for: {player}')
- while True:
- # Move working proxies to the front of the list
- if len(good_proxy_list) > 0:
- proxy_list = good_proxy_list + proxy_list
-
- # Remove duplicate proxies
- proxy_list = list(dict.fromkeys(proxy_list))
- # Use the first proxy in the list
- request_proxy = proxy_list[0]
- try:
- print(f'Proxy: http://{request_proxy}')
- player_game_log = playergamelog.PlayerGameLog( player_id=player_id,
- proxy='http://' + request_proxy,
- season=season,
- timeout=10,
- league_id_nullable='00',
- season_type_all_star='Playoffs',
- date_from_nullable=url_date,
- date_to_nullable=url_date,
- )
- print('Success!')
- if request_proxy not in good_proxy_list:
- good_proxy_list.append(request_proxy)
- player_game_log_dict = player_game_log.get_dict()
- if DEBUG_STORAGE is False:
- time.sleep(nba_cooldown)
- break
- except OSError as e:
- print(e)
- if request_proxy in good_proxy_list:
- good_proxy_list.remove(request_proxy)
- else:
- print(f'Proxy refused, removing {request_proxy}')
- proxy_list.remove(request_proxy)
- continue
- except Exception as e:
- print(e)
- print('Could not connect to the NBA API, sleeping for 30 seconds')
- time.sleep(30)
- player_game_log_results = player_game_log_dict.get('resultSets')[0]
- player_game_log_headers = player_game_log_results.get('headers')
-
- # if player has no stats for this day, list will be empty
- if len(player_game_log_results.get('rowSet')) < 1:
- player_stats_dict = None
- else:
- player_game_log_stats = player_game_log_results.get('rowSet')[0]
- player_stats_dict = dict(zip(player_game_log_headers, player_game_log_stats))
- stats_dict[player] = player_stats_dict
-
- return stats_dict, good_proxy_list
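# Sketch of the resulting structure (values are illustrative and depend on the
# live game log; only the columns this script reads are shown):
#   stats_dict = {
#       'Player A': {'PTS': 25, 'REB': 7, 'AST': 5, 'STL': 2, 'BLK': 0,
#                    'TOV': 3, 'WL': 'W', ...other gamelog columns...},
#       'Player B (no game today)': None,
#   }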
- """
- Append stat cells that have changes to batch_update_list
- Also append player cells that need fixing to batch_update_list
- """
- def cellsToUpdate(all_values, first_row, last_row, stats_dict, stats, batch_update_list):
- for row_num, row in enumerate(all_values, start=1):
- if first_row <= row_num <= last_row:
- player_name = row[2]
- if player_name[-7:] != "-FIX!!!" and player_name in stats_dict.keys():
- if stats_dict[player_name] is not None:
- player_stats = stats_dict[player_name]
- if player_stats == "Fix!":
- batch_update_list.append({'range': f'{indexToLetter(2)}{row_num}', 'values': [[f'{player_name}-FIX!!!']]})
- continue
- for col_num, stat in enumerate(stats, start=3):
- pass
- #print(player_name, player_stats[stat])
- #print(player_name, f'{indexToLetter(col_num)}{row_num}', str(row[col_num]), f',', player_stats[stat])
- if str(player_stats[stat]) != str(row[col_num]) and player_stats[stat] is not None:
- #print('Update:', row_num, col_num, player_name, f'{indexToLetter(col_num)}{row_num}', str(row[col_num]), player_stats[stat])
- batch_update_list.append({'range': f'{indexToLetter(col_num)}{row_num}', 'values': [[f'{player_stats[stat]}']]}.copy())
- return batch_update_list
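# Example entry for a points update in column D of row 12 (columns D onward
# hold PTS, REB, AST, STL, BLK, TOV, WL in that order):
#   {'range': 'D12', 'values': [['34']]}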
- """
- Convert zero-indexed column number to the appropriate column letter (A=0, B=1, C=2...)
- """
- def indexToLetter(index):
- return chr(ord('@')+int(index)+1)
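# Examples: indexToLetter(0) -> 'A', indexToLetter(9) -> 'J'.
# Only single letters are produced, so sheets wider than column Z (index 25)
# would need a different conversion.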
- """
- Push changes to Google Sheet
- """
- def batchUpdate(batch_update_list):
- if len(batch_update_list) > 1:
- worksheet.batch_update(batch_update_list, value_input_option="USER_ENTERED")
- else:
- print('No update needed, sleeping for 1 minute')
- time.sleep(60)

if __name__ == "__main__":
    # Use a combination of our good proxies with some fetched from the internet for variation
    proxy_list, good_proxy_list = buildProxyList(proxies=proxies, raw_text_url=proxy_url)
    while True:
        try:
            batch_update_list = []
            worksheet = getWorksheet(spreadsheet_key, json_keyfile)
            url_date, season, date = setDates(day)
            print(f"Date: {date}")
            all_values = getAllValues(worksheet)
            num_participants = getNumberOfParticipants(all_values)
            first_row, last_row = getFirstRowLastRow(all_values, num_participants, date)
            if first_row is None:
                print("No games today! Pausing for 1000 seconds...")
                time.sleep(1000)
                continue
            players_unique, batch_update_list, all_values = cleanPlayers(all_values, first_row, last_row, batch_update_list)
            stats_dict, good_proxy_list = getStats(players_unique, url_date, season, proxy_list=proxy_list, good_proxy_list=good_proxy_list)
            batch_update_list = cellsToUpdate(all_values, first_row, last_row, stats_dict, stats, batch_update_list)
            if len(batch_update_list) > 1:
                print(batch_update_list)
            # batchUpdate pushes the changes, or sleeps for a minute when there is nothing to write
            batchUpdate(worksheet, batch_update_list)
        except Exception as e:
            print(e)
            print('Sleeping for 10 seconds')
            time.sleep(10)
            continue