Browse Source

Complete refactor

bryan 3 years ago
parent
commit
1208e5f4d1
2 changed files with 286 additions and 189 deletions
  1. 284 188
      nba_playoffs_game_updater.py
  2. 2 1
      requirements.txt

+ 284 - 188
nba_playoffs_game_updater.py

@@ -1,229 +1,325 @@
 #!/usr/bin/env python3
 
 import gspread
+import json
+import random
+from os import sys
 from oauth2client.service_account import ServiceAccountCredentials
 import datetime
 import time
 from nba_api.stats.static import players
 from nba_api.stats.endpoints import playergamelog
+import timeout_decorator
+import urllib
 
-spreadsheet_key = '1QBQvGSMesox1gwjpaoK-0-p3n-c4I_L73PWCggjdayM' # 2019 Official
+
+try:
+    from nba_api.library.debug.debug import DEBUG_STORAGE
+except ImportError:
+    DEBUG_STORAGE = False
+
+#spreadsheet_key = '1QBQvGSMesox1gwjpaoK-0-p3n-c4I_L73PWCggjdayM' # 2019 Official
 #spreadsheet_key = '14pHOScaGXvN83iCca6_5p6QoViYvo223cIJD9nnl7TI' # 2019 Test
+#spreadsheet_key = '1n2qAxDhy3B-a20cn92H340GoPeKQE8fpztPlzKpGw80' # 2020 Test
+spreadsheet_key = '1ajlHmH-dUzwkVfD-4NgpkK8ni3I3UuUFcdefF_DUOyg' # 2020 Official
 json_keyfile = 'NBA Playoffs Game-1f9a46f0715c.json'
-current_day = 'today' # today, else:
-#current_day = datetime.date(2018, 4, 15) # set date manually
-nba_cooldown = 2 # in seconds, don't hammer the NBA.com API too hard
+day = 'today' # today, else:
+#day = datetime.date(2020, 8, 19) # set date manually
+nba_cooldown = random.gammavariate(alpha=9, beta=0.4) # don't hammer the NBA API
+stats=['PTS', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'WL'] # stats appear in this order
+
+STATS_HEADERS = {
+    'Host': 'stats.nba.com',
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) Gecko/20100101 Firefox/72.0',
+    'Accept': 'application/json, text/plain, */*',
+    'Accept-Language': 'en-US,en;q=0.5',
+    'Accept-Encoding': 'gzip, deflate, br',
+    'x-nba-stats-origin': 'stats',
+    'x-nba-stats-token': 'true',
+    'Connection': 'keep-alive',
+    'Referer': 'https://stats.nba.com/',
+    'Pragma': 'no-cache',
+    'Cache-Control': 'no-cache',
+}
+
+proxy_url="https://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list-raw.txt"
+
+proxies = [
+
+]
+
 
 ###############################################################################
+def buildProxyList(proxies=None, raw_text_url="https://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list-raw.txt"):
+    """
+    Build the list of proxies to try, plus an empty list for proxies that have worked
+
+    proxies: optional proxies to put at the front of the list, in the order given
+    raw_text_url: URL of a plain-text proxy list with one proxy per line
+
+    Returns (proxy_list, good_proxy_list); the fetched proxies are shuffled and
+    good_proxy_list always starts out empty
+    """
+    # A bare "import urllib" does not load the request submodule
+    import urllib.request
+
+    # Copy so the caller's list (or a shared default) is never aliased
+    preferred = list(proxies) if proxies else []
+    # The with block closes the HTTP response even if decoding fails
+    with urllib.request.urlopen(raw_text_url) as response:
+        fetched = [line.decode("utf-8").strip() for line in response]
+    # Blank lines in the download would otherwise be tried as proxies
+    fetched = [proxy for proxy in fetched if proxy]
+    random.shuffle(fetched)
+    return preferred + fetched, []
+
 
+"""
+Returns a worksheet instance
+"""
 def getWorksheet(spreadsheet_key, json_keyfile):
-    scope = ['https://spreadsheets.google.com/feeds',
-                     'https://www.googleapis.com/auth/drive']
-    credentials = ServiceAccountCredentials.from_json_keyfile_name(json_keyfile, scope)
-    gc = gspread.authorize(credentials)
-    spreadsheet = gc.open_by_key(spreadsheet_key)
-    worksheet = spreadsheet.get_worksheet(0)
+    try:
+        scope = ['https://spreadsheets.google.com/feeds',
+                'https://www.googleapis.com/auth/drive']
+        credentials = ServiceAccountCredentials.from_json_keyfile_name(json_keyfile, scope)
+        gc = gspread.authorize(credentials)
+        spreadsheet = gc.open_by_key(spreadsheet_key)
+        worksheet = spreadsheet.get_worksheet(0)
+    except Exception as e:
+        f"Exception: {str(e)}"
+        f"Could not retrieve worksheet!"
+        f"Check your API key, credentials, or network!"
+        raise(e)
     return worksheet
 
-def setDates(current_day):
-    if current_day == 'today':
+
+def getAllValues(worksheet):
+    """
+    Returns whatever worksheet.get_all_values() gives back
+    (expected to be one list of cell values per row of the worksheet)
+    """
+    all_values = worksheet.get_all_values()
+    return all_values
+
+
+"""
+Create various date variables based on "today's" day
+"""
+def setDates(day):
+    if day == 'today':
         # in case games go past midnight
-        current_date = datetime.datetime.now() - datetime.timedelta(hours=3)
-        current_date = current_date.date()
-    else:
-        current_date = current_day
-    url_date = current_date.strftime('%m/%d/%Y')
-    current_year = current_date.year
-    if current_date.month > 6:
-        current_season = '{}-{}'.format(str(current_year), 
-                                        str(current_year + 1)[2:])
+        date = datetime.datetime.now() - datetime.timedelta(hours=3)
+        date = date.date()
     else:
-        current_season = '{}-{}'.format(str(current_year - 1), 
-                                        str(current_year)[2:])
-    return url_date, current_season, current_date
-    
-def getFirstRowLastRow(worksheet, current_date):
-    date_col_values = worksheet.col_values(1) # get all date cell values from column 1
-    final_row = len(date_col_values) # get number of rows in spreadsheet
-    date_cells = worksheet.range('A4:A'+str(final_row)) # get all date cells
-    first_row = ""
-    last_row = ""
-    for date_cell in date_cells:
-        if first_row != "":
-            if date_cell.row == final_row:
-                last_row = final_row
-                break
-            elif date_cell.value != "":
-                last_row = date_cell.row - 1
+        date = day
+    url_date = date.strftime('%m/%d/%Y')
+    year = date.year
+    season = f"{format(str(year - 1))}-{str(year)[2:]}"
+    return url_date, season, date
+
+
+def getNumberOfParticipants(all_values):
+    """
+    Determines the number of players in the pool
+
+    all_values: list of rows, each a list of cell values
+
+    The count is the distance between the first two rows whose first cell is
+    not empty; rows with a zero-based index below 4 are ignored
+    Raises ValueError if fewer than two such rows exist
+    """
+    first_data_index = 4
+    marker_rows = [
+        row_num
+        for row_num, row in enumerate(all_values)
+        # "row and" guards against completely empty rows
+        if row_num >= first_data_index and row and row[0] != ""
+    ]
+    if len(marker_rows) < 2:
+        # Without two markers there is no distance to measure
+        raise ValueError("Need at least two rows with a value in the first column to count participants")
+    start, end = marker_rows[:2]
+    return end - start
+
+
+"""
+Determines the active day's first and last rows
+"""
+def getFirstRowLastRow(all_values, num_participants, current_date):
+    first_row = None
+    last_row = None
+    for row_num, row in enumerate(all_values, start=1):
+        date=row[0]
+        if date != "" and row_num >= 4:
+            day = datetime.datetime.strptime('{} {}'.format(date, 
+                                                            str(current_date.year)), 
+                                                            '%A, %B %d %Y')
+            if day.date() == current_date:
+                first_row = row_num
+                last_row = first_row + num_participants - 1
                 break
-            continue
-        elif date_cell.value != "":
-            date_cell_day = datetime.datetime.strptime('{} {}'.format(date_cell.value, 
-                                                       str(current_date.year)), 
-                                                       '%A, %B %d %Y')
-            date_cell_day = date_cell_day.date()
-            if date_cell_day == current_date:
-                first_row = date_cell.row
     return first_row, last_row
 
-def parsePlayersCells(todays_cells):
-    player_cells = []
-    player_names = []
-    for cell in todays_cells:
-        if cell.col == 3 and cell.value != "" and cell.value[-7:] != "-FIX!!!":
-            player_cells.append(cell)
-            player_name = cleanPlayerName(cell)
-            player_names.append(player_name)
-    player_names_unique = list(dict.fromkeys(player_names))
-    return player_cells, player_names_unique
-        
-def cleanPlayerName(player_cell):
-    raw_name = player_cell.value
-    first_name_last_name = raw_name.split()
+
+"""
+Rudimentary way to reduce player name errors
+"""
+def cleanFirstNameLastName(player):
+    first_name_last_name = player.split()
     first_name = first_name_last_name[0]
     first_name = first_name.replace('.', '')
+    # New nickname for T.J. Warren should be "The Outlier"
+    if first_name == "TJ":
+        first_name = "T.J."
+    elif first_name == "Donavan":
+        first_name = "Donovan"
     last_name = first_name_last_name[1]
-    player_name = first_name + ' ' + last_name
-    return player_name
-
-def getPlayerStats(player_id, url_date, current_season):
-    player_game_logs = playergamelog.PlayerGameLog(player_id=player_id,
-                                                   league_id_nullable='00',
-                                                   season_type_all_star='Playoffs',
-                                                   date_from_nullable=url_date,
-                                                   date_to_nullable=url_date)
-    player_game_logs = player_game_logs.get_dict()
-    player_game_logs_results = player_game_logs.get('resultSets')[0]
-    player_game_logs_headers = player_game_logs_results.get('headers')
-    # if player has no stats for this day, list will be empty
-    if len(player_game_logs_results.get('rowSet')) < 1:
-        player_stats_dict = None
-    else:
-        player_game_logs_values = player_game_logs_results.get('rowSet')[0]
-        player_stats_dict = dict(zip(player_game_logs_headers, player_game_logs_values))
-    return player_stats_dict
+    player_clean = first_name + ' ' + last_name
+    return player_clean
+
+
+def cleanPlayers(all_values, first_row, last_row, batch_update_list):
+    """
+    Create a unique list of players that have been selected today
+    Also, append misspelled players to batch_update_list to autofix on next push if we can
+
+    all_values: list of rows; the player name is read from the third cell of each row
+    first_row, last_row: 1-indexed rows of the active day, both inclusive
+    batch_update_list: list that receives one update dict per auto-corrected name
+
+    Returns (players_unique, batch_update_list, all_values); corrected names are
+    also written back into all_values
+    """
+    players_found = []
+    for row_num, row in enumerate(all_values, start=1):
+        # last_row is the final participant's row, so it has to be included
+        if not first_row <= row_num <= last_row:
+            continue
+        player = row[2]
+        if player == "" or player[-7:] == "-FIX!!!":
+            continue
+        if len(players.find_players_by_full_name(player)) > 0:
+            players_found.append(player)
+            continue
+        try:
+            player_clean = cleanFirstNameLastName(player)
+        except IndexError:
+            # A one-word name cannot be split into first and last name
+            player_clean = None
+        if player_clean is not None and len(players.find_players_by_full_name(player_clean)) > 0:
+            all_values[row_num - 1][2] = player_clean
+            batch_update_list.append({'range': f'{indexToLetter(2)}{row_num}', 'values': [[player_clean]]})
+            players_found.append(player_clean)
+        else:
+            print("Player:", player, "not found, please fix name!")
+    # dict.fromkeys drops duplicates while keeping first-seen order
+    players_unique = list(dict.fromkeys(players_found))
+    return players_unique, batch_update_list, all_values
+
+
+"""
+Pull player's gamelog from stats.nba.com based on the url_date and player_id
+"""
+#@timeout_decorator.timeout(30)
+def getStats(players_unique, url_date, season, proxy_list=[], good_proxy_list=[]):
 
-def buildStatsDict(player_names_unique, url_date, current_season):
     stats_dict = {}
-    for player_name in player_names_unique:
-        player_info = players.find_players_by_full_name(player_name)
-        if len(player_info) < 1:
-            print("\nPlayer \"" + player_name + "\" not found, please fix name!")
-            stats_dict[player_name] = "Fix Name!"
-            continue
+
+    for player in players_unique:
+        player_info = players.find_players_by_full_name(player)
         player_id = player_info[0].get('id')
-        stats_dict[player_name] = getPlayerStats(player_id, url_date, current_season)
-        print('{} {}{} {}{}'.format('Got', player_name, '\'s (ID:', str(player_id), ') stats from NBA.com'))
-        time.sleep(nba_cooldown)
-    return stats_dict
-
-def updatePlayerStatsLocal(todays_cells, player_cell, player_stats, update, player_update):
-    for cell in todays_cells:
-        if cell.row == player_cell.row:
-            if (cell.col == 4 and player_stats.get("PTS") != "" and 
-                str(cell.value) != str(player_stats.get("PTS"))):
-                cell.value = str(player_stats.get("PTS"))
-                update = True
-                player_update = True
-            elif (cell.col == 5 and player_stats.get("REB") != "" and 
-                 str(cell.value) != str(player_stats.get("REB"))):
-                cell.value = str(player_stats.get("REB"))
-                update = True
-                player_update = True
-            elif (cell.col == 6 and player_stats.get("AST") != "" and 
-                 str(cell.value) != str(player_stats.get("AST"))):
-                cell.value = str(player_stats.get("AST"))
-                update = True
-                player_update = True
-            elif (cell.col == 7 and player_stats.get("STL") != "" and 
-                 str(cell.value) != str(player_stats.get("STL"))):
-                cell.value = str(player_stats.get("STL"))
-                update = True
-                player_update = True
-            elif (cell.col == 8 and player_stats.get("BLK") != "" and 
-                 str(cell.value) != str(player_stats.get("BLK"))):
-                cell.value = str(player_stats.get("BLK"))
-                update = True
-                player_update = True
-            elif (cell.col == 9 and player_stats.get("TOV") != "" and 
-                 str(cell.value) != str(player_stats.get("TOV"))):
-                cell.value = str(player_stats.get("TOV"))
-                update = True
-                player_update = True
-            elif (cell.col == 10 and player_stats.get("WL") != "" and 
-                 str(cell.value) != str(player_stats.get("WL"))):
-                cell.value = str(player_stats.get("WL"))
-                update = True
-                player_update = True
-    return todays_cells, update, player_update
+
+        while True:
+
+            # Move working proxies to the front of the list
+            if len(good_proxy_list) > 0:
+                proxy_list = good_proxy_list + proxy_list
             
+            # Remove duplicate proxies
+            proxy_list = list(dict.fromkeys(proxy_list))
+
+            # Use the first proxy in the list
+            request_proxy = proxy_list[0]
+
+            try:
+                print('Proxy:', request_proxy)
+                player_game_log = playergamelog.PlayerGameLog(  player_id=player_id,
+                                                                proxy=request_proxy,
+                                                                season=season,
+                                                                timeout=10,
+                                                                league_id_nullable='00',
+                                                                season_type_all_star='Playoffs',
+                                                                date_from_nullable=url_date,
+                                                                date_to_nullable=url_date,
+                                                                )                                                                                           
+                print('Success!')
+                if request_proxy not in good_proxy_list:
+                    good_proxy_list.append(request_proxy)
+                player_game_log_dict = player_game_log.get_dict()
+                if DEBUG_STORAGE is False:
+                    time.sleep(nba_cooldown)
+                break
+            except OSError as e:
+                print(e)
+                if request_proxy in good_proxy_list:
+                    good_proxy_list.remove(request_proxy)
+                else:
+                    print('Proxy refused, removing', request_proxy)
+                    proxy_list.remove(request_proxy)
+                continue
+            except Exception as e:
+                print(e)
+                print('Could not connect to the NBA API, sleeping for 30 seconds')
+                time.sleep(30)
+
+        player_game_log_results = player_game_log_dict.get('resultSets')[0]
+        player_game_log_headers = player_game_log_results.get('headers')
+        
+        # if player has no stats for this day, list will be empty
+        if len(player_game_log_results.get('rowSet')) < 1:
+            player_stats_dict = None
+        else:
+            player_game_log_stats = player_game_log_results.get('rowSet')[0]
+            player_stats_dict = dict(zip(player_game_log_headers, player_game_log_stats))
+
+        stats_dict[player] = player_stats_dict
+
+        
+    return stats_dict, good_proxy_list
+
+
+def cellsToUpdate(all_values, first_row, last_row, stats_dict, stats, batch_update_list):
+    """
+    Append stat cells that have changes to batch_update_list
+    Also append player cells that need fixing to batch_update_list
+
+    Only rows first_row..last_row (1-indexed, inclusive) are examined
+    The player name comes from the third cell of a row and the stats are
+    compared against the cells that follow it, in the order given by stats
+    Returns batch_update_list
+    """
+    for sheet_row, cells in enumerate(all_values, start=1):
+        if sheet_row < first_row or sheet_row > last_row:
+            continue
+        player_name = cells[2]
+        # Skip names already flagged for fixing and names without fetched stats
+        if player_name[-7:] == "-FIX!!!" or player_name not in stats_dict:
+            continue
+        player_stats = stats_dict[player_name]
+        if player_stats is None:
+            continue
+        if player_stats == "Fix!":
+            batch_update_list.append({'range': f'{indexToLetter(2)}{sheet_row}', 'values': [[f'{player_name}-FIX!!!']]})
+            continue
+        for col_num, stat in enumerate(stats, start=3):
+            new_value = player_stats[stat]
+            # Queue only cells whose text differs from the fetched value
+            if str(new_value) != str(cells[col_num]) and new_value is not None:
+                batch_update_list.append({'range': f'{indexToLetter(col_num)}{sheet_row}', 'values': [[f'{new_value}']]})
+    return batch_update_list
+
+
+def indexToLetter(index):
+    """
+    Convert zero-indexed column number to the appropriate column letter (A=0, B=1, C=2...)
+    Indexes past Z continue the way spreadsheet columns do (26=AA, 27=AB...)
+    Raises ValueError for a negative index
+    """
+    number = int(index) + 1
+    if number < 1:
+        raise ValueError(f"Column index must not be negative: {index}")
+    letters = ""
+    while number > 0:
+        # Column letters have no zero digit, hence the "- 1" on every step
+        number, remainder = divmod(number - 1, 26)
+        letters = chr(ord('A') + remainder) + letters
+    return letters
+
+
+def batchUpdate(batch_update_list):
+    """
+    Push changes to Google Sheet
+
+    batch_update_list: list of update dicts passed to worksheet.batch_update
+
+    Uses the module-level worksheet; values are sent as USER_ENTERED
+    When there is nothing to push, prints a notice and sleeps for 30 seconds
+    """
+    if not batch_update_list:
+        print('No update needed')
+        time.sleep(30)
+        return
+    # A batch with a single update must be pushed too
+    worksheet.batch_update(batch_update_list, value_input_option="USER_ENTERED")
 
 
 if __name__ == "__main__":
 
+    # Use a combination of our good proxies with some fetched from the internet for variation
+    proxy_list, good_proxy_list = buildProxyList(proxies=proxies, raw_text_url=proxy_url)
+
     while True:
-        url_date, current_season, current_date = setDates(current_day)
-        print("Date: " + str(current_date))
-        
-        # open worksheet, find today's action, and create unique list of players
         try:
+            batch_update_list = []
             worksheet = getWorksheet(spreadsheet_key, json_keyfile)
-            first_row, last_row = getFirstRowLastRow(worksheet, current_date)
-            if first_row == "":
+            url_date, season, date = setDates(day)
+            print("Date: " + str(date))
+            all_values = getAllValues(worksheet)
+            num_participants = getNumberOfParticipants(all_values)
+            first_row, last_row = getFirstRowLastRow(all_values, num_participants, date)
+            if first_row is None:
                 print("No games today! Pausing for 1000 seconds...")
                 time.sleep(1000)
                 continue
-            todays_cells = worksheet.range(first_row, 3, last_row, 10)
-            player_cells, player_names_unique = parsePlayersCells(todays_cells)
-            print("Today's players:", end=' ', flush=True)
-            print(player_names_unique)
-        except gspread.exceptions.APIError:
-            print("Google API overloaded, retrying in 10 seconds...")
-            time.sleep(10)
-            continue
-        except:
+            players_unique, batch_update_list, all_values = cleanPlayers(all_values, first_row, last_row, batch_update_list)
+            stats_dict, good_proxy_list = getStats(players_unique, url_date, season, proxy_list=proxy_list, good_proxy_list=good_proxy_list)
+            batch_update_list = cellsToUpdate(all_values, first_row, last_row, stats_dict, stats, batch_update_list)
+            if len(batch_update_list) > 1:
+                print(batch_update_list)
+            batchUpdate(batch_update_list)
+        except Exception as e:
+            print(e)
+            print('Sleeping for 10 seconds')
             time.sleep(10)
             continue
-
-        # Build stats dictionary from unique list of players
-        print("Retrieving player stats from NBA.com...")
-        stats_dict = buildStatsDict(player_names_unique, url_date, current_season)
-
-
-        update = False # only update the sheet if it has changed
-        for player_cell in player_cells:
-            player_update = False # Return if individual player stats have changed
-            player_name = cleanPlayerName(player_cell)
-            if stats_dict.get(player_name) != None: # skip players with empty stats
-                if stats_dict.get(player_name) == "Fix Name!":
-                    player_cell.value = str(player_cell.value) + "-FIX!!!"
-                    update = True
-                else:
-                    player_stats = stats_dict.get(player_name)
-                    todays_cells, update, player_update = updatePlayerStatsLocal(todays_cells, 
-                                                                                 player_cell, 
-                                                                                 player_stats, 
-                                                                                 update, 
-                                                                                 player_update)
-            if player_update == True:
-                print("Updated " + player_name + "\'s stats")
-            elif player_update == False:
-                print(player_name + "\'s stats have not changed")
-                    
-        if update == True:
-            while True:
-                try: 
-                    print("Pushing changes to google sheet...")
-                    worksheet.update_cells(todays_cells, 'USER_ENTERED')
-                    break
-                except gspread.exceptions.APIError:
-                    print("Google API overloaded, adding a 10 second delay...")
-                    time.sleep(10)
-                    continue
-        else:
-            print("No update necessary, pausing for 30 seconds...")
-            time.sleep(30)
-                
-        continue # restart entire loop
-
-

+ 2 - 1
requirements.txt

@@ -2,4 +2,5 @@ gspread
 oauth2client
 datetime
 requests
-git+git://github.com/swar/nba_api.git
+timeout-decorator
+git+git://github.com/swar/nba_api.git@master