#!/usr/bin/env python3
"""Fetch recent podcast episodes and mirror them to a handheld player.

For every feed in ``feedurls`` the script parses the feed, downloads each
enclosure that is not yet present under ``download_root_dir/<podcast title>``
and then deletes all but the last ``old_episodes_keep`` files (sorted by
name, which starts with the ``yy-mm-dd`` release date).  If
``handheld_root_dir`` exists, the downloaded files are copied there and the
same pruning is applied to each directory on the handheld.
"""

import datetime
import glob
import os
import pprint
import shutil
import time
import urllib.error
import urllib.request

import podcastparser

#############################
###### USER VARIABLES #######
#############################

# set download root dir
download_root_dir = '/home/bryan/Downloads/Podcasts'

# set handheld directory
handheld_root_dir = '/run/media/bryan/SANSA CLIPZ/Podcasts'

# feeds (python list); entries that are commented out are disabled feeds
feedurls = [
    'http://linuxactionnews.com/rss',
    'http://wakingup.libsyn.com/rss',
    'http://feeds.soundcloud.com/users/soundcloud:users:146429914/sounds.rss',
    'http://billburr.libsyn.com/rss',
    #'http://feeds.99percentinvisible.org/99percentinvisible',
    'http://files.libertyfund.org/econtalk/EconTalk.xml',
    'http://jamesandthegiantpodcast.libsyn.com/rss',
    'http://rss.art19.com/tim-ferriss-show',
    #'http://feed.thisamericanlife.org/talpodcast',
    #'https://www.npr.org/rss/podcast.php?id=510019',
]

# number of old episodes to keep
# (0 keeps everything: the parser is given no episode limit and the
# pruning slice ``files[:-0]`` is empty)
old_episodes_keep = 2

# include episode name in filename (1 = on, 0 = off)
episode_name_in_filename = 0

# enable debug output (1 = on, 0 = off)
debug = 0

#################################
###### END USER VARIABLES #######
#################################


#################################
########### HELPERS #############
#################################

def safe_name(name):
    """Return *name* with path separators replaced by '-'.

    Titles come straight from the feed; a separator inside one would make
    os.path.join() point into a different (possibly missing) directory.
    """
    for separator in (os.sep, os.altsep):
        if separator:
            name = name.replace(separator, '-')
    return name


def list_files(directory):
    """Return the sorted regular files directly inside *directory*.

    The directory is escaped so that glob metacharacters in a podcast
    title (e.g. '[' or '*') are matched literally.
    """
    pattern = os.path.join(glob.escape(directory), '*')
    return sorted(path for path in glob.glob(pattern) if os.path.isfile(path))


def prune_old_files(directory, keep):
    """Delete all but the last *keep* files (by sorted name) in *directory*."""
    for path in list_files(directory)[:-keep]:
        os.unlink(path)


def download_episode(url, dest_path, episode_title, podcast_title):
    """Download *url* to *dest_path*; return True on success, False otherwise.

    Errors are reported on stdout and never propagate.  A partially written
    file is removed so that the next run retries the download instead of
    mistaking the fragment for a finished episode.
    """
    print('Downloading ' + episode_title)
    try:
        urllib.request.urlretrieve(url, dest_path)
    except urllib.error.HTTPError as e:
        print('HTTPError: Could not download {}\nReason: {}\nURL: {}'.format(
            podcast_title, e.reason, url))
    except urllib.error.ContentTooShortError:
        # must be tested before URLError, which is its base class
        print('Download failed, file corrupt!')
    except urllib.error.URLError:
        print('URLError')
    else:
        return True

    # only reached on failure; the caller checked that dest_path did not
    # exist beforehand, so anything there now is a leftover of this attempt
    if os.path.exists(dest_path):
        os.unlink(dest_path)
    return False


def copy_files(src_file, dest_dir):
    """Copy *src_file* into *dest_dir* unless a file of that name exists.

    *dest_dir* is created when missing.
    """
    os.makedirs(dest_dir, exist_ok=True)
    dest_file = os.path.join(dest_dir, os.path.basename(src_file))
    if not os.path.exists(dest_file):
        print('Copying ' + src_file + ' to ' + dest_file)
        shutil.copyfile(src_file, dest_file)


# spoof headers for certain rss feeds
# (both headers go in ONE list: assigning addheaders a second time would
# replace the User-Agent entry instead of adding to it)
opener = urllib.request.build_opener()
opener.addheaders = [
    ('User-Agent',
     'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
     '(KHTML, like Gecko) Chrome/36.0.1941.0 Safari/537.36'),
    ('Content-Type', 'application/json'),
]
urllib.request.install_opener(opener)

#################################
######### GET EPISODES ##########
#################################

# parse feeds
for feed in feedurls:
    # parsed is a dict
    try:
        with urllib.request.urlopen(feed) as response:
            parsed = podcastparser.parse(feed, response, old_episodes_keep)
    except urllib.error.URLError as e:
        # one unreachable feed should not stop the remaining feeds
        print('Could not fetch feed {}: {}'.format(feed, e))
        continue

    # debug
    if debug == 1:
        pprint.pprint(parsed)

    # get podcast title
    podcast_title = parsed['title']
    print('Checking ' + podcast_title + ' for new episdoes...')

    # make sure the download dir (and the download root) exists
    podcast_download_dir = os.path.join(download_root_dir,
                                        safe_name(podcast_title))
    os.makedirs(podcast_download_dir, exist_ok=True)

    # parse episodes
    for episode in parsed['episodes']:
        # format release time to date format
        release_time = int(episode['published'])
        episode_date = datetime.datetime.fromtimestamp(release_time)
        episode_date = episode_date.strftime("%y-%m-%d")

        # debug
        if debug == 1:
            print(release_time)
            print(episode_date)

        # create filename based on episode date and/or episode title
        if episode_name_in_filename == 0:
            episode_title = episode_date
        else:
            episode_title = episode_date + " " + safe_name(episode['title'])

        # NOTE: splitext() also cuts a title at its last '.', which is kept
        # here so that existing file names stay stable
        base_path = os.path.splitext(
            os.path.join(podcast_download_dir, episode_title))[0]

        for enclosure in episode['enclosures']:
            # get download url
            url = enclosure['url']
            if debug == 1:
                print(url)

            # build the target path per enclosure so the extension is not
            # appended again for every further enclosure
            full_episode_path = base_path
            if enclosure['mime_type'] == 'audio/mpeg':
                full_episode_path += '.mp3'

            # download file and save to episode title
            if not os.path.exists(full_episode_path):
                download_episode(url, full_episode_path,
                                 episode['title'], podcast_title)

    # remove old episodes
    prune_old_files(podcast_download_dir, old_episodes_keep)

#################################
######## MANAGE HANDHELD ########
#################################

if os.path.exists(handheld_root_dir):
    # copy to handheld
    podcast_dirs = glob.glob(os.path.join(glob.escape(download_root_dir), '*'))
    for podcast_dir in podcast_dirs:
        podcast_name = os.path.basename(podcast_dir)
        handheld_target_dir = os.path.join(handheld_root_dir, podcast_name)
        for src_file in list_files(podcast_dir):
            if debug == 1:
                print(src_file + ' ' + handheld_target_dir)
            copy_files(src_file, handheld_target_dir)

    # remove old files from handheld
    podcast_dirs = glob.glob(os.path.join(glob.escape(handheld_root_dir), '*'))
    for podcast_dir in podcast_dirs:
        prune_old_files(podcast_dir, old_episodes_keep)
else:
    print('Handheld not detected, skipping...')