123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160 |
#!/usr/bin/env python3
#############################
###### USER VARIABLES #######
#############################
# Local directory that receives the downloads; each podcast gets its own
# sub-directory below it.
download_root_dir = '/home/bryan/Downloads/Podcasts'

# Podcast directory on the handheld player. The handheld step is skipped
# when this path does not exist.
handheld_root_dir = '/run/media/bryan/SANSA CLIPZ/Podcasts'

# Feeds to check (python list). Comment an entry out to disable that feed.
feedurls = [
    'http://linuxactionnews.com/rss',
    'http://wakingup.libsyn.com/rss',
    'http://feeds.soundcloud.com/users/soundcloud:users:146429914/sounds.rss',
    'http://billburr.libsyn.com/rss',
    # 'http://feeds.99percentinvisible.org/99percentinvisible',
    'http://files.libertyfund.org/econtalk/EconTalk.xml',
    'http://jamesandthegiantpodcast.libsyn.com/rss',
    'http://rss.art19.com/tim-ferriss-show',
    # 'http://feed.thisamericanlife.org/talpodcast',
    # 'https://www.npr.org/rss/podcast.php?id=510019',
]

# Number of old episodes to keep per podcast. The same value is handed to
# podcastparser.parse() as its third argument when a feed is read.
old_episodes_keep = 2

# Include the episode name in the filename (1 = on, 0 = off). When off,
# files are named after the release date only.
episode_name_in_filename = 0

# Enable debug output (1 = on, 0 = off).
debug = 0
#################################
###### END USER VARIABLES #######
#################################
import datetime
import glob
import os
import shutil
import time
import urllib.error
import urllib.request

import podcastparser
# spoof headers for certain rss feeds
opener = urllib.request.build_opener()
# Both headers have to live in ONE list: `addheaders` is a plain attribute,
# so assigning it a second time replaces the first list and would silently
# drop the spoofed User-Agent.
opener.addheaders = [
    ('User-Agent',
     'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
     '(KHTML, like Gecko) Chrome/36.0.1941.0 Safari/537.36'),
    ('Content-Type', 'application/json'),
]
# Install globally so every later urlopen()/urlretrieve() call uses it.
urllib.request.install_opener(opener)
#################################
######### GET EPISODES ##########
#################################
def _safe_name(name):
    """Return *name* usable as a single path component.

    Path separators inside a podcast or episode title would otherwise be
    interpreted as nested directories by os.path.join().
    """
    return name.replace(os.sep, '_')


def _download_episode(url, dest_path, podcast_title):
    """Download *url* to *dest_path* and return True on success.

    Failures are reported on stdout instead of being raised, so that one
    broken episode does not abort the run. Whatever was written before the
    failure is deleted again; otherwise a truncated file would be taken for
    a finished download on the next run and never be retried.
    """
    try:
        urllib.request.urlretrieve(url, dest_path)
        return True
    except urllib.error.HTTPError as e:
        print('HTTPError' + ': Could not download ' + podcast_title + '\n'
              + 'Reason: ' + str(e.reason) + '\n' + 'URL: ' + url)
    # ContentTooShortError is a subclass of URLError, so it has to be
    # listed first to be reachable at all.
    except urllib.error.ContentTooShortError:
        print('Download failed, file corrupt!')
    except urllib.error.URLError:
        print('URLError')
    if os.path.exists(dest_path):
        os.remove(dest_path)
    return False


# parse feeds
for feed in feedurls:
    try:
        # the response is closed as soon as the feed has been parsed
        with urllib.request.urlopen(feed) as response:
            # parsed is a dict
            parsed = podcastparser.parse(feed, response, old_episodes_keep)
    except urllib.error.URLError as e:
        # one unreachable feed must not prevent the others from updating
        print('Could not fetch ' + feed + ': ' + str(e.reason))
        continue

    # debug
    if debug == 1:
        import pprint
        pprint.pprint(parsed)

    # get podcast title
    podcast_title = parsed['title']
    print('Checking ' + podcast_title + ' for new episodes...')

    # make sure the download dir exists (parents included)
    podcast_download_dir = os.path.join(download_root_dir,
                                        _safe_name(podcast_title))
    os.makedirs(podcast_download_dir, exist_ok=True)

    # parse episodes
    for episode in parsed['episodes']:

        # format release time to date format; the yy-mm-dd prefix is what
        # makes a plain filename sort chronological in the pruning step
        release_time = int(episode['published'])
        episode_date = datetime.datetime.fromtimestamp(release_time)
        episode_date = episode_date.strftime("%y-%m-%d")

        # debug
        if debug == 1:
            print(release_time)
            print(episode_date)

        # create filename based on episode date and/or episode title
        if episode_name_in_filename == 0:
            episode_title = episode_date
        else:
            episode_title = episode_date + " " + _safe_name(episode['title'])

        # NOTE(review): the indentation of this section was lost in the
        # source; this assumes only 'audio/mpeg' enclosures are meant to be
        # downloaded - confirm. The first such enclosure is used.
        mp3_url = None
        for enclosure in episode['enclosures']:
            url = enclosure['url']
            if debug == 1:
                print(url)
            if mp3_url is None and enclosure['mime_type'] == 'audio/mpeg':
                mp3_url = url
        if mp3_url is None:
            continue

        # The extension is appended exactly once, and the title is not run
        # through os.path.splitext(), which would cut it at its last dot.
        full_episode_path = os.path.join(podcast_download_dir,
                                         episode_title + '.mp3')

        # download file and save to episode title
        if not os.path.exists(full_episode_path):
            print('Downloading ' + episode['title'])
            _download_episode(mp3_url, full_episode_path, podcast_title)

    # remove old episodes
    # A limit of 0 prunes nothing (a `[:-0]` slice is empty). That is kept on
    # purpose: podcastparser documents max_episodes=0 as "no limit", so 0
    # consistently means "keep everything".
    if old_episodes_keep > 0:
        # glob.escape() keeps titles containing [, ] * or ? from being
        # interpreted as a pattern; only regular files are ever unlinked.
        pattern = os.path.join(glob.escape(podcast_download_dir), '*')
        old_files = sorted(f for f in glob.glob(pattern) if os.path.isfile(f))
        for old_file in old_files[:-old_episodes_keep]:
            os.unlink(old_file)
-
#################################
######## MANAGE HANDHELD ########
#################################
def copy_files(src_file, dest_dir):
    """Copy *src_file* into *dest_dir* unless it is already there.

    *dest_dir* is created (including missing parents) when necessary. A file
    counts as already present when a file of the same name exists in
    *dest_dir*; its content is not compared.
    """
    os.makedirs(dest_dir, exist_ok=True)
    dest_file = os.path.join(dest_dir, os.path.basename(src_file))
    if not os.path.exists(dest_file):
        print('Copying ' + src_file + ' to ' + dest_file)
        shutil.copyfile(src_file, dest_file)


def _list_files(directory):
    """Return the sorted regular files directly inside *directory*.

    Sub-directories are left out because neither shutil.copyfile() nor
    os.unlink() can handle them; hidden (dot) files are not matched by '*'.
    The directory name is escaped so that characters such as '[' in a
    podcast title are not treated as glob syntax.
    """
    pattern = os.path.join(glob.escape(directory), '*')
    return sorted(f for f in glob.glob(pattern) if os.path.isfile(f))


def _list_entries(directory):
    """Return every non-hidden entry directly inside *directory*."""
    return glob.glob(os.path.join(glob.escape(directory), '*'))


if os.path.exists(handheld_root_dir):
    # copy to handheld: mirror every podcast directory of the download root
    for podcast_dir in _list_entries(download_root_dir):
        podcast_name = os.path.basename(podcast_dir)
        handheld_target_dir = os.path.join(handheld_root_dir, podcast_name)
        for episode_file in _list_files(podcast_dir):
            if debug == 1:
                print(episode_file + ' ' + handheld_target_dir)
            copy_files(episode_file, handheld_target_dir)

    # remove old files from handheld
    # A limit of 0 prunes nothing (a `[:-0]` slice is empty); the guard only
    # makes that long-standing behaviour explicit.
    if old_episodes_keep > 0:
        for podcast_dir in _list_entries(handheld_root_dir):
            # files sort chronologically by name, so everything except the
            # last `old_episodes_keep` entries is the old material
            for old_file in _list_files(podcast_dir)[:-old_episodes_keep]:
                os.unlink(old_file)
else:
    print('Handheld not detected, skipping...')
-
|