# get_podcasts/get_podcasts.py

#!/usr/bin/env python3

#############################
###### USER VARIABLES #######
#############################

# directory on this machine under which one sub-directory per podcast is created
download_root_dir = '/home/bryan/Downloads/Podcasts'

# directory on the handheld player that mirrors the download directory
handheld_root_dir = '/run/media/bryan/SANSA CLIPZ/Podcasts'

# RSS feeds to check (python list); comment a line out to disable that feed
feedurls = [
    'http://linuxactionnews.com/rss',
    'http://wakingup.libsyn.com/rss',
    'http://feeds.soundcloud.com/users/soundcloud:users:146429914/sounds.rss',
    'http://billburr.libsyn.com/rss',
    # 'http://feeds.99percentinvisible.org/99percentinvisible',
    'http://files.libertyfund.org/econtalk/EconTalk.xml',
    'http://jamesandthegiantpodcast.libsyn.com/rss',
    'http://rss.art19.com/tim-ferriss-show',
    # 'http://feed.thisamericanlife.org/talpodcast',
    # 'https://www.npr.org/rss/podcast.php?id=510019',
]

# how many episodes to request per feed and to leave on disk when pruning
old_episodes_keep = 2

# prepend nothing but the date (0) or use "date + episode title" (1) as filename
episode_name_in_filename = 0

# print extra diagnostic output (1 = on, 0 = off)
debug = 0

#################################
###### END USER VARIABLES #######
#################################

import podcastparser
import urllib.request
import os
import time
import datetime
import glob
import shutil

# spoof headers for certain rss feeds
# NOTE: addheaders is a plain list attribute, so assigning it a second time
# throws away the first value. Both headers therefore go into one list;
# otherwise the browser User-Agent is never actually sent.
opener = urllib.request.build_opener()
opener.addheaders = [
  ('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1941.0 Safari/537.36'),
  ('Content-Type', 'application/json'),
]
# make every later urllib.request.urlopen()/urlretrieve() call use this opener
urllib.request.install_opener(opener)

#################################
######### GET EPISODES ##########
#################################

# parse feeds

def _safe_name(name):
  """Return name with path separators replaced so it is a single path component."""
  return name.replace(os.sep, '-')


def _pick_enclosure(enclosures):
  """Choose which enclosure of an episode to download.

  Returns a (url, extension) tuple, or None when the episode has no
  enclosures. The first 'audio/mpeg' enclosure wins and gets '.mp3'; if there
  is none, the last enclosure is used with no extension (the behaviour the
  script always had for non-mp3 feeds).
  """
  if not enclosures:
    return None
  for enclosure in enclosures:
    if enclosure.get('mime_type') == 'audio/mpeg':
      return enclosure['url'], '.mp3'
  return enclosures[-1]['url'], ''


def _remove_partial(path):
  """Delete a partially written download so the next run retries it."""
  try:
    os.unlink(path)
  except FileNotFoundError:
    pass


for feed in feedurls:

  # fetch and parse the feed; an unreachable feed must not abort the others.
  # parsed is a dict. The third argument is presumably podcastparser's
  # episode limit -- TODO confirm against the installed podcastparser version.
  try:
    with urllib.request.urlopen(feed) as response:
      parsed = podcastparser.parse(feed, response, old_episodes_keep)
  except urllib.error.URLError as e:
    print('Could not fetch feed ' + feed + '\n' + 'Reason: ' + str(e.reason))
    continue

  # debug
  if debug == 1:
    import pprint
    pprint.pprint(parsed)

  # get podcast title
  podcast_title = parsed['title']
  print('Checking ' + podcast_title + ' for new episodes...')

  # make sure the download dir exists (including a missing download root)
  podcast_download_dir = os.path.join(download_root_dir, _safe_name(podcast_title))
  os.makedirs(podcast_download_dir, exist_ok=True)

  # parse episodes
  for episode in parsed['episodes']:

    # format release time (a timestamp) to a sortable yy-mm-dd date string
    release_time = int(episode['published'])
    episode_date = datetime.datetime.fromtimestamp(release_time).strftime("%y-%m-%d")

    # debug
    if debug == 1:
      print(release_time)
      print(episode_date)
      for enclosure in episode['enclosures']:
        print(enclosure['url'])

    # create filename based on episode date and/or episode title
    if episode_name_in_filename == 0:
      episode_title = episode_date
    else:
      episode_title = episode_date + " " + _safe_name(episode['title'])

    # without an enclosure there is nothing to download; never reuse the
    # url left over from a previous episode
    selected = _pick_enclosure(episode['enclosures'])
    if selected is None:
      print('No enclosure found for ' + episode['title'] + ', skipping...')
      continue
    url, extension = selected

    # no splitext here: it would cut the title at its last '.'
    full_episode_path = os.path.join(podcast_download_dir, episode_title) + extension

    # already downloaded on an earlier run
    if os.path.exists(full_episode_path):
      continue

    # download file and save to episode title
    print('Downloading ' + episode['title'])
    try:
      urllib.request.urlretrieve(url, full_episode_path)
    except urllib.error.HTTPError as e:
      print('HTTPError' + ': Could not download ' + podcast_title + '\n' + 'Reason: ' + str(e.reason) + '\n' + 'URL: ' + url)
    except urllib.error.ContentTooShortError:
      # must be listed before URLError, which is its base class
      print('Download failed, file corrupt!')
      _remove_partial(full_episode_path)
    except urllib.error.URLError:
      print('URLError')
      _remove_partial(full_episode_path)

  # remove old episodes: names start with yy-mm-dd, so a plain sort is
  # oldest-first. A slice ending at -0 is empty, so a keep count of 0 always
  # meant "prune nothing"; the guard makes that explicit and ignores negatives.
  if old_episodes_keep > 0:
    # escape the directory so characters like '[' in a title are not globbed
    old_files = sorted(glob.glob(os.path.join(glob.escape(podcast_download_dir), '*')))
    for file in old_files[:-old_episodes_keep]:
      os.unlink(file)

#################################
######## MANAGE HANDHELD ########
#################################

if not os.path.exists(handheld_root_dir):
  print('Handheld not detected, skipping...')

else:

  def copy_files(src_file, dest_dir):
    """Copy src_file into dest_dir unless a file of that name is already there.

    dest_dir is created first when it is not an existing directory.
    """
    if not os.path.isdir(dest_dir):
      os.mkdir(dest_dir)
    dest_file = os.path.join(dest_dir, os.path.basename(src_file))
    if os.path.exists(dest_file):
      return
    print('Copying ' + src_file + ' to ' + dest_file)
    shutil.copyfile(src_file, dest_file)


  # mirror every entry of every download sub-directory onto the handheld,
  # using the same sub-directory name there
  for source_dir in glob.glob(os.path.join(download_root_dir, '*')):
    handheld_target_dir = os.path.join(handheld_root_dir, os.path.basename(source_dir))
    for file in glob.glob(os.path.join(source_dir, '*')):
      if debug == 1:
        print(file + ' ' + handheld_target_dir)
      copy_files(file, handheld_target_dir)

  # prune the handheld: in each of its sub-directories, sort the entries by
  # name and delete all but the last old_episodes_keep of them
  for device_dir in glob.glob(os.path.join(handheld_root_dir, '*')):
    device_files = sorted(glob.glob(os.path.join(device_dir, '*')))
    for file in device_files[:-old_episodes_keep]:
      os.unlink(file)