Files
get_podcasts/get_podcasts.py
2018-04-11 00:26:37 -07:00

175 lines
5.3 KiB
Python
Executable File

#!/usr/bin/env python3
#############################
###### USER VARIABLES #######
#############################
# Root directory that downloaded episodes are stored under
# (one sub-directory per podcast is created inside it).
download_root_dir = '/home/bryan/Downloads/Podcasts'

# Root directory on the handheld device that episodes are copied to.
handheld_root_dir = '/run/media/bryan/D5E7-1B94/Podcasts'

# RSS feed URLs to check (python list).
feedurls = [
    'http://linuxactionnews.com/rss',
    'http://wakingup.libsyn.com/rss',
    'http://feeds.soundcloud.com/users/soundcloud:users:146429914/sounds.rss',
    'http://billburr.libsyn.com/rss',
    'http://feeds.99percentinvisible.org/99percentinvisible',
    'http://rss.art19.com/tim-ferriss-show',
]

# How many old episodes to keep per podcast.
old_episodes_keep = 3

# Put the episode name in the filename after the date (1 = on, 0 = off).
episode_name_in_filename = 0

# Print debug output while running (1 = on, 0 = off).
debug = 0
#################################
###### END USER VARIABLES #######
#################################
import podcastparser
import urllib.request
import os
import time
import datetime
import glob
import shutil
# Certain rss feeds need spoofed headers, so every urllib request made by
# this script goes out with a browser User-Agent.
opener = urllib.request.build_opener()
opener.addheaders = [
    (
        'User-Agent',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/36.0.1941.0 Safari/537.36',
    ),
]
# Installing the opener makes urlopen() and urlretrieve() use it.
urllib.request.install_opener(opener)
# create new timestamp: the start time of this run, written to the
# timestamp file once the feeds have been processed
timestamp_file = os.path.join(download_root_dir, '.last_sync')
new_ts = int(time.time())
# get old timestamp; fall back to 0 when the file is missing, empty or does
# not hold an integer, so an unreadable timestamp file does not abort the run
try:
    with open(timestamp_file) as f:
        old_ts = int(f.readline())
except (FileNotFoundError, ValueError):
    old_ts = 0
#################################
######### GET EPISODES ##########
#################################
# For every configured feed: download the episodes published since the last
# sync, then prune the podcast directory.  The new timestamp is written once
# all feeds have been processed.
# NOTE(review): the timestamp advances even when a download failed, so a
# failed episode is not retried on the next run -- confirm this is intended.
# parse feeds
for feed in feedurls:
    # parsed is a dict; the response is closed as soon as parsing is done
    with urllib.request.urlopen(feed) as feed_stream:
        parsed = podcastparser.parse(feed, feed_stream, old_episodes_keep)
    # debug
    if debug == 1:
        import pprint
        pprint.pprint(parsed)
    # get podcast title
    podcast_title = parsed['title']
    print('Checking ' + podcast_title + ' for new episdoes...')
    # The title comes from the feed; replace path separators so it always
    # names a single directory directly below download_root_dir.
    podcast_download_dir = os.path.join(
        download_root_dir, podcast_title.replace(os.sep, '_'))
    # make the download dir (and any missing parent) if it does not exist
    os.makedirs(podcast_download_dir, exist_ok=True)
    # parse episodes
    for episode in parsed['episodes']:
        # get release time
        release_time = int(episode['published'])
        # debug
        if debug == 1:
            print(old_ts)
            print(release_time)
        # only episodes released after the previous sync are downloaded
        if release_time <= old_ts:
            continue
        # format release time to date format
        episode_date = datetime.datetime.fromtimestamp(release_time)
        episode_date = episode_date.strftime("%y-%m-%d")
        # debug
        if debug == 1:
            print(episode_date)
        # create filename based on episode date and/or episode title
        if episode_name_in_filename == 0:
            episode_title = episode_date
        else:
            # the episode title is feed-supplied too: keep it inside the dir
            episode_title = episode_date + " " + episode['title'].replace(os.sep, '_')
        # NOTE(review): splitext drops everything after the last '.' in the
        # file name, which also shortens titles that merely contain a dot.
        episode_base_path = os.path.splitext(
            os.path.join(podcast_download_dir, episode_title))[0]
        for enclosure in episode['enclosures']:
            # get download url
            url = enclosure['url']
            # Start from the base path for every enclosure so the suffix is
            # never appended more than once.
            full_episode_path = episode_base_path
            if enclosure['mime_type'] == 'audio/mpeg':
                full_episode_path += '.mp3'
            # download file and save to episode title
            if os.path.exists(full_episode_path):
                continue
            try:
                print('Downloading ' + episode['title'])
                urllib.request.urlretrieve(url, full_episode_path)
            except urllib.error.HTTPError as e:
                print('HTTPError' + ': Could not download ' + podcast_title + '\n' + 'Reason: ' + str(e.reason) + '\n' + 'URL: ' + url)
            except urllib.error.ContentTooShortError:
                # Must be listed before URLError (its parent class),
                # otherwise this handler can never run.
                print('Download failed, file corrupt!')
                # do not keep the truncated file around as if it were complete
                if os.path.exists(full_episode_path):
                    os.unlink(full_episode_path)
            except urllib.error.URLError:
                print('URLError')
    # remove old episodes: files written by this script start with the
    # yy-mm-dd release date, so a sorted list has the oldest first.
    # With old_episodes_keep = 0 the slice below is empty and nothing is
    # removed.
    old_files = sorted(glob.glob(os.path.join(podcast_download_dir, '*')))
    for file in old_files[:-old_episodes_keep]:
        os.unlink(file)
# write new timestamp file
with open(timestamp_file, 'w') as outf:
    outf.write(str(new_ts))
#################################
######## MANAGE HANDHELD ########
#################################
# define copy to handheld function
def copy_files(src_file, dest_dir):
    """Copy src_file into dest_dir unless it is already there.

    dest_dir is created (with any missing parents) when it does not exist.
    The copy keeps the base name of src_file; an existing file of that name
    in dest_dir is left untouched.
    """
    # os.makedirs with exist_ok=True replaces the isdir check and the call
    # to os.makedir, which is not a function of the os module.
    os.makedirs(dest_dir, exist_ok=True)
    dest_file = os.path.join(dest_dir, os.path.basename(src_file))
    if not os.path.exists(dest_file):
        print('Copying ' + src_file + ' to ' + dest_file)
        shutil.copyfile(src_file, dest_file)


# The handheld is only touched when its root directory exists.
if os.path.exists(handheld_root_dir):
    # copy to handheld: every entry below the download root is copied into
    # a directory of the same name on the handheld
    for podcast_dir in glob.glob(os.path.join(download_root_dir, '*')):
        podcast_name = os.path.basename(podcast_dir)
        handheld_target_dir = os.path.join(handheld_root_dir, podcast_name)
        for file in glob.glob(os.path.join(podcast_dir, '*')):
            if debug == 1:
                print(file + ' ' + handheld_target_dir)
            copy_files(file, handheld_target_dir)
    # remove old files from handheld: after sorting by name, everything
    # except the last old_episodes_keep entries of each directory is deleted
    for podcast_dir in glob.glob(os.path.join(handheld_root_dir, '*')):
        old_files = sorted(glob.glob(os.path.join(podcast_dir, '*')))
        for file in old_files[:-old_episodes_keep]:
            os.unlink(file)
else:
    print('Handheld not detected, skipping...')