From e9801d33e53747712c25053c03229f61f3cdfb40 Mon Sep 17 00:00:00 2001 From: cryobry <38270216+cryobry@users.noreply.github.com> Date: Wed, 11 Apr 2018 00:26:37 -0700 Subject: [PATCH] Initial commit --- LICENSE | 21 ++++++ README.md | 4 ++ get_podcasts.py | 174 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 199 insertions(+) create mode 100644 LICENSE create mode 100644 README.md create mode 100755 get_podcasts.py diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..8a71722 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2018 cryobry + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
#!/usr/bin/env python3
"""Podcast downloader and portable media player syncer written in Python.

To-do: PyPI
"""

#############################
###### USER VARIABLES #######
#############################

# set download root dir
download_root_dir = '/home/bryan/Downloads/Podcasts'

# set handheld directory
handheld_root_dir = '/run/media/bryan/D5E7-1B94/Podcasts'

# feeds (python list)
feedurls = [
    'http://linuxactionnews.com/rss',
    'http://wakingup.libsyn.com/rss',
    'http://feeds.soundcloud.com/users/soundcloud:users:146429914/sounds.rss',
    'http://billburr.libsyn.com/rss',
    'http://feeds.99percentinvisible.org/99percentinvisible',
    'http://rss.art19.com/tim-ferriss-show',
]

# number of old episodes to keep
old_episodes_keep = 3

# include episode name in filename (1 = on, 0 = off)
episode_name_in_filename = 0

# enable debug output (1 = on, 0 = off)
debug = 0

#################################
###### END USER VARIABLES #######
#################################

import podcastparser
import urllib.request
import os
import time
import datetime
import glob
import shutil

# spoof headers for certain rss feeds
# (installed globally, so every later urllib.request call sends this User-Agent)
opener = urllib.request.build_opener()
opener.addheaders = [('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1941.0 Safari/537.36')]
urllib.request.install_opener(opener)

# create new timestamp
# (taken now, before any feed is fetched; it is written to timestamp_file
# only after the feeds have been processed)
timestamp_file = os.path.join(download_root_dir, '.last_sync')
new_ts = int(time.time())

# get old timestamp
# A missing, empty or corrupt timestamp file is treated as "never synced"
# (old_ts = 0) instead of aborting the whole run with an exception.
try:
    with open(timestamp_file) as f:
        old_ts = int(f.readline())
except (FileNotFoundError, ValueError):
    old_ts = 0
#################################
######### GET EPISODES ##########
#################################


def safe_name(name):
    """Return *name* with path separators replaced by '-'.

    Feed and episode titles are used as directory / file names; a title
    containing the path separator would otherwise point into a
    non-existent sub-directory.
    """
    return name.replace(os.sep, '-')


def enclosure_path(base_path, enclosure, index=0):
    """Return the download path for one enclosure of an episode.

    base_path -- episode path without extension
    enclosure -- enclosure dict; only its 'mime_type' key is read here
    index     -- position of the enclosure within the episode

    'audio/mpeg' enclosures get a '.mp3' extension, anything else gets no
    extension. Each path is derived from the unmodified base path, so the
    extension can no longer pile up ('.mp3.mp3') when an episode carries
    several enclosures; enclosures after the first get a ' (N)' marker so
    they do not collide with the first one.
    """
    suffix = '.mp3' if enclosure['mime_type'] == 'audio/mpeg' else ''
    if index:
        return base_path + ' (' + str(index + 1) + ')' + suffix
    return base_path + suffix


def prune_old_files(directory, keep):
    """Delete all but the last *keep* entries of *directory*.

    Entries are ordered by sorting their paths, so with date-prefixed file
    names the most recent episodes are the ones kept. A *keep* of 0 (or
    less) removes every entry; the plain slice ``entries[:-0]`` would be
    empty and remove nothing.
    """
    entries = sorted(glob.glob(os.path.join(directory, '*')))
    stale = entries[:-keep] if keep > 0 else entries
    for path in stale:
        os.unlink(path)


def download_enclosure(url, target_path, podcast_title, episode_title):
    """Download *url* to *target_path* unless that file already exists.

    Download errors are reported on stdout and otherwise ignored so one
    broken episode does not stop the remaining feeds.
    """
    # Imported explicitly instead of relying on urllib.request having
    # loaded urllib.error as a side effect.
    from urllib.error import ContentTooShortError, HTTPError, URLError

    if os.path.exists(target_path):
        return
    try:
        print('Downloading ' + episode_title)
        urllib.request.urlretrieve(url, target_path)
    except HTTPError as e:
        print('HTTPError' + ': Could not download ' + podcast_title + '\n' + 'Reason: ' + str(e.reason) + '\n' + 'URL: ' + url)
    except ContentTooShortError:
        # Must be listed before URLError: ContentTooShortError is a
        # subclass of URLError and would never be reached otherwise.
        print('Download failed, file corrupt!')
        # Drop the truncated file so it is not kept or synced as an episode.
        if os.path.exists(target_path):
            os.unlink(target_path)
    except URLError:
        print('URLError')


def download_episode(episode, podcast_title, podcast_download_dir):
    """Download the enclosures of *episode* if it was published after old_ts."""
    # get release time
    release_time = int(episode['published'])

    # debug
    if debug == 1:
        print(old_ts)
        print(release_time)

    if release_time <= old_ts:
        return

    # format release time to date format
    episode_date = datetime.datetime.fromtimestamp(release_time).strftime("%y-%m-%d")

    # debug
    if debug == 1:
        print(episode_date)

    # create filename based on episode date and/or episode title
    if episode_name_in_filename == 0:
        episode_title = episode_date
    else:
        episode_title = episode_date + " " + safe_name(episode['title'])

    base_path = os.path.splitext(os.path.join(podcast_download_dir, episode_title))[0]

    for index, enclosure in enumerate(episode['enclosures']):
        download_enclosure(
            enclosure['url'],
            enclosure_path(base_path, enclosure, index),
            podcast_title,
            episode['title'],
        )


def fetch_new_episodes():
    """Parse every feed in feedurls, download new episodes, prune old ones."""
    for feed in feedurls:

        # parsed is a dict; old_episodes_keep is passed as the parser's
        # third positional argument. The response is closed once parsed.
        with urllib.request.urlopen(feed) as response:
            parsed = podcastparser.parse(feed, response, old_episodes_keep)

        # debug
        if debug == 1:
            import pprint
            pprint.pprint(parsed)

        # get podcast title
        podcast_title = parsed['title']
        print('Checking ' + podcast_title + ' for new episodes...')

        # make sure the download dir (and the download root) exists
        podcast_download_dir = os.path.join(download_root_dir, safe_name(podcast_title))
        os.makedirs(podcast_download_dir, exist_ok=True)

        # parse episodes
        for episode in parsed['episodes']:
            download_episode(episode, podcast_title, podcast_download_dir)

        # remove old episodes
        prune_old_files(podcast_download_dir, old_episodes_keep)


#################################
######## MANAGE HANDHELD ########
#################################


def copy_files(src_file, dest_dir):
    """Copy *src_file* into *dest_dir* unless a file of that name is there.

    *dest_dir* is created (including missing parents) when it does not
    exist yet. An existing destination file is left untouched.
    """
    os.makedirs(dest_dir, exist_ok=True)
    dest_file = os.path.join(dest_dir, os.path.basename(src_file))
    if not os.path.exists(dest_file):
        print('Copying ' + src_file + ' to ' + dest_file)
        shutil.copyfile(src_file, dest_file)


def sync_handheld():
    """Mirror the downloaded podcasts onto the handheld, if it is present."""
    if not os.path.exists(handheld_root_dir):
        print('Handheld not detected, skipping...')
        return

    # copy to handheld
    for podcast_dir in glob.glob(os.path.join(download_root_dir, '*')):
        podcast_name = os.path.basename(podcast_dir)
        handheld_target_dir = os.path.join(handheld_root_dir, podcast_name)
        for file in glob.glob(os.path.join(podcast_dir, '*')):
            if debug == 1:
                print(file + ' ' + handheld_target_dir)
            copy_files(file, handheld_target_dir)

    # remove old files from handheld
    for podcast_dir in glob.glob(os.path.join(handheld_root_dir, '*')):
        prune_old_files(podcast_dir, old_episodes_keep)


def main():
    """Fetch new episodes, record the sync time, then update the handheld."""
    fetch_new_episodes()

    # write new timestamp file
    # NOTE(review): the timestamp advances even when a download above
    # failed, so such an episode is not retried on the next run.
    with open(timestamp_file, 'w') as outf:
        outf.write(str(new_ts))

    sync_handheld()


if __name__ == '__main__':
    main()