Initial commit

2018-04-11 00:26:37 -07:00
commit e9801d33e5
3 changed files with 199 additions and 0 deletions
--- a/21
+++ b/21
@@ -0,0 +1,21 @@
 MIT License
 Copyright (c) 2018 cryobry
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
--- a/README.md
+++ b/README.md
@@ -0,0 +1,4 @@
 # get_podcasts
 Podcast downloader and portable media player syncer written in Python
 To-do: publish to PyPI
--- a/get_podcasts.py
+++ b/get_podcasts.py
@@ -0,0 +1,174 @@
 #!/usr/bin/env python3
 #############################
 ###### USER VARIABLES #######
 #############################
 # root directory episodes are downloaded into (one sub-directory per podcast)
 download_root_dir = '/home/bryan/Downloads/Podcasts'
 # podcast directory on the handheld; the sync step is skipped when it is absent
 handheld_root_dir = '/run/media/bryan/D5E7-1B94/Podcasts'
 # feed URLs to check (python list)
 feedurls = [
     'http://linuxactionnews.com/rss',
     'http://wakingup.libsyn.com/rss',
     'http://feeds.soundcloud.com/users/soundcloud:users:146429914/sounds.rss',
     'http://billburr.libsyn.com/rss',
     'http://feeds.99percentinvisible.org/99percentinvisible',
     'http://rss.art19.com/tim-ferriss-show',
 ]
 # how many old episodes to keep per podcast
 old_episodes_keep = 3
 # put the episode name in the filename after the date (1 = on, 0 = off)
 episode_name_in_filename = 0
 # print debug output (1 = on, 0 = off)
 debug = 0
 #################################
 ###### END USER VARIABLES #######
 #################################
 import podcastparser
 import urllib.request
 import os
 import time
 import datetime
 import glob
 import shutil
 # install a global opener that sends spoofed browser headers (for certain rss feeds)
 spoofed_headers = [
     ('User-Agent',
      'Mozilla/5.0 (Windows NT 6.1; WOW64) '
      'AppleWebKit/537.36 (KHTML, like Gecko) '
      'Chrome/36.0.1941.0 Safari/537.36'),
 ]
 opener = urllib.request.build_opener()
 opener.addheaders = spoofed_headers
 # from here on urlopen() and urlretrieve() go through this opener
 urllib.request.install_opener(opener)
 # create new timestamp (taken before any feed is fetched)
 timestamp_file = os.path.join(download_root_dir, '.last_sync')
 new_ts = int(time.time())
 # get old timestamp; a missing, empty or unparsable file counts as "never
 # synced" (0) instead of aborting the run with a ValueError
 try:
     with open(timestamp_file) as f:
         old_ts = int(f.readline())
 except (FileNotFoundError, ValueError):
     old_ts = 0
 #################################
 ######### GET EPISODES ##########
 #################################
 # For every feed: parse it, download the episodes published since the last
 # sync into <download_root_dir>/<podcast title>/, then prune old files.
 for feed in feedurls:
     # parsed is a dict; old_episodes_keep is passed as podcastparser's
     # max_episodes argument
     parsed = podcastparser.parse(feed, urllib.request.urlopen(feed), old_episodes_keep)
     # debug
     if debug == 1:
         import pprint
         pprint.pprint(parsed)
     # get podcast title
     podcast_title = parsed['title']
     print('Checking ' + podcast_title + ' for new episdoes...')
     # make sure the download dir exists; makedirs also creates
     # download_root_dir itself on a first run, where os.mkdir would fail
     podcast_download_dir = os.path.join(download_root_dir, podcast_title)
     os.makedirs(podcast_download_dir, exist_ok=True)
     # parse episodes
     for episode in parsed['episodes']:
         # get release time
         release_time = int(episode['published'])
         # debug
         if debug == 1:
             print(old_ts)
             print(release_time)
         # only episodes released after the last sync are downloaded
         if release_time <= old_ts:
             continue
         # format release time to date format
         episode_date = datetime.datetime.fromtimestamp(release_time).strftime("%y-%m-%d")
         # debug
         if debug == 1:
             print(episode_date)
         # an episode without enclosures has nothing to download; without
         # this guard `url` would be unbound or left over from the previous
         # episode
         enclosures = episode['enclosures']
         if not enclosures:
             continue
         # create filename based on episode date and/or episode title
         if episode_name_in_filename == 0:
             episode_title = episode_date
         else:
             episode_title = episode_date + " " + episode['title']
         full_episode_path = os.path.splitext(os.path.join(podcast_download_dir, episode_title))[0]
         # get download url: prefer an MP3 enclosure and add '.mp3' exactly
         # once (the suffix used to be appended once per MP3 enclosure);
         # otherwise fall back to the last enclosure listed, without a suffix
         mp3_urls = [enc['url'] for enc in enclosures if enc['mime_type'] == 'audio/mpeg']
         if mp3_urls:
             url = mp3_urls[0]
             full_episode_path += '.mp3'
         else:
             url = enclosures[-1]['url']
         # download file and save to episode title
         if os.path.exists(full_episode_path):
             continue
         print('Downloading ' + episode['title'])
         try:
             urllib.request.urlretrieve(url, full_episode_path)
         except urllib.error.HTTPError as e:
             print('HTTPError' + ': Could not download ' + podcast_title + '\n' + 'Reason: ' + str(e.reason) + '\n' + 'URL: ' + url)
         except urllib.error.ContentTooShortError:
             # must be listed before URLError (its base class) to be reachable
             print('Download failed, file corrupt!')
             # remove the truncated file so it is not mistaken for a
             # finished download by the os.path.exists() check above
             if os.path.exists(full_episode_path):
                 os.remove(full_episode_path)
         except urllib.error.URLError:
             print('URLError')
     # remove old episodes: file names start with the yy-mm-dd release date,
     # so sorting by name puts the oldest files first
     # NOTE: with old_episodes_keep == 0 the slice [:-0] is empty, so nothing is removed
     old_files = sorted(glob.glob(os.path.join(podcast_download_dir, '*')))
     for old_file in old_files[:-old_episodes_keep]:
         os.unlink(old_file)
 # store new_ts as the last-sync marker read by the next run
 with open(timestamp_file, 'w') as ts_out:
     ts_out.write(str(new_ts))
 #################################
 ######## MANAGE HANDHELD ########
 #################################        
 if os.path.exists(handheld_root_dir):
  # define copy to handheld function
  def copy_files(src_file, dest_dir):
      """Copy src_file into dest_dir unless a file of that name is already there.

      dest_dir is created first if it is missing (including any missing
      parent directories).  An existing destination file is left untouched.
      """
      # os.makedirs replaces the non-existent os.makedir, which raised
      # AttributeError the first time a new directory was needed
      os.makedirs(dest_dir, exist_ok=True)
      dest_file = os.path.join(dest_dir, os.path.basename(src_file))
      if not os.path.exists(dest_file):
          print('Copying ' + src_file + ' to ' + dest_file)
          shutil.copyfile(src_file, dest_file)
  # copy to handheld: each directory under the download root is mirrored
  # into a directory of the same name on the handheld
  for podcast_dir in glob.glob(os.path.join(download_root_dir, '*')):
      handheld_target_dir = os.path.join(handheld_root_dir, os.path.basename(podcast_dir))
      for episode_file in glob.glob(os.path.join(podcast_dir, '*')):
          if debug == 1:
              print(episode_file + ' ' + handheld_target_dir)
          copy_files(episode_file, handheld_target_dir)
  # remove old files from handheld: sort each directory by name and delete
  # everything except the last old_episodes_keep entries
  for handheld_dir in glob.glob(os.path.join(handheld_root_dir, '*')):
      handheld_files = sorted(glob.glob(os.path.join(handheld_dir, '*')))
      for stale_file in handheld_files[:-old_episodes_keep]:
          os.unlink(stale_file)
 else:
  print('Handheld not detected, skipping...')