Browse Source

Initial commit

cryobry 6 years ago
commit
e9801d33e5
3 changed files with 199 additions and 0 deletions
  1. 21 0
      LICENSE
  2. 4 0
      README.md
  3. 174 0
      get_podcasts.py

+ 21 - 0
LICENSE

@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2018 cryobry
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

+ 4 - 0
README.md

@@ -0,0 +1,4 @@
+# get_podcasts
+Podcast downloader and portable media player syncer written in Python
+
+To-do: PyPI

+ 174 - 0
get_podcasts.py

@@ -0,0 +1,174 @@
+#!/usr/bin/env python3
+
+#############################
+###### USER VARIABLES #######
+#############################
+
+# set download root dir
+download_root_dir = '/home/bryan/Downloads/Podcasts'
+
+# set handheld directory
+handheld_root_dir = '/run/media/bryan/D5E7-1B94/Podcasts'
+
+# feeds (python list)
+feedurls = ['http://linuxactionnews.com/rss', \
+            'http://wakingup.libsyn.com/rss', \
+            'http://feeds.soundcloud.com/users/soundcloud:users:146429914/sounds.rss', \
+            'http://billburr.libsyn.com/rss', \
+            'http://feeds.99percentinvisible.org/99percentinvisible', \
+            'http://rss.art19.com/tim-ferriss-show']
+
+# number of old episodes to keep
+old_episodes_keep = 3
+
+# include episode name in filename (1 = on, 0 = off)
+episode_name_in_filename = 0
+
+# enable debug output (1 = on, 0 = off)
+debug = 0
+
+#################################
+###### END USER VARIABLES #######
+#################################
+
+import podcastparser
+import urllib.request
+import os
+import time
+import datetime
+import glob
+import shutil
+
+# spoof headers for certain rss feeds
+opener=urllib.request.build_opener()
+opener.addheaders=[('User-Agent','Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1941.0 Safari/537.36')]
+urllib.request.install_opener(opener)
+
+# create new timestamp
+timestamp_file = os.path.join(download_root_dir, '.last_sync')
+new_ts = int(time.time())
+
+# get old timestamp
+if os.path.exists(timestamp_file):
+  with open(timestamp_file) as f:
+    old_ts = int(f.readline())
+else:
+  old_ts = 0
+
+#################################
+######### GET EPISODES ##########
+#################################
+
+# parse feeds
+for feed in feedurls:
+
+  # parsed is a dict
+  parsed = podcastparser.parse(feed, urllib.request.urlopen(feed), old_episodes_keep)
+  
+  # debug
+  if debug == 1:
+    import pprint
+    pprint.pprint(parsed)
+  
+  # get podcast title
+  podcast_title = parsed['title']
+  print('Checking ' + podcast_title + ' for new episdoes...')
+  
+  # check if download dir exists and if not, make it
+  podcast_download_dir = os.path.join(download_root_dir, podcast_title)
+  if not os.path.exists(podcast_download_dir):
+    os.mkdir(podcast_download_dir)
+
+  # parse episodes
+  for episode in parsed['episodes']:
+    # get release time
+    release_time = int(episode['published'])
+    
+    # debug
+    if debug == 1:
+      print(old_ts)
+      print(release_time)
+    
+    if release_time > old_ts:
+      # format release time to date format
+      episode_date = datetime.datetime.fromtimestamp(release_time)
+      episode_date = episode_date.strftime("%y-%m-%d")
+      
+      # debug
+      if debug == 1:
+        print(episode_date)
+        
+      # create filename based on episode date and/or episode title
+      if episode_name_in_filename == 0:
+        episode_title = episode_date
+      else:
+        episode_title = episode_date + " " + episode['title']
+        
+      full_episode_path = os.path.splitext(os.path.join(podcast_download_dir, episode_title))[0]
+    
+      for enclosures in episode['enclosures']:
+         # get download url
+         url = enclosures['url']
+         if enclosures['mime_type'] == 'audio/mpeg':
+           full_episode_path += '.mp3'
+      # download file and save to episode title
+      if not os.path.exists(full_episode_path):
+        try:
+          print('Downloading ' + episode['title'])
+          urllib.request.urlretrieve(url, full_episode_path)
+        except urllib.error.HTTPError as e:
+          print('HTTPError' + ': Could not download ' + podcast_title + '\n' + 'Reason: ' + e.reason + '\n' + 'URL: ' + url)
+        except urllib.error.URLError:
+          print('URLError')
+        except urllib.error.ContentTooShortError:
+          print('Download failed, file corrupt!')
+
+  # remove old episodes
+  old_files = glob.glob(os.path.join(podcast_download_dir, '*'))
+  old_files.sort()
+  for file in old_files[:-old_episodes_keep]:
+    os.unlink(file)
+          
+# write new timestamp file
+with open(timestamp_file, 'w') as outf:
+  outf.write(str(new_ts))
+        
+#################################
+######## MANAGE HANDHELD ########
+#################################        
+
+if os.path.exists(handheld_root_dir):
+
+  # define copy to handheld function
+  def copy_files(src_file, dest_dir):
+    if not os.path.isdir(dest_dir):
+      os.makedir(dest_dir)
+    filename = os.path.basename(src_file)
+    dest_file = os.path.join(dest_dir, filename)
+    if not os.path.exists(dest_file):
+      print('Copying ' + src_file + ' to ' + dest_file)
+      shutil.copyfile(src_file, dest_file)          
+
+      
+  # copy to handheld
+  podcast_dirs = glob.glob(os.path.join(download_root_dir, '*'))
+  for podcast_dir in podcast_dirs:
+    podcast_name = os.path.basename(podcast_dir)
+    handheld_target_dir = os.path.join(handheld_root_dir, podcast_name)
+    files_to_copy = glob.glob(os.path.join(podcast_dir, '*'))
+    for file in files_to_copy:
+      if debug == 1:
+        print(file + ' ' + handheld_target_dir)
+      copy_files(file, handheld_target_dir)
+
+  # remove old files from handheld
+  podcast_dirs = glob.glob(os.path.join(handheld_root_dir, '*'))
+  for podcast_dir in podcast_dirs:
+    old_files = glob.glob(os.path.join(podcast_dir, '*'))
+    old_files.sort()
+    for file in old_files[:-old_episodes_keep]:
+      os.unlink(file)
+
+else:
+  print('Handheld not detected, skipping...')
+