# get_podcasts.py (5.3 KB)
  1. #!/usr/bin/env python3
  2. #############################
  3. ###### USER VARIABLES #######
  4. #############################
  5. # set download root dir
  6. download_root_dir = '/home/bryan/Downloads/Podcasts'
  7. # set handheld directory
  8. handheld_root_dir = '/run/media/bryan/D5E7-1B94/Podcasts'
  9. # feeds (python list)
  10. feedurls = ['http://linuxactionnews.com/rss', \
  11. 'http://wakingup.libsyn.com/rss', \
  12. 'http://feeds.soundcloud.com/users/soundcloud:users:146429914/sounds.rss', \
  13. 'http://billburr.libsyn.com/rss', \
  14. 'http://feeds.99percentinvisible.org/99percentinvisible', \
  15. 'http://rss.art19.com/tim-ferriss-show']
  16. # number of old episodes to keep
  17. old_episodes_keep = 3
  18. # include episode name in filename (1 = on, 0 = off)
  19. episode_name_in_filename = 0
  20. # enable debug output (1 = on, 0 = off)
  21. debug = 0
  22. #################################
  23. ###### END USER VARIABLES #######
  24. #################################
  25. import podcastparser
  26. import urllib.request
  27. import os
  28. import time
  29. import datetime
  30. import glob
  31. import shutil
  32. # spoof headers for certain rss feeds
  33. opener=urllib.request.build_opener()
  34. opener.addheaders=[('User-Agent','Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1941.0 Safari/537.36')]
  35. urllib.request.install_opener(opener)
  36. # create new timestamp
  37. timestamp_file = os.path.join(download_root_dir, '.last_sync')
  38. new_ts = int(time.time())
  39. # get old timestamp
  40. if os.path.exists(timestamp_file):
  41. with open(timestamp_file) as f:
  42. old_ts = int(f.readline())
  43. else:
  44. old_ts = 0
  45. #################################
  46. ######### GET EPISODES ##########
  47. #################################
  48. # parse feeds
  49. for feed in feedurls:
  50. # parsed is a dict
  51. parsed = podcastparser.parse(feed, urllib.request.urlopen(feed), old_episodes_keep)
  52. # debug
  53. if debug == 1:
  54. import pprint
  55. pprint.pprint(parsed)
  56. # get podcast title
  57. podcast_title = parsed['title']
  58. print('Checking ' + podcast_title + ' for new episdoes...')
  59. # check if download dir exists and if not, make it
  60. podcast_download_dir = os.path.join(download_root_dir, podcast_title)
  61. if not os.path.exists(podcast_download_dir):
  62. os.mkdir(podcast_download_dir)
  63. # parse episodes
  64. for episode in parsed['episodes']:
  65. # get release time
  66. release_time = int(episode['published'])
  67. # debug
  68. if debug == 1:
  69. print(old_ts)
  70. print(release_time)
  71. if release_time > old_ts:
  72. # format release time to date format
  73. episode_date = datetime.datetime.fromtimestamp(release_time)
  74. episode_date = episode_date.strftime("%y-%m-%d")
  75. # debug
  76. if debug == 1:
  77. print(episode_date)
  78. # create filename based on episode date and/or episode title
  79. if episode_name_in_filename == 0:
  80. episode_title = episode_date
  81. else:
  82. episode_title = episode_date + " " + episode['title']
  83. full_episode_path = os.path.splitext(os.path.join(podcast_download_dir, episode_title))[0]
  84. for enclosures in episode['enclosures']:
  85. # get download url
  86. url = enclosures['url']
  87. if enclosures['mime_type'] == 'audio/mpeg':
  88. full_episode_path += '.mp3'
  89. # download file and save to episode title
  90. if not os.path.exists(full_episode_path):
  91. try:
  92. print('Downloading ' + episode['title'])
  93. urllib.request.urlretrieve(url, full_episode_path)
  94. except urllib.error.HTTPError as e:
  95. print('HTTPError' + ': Could not download ' + podcast_title + '\n' + 'Reason: ' + e.reason + '\n' + 'URL: ' + url)
  96. except urllib.error.URLError:
  97. print('URLError')
  98. except urllib.error.ContentTooShortError:
  99. print('Download failed, file corrupt!')
  100. # remove old episodes
  101. old_files = glob.glob(os.path.join(podcast_download_dir, '*'))
  102. old_files.sort()
  103. for file in old_files[:-old_episodes_keep]:
  104. os.unlink(file)
  105. # write new timestamp file
  106. with open(timestamp_file, 'w') as outf:
  107. outf.write(str(new_ts))
  108. #################################
  109. ######## MANAGE HANDHELD ########
  110. #################################
  111. if os.path.exists(handheld_root_dir):
  112. # define copy to handheld function
  113. def copy_files(src_file, dest_dir):
  114. if not os.path.isdir(dest_dir):
  115. os.makedir(dest_dir)
  116. filename = os.path.basename(src_file)
  117. dest_file = os.path.join(dest_dir, filename)
  118. if not os.path.exists(dest_file):
  119. print('Copying ' + src_file + ' to ' + dest_file)
  120. shutil.copyfile(src_file, dest_file)
  121. # copy to handheld
  122. podcast_dirs = glob.glob(os.path.join(download_root_dir, '*'))
  123. for podcast_dir in podcast_dirs:
  124. podcast_name = os.path.basename(podcast_dir)
  125. handheld_target_dir = os.path.join(handheld_root_dir, podcast_name)
  126. files_to_copy = glob.glob(os.path.join(podcast_dir, '*'))
  127. for file in files_to_copy:
  128. if debug == 1:
  129. print(file + ' ' + handheld_target_dir)
  130. copy_files(file, handheld_target_dir)
  131. # remove old files from handheld
  132. podcast_dirs = glob.glob(os.path.join(handheld_root_dir, '*'))
  133. for podcast_dir in podcast_dirs:
  134. old_files = glob.glob(os.path.join(podcast_dir, '*'))
  135. old_files.sort()
  136. for file in old_files[:-old_episodes_keep]:
  137. os.unlink(file)
  138. else:
  139. print('Handheld not detected, skipping...')