#!/usr/bin/env python

# Release version; interpolated into the "--version" banner of the option parser.
version = "1.0.2"

# ###########################################################################
# Imports...

import os, sys, urllib
from tempfile   import mkstemp
from urlparse   import urlparse, urlunparse
from fcntl      import flock, LOCK_EX, LOCK_NB
from subprocess import call, Popen, PIPE
from optparse   import OptionParser, OptionGroup
from copy       import copy
# from urllib     import urlretrieve

# ###########################################################################
# Command-line arguments...

# Epilog shown at the end of the --help output.  The literal starts with a
# newline so that the text can sit flush-left below; the option parser is
# given help_text[1:], which drops that leading newline again.
help_text ='''
Normally, podrecast downloads podcast files in accordance with its
configuration and arguments.  However, if the following four CGI environment
variables are all defined, then it behaves instead as a CGI script:
REQUEST_METHOD, HTTP_HOST, SCRIPT_NAME and PATH_INFO.
'''

# Build the option parser.  The epilog is help_text without its leading
# newline, and the version banner is derived from the module-level version.
parser = OptionParser(epilog=help_text[1:], version="%prog v{}".format(version))
dopts = OptionGroup(parser, "Download options")
popts = OptionGroup(parser, "Post-processing options")

dopts.add_option("-m", "--max",         type=int,            dest="maximum",     default=0,     help="the maximum number of podcasts to download from each feed (default: 0, meaning download all new podcasts)")
dopts.add_option("-c", "--catchup",     action="store_true", dest="catchup",     default=False, help="mark all podcasts as read")
dopts.add_option("-a", "--autocatchup", action="store_true", dest="autocatchup", default=False, help="mark all podcasts in newly-encountered feeds as read; a feed is \"newly-encountered\" if either its directory or its feed file does not initially exist")

popts.add_option("-d", "--downloaded",  action="store_true", dest="downloaded",  default=False, help="output a list of files which have been downloaded, and now require post processing")
popts.add_option("-r", "--ready",       action="store_true", dest="ready",       default=False, help="register the files named on the command line as now post processed, and ready for delivery")

parser.add_option_group(dopts)
parser.add_option_group(popts)
# NOTE: this runs at module level, so the command line is parsed as soon as
# the file is executed or imported.
(options, args) = parser.parse_args()

# Positional arguments are only accepted as the file list for --ready.
if args and not options.ready:
   parser.error("unused arguments: {}".format(" ".join(args)))

# Truthiness is used here, so "--max 0" (the default) does not count as a
# download option having been given.
have_download = options.maximum or options.catchup or options.autocatchup
have_processing = options.downloaded or options.ready

if have_download and have_processing:
   parser.error("download and processing options are mutually exclusive")

if options.downloaded and options.ready:
   parser.error("these processing options are mutually exclusive")

## download_options = [ options.maximum, options.catchup, options.autocatchup ]
## if 1 < sum([ 1 if opt else 0 for opt in download_options ]):
##    parser.error("these download options are mutually exclusive")

# ###########################################################################
# Globals and "constants", ...

# Running count of non-fatal errors; incremented by the download and install
# loops and reported just before the program exits.
errors = 0
# The open configuration file holding the exclusive lock, or None.  It is
# managed by read_config().
lock = None
# CGI mode is selected only when all four of these variables are defined.
cgi = all(name in os.environ
          for name in ("PATH_INFO", "SCRIPT_NAME", "HTTP_HOST", "REQUEST_METHOD"))

# Get current umask.
# os.umask() can only report the mask by replacing it, so install a throwaway
# value and immediately put back the one that was returned.  The literal is
# written 0o22 (rather than 022) because that spelling is accepted by both
# Python 2.6+ and Python 3.
#
umask = os.umask(0o22)
os.umask(umask)

## What if some but not all of these are set?
## if not cgi and ("PATH_INFO" in os.environ or "SCRIPT_NAME" in os.environ or "HTTP_HOST" in os.environ or "REQUEST_METHOD" in os.environ):
##    sys.stderr.write("error: insufficient environment variables for CGI\n")
##    sys.exit(1)

def done_file(fname):
   """Return the name of the hidden marker file which records that fname
   has already been dealt with.
   """
   return "".join([ ".", fname, ".podrecast_done" ])

# Suffix carried by a downloaded file until it is registered as post processed.
DOWNLOAD_SUFFIX = ".podrecast_download"
def download_file(fname):
   """Return the name under which fname is stored while it still awaits
   post processing.
   """
   return "".join([ fname, DOWNLOAD_SUFFIX ])

def feed_file():
   """Return the name of the hidden file in which a feed directory keeps
   its downloaded feed XML.
   """
   name = ".podrecast_feed.xml"
   return name

# Configuration files.
# The first available configuration file is chosen.
#
# If using the CGI functionality of podrecast, then it makes sense to use
# "/etc/podrecast.conf" or "/usr/local/etc/podrecast.conf", because
# the web server will only be able to find system-wide configuration files.
# However, the other two might also be useful for testing.

# Candidates in priority order; "~" is expanded when the list is searched.
configs = [
   "./podrecast.conf",
   "~/.podrecast.conf",
   "/usr/local/etc/podrecast.conf",
   "/etc/podrecast.conf",
]

# ###########################################################################
# Utilities...

def log(*args):
   if cgi: print "X-PODRECAST-LOG:", " ".join(args)
   else:   print " ".join(args)

def elog(*args):
   if cgi: print "X-PODRECAST-ERROR:", " ".join(args)
   else:   sys.stderr.write(" ".join(args) + "\n")

def cgi_error(*args):
   for arg in args:
      elog(arg)
   print "Status: 404 Not Found"
   print
   sys.exit(0)

def touch(fname):
   """Create fname if it does not exist, and set its access and modification
   times to the current time.  Existing contents are left untouched.
   """
   # open() is used rather than the Python 2-only file() builtin, in line
   # with slurp().  Append mode creates a missing file without truncating
   # an existing one.
   with open(fname, 'a'):
      os.utime(fname, None)

def slurp(fname):
   """Return the entire contents of the file fname."""
   with open(fname) as handle:
      contents = handle.read()
   return contents

def head_request():
   """Return True if the REQUEST_METHOD environment variable is set to
   'HEAD', and False otherwise.
   """
   return os.environ.get('REQUEST_METHOD') == 'HEAD'

def fd_copy(src,dst):
   """Copy everything readable from file descriptor src to file descriptor
   dst, in chunks, until src reports end of file.

   src, dst -- open OS-level file descriptors (integers)
   """
   sz = 4 * 4096
   while True:
      chunk = os.read(src,sz)
      if not chunk: return
      # os.write() may accept only part of the data (for instance when dst
      # is a pipe or socket), so keep writing until the chunk is consumed.
      while chunk:
         written = os.write(dst,chunk)
         chunk = chunk[written:]

def chdir(directory):
   """Change into directory, creating it (and any missing parents) first
   if it does not exist.  Both steps are logged.

   Raises Exception if directory exists but is not a directory.
   """
   if not os.path.isdir(directory):
      # Something other than a directory is in the way.
      if os.path.exists(directory):
         raise Exception("exists, but is not a directory: " + directory)
      log("creating:", directory)
      os.makedirs(directory)
   #
   log("chdir:", directory)
   os.chdir(directory)

# ###########################################################################
# Wget...

def wget(url,fname,options=[]):
   """Download url into the file fname, passing options through.

   This is the single place where the download implementation is chosen;
   at present it is always the external wget program.
   """
   downloader = wget_external
   return downloader(url, fname, options)

def wget_external(url,fname,options=[]):
   """Download url into fname by running the external wget program.

   The data is first written to a temporary file in the current directory;
   that file is renamed to fname only once wget has succeeded, so fname
   never holds a partial download.

   url     -- the URL to fetch
   fname   -- the destination file name
   options -- extra command-line options for wget (the list is not modified)

   Raises Exception if wget exits with a non-zero status.  Any other error
   (for example, wget not being installed) propagates unchanged.  In every
   failure case the temporary file is removed.
   """
   tmp = None
   # Keep wget quiet unless stdout is a terminal.
   extras = [] if sys.stdout.isatty() else [ '-q' ]
   log("wget:", url)
   try:
      fd, tmp = mkstemp(dir=".", prefix=fname, suffix=".podrecast.download.tmp")
      os.close(fd)
      # "--" ends option parsing, so a URL which comes from a feed and
      # happens to start with "-" can never be taken for a wget option.
      command = [ 'wget', '-O', tmp ] + list(options) + extras + [ '--', url ]
      if 0 != call(command):
         raise Exception("wget/download failed: " + url)
      # Replace mkstemp's restrictive mode with ordinary umask-governed
      # permissions.  0o644 is the octal spelling valid in Python 2.6+ and 3.
      os.chmod(tmp, 0o644 & ~umask)
      os.rename(tmp,fname)
      log("done:", url)
   except:
      # Deliberately bare: the temporary file must be cleaned up whatever
      # went wrong (including KeyboardInterrupt); the error is re-raised.
      if tmp and os.path.exists(tmp):
         os.unlink(tmp)
      raise

# It may be better to use an internal downloader (such as urllib.urlretrieve).
# However, the external wget (above) handles things like proxies and .netrc
# passwords neatly, without the need to add any special handling to podrecast
# itself.  So, we'll stick with the external wget, for the moment.
#
# def wget_internal(url,fname,options=[]):
#    tmp = None
#    extras = [ '-q' ] if not sys.stdout.isatty() else []
#    log("wget:", url)
#    try:
#       fd, tmp = mkstemp(dir=".", prefix=fname, suffix=".podrecast.download.tmp")
#       os.close(fd)
#       urlretrieve(url,tmp)
#       os.chmod(tmp,0644&~umask)
#       os.rename(tmp,fname)
#       log("done:", fname)
#    except:
#       if tmp and os.path.exists(tmp):
#          os.unlink(tmp)
#       raise

# ###########################################################################
# MIME types, encodings...

def mime_file(fname,fallback,arg):
   """Ask the external file(1) program about fname and return its answer.

   fname    -- the file to inspect
   fallback -- the value returned when no answer can be obtained
   arg      -- the option selecting what to report (e.g. '--mime-type')

   Returns the first line of file's output, stripped.  The fallback is
   returned when that line is empty, when file exits with a non-zero
   status, or when running it fails for any ordinary reason.
   """
   try:
      with open(os.devnull,"w") as devnull:
         pipe = Popen(['file', '-b', arg, fname], stdout=PIPE, stderr=devnull)
         # communicate() drains and closes the pipe and waits for the child,
         # so no descriptor or zombie process is left behind.
         output = pipe.communicate()[0]
      lines = output.splitlines()
      result = lines[0].strip() if lines else ""
      return result if pipe.returncode == 0 and result else fallback
   except Exception:
      # Best effort only -- but KeyboardInterrupt and SystemExit are no
      # longer swallowed, as they were by a bare "except:".
      return fallback

def mime_type(fname,fallback):
   """Return the MIME type which file(1) reports for fname, or fallback if
   none can be determined.
   """
   selector = '--mime-type'
   return mime_file(fname, fallback, selector)

def mime_encoding(fname,fallback):
   """Return the character encoding which file(1) reports for fname, or
   fallback if none can be determined.
   """
   selector = '--mime-encoding'
   return mime_file(fname, fallback, selector)

# ###########################################################################
# Non-standard libraries...

# lxml is the one dependency outside the standard library.  Only a failed
# import is turned into installation advice; any other exception raised while
# importing is left to surface with its own traceback.
try:
   from lxml import objectify, etree
except ImportError:
   elog("import error: package/module python-lxml not found")
   elog("import error: please install it")
   elog("import error: on debian, it's...")
   elog("import error: sudo apt-get install python-lxml")
   sys.exit(1)

# try:
#    import magic
# except:
#    elog("import error: package/module python-magic not found")
#    elog("import error: please install it")
#    elog("import error: on debian, it's...")
#    elog("import error: sudo apt-get install python-magic")
#    sys.exit(1)

# ###########################################################################
# Configuration...

def choose_config(quiet=False):
   """Return the path of the first candidate in configs which is a readable
   regular file, after expanding a leading "~".

   quiet -- when true, the choice is not logged

   Raises Exception if no candidate qualifies.
   """
   for candidate in configs:
      expanded = os.path.expanduser(candidate)
      usable = os.path.isfile(expanded) and os.access(expanded, os.R_OK)
      if not usable:
         continue
      if not quiet:
         log("config:", expanded)
      return expanded
   raise Exception("failed to find config file: " + str(configs))

def read_config(config,locking=False):
   """Read and parse the configuration file config.

   Blank lines and lines starting with "#" are ignored.  Every other line
   must consist of exactly two whitespace-separated fields.

   config  -- path of the configuration file
   locking -- when true, take a non-blocking exclusive flock on the file and
              keep it (through the module-level "lock") after returning

   Returns the list of [first, second] field pairs, in file order.

   Raises Exception if the lock cannot be obtained or if a line does not
   have exactly two fields.  Errors from opening or reading the file
   propagate unchanged.
   """
   global lock
   #
   fd = open(config)
   # Forget the file object kept by an earlier call; once it is no longer
   # referenced it can be closed, which also gives up any lock it held.
   lock = None
   try:
      raw_lines = fd.readlines()
      if locking:
         try:
            flock(fd.fileno(), LOCK_EX | LOCK_NB)
         except IOError:
            raise Exception("locking, would block: " + config)
   except:
      # Do not leak the open file when reading or locking fails.
      fd.close()
      raise
   #
   if locking:
      # Keeping the file open is what keeps the lock held.
      lock = fd
   else:
      fd.close()
   #
   entries = []
   for raw in raw_lines:
      text = raw.strip()
      if not text or text.startswith("#"):
         continue
      fields = text.split()
      if len(fields) != 2:
         raise Exception("config error: invalid line in {}: {}".format(config," ".join(fields)))
      entries.append(fields)
   #
   return entries

def read_roots(quiet=False):
   """Return the "root" entries of the chosen configuration file, each as a
   ["root", path] pair.

   quiet -- passed on to choose_config()
   """
   entries = read_config(choose_config(quiet))
   return [ entry for entry in entries if entry[0] == "root" ]

# ###########################################################################
# Handle feeds and enclosures...

def download_feeds(config,options):
   """Process every entry of the configuration file config, in order.

   A "root" entry changes into (creating if need be) the given absolute
   directory.  Any other entry is a feed directory and URL, which is handed
   to download_feed().  The configuration file is locked while this runs.

   A failing feed is logged and counted in the global "errors"; it does not
   stop the remaining feeds.  Raises Exception for a relative root.
   """
   global errors
   for entry in read_config(config,True):
      if entry[0] == "root":
         root = entry[1]
         if not os.path.isabs(root):
            raise Exception("config: root must be an absolute path: " + str(entry))
         chdir(root)
         continue
      #
      directory, url = entry
      try:
         download_feed(directory,url,options)
      except (KeyboardInterrupt, SystemExit):
         raise
      except Exception as error:
         elog("feed error:", directory, url, str(error))
         errors += 1

def download_feed(directory,url,options):
   """Fetch the feed at url into directory and deal with its enclosures.

   directory -- the feed's directory, entered (and created if missing) for
                the duration of the call
   url       -- the URL of the feed XML
   options   -- the parsed command-line options

   With options.autocatchup, a feed whose directory or feed file did not
   exist beforehand is handled as if options.catchup were set.  When
   options.maximum is positive, only that many leading enclosures are
   handled normally; the rest are handled in catch-up mode.

   A failing enclosure is logged and counted in the global "errors".  A
   failure to fetch or parse the feed itself is re-raised after returning
   to the original directory.
   """
   global errors
   start_dir = os.getcwd()
   try:
      known_feed = os.path.isdir(directory)
      chdir(directory)
      #
      known_feed = known_feed and os.path.isfile(feed_file())
      if options.autocatchup and not known_feed:
         # Work on a copy, so that the caller's options stay as they were.
         options = copy(options)
         options.catchup = True
      #
      wget(url,feed_file(), [ '-q' ])
      feed, enclosures = slurp_enclosures(feed_file())
   except:
      chdir(start_dir)
      raise
   #
   limit = options.maximum
   if limit > 0:
      wanted, skipped = enclosures[:limit], enclosures[limit:]
   else:
      wanted, skipped = enclosures, []
   #
   # Enclosures beyond the limit are only ever marked as done.
   catchup_options = copy(options)
   catchup_options.catchup = True
   #
   for batch, batch_options in ( (wanted, options), (skipped, catchup_options) ):
      for enclosure in batch:
         try:
            handle_enclosure(enclosure,batch_options)
         except (KeyboardInterrupt, SystemExit):
            raise
         except Exception as error:
            elog("enclosure error:", str(error))
            errors += 1
   #
   chdir(start_dir)

def slurp_enclosures(fname):
   """Parse the feed XML in the file fname.

   Returns a pair: the parsed feed, and the list of its
   channel/item/enclosure elements.
   """
   feed = objectify.fromstring(slurp(fname))
   enclosures = feed.findall('.//channel/item/enclosure')
   return feed, enclosures

def handle_enclosure(enclosure,options):
   """Bring one feed enclosure up to date in the current directory.

   enclosure -- an element whose "attrib" mapping holds the enclosure's url
   options   -- the parsed command-line options; only "catchup" is read

   The local file name is the base name of the URL's path.  In order:
   an enclosure already marked done only has its marker touched; one whose
   file or download file is present is merely logged; in catch-up mode the
   enclosure is marked done without being fetched; otherwise it is
   downloaded to its download file and then marked done.

   Raises Exception if the enclosure has no url attribute.
   """
   attributes = enclosure.attrib
   if "url" not in attributes:
      raise Exception("enclosure, but no url")
   #
   url = urlparse(attributes["url"])
   fname = os.path.basename(url.path)
   #
   if os.path.exists(done_file(fname)):
      # Touch the done_file().  That way, done_file()s which are more than a
      # little older than the feed file itself no longer occur in the feed, and
      # might safely be deleted.  This must, therefore, be the first of this
      # sequence of steps.
      #
      touch(done_file(fname))
      # log("done:", fname)
      return
   #
   if os.path.exists(fname):
      log("ready:", fname)
      return
   #
   if os.path.exists(download_file(fname)):
      log("downloaded:", fname)
      return
   #
   if options.catchup:
      log("touch:", fname)
      touch(done_file(fname))
      return
   #
   # OK.  Download the file...
   wget(urlunparse(url),download_file(fname))
   touch(done_file(fname))

# ###########################################################################
# Handle cgi requests...

def handle_cgi():
   """Answer the current CGI request.

   A PATH_INFO of "/" or "/index.html" yields the index of feeds.  Any
   other PATH_INFO is resolved below the single configured root: a readable,
   searchable directory is served as a feed and a readable file is served
   as-is.

   Raises Exception if there is not exactly one root, if the path fails one
   of the sanity checks, or if nothing servable is found at the path.
   """
   roots = read_roots()
   if len(roots) != 1:
      raise Exception("for CGI, there must be a single, valid root")
   root = roots[0][1]
   #
   path_info = os.environ['PATH_INFO']
   if path_info in ('/', '/index.html'):
      prefix = "http://{}{}/".format(os.environ['HTTP_HOST'],os.environ['SCRIPT_NAME'])
      feed_index(root,prefix)
      return
   #
   path = root + path_info
   #
   # A few simple checks for potentially malicious arguments...
   # These checks do not render podrecast secure!
   #
   if not path_has_prefix(path,root):
      raise Exception("PATH_INFO, invalid", path)
   if '..' in path:
      raise Exception("PATH_INFO, dangerous looking path", path)
   if len(path) > 1000:
      raise Exception("PATH_INFO, path too long", path)
   #
   if os.path.isdir(path):
      if os.access(path, os.R_OK) and os.access(path, os.X_OK):
         return handle_cgi_directory(path)
   elif os.path.isfile(path) and os.access(path, os.R_OK):
      return handle_cgi_file(path)
   #
   raise Exception("PATH_INFO, path not found", path)

def path_has_prefix(path,prefix):
   """Return True if path, once symbolic links and ".." are resolved, is
   prefix itself or lies somewhere below it.

   The comparison is made on whole path components, so "/srv/pods-private"
   is not considered to be inside "/srv/pods".
   """
   path = os.path.realpath(path)
   prefix = os.path.realpath(prefix)
   if path == prefix:
      return True
   # rstrip() keeps the root directory "/" from turning into "//".
   return path.startswith(prefix.rstrip(os.sep) + os.sep)

# ###########################################################################
# Handle cgi requests for feeds...

def handle_cgi_directory(path):
   chdir(path)
   #
   if not os.path.isfile(feed_file()):
      raise Exception("feed file does not exist", path)
   if not os.access(feed_file(), os.R_OK):
      raise Exception("feed file not readable", path)
   #
   feed, enclosures = slurp_enclosures(feed_file())
   for enclosure in enclosures:
      attributes = enclosure.attrib
      url = urlparse(attributes["url"])
      fname = os.path.basename(url.path)
      if not os.path.isfile(fname) or not os.access(fname, os.R_OK):
         item = enclosure.getparent()
         item.getparent().remove(item)
         continue
      #
      http_host = os.environ['HTTP_HOST']
      script_name = os.environ['SCRIPT_NAME']
      path_info = os.environ['PATH_INFO']
      attributes["url"] = "http://{}{}{}/{}".format(http_host,script_name,path_info,fname)
   #
   mime = 'text/xml'
   encoding = mime_encoding(feed_file(), "utf-8");
   content = etree.tostring(feed, pretty_print=True, xml_declaration=True, encoding=encoding)
   print 'Content-Type:', mime
   print
   if head_request():
      return
   print content

# ###########################################################################
# Handle cgi requests for files...

def handle_cgi_file(path):
   mime = mime_type(path, 'audio/mpeg')
   stat = os.stat(path)
   print 'Content-Type:', mime
   print 'Content-Length:', stat.st_size
   print
   if head_request():
      return
   sys.stdout.flush()
   fd = os.open(path, os.O_RDONLY)
   fd_copy(fd,sys.stdout.fileno())

# ###########################################################################
# Show files ready for post processing...

def show_downloaded():
   for IGNORED, root in read_roots(True):
      for path, dirs, files in os.walk(root):
         for f in files:
            if f.endswith(DOWNLOAD_SUFFIX):
               print os.path.join(path,f)

# ###########################################################################
# Generate an index of the available feeds...

def feed_index(root,prefix):
   chdir(root)
   formatted_feeds = "\n".join(html_feed_items(prefix))
   #
   title = 'PodRecast Feeds'
   a_little_css_for_mobile_devices = '''
      <style type="text/css">
         body
         {
            background-color: #101010;
            color: #606060;
            font-family: "Lucida Sans Unicode", "Lucida Grande", sans-serif;
            font-size: 150%;
         }
      </style>
      '''
   #
   print 'Content-Type: text/html'
   print
   if head_request():
      return
   print '<html>'
   print '<head>'
   print a_little_css_for_mobile_devices
   print '<title>{}</title>'.format(title)
   print '</head>'
   print "<h1>{}</h1>".format(title)
   print '<dl>'
   print formatted_feeds
   print '</dl>'
   print '</html>'

def html_feed_items(prefix):
   """Yield the HTML fragment for each configured feed whose directory
   exists and whose feed file is readable, relative to the current directory.
   """
   for feed, _url in list_feeds():
      available = os.path.isdir(feed) and os.access(os.path.join(feed,feed_file()), os.R_OK)
      if available:
         yield html_feed_item(prefix,feed)

def list_feeds(config=None):
   """Yield every entry of the configuration file which is not a "root"
   entry, as a [directory, url] pair.

   config -- path of the configuration file; when empty or None, the file
             found by choose_config() is used
   """
   chosen = config if config else choose_config(True)
   for entry in read_config(chosen):
      if entry[0] == 'root':
         continue
      yield entry

def html_feed_item(prefix,feed):
   """Return the HTML definition-list fragment for one feed.

   prefix -- the URL prefix; the feed's URL is prefix followed by feed
   feed   -- the feed's directory, as named in the configuration file

   The link text is the feed name with each "/" replaced by ", ".  All
   interpolated values are HTML-escaped, since the prefix is built from
   request data and a feed name may contain "&", "<" or ">".
   """
   from xml.sax.saxutils import escape
   #
   url = prefix+feed
   title = feed.replace('/',', ')
   # Inside the double-quoted attribute the quote character must go as well.
   href = escape(url, {'"': '&quot;'})
   #
   lines = [
      '  <dt>\n',
      # "text/xml" is the MIME type; it was previously emitted wrapped in
      # angle brackets, which is not a valid type.
      '     <a href="{0}" type="text/xml">{1}</a>\n'.format(href, escape(title)),
      '  </dt>\n',
      '  <dd>\n',
      '     <tt><pre>{0}</pre></tt><br>\n'.format(escape(url)),
      '  </dd>\n',
   ]
   #
   return ''.join(lines)

# ###########################################################################
# Register files as post processed, and now ready for delivery...

def strip_suffix(text, suffix):
    """Return text without its trailing suffix.

    If text does not end with suffix (or suffix is empty), text is returned
    unchanged, rather than losing len(suffix) unrelated characters.
    """
    if suffix and text.endswith(suffix):
        return text[:len(text)-len(suffix)]
    return text

# TODO:
# This should install as many as it can, then report however
# many errors it encountered.
def make_ready(files):
   """Register each of the download files named in files as post processed,
   by renaming it to its name without DOWNLOAD_SUFFIX.

   A file which does not exist, does not carry the suffix, or whose target
   already exists is reported and counted in the global "errors"; the
   remaining files are still processed.  If any error has been counted by
   the end, the total is reported and the program exits with status 1.
   """
   global errors
   for candidate in files:
      try:
         if not os.path.isfile(candidate):
            raise Exception("file does no exist: " + candidate)
         if not candidate.endswith(DOWNLOAD_SUFFIX):
            raise Exception("not a podrecast download file: " + candidate)
         #
         target = strip_suffix(candidate,DOWNLOAD_SUFFIX)
         if os.path.exists(target):
            raise Exception("target exist: " + target)
         #
         os.rename(candidate,target)
         log("install:", target)
      #
      except (KeyboardInterrupt, SystemExit):
         raise
      except Exception as error:
         elog("install error:", str(error))
         errors += 1
   #
   if errors > 0:
      elog("errors:", str(errors))
      sys.exit(1)

# ###########################################################################
# Main...

if __name__ == "__main__":
   # CGI mode takes over completely: whatever happens, the exit status is 0
   # and any failure is reported to the client through cgi_error().
   if cgi:
      try:
         handle_cgi()
      except Exception as error:
         cgi_error(str(error))
      sys.exit(0)
   #
   # Post-processing options; when neither is given, fall through to the
   # default action of downloading the feeds.
   option_handled = False
   #
   if options.downloaded:
      show_downloaded()
      option_handled = True
   #
   if options.ready:
      try:
         make_ready(args)
      except Exception as error:
         elog(str(error))
         sys.exit(1)
      option_handled = True
   #
   if not option_handled:
      download_feeds(choose_config(),options)
      if errors > 0:
         elog("errors:", str(errors))
         sys.exit(1)
   #
   sys.exit(0)

