#!/usr/bin/env python2
# Module metadata (author, license, version, contact address, maturity).
__author__ = "Hindol Adhya"
__license__ = "GPL"
__version__ = "0.1"
# Plain address only; the previous value carried a stray trailing '>'.
__email__ = "hindol.adhya@gmail.com"
__status__ = "Development"


def verbose_print(*_unused):
    """Print only when -v/--verbose is specified; by default do nothing.

    This silent version is the default. main() rebinds the module-level
    name to a real printing function when verbose output is requested.
    """
    return None


def parse_bytes(str_rep):
    """Decode strings like 150M and return size in bytes.

    Supported suffixes: G, M, K and B (multiples of 1024, B meaning plain
    bytes) and no-suffix (plain bytes).

    str_rep -- the textual size, e.g. "150M", "2K" or "4096".

    Returns the size in bytes as an int. Anything that is not a run of digits
    optionally followed by exactly one supported suffix (empty string,
    lowercase suffix, surrounding whitespace, trailing newline, ...) yields 0.
    """
    import re

    multiplier = {'G': 1024**3, 'M': 1024**2, 'K': 1024, 'B': 1}

    # \Z instead of $: '$' also matches just before a trailing newline, which
    # would let "150M\n" through and then fail the suffix lookup. Matching the
    # digits with \d (rather than str.isdigit) also guarantees int() accepts
    # whatever was captured.
    match = re.match(r'(\d+)([GMKB]?)\Z', str_rep)
    if match is None:
        return 0

    count, suffix = match.groups()
    # An empty suffix means the number is already expressed in bytes.
    return int(count) * multiplier.get(suffix, 1)


class DownloadManager(object):
    """Implements primitive url management and multi-part downloading of files.

    Urls are queued with add_url() and fetched one after another by
    download_all(). When a size limit is set and a file exceeds it, the file
    is fetched as a sequence of byte-range requests, each no larger than the
    limit, and the parts are written back to back into a single output file.
    """

    ## Proxy server for downloads.
    ## NOTE: nothing in this class reads this value yet.
    __proxy = ""

    ## Maximum file size in bytes allowed by proxy server (valid if proxy is specified)
    __limit = 0 # 0 means no limit

    def __init__(self):
        """Create a manager with an empty url queue."""
        ## List of urls to download. Created per instance: a class-level list
        ## would be shared (and appended to) by every manager.
        self.__urls = []

    def set_limit(self, limit_in_bytes):
        """Set maximum allowed file size in bytes (0 or less means no limit)."""
        self.__limit = limit_in_bytes
        verbose_print("Limit:", self.__limit)

    def get_effective_url(self, url_to_get):
        """Get to the effective (absolute) url of the resource by following redirects.

        Only headers are requested (NOBODY); the body is not downloaded.
        Returns the url reported by curl after all redirects.
        """
        import pycurl

        c = pycurl.Curl()
        try:
            c.setopt(c.URL, url_to_get)

            # If url is not final, follow redirects to get effective url.
            c.setopt(c.FOLLOWLOCATION, 1)
            c.setopt(c.NOBODY, 1)
            c.perform()

            effective_url = c.getinfo(c.EFFECTIVE_URL)
        finally:
            # Release the handle even when perform() raises.
            c.close()

        verbose_print("Effective url:", effective_url)
        return effective_url

    def accepts_byte_ranges(self, effective_url):
        """Test if the server supports multi-part file download.

        Method expects effective (absolute) url. Returns True when the
        response header advertises "Accept-Ranges: bytes", False when it
        advertises "Accept-Ranges: none"; otherwise a one-byte range request
        is attempted and True is returned only for HTTP status 206.
        """
        import pycurl
        import cStringIO
        import re

        header = cStringIO.StringIO()
        c = pycurl.Curl()
        try:
            # Get http header
            c.setopt(c.URL, effective_url)
            c.setopt(c.NOBODY, 1)
            c.setopt(c.HEADERFUNCTION, header.write)
            c.perform()
            header_text = header.getvalue()
        finally:
            c.close()
            header.close()

        verbose_print(header_text)

        # Header names are case-insensitive and the whitespace after the colon
        # is optional, so match accordingly; anchor at the start of a line so
        # that e.g. "X-Accept-Ranges" is not mistaken for the real header.
        flags = re.IGNORECASE | re.MULTILINE
        if re.search(r'^Accept-Ranges:[ \t]*bytes', header_text, flags):
            return True

        # If server explicitly specifies "Accept-Ranges: none" in the header,
        # we do not attempt partial download.
        if re.search(r'^Accept-Ranges:[ \t]*none', header_text, flags):
            return False

        # There is still hope, try a simple byte range query
        c = pycurl.Curl()
        try:
            c.setopt(c.RANGE, '0-0') # First byte
            c.setopt(c.URL, effective_url)
            c.setopt(c.NOBODY, 1)
            c.perform()

            http_code = c.getinfo(c.HTTP_CODE)
        finally:
            c.close()

        # Http status code 206 means byte-ranges are accepted
        return http_code == 206

    def get_content_length(self, effective_url):
        """Get content-length by inspecting http header only.

        Returns curl's CONTENT_LENGTH_DOWNLOAD value converted to int.
        NOTE(review): libcurl documents -1 for an unknown length -- confirm;
        download() then treats such a file as fitting within the limit.
        """
        import pycurl

        c = pycurl.Curl()
        try:
            # Get http header
            c.setopt(c.URL, effective_url)
            c.setopt(c.NOBODY, 1)
            c.perform()

            content_length = int(c.getinfo(c.CONTENT_LENGTH_DOWNLOAD))
        finally:
            c.close()

        verbose_print("Content-length:", content_length)
        return content_length

    @staticmethod
    def _chunk_ranges(content_length, limit):
        """Split content_length bytes into inclusive (start, end) byte ranges.

        Every range covers at most limit bytes (limit must be positive). The
        ends are inclusive, as in an HTTP Range header, so the final range
        ends at content_length - 1.
        """
        return [(start, min(start + limit, content_length) - 1)
                for start in range(0, content_length, limit)]

    def _fetch(self, effective_url, fp, byte_range=None):
        """Stream effective_url into the open binary file fp.

        byte_range -- optional "start-end" string; when given only that part
        of the resource is requested.
        """
        import pycurl

        c = pycurl.Curl()
        try:
            c.setopt(c.WRITEDATA, fp)
            c.setopt(c.URL, effective_url)
            if byte_range is not None:
                c.setopt(c.RANGE, byte_range)
            c.perform()
        finally:
            c.close()

    def download(self, url_to_get, download_dir=""):
        """Download the file at url.

        url_to_get   -- url of the resource; redirects are followed first.
        download_dir -- target directory; defaults to the user's home directory.

        The file is named after the last path component of the effective url
        ("index.html" when that component is empty). If a limit is set and the
        file is larger, it is fetched in byte-range parts; if the server does
        not accept byte-ranges in that case, nothing is downloaded.
        """
        from urllib2 import urlparse
        import os

        effective_url = self.get_effective_url(url_to_get)
        content_length = self.get_content_length(effective_url)

        filename = urlparse.urlsplit(effective_url).path.split('/')[-1]
        if not filename:
            # Url path ends in '/': without a name open() would be handed the
            # directory itself and fail.
            filename = "index.html"

        if download_dir == "":
            # expanduser honours $HOME when set and still works when it is not.
            download_dir = os.path.expanduser("~")
            verbose_print("Download directory:", download_dir)

        target_path = os.path.join(download_dir, filename)

        if self.__limit <= 0 or content_length <= self.__limit:
            # Download the file in one request
            with open(target_path, "wb") as fp:
                self._fetch(effective_url, fp)
            return

        # We must attempt multi-part download now
        if not self.accepts_byte_ranges(effective_url):
            verbose_print("Server does not accept byte-ranges.")
            verbose_print("Download failed due to proxy server restrictions.")
            return

        verbose_print("Server accepts byte-ranges.")

        ranges = self._chunk_ranges(content_length, self.__limit)
        verbose_print("Parts:", len(ranges))

        # Open once in "wb": this truncates a stale file left by an earlier
        # run (appending would corrupt the result) and guarantees the file is
        # flushed and closed when all parts are written.
        with open(target_path, "wb") as fp:
            for start, end in ranges:
                verbose_print("Range: {}-{}".format(start, end))
                self._fetch(effective_url, fp, "{}-{}".format(start, end))

    def download_all(self, download_dir=""):
        """Download all urls stored in manager"""
        for url in self.__urls:
            self.download(url, download_dir)

    def add_url(self, url_to_get):
        """Add url to manager"""
        self.__urls.append(url_to_get)


def main():
    import argparse
    
    parser = argparse.ArgumentParser()
    
    parser.add_argument("url", help="Url of file to download.")
    parser.add_argument("-p", "--proxy",
        help="Proxy server to use for downloading. Inherits from $http_proxy if unspecified.")
    parser.add_argument("-l", "--limit", help="If your proxy server has a filesize limit, specify that here. E.g. 150M.")
    parser.add_argument("-v", "--verbose", action="store_true",
        help="Increase the verbosity of script (helps in debugging.)")
    
    args = parser.parse_args()

    # Define a function that print only when -v/--verbosity is specified in the command line.
    if args.verbose:
        def anonymous(*args):
            # Print each argument separately so caller doesn't need to
            # stuff everything to be printed into a single string
            for arg in args:
               print arg,
            print
        
        global verbose_print
        verbose_print = anonymous

    download_mgr = DownloadManager()

    if args.limit is not None:
        limit_in_bytes = parse_bytes(args.limit)
        download_mgr.set_limit(limit_in_bytes)

    download_mgr.add_url(args.url)
    
    download_mgr.download_all()


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()