"""Object cache for URLs with garbage collection"""

import os
from os import path
import hashlib
import numpy
import urlparse
from viset.util import isarchive, isurl, isimg, ishdf5, isfile
import viset.download
import pylab
import string
import shutil
import h5py

class Cache(object):
    """Disk-backed object cache for URLs with periodic size accounting.

    Elements are stored below a cache root directory and are named by a SHA1
    hash of their URL (without the fragment).  The URL fragment is parsed as
    a query string and may carry the options ``sha1``, ``item``, ``reader``
    and ``id`` (see ``_url_fragment_options``).

    The default cache root is taken from the ``VISYM_CACHE`` environment
    variable, falling back to ``~/.visym/cache``; it is created when the
    class is defined.
    """

    # Default cache root, resolved (and created) once at class definition.
    _cacheroot = os.environ.get('VISYM_CACHE')
    if _cacheroot is None:
        # expanduser honours $HOME when set and does not raise when it is not
        _cacheroot = path.join(path.expanduser('~'), '.visym', 'cache')
    if not path.exists(_cacheroot):
        os.makedirs(_cacheroot)
    _maxsize = None
    _verbose = None
    _strategy = None
    _refetch = True
    _free_maxctr = 100           # number of free() calls between size checks
    _free_ctr = _free_maxctr     # countdown until the next size check
    _cachesize = None            # async result of the last size computation
    _prettyhash = True

    def __init__(self, cacheroot=_cacheroot, maxsize=10E9, verbose=False, strategy='lru', refetch=True):
        """Create a cache handle.

        cacheroot: directory holding the cached files (created if missing);
                   None keeps the class default.
        maxsize:   size in bytes above which free() prints a warning.
        verbose:   if true, progress messages are printed.
        strategy:  stored on the instance; not consulted by this class.
        refetch:   if true, get() discards and refetches an element once
                   when fetching it raised an error.
        """
        if cacheroot is not None:
            self._cacheroot = cacheroot
        if not path.isdir(self._cacheroot):
            # size() and delete() both assume the root directory exists
            os.makedirs(self._cacheroot)
        self._maxsize = maxsize
        self._verbose = verbose
        self._strategy = strategy
        self._refetch = refetch

    def __len__(self):
        """Cache size in bytes: the last asynchronous result if any, else computed now."""
        if self._cachesize is not None:
            return self._cachesize.get()
        else:
            return self.size()

    def __str__(self):
        """Short human readable description of this cache."""
        return "<viset.cache: cacheroot='" + str(self._cacheroot) + "', maxsize=" + str(self._maxsize) + ">"

    def _url_fragment_options(self, url, sha1=None):
        """Parse the URL fragment as a query string into an options dict.

        Returns a dict with at least the keys 'sha1', 'item', 'reader' and
        'id'.  Keys found in the fragment override the defaults; when a key
        is repeated only its first value is kept.
        """
        opt = {'sha1': sha1, 'item': None, 'reader': None, 'id': None}
        url_fragment = urlparse.urlparse(url)[5]
        # parse_qs always maps each key to a non-empty list of values
        for (key, values) in urlparse.parse_qs(url_fragment).items():
            opt[key] = values[0]
        return opt

    def read(self, filename, reader=None, id=None):
        """Decode element `id` of a cached file using the named reader.

        Supported readers are 'universal' (pylab.imread) and 'mnist'.
        Raises NotImplementedError for any other reader, including None.
        """
        # str(reader): reader may be None, which cannot be concatenated
        quietprint('[viset.cache][HIT]: Reading ' + str(reader) + ' encoded image ' + str(id) + ' from file "' + filename + '"', self._verbose)
        if reader == 'universal':
            return pylab.imread(filename)
        elif reader == 'mnist':
            # Imported lazily: this module does not import viset.library itself.
            from viset.library import mnist
            return mnist.imread(filename, int(id))
        else:
            quietprint('[viset.cache][ERROR]: reader type "' + str(reader) + '" unsupported', self._verbose)
            raise NotImplementedError('reader type "' + str(reader) + '" unsupported')

    def unpack(self, filename, sha1=None, item=None):
        """Extract a cached archive next to itself and delete the archive.

        The archive is extracted to a directory named like the archive
        without its extension.  Returns the path of `item` inside that
        directory, or the directory itself when `item` is None.  A filename
        that is not an archive is returned unchanged.
        """
        if not isarchive(filename):
            return filename
        (unpackdir, ext) = viset.util.splitextension(filename)
        if not path.exists(unpackdir):
            os.makedirs(unpackdir)
        viset.download.extract(filename, unpackdir, sha1=sha1, verbose=self._verbose)
        os.remove(filename)
        if item is not None:
            return path.join(unpackdir, item)
        return unpackdir

    def load(self, filename, reader=None, id=None):
        """Load a cached filename to an object.

        Images are decoded with pylab.imread; when a reader is given the
        file is decoded with read(); an unpacked archive yields its
        directory; HDF5 and other regular files yield the filename itself.
        Raises IOError when the file cannot be loaded.
        """
        unpackdir = viset.util.splitextension(filename)[0]
        if isimg(filename):
            return pylab.imread(filename)
        elif reader is not None:
            return self.read(filename, reader=reader, id=id)
        elif path.isdir(unpackdir):
            return unpackdir  # archive directory in cache
        elif isarchive(filename):
            message = 'archive file "' + filename + '" not unpacked to "' + unpackdir + '"'
            quietprint('[viset.cache][ERROR]: ' + message, self._verbose)
            raise IOError(message)
        elif ishdf5(filename) or isfile(filename):
            return filename
        else:
            message = 'could not load filename "' + filename + '"'
            quietprint('[viset.cache][ERROR]: ' + message, self._verbose)
            raise IOError(message)

    def download(self, url):
        """Download an element according to its URL scheme and return its cache filename.

        Only 'http' URLs are actually downloaded.  'file' URLs are accepted
        but nothing is copied.  Every other scheme raises NotImplementedError.
        """
        self.free()  # garbage collection time?
        filename = self.cachefile(urlparse.urldefrag(url)[0])
        url_scheme = urlparse.urlparse(url)[0]
        if url_scheme == 'http':
            viset.download.download(url, filename, verbose=self._verbose)
        elif url_scheme == 'file':
            pass
        elif url_scheme == 'viset':
            raise NotImplementedError('FIXME: support for viset database queries')
        elif url_scheme == 'hdfs':
            raise NotImplementedError('FIXME: support for hadoop distributed file system')
        else:
            raise NotImplementedError('FIXME: support for URL scheme ' + url_scheme)
        return filename

    def delete(self):
        """Delete the entire cache and recreate an empty cache root."""
        quietprint('[viset.cache]: Deleting all cached data in "' + self._cacheroot + '"', self._verbose)
        shutil.rmtree(self._cacheroot)
        os.makedirs(self._cacheroot)

    def fetch(self, url, sha1=None):
        """Fetch an element, store it in the cache and return the loaded object.

        For URLs the element is downloaded (and unpacked if it is an
        archive) on a cache miss.  For anything else the cache is searched
        by hashed name and then by base name.  Returns None when a non-URL
        element is not in the cache.
        """
        urlopts = self._url_fragment_options(url, sha1)
        if isurl(url):
            if not self.iscached(url):
                quietprint('[viset.cache][MISS]: downloading \'' + str(url[0:63] + '...') + '\'', self._verbose)
                filename = self.download(url)
                if isarchive(filename):
                    quietprint('[viset.cache][MISS]: extracting archive \'' + filename + '\'', self._verbose)
                    filename = self.unpack(filename, sha1=urlopts['sha1'], item=urlopts['item'])
            else:
                filename = self.cachefile(url)
                quietprint('[viset.cache][HIT]: retrieving \'' + filename + '\'', self._verbose)
        elif viset.util.isfile(self.cachefile(url)):
            filename = self.cachefile(url)
            quietprint('[viset.cache][HIT]: retrieving \'' + filename + '\'', self._verbose)
        elif viset.util.isfile(self.abspath(path.basename(url))):
            filename = self.abspath(path.basename(url))
            quietprint('[viset.cache][HIT]: retrieving \'' + filename + '\'', self._verbose)
        else:
            # nothing in cache or backing store
            quietprint('[viset.cache][MISS]: \'' + url + '\' not available in cache', self._verbose)
            return None

        quietprint('[viset.cache][HIT]: loading \'' + filename + '\'', self._verbose)
        return self.load(filename, reader=urlopts['reader'], id=urlopts['id'])

    def put(self, obj):
        """Put a numpy object into the cache and return the cached HDF5 filename.

        Lists and tuples are converted to numpy arrays first.  The file is
        named by the SHA1 of the object's raw bytes; shape and dtype are not
        part of the name.  Raises ValueError for any other object type.
        """
        if not isinstance(obj, (numpy.ndarray, numpy.generic)):
            if isinstance(obj, (list, tuple)):
                obj = numpy.array(obj)  # type coercion
            else:
                raise ValueError('numpy object required for caching')
        # A byte view (and hashing) needs contiguous memory; for arrays that
        # are already C-contiguous this yields exactly the same bytes.
        byteview = numpy.ascontiguousarray(obj).view(numpy.uint8)
        digest = hashlib.sha1(byteview).hexdigest()  # byte view sha1
        filename = self.abspath(digest + '.h5')
        f = h5py.File(filename, 'w')
        try:
            f['obj'] = obj
        finally:
            f.close()  # do not leak the file handle if the write fails
        return filename

    def get(self, url, sha1=None):
        """Get an object from the cache, refetching once on error if enabled.

        NotImplementedError is always propagated.  Any other Exception
        triggers a single discard-and-refetch when the cache was created
        with refetch=True, and is re-raised otherwise.  KeyboardInterrupt
        and SystemExit are never intercepted.
        """
        try:
            return self.fetch(url, sha1=sha1)
        except NotImplementedError:
            raise
        except Exception:
            if not self._refetch:
                raise
            quietprint('[viset.cache][ERROR]: cache fetch error - discarding and refetching', self._verbose)
            self.discard(url)
            return self.fetch(url, sha1=sha1)

    def discard(self, url):
        """Delete an element (cached file and/or unpacked directory) from the cache."""
        cachefile = self.cachefile(url)
        if path.isfile(cachefile):
            os.remove(cachefile)
        cachedir = self.cachedir(url)
        if path.isdir(cachedir):
            shutil.rmtree(cachedir)

    def size(self, source=None):
        """Recursively compute the size in bytes of a cache directory.

        source defaults to this instance's cache root.
        Adapted from http://snipplr.com/view/47686/
        """
        if source is None:
            source = self._cacheroot
        total_size = os.path.getsize(source)
        for item in os.listdir(source):
            itempath = os.path.join(source, item)
            if os.path.isfile(itempath):
                total_size += os.path.getsize(itempath)
            elif os.path.isdir(itempath):
                total_size += self.size(itempath)
        return total_size

    def hash(self, url, prettyhash=_prettyhash):
        """Compute the SHA1 based name that identifies a URL in the cache.

        The hash covers scheme, host, path and query but not the fragment.
        With prettyhash the result is '<basename without extension>_<first
        7 hex digits>', otherwise the full hex digest.
        """
        p = urlparse.urlsplit(url)
        urlquery = urlparse.urlunsplit([p[0], p[1], p[2], p[3], None])
        if not isinstance(urlquery, bytes):
            # hashlib needs bytes; text URLs are hashed as UTF-8
            urlquery = urlquery.encode('utf-8')
        urlhash = hashlib.sha1(urlquery).hexdigest()
        if not prettyhash:
            return urlhash
        urlpath = urlparse.urlunsplit([p[0], p[1], p[2], None, None])
        (filename, ext) = viset.util.splitextension(urlpath)
        return path.basename(filename) + '_' + urlhash[0:7]

    def free(self):
        """Garbage collection bookkeeping, called before every download.

        Every `_free_maxctr` calls the result of the previous size
        computation is compared against the maximum size (printing a warning
        when exceeded) and a new background size computation is started.
        """
        if self._free_ctr <= 0:
            if self._cachesize is not None:
                if self._cachesize.get() > self._maxsize:
                    print('WARNING: cachesize is larger than maximum.  Clean resources!')
            quietprint('[viset.cache.free]: spawning cache garbage collection process', self._verbose)
            # A thread pool is used because a bound method cannot be pickled
            # for a process pool on Python 2, and the work is stat() bound.
            from multiprocessing.pool import ThreadPool
            pool = ThreadPool(1)
            self._cachesize = pool.apply_async(self.size, (self._cacheroot,))
            pool.close()  # no further tasks; the worker exits when done
            self._free_ctr = self._free_maxctr
        self._free_ctr -= 1

    def cachefile(self, url):
        """The file name of the element in the cache.

        When the URL fragment names an 'item', this is that item inside the
        unpacked archive directory; otherwise the hashed name plus the
        extension of the URL path.
        """
        urlhash = self.hash(url)
        item = self._url_fragment_options(url)['item']
        if item is not None:
            return self.abspath(path.join(urlhash, item))
        p = urlparse.urlsplit(url)
        urlpath = urlparse.urlunsplit([p[0], p[1], p[2], None, None])
        (filename, ext) = viset.util.splitextension(path.basename(urlpath))
        return self.abspath(urlhash + ext)

    def cachedir(self, archive_url):
        """The unpacked directory name of archive files in the cache."""
        p = urlparse.urlsplit(archive_url)
        urlpath = urlparse.urlunsplit([p[0], p[1], p[2], p[3], None])
        return self.abspath(self.hash(urlpath))

    def abspath(self, filename):
        """The absolute file path for elements in the cache"""
        return path.join(self._cacheroot, filename)

    def iscached(self, url):
        """Return true if an element is in the cache (as a file or unpacked directory)."""
        return path.isfile(self.cachefile(url)) or path.isdir(self.cachedir(url))
            
            
def quietprint(mystr, is_verbose):
    """Print `mystr` to standard output only when `is_verbose` is true."""
    if is_verbose:
        # Single-argument call form: behaves the same as the print statement
        # on Python 2 and is also valid on Python 3.
        print(mystr)
            

class CachedResult(object):
    """Lazy handle on a cached URL; mirrors Python's AsyncResult class.

    The object is not fetched until get() is called.  After a successful
    get() the fetched object is remembered, so ready() and successful()
    report True.
    """
    _obj = None       # object returned by the last get(), None until then
    _url = None       # URL this result refers to
    _verbose = False
    _cache = None     # cache used to resolve the URL

    def __init__(self, url, verbose=True, cache=None):
        """Create a result for `url`, using `cache` or a new Cache if None."""
        self._verbose = verbose
        if cache is None:
            self._cache = Cache(verbose=self._verbose)
        else:
            self._cache = cache
        self._url = url

    def successful(self):
        """Return True once get() has produced an object."""
        return (self._obj is not None)

    def wait(self, timeout=None):
        """Set the default socket timeout (in seconds) used for fetching.

        NOTE: this changes the process-wide default for new sockets, not
        just for this result.
        """
        import socket  # local import: this module does not import socket at file level
        socket.setdefaulttimeout(timeout)  # do not set globally

    def ready(self):
        """Return True once get() has produced an object."""
        return (self._obj is not None)

    def get(self):
        """Fetch the object through the cache, remember it and return it."""
        self._obj = self._cache.get(self._url)
        return self._obj

    def __repr__(self):
        return str('<viset.cache: obj=' + str(type(self._obj)) + ', cached=' + str(self._cache.iscached(self._url)) + ', URL=\'' + str(self._url) + '\'>')
    

class CachedImage(CachedResult):
    """Cached result whose representation is tagged as a viset image."""

    def __repr__(self):
        # Same fields as the base representation, under the viset.image tag.
        obj_type = str(type(self._obj))
        is_cached = str(self._cache.iscached(self._url))
        pieces = ['<viset.image: obj=', obj_type, ', cached=', is_cached, ', URL=\'', str(self._url), '\'>']
        return ''.join(pieces)

    
