# -*- coding: utf-8 -*-
# Copyright (C) 2010  Michał Masłowski  <mtjm@mtjm.eu>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


"""
Functions caching data from `getmediumurl.reader.URLReader` subclasses.
"""


from __future__ import with_statement

from heapq import heappush, heappop
import os.path
import random

from getmediumurl.reader import ReaderError, NotFoundError, StaticURLReader


__all__ = ("dict_cache", "directory_cache")


def dict_cache(urlreader, max_size=None):
    """Return a function returning a cached instance of `urlreader`.

    The argument of returned function is an URL string, the same
    return value is used for multiple calls with the same argument.
    A `ReaderError` raised by `urlreader` is cached as well and
    raised again by later calls with the same argument.

    The `max_size` argument is the maximum number of URLs cached, or
    `None` (unlimited).  When the limit is exceeded, the URL whose
    entry was made earliest is removed first; later calls with an
    already cached URL do not prolong the life of its entry.

    `ValueError` is raised if `max_size` is neither `None` nor an
    `int` greater than zero.
    """
    # Used to obtain the caught exception in a way valid both in
    # Python 2 and in Python 3 syntax.
    import sys

    if max_size is not None:
        if not isinstance(max_size, int):
            raise ValueError("max_size must be an int")
        if max_size < 1:
            raise ValueError("max_size must be greater than zero")

    # Maps each URL to its entry: a list of the sequence number of the
    # entry, the URL, the cached object and a flag telling if the
    # object is an exception to be raised instead of returned.
    cache = {}
    # Heap of the same entries, ordered by their unique sequence
    # numbers; filled only if max_size is set.
    urls = []
    # Number of entries made so far.  It is kept in a list, so the
    # inner function can change it, and outside of the cache
    # dictionary, so no argument value can collide with it.
    inserted = [0]

    def cached_urlreader(url):
        """Return `urlreader` instance using the `url`."""
        try:
            entry = cache[url]
        except KeyError:
            try:
                value, failed = urlreader(url), False
            except ReaderError:
                value, failed = sys.exc_info()[1], True
            inserted[0] += 1
            entry = [inserted[0], url, value, failed]
            cache[url] = entry
            if max_size is not None:
                heappush(urls, entry)
                if len(urls) > max_size:
                    # The heap root is the entry made earliest.
                    eldest_url = heappop(urls)[1]
                    del cache[eldest_url]
        if entry[3]:
            raise entry[2]
        return entry[2]

    return cached_urlreader


def directory_cache(urlreader, directory):
    """Return a function returning a cached instance of `urlreader`.

    The argument of returned function is an URL string, the same
    return value is used for multiple calls with the same argument.

    The `directory` argument specifies the directory where the data
    obtained is stored.  Data format might depend on installed
    packages and their versions, and might be changed in any
    GetMediumURL release.  If the needed files are not found, then
    they are replaced with empty ones (but the directory is not
    created).
    """

    import anydbm

    index = anydbm.open(os.path.join(directory, "index"), "c", 0600)

    def cached_urlreader(url):
        """Return `urlreader` instance using the `url`."""
        # dumbdbm doesn't support Unicode strings, while dbm.dumb
        # does.
        if type("") != type(u""):
            eurl = url.encode("ascii")
        else:
            eurl = url
        # raise exception if cached message found
        message = index.get("e" + eurl, None)
        if message is not None:
            raise NotFoundError(message)
        # get cached data
        content_filename = index.get("c" + eurl, None)
        if content_filename is None:
            content = None
        else:
            with open(os.path.join(directory,
                                   content_filename.decode("ascii")), "rb") \
                    as content_file:
                content = content_file.read()
        target = index.get("r" + eurl, url)
        content_type = index.get("t" + eurl, "")
        # return if found
        if (content is not None) or (target != url):
            return StaticURLReader(url, content=content, target=target,
                                   content_type=content_type)
        # get the data and write cache
        try:
            data = urlreader(url)
        except NotFoundError, ex:
            # cache the error message
            index["e" + eurl] = repr(ex.args)
            raise ex
        else:
            # cache the data
            content = data.content
            if content is not None:
                # Make a file name.
                name = None
                while name is None or name in index:
                    name = "".join(random.choice("0123456789")
                                   for i in xrange(0, 8))
                index[name] = ""
                with open(os.path.join(directory, name), "wb") as content_file:
                    content_file.write(content)
                index["c" + eurl] = name
            if data.url is not None:
                index["r" + eurl] = data.url.encode("ascii")
            index["t" + eurl] = data.content_type.encode("ascii")
            return data

    return cached_urlreader
