# -*- coding: utf-8 -*-
# Copyright (C) 2010, 2011  Michał Masłowski  <mtjm@mtjm.eu>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


"""
Implementations of `urlreader.cache.Cache`.
"""


from __future__ import absolute_import

from heapq import heappush, heappop
import os.path
import random
import anydbm

from urlreader.exceptions import ReaderError, NotFoundError
from urlreader.cache import Cache
from urlreader.document import Document


__all__ = ("DictionaryCache", "DirectoryCache")


class DictionaryCache(Cache):

    """A cache keeping documents in a `dict`.

    The *max_size* argument of the constructor is the maximum number
    of URLs cached (an `int`), or `None` (unlimited).  When the limit
    is exceeded, the URLs assigned least recently are removed first;
    reading a document does not change this order.

    `None` cannot be used as a URL, since this key is reserved for
    internal bookkeeping.

    Currently documents cannot be manually removed from the cache.
    """

    def __init__(self, max_size=None):
        """Make a new dictionary cache.

        Raise `TypeError` if *max_size* is neither `None` nor an `int`
        and `ValueError` if it is smaller than one.
        """
        super(DictionaryCache, self).__init__()
        if max_size is not None:
            if not isinstance(max_size, int):
                raise TypeError("max_size must be an int")
            if max_size < 1:
                raise ValueError("max_size must be greater than zero")
        self._max_size = max_size
        # TODO: refactor it in simpler way.
        # None maps to the number of assignments done so far, URLs map
        # to entries: lists of the assignment number, the URL and the
        # stored document (or cached error).
        self._cache = {None: 0}
        # Heap of entries ordered by assignment number; used only when
        # max_size is set.  It may contain stale entries of URLs which
        # were assigned again later.
        self._urls = []

    def __getitem__(self, url):
        """Get a cached `Document` of specified *url*.

        If a `ReaderError` was stored for the *url*, it is raised
        instead.  Raise `KeyError` if nothing is cached for the *url*.
        """
        if url is None:
            # The None key holds the assignment counter, not a document.
            raise KeyError(url)
        value = self._cache[url][2]
        if isinstance(value, ReaderError):
            raise value
        return value

    def __setitem__(self, url, document):
        """Store a *document* in the cache.

        If this makes the cache larger than *max_size*, the URLs
        assigned least recently are removed.  Raise `TypeError` if
        *url* is `None`.
        """
        if url is None:
            # Storing under None would overwrite the assignment counter.
            raise TypeError("url must not be None")
        self._cache[None] += 1
        entry = [self._cache[None], url, document]
        self._cache[url] = entry
        if self._max_size is None:
            return
        heappush(self._urls, entry)
        # The counter stored under None is not a cached URL, hence "- 1".
        while len(self._cache) - 1 > self._max_size:
            eldest = heappop(self._urls)
            # Skip stale heap entries: a URL assigned again later is
            # represented in the dict by its newer entry, which must
            # not be evicted on behalf of the old one.
            if self._cache.get(eldest[1]) is eldest:
                del self._cache[eldest[1]]
        if len(self._urls) > 2 * self._max_size:
            # Too many stale entries accumulated; keep only the live
            # ones.  A sorted list is a valid heap, and the assignment
            # numbers are unique, so only they get compared.
            self._urls = sorted(item for item in self._urls
                                if self._cache.get(item[1]) is item)

    # TODO: support deleting objects from the cache

    def __iter__(self):
        """Iterate keys (URLs) of documents in the cache."""
        # Hide the internal None key holding the assignment counter.
        return (url for url in self._cache if url is not None)


class _StaticDocument(Document):

    """A document whose whole content is already known.

    Instances are built from data read back from the cache, so nothing
    is fetched when they are used.
    """

    def __init__(self, url, content, *args, **kwargs):
        """Make an instance for *url* holding specified *content*.

        Remaining arguments are passed to the `Document` constructor.
        The size is set from the *content* length unless *content* is
        `None`.
        """
        super(_StaticDocument, self).__init__(url, *args, **kwargs)
        self._content = content
        if content is None:
            return
        self._size = len(content)

    def __iter__(self):
        """Iterate content fragments: the whole content at once."""
        whole_content = self._content
        yield whole_content

    @property
    def content(self):
        """The document content, as given to the constructor."""
        return self._content


class _FetchingDocument(Document):

    """A document saving its data while fetching.

    The content of the proxied document is written to a file in the
    cache directory while it is read for the first time.  Once it is
    completely written, the file is registered in the index and later
    iterations read the content back from that file.
    """

    def __init__(self, document, index, cache_dict, directory):
        """Make an instance proxying specified *document*.

        File name will be saved in the database *index* and content to
        *directory*.  After it's complete, this object will be removed
        from *cache_dict* indexed by the URL.
        """
        super(_FetchingDocument, self).__init__(document.request_url,
                                                document.url,
                                                document.content_type,
                                                document.size)
        self._document = document
        self._index = index
        self._saved = False
        # Name of the content file, chosen on first use.
        self._content_name = None
        self._cache_dict = cache_dict
        self._directory = directory

    def _get_file_name(self):
        """Return path of the file storing the content.

        On the first call a random name not yet present in the index
        is chosen and reserved there; later calls return the same
        path, so the content can be read back from the file it was
        written to.
        """
        if self._content_name is None:
            name = None
            while name is None or name.encode("ascii") in self._index:
                name = "".join(random.choice("0123456789")
                               for _ in range(8))
            # Reserve the name so no other document chooses it.
            self._index[name.encode("ascii")] = ""
            self._content_name = name
        return os.path.join(self._directory, self._content_name)

    def _finish_saving(self):
        """Register the completely written content file in the index."""
        eurl = self.request_url.encode("ascii")
        # Done only after the whole content is written, so the index
        # never refers to a partially written file.
        self._index[b"c" + eurl] = self._content_name.encode("ascii")
        self._saved = True
        # The entry might be already gone or replaced; do not fail then.
        self._cache_dict.pop(self.request_url, None)

    def __iter__(self):
        """Iterate document fragments.

        The first complete iteration yields fragments of the proxied
        document and saves them; later ones read the saved file in
        chunks of at most 4096 bytes.
        """
        if not self._saved:
            with open(self._get_file_name(), "wb") as file_object:
                for fragment in self._document:
                    file_object.write(fragment)
                    yield fragment
            self._finish_saving()
            return
        with open(self._get_file_name(), "rb") as file_object:
            while True:
                fragment = file_object.read(4096)
                if not fragment:
                    return
                yield fragment

    def prepare_content(self):
        """Cache the document content.

        Does nothing if the content is already saved.
        """
        if self._saved:
            return
        with open(self._get_file_name(), "wb") as file_object:
            for fragment in self._document:
                file_object.write(fragment)
        self._finish_saving()


class DirectoryCache(Cache):

    """A cache storing documents persistently in files.

    The data format used might depend on installed packages and their
    versions, and might be changed in any URLReader release.  If the
    needed files are not found, then they are replaced with empty ones
    (but the *directory* used is not created).

    The index database maps keys made of a one-letter prefix and the
    URL to the cached data: ``e`` to the arguments of a cached
    `NotFoundError`, ``c`` to the name of the file with the content,
    ``r`` to the URL of the document and ``t`` to its content type.

    Currently the cache cannot be iterated and its documents cannot be removed.
    """

    def __init__(self, directory):
        """Make new instance storing documents in specified *directory*."""
        super(DirectoryCache, self).__init__()
        self._directory = directory
        # 0o600: the index is readable and writable by its owner only.
        self._index = anydbm.open(os.path.join(directory, "index"), "c", 0o600)
        # Documents stored in this cache whose content is not yet
        # completely saved, indexed by URL.
        self._fetching_cache = {}

    def _lookup(self, key, default=None):
        """Return the index value of *key*, or *default* if missing."""
        try:
            return self._index[key]
        except KeyError:
            return default

    def _discard(self, key):
        """Remove *key* from the index if it is there."""
        try:
            del self._index[key]
        except KeyError:
            pass

    def __getitem__(self, url):
        """Get a cached `Document` of specified *url*.

        Raise `NotFoundError` if such error was cached for the *url*
        and `KeyError` if nothing is cached for it.
        """
        try:
            # Documents still being fetched are not yet in the index.
            return self._fetching_cache[url]
        except KeyError:
            pass
        eurl = url.encode("ascii")
        # raise exception if cached message found
        message = self._lookup(b"e" + eurl)
        if message is not None:
            raise NotFoundError(message)
        # get cached data
        content_filename = self._lookup(b"c" + eurl)
        if content_filename is None:
            content = None
        else:
            content_path = os.path.join(self._directory,
                                        content_filename.decode("ascii"))
            with open(content_path, "rb") as content_file:
                content = content_file.read()
        target = self._lookup(b"r" + eurl, url)
        content_type = self._lookup(b"t" + eurl, "")
        # return if found
        if (content is not None) or (target != url):
            return _StaticDocument(url, content=content, target=target,
                                   content_type=content_type)
        raise KeyError(url)

    def __setitem__(self, url, document):
        """Store a *document* in the cache.

        The *document* may be a `NotFoundError` instead; then its
        arguments are cached and it is returned.
        """
        if type("") != type(u""):
            # The native string type is a byte string: encode the URL.
            eurl = url.encode("ascii")
        else:
            eurl = url
        if isinstance(document, NotFoundError):
            # cache the error message
            self._index["e" + eurl] = repr(document.args)
            return document
        else:
            # cache the data
            # An error cached earlier for this URL would otherwise be
            # raised forever instead of returning the document.
            self._discard("e" + eurl)
            if document.url is not None:
                self._index["r" + eurl] = document.url.encode("ascii")
            else:
                # Do not keep the target of a previously stored document.
                self._discard("r" + eurl)
            self._index["t" + eurl] = document.content_type.encode("ascii")
            self._fetching_cache[url] = _FetchingDocument(document,
                                                          self._index,
                                                          self._fetching_cache,
                                                          self._directory)

    # TODO: support deleting objects from the cache and iterating their URLs
