# -*- coding: utf-8 -*-
# Copyright (C) 2011  Michał Masłowski  <mtjm@mtjm.eu>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


"""This module contains the class designed to be used in other applications."""


from __future__ import absolute_import

from urlreader.backend import Backend
from urlreader.backends import UrllibBackend
from urlreader.cache import Cache
from urlreader.caches import DictionaryCache
from urlreader.exceptions import (ReaderError, UnknownURLError,
                                  NonWritableCacheError, NotFoundError)


# Public API of this module: only the `URLReader` class is exported by
# ``from ... import *``.
__all__ = ("URLReader",)


# Initial value assigned to `URLReader.user_agent` by the constructor; a
# browser-like identification string (Firefox 3.6.6 on Trisquel 4.0).
_DEFAULT_USER_AGENT = (
    "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.6) "
    "Gecko/20100709 Trisquel/4.0 (taranis) Firefox/3.6.6"
)


class URLReader(object):

    """Get documents from URLs using specified backends and caches.

    An application should use only one instance of this class, since
    caches and backends used share some data for use with different
    documents.

    The constructor arguments *caches* and *backends* are iterables of
    `urlreader.cache.Cache` or `urlreader.backend.Backend` objects, or
    a single object of such type.  If left as `None` the default
    choice of `urlreader.caches.DictionaryCache` and
    `urlreader.backends.UrllibBackend` will be used.

    If multiple caches are used, the first one doing the task will be
    used, i.e. they will be tried from first to last to get the
    document and it will be saved only to the first one which can be
    written to.
    """

    def __init__(self, caches=None, backends=None):
        """Make a new `URLReader` instance.

        *caches* and *backends* are each `None` (use the default), a
        single object, or an iterable of objects.  Iterables are
        copied into tuples: they are walked again on every fetch and
        on every `user_agent` assignment, so a one-shot iterable such
        as a generator would otherwise be empty after its first use.
        """
        if caches is None:
            caches = (DictionaryCache(),)
        elif isinstance(caches, Cache):
            caches = (caches,)
        self._caches = tuple(caches)
        if backends is None:
            backends = (UrllibBackend(),)
        elif isinstance(backends, Backend):
            backends = (backends,)
        self._backends = tuple(backends)
        self._user_agent = ""  # avoid pylint warning
        # Goes through the property setter, which also configures
        # every backend.
        self.user_agent = _DEFAULT_USER_AGENT

    @property
    def user_agent(self):
        """
        The HTTP ``User-Agent`` header used by HTTP backends.

        Silently ignored by backends which use a hardwired one or
        don't use protocols using such header.

        Assigning to this property sets the ``user_agent`` attribute
        of every backend of this reader.
        """
        return self._user_agent

    @user_agent.setter
    def user_agent(self, value):
        """Set the ``User-Agent`` on all backends and remember it."""
        for backend in self._backends:
            backend.user_agent = value
        self._user_agent = value

    def _get_from_cache(self, url):
        """Get a document from cache or `None` if not found.

        Caches are tried in order; the first one containing *url*
        provides the result.
        """
        for cache in self._caches:
            try:
                return cache[url]
            except KeyError:
                # Not in this cache, try the next one.
                continue
        return None

    def _get_from_backend(self, url, document, refetch):
        """Get a document from backend.

        *document* is the object previously found in a cache for
        *url*, or `None`.  When *refetch* is true and *document* is
        not `None`, each backend's ``refetch(document)`` is tried and
        a `None` result from it is skipped; otherwise each backend's
        ``fetch(url)`` is tried.

        Returns a pair of document and boolean specifying if it was
        replaced.  When a backend raises `NotFoundError`, the
        exception instance is returned as the document (with `True`)
        instead of propagating, so that the caller can store it in a
        cache before raising it.  When no backend produced anything,
        the *document* argument is returned unchanged with `False`.
        """
        use_refetch = refetch and document is not None
        for backend in self._backends:
            try:
                # TODO: use only the original backend for refetch
                if use_refetch:
                    new = backend.refetch(document)
                    if new is not None:
                        return new, True
                else:
                    return backend.fetch(url), True
            except UnknownURLError:
                # This backend raised UnknownURLError for the URL,
                # try the next one.
                continue
            except NotFoundError as ex:
                return ex, True
        return document, False

    def _save_to_cache(self, url, document):
        """Save *document* for *url* in the first writable cache.

        Caches raising `NonWritableCacheError` are skipped.  Returns
        the object read back from the cache that accepted the write,
        or *document* itself if no cache accepted it.
        """
        for cache in self._caches:
            try:
                cache[url] = document
            except NonWritableCacheError:
                continue
            # Read it back so the caller gets whatever the cache now
            # holds for this URL.
            return cache[url]
        return document

    def fetch(self, url, refetch=False):
        """Make an `urlreader.document.Document` instance for given *url*.

        By default the object is obtained from cache if possible.  If
        the *refetch* argument is set to `True`, then it will be
        refetched unless it is known to be not needed using HTTP
        (e.g. if the server states that it is not modified since
        previous fetch).

        Raises `ValueError` if *url* is `None` and `ReaderError` if no
        cache or backend could provide the document.  An error
        obtained from a backend is saved in the cache like a document
        and then raised; an error found in the cache by a later call
        is raised as well, so repeated calls behave the same way.

        Calling an instance of `URLReader` has the same effect as
        calling this method.
        """
        if url is None:
            raise ValueError("cannot fetch with an url of None")
        document = self._get_from_cache(url)
        if document is None or refetch:
            document, replaced = self._get_from_backend(url, document,
                                                        refetch)
            if document is None:
                # The one-element tuple keeps the formatting correct
                # even if *url* itself is a tuple.
                raise ReaderError("Couldn't get %s from any cache or backend"
                                  % (url,))
            if replaced:
                # Replace in first cache which can have it changed.
                document = self._save_to_cache(url, document)
        # Covers both a fresh error from a backend and one stored by
        # an earlier call; never hand an exception object to the
        # caller as if it were a document.
        if isinstance(document, ReaderError):
            raise document
        return document

    __call__ = fetch
