# -*- coding: utf-8 -*-
# Copyright (C) 2010 Osama Khalid osamak[at]gnu.org
# Copyright (C) 2010, 2011  Michał Masłowski  <mtjm@mtjm.eu>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


# Based on youogg's utilities.py file from SVN revision 50.


"""
Implementations of `urlreader.backend.Backend`.
"""


from __future__ import absolute_import, unicode_literals

import socket

from urlreader._compat import build_opener, URLError, HTTPError
from urlreader.backend import Backend
from urlreader.exceptions import ReaderError, NotFoundError
from urlreader.document import Document


# Only the backend class is exported; the document class defined below is
# an implementation detail handed out by `UrllibBackend.fetch`.
__all__ = ("UrllibBackend",)


# NOTE: importing this module has a process-wide side effect: every socket
# created afterwards without an explicit timeout gets a 30 second timeout.
socket.setdefaulttimeout(30)


#: Largest document which is kept cached between iterations.
#: Compared against the document size and the number of bytes read,
#: so the unit is bytes (1 MiB).
_MAX_CACHED = 1024 * 1024


class _UrllibDocument(Document):

    """A document with content fetched by `urllib2`.

    The content is streamed from the open response on first iteration.
    Documents of at most `_MAX_CACHED` bytes are kept in memory so that
    they can be iterated again; larger ones can be iterated only once.
    """

    def __init__(self, url, document, *args, **kwargs):
        """Make an instance with specified data.

        *url* and any extra positional and keyword arguments are passed
        to the `Document` initializer unchanged.  *document* is the open
        response object; only its ``read(n)`` and ``close()`` methods
        are used here.
        """
        super(_UrllibDocument, self).__init__(url, *args, **kwargs)
        self._document = document
        # ``None`` until the response has been read completely once.
        self._content = None

    def __iter__(self):
        """Iterate content fragments.

        The first complete iteration reads the response in 4096 byte
        fragments and closes it; later iterations yield the cached
        content as a single fragment.

        Raises `ReaderError` if reading fails with `URLError`, or if
        the document is iterated again although it was too large to be
        cached.
        """
        if self._content is not None:
            if self._size > _MAX_CACHED:
                raise ReaderError("iterated again, but too large to keep")
            yield self._content
            return
        # NOTE(review): self._size is presumably set by Document.__init__
        # from the ``size`` argument and is None when unknown -- confirm.
        size = self._size
        # Cache documents of up to and including _MAX_CACHED bytes, which
        # is exactly what the re-iteration check above accepts.  When the
        # size is unknown, cache optimistically and give up once the
        # limit is exceeded (comparing None with an int fails on
        # Python 3 and is always true on Python 2).
        keep = size is None or size <= _MAX_CACHED
        fragments = []
        downloaded = 0
        try:
            while size is None or downloaded < size:
                try:
                    fragment = self._document.read(4096)
                except URLError as ex:
                    raise ReaderError(ex)
                if not fragment:
                    break
                downloaded += len(fragment)
                if keep:
                    if size is None and downloaded > _MAX_CACHED:
                        # Too large after all: free what was collected.
                        keep = False
                        fragments = []
                    else:
                        fragments.append(fragment)
                yield fragment
        finally:
            # Release the connection even if reading failed or the
            # consumer stopped iterating early.
            self._document.close()
        # Join once instead of concatenating bytes on every fragment.
        self._content = b"".join(fragments)
        if self._size is None:
            self._size = downloaded


class UrllibBackend(Backend):

    """Find documents at URLs using `urllib2`.

    A separate connection is used for each fetch, documents are not
    compressed and they are refetched even if they are unchanged on
    the server.
    """

    def __init__(self):
        """Initialize the backend with a fresh opener."""
        super(UrllibBackend, self).__init__()
        self._opener = build_opener()

    def _set_user_agent(self, user_agent):
        """Set the HTTP ``User-Agent`` header used.

        This replaces all default headers of the opener by the single
        ``User-Agent`` header.
        """
        self._opener.addheaders = [("User-Agent", user_agent)]

    # Write-only property: there is no getter.
    user_agent = property(fset=_set_user_agent)

    def fetch(self, url):
        """Read an *url*.

        Return a document which streams the response content when
        iterated.  Its size is taken from the ``Content-Length`` header
        and is ``None`` if the header is missing or not an integer.

        Raises `NotFoundError` for HTTP status 404 and `ReaderError`
        for any other HTTP or URL error.
        """
        try:
            document = self._opener.open(url)
        except HTTPError as ex:
            if ex.code == 404:
                raise NotFoundError(ex)
            # The urllib HTTPError constructor takes (url, code, msg,
            # hdrs, fp), so it cannot be built from (code, ex); report
            # the failure like other URL errors instead.
            raise ReaderError(ex)
        except URLError as ex:
            raise ReaderError(ex)
        info = document.info()
        # A missing header gives None here (indexing raises KeyError only
        # on Python 2), so int() fails with TypeError; a malformed value
        # fails with ValueError.
        try:
            size = int(info.get("Content-Length"))
        except (TypeError, ValueError):
            size = None
        return _UrllibDocument(url, document=document, target=document.url,
                               content_type=info.get("Content-Type", ""),
                               size=size)

    # TODO: implement refetch
