# -*- coding: utf-8 -*-
# Copyright (C) 2010, 2011  Michał Masłowski  <mtjm@mtjm.eu>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.


"""
This module provides the base class to get data of a document.
"""


from __future__ import absolute_import

from urlreader.exceptions import ReaderError


# Public API of this module: only the `Document` class is exported by
# ``from ... import *``.
__all__ = ("Document",)


class Document(object):

    """Access to content and metadata of a document at a URL.

    The constructor's arguments are the URL where the document is
    located and optionally static values used by `url` (named
    *target*), `content_type` and `size` properties.

    Iterating over an instance of this class gives fragments of the
    document content, using `bytes` objects.  The base class itself
    has no content source: `__iter__` must be overridden by
    subclasses.

    Applications using URLReader will probably obtain instances of
    this class or its subclasses only using the
    `urlreader.urlreader.URLReader.fetch` method.
    """

    #: Largest known `size`, in octets, for which `content` still
    #: returns the whole document as one `bytes` object (1 MiB).
    _MAX_CONTENT_SIZE = 1024 * 1024

    def __init__(self, url, target=None, content_type=u"", size=None):
        """Make a `Document` instance for a specific document.

        *url* is the requested URL, returned by `request_url`.

        *target* is the value returned by `url`; when it is `None`,
        *url* is used instead.

        *content_type* is the value returned by `content_type`; it
        defaults to an empty string.

        *size* is the value returned by `size`; it defaults to `None`.
        """
        #: The original URL of the document.
        self._url = url
        # Without an explicit target the document is assumed to be
        # located at the URL it was requested from.
        self._target = url if target is None else target
        self._content_type = content_type
        self._size = size

    def __iter__(self):
        """Iterate over `bytes` fragments of the document.

        This base implementation always raises
        `urlreader.exceptions.ReaderError`; subclasses override it to
        provide the actual content.
        """
        # No truth test of `self` here: a subclass may define
        # `__len__` or `__bool__`, and a falsy instance must still get
        # the ReaderError instead of an AssertionError.
        raise ReaderError("Document.__iter__ not overridden")

    def prepare_content(self):
        """Fetch content if needed for cache.

        Does nothing unless the cache has a use for it.  This base
        implementation is an unconditional no-op returning `None`,
        whatever the truth value of the instance is.
        """

    @property
    def content(self):
        """The document content as a `bytes` object.

        The content is built by joining all fragments obtained by
        iterating over the document.

        Raises `urlreader.exceptions.ReaderError` if the content is
        known to be larger than 1 MiB.  A document of unknown size
        (`size` is `None`) is not checked.
        """
        size = self.size
        if size is not None and size > self._MAX_CONTENT_SIZE:
            raise ReaderError("Document too large for the content method")
        return b"".join(self)

    @property
    def size(self):
        """The document's size in octets or `None` if unknown."""
        return self._size

    @property
    def content_type(self):
        """The document's MIME Content-Type header.

        An empty string is returned in case of insufficient data for a
        meaningful answer.
        """
        return self._content_type

    @property
    def url(self):
        """The target URL of the document.

        Should be different than the URL used to obtain this document
        in case of e.g. HTTP redirects.
        """
        return self._target

    @property
    def request_url(self):
        """The requested URL before any redirection."""
        return self._url
