"""Module to process Project Gutenberg E-Texts into a more usable format."""


from __future__ import absolute_import
from .data.headers import HEADERS
from .data.footers import FOOTERS
import collections
import logging
import os


def strip_headers(data):
    """Remove lines that are part of the Project Gutenberg header or footer.

    Args:
        data: A path to a file to filter, an iterable over some lines to filter
          or a raw text to filter. A string is only treated as a path if
          something exists at that location; otherwise it is split into lines
          and filtered as raw text.

    Returns:
        str: The text that doesn't belong to a header or footer, with the
          remaining lines joined by ``os.linesep``.

    Raises:
        ValueError: If data is not a path to an existing file, an iterable over
          text lines or a raw text to filter.

    """
    # ``basestring`` only exists on Python 2. Resolve the string type lazily so
    # that the check below works on Python 3 as well instead of raising
    # NameError.
    try:
        string_types = basestring  # noqa: F821
    except NameError:
        string_types = str

    if isinstance(data, string_types):
        if os.path.exists(data):
            # The file is opened here, so it is also closed here -- even when
            # the filtering raises.
            with open(data) as lines:
                return os.linesep.join(_strip_headers(lines))
        return os.linesep.join(_strip_headers(data.splitlines()))

    # Anything else has to be iterable. Asking ``iter`` directly avoids the
    # ``collections.Iterable`` alias, which no longer exists on Python 3.10+.
    try:
        lines = iter(data)
    except TypeError:
        raise ValueError('data must be a path to an existing file, an '
                         'iterable over text lines or a raw text')

    # Iterables handed in by the caller (open file objects included) stay
    # open: this function only closes what it opened itself.
    return os.linesep.join(_strip_headers(lines))


def _strip_headers(iterable):
    """Filter the Project Gutenberg header and footer out of some lines.

    This is a port of the C++ utility by Johannes Krugel (link:
    http://www14.in.tum.de/spp1307/src/strip_headers.cpp)

    Lines of at most 12 characters are never compared against the header or
    footer markers and do not advance the line counter used for the marker
    windows. They are still part of the text, so they are kept in the output
    (this preserves blank separator lines and short headings).

    Args:
        iterable (iter): An iterable over some lines to filter.

    Returns:
        list: The lines that don't belong to a header or footer, each with
          leading and trailing whitespace removed.

    """
    out = []
    # Number of long (marker-checked) lines kept so far. It is deliberately
    # not reset when the output is cleared at the end of a header.
    checked = 0

    for lineno, line in enumerate(iterable, start=1):
        if len(line) > 12:
            # Header markers are only honoured while at most 600 long lines
            # have been kept. Everything collected so far belongs to the
            # header and is thrown away; this may happen several times if
            # multiple lines indicate the end of the header.
            if checked <= 600 and any(
                    line.startswith(header) for header in HEADERS):
                logging.debug('found end of header on line %s', lineno)
                out = []
                continue

            # Footer markers are only honoured once at least 100 long lines
            # have been kept. Nothing after the start of the footer is text.
            if checked >= 100 and any(
                    line.startswith(footer) for footer in FOOTERS):
                logging.debug('found start of footer on line %s', lineno)
                break

            checked += 1

        out.append(line.strip())

    return out
