"""HTML parser classes."""
# Copyright (C) 2013 Scott Garrett <mail@exovenom.net>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import datetime
import getpass
import html.entities
import html.parser
import http.cookiejar
import logging



logger = logging.getLogger(__name__)



class EditEntryLinks(html.parser.HTMLParser):
    """Makes a list of all of the entry editor URLs on a VBulletin Blogs page.

    Feed the HTML of a blog listing page to the parser, then read the
    attributes below.

    Instance attributes:
    urls -- the list of entry editor URLs found on the page
    next_page_url -- the URL of the next page of blog entries or None
    """
    def __init__(self):
        # The ``strict`` keyword was deprecated in Python 3.3 and removed in
        # 3.5, where passing it raises TypeError.  The tolerant parser is the
        # default, so no argument is needed.
        super().__init__()
        self.urls = []
        self.next_page_url = None


    def handle_starttag(self, tag, attrs):
        """Record editor and next-page links found on ``<a rel=...>`` tags."""
        if tag != "a":
            return
        attrs = dict(attrs)
        rel = attrs.get("rel")
        href = attrs.get("href")
        # An anchor without a target gives us nothing to record.
        if rel is None or href is None:
            return
        # Entry editor
        if rel == "nofollow" and attrs.get("class") == "edit_blog":
            logger.debug("Found entry editor URL: %s", href)
            self.urls.append(href)
        # Next page
        elif rel == "next":
            self.next_page_url = href
            logger.debug("Found next page URL: %s", self.next_page_url)



class DJParser(html.parser.HTMLParser):
    """Parses DV dream journal entries.

    Feed the HTML of an entry editor page to the parser; the attributes below
    are filled in as the corresponding form elements are encountered.

    Class attributes:
    category_map -- mapping of checkbox id names to the tag they represent

    Instance attributes:
    date -- date of the dream journal entry (set when ``</body>`` is reached)
    title -- title of the entry
    tags -- category tags describing the type of journal entry
    entry -- body text of the entry; a list of chunks while the text area is
             being read, a single string once its end tag has been seen
    """
    category_map = {
        'cb_2': 'lucid',
        'cb_3': 'non-lucid',
        'cb_4': 'nightmare',
        'cb_5': 'false awakening',
        'cb_6': 'memorable',
        'cb_7': 'task of the month',
        'cb_8': 'task of the year',
        'cb_9': 'dream fragment',
        'cb_10':'side notes'
    }

    # Maps the id of a publish date ``<input>`` to the date part it holds.
    # (The minute field is matched by name instead, see ``_handle_input``.)
    _date_input_ids = {
        'publish_date': 'day',
        'publish_year': 'year',
        'publish_hour': 'hour',
    }


    def __init__(self):
        # The ``strict`` keyword was removed in Python 3.5 and raises
        # TypeError there.  ``convert_charrefs=False`` keeps entity and
        # character references flowing through ``handle_entityref`` and
        # ``handle_charref`` below; with the 3.5+ default of True they would
        # never be called.
        super().__init__(convert_charrefs=False)
        # The base constructor already resets; repeating it is harmless and
        # guarantees our own attributes exist.
        self.reset()


    def reset(self):
        """Reset the parser and clear everything collected so far."""
        logger.debug("Reset parser")
        super().reset()

        # Date parts are gathered individually and combined at ``</body>``.
        self._date = {
            'year': None,
            'month': None,
            'day': None,
            'hour': None,
            'minute': None
        }
        self.date = None
        self.title = None
        self.tags = []
        self.entry = []

        # True while we are inside the journal entry text area.
        self.keep_reading_entry = False


    def handle_starttag(self, tag, attrs):
        """Pick the entry's fields out of the editor form's start tags."""
        attrs = dict(attrs)
        if tag == "input":
            self._handle_input(attrs)
        # Month of publish. If we hit this, use a month of 0 as a flag...
        elif tag == "select" and attrs.get("name") == "publish[month]":
            logger.debug("Found entry publish month selection area")
            self._date["month"] = 0
        # ...to actually set it when we find the selected month option.
        elif self._date["month"] == 0 and tag == "option" and "selected" in attrs:
            self._set_date_part("month", attrs)
        # The journal entry. Set a flag to collect its body if we hit it.
        elif tag == "textarea" and attrs.get("id") == "vB_Editor_001_editor":
            self.keep_reading_entry = True
            logger.debug("Found entry text area")


    def _handle_input(self, attrs):
        """Handle one ``<input>`` tag given its attributes as a dict."""
        eid = attrs.get("id")
        name = attrs.get("name")
        # Title of entry
        if eid == "titlefield":
            self.title = attrs.get("value")
            logger.debug("Found entry title: %s", self.title)
        # A selected category tag
        elif name == "categories[]" and "checked" in attrs:
            category = self.category_map.get(eid)
            if category is None:
                # Don't pollute ``tags`` with None for checkboxes we don't
                # know how to name.
                logger.warning("Ignoring unknown category checkbox: %s", eid)
            else:
                logger.debug("Found entry category tag: %s", category)
                self.tags.append(category)
        # Day, year or hour of entry
        elif eid in self._date_input_ids:
            self._set_date_part(self._date_input_ids[eid], attrs)
        # Minute of entry (identified by name rather than id)
        elif name == "publish[minute]":
            self._set_date_part("minute", attrs)


    def _set_date_part(self, part, attrs):
        """Store the integer ``value`` attribute as the given date part."""
        n = int(attrs["value"])
        logger.debug("Found entry publish %s: %s", part, n)
        self._date[part] = n


    def handle_data(self, data):
        # Append contents if we're supposed to be collecting the journal entry.
        if self.keep_reading_entry:
            logger.debug("Append chunk: %s char(s)", len(data))
            self.entry.append(data)


    def handle_entityref(self, name):
        # Convert and append the appropriate character if we run into a
        # character entity while parsing and collecting the journal entry.
        # Unknown entity names are replaced by a space.
        if self.keep_reading_entry:
            c = chr(html.entities.name2codepoint.get(name, 0x20))
            logger.debug("Append entity: %s -> %s", name, c)
            self.entry.append(c)


    def handle_charref(self, name):
        # Convert and append the appropriate character if we run into a
        # character reference while parsing and collecting the journal entry.
        if self.keep_reading_entry:
            try:
                # The hex marker may be either case: ``&#x41;`` or ``&#X41;``.
                if name[:1] in ("x", "X"):
                    c = chr(int(name[1:], 16))
                else:
                    c = chr(int(name))
            except (ValueError, OverflowError):
                # Malformed or out-of-range reference: substitute U+FFFD
                # rather than aborting the whole parse.
                c = "\ufffd"
            logger.debug("Append char ref: %s -> %s", name, c)
            self.entry.append(c)


    def handle_endtag(self, tag):
        # If we hit the end of the journal entry textarea, join the chunks of
        # the journal entry into one string and unset the flag.
        if tag == "textarea" and self.keep_reading_entry:
            logger.debug("Concatenating")
            self.entry = ''.join(self.entry).replace("\r", "")
            logger.debug("Done parsing entry text area")
            self.keep_reading_entry = False
        # When we hit the end of the body, create a datetime object from the
        # publish date parts.  datetime raises TypeError if any part is still
        # None and ValueError if the parts do not form a valid date.
        elif tag == "body":
            self.date = datetime.datetime(**self._date)

