# Copyright (c) 2014  Niklas Rosenstein
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

import sys

# Tuple of the types that count as "a string" in isinstance() checks
# throughout this module: ``str`` on Python 3 and later, ``basestring``
# on Python 2. The comparison is ``>=`` so that any major version
# other than 2 selects ``str`` instead of referencing the Python 2
# only name ``basestring``.
if sys.version_info[0] >= 3:
    string_types = (str,)
else:
    string_types = (basestring,)


def is_data(node):
    """ Tells whether *node* is a text node carrying real content.

    :param node: The :class:`xml.dom.Node` to inspect.
    :return: True if *node* is a text node whose value is not
            empty after stripping whitespace. False for text
            nodes that are empty or whitespace-only and for
            every other node type. """

    is_text = node.nodeType == node.TEXT_NODE
    # The node value is only looked at for text nodes.
    return is_text and bool(node.nodeValue.strip())

def is_element(node):
    """ Tells whether *node* is an element node.

    :param node: The :class:`xml.dom.Node` to inspect.
    :return: True if the node type of *node* equals its
            ``ELEMENT_NODE`` constant, False otherwise. """

    node_type = node.nodeType
    return node_type == node.ELEMENT_NODE

def iter_children(node):
    """ Generator that yields the direct child nodes of *node*
    by following its ``firstChild`` and their ``nextSibling`` links.

    :param node: The :class:`xml.dom.Node` whose children are
            iterated.
    :return: A generator of child nodes. It yields nothing if
            ``node.firstChild`` is None. """

    child = node.firstChild
    # Compare against None explicitly rather than relying on the
    # truthiness of the node: a node type that defines ``__len__``
    # or ``__bool__`` could evaluate to False and would end the
    # iteration early.
    while child is not None:
        yield child
        child = child.nextSibling


class Schema(object):
    """ This class represents a compiled XML Schema: a collection
    of tag definitions that DOM nodes can be validated against.
    Tag definitions can only be added in this state, not changed.
    Use the builder class to easily create a :class:`Schema`.

    The :attr:`root_tag` attribute holds the tag name that is
    expected at the root of a validated tree (or None to accept
    any defined tag) and :attr:`tag_types` maps tag names to their
    definition as stored by :meth:`define_tag`.

    .. seealso:: :class:`SchemaBuilder` """

    @staticmethod
    def validate_child_decl(children_decl):
        """ Checks that a children declaration is well-formed.

        :param children_decl: An iterable of tag-name strings
                and/or :class:`Schema` objects.
        :raise ValueError: If an item is neither a string nor a
                :class:`Schema`, or if a :class:`Schema` item has
                no :attr:`root_tag` set. """

        # Strings are tested against string_types (not plain str)
        # so that the same kinds of strings are accepted here as in
        # process_attr_decl() and _validate_child().
        for child in children_decl:
            if isinstance(child, Schema):
                if not child.root_tag:
                    message = "'Schema.root_tag' must be set on child schema"
                    raise ValueError(message)
            elif not isinstance(child, string_types):
                message = "item in 'children' must be str or Schema"
                raise ValueError(message)

    @staticmethod
    def process_attr_decl(attr_decl):
        """ Parses an attribute declaration into its internal form.

        Each item is an attribute name that may carry any number
        of trailing ``?`` and ``!`` markers (in any order, with
        optional whitespace around them).

        :param attr_decl: An iterable of attribute declaration
                strings.
        :return: A dictionary mapping the bare attribute name to
                an options dictionary with the keys ``'optional'``
                (True if a ``?`` marker was present) and
                ``'non-empty'`` (the number of ``!`` markers).
        :raise ValueError: If an item is not a string or if no
                name is left after removing the markers. """

        attrs = {}
        for attr_name in attr_decl:
            if not isinstance(attr_name, string_types):
                raise ValueError('attribute name must be a string')
            attr_name = attr_name.strip()

            # Peel the markers off the end of the name one at a
            # time until none is left.
            options = {'optional': False, 'non-empty': 0}
            while True:
                if attr_name.endswith('?'):
                    attr_name = attr_name[:-1].strip()
                    options['optional'] = True
                    continue
                if attr_name.endswith('!'):
                    attr_name = attr_name[:-1].strip()
                    options['non-empty'] += 1
                    continue
                break

            if not attr_name:
                raise ValueError('attribute name must not be empty')
            attrs[attr_name] = options

        return attrs

    def __init__(self, root_tag=None):
        """ Creates an empty schema.

        :param root_tag: The tag name expected at the root of a
                validated tree, or None to not check the root. """

        super(Schema, self).__init__()
        self.root_tag = root_tag
        self.tag_types = {}

    def define_tag(self, name, attrs, children, data_allowed):
        """ Defines a new tag type that is accepted and validated
        by the schema.

        :param name: The name of the tag.
        :param attrs: A list of strings that define the attributes
                for the tag, or None to skip attribute validation
                for the tag. Appending a question-mark to the name
                marks the attribute as optional. Appending an
                exclamation mark marks the attribute as required
                non-empty. Both can be combined. Two exclamation
                marks will strip the whitespace of the attribute
                before figuring if its non-empty.
        :param children: A list of tag-names or :class:`Schema`
                objects that are accepted as child-nodes for the
                tag, or None if any child elements are accepted
                (they are not validated in that case).

                Note that the :attr:`root_tag` must be set on
                the :class:`Schema` objects, a *ValueError*
                is raised otherwise.
        :param data_allowed: Whether the tag accepts data node
                in its child list.
        :raise ValueError: see above, if the attribute or children
                declaration is invalid, or if a tag with the
                specified *name* was already registered.
        """

        if name in self.tag_types:
            raise ValueError("'{0}' already registered.".format(name))

        if children is not None:
            # Store a private copy so later changes to the list
            # passed by the caller do not affect the schema.
            children = list(children)
            Schema.validate_child_decl(children)

        if attrs is not None:
            attrs = Schema.process_attr_decl(attrs)

        self.tag_types[name] = {
            'data_allowed': data_allowed,
            'children': children,
            'attrs': attrs,
        }

    def validate(self, node, is_root=True):
        """ Validates *node* and, recursively, its child elements
        against the tag definitions of this schema.

        :param node: The :class:`xml.dom.Node` to validate.
        :param is_root: If True and :attr:`root_tag` is set, the
                tag name of *node* must equal :attr:`root_tag`.
        :raise UnexpectedRootError: If the root check fails.
        :raise ValidationError: If the tag is unknown, its
                attributes do not match the declaration, it
                contains a data node although none are allowed,
                or it contains an element that is not declared
                as a child.
        :raise RuntimeError: If a sub-schema raises
                :class:`UnexpectedRootError`, which should not
                happen because sub-schemas are selected by their
                root tag. """

        # If this is the root tag and we expect a specific
        # tag at the root, we'll check that and raise an
        # exception if it doesn't match our requirements.
        if is_root and self.root_tag and self.root_tag != node.nodeName:
            message = 'expected <{0}>, got <{1}>'
            message = message.format(self.root_tag, node.nodeName)
            raise UnexpectedRootError(message, node)

        data = self._get_tag_data(node)
        self._validate_attrs(node, data['attrs'])

        # Iterate over all child nodes and validate them.
        for child in iter_children(node):

            # If data-nodes are not allowed for this tag and
            # the current child node is a data node, we'll
            # raise a ValidationError.
            if not data['data_allowed'] and is_data(child):
                message = '<{0}> accepts no data nodes'.format(node.nodeName)
                raise ValidationError(message, node)

            # If the current child node is an element, we'll
            # continue recursively with the validation. A children
            # declaration of None accepts any element and stops
            # the validation at this level.
            if is_element(child) and data['children'] is not None:
                # Check if the child is any of the accepted
                # child tags.
                subschema = self._validate_child(node, child, data['children'])
                if subschema:
                    try:
                        subschema.validate(child, True)
                    except UnexpectedRootError:
                        message = 'UnexpectedRootError raised from ' \
                                  'sub-Schema, this should not happen'
                        raise RuntimeError(message)
                else:
                    self.validate(child, False)

    def _get_tag_data(self, node):
        """ Returns the definition stored for the tag of *node*.

        :raise ValidationError: If the tag name of *node* was
                not defined in this schema. """

        data = self.tag_types.get(node.nodeName, None)
        if data is None:
            message = '<{0}> is an unknown tag'
            raise ValidationError(message.format(node.nodeName), node)
        return data

    def _validate_attrs(self, node, attr_decl):
        """ Checks the attributes of *node* against *attr_decl*.

        :param node: The element node to check.
        :param attr_decl: The processed attribute declaration as
                returned by :meth:`process_attr_decl`, or None to
                accept any attributes.
        :raise ValidationError: If a non-optional attribute is
                missing, an undeclared attribute is present or an
                attribute declared non-empty has an empty value. """

        if attr_decl is None:
            return

        # Create sets from the attributes the node has
        # and the attributes that are declared for the node.
        got_attrs = set(node.attributes.keys())
        declared_attrs = set(attr_decl.keys())

        # The names are visited in sorted order in all loops below
        # so that the attribute reported in the error message does
        # not depend on the iteration order of the sets.

        # (declared - got) => missing attributes
        for missing_attr in sorted(declared_attrs - got_attrs):
            # Check if the attribute is optional.
            if attr_decl[missing_attr]['optional']:
                continue
            message = 'attribute <{0} {1}> missing'
            message = message.format(node.nodeName, missing_attr)
            raise ValidationError(message, node)

        # (got - declared) => unexpected attributes
        unexpected_attrs = sorted(got_attrs - declared_attrs)
        if unexpected_attrs:
            message = 'unexpected attribute <{0} {1}>'
            message = message.format(node.nodeName, unexpected_attrs[0])
            raise ValidationError(message, node)

        # Make sure all attributes that are declared
        # non-empty actually have a non-empty value. At this
        # point every attribute of the node is a declared one.
        for attr_name in sorted(got_attrs):
            empty_mode = attr_decl[attr_name]['non-empty']
            if empty_mode == 0:
                continue

            value = node.attributes[attr_name].nodeValue
            message = 'attribute <{0} {1}> must not be empty'
            message = message.format(node.nodeName, attr_name)
            if empty_mode == 1:
                if not value:
                    raise ValidationError(message, node)
            else:
                # Two or more exclamation marks: whitespace-only
                # values count as empty as well.
                value = value.strip()
                if not value:
                    message = message + ' (whitespace stripped)'
                    raise ValidationError(message, node)

    def _validate_child(self, parent, child, children_decl):
        """ Checks if *child* is accepted by the *parent*
        node from its *children_decl* (a list of strings or
        sub :class:`Schema`s).

        :return: The sub :class:`Schema` that should be
                used for the validation, None if the current
                schema should be used again.
        :raise ValidationError: If no item of *children_decl*
                matches the tag name of *child*.
        :raise RuntimeError: If *children_decl* contains an item
                that is neither a string nor a :class:`Schema`. """

        for tagname in children_decl:
            # If the accepted children is a sub-schema, we
            # try to match it when the tag names apply.
            if isinstance(tagname, Schema):
                schema = tagname
                if schema.root_tag == child.nodeName:
                    return schema # use this subschema
            elif isinstance(tagname, string_types):
                if tagname == child.nodeName:
                    return None # use self
            else:
                raise RuntimeError('this should not happen')

        message = 'unexpected <{0}> in <{1}>'
        message = message.format(child.nodeName, parent.nodeName)
        raise ValidationError(message, child)

class SchemaBuilder(object):
    """ This class makes it easier to build a :class:`Schema`.
    Tags are collected with :meth:`define_tag` and converted into
    a :class:`Schema` with :meth:`get_schema`.

    Features:

    * Cross-reference child declarations
    * Define optional attributes by appending a ``?``
    * Accept any attributes or any children by passing ``'*'``
    """

    def __init__(self, root_tag=None):
        """ Creates an empty builder.

        :param root_tag: The root tag name that is passed on to
                the :class:`Schema` created by :meth:`get_schema`.
        """

        super(SchemaBuilder, self).__init__()
        self.root_tag = root_tag
        self.tag_types = {}

    def define_tag(self, name, attrs=(), children=(), data_allowed=False):
        """ Defines a tag in the schema.

        :param name: The name of the tag.
        :param attrs: Just like the attribute declaration for
                :meth:`Schema.define_tag`. The string '*' may be
                passed to accept any attributes.
        :param children: A list of strings or sub :class:`Schema`s.
                A string item may end with ``/children`` to
                cross-reference the children declaration of another tag.
                The string '*' may be passed to indicate that the
                node accepts all kinds of nodes and the validator
                will not continue validating its child nodes.
        :param data_allowed: If data nodes are accepted in the tag.
        :raise ValueError: If a tag-type with the specified *name*
                was already specified, if the :attr:`Schema.root_tag`
                attribute was not set on a subschema or if any other
                kind of invalid declaration data was detected.
        """

        if name in self.tag_types:
            raise ValueError('tag {0} already registered'.format(name))

        # Process the attributes declaration.
        if attrs == '*':
            attrs = None
        elif isinstance(attrs, string_types):
            raise ValueError("'attrs' must be \"*\" if a string is passed")
        else:
            # Keep a private copy so that neither a shared default
            # nor a list the caller changes later (nor a one-shot
            # iterator) ends up in the stored declaration.
            attrs = list(attrs)
            # Process to validate, but not use returned
            # structure.
            Schema.process_attr_decl(attrs)

        # Process the children declaration.
        if children == '*':
            children = None
        elif isinstance(children, string_types):
            raise ValueError("'children' must be \"*\" if a string is passed")
        else:
            children = list(children)
            Schema.validate_child_decl(children)

        # Save the declarations.
        self.tag_types[name] = {
            'data_allowed': data_allowed,
            'children': children,
            'attrs': attrs,
        }

    def expand_children(self, children_decl):
        """ Returns a new list from *children_decl* where
        cross-referenced children are expanded into the list.

        :param children_decl: A list of strings and/or
                :class:`Schema` objects, or None for a tag that
                accepts any children.
        :return: The expanded list, or None if *children_decl* is
                None or cross-references a tag that accepts any
                children.
        :raise KeyError: If a cross-reference names a tag that
                has not been defined.
        :raise ValueError: If the cross-references are circular.
        """

        # None marks "accepts any children" (see define_tag) and
        # is passed through unchanged.
        if children_decl is None:
            return None
        return self._expand_children(children_decl, [])

    def _expand_children(self, children_decl, active):
        """ Recursive helper of :meth:`expand_children`. *active*
        is the list of tag names whose children declaration is
        currently being expanded, used to detect circular
        cross-references. """

        suffix = '/children'
        results = []
        for child in children_decl:
            if isinstance(child, Schema):
                results.append(child)
                continue
            if not isinstance(child, string_types):
                raise RuntimeError('this should not happen')
            if not child.endswith(suffix):
                results.append(child)
                continue

            tag_name = child[:-len(suffix)]
            if tag_name in active:
                chain = ' -> '.join(active + [tag_name])
                message = "circular '/children' reference: {0}"
                raise ValueError(message.format(chain))

            sub_children = self.tag_types[tag_name]['children']
            if sub_children is None:
                # The referenced tag accepts anything, therefore
                # the referencing tag accepts anything as well.
                return None

            active.append(tag_name)
            try:
                expanded = self._expand_children(sub_children, active)
            finally:
                active.pop()
            if expanded is None:
                return None
            results.extend(expanded)
        return results

    def get_schema(self):
        """ Converts the :class:`SchemaBuilder` to an actual
        :class:`Schema` object.

        :return: A new :class:`Schema` with the builder's root tag
                and all defined tags, with cross-referenced
                children expanded.
        :raise KeyError: see :meth:`expand_children`.
        :raise ValueError: see :meth:`expand_children` and
                :meth:`Schema.define_tag`. """

        schema = Schema(self.root_tag)
        for name, data in self.tag_types.items():
            schema.define_tag(
                    name, data_allowed=data['data_allowed'],
                    children=self.expand_children(data['children']),
                    attrs=data['attrs'])
        return schema


class ValidationError(Exception):
    """ Raised when an XML structure does not validate against
    a :class:`Schema`.

    :param message: The error message, passed on to
            :class:`Exception`.
    :param node: The node the error refers to. It is stored in
            the :attr:`node` attribute. """

    def __init__(self, message, node):
        self.node = node
        super(ValidationError, self).__init__(message)

class UnexpectedRootError(ValidationError):
    """ A specialization of :class:`ValidationError` that is raised
    when the root node does not have the expected tag name. """


