#!/usr/bin/env python

"""
DOM wrapper around libxml2, specifically the libxml2mod Python extension module.

Copyright (C) 2003, 2004, 2005, 2006 Paul Boddie <paul@boddie.org.uk>

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
"""

__version__ = "0.3.5"

from libxml2dom.macrolib import *
from libxml2dom.macrolib import \
    createDocument as Node_createDocument, \
    parseString as Node_parseString, parseURI as Node_parseURI, \
    parseFile as Node_parseFile, \
    toString as Node_toString, toStream as Node_toStream, \
    toFile as Node_toFile

# Attribute and node list wrappers.

class NamedNodeMap(object):

    """
    A wrapper around Node objects providing DOM and dictionary convenience
    methods.

    The map stores no attribute data itself: every operation is forwarded to
    the element node supplied on construction.

    Item access, assignment and deletion use plain attribute names, whereas
    keys() and items() report (namespaceURI, localName) tuples.
    """

    def __init__(self, node):

        "Initialise the map for the element 'node' whose attributes it exposes."

        self.node = node

    def getNamedItem(self, name):

        "Return the attribute node having the given 'name'."

        return self.node.getAttributeNode(name)

    def getNamedItemNS(self, ns, localName):

        "Return the attribute node having the given 'ns' and 'localName'."

        return self.node.getAttributeNodeNS(ns, localName)

    def setNamedItem(self, node):

        """
        Attach the attribute 'node' to the element, returning the attribute
        previously found under the same name (or None if the lookup raised
        KeyError).
        """

        try:
            old = self.getNamedItem(node.nodeName)
        except KeyError:
            old = None
        self.node.setAttributeNode(node)
        return old

    def setNamedItemNS(self, node):

        """
        Attach the attribute 'node' to the element, returning the attribute
        previously found under the same namespace and local name (or None if
        the lookup raised KeyError).
        """

        try:
            old = self.getNamedItemNS(node.namespaceURI, node.localName)
        except KeyError:
            old = None
        self.node.setAttributeNodeNS(node)
        return old

    def removeNamedItem(self, name):

        """
        Remove the attribute having the given 'name', returning the result of
        looking it up beforehand (or None if the lookup raised KeyError).
        """

        try:
            old = self.getNamedItem(name)
        except KeyError:
            old = None
        self.node.removeAttribute(name)
        return old

    def removeNamedItemNS(self, ns, localName):

        """
        Remove the attribute having the given 'ns' and 'localName', returning
        the result of looking it up beforehand (or None if the lookup raised
        KeyError).
        """

        try:
            old = self.getNamedItemNS(ns, localName)
        except KeyError:
            old = None
        self.node.removeAttributeNS(ns, localName)
        return old

    # Dictionary emulation methods.

    def __getitem__(self, name):

        "Return the attribute node having the given 'name'."

        return self.getNamedItem(name)

    def __setitem__(self, name, node):

        """
        Store the attribute 'node' under 'name'. A KeyError is raised if 'name'
        is not the node's own nodeName.
        """

        if name == node.nodeName:
            self.setNamedItem(node)
        else:
            # The call form of raise works on both old and new interpreters.
            raise KeyError(name)

    def __delitem__(self, name):

        "Remove the attribute having the given 'name' from the element."

        # Previously a silent no-op; deletion now reaches the element.
        self.removeNamedItem(name)

    def values(self):

        """
        Return a list of Attribute objects, one for each low-level attribute of
        the wrapped element.
        """

        # The wrapped element is passed as the owner element so that these
        # attributes agree with those returned by getAttributeNode.
        return [Attribute(_node, self.node.ownerDocument, self.node)
            for _node in Node_attributes(self.node.as_native_node()).values()]

    def keys(self):

        "Return a list of (namespaceURI, localName) tuples, one per attribute."

        return [(attr.namespaceURI, attr.localName) for attr in self.values()]

    def items(self):

        """
        Return a list of ((namespaceURI, localName), attribute) tuples, one per
        attribute.
        """

        return [((attr.namespaceURI, attr.localName), attr) for attr in self.values()]

    def __repr__(self):
        return str(self)

    def __str__(self):

        "Return a dictionary-like rendering of the items, one per line."

        return "{%s}" % ",\n".join(["%s : %s" % (repr(key), repr(value)) for key, value in self.items()])

    def _length(self):

        "Return the number of attributes (the DOM 'length' property)."

        return len(self.values())

    length = property(_length)

class NodeList(list):

    """
    A list of nodes which additionally offers the DOM NodeList interface: the
    'item' method and the 'length' property.
    """

    def item(self, index):

        "Return the node at the given 'index', using normal list indexing."

        return self[index]

    def _length(self):

        "Return the number of nodes held in the list."

        return len(self)

    # The DOM name for the size of the list.

    length = property(_length)

# Node classes.

class Node(object):

    """
    A DOM-style wrapper around libxml2mod objects.

    Each instance pairs a low-level node with the document wrapper recorded as
    its owner. The DOM methods and the properties defined towards the end of
    the class delegate to the Node_* helper functions, passing the low-level
    node.
    """

    ATTRIBUTE_NODE = xml.dom.Node.ATTRIBUTE_NODE
    COMMENT_NODE = xml.dom.Node.COMMENT_NODE
    DOCUMENT_NODE = xml.dom.Node.DOCUMENT_NODE
    DOCUMENT_TYPE_NODE = xml.dom.Node.DOCUMENT_TYPE_NODE
    ELEMENT_NODE = xml.dom.Node.ELEMENT_NODE
    ENTITY_NODE = xml.dom.Node.ENTITY_NODE
    ENTITY_REFERENCE_NODE = xml.dom.Node.ENTITY_REFERENCE_NODE
    NOTATION_NODE = xml.dom.Node.NOTATION_NODE
    PROCESSING_INSTRUCTION_NODE = xml.dom.Node.PROCESSING_INSTRUCTION_NODE
    TEXT_NODE = xml.dom.Node.TEXT_NODE

    def __init__(self, node, ownerDocument=None):

        """
        Initialise the wrapper with the low-level 'node' and the optional
        'ownerDocument' wrapper.
        """

        self._node = node
        self.ownerDocument = ownerDocument

    def as_native_node(self):

        "Return the low-level node wrapped by this object."

        return self._node

    # Property accessors - see the property definitions further below.

    def _nodeType(self):
        return Node_nodeType(self._node)

    def _childNodes(self):

        "Return a NodeList of wrappers, one for each low-level child node."

        # NOTE: Consider a generator instead.

        return NodeList([Node(_node, self.ownerDocument) for _node in Node_childNodes(self._node)])

    def _attributes(self):

        "Return a new NamedNodeMap giving access to this node's attributes."

        return NamedNodeMap(self)

    def _namespaceURI(self):
        return Node_namespaceURI(self._node)

    def _nodeValue(self):
        return Node_nodeValue(self._node)

    def _setNodeValue(self, value):
        Node_setNodeValue(self._node, value)

    def _prefix(self):
        return Node_prefix(self._node)

    def _nodeName(self):
        return Node_nodeName(self._node)

    def _tagName(self):
        return Node_tagName(self._node)

    def _localName(self):
        return Node_localName(self._node)

    def _parentNode(self):

        "Return a wrapper for the parent, as chosen by the get_node factory."

        return get_node(Node_parentNode(self._node), self)

    def _previousSibling(self):
        # NOTE(review): the result is wrapped unconditionally; confirm what
        # NOTE(review): Node_previousSibling yields when there is no sibling.
        return Node(Node_previousSibling(self._node), self.ownerDocument)

    def _nextSibling(self):
        # NOTE(review): the result is wrapped unconditionally; confirm what
        # NOTE(review): Node_nextSibling yields when there is no sibling.
        return Node(Node_nextSibling(self._node), self.ownerDocument)

    def _doctype(self):
        return Node(Node_doctype(self._node), self.ownerDocument)

    def _publicId(self):

        """
        Return the public identifier found in the serialised form of a document
        type node, or None for other kinds of node or if no such identifier is
        found.
        """

        # NOTE: To be fixed when the libxml2mod API has been figured out.
        if self.nodeType != self.DOCUMENT_TYPE_NODE:
            return None
        declaration = self.toString()
        return self._findId(declaration, "PUBLIC")

    def _systemId(self):

        """
        Return the system identifier found in the serialised form of a document
        type node, or None for other kinds of node or if no such identifier is
        found.
        """

        # NOTE: To be fixed when the libxml2mod API has been figured out.
        if self.nodeType != self.DOCUMENT_TYPE_NODE:
            return None
        declaration = self.toString()
        if self._findId(declaration, "PUBLIC"):

            # In a PUBLIC declaration the system identifier is the quoted
            # value following the public identifier, so the search starts
            # after the public identifier's closing quote. Both quotes are
            # known to exist since the lookup above produced a value.

            q = declaration.find('"', declaration.find("PUBLIC"))
            q2 = declaration.find('"', q + 1)
            return self._findIdValue(declaration, q2 + 1)
        return self._findId(declaration, "SYSTEM")

    # NOTE: To be removed when the libxml2mod API has been figured out.

    def _findId(self, declaration, identifier):

        """
        Return the first double-quoted value following the first occurrence of
        the 'identifier' keyword in 'declaration', or None if the keyword or a
        complete quoted value is missing.
        """

        i = declaration.find(identifier)
        if i == -1:
            return None
        return self._findIdValue(declaration, i)

    def _findIdValue(self, declaration, i):

        """
        Return the text between the first pair of double quotes found at or
        after index 'i' in 'declaration', or None if there is no such pair.
        """

        q = declaration.find('"', i)
        if q == -1:
            return None
        q2 = declaration.find('"', q + 1)
        if q2 == -1:
            return None
        return declaration[q+1:q2]

    # Attribute access.

    def hasAttributeNS(self, ns, localName):
        return Node_hasAttributeNS(self._node, ns, localName)

    def hasAttribute(self, name):
        return Node_hasAttribute(self._node, name)

    def getAttributeNS(self, ns, localName):
        return Node_getAttributeNS(self._node, ns, localName)

    def getAttribute(self, name):
        return Node_getAttribute(self._node, name)

    def getAttributeNodeNS(self, ns, localName):

        """
        Return an Attribute wrapper, owned by this node, around the result of
        looking up 'ns' and 'localName'.
        """

        return Attribute(Node_getAttributeNodeNS(self._node, ns, localName), self.ownerDocument, self)

    def getAttributeNode(self, localName):

        """
        Return an Attribute wrapper, owned by this node, around the result of
        looking up 'localName'.
        """

        return Attribute(Node_getAttributeNode(self._node, localName), self.ownerDocument, self)

    def setAttributeNS(self, ns, name, value):
        Node_setAttributeNS(self._node, ns, name, value)

    def setAttribute(self, name, value):
        Node_setAttribute(self._node, name, value)

    def setAttributeNodeNS(self, node):
        Node_setAttributeNodeNS(self._node, node._node)

    def setAttributeNode(self, node):
        Node_setAttributeNode(self._node, node._node)

    def removeAttributeNS(self, ns, localName):
        Node_removeAttributeNS(self._node, ns, localName)

    def removeAttribute(self, name):
        Node_removeAttribute(self._node, name)

    # Node creation. New nodes are wrapped with this node's owner document.

    def createElementNS(self, ns, name):
        return Node(Node_createElementNS(self._node, ns, name), self.ownerDocument)

    def createElement(self, name):
        return Node(Node_createElement(self._node, name), self.ownerDocument)

    def createAttributeNS(self, ns, name):

        """
        Return a new Attribute for 'ns' and 'name'. The attribute is created
        against a temporary element and has no owner element.
        """

        tmp = self.createElement("tmp")
        return Attribute(Node_createAttributeNS(tmp._node, ns, name), self.ownerDocument)

    def createAttribute(self, name):

        """
        Return a new Attribute for 'name'. The attribute is created against a
        temporary element and has no owner element.
        """

        tmp = self.createElement("tmp")
        return Attribute(Node_createAttribute(tmp._node, name), self.ownerDocument)

    def createTextNode(self, value):
        return Node(Node_createTextNode(self._node, value), self.ownerDocument)

    def createComment(self, value):
        return Node(Node_createComment(self._node, value), self.ownerDocument)

    # Tree manipulation. Arguments offering as_native_node are unwrapped;
    # anything else is handed to the helper functions as it is.

    def importNode(self, node, deep):

        """
        Import 'node' and return a wrapper for the result. Objects offering
        as_native_node are imported using Node_importNode; all other objects
        are imported using Node_importNode_DOM.
        """

        if hasattr(node, "as_native_node"):
            return Node(Node_importNode(self._node, node.as_native_node(), deep), self.ownerDocument)
        else:
            return Node(Node_importNode_DOM(self._node, node, deep), self.ownerDocument)

    def insertBefore(self, tmp, oldNode):

        "Insert 'tmp' before the wrapped 'oldNode', returning a wrapper for the result."

        if hasattr(tmp, "as_native_node"):
            return Node(Node_insertBefore(self._node, tmp.as_native_node(), oldNode.as_native_node()), self.ownerDocument)
        else:
            return Node(Node_insertBefore(self._node, tmp, oldNode.as_native_node()), self.ownerDocument)

    def replaceChild(self, tmp, oldNode):

        "Replace the wrapped 'oldNode' with 'tmp', returning a wrapper for the result."

        if hasattr(tmp, "as_native_node"):
            return Node(Node_replaceChild(self._node, tmp.as_native_node(), oldNode.as_native_node()), self.ownerDocument)
        else:
            return Node(Node_replaceChild(self._node, tmp, oldNode.as_native_node()), self.ownerDocument)

    def appendChild(self, tmp):

        "Append 'tmp' to this node, returning a wrapper for the result."

        if hasattr(tmp, "as_native_node"):
            return Node(Node_appendChild(self._node, tmp.as_native_node()), self.ownerDocument)
        else:
            return Node(Node_appendChild(self._node, tmp), self.ownerDocument)

    def removeChild(self, tmp):

        "Remove 'tmp' from this node, returning 'tmp' as the removed node."

        if hasattr(tmp, "as_native_node"):
            Node_removeChild(self._node, tmp.as_native_node())
        else:
            Node_removeChild(self._node, tmp)
        return tmp

    def getElementsByTagName(self, tagName):

        "Return a list of the descendants of this node named 'tagName'."

        # A leading "." keeps the search relative to this node; a bare "//"
        # would select from the root of the document regardless of the node
        # on which the method is called.

        return self.xpath(".//" + tagName)

    def getElementsByTagNameNS(self, namespaceURI, localName):

        """
        Return a list of the descendants of this node having the given
        'namespaceURI' and 'localName'.
        """

        return self.xpath(".//ns:" + localName, namespaces={"ns" : namespaceURI})

    def normalize(self):

        "Merge each run of adjacent text node children into a single text node."

        # The child list is a snapshot, so children can be removed safely
        # while it is being traversed.

        text_nodes = []
        for node in self.childNodes:
            if node.nodeType == node.TEXT_NODE:
                text_nodes.append(node)
            elif text_nodes:
                self._normalize(text_nodes)
                text_nodes = []
        if text_nodes:
            self._normalize(text_nodes)

    def _normalize(self, text_nodes):

        """
        Replace the given run of 'text_nodes' with one new text node holding
        their concatenated values.
        """

        # All but the last node are removed; the last node is then replaced
        # by the combined text node so that the text keeps its position.

        texts = []
        for text_node in text_nodes[:-1]:
            texts.append(text_node.nodeValue)
            self.removeChild(text_node)
        texts.append(text_nodes[-1].nodeValue)
        self.replaceChild(self.ownerDocument.createTextNode("".join(texts)), text_nodes[-1])

    childNodes = property(_childNodes)
    value = data = nodeValue = property(_nodeValue, _setNodeValue)
    name = nodeName = property(_nodeName)
    tagName = property(_tagName)
    namespaceURI = property(_namespaceURI)
    prefix = property(_prefix)
    localName = property(_localName)
    parentNode = property(_parentNode)
    nodeType = property(_nodeType)
    attributes = property(_attributes)
    previousSibling = property(_previousSibling)
    nextSibling = property(_nextSibling)
    doctype = property(_doctype)
    publicId = property(_publicId)
    systemId = property(_systemId)

    # NOTE: To be fixed - these being doctype-specific values.
    # NOTE: As class attributes, these dictionaries are shared by all nodes.

    entities = {}
    notations = {}

    #def isSameNode(self, other):
    #    return self._node.nodePath() == other._node.nodePath()

    #def __eq__(self, other):
    #    return self._node.nodePath() == other._node.nodePath()

    # 4DOM extensions to the usual PyXML API.
    # NOTE: To be finished.

    def xpath(self, expr, variables=None, namespaces=None):

        """
        Evaluate the XPath expression 'expr' with this node as the context,
        using the optional 'variables' and 'namespaces' mappings.

        String results are passed through to_unicode, results having a length
        are returned as a NodeList of wrapped nodes, and anything else is
        returned unchanged.
        """

        result = Node_xpath(self._node, expr, variables, namespaces)
        if isinstance(result, str):
            return to_unicode(result)
        elif hasattr(result, "__len__"):
            return NodeList([get_node(_node, self) for _node in result])
        else:
            return result

    # Convenience methods.

    def toString(self, encoding=None, prettyprint=0):

        "Return the serialised form of this node using the toString function."

        return toString(self, encoding, prettyprint)

    def toStream(self, stream, encoding=None, prettyprint=0):

        "Write the serialised form of this node to 'stream' using toStream."

        toStream(self, stream, encoding, prettyprint)

    def toFile(self, f, encoding=None, prettyprint=0):

        "Write the serialised form of this node to 'f' using the toFile function."

        toFile(self, f, encoding, prettyprint)

# Attribute nodes.

class Attribute(Node):

    """
    A node representing an attribute, which additionally records the element
    to which the attribute belongs.
    """

    def __init__(self, node, ownerDocument=None, ownerElement=None):

        """
        Initialise the attribute with the low-level 'node', together with the
        optional 'ownerDocument' and 'ownerElement' wrappers.
        """

        super(Attribute, self).__init__(node, ownerDocument)
        self.ownerElement = ownerElement

    def _parentNode(self):

        "Return the owner element, which acts as the parent of an attribute."

        return self.ownerElement

    # Override the general parent lookup provided by Node.

    parentNode = property(_parentNode)

# Document housekeeping mechanisms.

class Document(Node):

    """
    A node representing an entire document. The wrapper is its own owner
    document, has no parent, and frees the low-level document when it is
    itself destroyed.
    """

    def __init__(self, node):

        "Initialise the document wrapper with the low-level document 'node'."

        # Only the low-level node is stored, since ownerDocument is provided
        # by the read-only property defined below.

        self._node = node

    def __del__(self):

        "Free the low-level document using libxml2mod.xmlFreeDoc."

        libxml2mod.xmlFreeDoc(self._node)

    def _ownerDocument(self):

        "Return this document, which owns itself."

        return self

    ownerDocument = property(_ownerDocument)

    def _parentNode(self):

        "Return None, since a document has no parent."

        return None

    parentNode = property(_parentNode)

class DocumentType(object):

    """
    A simple record of document type information: the name of the document
    type plus its public and system identifiers.
    """

    def __init__(self, localName, publicId, systemId):

        """
        Initialise the document type using the given 'localName' (also made
        available as 'name'), 'publicId' and 'systemId'.
        """

        self.name = localName
        self.localName = localName
        self.publicId = publicId
        self.systemId = systemId

        # NOTE: Nothing is currently provided to support the following
        # NOTE: attributes, which therefore always start out empty.

        self.entities = {}
        self.notations = {}

# Factory functions.

def get_node(_node, context_node):

    """
    Return a suitable wrapper for the low-level '_node', using 'context_node'
    as the source of the owner document.

    Document nodes yield the owner document of 'context_node' itself, attribute
    nodes yield an Attribute having 'context_node' as its owner element, and
    all other nodes yield a plain Node.
    """

    node_type = Node_nodeType(_node)

    if node_type == context_node.DOCUMENT_NODE:
        return context_node.ownerDocument

    if node_type == context_node.ATTRIBUTE_NODE:
        return Attribute(_node, context_node.ownerDocument, context_node)

    return Node(_node, context_node.ownerDocument)

# Utility functions.

def createDocumentType(localName, publicId, systemId):

    """
    Return a new DocumentType object holding the given 'localName', 'publicId'
    and 'systemId'.
    """

    return DocumentType(localName=localName, publicId=publicId, systemId=systemId)

def createDocument(namespaceURI, localName, doctype):

    """
    Create a new low-level document from the given 'namespaceURI', 'localName'
    and 'doctype', returning a Document object which wraps it.
    """

    native_document = Node_createDocument(namespaceURI, localName, doctype)
    return Document(native_document)

def parse(stream_or_string, html=0):

    """
    Parse a document obtained from 'stream_or_string'. An object providing a
    'read' attribute is treated as a stream whose entire content is read and
    parsed; any other object is treated as the filename of the document to be
    parsed. A true value for the optional 'html' parameter causes the content
    to be treated as HTML rather than XML.

    A document object is returned by this function.
    """

    if not hasattr(stream_or_string, "read"):
        return parseFile(stream_or_string, html)

    return parseString(stream_or_string.read(), html)

def parseFile(filename, html=0):

    """
    Parse the document stored in the file called 'filename'. A true value for
    the optional 'html' parameter causes the content to be treated as HTML
    rather than XML.

    A document object is returned by this function.
    """

    native_document = Node_parseFile(filename, html)
    return Document(native_document)

def parseString(s, html=0):

    """
    Parse the document held in the string 's'. A true value for the optional
    'html' parameter causes the content to be treated as HTML rather than XML.

    A document object is returned by this function.
    """

    native_document = Node_parseString(s, html)
    return Document(native_document)

def parseURI(uri, html=0):

    """
    Parse the document found at the given 'uri'. A true value for the optional
    'html' parameter causes the content to be treated as HTML rather than XML.

    This function does not currently work with HTML. To parse remote HTML, use
    the parse function with a stream object instead. For example:

    d = parse(urllib.urlopen("http://www.python.org"), html=1)

    A document object is returned by this function.
    """

    native_document = Node_parseURI(uri, html)
    return Document(native_document)

def toString(node, encoding=None, prettyprint=0):

    """
    Serialise the given 'node' together with its children, returning the result
    as a string. If specified, 'encoding' overrides the default character
    encoding of the serialised form. If 'prettyprint' is set to a true value,
    the serialised form is prettyprinted; by default, it is not.
    """

    native_node = node.as_native_node()
    return Node_toString(native_node, encoding, prettyprint)

def toStream(node, stream, encoding=None, prettyprint=0):

    """
    Serialise the given 'node' together with its children, writing the result
    to the given 'stream'. If specified, 'encoding' overrides the default
    character encoding of the serialised form. If 'prettyprint' is set to a
    true value, the serialised form is prettyprinted; by default, it is not.
    """

    native_node = node.as_native_node()
    Node_toStream(native_node, stream, encoding, prettyprint)

def toFile(node, filename, encoding=None, prettyprint=0):

    """
    Serialise the given 'node' together with its children, writing the result
    to the file called 'filename'. If specified, 'encoding' overrides the
    default character encoding of the serialised form. If 'prettyprint' is set
    to a true value, the serialised form is prettyprinted; by default, it is
    not.
    """

    native_node = node.as_native_node()
    Node_toFile(native_node, filename, encoding, prettyprint)

def adoptNodes(nodes):

    """
    Wrap the given low-level 'nodes', returning a list of high-level Node
    objects which all share a single Document object obtained from the first
    node. An empty collection of nodes produces an empty list. This function is
    experimental and should not be casually used.
    """

    if not len(nodes):
        return []

    # The document of the first node serves as the owner of every result.

    owner = Document(libxml2mod.doc(nodes[0]))
    return [Node(native_node, owner) for native_node in nodes]

# vim: tabstop=4 expandtab shiftwidth=4
