#! /usr/bin/env python3

"""
Directory : orpyste
Name      : reader
Version   : 2013.09
Author    : Christophe BAL
Mail      : projetmbc@gmail.com

This module reads simple structured string data, which is efficient to type,
stored in files that follow the ``peuf`` specifications.
"""

import collections

from mistool import os_use, regex_use
from orpyste import model


# ------------------------- #
# -- FOR ERRORS TO RAISE -- #
# ------------------------- #

class ReaderError(ValueError):
    """
:::::::::::::::::
Small description
:::::::::::::::::

Root of the exceptions of the ``reader`` module of the package ``orpyste``. It
derives from ``ValueError``.
    """


# --------------------------- #
# -- LEGAL NAME FOR BLOCKS -- #
# --------------------------- #

def DEFAULT_IS_BLOCK_NAME(text):
    """
:::::::::::::::::
Small description
:::::::::::::::::

Default validator for the names of ¨peuf blocks : the answer for the string
``text`` is the one given by ``regex_use.isVarName`` used with
``kind = "lazzy"``.
    """
    isLegalName = regex_use.isVarName(text = text, kind = "lazzy")

    return isLegalName


# -------------------- #
# -- THE MAIN CLASS -- #
# -------------------- #

class Build(model.parser.Skeleton):
    """
:::::::::::::::::
Small description
:::::::::::::::::

-----------------
?? ?? ?? ?? ?? ??
-----------------








-----------------
?? ?? ?? ?? ?? ??
-----------------

Suppose that you have the following ``orpyste`` file (sorry for the lack of
originality).

orpyste::
    // Two single line comments.
    // The start of the comment must be at the very beginning of the line !

    game_rules::
        Play !
        Try to win.
        Win or let go.

    /* The start of the comment must be at the very beginning of the line !
     *
     * One comment on several lines...
     */

    names_of_levels::
        easy
        medium
        hard

    player_1::
        age   = 18
        sexe  = male
        score = 1874
        alias = Super Mario

    player_2::
        age   = 24
        sexe  = female
        score = 2007
        alias = Sonic


Let's suppose that the preceding file has the path path::``user/myFile.txt``.
Then we can extract information by using the following code.

python::
    import orpyste

    infos = orpyste.reader.Build(
        path = "user/myFile.txt",
        mode = {
            'block': "game_rules",
            'line' : "names_of_levels",
            'equal': ["player_1", "player_2"]
        }
    )


Using the preceding code, then ``infos.dict`` will be the following ordered
dictionary.

python::
    {
        'game_rules'     : "Play !\\nTry to win.\\nWin or let go.",
        'names_of_levels': ["easy", "medium", "hard"],
        'player_1': {
            'age'  : '18',
            'sexe' : 'male',
            'score': '1874',
            'alias': 'Super Mario'
        },
        'player_2': {
            'age'  : '24',
            'sexe' : 'female',
            'score': '2007',
            'alias': 'Sonic'
        }
    }


You can also work directly with string like in the following example which has
the same effect as the preceding one.

python::
    import orpyste

    myContent = '''
    game_rules::
        Play !
        Try to win.
        Win or let go.

    names_of_levels::
        easy
        medium
        hard

    player_1::
        age   = 18
        sexe  = male
        score = 1874
        alias = Super Mario

    player_2::
        age   = 24
        sexe  = female
        score = 2007
        alias = Sonic
    '''

    infos = orpyste.reader.Build(
        content = myContent,
        mode    = {
            'block': "game_rules",
            'line' : "names_of_levels",
            'equal': ["player_1", "player_2"]
        }
    )


If you want to have a finer control for the block and line contents, just use
the attribute ``dictNbLine``. For the preceding code, ``infos.dictNbLine`` is
equal to the following ordered dictionary where the contents are replaced by
tuples ``(Number of the line, Content)``.

python::
    {
        'game_rules'     : (3, 'Play !\\nTry to win.\\nWin or let go.'),
        'names_of_levels': [(8, 'easy'), (9, 'medium'), (10, 'hard')],
        'player_1': {
            'age'  : '18',
            'sexe' : 'male',
            'score': '1874',
            'alias': 'Super Mario'
        },
        'player_2': {
            'age'  : '24',
            'sexe' : 'female',
            'score': '2007',
            'alias': 'Sonic'
        }
    }


Let's finish with a very special mode with the following ``orpyste`` file where
we want to use ``<==>``, ``==>``, ``=`` and ``<=`` as ``key-value`` separators.

orpyste::
    logic::
        a <==> b
        x ==> y

    constraint::
        i     = j
        alpha <= beta


Let suppose that the preceding file has the path path::``user/myFile.txt``. Then
we can extract informations by using the following code.

python::
    import orpyste

    infos = orpyste.reader.Build(
        path = "user/myFile.txt",
        mode = "keyval",
        sep  = ["<==>", "==>", "=", "<="]
    )


Using the preceding code, then ``infos.dict`` will be the following ordered
dictionary.

python::
    {
        'logic': {
            '<==>': {'a': "b"},
            '==>' : {'x': "y"}
        },
        'constraint': {
            '=' : {'i': "j"},
            '<=': {'alpha': "beta"}
        }
    }


:::::::::::::
The arguments
:::::::::::::

This class uses the following variables.

    1) ``content`` is one content to analyse. You can use this variable or the
    variable ``path``.

    2) ``path`` is the complete path of the file to read. You can use this
    variable or the variable ``content``.

    3) ``encoding`` is the encoding of the file using the standard python name
    for encoding. By default, its value is the string ``"utf8"``.

    4) ``mode`` indicates how the informations have been stored in the file to
    read. The possible values are the next ones.

        a) ``"equal"`` is for informations stored line by line with key-value
        syntax ``key = value`` in every blocks to analyse. This is the default
        value of the variable ``mode``.

        b) ``"equal+"`` is similar to ``"equal"`` except that newlines are
        allowed inside values. To use one sign ``=`` in one value, you have to
        escape at least the first sign ``=`` via ``\\=``.

        warning::
            Each empty line will be translated to one single space.

        c) ``"keyval"`` extends the mode ``"equal"`` by allowing to use
        different separators defined in the variable ``sep`` (see below).

        d) ``"keyval+"`` plays for ``"keyval"`` the same role that ``"equal+"``
        does for ``"equal"``.

        e) ``"line"`` is for informations stored line by line without any
        special syntax in each line in every blocks to analyse.

        f) ``"block"`` is for information stored in a whole paragraph made of
        several lines in every blocks to analyse.

        g) We can also use one of the preceding modes for different blocks. In
        that case, we simply use one dictionary like in the following example
        where the special mode ``"container"`` is for blocks that contains other
        blocks. Indeed, you can just use only the keys you need.

        python::
            mode = {
                'default'  : the default mode,
                'equal'    : [names of the blocks],
                'equal+'   : [names of the blocks],
                'keyval'   : [names of the blocks],
                'keyval+'  : [names of the blocks],
                'line'     : [names of the blocks],
                'block'    : [names of the blocks],
                'container': [names of the blocks]
            }

        If you only need one name of block, you don't have to put in one single
        value list. Just give its name in a string.

        If you don't define the value associated to the key ``'default'`` then
        the value ``"container"`` will be used by default.

    5) ``sep`` indicates which text(s) must be used to separate ``key`` and
    ``value`` for the modes ``"equal"``, ``"equal+"``, ``"keyval"`` and
    ``"keyval+"``. You can use a single string or a list of strings.

    By default, ``sep = "="`` which is justified for the use of the classical
    modes ``"equal"`` and ``"equal+"``.

    warning::
        You can't use a list of separators with one of the modes ``"equal"`` and
        ``"equal+"``.

    6) ``strip`` is a boolean variable to ask to strip the start of text
    containing informations for line or block contents. The default value is
    ``False``.

    7) ``isBlockName`` is one method that must return one boolean. This method
    is used to validate the name used for blocks.

    By default, ``isBlockName(text)`` is equal to ``regex_use.isVarName(text,
    kind = "lazzy")`` where ``regex_use`` is one module of the package ¨mistool.
    """

    # Modes accepted when ``mode`` is given as one single string : the method
    # ``build`` raises ``ReaderError`` for any other string.
    ALLOWED_MODE = [
        "equal", "equal+",
        "keyval", "keyval+",
        "line",
        "block"
    ]

    def __init__(
        self,
        content     = "",
        path        = "",
        encoding    = "utf8",
        mode        = "equal",
        sep         = "=",
        strip       = False,
        isBlockName = DEFAULT_IS_BLOCK_NAME
    ):
        self.path        = path
        self.content     = content
        self.encoding    = encoding
        self.mode        = mode
        self.sep         = sep
        self.strip       = strip
        self.isBlockName = isBlockName

        self.build()


# ------------------------------- #
# -- BUILDING THE DICTIONARIES -- #
# ------------------------------- #

    def build(self):
        """
:::::::::::::::::
Small description
:::::::::::::::::

Each time you make an instance of ``Reader``this method is called so as to
automatically build the ``(Number line, Content)`` and the simple ``Content``
dictionaries.

You can use this method if you want to rebuild this dictionaries after a change
of the content of the file or of the value of the attribut ``content``.
        """
# Which mode is used ?
        if isinstance(self.mode, dict) \
        or isinstance(self.mode, collections.OrderedDict):
            self.__singleMode__ = ":mix:"
            self.__mixedModes__ = self.mode

            if not 'default' in self.__mixedModes__:
                self.__mixedModes__['default'] = "container"

        else:
            if self.mode not in self.ALLOWED_MODE:
                raise ReaderError(
                    'Unknown mode << {0} >>.'.format(self.mode)
                )

            self.__singleMode__ = self.mode
            self.__mixedModes__ = None

# Normalize ``sep``.
        if isinstance(self.sep, str):
            self.sep = [self.sep]

        else:
            self.sep = sorted(
                self.sep,
                key = lambda t: -len(t)
            )

        if self.mode in ["equal", "equal+"] and len(self.sep) !=1:
            raise ReaderError(
                'Only one separator is allowed for the mode '
                '<< {0} >>.'.format(self.mode)
            )

# Which content must be analysed ?
        if self.path:
            content = os_use.readTextFile(self.path)

        else:
            content = self.content

        content = content.split('\n')

# The (number line, content) dictionary.
#
# << Warning ! >> We keep the comments so as to have the right numbers of
# the lines.
        self.dictNbLine = self.__analyse(
            mode        = self.__singleMode__,
            linesWithNb = [(n + 1, x) for n, x in enumerate(content)]
        )

# The only content dictionary.
        self.dict = self.__flatDict(self.dictNbLine)

    def __analyse(
        self,
        linesWithNb,
        mode,
        level   = 0,
        inBlock = False
    ):
        """
:::::::::::::::::
Small description
:::::::::::::::::

This method does all the ugly job for analysing the ``orpyste`` file so as to
build the ``(number line, content)`` dictionary.
        """
        blockFound  = False
        lastKeyPlus = None

        popNewLine = True
        inComment  = False

        answer = collections.OrderedDict()

        while(linesWithNb):
# One new line to pop ?
            if popNewLine:
                nbLine, oneLine = linesWithNb.pop(0)

            else:
                popNewLine = True

# We have to take care of empty lines starting the file when we use the block
# mode !
            isLineEmpty = (oneLine.strip() == '')

            if mode == "block":
                storeThisLine = (not isLineEmpty or answer)

            else:
                storeThisLine = not isLineEmpty


# -- COMMENT ON SEVERAL LINES -- #

            if inComment:
                if self.isMultiLineCommentEnd(oneLine):
                    inComment = False

            elif self.isMultiLineCommentStart(oneLine):
# We can have  /*...*/  in the same line.
                if not self.isMultiLineCommentEnd(oneLine):
                    inComment = True


# -- COMMENT IN ONE SINGLE LINE -- #

            elif self.isOneLineComment(oneLine):
                ...


# -- BLOCK -- #

            elif self.isNewBlock(oneLine):
                lastKeyPlus = None

                currentLevel = self.indentLevel(oneLine)

                if currentLevel != level:
                    self.__sendError(
                        'Wrong indentation in the line {0}.'.format(nbLine)
                    )

                blockFound     = True
                nameBlockFound = oneLine.strip()[:-2]

# Is the name of the block a legal one ?
                if not self.isBlockName(nameBlockFound):
                    self.__sendError(
                        'Illegal name for one block : '
                        'see "{0}" in the line "{1}".'.format(
                            nameBlockFound,
                            nbLine
                        )
                    )

# Let's find the content of the block.
                blockContent = []
                popNewLine   = False

                inCommentInContent = False

                while(linesWithNb):
                    nbLine, oneLine = linesWithNb.pop(0)
#    + Comment in the content of a block
                    if inCommentInContent:
                        if self.isMultiLineCommentEnd(oneLine):
                            inCommentInContent = False

                    elif self.isMultiLineCommentStart(oneLine):
                        if not self.isMultiLineCommentEnd(oneLine):
                            inCommentInContent = True

#    + Comment in one single line in the content of a block
                    elif self.isOneLineComment(oneLine):
                        ...

#    + An empty line in the content of a block
                    elif not oneLine.strip():
                        blockContent.append((nbLine, oneLine))

#    + Content with higher level of indentation
                    elif self.indentLevel(oneLine) > currentLevel:
                        blockContent.append((nbLine, oneLine))

                    else:
                        break

# Which mode must be used with the current block ?
                if mode == ":mix:":
                    blockMode = ''

                    for oneMode, oneBlock in self.__mixedModes__.items():
                        if nameBlockFound == oneBlock \
                        or nameBlockFound in oneBlock:
                            blockMode = oneMode
                            break

                    if not blockMode:
                        blockMode = self.__mixedModes__['default']

                    if blockMode == "container":
                        blockMode = mode

                else:
                    blockMode = mode

# Let's analyse and store the content of the block
                answer[nameBlockFound] = self.__analyse(
                    linesWithNb = blockContent,
                    mode        = blockMode,
                    level       = currentLevel + 4,
                    inBlock     = True
                )


# -- CONTENT TO STORE -- #

            elif storeThisLine:
                if not inBlock or blockFound:
                    self.__sendError(
                        'Data and block have the same level '\
                        'of indentation. See the line {0}.'.format(nbLine)
                    )

# Mode used --> key = value
                if mode in ["equal", "keyval"]:
                    iKey, iVal, sep = self.__findSep(oneLine)

                    if iKey == -1:
                        self.__sendError(
                            'Missing equal sign in the line {0}.'\
                            .format(nbLine)
                        )

                    self.__storeKeyval(
                        answer = answer,
                        key    = oneLine[:iKey].strip(),
                        val    = oneLine[iVal:].strip(),
                        sep    = sep,
                        mode   = mode
                    )

# Mode used --> key = value with newlines
                elif mode in ["equal+", "keyval+"]:
                    iKey, iVal, sep = self.__findSep(oneLine)

                    if iKey == -1:
                        if lastKeyPlus == None:
                            self.__sendError(
                                'Missing equal sign in the line {0}.'\
                                .format(nbLine)
                            )

                        answer[lastKeyPlus] += ' ' + self.__cleanEscape(
                            text         = oneLine.strip(),
                            escapeKeyval = True
                        )

                    else:
                        lastKeyPlus = oneLine[:iKey].strip()

                        self.__storeKeyval(
                            answer = answer,
                            key    = lastKeyPlus,
                            val    = oneLine[iVal:].strip(),
                            sep    = sep,
                            mode   = mode
                        )

# Simply one content to finish to store
                elif mode in ["line", "block", ":mix:"]:
                    answer[nbLine] = self.stripIt(
                        self.__cleanEscape(oneLine)
                    )

                else:
                    raise ReaderError('Unknown mode "{0}".'.format(mode))


# -- ONLY A CONTENT OF LINES OR A BLOCK -- #

        if not blockFound:
# List for the line mode (verbatim)
            if mode == "line":
                listLines = []

                for oneLineNb, oneLineFound in answer.items():
                    oneLineFound = self.cleanIndent(
                        oneLineFound,
                        level
                    )
                    listLines.append((oneLineNb, oneLineFound))

                return listLines

# Glue lines in block mode (verbatim)
            elif mode == "block":
                blockContent = []
                firstLineNb = None

                for oneLineNb, oneLineFound in answer.items():
                    if firstLineNb == None:
                        firstLineNb = oneLineNb

                    oneLineFound = self.cleanIndent(
                        oneLineFound,
                        level
                    )
                    blockContent.append(oneLineFound)

                blockContent = '\n'.join(blockContent)
                blockContent = self.stripIt(blockContent)

                return (firstLineNb, blockContent)


# -- ALL THE JOB HAS BEEN DONE ! -- #

# Return the ordered dictionary.
        return answer


# --------------------- #
# -- FLAT DICTIONARY -- #
# --------------------- #

    def __flatDict(self, oneDict):
        """
:::::::::::::::::
Small description
:::::::::::::::::

This method transforms the ``(Number line, Content)`` dictionary to a simple
``Content`` dictionary.
        """
        if oneDict == (None, ''):
            return {}

        flatDict = collections.OrderedDict()

        for oneKey, oneValue in oneDict.items():
            if isinstance(oneValue, dict) \
            or isinstance(oneValue, collections.OrderedDict):
                oneValue = self.__flatDict(oneValue)

            elif isinstance(oneValue, tuple):
                oneValue = oneValue[1]

            elif isinstance(oneValue, list):
                oneValue = [x[1] for x in oneValue]

            flatDict[oneKey] = oneValue

        return flatDict


# ---------------------------- #
# -- EQUAL (PLUS) LIKE MODE -- #
# ---------------------------- #

    def __findOneSep(self, text, sep):
        """
:::::::::::::::::
Small description
:::::::::::::::::

This methods returns the position in ``text`` of the first string ``sep`` which
is not preceded by a backslash.

If there is no matchning, the value ``(-1)`` is returned.
        """
        i = text.find(sep)
        j = text.find('\\' + sep)

        if i == j + 1:
            i = -1

        return i

    def __findSep(self, text):
        """
:::::::::::::::::
Small description
:::::::::::::::::

This method returns the possible position of the first not escaped separator
(this is for for the the modes ``"equal"``, ``"equal+"``, ``"keyval"`` and
``"keyval+"``). Indeed, the method returns ``(iKey, iVal, sep)`` where each
parameter has the following meaning.

    1) ``iKey`` is the position where the separator has been found.

    2) ``iVal`` is the position of the end of the separator.

    3) ``sep`` is the separator found.
        """
        iKey = -1
        iVal = -1
        sep  = None

        for oneSep in self.sep:
            j = self.__findOneSep(
                text = text,
                sep  = oneSep
            )

            if j != -1 and (iKey == -1 or j < iKey):
                iKey = j
                iVal = iKey + len(oneSep)
                sep  = oneSep

        return iKey, iVal, sep

    def __cleanEscape(
        self,
        text,
        escapeKeyval = False
    ):
        """
:::::::::::::::::
Small description
:::::::::::::::::

This methods simply removes backslashes that escapes double points.

If ``escapeKeyval = True``, then there is alos a cleaning of the backslashes
escaping separators for the the modes ``"equal"``, ``"equal+"``, ``"keyval"``
and ``"keyval+"``.

By default, ``escapeKeyval = False``.
        """
        text = text.replace('\::', '::')

        if escapeKeyval:
            for oneSep in self.sep:
                text = text.replace('\\' + oneSep, oneSep)

        return text

    def __storeKeyval(
        self,
        answer,
        key,
        val,
        sep,
        mode
    ):
        """
:::::::::::::::::
Small description
:::::::::::::::::

This methods manages the storing for values for the modes ``"equal"``,
``"equal+"``, ``"keyval"`` and ``"keyval+"``.
        """
        if mode.startswith("keyval"):
            subCategory = answer.get(sep, {})

            subCategory[key] = self.__cleanEscape(
                text         = val,
                escapeKeyval = True
            )

            answer[sep] = subCategory

        else:
            answer[key] = self.__cleanEscape(
                text         = val,
                escapeKeyval = True
            )

# -------------------- #
# -- SENDING ERRORS -- #
# -------------------- #

    def __sendError(self, message):
        """
:::::::::::::::::
Small description
:::::::::::::::::

This method always raises ``ReaderError``. The text ``message`` is completed
with the path of the file analysed if ``self.path`` is not empty, or otherwise
with the string content analysed if ``self.content`` is not empty.
        """
        if self.path:
            origin = "\nPath of the file :\n\t" + self.path

        elif self.content:
            origin = "\nString content :\n\n" + self.content

        else:
            # Nothing to add about the source of the datas.
            origin = ""

        raise ReaderError(message + origin)
