#! /usr/bin/env python3

"""
Directory : orpyste
Name      : reader
Version   : 2013.04
Author    : Christophe BAL
Mail      : projetmbc@gmail.com

This module reads simple structured data, stored in files that are efficient to
type, by using the ``peuf`` specifications.

See the documentation for more details.
"""

import collections

from mistool import os_use, regex_use
from orpyste import model


# ------------------------- #
# -- FOR ERRORS TO RAISE -- #
# ------------------------- #

class ReaderError(ValueError):
    """
Exception raised by this module when a mode is unknown or when the content
analysed is not well formed.
    """


# --------------------------- #
# -- LEGAL NAME FOR BLOCKS -- #
# --------------------------- #

def DEFAULT_IS_BLOCK_NAME(text):
    """
Default validator for the names of blocks: the answer is the one given by
``regex_use.isVarName`` used with the kind ``"lazzy"``.
    """
    isLegal = regex_use.isVarName(text = text, kind = "lazzy")

    return isLegal


# -------------------- #
# -- THE MAIN CLASS -- #
# -------------------- #

class Build(model.parser.Skeleton):
    """
Suppose that you have the following ``orpyste`` file (sorry for the lack of
originality).

orpyste::
    // Two single line comments.
    // The start of the comment must be at the very beginning of the line !

    game_rules::
        Play !
        Try to win.
        Win or let go.

    /* The start of the comment must be at the very beginning of the line !
     *
     * One comment on several lines...
     */

    names_of_levels::
        easy
        medium
        hard

    player_1::
        age   = 18
        sexe  = male
        score = 1874
        alias = Super Mario

    player_2::
        age   = 24
        sexe  = female
        score = 2007
        alias = Sonic


Let's suppose that the preceding file has the path path::``user/myFile.txt``.
Then we can extract the information by using the following code.

python::
    import orpyste

    infos = orpyste.reader.Build(
        path = "user/myFile.txt",
        mode = {
            'block': "game_rules",
            'line' : "names_of_levels",
            'equal': ["player_1", "player_2"]
        }
    )


Using the preceding code, then ``infos.dict`` will be the following ordered
dictionary.

python::
    {
        'game_rules'     : "Play !\nTry to win.\nWin or let go.",
        'names_of_levels': ["easy", "medium", "hard"],
        'player_1': {
            'age'  : '18',
            'sexe' : 'male',
            'score': '1874',
            'alias': 'Super Mario'
        },
        'player_2': {
            'age'  : '24',
            'sexe' : 'female',
            'score': '2007',
            'alias': 'Sonic'
        },
    }


You can also work directly with a string like in the following example which has
the same effect as the preceding one.

python::
    import orpyste

    myContent = '''
    game_rules::
        Play !
        Try to win.
        Win or let go.

    names_of_levels::
        easy
        medium
        hard

    player_1::
        age   = 18
        sexe  = male
        score = 1874
        alias = Super Mario

    player_2::
        age   = 24
        sexe  = female
        score = 2007
        alias = Sonic
    '''

    infos = orpyste.reader.Build(
        content = myContent,
        mode    = {
            'block': "game_rules",
            'line' : "names_of_levels",
            'equal': ["player_1", "player_2"]
        }
    )


One last thing, if you want to have a finer control for the block and line
contents, just use the attribute ``dictNbLine``. For the preceding code,
``infos.dictNbLine`` is equal to the following ordered dictionary where the
contents are replaced by tuples ``(Number of the line, Content)``.

python::
    {
        'game_rules'     : (3, 'Play !\nTry to win.\nWin or let go.'),
        'names_of_levels': [(8, 'easy'), (9, 'medium'), (10, 'hard')],
        'player_1': {
            'age'  : '18',
            'sexe' : 'male',
            'score': '1874',
            'alias': 'Super Mario'
        },
        'player_2': {
            'age'  : '24',
            'sexe' : 'female',
            'score': '2007',
            'alias': 'Sonic'
        },
    }


This class uses the following variables.

    1) ``content`` is one content to analyse. You can use this variable or the
    variable ``path``.

    2) ``path`` is the complete path of the file to read. You can use this
    variable or the variable ``content``.

    3) ``encoding`` is the encoding of the file using the standard python name
    for encoding. By default, its value is the string ``"utf8"``.

    4) ``mode`` indicates how the informations have been stored in the file to
    read. The possible values are the next ones.

        a) ``"equal"`` is for informations stored line by line with key-value
        syntax ``key = value`` in every blocks to analyse. This is the default
        value of the variable ``mode``.

        b) ``"equal+"`` is similar to ``"equal"`` except that newlines are
        allowed inside values. To use one sign ``=`` in one value, you have to
        escape at least the first sign ``=`` via ``"="``.

        << WARNING ! >> Each newline will be translated to one single space.

        c) ``"line"`` is for informations stored line by line without any
        special syntax in each line in every blocks to analyse.

        d) ``"block"`` is for information stored in a whole paragraph made of
        several lines in every blocks to analyse.

        e) We can also use different modes, chosen among the preceding ones, for
        different blocks. In that case, we simply use one dictionary like in the
        following example where the special mode ``"container"`` is for blocks
        that contain other blocks. You only have to use the keys you need.

        python::
            mode = {
                'default'  : the default mode,
                'equal'    : [names of the blocks],
                'equal+'   : [names of the blocks],
                'line'     : [names of the blocks],
                'block'    : [names of the blocks],
                'container': [names of the blocks]
            }

        If you only need one name of block, you don't have to put in one single
        value list. Just give its name in a string.

        If you don't define the value associated to the key ``'default'`` then
        the value ``"container"`` will be used by default.

    5) ``strip`` is a boolean variable to ask to strip the start of text
    containing informations for line or block contents. The default value is
    ``False``.

    6) ``isBlockName`` is one method that must return one boolean. This method
    is used to validate the name used for blocks.

    By default, ``isBlockName(text)`` is equal to ``regex_use.isVarName(text,
    kind = "lazzy")`` where ``regex_use`` is one module of the package ¨mistool.
    """

    def __init__(
        self,
        content     = "",
        path        = "",
        encoding    = "utf8",
        mode        = "equal",
        strip       = False,
        isBlockName = DEFAULT_IS_BLOCK_NAME
    ):
        """
The values of the arguments are stored in attributes having the same names, and
then the method ``build`` is called so as to analyse the content.
        """
# Where the content comes from.
        self.path     = path
        self.content  = content
        self.encoding = encoding

# How the content must be analysed.
        self.mode, self.strip, self.isBlockName = mode, strip, isBlockName

# The analysis is done as soon as the instance is created.
        self.build()

# --------------------- #
# -- FLAT DICTIONARY -- #
# --------------------- #

    def __flattenDict(self, oneDict):
        """
This method transforms the ``(Number line, Content)`` dictionary into a simple
``Content`` dictionary.

``oneDict`` is a dictionary whose values are treated as follows.

    1) A dictionary is flattened recursively.

    2) A tuple ``(Number of the line, Content)`` is replaced by its content.

    3) A list of such tuples is replaced by the list of the contents.

    4) Any other kind of value is kept untouched.

The method returns a new ordered dictionary, ``oneDict`` is not modified.
        """
        flatDict = collections.OrderedDict()

        for oneKey, oneValue in oneDict.items():
# ``isinstance`` also accepts subclasses: ``collections.OrderedDict`` is one
# subclass of ``dict``.
            if isinstance(oneValue, dict):
                oneValue = self.__flattenDict(oneValue)

            elif isinstance(oneValue, tuple):
                oneValue = oneValue[1]

            elif isinstance(oneValue, list):
                oneValue = [x[1] for x in oneValue]

            flatDict[oneKey] = oneValue

        return flatDict

# ------------------------------- #
# -- BUILDING THE DICTIONARIES -- #
# ------------------------------- #

    def build(self):
        """
Each time you make an instance of ``Build``, this method is called so as to
automatically build the ``(Number line, Content)`` and the simple ``Content``
dictionaries, which are stored respectively in the attributes ``dictNbLine`` and
``dict``.

You can use this method if you want to rebuild these dictionaries after a change
of the content of the file or of the value of the attribute ``content``.

If the attribute ``path`` is not empty, the file is read using the encoding
given by the attribute ``encoding``, otherwise the attribute ``content`` is
analysed.

One exception ``ReaderError`` is raised if the attribute ``mode`` is neither a
dictionary, nor one of the strings ``"equal"``, ``"equal+"``, ``"line"`` and
``"block"``.
        """
# Which mode is used ?
        if isinstance(self.mode, dict):
            self.__singleMode__ = ":mix:"

# We work with a copy so as not to add the key ``'default'`` to the dictionary
# given by the user.
            self.__mixedModes__ = collections.OrderedDict(self.mode)

            if 'default' not in self.__mixedModes__:
                self.__mixedModes__['default'] = "container"

        else:
            if self.mode not in ["equal", "equal+", "line", "block"]:
                raise ReaderError(
                    'Unknown mode "{0}".'.format(self.mode)
                )

            self.__singleMode__ = self.mode
            self.__mixedModes__ = None

# Which content must be analysed ?
        if self.path:
# The encoding chosen by the user must be used to read the file.
            with open(
                self.path,
                mode     = "r",
                encoding = self.encoding
            ) as oneFile:
                content = oneFile.read()

        else:
            content = self.content

        content = content.split('\n')

# The (number line, content) dictionary. The numbers of lines start at one.
        self.dictNbLine = self.__analyse(
            mode        = self.__singleMode__,
            linesWithNb = [(n + 1, x) for n, x in enumerate(content)]
        )

# The only content dictionary.
        self.dict = self.__flattenDict(self.dictNbLine)

    def __analyse(
        self,
        linesWithNb,
        mode,
        level   = 0,
        inBlock = False
    ):
        """
This method does all the ugly job for analysing the ``orpyste`` content. It
calls itself so as to analyse the content of each block found.

This method uses the following variables.

    1) ``linesWithNb`` is a list of tuples ``(Number of the line, Line)``. This
    list is not modified.

    2) ``mode`` is the mode to use. This is one of the strings ``"equal"``,
    ``"equal+"``, ``"line"``, ``"block"``, or ``":mix:"`` when the modes are
    given by the dictionary ``self.__mixedModes__``.

    3) ``level`` is the value that ``self.indentLevel`` must return for the
    lines starting one block. This value is also given to ``self.cleanIndent``
    so as to clean the contents in the modes ``"line"`` and ``"block"``.

    4) ``inBlock`` is a boolean which is ``True`` when the lines analysed are
    the content of one block.

The value returned depends on what has been found.

    a) If no block has been found and the mode is ``"line"``, one list of
    tuples ``(Number of the line, Content)`` is returned.

    b) If no block has been found and the mode is ``"block"``, the tuple
    ``(Number of the first line, Whole content)`` is returned.

    c) In all the other cases, one ordered dictionary is returned.

One exception ``ReaderError`` is raised for a wrong indentation, an illegal name
of block, datas outside of a block or after a block at the same level, a missing
equal sign or an unknown mode.
        """
        oneBlockHasBeenFound = False
        lastKeyPlus          = None

        popNewLine     = True
        weAreInComment = False

        answer = collections.OrderedDict()

# Popping on the left of a deque is cheap, this is not the case of the method
# ``pop(0)`` of a list. This also avoids to modify the list given.
        linesWithNb = collections.deque(linesWithNb)

# ``not popNewLine`` means that one line, already popped when looking for the
# content of a block, is still waiting to be analysed. We must not forget it
# even if there is no more line to pop.
        while linesWithNb or not popNewLine:
# One new line to pop ?
            if popNewLine:
                nbLine, oneLine = linesWithNb.popleft()

            else:
                popNewLine = True

# Empty lines are only kept in the block mode.
            storeThisLine = (mode == "block" or oneLine.strip() != '')


# COMMENT ON SEVERAL LINES
            if weAreInComment:
                if self.isMultiLineCommentEnd(oneLine):
                    weAreInComment = False

            elif self.isMultiLineCommentStart(oneLine):
# We can have  /*...*/  in the same line.
                if not self.isMultiLineCommentEnd(oneLine):
                    weAreInComment = True


# COMMENT IN ONE SINGLE LINE
            elif self.isOneLineComment(oneLine):
                pass


# BLOCK
            elif self.isNewBlock(oneLine):
                lastKeyPlus = None

                currentLevel = self.indentLevel(oneLine)

                if currentLevel != level:
                    self.__sendError(
                        'Wrong indentation in the line {0}.'.format(nbLine)
                    )

                oneBlockHasBeenFound = True

# The two last characters of the stripped line are removed so as to obtain the
# name of the block.
                nameBlockFound = oneLine.strip()[:-2]

# Is the name of the block a legal one ?
                if not self.isBlockName(nameBlockFound):
                    self.__sendError(
                        'Illegal name for one block : '
                        'see "{0}" in the line "{1}".'.format(
                            nameBlockFound,
                            nbLine
                        )
                    )

# Let's find the content of the block.
                blockContent            = []
                weAreInCommentInContent = False

                while linesWithNb:
                    nbLine, oneLine = linesWithNb.popleft()

#    + Comment in the content of a block
                    if weAreInCommentInContent:
                        if self.isMultiLineCommentEnd(oneLine):
                            weAreInCommentInContent = False

                    elif self.isMultiLineCommentStart(oneLine):
                        if not self.isMultiLineCommentEnd(oneLine):
                            weAreInCommentInContent = True

#    + Comment in one single line in the content of a block
                    elif self.isOneLineComment(oneLine):
                        pass

#    + An empty line in the content of a block
                    elif not oneLine.strip():
                        blockContent.append((nbLine, oneLine))

#    + Content with higher level of indentation
                    elif self.indentLevel(oneLine) > currentLevel:
                        blockContent.append((nbLine, oneLine))

#    + This line doesn't belong to the block: the main loop has to analyse it.
                    else:
                        popNewLine = False
                        break

# Which mode must be used with the current block ?
                if mode == ":mix:":
                    blockMode = ''

                    for oneMode, blockNames in self.__mixedModes__.items():
# The key ``'default'`` is associated to one mode, not to names of blocks.
                        if oneMode == 'default':
                            continue

# One single name can be given directly in a string. In that case, we must not
# use ``in`` which would look for a substring.
                        if isinstance(blockNames, str):
                            nameFound = (nameBlockFound == blockNames)

                        else:
                            nameFound = (nameBlockFound in blockNames)

                        if nameFound:
                            blockMode = oneMode
                            break

                    if not blockMode:
                        blockMode = self.__mixedModes__['default']

# The content of a container is analysed with the mixed modes.
                    if blockMode == "container":
                        blockMode = mode

                else:
                    blockMode = mode

# Let's analyse and store the content of the block
                answer[nameBlockFound] = self.__analyse(
                    linesWithNb = blockContent,
                    mode        = blockMode,
                    level       = currentLevel + 4,
                    inBlock     = True
                )


# CONTENT TO STORE
            elif storeThisLine:
                if not inBlock or oneBlockHasBeenFound:
                    self.__sendError(
                        'Data and block have the same level '\
                        'of indentation. See the line {0}.'.format(nbLine)
                    )

# Mode used --> key = value
                if mode == "equal":
                    i = oneLine.find('=')

                    if i == -1:
                        self.__sendError(
                            'Missing equal sign in the line {0}.'\
                            .format(nbLine)
                        )

                    answer[oneLine[:i].strip()] = oneLine[i + 1:].strip()

# Mode used --> key = value with newlines
                elif mode == "equal+":
                    i = self.findEqualPlus(oneLine)

                    if i == -1:
# No equal sign: this line continues the value of the last key found.
                        if lastKeyPlus is None:
                            self.__sendError(
                                'Missing equal sign in the line {0}.'\
                                .format(nbLine)
                            )

                        answer[lastKeyPlus] \
                        += ' ' + oneLine.replace('"="', '=').strip()

                    else:
                        lastKeyPlus = oneLine[:i].strip()

                        answer[lastKeyPlus] \
                        = oneLine[i + 1:].replace('"="', '=').strip()

# Simply one content to finish to store : the key is the number of the line.
                elif mode in ["line", "block", ":mix:"]:
                    answer[nbLine] = self.stripIt(oneLine)

                else:
                    raise ReaderError('Unknown mode "{0}".'.format(mode))


# ONLY ONE CONTENT OF LINES OR A BLOCK
        if not oneBlockHasBeenFound:
# List for the line mode (verbatim)
            if mode == "line":
                return [
                    (oneLineNb, self.cleanIndent(oneLineFound, level))
                    for oneLineNb, oneLineFound in answer.items()
                ]

# Glue lines in block mode (verbatim)
            elif mode == "block":
                blockContent = []
                firstLineNb  = None

                for oneLineNb, oneLineFound in answer.items():
                    if firstLineNb is None:
                        firstLineNb = oneLineNb

                    blockContent.append(
                        self.cleanIndent(oneLineFound, level)
                    )

                blockContent = self.stripIt('\n'.join(blockContent))

                return (firstLineNb, blockContent)

# Return the ordered dictionary.
        return answer

# -------------------- #
# -- SENDING ERRORS -- #
# -------------------- #

    def __sendError(self, message):
        """
This method always raises one exception ``ReaderError``. The message given is
completed with the path of the file analysed if there is one, otherwise with the
string content analysed if it is not empty.
        """
        if self.path:
            extraInfos = "\nPath of the file :\n\t" + self.path

        elif self.content:
            extraInfos = "\nString content :\n\n" + self.content

        else:
            extraInfos = ""

        raise ReaderError(message + extraInfos)
