#!/usr/bin/env python3

# Source :
#    http://docs.python.org/library/re.html

"""
Directory : mistool
Name      : regex_use
Version   : 2013.10
Author    : Christophe BAL
Mail      : projetmbc@gmail.com

This script gathers some testing functions based on regex patterns.
"""

import re


# ------------------------- #
# -- FOR ERRORS TO RAISE -- #
# ------------------------- #

class RegexUseError(ValueError):
    """
:::::::::::::::::
Small description
:::::::::::::::::

Exception raised by the functions of the module ``regex_use`` of the package
``mistool``. It derives from ``ValueError`` so that callers catching this
standard exception also catch the errors raised here.
    """


# ----------- #
# -- WORDS -- #
# ----------- #

# Lower case accentuated letters and ligatures used by the French language.
# The upper case versions are built with ``str.upper``.
#
# ``û``, ``ÿ``, ``æ`` and ``œ`` are needed to keep words like "sûr" or "cœur"
# in one piece.
__FR_ACCENTUED_LETTERS = "âàéèêëîïôùüçûÿæœ"

# Each pattern captures one single "word" in its unique group. The keys are
# the following ones.
#
#     1) ``'en'`` is for words made of ASCII letters only.
#
#     2) ``'fr'`` is for words made of ASCII letters and of French accentuated
#     letters.
#
#     3) ``'var'`` is for names starting with one ASCII letter followed by
#     possible digits, ASCII letters or underscores.
#
# Raw strings are used so that ``\d`` reaches the regex engine without being
# seen as an (invalid) string escape sequence.
PATTERN_GROUP_WORD = {
# Natural language
    'en': re.compile(r"([a-zA-Z]+)"),
    'fr': re.compile(
        r"([a-z{0}A-Z{1}]+)".format(
            __FR_ACCENTUED_LETTERS,
            __FR_ACCENTUED_LETTERS.upper()
        )
    ),
# Coding
    'var': re.compile(r"([a-zA-Z][\d_a-zA-Z]*)"),
}


# ----------------------------- #
# -- LEGAL NAME FOR VARIABLE -- #
# ----------------------------- #

# Raw strings are used so that ``\d`` and ``\*`` reach the regex engine without
# being seen as (invalid) string escape sequences.
PATTERN_VAR_NAME = {
    'standard'  : re.compile(r"^[a-zA-Z][\d_a-zA-Z]*$"),
    'python'    : re.compile(r"^[_a-zA-Z][\d_a-zA-Z]*$"),
    'lazzy'     : re.compile(r"^[\d_a-zA-Z]+$"),
    'lexicBlock': re.compile(
        r"^[a-zA-Z][\d_a-zA-Z]*(-[a-zA-Z][\d_a-zA-Z]*)*(\*)?$"
    )
}

def isVarName(
    text,
    kind = "standard"
):
    r"""
:::::::::::::::::
Small description
:::::::::::::::::

This function tests if the whole string ``text`` is one legal name for variable
for different contexts given by the value of ``kind``. This argument corresponds
to one key of the following dictionary.

python::
    PATTERN_VAR_NAME = {
        'standard'  : re.compile(r"^[a-zA-Z][\d_a-zA-Z]*$"),
        'python'    : re.compile(r"^[_a-zA-Z][\d_a-zA-Z]*$"),
        'lazzy'     : re.compile(r"^[\d_a-zA-Z]+$"),
        'lexicBlock': re.compile(
            r"^[a-zA-Z][\d_a-zA-Z]*(-[a-zA-Z][\d_a-zA-Z]*)*(\*)?$"
        )
    }

The function returns ``True`` for one legal name, and ``False`` otherwise. One
name followed by a newline character is not legal.


:::::::::::::
The arguments
:::::::::::::

This function uses two arguments.

    1) ``text`` is the string to test.

    2) ``kind`` is one of the following strings. Any other value raises one
    ``RegexUseError`` exception.

        a) ``"standard"`` is the default value. It is for names starting with
        one ASCII letter followed by possible digits, ASCII letters or the
        underscore ``_``.

        b) ``"python"`` is for Python names for variables which can start with
        one ASCII letter or one underscore ``_`` followed by possible digits,
        ASCII letters or the underscore ``_``.

        c) ``"lazzy"`` is for names which use anywhere digits, ASCII letters
        and the underscore ``_``.

        d) ``"lexicBlock"`` is used in lexic which is another project of the
        author of ``Mistool``.

        This kind of variables can be a list of words with one ASCII letter
        followed by possible digits, ASCII letters, underscores ``_``, these
        words being separated by single minus signs ``-``, and can be
        eventually followed at the end by one single star ``*``.
    """
    if kind not in PATTERN_VAR_NAME:
        raise RegexUseError(
            "Unknown kind << {0} >>.".format(kind)
        )

# ``fullmatch`` is used instead of ``search`` because ``$`` also matches just
# before one final newline : "name\n" must not be accepted.
    return bool(PATTERN_VAR_NAME[kind].fullmatch(text))


# ------------------ #
# -- ROMAN NUMBER -- #
# ------------------ #

# The following pattern comes from the book "Dive into Python".

PATTERN_ROMAN_NUMERAL = re.compile(
    """
    ^                   # beginning of string
    M{0,4}              # thousands: 0 to 4 M's
    (CM|CD|D?C{0,3})    # hundreds: 900 (CM),
                        #           400 (CD),
                        #           0-300 (0 to 3 C's) or
                        #           500-800 (D, followed by 0 to 3 C's)
    (XC|XL|L?X{0,3})    # tens: 90 (XC), 40 (XL),
                        #       0-30 (0 to 3 X's) or
                        #       50-80 (L, followed by 0 to 3 X's)
    (IX|IV|V?I{0,3})    # ones: 9 (IX), 4 (IV),
                        #       0-3 (0 to 3 I's) or
                        #       5-8 (V, followed by 0 to 3 I's)
    $                   # end of string
    """ ,
    re.VERBOSE
)

def isRoman(
    text,
    case = "upper"
):
    """
:::::::::::::::::
Small description
:::::::::::::::::

This function tests if the whole string ``text`` corresponds to one roman number
using one case given by ``case``, the default value being upper case.

The function returns ``True`` for one roman number, and ``False`` otherwise. The
empty string is never seen as one roman number.


:::::::::::::
The arguments
:::::::::::::

This function uses two arguments.

    1) ``text`` is the string to test.

    2) ``case`` is a string that can be equal to one of the three following
    values. Any other value raises one ``RegexUseError`` exception.

        a) ``"upper"`` is for upper roman number. This is the default value.

        b) ``"lower"`` is for lower roman number.

        c) ``"mix"`` says that we do not care of the case.


::::::::::::::
Technical note
::::::::::::::

The pattern used is the following one coming from the book "Dive into Python".

python::
    PATTERN_ROMAN_NUMERAL = re.compile(
        '''
        ^                   # beginning of string
        M{0,4}              # thousands: 0 to 4 M's
        (CM|CD|D?C{0,3})    # hundreds: 900 (CM),
                            #           400 (CD),
                            #           0-300 (0 to 3 C's) or
                            #           500-800 (D, followed by 0 to 3 C's)
        (XC|XL|L?X{0,3})    # tens: 90 (XC), 40 (XL),
                            #       0-30 (0 to 3 X's) or
                            #       50-80 (L, followed by 0 to 3 X's)
        (IX|IV|V?I{0,3})    # ones: 9 (IX), 4 (IV),
                            #       0-3 (0 to 3 I's) or
                            #       5-8 (V, followed by 0 to 3 I's)
        $                   # end of string
        ''' ,
        re.VERBOSE
    )

Every part of this pattern is optional. This is why the empty string is rejected
before the pattern is used.
    """
    if case == "mix":
        text = text.upper()

    elif case == "upper":
        if not text.isupper():
            return False

    elif case == "lower":
        if not text.islower():
            return False

        text = text.upper()

    else:
        raise RegexUseError(
            "Unknown case << {0} >>.".format(case)
        )

# The pattern alone matches the empty string. This case can only happen here
# with ``case = "mix"``.
    if not text:
        return False

# ``fullmatch`` is used instead of ``search`` because ``$`` also matches just
# before one final newline : "XIV\\n" must not be accepted.
    return bool(PATTERN_ROMAN_NUMERAL.fullmatch(text))


# ------------------------- #
# -- MATHEMATICAL NUMBER -- #
# ------------------------- #

# One raw string is used so that ``\d`` reaches the regex engine without being
# seen as an (invalid) string escape sequence.
PATTERN_NATURAL = re.compile(r"^(\d)+$")

def isNatural(text):
    """
:::::::::::::::::
Small description
:::::::::::::::::

This function tests if the whole string ``text`` corresponds to one natural
number, that is to say if it is made of at least one digit and of nothing else.

The function returns ``True`` for one natural number, and ``False`` otherwise.
The empty string and one number followed by a newline character are rejected.
    """
# ``fullmatch`` is used instead of ``search`` because ``$`` also matches just
# before one final newline : "12\\n" must not be accepted.
    return bool(PATTERN_NATURAL.fullmatch(text))

def isInteger(text):
    """
:::::::::::::::::
Small description
:::::::::::::::::

This function tests if the string ``text`` corresponds to one integer, that is
to say one natural number that can be preceded by one single sign ``+`` or
``-``.

The function returns ``True`` for one integer, and ``False`` otherwise. The
empty string and one sign alone are not integers.
    """
# ``str.startswith`` is safe with the empty string whereas ``text[0]`` raises
# one ``IndexError`` exception.
    if text.startswith(("+", "-")):
        text = text[1:]

    return isNatural(text)

def __isTwoParts(
    text,
    sep,
    tests
):
    """
:::::::::::::::::
Small description
:::::::::::::::::

This function is used by either ``isRational``, ``isDecimal``, or ``isFloat``.
It cuts the string ``text`` at each occurrence of ``sep``.

    1) With one single piece, the function returns the value of
    ``tests[0](text)``.

    2) With exactly two pieces, the first piece is tested by ``tests[0]``, and
    the second piece is tested by ``tests[1]`` only if the first test succeeds.

    3) In all the other cases, ``False`` is returned.


:::::::::::::
The arguments
:::::::::::::

This function uses three arguments.

    1) ``text`` is the string to test.

    2) ``sep`` is the separator given to ``str.split``.

    3) ``tests`` is a couple of functions, each of them taking one string and
    returning one value used as a boolean.
    """
    pieces = text.split(sep)

    if len(pieces) not in (1, 2):
        return False

    checkFirst = tests[0]

    if len(pieces) == 1:
        return checkFirst(text)

    left, right = pieces

    return checkFirst(left) and tests[1](right)

def isRational(text):
    """
:::::::::::::::::
Small description
:::::::::::::::::

This function tests if the string ``text`` corresponds to one rational number,
that is to say either one integer, or one integer and one natural number
separated by one single slash ``/``.

The function returns ``True`` for one rational number, and ``False`` otherwise.
A denominator equal to zero, like in ``"1/0"``, is rejected.
    """
    def __isDenominator(part):
# One legal denominator is one natural number with at least one nonzero digit.
# The digits are tested one by one so as to not depend on the size of the
# number.
        return isNatural(part) and any(
            char.isdecimal() and int(char) != 0
            for char in part
        )

    return __isTwoParts(
        text  = text,
        sep   = "/",
        tests = (isInteger, __isDenominator)
    )

def isDecimal(
    text,
    sep = "."
):
    """
:::::::::::::::::
Small description
:::::::::::::::::

This function tests if the string ``text`` corresponds to one decimal number
where ``sep`` is the decimal separator, the default value being ``"."``.

The string is cut at each ``sep``. One single piece must pass the test done by
``isInteger``. With two pieces, the first one must pass the test done by
``isInteger``, and the second one the test done by ``isNatural``.
    """
# The first function is for the integer part, and the second one is for the
# digits after the separator.
    checkers = (isInteger, isNatural)

    return __isTwoParts(text, sep, checkers)

def isFloat(
    text,
    sep  = ".",
    expo = "E"
):
    """
:::::::::::::::::
Small description
:::::::::::::::::

This function tests if the string ``text`` corresponds to one float number like
``-123.45E+45``.

The string is cut at each ``expo``. The first piece must pass the test done by
``isDecimal`` with the separator ``sep``, and the second piece, if there is one,
must pass the test done by ``isInteger``.


:::::::::::::
The arguments
:::::::::::::

This function uses three arguments.

    1) ``text`` is the string to test.

    2) ``sep`` is the decimal separator. The default value is ``"."``.

    3) ``expo`` is the ten power shortcut. The default value is ``"E"``.
    """
# ``__isTwoParts`` calls its tests with one single argument. This wrapper is
# needed to use the decimal separator chosen by the caller.
    def __isFractional(text):
        return isDecimal(text = text, sep = sep)

    return __isTwoParts(
        text  = text,
        sep   = expo,
        tests = (__isFractional, isInteger)
    )
