# -*- coding: iso8859-1 -*-
#
# $Source: /home/blais/repos/cvsroot/arubomu/lib/python-fallback/strxtra.py,v $
# $Id: strxtra.py,v 1.8 2004/12/17 23:44:23 blais Exp $
#

"""Extra strings support functions."""

__version__ = "$Revision: 1.8 $"
__author__ = "Martin Blais <blais@furius.ca>"


import sys, os
import string, re

import distutils.fancy_getopt
from curses.ascii import isalnum

from pprint import pprint, pformat



# Translation table used by removeAccents(): each character of the first
# string is mapped to the character at the same position in the second.
# NOTE(review): the first literal appears to have been mangled by an encoding
# conversion in this copy -- it no longer seems to have as many characters as
# the replacement string, which string.maketrans() requires.  Presumably it
# held the accented Latin-1 letters and inverted punctuation matching the
# replacements below; confirm against the original iso8859-1 file.
ratbl = string.maketrans('ѡ',
                         'aaaaaeeeeiiiiooooouuuuconAAAAEEEEIIIIOOOOUUUUCON!?')


def removeAccents(s):
    """Return a copy of s with every character translated through the
    module-level ratbl table."""
    unaccented = string.translate(s, ratbl)
    return unaccented


def idify(i, strip=1, preserve_chars=()):
    """Turn a string into an identifier-like string.

    The string is first passed through removeAccents(); then every character
    that is neither ASCII alphanumeric (as decided by curses.ascii.isalnum)
    nor explicitly preserved is replaced by an underscore.

    Arguments:
      i -- the string to convert.
      strip -- if true (the default), leading and trailing underscores are
               removed from the result.
      preserve_chars -- iterable of extra characters to keep untouched (a
                        list, tuple or string); newlines are always kept.

    Returns the converted string.
    """
    s = removeAccents(i)

    # list() allows any iterable to be given, and the default is immutable so
    # that it cannot be shared and modified across calls.
    preserved = ['\n'] + list(preserve_chars)

    # Collect the pieces and join once rather than growing a string with +=.
    pieces = []
    for c in s:
        if isalnum(c) or c in preserved:
            pieces.append(c)
        else:
            pieces.append('_')
    ss = ''.join(pieces)

    if strip:
        ss = ss.strip('_')
    return ss


def simplifyWhitespace(str):
    """Strip the string and replace every run of whitespace characters
    (including newlines) by a single space, so that the result fits on a
    single line.

    Returns the simplified string; an empty or all-whitespace input gives ''.
    """
    # split() without arguments splits on any whitespace run and discards
    # leading/trailing whitespace.  The string methods replace the deprecated
    # string.split()/string.join() module functions.
    return ' '.join(str.split())


# NOTE(review): formatText() is only defined when running on a Python older
# than 2.3; no alternative definition is visible in this file.
if sys.version_info[0:2] < (2,3):
    # Matches the leading whitespace of a line.
    leadws_re = re.compile('^\s*')

    def formatText(str, width=80, indent=0):
        """Re-wrap the text so that it fits in at most width characters.

        Leading whitespace is removed from every line, the text is split
        into paragraphs with splitParagraphs(), each paragraph is wrapped
        with distutils.fancy_getopt.wrap_text() and every resulting line is
        prefixed with indent spaces.  Paragraphs are separated by one empty
        line in the returned string.
        """
        # Remove the leading whitespace of every line.
        stripped = [leadws_re.sub('', line) for line in str.splitlines()]

        margin = ' ' * indent
        outpars = []
        for par in splitParagraphs('\n'.join(stripped)):
            wrapped = distutils.fancy_getopt.wrap_text(par, width)
            outpars.append('\n'.join([margin + l for l in wrapped]))

        return '\n\n'.join(outpars)
    

# Matches a line that is empty or made only of whitespace.
wsl_re = re.compile(r'^\s*$', re.M)

def splitParagraphs(str):
    """Split the string into a list of paragraphs.

    Paragraphs are runs of consecutive non-blank lines; one or more blank
    (empty or whitespace-only) lines separate them.  Each returned paragraph
    is a string whose lines are joined with a newline, without any trailing
    newline.  An empty or all-blank input gives an empty list.
    """
    pars = []
    acc = []
    for l in str.splitlines():
        if wsl_re.match(l):
            # A blank line closes the paragraph being accumulated, if any.
            if acc:
                pars.append('\n'.join(acc))
                acc = []
        else:
            acc.append(l)

    # Flush the last paragraph: the text does not necessarily end with a
    # blank line, and without this the final paragraph would be lost.
    if acc:
        pars.append('\n'.join(acc))

    return pars



def split_quoted(str, sep, quotechar):
    """Split a string on a separator, honouring a quoting character.

    Separators found between a pair of quotechar characters do not split the
    string.  Each resulting element is stripped of surrounding whitespace,
    and an element that both starts and ends with quotechar has that first
    and last character removed; quotes elsewhere are left in place.

    Arguments:
      str -- the string to split.
      sep -- the single-character separator.
      quotechar -- the single character that toggles quoting.

    Returns the list of elements.  An empty element at the very end of the
    string (e.g. after a trailing separator) is not included, so an empty
    input gives an empty list.
    """
    fields = []
    current = []
    in_quotes = False
    for c in str:
        if c == sep and not in_quotes:
            # Unquoted separator: close the current element.
            fields.append(''.join(current))
            current = []
            continue
        if c == quotechar:
            in_quotes = not in_quotes
        # Quote characters are kept for now; they are removed below only for
        # fully quoted elements.
        current.append(c)
    if current:
        fields.append(''.join(current))

    # Remove quotes around fully quoted elements.
    result = []
    for field in fields:
        field = field.strip()
        if field.startswith(quotechar) and field.endswith(quotechar):
            field = field[1:-1]
        result.append(field)
    return result


def quoteSpecial(fn, special_chars):
    """Wrap fn in double quotes when it contains at least one of the given
    special characters; otherwise return fn unchanged."""

    needs_quoting = False
    for candidate in special_chars:
        if candidate in fn:
            # One hit is enough; do not look at the remaining candidates.
            needs_quoting = True
            break

    if not needs_quoting:
        return fn
    return '"%s"' % fn



# Matches '<', then one or more characters other than '>', then '>'.
tag_re = re.compile('<[^>]+>')

def stripHTMLTags(str):
    """Return the string with every substring matched by tag_re deleted."""
    without_tags = tag_re.sub('', str)
    return without_tags


# Matches a brace group such as '{a,b,c}'; group 1 is the text between the
# braces.
ecre = re.compile(r'{([^\}]+)}')

def expandCombinations(str):
    """Expand every '{x,y,...}' group of the string and return the list of
    all resulting strings.

    Each group contributes one alternative per comma-separated item.  In the
    returned list the alternatives of the first group vary fastest:
    'a{1,2}b{x,y}' gives ['a1bx', 'a2bx', 'a1by', 'a2by'].  A string without
    any group is returned as the only element of the list.
    """
    combis = ['']
    pos = 0
    while 1:
        mo = ecre.search(str, pos)
        if not mo:
            break
        choices = mo.group(1).split(',')

        # Literal text between the previous group (or the start of the
        # string) and the opening brace of this one.
        inbtw = str[pos:mo.start()]

        # The outer loop runs over the new choices so that previously built
        # prefixes keep varying fastest.
        combis = [s + inbtw + c for c in choices for s in combis]
        pos = mo.end()

    # Append whatever follows the last group to every combination.
    ends = str[pos:]
    return [s + ends for s in combis]



def main():
    """Pretty-print the results of split_quoted() and idify() on a few
    sample strings."""
    # Other samples, currently disabled:
    #print formatText(""" """)
    #print simplifyWhitespace( ' \t\t\t ah \t \r\n sds')
    #print quoteSpecial('Hibou coucou', [','])
    #print quoteSpecial('Hibou,  coucou', [','])
    #print quoteSpecial('Hibou | coucou', [',', '|'])
    #print quoteSpecial('Hibou,  | coucou', [',', '|'])
    from pprint import pprint

    quoted_samples = (
        'Luis "Perico" Ortiz, Title',
        '"Perico, Bla", Title',
        '"1,2"',
        '"Eleggua, Oggun y Ochossi" ',
    )
    for sample in quoted_samples:
        pprint(split_quoted(sample, ',', '"'))

    padded = "  Eleggua, Oggun y Ochossi   "
    pprint(idify(padded))
    pprint(idify(padded, 1))
    pprint(idify("  Eleggua, -Oggun y Ochossi   ", 1, ['-']))

# Run main if loaded as a script
if __name__ == "__main__":
    main()
