#
# $Source: /home/blais/repos/cvsroot/arubomu/lib/python-fallback/enjot.py,v $
# $Id: enjot.py,v 1.7 2004/12/17 23:44:23 blais Exp $
#

"""Generic parser for simple file format for keeping notes on some data items.

This file format is a simple format that can be used for multiple purposes to
manually store and edit notes on some simple data items.

The format consists of a list of entries, each described by a finite set of
fields, and accompanied by a chunk of arbitrary description text; here is an
example::

  - field1, field2, field3, field4, ...

    Arbitrary description text (indented).

The fields can be quoted with double-quotes. The number of fields is not
fixed; it can vary between entries.  Lines starting with # in the first column
are comments; a comment is skipped, and it also ends the entry that precedes
it.  Also note that the initial '-' character can be any single character other
than a space.
"""

# Sample document in the notes format.  test() parses it with load() and writes
# the result back out with save().  The indented 'ahldhs' line that follows the
# "cutting comment" is outside of any entry, which is why test() asks load() to
# ignore errors.
__example_input__ = """

# comment
# comment
# commentdisjdsj

- key1, key2, key3

- key1, key2, key3, key4

  Some Text.
  BLa bla.
  # not a comment.

# cutting comment, cuts the previous line.

  ahldhs

- key1, key2

- key1, "Entry, with, commas", key3

  Bla Bla. The usual.

  There is a line that should have been preserved above this one.

"""


__version__ = "$Revision: 1.7 $"
__author__ = "Martin Blais <blais@furius.ca>"


import sys, os
import re

from strxtra import split_quoted
from pprint import pprint


# The following patterns do not appear to be used by the functions in this
# file; they are kept so that the module's public names stay unchanged.
# Regular expressions are written as raw strings so that backslash sequences
# such as \d and \s reach the re module untouched.
idxe = re.compile(r'^[-=&/\?O] (.+)$')
attrre = re.compile(r'^  (.+)$')
atere = re.compile(r'^(.+)="(.*)"$')
# NOTE(review): the '.' between the two numbers is unescaped and therefore
# matches any character; presumably a literal dot was meant -- confirm.
tlre = re.compile(r'^TL-(\d+).(\d+)$')
# '\014' is the octal escape for the form-feed character.
pagre = re.compile('^\014')
wsre = re.compile(r'^\s*$')


# A comment line: '#' in the very first column.
comment_re = re.compile(r'^#')
# An entry line: one non-space marker character, one space, then the fields.
entry_re = re.compile(r'^[^ ] (.*)')
# A line that holds nothing but whitespace.
empty_re = re.compile(r'^\s*$')

# When true, load() prints a trace of how it classifies each line.
debug = False


def load(f, ignore_errors=False):

    """Parse a notes file and return its entries.

    Arguments:

      f -- file-like object to read from; only its readline() method is used.

      ignore_errors -- if true, non-blank text lines that do not belong to any
        entry are silently dropped instead of raising an error.

    Returns a list of (fields, text) pairs, in file order.  'fields' is a tuple
    of the values that split_quoted() extracts from the entry line.  'text' is
    the entry's description with its common indentation removed and its lines
    joined with os.linesep, or None if the entry has no description.  Blank
    lines before the first and after the last description line are dropped;
    blank lines in between are kept.

    A comment line ends the entry that precedes it.

    Raises RuntimeError if a non-blank text line is found outside of an entry
    and ignore_errors is false."""

    entries = []
    curfields, curtext = None, None
    no = 0
    # Blank lines seen since the last description line.  They are only added
    # to the description if another non-blank line follows.
    pending = []
    while True:
        line = f.readline()
        no += 1
        if not line:
            break
        # Strip the line terminator only: the last line of a file may not have
        # one, and must not lose its final character.
        if line.endswith('\n'):
            line = line[:-1]
        if debug: print('=== reading: %s' % line)

        if comment_re.match(line):
            if debug: print('comment')
            # push accumulated entry if needed
            if curfields:
                entries.append((tuple(curfields), curtext))
                curfields, curtext = None, None
            # skip line
            continue

        mo = entry_re.match(line)
        if mo:
            if debug: print('entry')
            # push accumulated entry if needed
            if curfields:
                entries.append((tuple(curfields), curtext))
                curfields, curtext = None, None

            # create new entry from field
            curfields = split_quoted(mo.group(1), ',', '"')
            pending = []
            continue

        if debug: print('non-entry')
        blank = empty_re.match(line)

        if not curfields:
            # Text outside of any entry: blank lines are harmless, anything
            # else is an error unless the caller asked to ignore those.
            if blank:
                if debug: print('skipping empty')
                continue
            if not ignore_errors:
                raise RuntimeError(
                    'Error: text chunk not in entry at line (%s).' % no)
            continue

        if blank:
            if curtext is None:
                if debug: print('skipping empty line at head')
            else:
                if debug: print('keeping empty line for later')
                pending.append(line)
            continue

        if curtext is None:
            curtext = []
        curtext.extend(pending)
        curtext.append(line)
        pending = []

        if debug: print('%s %s' % (curtext, pending))

    # push accumulated entry if needed
    if curfields:
        entries.append((tuple(curfields), curtext))

    # remove prepending whitespace from entries text.
    newentries = []
    for fields, lines in entries:
        text = None
        if lines is not None:
            text = os.linesep.join(remove_prepending_ws(lines))
        newentries.append((fields, text))

    return newentries


# Captures the run of leading spaces of a line that has some content after it.
# Lines made only of spaces do not match.
prews_re = re.compile('^([ ]*)[^ \n]')

def remove_prepending_ws(textlines):

    """Remove the indentation that is common to all the given lines.

    The common indentation is the smallest number of leading spaces found on
    the lines that have some content; lines made only of spaces are not taken
    into account when computing it.  That many characters are then removed
    from the front of every line.  If no line has any content, the leading
    spaces of every line are removed.

    We're assuming that the lines do not contain end-of-line markers (this is
    important).

    Arguments:

      textlines -- a list of strings, one per line.

    Returns a new list of strings; the input list is not modified."""

    indents = []
    for line in textlines:
        mo = prews_re.match(line)
        if mo:
            indents.append(len(mo.group(1)))

    if not indents:
        # Nothing to measure the indentation against.
        return [line.lstrip(' ') for line in textlines]

    # Use the real minimum rather than an arbitrary upper bound, so that very
    # deep indentation is removed completely as well.
    minlen = min(indents)
    return [line[minlen:] for line in textlines]


def save(f, entries):

    """Output a list of entries in the expected format.

    Arguments:

      f -- file-like object to write to; only its write() method is used.

      entries -- an iterable of (fields, text) pairs.  'fields' is a sequence
        of strings or None; 'text' is the description as a single string, or
        None if there is no description.

    Each entry is written as a '- ' line holding the comma-separated fields,
    followed by the description indented by two spaces, and a blank line.
    Trailing None fields are dropped, other None fields are written as empty
    fields, and fields that contain a comma are wrapped in double-quotes."""

    for fields, text in entries:
        f.write('- ')

        flds = list(fields)

        # remove useless fields
        while flds and flds[-1] is None:
            del flds[-1]

        cooked = []
        for v in flds:
            if v is None:
                cooked.append('')
            elif ',' in v:
                # Quote the value so that its commas are not taken for field
                # separators.
                cooked.append('"%s"' % v)
            else:
                cooked.append(v)

        f.write(', '.join(cooked))
        f.write('\n')

        if text is not None:
            f.write('\n')
            for l in text.splitlines():
                f.write('  ')
                f.write(l)
                f.write('\n')

        f.write('\n')




def test():
    """Parse the example input, dump the parsed entries, then write them back
    out in the file format on standard output."""
    from StringIO import StringIO
    separator = '=' * 70
    parsed = load(StringIO(__example_input__), 1)
    print(separator)
    pprint(parsed)
    print(separator)
    save(sys.stdout, parsed)


def test_pp():

    """Run remove_prepending_ws() over two sample texts and pretty-print the
    resulting lists of lines."""

    samples = []

    # Lines indented by different amounts.
    samples.append("""
   Por el suelo.
     Mamacita te vamos a matar.

   Escuchando la ultima rola.
""")

    # A single line of content surrounded by empty lines.
    samples.append("""

     Escuchando la ultima rola.

""")

    for sample in samples:
        pprint(remove_prepending_ws(sample.splitlines()))

# When executed as a script, run test(), which parses the example input and
# prints the result.
if __name__ == "__main__":
    test()
