#!/usr/bin/env python

# Author: Jonathan Slenders, City Live

# PO file specifications: http://www.gnu.org/software/hello/manual/gettext/PO-Files.html

import datetime

class POEntry(object):
    """
    One translation entry of a PO file: a msgid with its msgstr, plus the
    comments, source references and fuzzy flag that precede it.
    """

    def __init__(self, msgid, msgstr=''):
        # Comment lines before this entry. Starts out as an empty list; note
        # that Parser replaces it with a single newline-joined string.
        self.comments = []
        self.references = []  # List of POEntryReference
        self.msgid = msgid
        self.msgstr = msgstr
        self.fuzzy = False

    def unicode(self):
        """
        Print a one-line debug representation of this entry to stdout.

        Nothing is returned.
        """
        # Parenthesised single-argument form: prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('Entry: =%s= -> =%s=' % (self.msgid, self.msgstr))


class POEntryReference(object):
    """
    A single ``filename:line`` source reference, as found on a ``#:``
    comment line of a PO file.
    """

    def __init__(self, filename, line):
        self.filename, self.line = filename, line


class ParseError(Exception):
    """Raised by the PO parser when the input cannot be parsed."""


class Parser(object):
    """
    Minimal reader/writer for gettext PO files.

    Constructing a Parser parses the given PO content; the result is the
    list ``self.entries`` of POEntry objects (entries with an empty msgid,
    such as the PO header, are not stored).  ``to_string`` serialises
    ``self.entries`` back into PO format.

    Only ``msgid``/``msgstr`` pairs are understood.  Any other keyword at
    the start of a line raises ParseError.
    """

    # Escape sequences with a special meaning inside a quoted string.  Any
    # other escaped character (for instance \" or \\) stands for itself.
    _UNESCAPE = {'n': '\n', 't': '\t', 'r': '\r'}

    # Characters that have to be escaped when writing a quoted string.  A
    # newline additionally closes the current quoted line and opens a
    # continuation line.
    _ESCAPE = {
        '"': r'\"',
        '\\': r'\\',
        '\t': r'\t',
        '\r': r'\r',
        '\n': '\\n"\n"',
    }

    class _State(object):
        """ Accumulates the pieces of the entry currently being parsed. """

        def __init__(self):
            self.reset()

        def reset(self):
            self.references = []
            self.current_comments = []
            self.msgid = []
            self.msgstr = []
            # True while quoted strings belong to the msgid, False while
            # they belong to the msgstr.
            self.msg_state = False
            self.fuzzy = False

    def __init__(self, file_content=''):
        """
        Parse the content of a PO file and store the result in
        ``self.entries`` (a list of POEntry).

        file_content -- the PO file content; byte strings are decoded as
                        UTF-8, text strings are used as they are.

        Raises ParseError when the content is malformed.
        """
        # ``bytes`` is ``str`` on Python 2, so this decodes exactly the inputs
        # the Python-2-only ``isinstance(..., str)`` test used to decode.
        if isinstance(file_content, bytes):
            file_content = file_content.decode('utf-8')

        # Zero-based index of the line being parsed (used in error messages)
        self._line = 0
        self._input = file_content.replace('\r', '').split('\n')

        # Result vars
        self.entries = []

        state = self._State()
        for index, line in enumerate(self._input):
            self._line = index
            self._parse_line(line, state)

        # Save the entry that was still being collected at end of input
        self._remember(state)

    # =======================================================================================
    #    PO file parser helpers
    # =======================================================================================

    def _error(self, message, text=''):
        """ Raise ParseError for `message`, naming the line and offending text. """
        # %r keeps the message ASCII-safe whatever the input contains.
        raise ParseError('%s  line %s: %r' % (message, self._line, text))

    def _remember(self, state):
        """ Store the entry collected in `state` (if it has a msgid); reset `state`. """
        msgid = ''.join(state.msgid)
        if msgid:
            entry = POEntry(msgid, ''.join(state.msgstr))
            # Kept as one newline-joined string for backward compatibility;
            # to_string() accepts both this form and a list of lines.
            entry.comments = '\n'.join(state.current_comments)
            entry.references = state.references
            entry.fuzzy = state.fuzzy
            self.entries.append(entry)
        state.reset()

    def _read_string(self, text):
        """
        Read the double-quoted string at the start of `text`.

        Returns a tuple (decoded string, remainder of `text` after the
        closing quote).  Raises ParseError when `text` does not start with a
        double quote or when the closing quote is missing.
        """
        if not text.startswith('"'):
            self._error('Not the beginning of a string, expecting double quote', text)

        chars = []
        pos = 1
        end = len(text)
        while pos < end:
            char = text[pos]
            if char == '"':
                return ''.join(chars), text[pos + 1:]
            if char == '\\':
                pos += 1
                if pos >= end:
                    break  # Backslash is the last character: not terminated
                char = text[pos]
                chars.append(self._UNESCAPE.get(char, char))
            else:
                chars.append(char)
            pos += 1

        # Strings never continue on the next line, so running out of
        # characters means the closing quote is missing.
        self._error('String not terminated by double quotes', text)

    @staticmethod
    def _parse_reference(reference):
        """ Turn one `filename:line` (or bare `filename`) token into a POEntryReference. """
        # Split on the LAST colon, so file names that contain a colon
        # themselves (e.g. Windows drive letters) keep working.
        filename, separator, line = reference.rpartition(':')
        if separator:
            try:
                return POEntryReference(filename, int(line))
            except ValueError:
                pass
        # Reference without a line number
        return POEntryReference(reference, None)

    def _parse_comment(self, line, state):
        """ Handle a line starting with '#': reference, flag or plain comment. """
        if line.startswith('#~'):
            # Older, unused translation strings; drop these for now...
            return

        if line.startswith('#:'):
            for reference in line[2:].split():
                state.references.append(self._parse_reference(reference))
        elif line.startswith('#,'):
            for flag in line[2:].split(','):
                if flag.strip() == 'fuzzy':
                    state.fuzzy = True
                # Ignore other flags, like 'python-format', for now...
        else:
            state.current_comments.append(line)

    def _parse_line(self, line, state):
        """ Consume one input line, updating `state` and `self.entries`. """
        rest = line
        while True:
            rest = rest.lstrip()
            if not rest:
                # Empty line, or nothing but whitespace left
                return

            if rest[0] == '#':
                # NOTE:
                # We suppose there are no comments between msgid and msgstr,
                # and because comments are always written above the entries,
                # we take this as the start of a new entry.
                if state.msgstr:
                    self._remember(state)
                self._parse_comment(rest, state)
                return

            if rest[0] == '"':
                text, rest = self._read_string(rest)
                if state.msg_state:
                    state.msgid.append(text)
                else:
                    state.msgstr.append(text)
            elif rest.startswith('msgid') and not state.msg_state:
                if state.msgstr:
                    self._remember(state)  # Save previous entry
                state.msg_state = True
                rest = rest[len('msgid'):]
            elif rest.startswith('msgstr') and state.msg_state:
                state.msg_state = False
                rest = rest[len('msgstr'):]
            else:
                self._error('parse error', rest)

    # =======================================================================================
    #    PO file generator
    # =======================================================================================

    @staticmethod
    def _comment_lines(comments):
        """ Return `comments` (a list of lines or one newline-joined string) as a list of lines. """
        if isinstance(comments, (list, tuple)):
            return list(comments)
        # A single string: iterating it directly would yield one character
        # at a time, so split it into lines first.
        return comments.split('\n') if comments else []

    def _quote(self, text):
        """ Return `text` as an escaped, double-quoted PO string. """
        return '"%s"' % ''.join(self._ESCAPE.get(char, char) for char in text)

    def to_string(self, header=''):
        """
        Returns a PO file as an encoded string

        header -- optional text; every line of it is written as a comment
                  line at the top of the file.

        The result is UTF-8 encoded.  Entries with an empty msgid are
        skipped.
        """
        # Header
        # (Deliberately no generation timestamp, to prevent unnecessary
        # changes between two exports.)
        output = ['# Translations from City Live i18n database\n']

        if header:
            for line in header.split('\n'):
                output.append('# %s\n' % line)

        output.append('#, fuzzy\n')
        output.append('msgid ""\n')
        output.append('msgstr ""\n')
        output.append('"MIME-Version: 1.0\\n"\n')
        output.append('"Content-Type: text/plain; charset=UTF-8\\n"\n')
        output.append('"Content-Transfer-Encoding: 8bit\\n"\n\n')

        # Entries
        for entry in self.entries:
            if not entry.msgid:
                continue

            for comment in self._comment_lines(entry.comments):
                if comment and comment[0] != '#':
                    output.append('# ')
                output.append(comment)
                output.append('\n')

            for reference in entry.references:
                if reference.line is None:
                    output.append('#: %s\n' % reference.filename)
                else:
                    output.append('#: %s:%s\n' % (reference.filename, reference.line))

            if entry.fuzzy:
                output.append('#, fuzzy\n')

            output.append('msgid %s\n' % self._quote(entry.msgid))
            output.append('msgstr %s\n\n' % self._quote(entry.msgstr))

        return u''.join(output).encode('utf-8')


if __name__ == '__main__':
    # Debug helper: parse one PO file and dump the parsed entries.  The file
    # can be given as first command line argument; without it the original
    # development path is used.
    import sys

    po_path = '/home/jonathan/git/mvne_platform/mobilife/locale/fr/LC_MESSAGES/django.po'
    if len(sys.argv) > 1:
        po_path = sys.argv[1]

    # Read the raw bytes and let Parser do the UTF-8 decoding.  (The third
    # positional argument of the built-in open() is the buffer size, not an
    # encoding, so passing 'utf-8' there raises TypeError.)
    with open(po_path, 'rb') as po_file:
        p = Parser(po_file.read())
    print(p.entries)
