"""
A Pegasus log parser.
"""
__author__ = '$Author: dang $'
__rcsid__ = '$Id: kickstart.py 24753 2010-04-29 04:14:31Z dang $'

import time

from netlogger.parsers.base import BaseParser, getGuid, parseDate
ks = None
try:
    from netlogger.parsers.modules import ks
except ImportError:
    pass

from xml.parsers.expat import ExpatError
try:
    from xml.etree.cElementTree import XML
except:
    try:
        from xml.etree.ElementTree import XML
    except:  # Python < 2.5
        from elementtree.ElementTree import XML

# XML namespace used by every element of a kickstart invocation record.
PEGASUS_NS = 'http://pegasus.isi.edu/schema/invocation'


def _ns(x):
    """Return tag name `x` qualified with the Pegasus namespace,
    in ElementTree's '{namespace}tag' notation."""
    return ''.join(('{', PEGASUS_NS, '}', x))


def _xp(comp):
    """Build a '/'-separated ElementTree path from the tag names in
    `comp`, qualifying each one with the Pegasus namespace."""
    return '/'.join([_ns(part) for part in comp])


# Path, relative to the document root, of the element carrying the
# main job's exit code.
MAIN_JOB_STATUS_XPATH = _xp(('mainjob', 'status', 'regular'))

class Parser(BaseParser):
    """Parse the Kickstart job wrapper output.
    See also: http://pegasus.isi.edu/

    Parameters:
        - one_event {yes,no,no*}: If 'yes', generate one event per kickstart invocation
                                  record, otherwise generate a start/end event pair.
        - use_c {yes,no,no*}: Use the *experimental* C parser instead. This requires that
                              you compiled the parser with "python setup.py swig".
                              Selecting it also forces one event per invocation.

    """
    def __init__(self, f, one_event=False, use_c=False, **kwargs):
        """ Construct and initialize class vars. """
        BaseParser.__init__(self, f, fullname=__name__, **kwargs)

        # The C parser only produces single events, so it implies one_event.
        self._one_event = one_event or use_c
        # Becomes True once the first "<?xml " marker has been seen.
        self._xml_began = False
        if use_c:
            # The C parser takes the whole document as one string.
            self.input = ""
            # Route all lines through the C-based implementation.
            self.process = self._c_process
        else:
            # A list for storing the invocation document line by
            # line until the complete document has been read.
            self.input = []

            # The resulting event list from an invocation
            self.events = []

            # The ElementTree used for this invocation
            self.root = None

            # The guid used to tie all events for this invocation together
            self.guid = None

    def process(self, line):
        """Process a line.

        Lines are buffered until one containing "</invocation>" arrives;
        the buffered document is then parsed and converted to events.
        Everything before the first "<?xml " marker is skipped as
        non-xml prologue.

        Returns an empty tuple while the document is incomplete (or
        still in the prologue), otherwise the list of events.
        Raises ValueError if the buffered text is not well-formed XML
        or is not a Pegasus invocation document.
        """
        if self._xml_began:
            self.input.append(line)
        else:
            p = line.find("<?xml ")
            if p < 0:
                return ()  # still in header
            # Drop anything on this line that precedes the XML declaration.
            self.input.append(line[p:])
            self._xml_began = True
        if "</invocation>" not in line:
            return ()

        try:
            self.root = XML(''.join(self.input))
        except ExpatError:
            # sys.exc_info() is used instead of the "except E, e" /
            # "except E as e" syntax so that the same source parses on
            # both old and new Python versions.
            import sys
            experr = sys.exc_info()[1]
            self.input = []
            raise ValueError("expat error: %s" % experr)
        self.input, self.events = [], []

        # We've got a complete ElementTree
        return self._process_root()

    def _c_process(self, line):
        """Process a line with the C parser module.

        Buffers lines until one containing "</invocation>" arrives,
        then hands the whole buffer to ks.parseBuffer() and returns
        its output as a sequence of event strings. Returns an empty
        tuple while the document is incomplete.

        NOTE: `ks` is None when the optional C module could not be
        imported; the parseBuffer call then fails with AttributeError.
        """
        self.input += line
        if "</invocation>" not in line:
            return ()
        event = ks.parseBuffer(self.input)
        self.input = ""
        # if last char before newline is ' ', this is a flag
        # indicating there are errors (hopefully rare)
        if event[-2] == ' ':
            # Several newline-terminated events: split them up and
            # restore the newline on each.
            return [s + '\n' for s in event.split('\n')[:-1]]
        else:
            return (event,)

    def _process_root(self):
        """ Process the entire invocation doc parsed and rooted at
        self.root.

        Appends the generated events to self.events and returns that
        list. Raises ValueError if the root element is not a Pegasus
        <invocation> or if it has no regular main job status.
        """
        root = self.root

        qname = root.tag.split('}')
        if len(qname) != 2 or qname[0][1:] != PEGASUS_NS or \
                qname[1] != 'invocation':
            raise ValueError("Invalid pegasus invocation document")

        # Initialize invocation
        if self._one_event:
            invoke = {'ts': root.get('start')}
        else:
            invoke = {'ts': parseDate(root.get('start')),
                      'guid': getGuid(repr(time.time()),
                                      *root.attrib.values())}

        # Add in invocation optional attributes, if they are found
        attrs = (('hostname', 'host'), ('user', 'user'),
                 ('transformation', 'transformation'),
                 ('wf-label', 'workflow.id'))
        self._populate_event(invoke, root, attrs)

        # Pull usage info in from usage element
        usage = root.find(_xp(('mainjob', 'usage')))
        self._populate_event(invoke, usage, (('nsignals', 'nsignals'),))

        # Pull in duration and exit status in from mainjob element
        duration = float(root.get('duration'))
        status_elem = root.find(MAIN_JOB_STATUS_XPATH)
        if status_elem is None:
            # Report this like any other malformed document instead of
            # failing with an AttributeError on None.
            raise ValueError("Invalid pegasus invocation document: "
                             "no regular mainjob status")
        mainjob_status = int(status_elem.get('exitcode'))

        # Working directory. An Element with no children is false in a
        # boolean context, so the presence test must be explicit.
        cwd = root.find(_ns('cwd'))
        if cwd is not None and cwd.text is not None:
            invoke['cwd'] = cwd.text

        # Only store environment and limits if status is non-zero.
        # The loader only wants one 'text' entry per field, so the
        # environment and limit strings are concatenated as:
        #   -env-key:value::key:value...-limits-id:tag:value::...
        # where `tag` is the namespace-qualified tag of the limit element.
        if mainjob_status != 0:
            environment = root.find(_ns('environment'))
            if environment is None:
                env_nodes = []
            else:
                env_nodes = list(environment)
            # Empty elements have text None; treat that as ''.
            envString = '-env-' + '::'.join(
                [(node.get('key') or '') + ':' + (node.text or '')
                 for node in env_nodes])

            resource = root.find(_ns('resource'))
            # NOTE(review): the combined string is only stored when
            # resource limits are present - confirm that dropping the
            # environment otherwise is intended.
            if resource is not None and len(resource) > 0:
                rstring = '::'.join(
                    [(n.get('id') or '') + ':' + n.tag + ':' + (n.text or '')
                     for n in resource])
                invoke['text=longvars'] = envString + '-limits-' + rstring

        # Arguments may be empty, or of the form
        # '-P 4 -p 1 base-uri se-mount-mount'. The semantics are hard to
        # follow, so just build one string out of the arguments in order.
        argvector = root.find(_xp(('mainjob', 'argument-vector')))
        if argvector is None:
            invoke['arguments'] = ''
        else:
            invoke['arguments'] = ' '.join(
                [k.text or '' for k in argvector])

        self.events.append(invoke)
        if self._one_event:
            # Spit out only one event per invocation
            invoke['event'] = "pegasus.invocation"
            invoke['duration'] = duration
            invoke['status'] = mainjob_status
        else:
            # Spit out a pair of start/end events per invocation
            invoke['event'] = "pegasus.invocation.start"

            invoke_end = {
                'event': "pegasus.invocation.end",
                'ts': invoke['ts'] + duration,
                'guid': invoke['guid'],
                'status': mainjob_status
                }
            self.events.append(invoke_end)

        # Add events for failed stat calls
        for statcall in root.findall(_ns('statcall')):
            errnum = int(statcall.get('error'))
            # assume non-zero means "failed" (?)
            if errnum == 0:
                continue
            file_elem = statcall.find(_ns('file'))
            if file_elem is None:
                # Same fallback as for a missing statinfo below.
                filename = 'unknown'
            else:
                filename = file_elem.get('name')
            statinfo = statcall.find(_ns('statinfo'))
            if statinfo is None:  # fix Issue #217
                statinfo = {'user': 'unknown', 'group': 'unknown'}
            _e = {'event': 'pegasus.invocation.stat.error',
                  'ts': invoke['ts'],
                  'file': filename,
                  'user': statinfo.get('user'),
                  'group': statinfo.get('group'),
                  'status': errnum}
            # The guid only exists in start/end (two-event) mode.
            if 'guid' in invoke:
                _e['guid'] = invoke['guid']
            self.events.append(_e)

        return self.events

    def _populate_event(self, event, elem, attrs):
        """ Utility method for populating the given event with
        attributes from the given element, if those attributes exist
        within that element.

        `attrs` is a sequence of (xml_attribute, event_key) pairs.
        Does nothing when `elem` is None.
        """
        if elem is None:
            return
        for attr, new_name in attrs:
            if attr in elem.attrib:
                event[new_name] = elem.get(attr)
