"""
Parse output of Pegasus 'gensim' parser.

THIS PARSER IS EXPERIMENTAL

You can get the gensim parser from  svn at 
 https://smarty.isi.edu/svn/repo1/pegasus/trunk/contrib/showlog
The file is called gensim.

The only way this currently runs is with BOTH the 'out' and 'jobs' files
of the gensim output, in that order, e.g.:

    cat out jobs | nl_parser -m gensim > parsed.out
"""
"""
The concatenation of two files, 'out' followed by 'jobs', is parsed.
The 'jobs' file has the timing breakdown, and the 'out' file has
the timestamps.

.Example out file (snippet)
----
201 cln_mProjectPP_ID000003 POST_SCRIPT_STARTED - cluster1 -
201 mDiffFit_ID000004 JOB_TERMINATED 29448.0 cluster1 -
201 mDiffFit_ID000004 POST_SCRIPT_STARTED - cluster1 -
----

.Example job file (header and first line)
----
#Job                                                     Site Kickstart     Post   DAGMan   Condor Resource  Runtime CondorQLen
chmod_mAdd_ID000013_0                                cluster1     0.43     5.00     6.00    20.00     0.00     0.00     5
----

Explanation:
    Kickstart is the time seen from the kickstart output
    Post is the time taken by the postscript of that job
    DAGMan is the time taken by DAGMan to release the job after all the parents of the job have finished.
    Condor is the time taken by Condor from the SUBMIT event to the EXECUTE or GRID-SUBMIT event
    Resource is the remote queue delay (difference between the EXECUTE and GRID-SUBMIT events when job submission is of type globus)
    CondorQLen is the number of jobs in the Condor queue
"""

from netlogger.parsers.base import BaseParser

class Parser(BaseParser):
    """Parse the 'out' + 'job' file output by Pegasus 'gensim' parser.

    Note: EXPERIMENTAL.

    Input is consumed line by line in two phases:

    1. 'out' phase (the initial state). Each line starts with an integer
       followed by a job name. The integer on the first line whose job
       name is 'INTERNAL' is kept as the timestamp offset; for every
       other line whose job name contains '_ID', the integer is stored
       under the job ID (a later line for the same ID overwrites an
       earlier one). No events are produced in this phase.
    2. 'job' phase, entered when a line starting with '#Job' is seen.
       Each line yields one event whose 'ts' is the stored integer for
       that job ID plus the offset.
    """

    def __init__(self, fileobj, **kw):
        BaseParser.__init__(self, fileobj, **kw)
        # Integer from the first 'INTERNAL' line; None until it is seen.
        self._ts_offs = None
        # Last integer seen in the 'out' phase, keyed by job ID.
        self._ts = {}
        # True while still reading 'out' lines, False after the '#Job' header.
        self._parsing_out = True

    def process(self, line):
        """Handle one input line.

        Returns a tuple of event dictionaries: empty for the '#Job'
        header and for every 'out' line, one element for each non-blank
        'job' line.
        """
        if line.startswith('#Job'):
            # Header of the job table: everything after it is a job line.
            self._parsing_out = False
            return ()
        if self._parsing_out:
            self._parseOut(line)
            return ()
        return self._parseJob(line)

    def _parseOut(self, line):
        """Record the offset or the per-job timestamp from an 'out' line.

        Blank lines are ignored. Raises ValueError if the first field is
        not an integer and IndexError if the line has only one field.
        """
        fields = line.split()
        if not fields:
            return  # blank line: nothing to record
        relts = int(fields[0])
        job = fields[1]
        if job == 'INTERNAL' and self._ts_offs is None:
            self._ts_offs = relts
            return
        try:
            j_id = self._splitJob(job)[2]
        except ValueError:
            return  # some lines don't have an ID
        # squirrel away timestamp for job
        self._ts[j_id] = relts

    def _parseJob(self, line):
        """Build the event for one line of the job table.

        Returns a 1-tuple holding the event dictionary, or an empty
        tuple for a blank line. All column values are passed through as
        the strings found in the line.

        Raises ValueError if the job name has no '_ID' or if no
        'INTERNAL' line was seen in the 'out' phase, KeyError if the job
        ID was never seen in the 'out' phase, and IndexError if the line
        has fewer than nine fields.
        """
        fields = line.split()
        if not fields:
            return ()  # blank line: no event
        j_action, j_type, j_id = self._splitJob(fields[0])
        # retrieve job timestamp (KeyError if the ID was never recorded)
        rel_ts = self._ts[j_id]
        if self._ts_offs is None:
            # Without the offset the sum below would fail with an opaque
            # TypeError; report the real cause instead.
            raise ValueError("no INTERNAL line seen before job lines; "
                             "cannot compute timestamp")
        # build returned event
        e = {
            'ts': rel_ts + self._ts_offs,
            'event': j_type,
            'job.id': j_id,
            'action': j_action,
            'site.id': fields[1],
            'kickstart': fields[2],
            'post': fields[3],
            'dagman': fields[4],
            'condor': fields[5],
            'resource': fields[6],
            'runtime': fields[7],
            'condor.queue': fields[8],
        }
        return (e,)

    def _splitJob(self, job):
        """Split a job name into (action, type, id).

        The ID is everything after the first '_ID'. If the part before
        '_ID' contains an '_', the text before the first '_' is the
        action and the remainder is the type; otherwise the action
        defaults to 'run' and the whole prefix is the type. Examples:

            'cln_mProjectPP_ID000003' -> ('cln', 'mProjectPP', '000003')
            'mDiffFit_ID000004'       -> ('run', 'mDiffFit', '000004')
            'chmod_mAdd_ID000013_0'   -> ('chmod', 'mAdd', '000013_0')

        Raises ValueError("no ID") if the name does not contain '_ID'.
        """
        # Find '_ID'
        p_id = job.find('_ID')
        if p_id < 0:
            raise ValueError("no ID")
        # Job ID is the part after that
        j_id = job[p_id + 3:]
        # Is there an '_' in the part before '_ID'?  The end bound is
        # exclusive, so p_id itself is the correct limit; p_id - 1 would
        # skip the last character and, when p_id is 0, turn into the
        # negative index -1 and match the '_' of '_ID' itself.
        p_action = job.find('_', 0, p_id)
        if p_action < 0:
            # No: default action, whole thing is type
            j_action = 'run'
            j_type = job[:p_id]
        else:
            # Yes: split into action/type
            j_action = job[:p_action]
            j_type = job[p_action + 1:p_id]
        return j_action, j_type, j_id

