#!/usr/bin/env python
"""
Read NetLogger logs, output the interval between 
the .start and .end events.

The output is the log format, or a simple CSV with the event name, key, 
and interval
"""
__author__ = "Dan Gunter <dkgunter@lbl.gov>"
__rcsid__ = "$Id: nl_interval 802 2008-06-06 18:15:21Z dang $"

import csv
import logging
import optparse
import re
import socket
import sys
import syslog
import time
from netlogger import nlapi
from netlogger import nllog
from netlogger.util import ProgressMeter, NullProgressMeter
from netlogger.nlapi import Level, Log
from netlogger.parsers.base import NLFastParser
from netlogger.analysis.startend import StartEndMatcher

# Logging
# Module-level logger: a silent no-op until activateLogging() swaps in
# a real logger.  Note: run() takes its own `log` parameter for output;
# this one is only for diagnostic logging of the script itself.
log = nllog.NullLogger()
def activateLogging(name="netlogger.nl_interval"):
    """Replace the module-level no-op logger with a script logger
    registered under *name*."""
    global log
    log = nllog.getScriptLogger(name)

def parseIds(o):
    """Split ["foo:bar,baz", "elmo:room,theme"] into:
         [(re.compile('foo'), ['bar','baz']),
          (re.compile('elmo'), ['room', 'theme'])]
       and return this list of tuples.

    An entry with no ':' is a catch-all: it yields a (None, fields)
    tuple that is always placed last so regex patterns are tried first.
    At most one catch-all entry may appear; a second one raises
    ValueError.
    """
    patterned = []
    catchall = []
    for spec in o:
        if ':' in spec:
            # "REGEX:FIELD1,..,FIELDN" -- split on the first colon only,
            # so the field list may itself not contain a colon.
            pattern, field_spec = spec.split(':', 1)
            patterned.append((re.compile(pattern), field_spec.split(',')))
        else:
            if catchall:
                raise ValueError("empty event pattern can only appear once")
            catchall = [(None, spec.split(','))]
    return patterned + catchall

class HWriter:
    """Human-readable row writer with a csv.writer-compatible interface.

    The very first row given to writerow() is assumed to be the CSV
    header that run() emits, and is silently discarded.  Every later
    row (event, key, interval) is printed as "event interval" -- the
    key column is dropped for human output.
    """
    def __init__(self, ofile):
        self._o = ofile       # output stream
        self._first = True    # header row not yet consumed
    def writerow(self, x):
        if not self._first:
            self._o.write("%s %s\n" % (x[0], x[2]))
        else:
            self._first = False

def run(event_ids=None, infile=None, writer=None, log=None,
        progress_meter=None, hdr=True):
    """Read from infile until EOF, pairing .start/.end events and
    emitting one interval record per matched pair.

    Parameters:
      event_ids - list of (regex_or_None, id_fields) tuples as built by
                  parseIds(); a None regex matches any event and is
                  conventionally placed last.
      infile - file object containing NetLogger log lines.
      writer - object with writerow((event, key, interval_str)), or None.
      log - nlapi.Log instance for 'log'-format output, or None.
            Exactly one of writer/log is expected to be non-None.
      progress_meter - object with setLine(n); defaults to a no-op meter
                  (fix: the original crashed on the default of None).
      hdr - when True and writer is set, emit a header row first.
    """
    if progress_meter is None:
        progress_meter = NullProgressMeter()
    if writer and hdr:
        writer.writerow(('event','key','interval_sec'))
    # One matcher per pattern, using that pattern's identifying fields.
    matchers = [StartEndMatcher(idlist=ei[1]) for ei in event_ids]
    parser = NLFastParser(infile, verify=False)
    for lineno, d in enumerate(parser):
        progress_meter.setLine(lineno + 1)
        if 'event' not in d:
            continue #ignore junk
        # Pick the matcher of the first pattern matching this event;
        # a None pattern matches everything.  (Fix: the inner loop
        # index no longer clobbers the outer loop variable.)
        matcher = None
        for j, (regex, _) in enumerate(event_ids):
            if regex is None or regex.match(d['event']):
                matcher = matchers[j]
                break
        if matcher is None:
            continue
        matcher.add(d)
        if len(matcher) == 0:
            continue
        for start, end, key in matcher.getResults():
            if not (start and end):
                continue
            # Fix: compute the interval only after both endpoints are
            # known; originally the subtraction preceded this check,
            # so a None endpoint crashed before the guard could fire.
            interval = end['ts'] - start['ts']
            event_base = start['event'][:-6]  # strip ".start" suffix
            key_str = '/'.join(key)
            if writer:
                writer.writerow((event_base, key_str, '%lf' % interval))
            else: # log
                # Merge start/end attributes: equal fields collapse to
                # one; differing fields are kept as <k>.start / <k>.end.
                intvl = start.copy()
                for k, v in end.items():
                    if k in ('ts', 'event', 'level'):
                        # Recomputed below.  pop() tolerates a field
                        # present in end but absent from start (fix:
                        # `del` raised KeyError in that case).
                        intvl.pop(k, None)
                    elif k in intvl:
                        if v != intvl[k]:
                            intvl[k + '.start'] = intvl[k]
                            intvl[k + '.end'] = v
                            del intvl[k]
                    else:
                        intvl[k] = v
                intvl['ts'] = start['ts']
                intvl['level'] = Level.getLevel(
                    start.get('level', end.get('level', 'INFO')).upper())
                intvl['nl.intvl'] = interval
                log.write(event_base + '.intvl', **intvl)

def main():
    usage = "%prog [options] [files..]"
    desc = ' '.join(__doc__.split('\n')[1:3])
    parser = optparse.OptionParser(usage=usage, version="%prog 0.1", 
                                   description=desc)
    parser.add_option('-i', '--ids',
                      action='append', default=[], dest='ids',
                      help="Set of identifying fields for a given "
                      "event pattern, " +
                      "using the syntax: " +
                      "[EVENT_REGEX:]FIELD1,..,FIELDN (default='guid')")
    parser.add_option("-p", "--progress", action="store_true", 
                      dest="progress",
                      default=False, help="report progress to stderr")
    parser.add_option('-t', '--type',
                      action="store", type="choice",
                      choices=('csv', 'log', 'human'),
                      dest='fmt', default="human",
                      help="Output type (default=%default)")
    options, args= parser.parse_args()
    if len(options.ids) == 0:
        ids = [(None, ('event','guid'))]
    else:
        try:
            ids = parseIds(options.ids)
        except ValueError, E:
            parser.error("bad event:id : %s" % E)
    # output format
    writer, log = None, None
    if options.fmt == 'csv':
        writer = csv.writer(sys.stdout)
    elif options.fmt == 'log':
        log = Log(logfile=sys.stdout)
        log.setLevel(999) # everything!
    elif options.fmt == 'human':
        writer = HWriter(sys.stdout)
    else:
        parser.error("bad output type: %s" % options.fmt)
    # progress meter
    if options.progress:
        pm = ProgressMeter(sys.stderr)
    else:
        pm = NullProgressMeter()
    # input files
    if args:
        infile_names = args
    else:
        infile_names = [sys.stdin.name]
    # go!
    hdr = True
    for filename in infile_names:
        if filename == sys.stdin.name:
            infile = sys.stdin
        else:
            infile = file(filename)
        run(event_ids=ids, infile=infile, writer=writer, log=log,
            progress_meter=pm, hdr=hdr)
        hdr = False
    
if __name__ == "__main__": 
    main()
