#!/usr/bin/env python
"""
Read NetLogger logs, output missing events
The output is the log format, or a simple CSV with the event name and key.
"""
__author__ = "Dan Gunter <dkgunter@lbl.gov>"
__rcsid__ = "$Id: nl_findmissing 802 2008-06-06 18:15:21Z dang $"

import csv
import logging
import optparse
import re
import socket
import sys
import syslog
import time
from netlogger import nlapi
from netlogger import nllog
from netlogger.util import ProgressMeter, NullProgressMeter
from netlogger.nlapi import Level, Log
from netlogger.parsers.base import NLFastParser
from netlogger.analysis.startend import StartEndMatcher

# Logging
# Module-level logger: a no-op NullLogger until activateLogging() swaps
# in a real script logger (done by main()).
log = nllog.NullLogger()
def activateLogging(name="netlogger.nl_findmissing"):
    """Replace the module-level `log` with a real script logger.

    name - logger name passed to nllog.getScriptLogger().
    """
    global log
    log = nllog.getScriptLogger(name)

def parseIds(o):
    """Parse option strings of the form "[REGEX:]FIELD1,..,FIELDN" into a
    list of (compiled_regex_or_None, field_list) pairs.

    For example ["foo:bar,baz", "elmo:room,theme"] becomes:
        [(re.compile('foo'), ['bar', 'baz']),
         (re.compile('elmo'), ['room', 'theme'])]

    A value with no ':' is a catch-all (its regex slot is None).  It may
    appear at most once and is always placed last in the returned list so
    that specific patterns are tried first.

    Raises ValueError if more than one catch-all value is given.
    """
    patterns = []
    catch_all = []
    for spec in o:
        if ':' in spec:
            pat, field_spec = spec.split(':', 1)
            patterns.append((re.compile(pat), field_spec.split(',')))
        else:
            if catch_all:
                raise ValueError("empty event pattern can only appear once")
            catch_all = [(None, spec.split(','))]
    return patterns + catch_all

class HWriter:
    """Human-readable output writer with a csv.writer-compatible writerow().

    The first row passed to writerow() is assumed to be the column header
    (run() always emits one) and is swallowed; each subsequent row
    (event_base, missing, key) is printed as a single line of text.
    """
    def __init__(self, ofile):
        self._o = ofile       # output file object
        self._first = True    # header row not yet consumed
    def writerow(self, x):
        if self._first:
            # Skip the header row.
            self._first = False
        else:
            # BUG FIX: also print the key (x[2]); previously only the
            # event name and 'start'/'end' were shown, so the user could
            # not tell WHICH instance was missing its start/end event.
            self._o.write("%s missing %s (%s)\n" % (x[0], x[1], x[2]))

def processResults(results, olog=None, writer=None):
    """Report every incomplete (start, end) pair in `results`.

    results - iterable of (start_event, end_event, key) tuples; either
              event may be None/missing if it never arrived.
    olog - NetLogger Log object, used when `writer` is None.
    writer - object with a writerow() method; takes precedence over olog.
    """
    for start, end, key in results:
        # Complete pairs need no report.
        if start and end:
            continue
        if start is None:
            missing, present, suffix = 'start', end, len('.end')
        else:
            missing, present, suffix = 'end', start, len('.start')
        if writer:
            # Strip the '.start' / '.end' suffix off the surviving event name.
            writer.writerow((present['event'][:-suffix], missing,
                             '/'.join(key)))
        else:
            # Re-emit the surviving event through the log, tagged with
            # which half of the pair is missing.
            present['level'] = Level.getLevel(present['level'].upper())
            olog.write(nl__missing=missing, **present)

def run(event_ids=None, infile=None, writer=None, olog=None,
        progress_meter=None):
    """Read from infile until EOF, write to writer or log obj
    (only one will be non-None).

    event_ids - list of (regex_or_None, id_field_list) pairs as produced
                by parseIds(); a None regex acts as a catch-all.
    infile - iterable of NetLogger-formatted log lines.
    writer - object with a writerow(tuple) method, or None.
    olog - NetLogger Log object, or None.
    progress_meter - ProgressMeter / NullProgressMeter for stderr updates.
    """
    if writer:
        writer.writerow(('event', 'missing', 'key'))
    # One matcher per pattern, positionally parallel to event_ids.
    matchers = [StartEndMatcher(idlist=ei[1]) for ei in event_ids]
    parser = NLFastParser(verify=False)
    for lineno, line in enumerate(infile):
        d = parser.parseLine(line)
        progress_meter.setLine(lineno + 1)
        # FIX: 'in' instead of the deprecated dict.has_key() (removed in
        # Python 3; 'in' works in both 2 and 3).
        if 'event' not in d:
            continue  # ignore junk
        # First pattern that matches the event claims the line.
        # FIX: inner index renamed (was 'i', shadowing the outer loop
        # variable); unused 'added' result of matcher.add() dropped.
        matcher = None
        for j, (regex, _) in enumerate(event_ids):
            if regex is None or regex.match(d['event']):
                matcher = matchers[j]
                break
        if matcher is not None:
            matcher.add(d)
            if len(matcher) > 0:
                processResults(matcher.getResults(), olog=olog, writer=writer)
    # EOF: flush matchers so pairs still missing a partner are reported.
    for matcher in matchers:
        matcher.flush()
        if len(matcher) > 0:
            processResults(matcher.getResults(), olog=olog, writer=writer)

def main():
    """Parse command-line options, then read NetLogger lines from stdin
    and report events missing their start or end partner on stdout.
    """
    activateLogging("netlogger.nl_findmissing")
    usage = "%prog [options]"
    parser = optparse.OptionParser(usage=usage, version="%prog 0.1")
    parser.add_option('-i', '--ids',
                      action='append', default=[], dest='ids',
                      help="Set of identifying fields for a given event pattern, " +
                           "using the syntax: " +
                           "[EVENT_REGEX:]FIELD1,..,FIELDN (default='guid')")
    parser.add_option('-t', '--type',
                      action="store", type="choice", choices=('csv', 'log', 'human'),
                      dest='fmt', default="human",
                      help="Output type (default=%default)")
    parser.add_option("-p", "--progress", action="store_true",
                      dest="progress",
                      default=False, help="report progress to stderr")
    options, args = parser.parse_args()
    # Identifying fields: default matches every event on (event, guid).
    if len(options.ids) == 0:
        ids = [(None, ('event', 'guid'))]
    else:
        try:
            ids = parseIds(options.ids)
        except ValueError as E:  # 'as' form works in Python 2.6+ and 3.x
            parser.error("bad event:id : %s" % E)
    # Output format.
    # BUG FIX: initialize olog here (the old code did
    # "writer, log = None, None", shadowing the module-level logger and
    # leaving olog undefined, so fmt 'csv' or 'human' raised NameError
    # when olog was passed to run() below).
    writer, olog = None, None
    if options.fmt == 'csv':
        writer = csv.writer(sys.stdout)
    elif options.fmt == 'log':
        olog = Log(logfile=sys.stdout)
        olog.setLevel(999) # everything!
    elif options.fmt == 'human':
        writer = HWriter(sys.stdout)
    else:
        # Unreachable in practice: the 'choice' option type already
        # rejects other values; kept as a defensive backstop.
        parser.error("bad output type: %s" % options.fmt)
    # progress meter
    if options.progress:
        pm = ProgressMeter(sys.stderr)
    else:
        pm = NullProgressMeter()
    # run
    run(event_ids=ids, infile=sys.stdin, writer=writer, olog=olog,
        progress_meter=pm)
    
# Script entry point.
if __name__ == "__main__": 
    main()
