#!/usr/bin/env python
"""
Read NetLogger logs, output missing events.
The output is the log format, or a simple CSV with the event name and key.
"""
__author__ = "Dan Gunter <dkgunter@lbl.gov>"
__rcsid__ = "$Id: nl_findmissing 26525 2010-09-28 02:47:51Z dang $"

import csv
import re
import sys

from netlogger.analysis.startend import StartEndMatcher
from netlogger.nlapi import Level, Log
from netlogger.nllog import get_logger, OptionParser
from netlogger.parsers.base import NLFastParser
from netlogger.util import ProgressMeter, NullProgressMeter

## Classes

class WriterBase:
    """Common base for the output writers: holds the current namespace.

    The namespace is whatever the caller passes to setNamespace(); in this
    script run() passes the name of the input file being processed.
    """

    def __init__(self):
        # No namespace until setNamespace() has been called.
        self._ns = None

    def setNamespace(self, ns):
        """Remember `ns` as the namespace used for subsequent output."""
        self._ns = ns

class HWriter(WriterBase):
    """Writer producing one human-readable line per missing event.

    It offers the same writerow() call as the CSV writer, so both can be
    driven by the same code.
    """

    def __init__(self, ofile):
        WriterBase.__init__(self)
        self._first = True   # True until the first row has been seen
        self._o = ofile      # object with a write() method

    def writerow(self, x):
        """Write "<namespace>: <x[0]> missing <x[1]>" for the row `x`.

        The first row passed in is silently dropped (callers send a column
        header first). Only the first two elements of `x` are printed; any
        further elements, such as the key, are not part of the output.
        """
        if self._first:
            # Swallow the header row.
            self._first = False
            return
        line = "%s: %s missing %s\n" % (self._ns, x[0], x[1])
        self._o.write(line)

class WriterWrapper(WriterBase):
    """Adapter around a row writer that prefixes rows with the namespace.

    `wrapped_class` is called with the output file to build the underlying
    writer (main() passes csv.writer).
    """

    def __init__(self, ofile, wrapped_class):
        WriterBase.__init__(self)
        self._first = True   # True until the first row has been written
        self._obj = wrapped_class(ofile)

    def writerow(self, x):
        """Forward row `x` to the wrapped writer and return its result.

        The first row is passed through unchanged (it is the header); every
        later row gets the current namespace inserted as its first column.
        """
        if self._first:
            self._first = False
            return self._obj.writerow(x)
        prefixed = [self._ns] + list(x)
        return self._obj.writerow(prefixed)

class LogWrapper(WriterBase):
    """Adapter around a log object that tags every record with the namespace.

    `wrapped_class` is called with the output file to build the underlying
    log object (main() passes netlogger's Log).
    """

    def __init__(self, ofile, wrapped_class):
        WriterBase.__init__(self)
        self._first = True   # not read anywhere in this class
        self._obj = wrapped_class(ofile)

    def write(self, **kw):
        """Write one record, adding the current namespace as `file`."""
        target = self._obj
        target.write(file=self._ns, **kw)

    def getobj(self):
        """Return the wrapped log object."""
        return self._obj

## Functions

def parseIds(o):
    """Parse the values of the --ids option.

    Each value has the form "[EVENT_REGEX:]FIELD1,..,FIELDN". For example
    ["foo:bar,baz", "elmo:room,theme"] becomes:
         [(re.compile('foo'), ['bar', 'baz']),
          (re.compile('elmo'), ['room', 'theme'])]

    A value without a ':' has no event pattern; it is returned as
    (None, [fields]) and is always placed last in the result, whatever
    its position in the input, so that the specific patterns come first.

    Only the first ':' separates the pattern from the fields, so the
    pattern itself cannot contain a ':'.

    Returns a list of (compiled regex or None, list of field names).
    Raises ValueError if more than one value lacks an event pattern, or
    if an event pattern is not a valid regular expression.
    """
    patterns = []
    catch_all = []
    for optval in o:
        if ':' not in optval:
            if catch_all:
                raise ValueError("empty event pattern can only appear once")
            catch_all = [(None, optval.split(','))]
            continue
        expr, fields = optval.split(':', 1)
        try:
            regex = re.compile(expr)
        except re.error:
            # Report a bad pattern as ValueError, the error type callers
            # handle for bad option values. sys.exc_info() is used rather
            # than binding the exception in the except clause so this
            # parses on both old and new Python versions.
            raise ValueError("invalid event pattern '%s': %s" % (
                    expr, sys.exc_info()[1]))
        patterns.append((regex, fields.split(',')))
    return patterns + catch_all

def processResults(results, olog=None, writer=None):
    """Report every result that has only a start or only an end event.

    `results` yields (start, end, key) triples. Triples where both start
    and end are present are skipped. For the others:

      * if `writer` is given, a row (event name without its '.start' or
        '.end' suffix, 'start' or 'end' for whichever is missing, key
        parts joined with '/') is passed to writer.writerow();
      * otherwise the event that is present is written to `olog` with an
        added nl__missing attribute, after its 'level' value has been
        converted with Level.getLevel().
    """
    for start, end, key in results:
        if start and end:
            continue  # matched pair, nothing to report
        if start is None:
            # Only the end event was seen; strip the 4 chars of '.end'.
            missing, event, suffix_len = 'start', end, 4
        else:
            # Only the start event was seen; strip the 6 chars of '.start'.
            missing, event, suffix_len = 'end', start, 6
        if writer:
            writer.writerow((event['event'][:-suffix_len], missing,
                             '/'.join(key)))
        else:
            event['level'] = Level.getLevel(event['level'].upper())
            olog.write(nl__missing=missing, **event)

def run(event_ids=None, infiles=None, writer=None, olog=None,
        progress_meter=None):
    """Process each input in turn, writing to the writer or the log object.

    event_ids      -- list of (regex or None, fields) pairs, as produced
                      by parseIds()
    infiles        -- iterable of inputs; an item without a close()
                      attribute is treated as a path and opened for reading,
                      anything else is used as an already-open file
    writer, olog   -- output objects; only one should be non-None
    progress_meter -- passed through unchanged to runOne()

    When a writer is used, a header row is written once before any input
    is read. Before each input is processed, the output object's namespace
    is set to that input's `name` attribute.
    """
    if writer:
        writer.writerow(('file', 'event', 'missing', 'key'))
    for f in infiles:
        opened_here = not hasattr(f, 'close')
        if opened_here:
            f = open(f, "r")
        try:
            if writer:
                writer.setNamespace(f.name)
            else:
                olog.setNamespace(f.name)
            runOne(event_ids, f, writer, olog, progress_meter)
        finally:
            # Close only what this function opened; file objects supplied
            # by the caller (e.g. sys.stdin) stay open.
            if opened_here:
                f.close()

def runOne(event_ids, infile, writer, olog, progress_meter):
    """Scan one open log file and report its unmatched start/end events.

    event_ids      -- list of (regex or None, fields) pairs; one
                      StartEndMatcher is created per pair, with `fields`
                      as its idlist
    infile         -- iterable of log lines
    writer, olog   -- output objects handed on to processResults()
    progress_meter -- object whose advance() is called with the 1-based
                      line number of each line read; may be None, in which
                      case no progress is reported

    Each parsed line that has an 'event' field is given to the matcher of
    the first pair whose regex matches the event name (a regex of None
    matches everything). Lines matching no pair are ignored.
    """
    matchers = [StartEndMatcher(idlist=fields) for _, fields in event_ids]
    parser = NLFastParser(verify=False)
    for lineno, line in enumerate(infile):
        d = parser.parseLine(line)
        if progress_meter is not None:
            progress_meter.advance(lineno + 1)
        if 'event' not in d:
            continue  # ignore junk
        matcher = None
        for (regex, _), candidate in zip(event_ids, matchers):
            if regex is None or regex.match(d['event']):
                matcher = candidate
                break
        if matcher is None:
            continue
        matcher.add(d)
        # A non-empty matcher presumably means results are ready to be
        # collected -- confirm against StartEndMatcher.
        if len(matcher) > 0:
            processResults(matcher.getResults(), olog=olog, writer=writer)
    # End of input: flush every matcher and report whatever it then holds.
    for matcher in matchers:
        matcher.flush()
        if len(matcher) > 0:
            processResults(matcher.getResults(), olog=olog, writer=writer)

def main():
    """Command-line entry point: parse options, then run over the inputs.

    Input files are the positional arguments, or standard input when there
    are none. Output goes to standard output in the format chosen with
    -t/--type. Returns 0; option errors are reported through
    parser.error().
    """
    usage = "%prog [options] [files..]"
    desc = ' '.join(__doc__.split())
    parser = OptionParser(usage=usage, description=desc)
    parser.add_option('-i', '--ids',
                      action='append', default=[], dest='ids',
                      help="Set of identifying fields for a given "
                      "event pattern, " +
                      "using the syntax: " +
                      "[EVENT_REGEX:]FIELD1,..,FIELDN "
                      "(default='event,guid')")
    parser.add_option('-t', '--type',
                      action="store", type="choice",
                      choices=('csv', 'log', 'human'),
                      dest='fmt', default="human",
                      help="Output type (default=%default)")
    parser.add_option("-p", "--progress", action="store_true",
                      dest="progress",
                      default=False, help="report progress to stderr")
    options, args = parser.parse_args()
    log = get_logger(__file__)  # Should be first done, just after parsing args
    if not options.ids:
        # No -i given: every event is identified by its event and guid fields.
        ids = [(None, ('event','guid'))]
    else:
        try:
            ids = parseIds(options.ids)
        except ValueError:
            # sys.exc_info() rather than binding the exception in the
            # except clause, so this parses on old and new Python versions.
            parser.error("bad event:id : %s" % sys.exc_info()[1])
    # inputs
    if len(args) > 0:
        infiles = args
    else:
        infiles = (sys.stdin,)
    # output format
    writer, olog = None, None
    if options.fmt == 'csv':
        writer = WriterWrapper(sys.stdout, csv.writer)
    elif options.fmt == 'log':
        olog = LogWrapper(sys.stdout, Log)
        olog.getobj().setLevel(999) # everything!
    elif options.fmt == 'human':
        writer = HWriter(sys.stdout)
    else:
        # The 'choice' option type restricts values to the three above;
        # kept as a safeguard.
        parser.error("bad output type: %s" % options.fmt)
    # progress meter
    if options.progress:
        pm = ProgressMeter(sys.stderr)
    else:
        pm = NullProgressMeter()
    # run
    log.info("run.start", num_files = len(infiles))
    run(event_ids=ids, infiles=infiles, writer=writer, olog=olog,
        progress_meter=pm)
    log.info("run.end", status=0)
    return 0

if __name__ == "__main__":
    # Run as a script: use main()'s return value as the exit status.
    exit_status = main()
    sys.exit(exit_status)
