#!/usr/bin/env python
"""
Read NetLogger logs, output a 'profile' of inclusive and
exclusive function times and counts.
It is very important that the list of identifiers allows
each '.start' event to match the appropriate '.end' event.
"""
__author__ = "Dan Gunter <dkgunter@lbl.gov>"
__rcsid__ = "$Id$"

import csv
import math
from operator import itemgetter
import re
import sys
#
from netlogger.analysis.startend import StartEndProfiler
from netlogger.nlapi import Level, Log
from netlogger.nllog import get_logger, OptionParser
from netlogger.parsers.base import NLFastParser
from netlogger.util import ProgressMeter, handleSignals

## Constants

# Output format selectors, used as values for the -t/--type option
# and dispatched on in run().
CSV_OUTPUT, TABLE_OUTPUT = "csv", "table"

## Signal handlers

def on_kill(signo, frame):
    """Signal handler for a graceful exit.

    Logs a warning with the signal number, then exits with status 1.
    """
    log = get_logger(__file__)
    log.warn("abort", signal=signo)
    sys.exit(1)

## Classes

class ProfilerError(Exception):
    """Raised for fatal errors while building or writing the profile."""
    pass

class Profile:
    """Accumulate per-event counts and inclusive/exclusive times,
    and write the resulting profile as an aligned text table or CSV.

    Usage: call add() once per completed event, assign an open stream
    to `ofile`, then call write_table() or write_csv().
    """

    PRECISION = 3 # digits of precision for timings
    MIN_COL = 5 # Minimum column width

    class Columns:
        """Two-letter column codes, in row order:
        event, count, total incl., total excl., mean incl., mean excl.
        """
        CODES = ("ev", "co", "ti", "et", "mi", "me")

        @staticmethod
        def get_code(code):
            """Return the row index for a two-letter column code.

            Raises ValueError("Unknown code") if the code is not
            in CODES.
            """
            try:
                return Profile.Columns.CODES.index(code)
            except ValueError:
                raise ValueError("Unknown code")

    def __init__(self, sort_keys=()):
        """Constructor.

        Parameters:
          - sort_keys: How to sort the results. A sequence of pairs
            (index-in-Columns.CODES, reverse[bool]).
        """
        self._events = { }
        # Copy before reversing: the default is an (immutable) tuple,
        # and we must not mutate a caller-supplied list in place.
        # (The original called .reverse() directly, which crashed on
        # the default and surprised callers.)
        self.sort_keys = list(sort_keys)
        self.sort_keys.reverse() # sort by last key first..
        self.ofile = None # output stream; assigned by the caller

    def add(self, event, incl, excl):
        """Record one occurrence of `event` with its inclusive and
        exclusive times.
        """
        record = self._events.get(event, None)
        if record is None:
            # First occurrence: store count=1 AND the initial times.
            # (Bug fix: the original stored [1, 0, 0], silently
            # dropping the first occurrence's times.)
            self._events[event] = [1, incl, excl]
        else:
            record[0] += 1 # count
            record[1] += incl
            record[2] += excl

    def get_rows(self):
        """Make dictionary into list of rows.
        At the same time, calculate derived values (the means).

        Each row is [event, count, total_incl, total_excl,
        mean_incl, mean_excl].
        """
        rows = [ ]
        for key, values in self._events.items():
            row = [key]
            row.extend(values)
            # means; divide by float so integer times still give a
            # real-valued mean under Python 2 division rules
            row.append(values[1] / float(values[0]))
            row.append(values[2] / float(values[0]))
            rows.append(row)
        return rows

    def sort_rows(self, rows):
        """Sort rows by each (column-index, reverse) key in
        self.sort_keys. Keys were reversed at construction, so the
        last user key is applied first; Python's stable sort then
        yields the expected multi-key ordering.
        """
        for idx, rev_flag in self.sort_keys:
            rows = sorted(rows, key=itemgetter(idx), reverse=rev_flag)
        return rows

    def get_column_widths(self, rows):
        """Get desired width of each column.

        Widths are computed from the actual rendered text of each
        value, using the same formats as write_table(), so columns
        always fit. (The original used ceil(log10(v)), which is one
        short for exact powers of 10.)
        """
        widths = [self.MIN_COL] * len(rows[0])
        for row in rows:
            for i, v in enumerate(row):
                if isinstance(v, str):
                    vlen = len(v)
                elif isinstance(v, float):
                    # fixed-point rendering: digits + point + fraction
                    vlen = len("%.*f" % (self.PRECISION, v))
                else:
                    vlen = len("%d" % v)
                widths[i] = max(widths[i], vlen)
        return widths

    def write_table(self):
        """Write the profile to self.ofile as an aligned,
        human-readable table with a two-line header.
        """
        self.ofile.write("+ Profile +\n")
        rows = self.sort_rows(self.get_rows())
        cols = self.get_column_widths(rows)
        # Write header
        hdr_fmt = "%%-%ds %%-%ds %%-%ds %%-%ds %%-%ds %%-%ds\n" % tuple(cols)
        self.ofile.write(hdr_fmt % ("", "", "Total", "Total",
                                    "Mean", "Mean"))
        self.ofile.write(hdr_fmt % ("Event", "Count", "Incl.", "Excl.",
                                    "Incl.", "Excl."))
        # Separator sized per column (the original always used
        # MIN_COL dashes, which under-lined wide columns).
        self.ofile.write(hdr_fmt % tuple(["-" * w for w in cols]))
        # Write rows
        row_fmt = "%%-%ds %%-%dd %%-%d.%dlf %%-%d.%dlf " \
            "%%-%d.%dlf %%-%d.%dlf\n" % (
            cols[0], cols[1], cols[2], self.PRECISION,
            cols[3], self.PRECISION, cols[4], self.PRECISION, cols[5],
            self.PRECISION)
        for row in rows:
            self.ofile.write(row_fmt % tuple(row))

    def write_csv(self):
        """Write the profile to self.ofile as CSV with a header row.
        Rows are written unsorted (sort_keys apply to the table only).
        """
        rows = self.get_rows()
        self.ofile.write("event,count,total_incl,total_excl,"
                         "mean_incl,mean_excl\n")
        row_fmt = "%%s,%%d,%%.%dlf,%%.%dlf," \
            "%%.%dlf,%%.%dlf\n" % tuple([self.PRECISION]*4)
        for row in rows:
            self.ofile.write(row_fmt % tuple(row))

## Functions

def run(event_ids=None, infiles=None, outfile=None,
        progress_meter=None, output_type=None, tolerant=False, **kw):
    """Process infile until EOF or error, then write the profile.

    Parameters:
      - event_ids: identifying fields, passed to StartEndProfiler
      - infiles: list of open files or filenames (default: stdin)
      - outfile: output stream (default: stdout)
      - progress_meter: optional ProgressMeter, advanced once per line
      - output_type: CSV_OUTPUT or TABLE_OUTPUT
      - tolerant: if True, ignore mis-ordered events
      - kw: extra keywords for the Profile constructor (e.g. sort_keys)

    Raises ProfilerError on a bad log line or unknown output type.
    Unopenable filenames are logged and skipped, not fatal.
    """
    log = get_logger(__file__)
    if infiles is None:
        infiles = [sys.stdin]
    if outfile is None:
        outfile = sys.stdout
    profiler = StartEndProfiler(event_ids, tolerant=tolerant)
    prof = Profile(**kw)
    for infile in infiles:
        opened_here = False
        if isinstance(infile, str):
            try:
                # open() instead of the Py2-only file() builtin
                infile = open(infile)
                opened_here = True
            except IOError:
                log.warn("open.error", file=infile)
                continue
        try:
            parser = NLFastParser(infile, verify=False)
            for i, d in enumerate(parser):
                if progress_meter:
                    progress_meter.advance(i+1)
                try:
                    result = profiler.add(d)
                except ValueError as err:
                    raise ProfilerError("%s line %d: %s" % (
                            infile.name, i+1, err))
                if result:
                    event, key, incl, excl = result
                    prof.add(event, incl, excl)
        finally:
            # Close only files we opened ourselves (never stdin or a
            # caller-supplied stream). Fixes a file-handle leak.
            if opened_here:
                infile.close()
    prof.ofile = outfile
    if output_type == CSV_OUTPUT:
        prof.write_csv()
    elif output_type == TABLE_OUTPUT:
        prof.write_table()
    else:
        raise ProfilerError("Unknown output type: %s" % output_type)

def main(sys_argv):
    """Parse command-line options, set up signal handlers, and run
    the profiler over the given files (or stdin).

    Returns None; errors during the run are logged, and option errors
    exit via parser.error().
    """
    usage = "%prog [options] [files..]"
    desc = ' '.join(__doc__.split('\n'))
    parser = OptionParser(usage=usage, description=desc)
    output_types = TABLE_OUTPUT, CSV_OUTPUT
    parser.add_option('-f', '--fields',
                      action='store', default="guid", dest='ids',
                      help="Identifying fields separated by commas "
                      "(default=%default)")
    parser.add_option("-g", "--progress", action="store_true",
                      dest="progress",
                      default=False, help="report progress to stderr")
    parser.add_option("-i", "--tolerant", action="store_true",
                      dest="tolerant", default=False,
                      help="Ignore mis-ordered events")
    parser.add_option("-s", "--sort", action="store",
                      dest="sort_keys",
                      default="co-ev+",
                      help="""A sequence of
strings, each string having two parts: column and direction.
 Column is a two-letter code for the column name and direction
is either '+' for ascending or '-' for descending.
Column codes: ev=event, co=count, ti=total inclusive time,
et=total exclusive time, mi=mean inclusive time,
me=mean exclusive time. For example, to sort by event name ascending, then count descending, use "ev+co-". (default=%default)""")
    parser.add_option("-t", "--type", action="store", dest="otype",
                      default=TABLE_OUTPUT, help="Output type. " +
                      "Must be one of %s " % (', '.join(output_types)) +
                      "(default=%default)")
    options, args = parser.parse_args(sys_argv[1:])
    log = get_logger(__file__)
    # Set up signal handlers
    handleSignals((on_kill, ('SIGTERM', 'SIGINT', 'SIGUSR1',
                             'SIGUSR2', 'SIGHUP')))
    # Validate and parse args
    if len(args) > 0:
        infiles = args
    else:
        infiles = None
    ids = options.ids.split(',')
    if options.progress:
        pm = ProgressMeter(sys.stderr)
    else:
        pm = None
    if options.otype not in output_types:
        parser.error("Bad value for -t/--type")
    # Split "co-ev+" into ["co", "ev", ""]: a well-formed key string
    # ends with a direction char, so the last component must be empty.
    codes = re.split(r"[-+]", options.sort_keys)
    if len(codes) < 2:
        parser.error("Empty sort key")
    if codes[-1] != "":
        parser.error("Bad code: %s" % codes[-1])
    sort_keys = [ ]
    for i, code in enumerate(codes[:-1]):
        if len(code) != 2:
            parser.error("Bad code: %s" % code)
        try:
            real_code = Profile.Columns.get_code(code)
        except ValueError as err:
            parser.error("%s: %s" % (err, code))
        # Each token is exactly 3 chars (2-letter code + direction),
        # so token i's direction char sits at offset i*3+2.
        reverse_flag = options.sort_keys[i*3+2] == '-'
        sort_keys.append((real_code, reverse_flag))
    # Run
    try:
        log.info("run.start")
        run(event_ids=ids, progress_meter=pm, output_type=options.otype,
            infiles=infiles, sort_keys=sort_keys, tolerant=options.tolerant)
        log.info("run.end", status=0)
    except ProfilerError as err:
        log.error("run.end", status=-1, msg=err)

if __name__ == "__main__":
    # Script entry point; main() returns None, so this exits with 0.
    sys.exit(main(sys.argv))
