"""
Parser for output of 'vmstat' UNIX utility.
"""
__author__ = 'Dan Gunter dkgunter@lbl.gov'
__rcsid__ = '$Id: vmstat.py 24755 2010-04-29 20:01:18Z dang $'

from itertools import izip
from logging import DEBUG
import re
import time
#
from netlogger.parsers import base
from netlogger import nldate

class Parser(base.BaseParser):
    """Parser for output of 'vmstat' UNIX utility.

    Parameters:
       - start_time {yyyy-mm-ddThh:mm:ss (GMT),<now>*}: Time at which
         vmstat started, for generating timestamps.
       - interval {INT,1*}: Interval in seconds between reports, also for
         generating proper timestamps.
    """
    def __init__(self, f, start_time=None, interval='1', **kwargs):
        """Set up the time base and the reporting interval.

        Arguments:
          - f: Input object, passed through to base.BaseParser.
          - start_time: Start time as 'yyyy-mm-ddThh:mm:ss' in GMT, or
            None to use the current time.
          - interval: Number of seconds between vmstat reports; anything
            accepted by int().
          - kwargs: Passed through to base.BaseParser.

        Raises ValueError if start_time or interval cannot be parsed.
        """
        base.BaseParser.__init__(self, f, fullname=__name__, **kwargs)
        self._dbg = self.log.isEnabledFor(DEBUG)
        # start time
        if start_time is None:
            self._t0 = time.time()
        else:
            # NOTE(review): parseISO() is presumed to return seconds since
            # the epoch, like time.time() -- confirm against nldate.
            try:
                self._t0 = nldate.parseISO(start_time + 'Z')
            except (TypeError, ValueError):
                raise ValueError("Bad start time '%s': must be in format "
                                 "'yyyy-mm-ddThh:mm:ss', in GMT" %
                                 start_time)
        # parse interval; TypeError covers non-string, non-numeric input
        # such as None, so that it is reported the same way as a bad string
        try:
            self._isec = int(interval)
        except (TypeError, ValueError):
            raise ValueError("Bad interval '%s': integer expected" %
                             interval)
        self._nlines = 0 # Number of value-filled lines processed
        # Column names from the most recent header line; None until the
        # first header has been seen.
        self._field_names = None

    def process(self, line):
        """Process header or body line from vmstat.

        Sample:
        procs -----------memory---------- ---swap-- -----io---- --system-- -----cpu------
        r  b   swpd   free   buff  cache   si   so    bi    bo   in   cs us sy id wa st
        0  0      0 5414724 245476 481588    0    0  4425  1225    4   10  2  8 90  0  0

        Returns an empty tuple for blank and header lines, and a tuple
        holding one event dictionary for each line of values. The event
        has the timestamp and event name plus one integer per column,
        keyed by the column name from the most recent header.

        Raises ValueError if a line of values contains a non-integer
        token, or if values arrive before any column header.
        """
        # Tokenize
        fields = line.split()
        # Skip blank lines. Without this check an empty token list would
        # be mistaken for a header and wipe out the column names.
        if not fields:
            return ()
        # Ignore first header line.
        if line.startswith("procs"):
            return ()
        # Look for header.
        # Recognize it by non-numeric first token.
        try:
            int(fields[0])
        except ValueError:
            # Set field names from header.
            self._field_names = fields
            return ()
        # We must have a body line.
        if self._field_names is None:
            raise ValueError("vmstat values found before the column "
                             "header line")
        # These are all integers. Parse them.
        # A ValueError here propagates; it will be handled by parent.
        values = [int(token) for token in fields]
        # Add to count of values, only once the line parsed cleanly.
        self._nlines += 1
        # Create output event.
        # Calculate time as initial time + (num. * interval).
        ts = self._t0 + self._nlines * self._isec
        # Add required fields.
        event = { base.TS_FIELD : ts,
                  base.EVENT_FIELD : 'vmstat' }
        # Add values; zip() stops at the shorter of names and values.
        event.update(zip(self._field_names, values))
        # Return output event
        return (event,)

