#!/usr/bin/env python
"""
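Unit tests for the NetLogger parsers (netlogger.parsers.base and a few
of the simple parser modules in netlogger.parsers.modules).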
"""
__author__ = 'Keith Jackson krjackson@lbl.gov'
__rcsid__ = '$Id: testParsers.py 880 2008-07-18 21:56:09Z dang $'

import os
from StringIO import StringIO
import sys
from tempfile import TemporaryFile
import unittest
from netlogger.parsers.base import NLFastParser, NLPyParser, parseDate
from netlogger.parsers.base import BaseParser
# some simple parsers
from netlogger.parsers.modules import bp, generic
import testBase

# Sample log lines used by the stream-parsing tests.  Each entry is a
# (line, error_tag) pair; an empty error_tag marks a line that is expected
# to parse, a non-empty one marks a line that is expected to be rejected.
log_events = (
    (' ts=2007-08-16T13:11:42.643255Z id=10270 event=e1 u=host:30003/', ''),
    ('ts=2007-08-16T13:12:20.648918-08:00 id=1 event=e2 foo=" bA a r"',  ''),
    ('ts=2007-08-16T13:17:31.824379Z id=foo', 'no event'),
    ('ts=2007-08-16T13:18:02.404602Z id=this is not right at all', 'format'),
    ('ts=2007-08-16T13:18:02.477603Z id=10324 event="e 3" session_id=22', ''),
    ('ts=2007-08-16T13:18:02.575615+99 event=e4 session_id=2 status=0', 'ts'),
    ('id=10324 event=e5 ts=2007-08-16T13:18:02.576034+01:30', ''),
)

# All sample lines as one newline-terminated log stream.
test_log = '\n'.join([e[0] for e in log_events]) + '\n'

# The subset of sample lines that carry a non-empty error tag.
err_events = [e for e in log_events if e[1] != '']

# Expected counts of successfully parsed events:
#   'total'       - number of lines with no error tag
#   'beforeError' - index of the first bad line, i.e. how many events precede
#                   it; falls back to len(log_events) when there is no bad
#                   line, instead of an arbitrary sentinel value.
num_ok = {
    'total': len(log_events) - len(err_events),
    'beforeError': min([i for i, e in enumerate(log_events) if e[1]] +
                       [len(log_events)]),
    }

class TestCase(testBase.BaseParserTestCase):
    """Tests for the stream parsers, parseDate(), parser file offsets and
    the Best-Practices parser module.
    """

    def setUp(self):
        """Run the base-class setUp and start with an empty error record.
        """
        testBase.BaseParserTestCase.setUp(self)
        # (linenum, str(error)) pairs appended by dummy()
        self.had_err = []

    def dummy(self, line=None, error=None, linenum=0):
        """Error callback passed to the parsers as `err_cb`.

        Logs the line number and records (linenum, str(error)) in
        self.had_err so that tests can inspect which lines were reported.
        """
        self.debug_("parse error on line %d", linenum)
        self.had_err.append((linenum, str(error)))

    def testParseStream(self):
        """Parse a stream with pyparsing.

        Checks both directions: every reported error must correspond to a
        sample line tagged as bad, and every sample line tagged as bad must
        have been reported.  The test returns early (without failing) when
        NLPyParser raises NotImplementedError on construction.
        """
        self.debug_("NLPyParsing")
        sio = StringIO(test_log)
        try:
            parser = NLPyParser(sio, err_cb=self.dummy)
        except NotImplementedError:
            self.debug_("pyparsing is not installed, skipping test")
            return
        # Drain the parser; only the error callbacks are of interest here.
        for _ in parser.parseStream():
            pass
        self.debug_("error list: %s" % self.had_err)
        # No good line may have been reported as an error.
        # NOTE(review): this uses the callback's linenum directly as an
        # index into log_events -- confirm the parser's numbering matches.
        for i, e in self.had_err:
            self.failUnless(log_events[i][1] != '',
                            "unexpected failure on '%s': %s" %
                            (log_events[i][0], e))
        # Every bad line must have been reported.
        reported = [linenum for linenum, _ in self.had_err]
        for i, (msg, is_err) in enumerate(log_events):
            if is_err:
                self.failUnless(i in reported,
                                "undetected bad event '%s'" % msg)

    def testParseFastStream(self):
        """Parse with the regex-based parser.

        The stream is parsed once per error-callback setting and the number
        of events yielded is compared with the expected count for that
        setting: the count before the first bad line for None, and the
        count of all good lines for False and for a real callback.
        """
        # (err_cb value, expected number of yielded events), checked in a
        # fixed order rather than in dict-iteration order.
        expectations = (
            (None, num_ok['beforeError']),
            (False, num_ok['total']),
            (self.dummy, num_ok['total']),
        )
        self.debug_("parse fast stream:\n%s" % test_log)
        for errfn, expected in expectations:
            self.debug_("error fn=%s" % errfn)
            sio = StringIO(test_log)
            parser = NLFastParser(sio, err_cb=errfn, verify=True)
            count = 0
            try:
                for _ in parser.parseStream():
                    count += 1
            except ValueError:
                # A ValueError ends the iteration early; the events counted
                # so far are what gets compared below.
                pass
            self.failUnless(count == expected, "with err callback (%s): "
                            "expected %d items, got %d" %
                            (errfn, expected, count))

    def testParseDate(self):
        """Test correctness of date parsing with and without timezones.

        Covers well-formed dates (which must yield the expected number of
        seconds), malformed dates (which must raise ValueError), and logs
        the average parse time of a single date.
        """
        # first try some proper dates
        dates = {
            '1970-01-01T00:00:00Z': 0,
            '1969-12-31T16:00:01-08:00': 1,
            '2009-02-13T15:31:30.987654-08:00': 1234567890.987654,
            '2009-02-13T23:31:30.987654Z': 1234567890.987654,
            '2009-02-13T23:31:30.000000001Z': 1234567890.000000001,
            '1234567890.000000001': 1234567890.000000001,
            }
        for iso, expected_sec in dates.items():
            sec = parseDate(iso)
            self.failUnless(sec == expected_sec,
                            "date '%s' returned %lfs, but expected %lfs" %
                            (iso, sec, expected_sec))
        # now try some improperly formatted dates
        bad_dates = ('', ' ', 'foo', '-foo',
                     '2009-02-13T15:31:30.987654-08:00Z',
                     '2009-02-13T15:31:30.987654',
                     '2009-02-13T15:31:30',
                     '2009-02-13T15:31:30-0800',
                     '2009-02-13T15:31:30-08:0',
                     )
        for bad_date in bad_dates:
            try:
                sec = parseDate(bad_date)
            except ValueError:
                # this should be the ONLY error we need to catch;
                # if the test fails because some other exception was
                # raised this is a bug in the code, not the test.
                pass
            else:
                self.fail("bad date '%s' parsed as %lf seconds" %
                          (bad_date, sec))
        # just for fun, try a quick performance test
        import time
        N, iso = 10000, '2009-02-13T15:31:30.987654-08:00'
        t = time.time()
        for i in xrange(N):
            sec = parseDate(iso)
        usec = ((time.time() - t) / N) * 1e6
        self.debug_("avg time to parse '%s' = %.1lf us" % (iso, usec))

    class OneCharParser(BaseParser):
        """Parser whose process() returns one item per character of the
        stripped input line, i.e. several results for a single line.
        """
        def process(self, line):
            return list(line.strip())

    def testMultipleEventsPerLine(self):
        """Test correctness of file offset mid-resultset.

        While the results of one line are being returned, getOffset() is
        expected to stay at the start of that line; it should only advance
        past the line once its last character ('*') has been returned.
        """
        data = ('abcd*', 'fghi*')
        tmpf = TemporaryFile()
        try:
            tmpf.write('\n'.join(data) + '\n')
            tmpf.seek(0)
            parser = self.OneCharParser(tmpf)
            offset = 0
            for line in data:
                for c in line:
                    e = parser.next()
                    self.failUnless(e[0] == c, "wrong character")
                    self.failUnless(parser.getOffset() == offset,
                                    "wrong offset")
                    if c == '*':
                        # expected offset jumps at EOL
                        offset += len(line) + 1
        finally:
            # always release the temporary file, even if an assertion fails
            tmpf.close()

    def testBPParser(self):
        """Best-Practices (noop) parser, including extra params"""
        lines = ["ts=2008-06-06T17:37:46.411961Z event=nlwrite.event level=Info n=0\n",
                 "ts=2008-06-06T17:37:46.412375Z event=nlwrite.event level=Info n=1\n",
                 "ts=2008-06-06T17:37:46.412411Z event=nlwrite.event level=Info n=2\n"]
        # without extra keywords each line should come back unchanged
        sio = StringIO(''.join(lines))
        p = bp.Parser(sio)
        for i, parsed_line in enumerate(p):
            self.assert_(parsed_line == lines[i])
        # try again with some extra data in the keywords
        kw = {"hello":"world", "howya":"doin"}
        # NOTE(review): the expected text relies on kw.items() yielding the
        # pairs in the same order the parser emits them -- confirm.
        kwstr = ' '.join(["%s=%s" % (k,v) for k,v in kw.items()])
        # expected output: each input line with the keywords appended
        # before the trailing newline
        expected = [s[:-1] + ' ' + kwstr + '\n' for s in lines]
        sio = StringIO(''.join(lines))
        p = bp.Parser(sio, **kw)
        for i, parsed_line in enumerate(p):
            self.assert_(parsed_line == expected[i])

    def testOffset(self):
        """File offset is calculated correctly even with 'bad' events.
        """
        from netlogger.parsers.modules import sge_rpt
        self.parser_class = sge_rpt.Parser
        self.basename = 'sge_rpt.'
        # read and parse data file
        parser = self.checkGood("some-not-parseable", test=lambda e: True)
        # check that offset matches file size
        file_length = os.stat(parser._infile.name).st_size
        self.assert_(file_length > 0, "input file %s is empty" %
                     parser._infile.name)
        offs = parser.getOffset()
        self.assert_(file_length == offs, "file length %d != offset %d" % (
                file_length, offs))
        
# Boilerplate to run the tests
def suite():
    """Return the result of testBase.suite() applied to this module's
    TestCase class.
    """
    module_suite = testBase.suite(TestCase)
    return module_suite
# When executed as a script (rather than imported), hand control to
# testBase.main().
if __name__ == '__main__':
    testBase.main()
