"""
Unit tests for netlogger/analysis/nlloader.py
"""
__author__ = 'Dan Gunter'
__rcsid__ = '$Id: testNlLoader.py 1039 2008-09-15 22:44:44Z dang $'

import glob
import os
import signal
import subprocess
from subprocess import Popen, PIPE, STDOUT
import sys
import tempfile
import time
import unittest
#
from netlogger.analysis import nlloader
from netlogger.util import rm_rf, EOF_EVENT
from netlogger import pipeline
import testBase

class Options:
    """Bare attribute bag standing in for parsed command-line options.

    Every keyword argument given to the constructor becomes an instance
    attribute of the same name.
    """

    def __init__(self, **kwargs):
        for name, value in kwargs.items():
            setattr(self, name, value)

class TestCase(testBase.BaseTestCase):

    def setUp(self):
        """Create a scratch directory holding localized configs and input.

        Every 'nl_loader-*.cfg' file found in self.data_dir is copied,
        line by line, into the scratch directory under the same name;
        each line is %-formatted with the keys INPUT_FILE and TMPDIR.
        Finally the input log is copied to self.INPUT_FILE.
        """
        # Make a temporary directory
        self.TMPDIR = tempfile.mkdtemp(dir="/tmp", prefix="nl_loader_test-",
                                       suffix=".tempdir")
        # Set an input file
        self.input_base = "nl_loader-input.log"
        self.INPUT_FILE = os.path.join(self.TMPDIR, self.input_base)
        # Values substituted into the configuration templates
        local_vals = {'INPUT_FILE': self.INPUT_FILE,
                      'TMPDIR': self.TMPDIR}
        for template in glob.glob(self.data_dir + "/nl_loader-*.cfg"):
            target = os.path.join(self.TMPDIR, os.path.basename(template))
            # Close both files explicitly, so the localized config is
            # fully written before any test reads it back
            ofile = open(target, 'w')
            try:
                ifile = open(template)
                try:
                    for line in ifile:
                        ofile.write(line % local_vals)
                finally:
                    ifile.close()
            finally:
                ofile.close()

        self._copyInput(self.INPUT_FILE)

    def _copyInput(self, ofilename):
        """Copy the reference input log to ofilename.

        The source is the file named self.input_base inside
        self.data_dir. As a side effect, self.INPUT_DATA_LEN is set to
        the summed length of all lines copied.
        """
        source = os.path.join(self.data_dir, self.input_base)
        total = 0
        ofile = open(ofilename, 'w')
        try:
            ifile = open(source)
            try:
                for line in ifile:
                    ofile.write(line)
                    total += len(line)
            finally:
                ifile.close()
        finally:
            # always release the handle, even if the copy fails part-way
            ofile.close()
        self.INPUT_DATA_LEN = total

    def tearDown(self):
        """Delete the scratch directory, unless self.DEBUG is set."""
        if not self.DEBUG:
            rm_rf(self.TMPDIR)
            return
        # debug mode: leave the directory in place and say so
        self.debug_("Do not remove %s" % self.TMPDIR)

    def createApp(self, path, **kw):
        """Build and configure an nlloader.Application for a config file.

        path -- configuration file, stored as options.config
        kw   -- extra option attributes; they are applied last, so they
                override the defaults set here

        Returns the Application, after its configure() has been called.
        """
        options = Options()
        options.config = path
        options.noexec = False
        fd, options.secret_file = tempfile.mkstemp()
        # Only the file name is passed on, so release the descriptor
        # now instead of leaking one per call
        os.close(fd)
        for keyword, value in kw.items():
            setattr(options, keyword, value)
        # set a new port every time to allow multiples
        port = pipeline.getLoaderPort() + 1
        pipeline.setLoaderPort(port)
        app = nlloader.Application(options)
        app.configure()
        return app

    def parseStateFile(self, f):
        """Parse the first line of an open state file.

        The line must hold exactly two whitespace-separated fields;
        anything else raises ValueError. Returns (filename, offset),
        with the offset converted to an int.
        """
        fields = f.readline().strip().split()
        filename, position = fields
        return (filename, int(position))

    def testNumberedFiles(self):
        """Reading from numbered files.

        Creates three numbered copies of the input, each ending with an
        EOF event, runs the loader in a forked child for two seconds,
        and then checks that every numbered file has a '.DONE' sibling.
        """
        path = os.path.join(self.TMPDIR, 'nl_loader-numbered.cfg')
        app = self.createApp(path)
        # make 3 numbered files
        n = 3
        for i in range(n):
            filename = self.INPUT_FILE + '.%d' % i
            self._copyInput(filename)
            # add EOF event to each
            f = open(filename, 'a')
            try:
                f.write('ts=2008-03-14T19:26:19.160217Z event=%s\n'
                        % EOF_EVENT)
            finally:
                f.close()
        # run app in separate process
        pid = os.fork()
        if pid == 0:
            # child: must never return into the test runner, or the
            # rest of the test suite would be executed a second time
            status = 1
            try:
                app.run()
                status = 0
            finally:
                os._exit(status)
        # parent
        # wait for it to start, and slurp all 3 files
        time.sleep(2)
        # kill the child (before checks below)
        os.kill(pid, signal.SIGTERM)
        # reap it, so that no zombie process is left behind
        os.waitpid(pid, 0)
        # check that all the files were marked '.DONE'
        for i in range(n):
            filename = self.INPUT_FILE + '.%d' % i + '.DONE'
            self.assert_(os.path.exists(filename),
                         "post-processed file %s not found" % filename)

    def testSaveStateTimer(self):
        """Make sure state is saved with timer

        After a full run, the state file must name the input file and
        record an offset equal to the input's length.
        """
        path = os.path.join(self.TMPDIR, 'nl_loader-simple.cfg')
        # with timer
        app = self.createApp(path)
        # set the save-state interval to be tiny, to guarantee it gets called
        app.SAVE_STATE_SEC = 0.000000001 # 1ns
        # process the whole file
        app.run()
        # check that state was saved; test for existence before opening,
        # so a missing file is reported as a test failure with a message
        state_path = app.cfg.state_file
        self.assert_(os.path.exists(state_path), "state file does not exist")
        f = open(state_path)
        try:
            filename, offset = self.parseStateFile(f)
        finally:
            f.close()
        input_filename = self.INPUT_FILE
        self.assert_(filename == input_filename, "wrong filename: "
                     "got '%s', expected '%s'" % (filename, input_filename))
        self.assert_(offset == self.INPUT_DATA_LEN, "wrong offset: "
                     "got %d, expected %d" % (offset, self.INPUT_DATA_LEN))

    def testSaveStateManual(self):
        """Make sure state is saved with a manual call

        After a full run, the state file must name the input file and
        record an offset equal to the input's length.
        """
        path = os.path.join(self.TMPDIR, 'nl_loader-simple.cfg')
        app = self.createApp(path)
        # set the save-state interval to be huge, so it is not called
        app.SAVE_STATE_SEC = 1000000
        # process the whole file
        # NOTE(review): no explicit save call is made here; presumably
        # run() itself saves the state when it finishes -- confirm
        app.run()
        # check that state was saved; test for existence before opening,
        # so a missing file is reported as a test failure with a message
        state_path = app.cfg.state_file
        self.assert_(os.path.exists(state_path), "state file does not exist")
        f = open(state_path)
        try:
            filename, offset = self.parseStateFile(f)
        finally:
            f.close()
        input_filename = self.INPUT_FILE
        self.assert_(filename == input_filename, "wrong filename: "
                     "got '%s', expected '%s'" % (filename, input_filename))
        self.assert_(offset == self.INPUT_DATA_LEN, "wrong offset: "
                     "got %d, expected %d" % (offset, self.INPUT_DATA_LEN))

    def testDuplicateEvents(self):
        """Verify that duplicate events are processed properly

        Skipped if MySQL server is not present on localhost or user
        does not have a working ~/.my.cnf
        """
        infile = os.path.join(self.data_dir, 'nl_loader-duplicateEvents.log')
        # loop over a few batch sizes
        for batch in 1, 3, 5, 20:
            # equiv to: nl_loader -i <infile> -u mysql://localhost -C -D
            #           -p db=<dbname> -p read_default_file=~/.my.cnf
            #           -b <batch>
            dbname = "testDuplicateEvents"
            options = Options(
                config=None,
                ifile=infile,
                db_uri="mysql://localhost",
                ibatch=batch,
                create=True,
                drop=True,
                unique=True,
                _params=(('db',dbname), ('read_default_file',"~/.my.cnf"))
                )
            app = nlloader.Application(options)
            # this will connect to DB. If it fails, skip test
            try:
                app.configure()
            except Exception,E:
                self.debug_("Cannot connect to MySQL: skipping test. "
                            "Error: %s" % E)
                return # skip test
            # figure out how many non-dup. events are in file:
            f = file(infile)
            d = { }
            for line in f:
                d[line] = 1
            f.close()
            unique = len(d)
            # load into the database
            app.run()
            app.loader.flush() # make sure events are inserted
            # now, "borrow" the app's connection to check the DB
            c = app.loader.conn.cursor()
            c.execute("use %s" % dbname)
            c.execute("select count(id) from event")
            r = c.fetchone()
            db_unique = int(r[0])
            # verify that DB matches our own calculation
            self.assert_(db_unique == unique, "Number of unique events "
                         "in database '%s' (%d) does not match the number in "
                         "the input file (%d), '%s'" % (dbname, db_unique, 
                                                        unique, infile))

    def testRestore(self):
        """Restore nl_loader from where it left off with -r option

        Runs the nl_loader script three times on the same log file,
        interrupting it with SIGINT after a short sleep each time. After
        every run, the offset in the state file must be a whole number
        of log lines, and the number of lines processed in that run is
        compared with the inserts found in the test database file.
        """
        # constants
        LOG_SZ, SLEEP_SEC = 100000, 1.5
        log_fmt = "ts=2008-07-25T09:21:10.%06dZ event=e\n"
        line_len = len(log_fmt % 0) # fixed length
        # All scratch files are created inside self.TMPDIR, so tearDown()
        # disposes of them (or keeps them, in debug mode) with the rest.
        # make large-ish log file to load
        self.debug_("make log file with %d items" % LOG_SZ)
        logfd, logname = tempfile.mkstemp(suffix='.log', dir=self.TMPDIR)
        try:
            for i in xrange(LOG_SZ):
                os.write(logfd, log_fmt % i)
        finally:
            os.close(logfd)
        # get a state file; only its name is needed, so close the fd
        statefd, statename = tempfile.mkstemp(suffix='.state',
                                              dir=self.TMPDIR)
        os.close(statefd)
        # get a test database file; again only the name is needed
        dbfd, dbname = tempfile.mkstemp(suffix='.sql', dir=self.TMPDIR)
        os.close(dbfd)
        # set name, args of nl_loader script
        nl_loader = testBase.scriptPath('nl_loader')
        db_uri = "test://%s" % dbname
        args = ['-u', db_uri, '-i', logname, '-r', statename]
        cmdline = ' '.join([nl_loader] + args) # for debug output only
        cum_num = 0 # lines processed so far
        for i in xrange(3):
            # start loader
            self.debug_("\nrun nl_loader:\n    %s" % cmdline)
            proc = subprocess.Popen([nl_loader] + args, stdout=PIPE,
                                    stderr=STDOUT)
            # wait
            self.debug_("sleep for %lf seconds" % SLEEP_SEC)
            time.sleep(SLEEP_SEC)
            # kill it
            self.debug_("kill process (%d)" % proc.pid)
            try:
                os.kill(proc.pid, signal.SIGINT)
            except OSError:
                self.fail("'nl_loader %s' died" % ' '.join(args))
            # in debug mode, print output
            for line in proc.stdout.readlines():
                self.debug_("<out> %s" % line.strip())
            # reap the child and release its pipe before the next round
            proc.wait()
            proc.stdout.close()
            # read back file offset
            state_file = open(statename, 'r')
            try:
                txt = state_file.readline()
            finally:
                state_file.close()
            self.assert_(txt, "state file is empty after kill")
            self.debug_("state file contents = '%s'" % txt.strip())
            _, offs_str = txt.split()
            offs = int(offs_str)
            self.debug_("state file offset = %d" % offs)
            # calculate how many log lines were processed
            total_lines, leftover = divmod(offs, line_len)
            self.assert_(leftover == 0, "offset is not an even "
                         "multiple of line length")
            num = total_lines - cum_num # subtract lines loaded before
            self.debug_("loaded %d lines" % num)
            # compare with number of events inserted into DB
            self.debug_("count inserts into event table")
            events = 0
            f = open(dbname)
            try:
                for line in f:
                    s = line.lower().strip()
                    if s.startswith("insert into event"):
                        events += 1
            finally:
                f.close()
            self.assert_(events+1 >= num, "number of events inserted "
                         "into database, %d, is more than one less than "
                         "of lines processed, %d" % (events, num))
            cum_num += num
            self.debug_("%d lines loaded so far" % cum_num)

    def _runExternalSchema(self, db_uri, schema_file, init_kw, fin_kw,
                           **kw):
        """Configure, run and flush a loader for the testExternalSchema*()
        tests, and return the application object.

           Corresponds to the command line:
             nl_loader -i <self.INPUT_FILE> -u <scheme>://<db_file>
                     -D -s <schema_file>  --schema-init <init_kw>
                     --schema-finalize <fin_kw>

           Extra keyword arguments are handed on as the '_params' option.
        """
        # One dictionary serves both the Options object and the debug line
        settings = dict(
            config=None,
            ifile=self.INPUT_FILE,
            db_uri=db_uri,
            create=True,
            drop=True,
            restore=None,
            ibatch=100,
            unique=True,
            _params=kw.items(),
            schema_file=schema_file,
            schema_init=init_kw,
            schema_finalize=fin_kw,
            )
        options = Options(**settings)
        self.debug_("Run nlloader.Application(options=%s)" % settings)
        app = nlloader.Application(options)
        app.configure()
        # Process the input, then push out anything still buffered
        app.run()
        app.flush()
        return app
        
    def testExternalSchemaSqlite(self):
        """Can nl_loader use a different external schema for sqlite

        For every combination of schema-init and schema-finalize
        keywords, loads the input and then checks the event rows and
        the numbering of the 'attr' ids directly through sqlite3.
        """
        import sqlite3
        db_file = os.path.join(self.TMPDIR, "ext.sqlite")
        db_uri = "sqlite://%s" % db_file
        schema_file = os.path.join(self.TMPDIR, "nl_loader-schema.cfg")
        for init_kw in ('', 'unique', 'nounique'):
            for fin_kw in ('', 'noop'):
                # Configure/run application
                self._runExternalSchema(db_uri, schema_file, init_kw, fin_kw)
                # Check result
                # Open connection to sqlite
                conn = sqlite3.connect(db_file)
                try:
                    for row in conn.execute("select name, time from event"):
                        self.failUnless(row[0] == u'nlwrite.event')
                        self.failUnless(row[1] > 1205522779)
                    # make sure auto-increment column worked:
                    # ids must count up from 1 without gaps
                    expected_id = 1
                    for row in conn.execute("select * from attr"):
                        self.failUnless(row[0] == expected_id,
                                        "id in attr %d != %d"
                                        % (row[0], expected_id))
                        expected_id += 1
                finally:
                    # one connection is opened per combination; do not
                    # let them pile up
                    conn.close()

    def testExternalSchemaMySQL(self):
        """Can nl_loader use a different external schema for MySQL

        If the very first run raises, the test returns early (MySQL may
        not be available); a failure on any later run is re-raised.
        """
        db_uri = "mysql://localhost"
        schema_file = os.path.join(self.TMPDIR,"nl_loader-schema.cfg")
        db_name = "testExternalSchema"
        init_choices = ('', 'index,unique', 'index,nounique',
                        'noindex, nounique')
        fin_choices = ('', 'noop')
        succeeded_once = False
        for init_kw in init_choices:
            for fin_kw in fin_choices:
                # Configure/run application
                try:
                    app = self._runExternalSchema(db_uri, schema_file,
                                                  init_kw, fin_kw,
                                                  db=db_name)
                except Exception:
                    # Once one run has worked, later ones must work too
                    if succeeded_once:
                        raise
                    # Nothing has worked yet: MySQL may not be present
                    self.debug_("Skipping test -- cannot connect")
                    return
                succeeded_once = True
                # Inspect the result through the application's connection
                cursor = app.loader.conn.cursor()
                cursor.execute("use %s" % db_name)
                cursor.execute("select name, time from event")
                for row in cursor.fetchall():
                    self.failUnless(row[0] == u'nlwrite.event')
                    self.failUnless(row[1] >  1205522779)
                # auto-increment check: ids must run 1, 2, 3, ...
                cursor.execute("select * from attr")
                for position, row in enumerate(cursor.fetchall()):
                    wanted = position + 1
                    self.failUnless(row[0] == wanted,
                                    "id in attr %d != %d" % (row[0], wanted))

    def testExternalSchemaErrors(self):
        """Can nl_loader handle bad external schema requests
        """
        schema_file = os.path.join(self.TMPDIR,"nl_loader-schema.cfg")
        sqlite_file = os.path.join(self.TMPDIR, "extE.sqlite")
        for db_uri,kw in (("mysql://localhost", 
                           {'db':'testExternalSchemaE'}),
                          ("sqlite://%s" % sqlite_file, {})):
            for init_kw in 'None','-',',', '':
                for fin_kw in '*',',':
                    self.debug_("uri=%s kw=%s | init=%s fin=%s" % (
                            db_uri, kw, init_kw, fin_kw))
                    # Configure/run application
                    try:
                        self._runExternalSchema(db_uri, schema_file,
                                                init_kw, fin_kw, **kw)
                    except KeyError, E:
                        # Make sure it's the right kind of KeyError
                        estr = str(E)
                        self.failUnless("in subsection" in estr)

# Boilerplate to run the tests
def suite():
    """Return whatever testBase.suite() builds for TestCase."""
    test_suite = testBase.suite(TestCase)
    return test_suite
if __name__ == '__main__':
    # Executed as a script (not imported): hand off to testBase.main()
    testBase.main()
