from __future__ import with_statement
"""
Application body of program to load NetLogger log files 
into a relational database.
"""
__rcsid__ = "$Id: nlloader.py 1024 2008-09-12 00:23:08Z dang $"
__author__ = "Dan Gunter"

import logging
import os
import sys
import tempfile
import time
import warnings
#
from netlogger import nllog
from netlogger.nllog import logged
from netlogger import util
from netlogger import talktome
from netlogger.parsers.base import NLFastParser
from netlogger.analysis import loaderConfig
from netlogger.analysis import loader
from netlogger.analysis.loader import DB, TestDB, LoaderFactory
from netlogger import pipeline
from netlogger.pipeline import PipelineApplication

import loaderConfig

def optionFile(name):
    """Return a readable file object for the input called `name`.

    If `name` equals the name of the current standard input stream,
    `sys.stdin` itself is returned; otherwise the named file is
    opened for reading.

    Raises IOError if the named file cannot be opened.
    """
    # sys.stdin may have been replaced by an object that has no usable
    # `name` (for example a StringIO); treat that as "not stdin"
    # instead of failing with AttributeError.
    stdin_name = getattr(sys.stdin, "name", None)
    if stdin_name is not None and name == stdin_name:
        return sys.stdin
    # open() is the documented way to open a file; file() is the type.
    return open(name)

class Application(PipelineApplication):
    """Methods and state for nl_loader
    """
    SAVE_STATE_SEC = 30 # save state every 30 seconds
    SLEEP_NO_FILES = 1 # sleep for 1 second if no more files
    SLEEP_NO_DATA = 0.1 # sleep for 0.1 second at end of file

    def __init__(self, options):
        """Initialize loader state from the parsed `options`.

        When a configuration file is given and `noexec` is not set,
        the talktome receiver is initialized on the loader port; this
        requires `options.secret_file`.

        Raises KeyError if the receiver is needed but no shared-secret
        file was given.
        """
        self.options = options
        self.cfg = None
        self.read_batch = 100 # arbitrary
        self.infile = None
        self._parser = None
        self.n_loaded = 0
        self.n_parsed = 0
        self.setLogger(pipeline.PROG_LOADER)
        self._rcvr = None
        # set once a saveState() warning has been issued, to avoid repeats
        self._state_file_warned = False
        # Only pipeline mode needs the command receiver.
        pipeline_mode = options.config and not options.noexec
        if not pipeline_mode:
            return
        if options.secret_file is None:
            raise KeyError("No shared-secret file given")
        loader_port = pipeline.getLoaderPort()
        self._rcvr = talktome.initReceiver(options.secret_file, loader_port)

    def initLogging(self, level, hndlr):
        """Set the level of the application logger and attach a handler.

        level -- logging level to set on self.log
        hndlr -- handler to add to self.log
        """
        # Each call is made exactly once; the original repeated both
        # lines, which attached the same handler a second time.
        self.log.setLevel(level)
        self.log.addHandler(hndlr)

    def configure(self):
        """Build the configuration and create the loader.

        The configuration comes from the file named in the options if
        one was given, otherwise from the command-line values. A
        database connection (or a TestDB writing to a file) is then
        created and wrapped in a loader stored on self.loader.
        """
        log = self.log
        log.info("configure.start")
        if not self.options.config:
            self._createConfig()
        else:
            self._readConfig()
        cfg = self.cfg
        if cfg.db_module is True:
            # TestDB: output goes to the file named by the DSN
            out = file(cfg.db_dsn, 'w')
            log.debug("testDB.write", file=out.name)
            conn = TestDB(batch=cfg.batch, output=out)
        else:
            # Settings that are both logged and passed to DB()
            common = dict(db_module=cfg.db_module, dsn=cfg.db_dsn,
                          batch=cfg.batch, create=cfg.create,
                          conn_kw=cfg.db_param, unique=cfg.unique)
            log.info("db.connect.start", **common)
            conn = DB(schema_file=cfg.schema_file,
                      schema_init_keys=cfg.schema_init_keys,
                      schema_finalize_keys=cfg.schema_finalize_keys,
                      **common)
            log.info("db.connect.end", status=0)
        self.loader = LoaderFactory(conn).new()
        # Do not calculate event hash if uniqueness isn't wanted.
        # This is both more efficient AND guarantees correct behavior
        # in the case of existing tables with the UNIQUE constraint.
        if not cfg.unique:
            self.loader.setNoHash()
        log.info("configure.end", status=0)

    def _readConfig(self):
        """Initialize configuration from file specified in options.
        """
        filename = os.path.abspath(self.options.config)
        self.log.info("readConfig.start", file=filename)        
        try:
            self.cfg = loaderConfig.Configuration(filename)
        except util.ConfigError, E:
            self.log.exception("readConfig.end", E)
            raise RuntimeError("in file '%s': %s" % (filename, E))
        self.log.info("readConfig.end", status=0)
        
    def _createConfig(self):
        """Initialize configuration from command-line values.
        """
        self.log.info("createConfig.start")
        CFG = loaderConfig.Configuration
        options = self.options
        create_bits = int(options.create) + (int(options.drop) << 1)
        lines = [
            "[%s]" % CFG.GLOBAL,
            "%s = %s" % (CFG.STATE_FILE, options.restore),
            "[%s]" % CFG.INPUT,
            "%s = %s" % (CFG.BASE, options.ifile),
            "%s = no" % CFG.IS_NUM,
            "[%s]" % CFG.DB,
            "%s = %s" % (CFG.DB_URI, options.db_uri),
            "%s = %d" % (CFG.DB_BATCH, options.ibatch),
            "%s = %d" % (CFG.DB_CREATE, create_bits),
            "%s = %s" % (CFG.DB_UNIQUE, options.unique),
           ]
        # Add optional database configuration
        if options.schema_file:
            lines.append("%s = %s" % (CFG.DB_SCHEMA_FILE, 
                                      options.schema_file))
        # schema_init and schema_finalize are lists, but the
        # user would have entered them with commas already, which
        # is also the convention for ConfigParser. Thus, they
        # can just be added as strings.
        if options.schema_init:
            values = options.schema_init
            lines.append("%s = %s" % (CFG.DB_SCHEMA_INIT, values)) 
        if options.schema_finalize:
            values = options.schema_finalize
            lines.append("%s = %s" % (CFG.DB_SCHEMA_FIN, values)) 
        # Add database connection parameters sub-section
        p = options._params
        if p:
            lines.append("[[parameters]]")
            for k, v in p:
                lines.append("%s = \"%s\"" % (k, v))
        try:
            self.cfg = CFG(lines)
        except util.ConfigError, E:
            self.log.exception("createConfig.end", E)
            raise RuntimeError("%s from configuration: %s" % (E, lines))
        self.log.info("createConfig.end", status=0)

    def run(self):
        """Run application's main loop.

        Return status code: 0 for success, non-zero for error.
        """
        self.log.info("run.start")
        status = 0
        self._last_save_state = time.time() # used in _readLoop()
        try:
            if self.cfg.numbered:
                self._runForever()
            else:
                self._runOne()
        except pipeline.ExitNow:
            self.log.debug("run.exitNow")
        except Exception, E:
            self.log.exception("run", E)
            status = -1
        self.flush()
        self.saveState()
        self.log.info("run.end", status=status)
        return status

    def exit(self, *args, **kwargs):
        """Flush the loader and save state, then run the base-class exit.

        All arguments are passed through to PipelineApplication.exit().
        """
        # Persist progress first, then hand over to the base class.
        self.flush()
        self.saveState()
        PipelineApplication.exit(self, *args, **kwargs)

    def _runOne(self):
        """Load a single input file, log the total, and stop.
        """
        self._setInfile()
        # one file, read until the parser is exhausted
        self._logTotal(self._readLoop(eof_event=False), 1)

    def _runForever(self):
        """Process numbered input files 'forever'.

        Repeatedly picks the next input file; when none is available,
        checks for pipeline commands and sleeps. Each completed file
        is deleted or moved according to the configuration. The loop
        only ends through an exception (e.g. pipeline.ExitNow raised
        by _pipelineCommand()).
        """
        # self.n_loaded is a running count that is never reset, so the
        # total for this call is its growth since the start. Summing
        # the return value of _readLoop() would count earlier files
        # again for every new file.
        loaded_at_start = self.n_loaded
        nfiles = 0
        while 1:
            self._setInfile()
            if not self.infile:
                # nothing to read yet: stay responsive to commands
                self._pipelineCommand()
                self._spin("no_files")
                continue
            self._readLoop(eof_event=True)
            nfiles += 1
            self._logTotal(self.n_loaded - loaded_at_start, nfiles)
            # Delete or move completed file
            if self.cfg.delete:
                self._deleteInfile()
            else:
                self._moveInfile()

    def _spin(self, why, sec=None):
        """Sleep for a short while, logged as a "spin.<why>" event.

        why -- short reason, appended to the logged event name
        sec -- seconds to sleep; when omitted, SLEEP_NO_FILES is used
        """
        # Previously `sec` was accepted but ignored. Falling back to
        # SLEEP_NO_FILES keeps the behavior of existing callers.
        if sec is None:
            sec = self.SLEEP_NO_FILES
        with nllog.logged(self.log, "spin." + why):
            time.sleep(sec)

    def _logTotal(self, nevents, nfiles):
        """Log a "loaded.total" event with the event and file counts.
        """
        counts = dict(events=nevents, files=nfiles)
        self.log.info("loaded.total", **counts)

    def _setInfile(self):
        """Set self.infile to the next input to read.

        For numbered input this is whatever util.getLowestNumberedFile()
        returns for the configured base; otherwise the base itself is
        opened (or standard input is used) via optionFile().
        """
        cfg = self.cfg
        if not cfg.numbered:
            self.infile = optionFile(cfg.base)
        else:
            self.infile = util.getLowestNumberedFile(cfg.base)

    def _deleteInfile(self):
        name = self.infile.name
        self.infile.close()
        self.infile = None
        path = os.path.realpath(name)
        self.log.info("input.delete.start", file=path)
        try:
            os.unlink(path)
            self.log.info("input.delete.end", file=path, status=0)
        except OSError,E:
            self.log.exception("input.delete.end", E, file=path)
            raise

    def _moveInfile(self):
        name = self.infile.name
        self.infile.close()
        self.infile = None
        old_path = os.path.realpath(name)
        if self.cfg.moveto_dir:
            basename = os.path.basename(old_path)
            new_path = os.path.join(self.cfg.moveto_dir, basename)
        elif self.cfg.moveto_suffix:
            new_path = old_path + self.cfg.moveto_suffix
        else:
            raise RuntimeError("expected a directory or suffix to move "
                               "old file to, but they both were empty")
        self.log.info("input.move.start", src=old_path, dst=new_path)
        try:
            os.rename(old_path, new_path)
            self.log.info("input.move.end", src=old_path, dst=new_path, status=0)
        except OSError,E:
            self.log.exception("input.move.end", E, src=old_path, dst=new_path)
            raise
        
    def _readLoop(self, eof_event=None):
        """Parse events from self.infile and load them into the database.

        A new NLFastParser is created over self.infile and kept in
        self._parser. If a state file is configured, restoreState() is
        called and the parser offset is set to self._infile_offs.

        eof_event -- when true, running out of input does not end the
            loop: the method sleeps SLEEP_NO_DATA seconds, calls
            loader.checkFlush() and tries again, stopping only when an
            event named util.EOF_EVENT is read. When false, the loop
            ends as soon as the parser raises StopIteration.

        After each event, pipeline commands are processed (which may
        raise pipeline.ExitNow) and the state is saved if more than
        SAVE_STATE_SEC seconds passed since the last save.

        Returns self.n_loaded, the running count of loaded events kept
        on the instance; it is not reset by this method.
        """
        self._parser = p = NLFastParser(self.infile)
        if self.cfg.state_file:
            # NOTE(review): restoreState() may rebind self.infile after
            # the parser above was built on the previous self.infile --
            # confirm this is intended.
            with nllog.logged(self.log, 
                              "readloop.restoreState", level=logging.INFO, 
                        file=self.cfg.state_file):
                self.restoreState()
                self._parser.setOffset(self._infile_offs)
        while 1:
            # read the next log event
            try:
                event = p.next()
            except StopIteration:
                if eof_event:
                    # No data right now: wait and retry.
                    # NOTE(review): this path skips _pipelineCommand()
                    # and the periodic state save below.
                    time.sleep(self.SLEEP_NO_DATA)
                    self.loader.checkFlush()
                    continue
                break
            event_name = event and event.get('event', None)
            # an event without an 'event' field is bad input; stop reading
            if event_name is None:
                self.log.error("readLoop.bad_input",msg="missing 'event'", 
                          input=event)
                break
            # check for the explicit end-of-file marker event
            if eof_event and event_name == util.EOF_EVENT:
                break
            self.n_parsed += 1
            # load event into database; a ValueError skips just this event
            try:
                self.loader.load(event)
                self.n_loaded += 1
            except ValueError, E:
                self.log.error("bad event skipped: %s" % E)
            # Check for commands from pipeline
            self._pipelineCommand()
            # Save state if the save interval has elapsed
            t = time.time()
            if t - self._last_save_state > self.SAVE_STATE_SEC:
                self.saveState()
                self._last_save_state = t
        return self.n_loaded

    def _pipelineCommand(self):
        """Check for and process a command from the pipeline.

        Does nothing when no receiver is configured. A 'save' command
        saves the state immediately; an 'exit' command raises
        pipeline.ExitNow once the result has been logged. Any other
        command received with status 0 is logged as "not_implemented"
        with status -1.

        Raises pipeline.ExitNow when an 'exit' command was received.
        """
        if not self._rcvr:
            return
        self.log.info("pipecmd.process.start")
        cmd, status, msg = self.getCommand(self._rcvr)
        do_exit = False
        if 0 == status:
            if cmd == 'save':
                self.saveState()
                # restart the interval for the periodic save
                self._last_save_state = time.time()
            elif cmd == 'exit':
                do_exit = True
            else:
                msg = "not_implemented"
                status = -1
        self.log.info("pipecmd.process.end", status=status, cmd=cmd, msg=msg)
        # Raise only after the end event has been logged.
        if do_exit:
            raise pipeline.ExitNow()

    def saveState(self):
        """Save the current input file name and offset.
        Will raise an IOError if the file is not writable.
        """
        self.log.info("saveState.start")
        if not self.cfg.state_file:
            if not self._state_file_warned:
                self.log.warn("saveState.end", status=-1, msg="no state file specified")
                self._state_file_warned = True
        elif not self._parser:
            if not self._state_file_warned:
                self.log.warn("saveState.end", status=-1, msg="no parsers exist so no state to save")
                self._state_file_warned = True
        else:
            state_file = self.cfg.state_file
            try:
                f = file(state_file, 'w')        
                #print "@@ %s %ld" % (self.infile.name, self.infile.tell())
                offset = self._parser.getOffset()
                f.write("%s %ld\n" % (self.infile.name, offset))
                f.close()
                self.log.info("saveState.end", status=0, file=state_file, offs=offset)
            except (IOError, AttributeError), E:
                self.log.exception("saveState.end", E)
                raise

    def restoreState(self):
        """Restore to the saved file name and offset.
        Will raise an IOError if the file is not readable or seekable.
        """
        self.log.info("restoreState.start")
        self._infile_offs = 0
        try:
            f = file(self.cfg.state_file)
        except IOError, E:
            self.log.warn("restoreState.end", msg=str(E), status=-1)
            return  None
        line = f.readline().strip()
        filename, offs_str = line.split()
        self._infile_offs = int(offs_str)
        self.infile = file(filename)
        self.log.info("restoreState.end", status=0)

    def flush(self):
        """Flush loader.

        Updates the parser offset when every parsed event was also
        loaded, then closes the loader (if one was created).
        """
        self.log.info("flush.start")
        # make sure we get 'credit' for all loaded events
        if self._parser and (self.n_loaded >= self.n_parsed):
            self._parser.updateOffset()
        # self.loader is only assigned in configure(); flush() can be
        # reached through exit() before that happened (for example
        # after a failed database connection), so guard against it.
        loader = getattr(self, "loader", None)
        if loader is not None:
            loader.close()
        self.log.info("flush.end", status=0)

