#!/usr/bin/env python
"""
Report gaps in database records.
"""

# Revision identifier string.
# NOTE(review): looks like an expanded version-control $Id$ keyword -- confirm
# it is still being updated by the repository.
__rcsid__ = "$Id: nl_gap 23923 2009-09-18 22:42:26Z ksb $"
# Author name and contact address
__author__ = "Dan Gunter <dkgunter@lbl.gov>"

## Imports

import calendar
import getpass
import sys
#
from netlogger import nldate
from netlogger import util
from netlogger.analysis import loader
from netlogger.nllog import DoesLogging, get_logger, OptionParser

## Exceptions

class OptionError(Exception):
    """Exception subclass named for option errors.

    NOTE(review): no code in this script raises or catches it -- confirm
    whether it is still needed.
    """

class ConnectError(Exception):
    """Raised by db_connect() when the database connection attempt fails."""

## Global variables

# Standard indentation: prefix written before each gap line of the report
INDENT = "   "

## Signal handlers

def on_kill(signo, frame):
    """Signal handler: log an "abort" warning and exit with status 1.

    'signo' is recorded in the log event as 'signal'.
    'frame' is accepted to match the handler signature but is not used.
    """
    logger = get_logger(__file__)
    logger.warn("abort", signal=signo)
    sys.exit(1)

## Classes

class GapQuery(DoesLogging):
    """Find the calendar days, between the first and last day that have
    events, on which the 'event' table has no rows.

    Usage: construct with an open connection, call execute(), then
    read the result with get_gaps().

    NOTE(review): the SQL uses from_unixtime(), which looks
    MySQL-specific -- confirm behaviour with the other database modules.

    NOTE(security): the namespace is interpolated directly into the SQL
    text, so a value containing a quote or '%' changes the statement.
    It is not bound as a parameter here because the paramstyle of the
    connection's driver is not visible from this class; callers should
    only pass trusted namespace values.
    """
    # template for queries with an event namespace
    NS_TMPL = "select from_unixtime(time, '%%Y%%m%%d') date from event "\
        "where name like '%(ns)s.%%' and time >= %(t1)f and time < %(t2)f "\
        "group by date order by date"
    # template for queries with no event namespace (all events)
    ALL_TMPL = "select from_unixtime(time, '%%Y%%m%%d') date from event "\
        "where time >= %(t1)f and time < %(t2)f "\
        "group by date order by date"

    def __init__(self, conn, namespace=None, start_time=None, end_time=None):
        """Store the connection and the query parameters.

        'conn' must provide a cursor() method.
        'namespace' restricts the query to events whose name starts
        with '<namespace>.'; None means all events.
        'start_time' / 'end_time' bound the 'time' column (start
        inclusive, end exclusive). They are formatted with '%f' in
        execute(), so both must be numbers by then; the None defaults
        would raise TypeError there.
        """
        DoesLogging.__init__(self)
        self._params = dict(ns=namespace, t1=start_time, t2=end_time)
        self._conn = conn
        self._gaps = []

    def execute(self):
        """Run the query and compute the gaps.

        The result is stored on the instance; retrieve it with get_gaps().
        """
        if self._params['ns'] is None:
            query = self.ALL_TMPL % self._params
        else:
            query = self.NS_TMPL % self._params
        cursor = self._conn.cursor()
        self.log.debug("query.start", value=query)
        cursor.execute(query)
        self.log.debug("query.end")
        self.log.debug("query.fetch.start")
        rows = cursor.fetchall()
        self.log.debug("query.fetch.end")
        # first column of each row is the 'YYYYMMDD' date string
        dates = [r[0] for r in rows]
        self._gaps = self._calc_gaps(dates)

    def get_gaps(self):
        """Return the list of (year, month, day) gaps found by the last
        execute(); empty if execute() has not been called.
        """
        return self._gaps

    def _calc_gaps(self, dates):
        """Calculate gaps in a sorted sequence of date strings of the
        form 'YYYYMMDD'.

        Every calendar day from dates[0] through dates[-1] is checked.
        Returns a list of (year, month, day) triples for the days that
        are not in the sequence. Fewer than two dates yields no gaps.
        """
        if len(dates) < 2:
            return []

        def split(text):
            return int(text[:4]), int(text[4:6]), int(text[6:])

        first_year, first_month, first_day = split(dates[0])
        last_year, last_month, last_day = split(dates[-1])
        # Membership test instead of a running index, so one unmatched
        # day can never make every later day look like a gap.
        present = set(dates)
        gaps = []
        for year in range(first_year, last_year + 1):
            in_first_year = (year == first_year)
            in_last_year = (year == last_year)
            if in_first_year:
                month_lo = first_month
            else:
                month_lo = 1
            if in_last_year:
                month_hi = last_month
            else:
                month_hi = 12
            for month in range(month_lo, month_hi + 1):
                # The day bounds apply only to the very first and very
                # last month of the whole range; comparing the month
                # number alone would also clip same-numbered months in
                # other years.
                if in_first_year and month == first_month:
                    day_lo = first_day
                else:
                    day_lo = 1
                if in_last_year and month == last_month:
                    day_hi = last_day
                else:
                    day_hi = calendar.monthrange(year, month)[1]
                for day in range(day_lo, day_hi + 1):
                    date = "%04d%02d%02d" % (year, month, day)
                    self.log.trace("check_date", value=date)
                    if date not in present:
                        self.log.trace("gap")
                        gaps.append((year, month, day))
        return gaps

## Functions

def check_required_options(options):
    """Report which required command-line options were not given.

    'options' must have 'url' and 'db' attributes.
    Returns a list of usage strings, one per missing option; the list
    is empty when nothing is missing. The database name is not
    required when the URL starts with "sqlite://".
    """
    url = options.url
    missing = []
    if url is None:
        missing.append("-u URL")
    if options.db is None:
        # only a sqlite URL makes the database name optional
        needs_db = url is None or not url.strip().startswith("sqlite://")
        if needs_db:
            missing.append("-d DBNAME")
    return missing

def get_avail_url():
    """Build the URL patterns for the database modules listed in
    loader.AVAIL_DB.

    The 'test' entry is left out; 'sqlite' gets a file-path pattern and
    every other name gets a host pattern.
    Returns a list of zero or more strings.
    """
    def pattern(db_name):
        if db_name == 'sqlite':
            return "%s:///path/to/file" % db_name
        return "%s://[user@]host[:port]" % db_name

    return [pattern(db_name) for db_name in loader.AVAIL_DB
            if db_name != 'test']

def _guess_timestamp(text, label):
    """Convert one endpoint of a time range with nldate.guess().

    'label' ("START" or "END") only appears in the error messages.
    Returns the timestamp part of the nldate.guess() result.
    Raises ValueError if nldate.guess() rejects the text or reports
    the format as nldate.UNKNOWN.
    """
    try:
        fmt, timestamp = nldate.guess(text)
    except ValueError as err:
        raise ValueError("Bad format '%s' for %s date: %s" % (
                text, label, err))
    if fmt == nldate.UNKNOWN:
        raise ValueError("Unknown format for %s date '%s'" % (label, text))
    return timestamp


def parse_timerange(timerange):
    """Parse a time range written as START::END.

    The string is split on the first '::'; each side is converted
    independently.
    Return two numeric timestamps (start, end).
    Raise ValueError on bad input.
    """
    try:
        start_date, end_date = timerange.split('::', 1)
    except ValueError:
        # no '::' separator, so there is only one piece to unpack
        raise ValueError("Format must be START::END, e.g. 2009-01::2009-03")
    start_ts = _guess_timestamp(start_date, "START")
    end_ts = _guess_timestamp(end_date, "END")
    return start_ts, end_ts

def print_gaps(ofile, gaps):
    """Write the gap report to 'ofile'.

    Output is a "Gaps:" header line followed by one indented
    YYYY-MM-DD line per entry in 'gaps'; each entry is a
    (year, month, day) tuple.
    """
    ofile.write("Gaps:\n")
    line_format = "%04d-%02d-%02d\n"
    for entry in gaps:
        ofile.write(INDENT + line_format % entry)

def db_connect(url=None, password=None, db=None):
    """Connect to a RDBMS at the given 'url' (which includes
    the type of database in its scheme://), and log in to the
    database 'db'.

    'password' is a flag, not the password itself: when it is true the
    password is read interactively with getpass and passed to the
    connection as the 'passwd' keyword.

    Returns a connection object.
    Raises ValueError if input params are bad
    Raises ConnectError if connection attempt fails
    """
    log = get_logger(__file__)
    log.debug("get_dbmod", msg="Get database module for '%s'" % url)
    try:
        dbmod = loader.getDbForURL(url)
    except ValueError:
        raise ValueError("Bad URL format for '%s'" % url)
    if dbmod is None:
        raise ValueError("Cannot load DB module for URL '%s'" % url)
    dsn, conn_kw = loader.extractConnKeywords(url)
    if password:
        conn_kw['passwd'] = getpass.getpass()
    log.debug("connect.start", db=db, url=url)
    # Only the connect call itself is expected to raise DBConnectError
    try:
        conn = loader.connect(dbmod, conn_kw=conn_kw,
                              dsn=dsn, dbname=db)
    except util.DBConnectError as err:
        # translate to this script's own exception type for main()
        raise ConnectError(err)
    log.debug("connect.end", status=0, msg="OK")
    return conn

def main():
    """Program entry point

    Parse arguments, run query, print results, and exit.

    Returns 0 on success and -1 if the database connection could not
    be made (bad URL/input or a failed connection attempt). Invalid or
    missing command-line arguments are reported through parser.error().
    """
    # Parse arguments
    usage = "%prog [options]"
    # the module docstring, collapsed to one line, is the description
    desc = ' '.join(__doc__.split())
    parser = OptionParser(usage=usage, description=desc)
    avail_url_str = ", ".join(get_avail_url())
    parser.add_option('-d', '--db', default=None,
                action='store', dest='db', metavar='DBNAME',
                help="Database to use (required, except for sqlite)")
    parser.add_option('-n', '--ns', default=None,
                      action='store', dest='ns', metavar='STRING',
                      help="Namespace for events, e.g. 'srm' (default=ALL)")
    parser.add_option('-p', '--password', action='store_true', dest="password",
                      help="Prompt for database password")
    parser.add_option('-t', '--timerange', default=None,
                      action='store', dest='timerange', metavar="TIME-RANGE",
                      help="Time range for query, as START::END. "
                      "Format for START, END is ISO8601, numeric or English "
                      "like 'yesterday' or '2 weeks ago'. (default=ALL)")
    parser.add_option('-u', '--url', default=None,
                 action='store', dest='url', metavar='URL',
                 help="Connect to database server at URL. "
                 "One of: %s (required)" %  avail_url_str)
    options, args = parser.parse_args()
    missing = check_required_options(options)
    if missing:
        arg_list = ", ".join(missing)
        parser.error("Missing required arguments: %s" % arg_list)
    # Init logging
    log = get_logger(__file__)  # Should be first done, just after parsing args
    log.info("start")
    # Set up signal handlers
    util.handleSignals((on_kill, ('SIGTERM', 'SIGINT', 'SIGUSR1',
                                  'SIGUSR2', 'SIGHUP')))
    # Parse timerange
    if options.timerange is None:
        # default is all of representable time
        start_ts = 0
        end_ts = float(1<<32)
    else:
        try:
            start_ts, end_ts = parse_timerange(options.timerange)
        except ValueError as err:
            parser.error("Invalid time range: %s" % err)
    # Connect to database
    try:
        conn = db_connect(url=options.url, password=options.password,
                          db=options.db)
    except ValueError as err:
        log.error("connect.error", msg="bad input: %s" % err)
        # without a connection there is nothing to query; stop here
        # instead of falling through to an undefined 'conn'
        return -1
    except ConnectError as err:
        log.error("connect.error", msg=err)
        return -1
    # Create and run query
    log.info("query.start")
    ofile = sys.stdout
    query = GapQuery(conn, namespace=options.ns,
                     start_time=start_ts, end_time=end_ts)
    query.execute()
    log.info("query.end", status=0)
    # Print report
    print_gaps(ofile, query.get_gaps())
    log.info("end", status=0)
    return 0

# Run main() only when executed as a script; its return value becomes
# the process exit status.
if __name__ == '__main__':
    sys.exit(main())
