#!/usr/bin/env python
"""
Driver for Python/R bandwidth predictions
"""
__rcsid__ = "$Id: nl_loader 23968 2009-10-15 14:30:44Z dang $"
__author__ = "Dan Gunter"

# System imports
from datetime import datetime
import sys
# Third-party imports
import pymongo
# Local imports
from netlogger import nldate
from netlogger.analysis.datamining import rpython
from netlogger.analysis.datamining import bandwidth
from netlogger.nllog import OptionParser, get_logger

class Koa:
    """Names of the koa log record fields referenced by this script.

    The attributes are used as keys when building the MongoDB query
    expression.
    """
    src, dst, status = 'src_hostname', 'dst_hostname', 'status'
    
def query_mongo(conn, start=None, end=None,
                src=None, dst=None, db="", collection="",
                user=None, passwd=None):
    """Run the transfer-record query and return the resulting cursor.

    Arguments:
      conn       - connection object, indexed by database name
      start, end - optional bounds on the 'ts' field; each is converted
                   with datetime.fromtimestamp() and applied as a strict
                   '$gt' / '$lt' comparison. None means unbounded.
      src, dst   - values matched against the Koa.src / Koa.dst fields
      db         - name of the database to use
      collection - name of the collection to search
      user, passwd - if both are given, authenticate to the database
                   first; halt() is called if authentication fails.

    Only records whose Koa.status field equals 0 are matched (errors
    are not of interest).

    Returns whatever the collection's find() returns for the query.
    """
    database = conn[db]
    if not (user is None or passwd is None):
        if not database.authenticate(user, passwd):
            halt("Could not authenticate to DB as user '%s'" % user)
    records = database[collection]
    # Time window: only the bounds that were supplied are included
    window = {}
    if start is not None:
        window['$gt'] = datetime.fromtimestamp(start)
    if end is not None:
        window['$lt'] = datetime.fromtimestamp(end)
    query = {}
    if window:
        query['ts'] = window
    # Endpoints are always part of the query
    query[Koa.src] = src
    query[Koa.dst] = dst
    # Successful transfers only
    query[Koa.status] = 0
    return records.find(query)
 
def halt(msg):
    """Write `msg` and an 'Abort.' line to standard error, then exit.

    The process exits with status 1 (raised as SystemExit).
    """
    sys.stderr.writelines([msg + "\n", "Abort.\n"])
    raise SystemExit(1)
    
def _split_name_value(parser, opt, errmsg):
    """Split a NAME=VALUE option string on its first '='.

    Only the first '=' separates the name from the value, so the value
    itself may contain '=' characters (e.g. a password).
    Calls parser.error(errmsg) if `opt` contains no '=' at all.

    Returns the pair (name, value).
    """
    name, sep, value = opt.partition('=')
    if not sep:
        parser.error(errmsg)
    return name, value


def _parse_date(parser, text, label):
    """Convert one end of the time range with nldate.guess().

    `label` ("START" or "END") is only used in error messages.
    Calls parser.error() if the text is rejected or its format is
    reported as nldate.UNKNOWN.

    Returns the second element of the nldate.guess() result, which is
    later handed to query_mongo() as a time bound.
    """
    try:
        fmt, ts = nldate.guess(text)
    except ValueError as err:
        parser.error("Bad format '%s' for %s date: %s" % (text, label, err))
    if fmt == nldate.UNKNOWN:
        parser.error("Unknown format for %s date '%s'" % (label, text))
    return ts


def main():
    """Program entry point.

    Parses the command line, queries MongoDB for matching records,
    feeds them to the bandwidth predictor and prints the prediction.
    Invalid options are reported with parser.error(); an empty query
    result is reported with halt().

    Returns 0 on success.
    """
    usage = "%prog [options]"
    desc = ' '.join(__doc__.split())
    parser = OptionParser(usage=usage, description=desc)
    parser.add_option("-e", "--endpoints", metavar="HOST1:HOST2",
                      dest="subj", action="store", default="",
                      help="Endpoints of desired path")
    parser.add_option("-m", "--mongo-option", metavar='NAME=VALUE',
                      dest="mongo", action="append", default=[],
                      help="MongoDB options: host, port, user, passwd, "
                      "db, collection")
    parser.add_option("-p", "--param", metavar="NAME=VALUE",
                      dest="param", action="append", default=[],
                      help="Other parameters for transfer: "
                      "concurrency, parallelism")
    parser.add_option('-t', '--time-range', default="",
                      action='store', dest='timerange', metavar="TIME-RANGE",
                      help="Time range for query, as START::END. "
                      "Format for START, END is ISO8601, numeric or English "
                      "like 'yesterday' or '2 weeks ago'. "
                      "See nl_date for details (required)")
    parser.add_option('-z', '--sizes', dest="file_sizes", metavar="SIZES..",
                      help="Expected file size mix, notated as "
                      "<size1>:<pct1>,<size2>:<pct2>. "
                      "For example, '0:5,1e6:5,5e7:90' means "
                      "5% of the files will be less than 1MB, "
                      "5% will be between 1MB and 50MB, and "
                      "90% will be greater than 50MB (default=%default)",
                      default='0:10,5e7:90')
    options, args = parser.parse_args()
    # NOTE(review): the logger is not used below; the call is kept in case
    # get_logger() has initialization side effects -- confirm.
    log = get_logger(__file__)
    # Parse time range
    try:
        start_date, end_date = options.timerange.split('::', 1)
    except ValueError:
        parser.error("Time range must be in format START::END, e.g. "
                     "2009-01::2009-03")
    start_ts = _parse_date(parser, start_date, "START")
    end_ts = _parse_date(parser, end_date, "END")
    # Connect to database
    mongo_kw = {'host': 'localhost'}
    mongo_db, mongo_collection = 'test', 'test'
    mongo_user, mongo_passwd = None, None
    for opt in options.mongo:
        k, v = _split_name_value(
            parser, opt, "MongoDB options must be in the form NAME=VALUE")
        if k == 'db':
            mongo_db = v
        elif k == 'collection':
            mongo_collection = v
        elif k == 'user':
            # Credentials belong to query_mongo(), not to the connection
            mongo_user = v
        elif k == 'passwd':
            mongo_passwd = v
        elif k == 'port':
            # The connection expects a numeric port, not a string
            try:
                mongo_kw[k] = int(v)
            except ValueError:
                parser.error("MongoDB port must be an integer, got '%s'" % v)
        else:
            mongo_kw[k] = v
    if (mongo_user is None) != (mongo_passwd is None):
        # query_mongo() only authenticates when both are present
        parser.error("MongoDB options 'user' and 'passwd' must be "
                     "given together")
    try:
        conn = pymongo.Connection(**mongo_kw)
    except pymongo.errors.ConnectionFailure:
        parser.error("Couldn't connect to MongoDB with parameters: %s" %
                     mongo_kw)
    # Parse file sizes: maps size threshold in bytes -> percentage
    file_sizes = {}
    try:
        for sz in options.file_sizes.split(","):
            nbytes, pct = sz.split(":")
            # Go through float() so notation like '5e7' is accepted
            file_sizes[int(float(nbytes))] = int(pct)
    except ValueError:
        parser.error("Bad file size")
    # Parse parameters
    param = {'concurrency': -1,  # -1 means 'any'
             'parallelism': -1}
    for opt in options.param:
        k, v = _split_name_value(
            parser, opt, "Parameters must be in the form NAME=VALUE")
        try:
            param[k] = int(v)  # XXX: may not be true for some params?
        except ValueError:
            parser.error("Parameter '%s' must be an integer, got '%s'" %
                         (k, v))
    # Parse subject
    try:
        src_host, dst_host = options.subj.split(':')
    except ValueError:
        parser.error("Endpoint must be in the form HOST1:HOST2")
    # Query DB
    cursor = query_mongo(conn, start=start_ts, end=end_ts,
                         src=src_host, dst=dst_host,
                         db=mongo_db, collection=mongo_collection,
                         user=mongo_user, passwd=mongo_passwd)
    if not hasattr(cursor, 'next'):
        halt("Query returned no results")
    # Run prediction
    try:
        df = rpython.cursor_data_frame(cursor)
    except StopIteration:
        halt("Query returned no results")
    predictor = bandwidth.PredictR(df)
    pred = predictor.get_value(size_mix=file_sizes, **param)
    # Report (single-argument parenthesized form prints the same text
    # whether print is a statement or a function)
    print("Prediction (n=%d)" % pred.n)
    print(str(pred))
    # Done
    return 0

# When run as a script, call main() and use its return value as the
# process exit status.
if __name__ == '__main__':
    sys.exit(main())
