"""
Run derived collection builder.

See the -h option for details on the options.

Subcommands:

* merge - Merge two collections, e.g., a "sandbox" and core DB.
* build - Run a builder command.
* run - Alias for "build".

"""
__author__ = 'Dan Gunter <dkgunter@lbl.gov>'
__date__ = '5/22/13'

# Imports
# -------

# System imports.
import argparse
import importlib
import itertools as itt
import json
import logging
import os
import sys
import time
import traceback

# Third-party imports.
import pymongo

# Local imports.
from matgendb.builders import core
from matgendb.util import csv_list, kvp_dict
from matgendb.query_engine import QueryEngine
from matgendb.builders.incr import TrackedQueryEngine, UnTrackedQueryEngine, Operation

# Global variables.

# Module-level logger. It stays None until main() sets up logging, so the
# functions below can only log after main() has run.
_log = None     # configured in main()

# Configuration file that get_settings() falls back to when it is given no
# file name (and the fallback is allowed).
DEFAULT_CONFIG_FILE = "db.json"

# Suffix for merged tasks collection
MERGED_SUFFIX = "merged"

# Prefix on a QueryEngine configuration path, given as a builder parameter on
# the command line, that turns off incremental mode for that one QueryEngine
# (see create_qe()).
NOINCR_FLAG = '-'

# Exceptions
# ----------


class BuilderError(Exception):
    """Base class for builder-related errors defined in this module."""


class ConfigurationError(BuilderError):
    """Error in a configuration file or in user-supplied arguments.

    The exception message has the form "class <where>: <why>".

    :param where: What was being configured (file name, builder name, ...)
    :param why: Description of the problem, or the underlying exception
    """
    def __init__(self, where, why):
        message = "class {}: {}".format(where, why)
        super(ConfigurationError, self).__init__(message)


class BuilderNotFoundError(Exception):
    """Raised when the requested builder class cannot be located."""

# Commands
# --------


def command_merge(args):
    """Command: Merge tasks from two collections.
    Intended for merging a sandbox and core db.

    :param args: Parsed command-line arguments of the 'merge' sub-command
    :type args: argparse.Namespace
    :return: 0 on success, -1 if MongoDB reported a duplicate key
    :rtype: int
    :raise: ConfigurationError if no sandbox file was given, or if a
            configuration file could not be read
    """
    # Check args.
    if not args.sandbox_file:
        raise ConfigurationError("sandbox filename",
                                 "In sandbox mode, -s/--sandbox is required")

    # Connect to "core" collections.
    try:
        settings = get_settings(args.config_file)
    except ConfigurationError:
        # Log where the failure happened, then let the caller report it.
        _log.error("command_merge.configuration")
        raise
    core_db = QueryEngine(**settings)
    # The 'merged_tasks' attribute may be absent from `args`; a missing
    # attribute is treated the same as a false one.
    if getattr(args, 'merged_tasks', False):
        suffix = MERGED_SUFFIX
    else:
        suffix = None
    core_collections = core.Collections(core_db, task_suffix=suffix)

    # Setup.
    sandbox_settings = get_settings(args.sandbox_file)
    sandbox_db = QueryEngine(**sandbox_settings)
    sdb = sandbox_settings['database']
    pfx = args.coll_prefix
    sandbox_collections = core.Collections(sandbox_db, prefix=pfx)
    # set task id prefix: explicit name, else collection prefix, else default
    if args.sandbox_name:
        id_prefix = args.sandbox_name
    elif pfx:
        id_prefix = pfx
    else:
        id_prefix = "sandbox"
    # set target collection name
    if pfx:
        target = "{}.tasks.{}".format(pfx, MERGED_SUFFIX)
    else:
        target = "tasks.{}".format(MERGED_SUFFIX)

    # Perform the merge.
    _log.debug("sandbox.merge.begin: sandbox={}".format(sdb))
    t0 = time.time()
    try:
        core.merge_tasks(core_collections, sandbox_collections, id_prefix,
                         target, wipe=args.wipe_target)
    except pymongo.errors.DuplicateKeyError as err:
        _log.error("sandbox.merge.end error=merge.duplicate_key msg={}".format(err))
        tell_user("\nDuplicate key error from MongoDB.\nUse -W/--wipe to clear target collection before merge.\n")
        return -1
    _log.debug("sandbox.merge.end: sandbox={} duration_sec={:g}".format(sdb, time.time() - t0))
    tell_user("Merged tasks: db={} collection={}".format(sdb, target))

    return 0

def _show_builder(b, params=None, strm=sys.stderr):
    """Print a formatted version of builder info to the console.
    `b` is a builder class.
    """
    info = dict(desc=(b.__doc__ or "(no description)").strip(),
                name='.'.join((b.__module__, b.__name__)))
    strm.write("\n{name}\n  {desc}\n".format(**info))
    if params is None:
        params = core.parse_fn_docstring(b.get_items)
    strm.write("  Parameters:\n")
    for p, pval in params.iteritems():
        info = dict(name=p, type=pval['type'], desc=pval['desc'])
        strm.write("    {name} ({type}): {desc}\n".format(**info))
    strm.write("\n")

def command_build(args):
    """Command: Run builder, invoked from 'build' sub-command

    :param args: Parsed command-line arguments
    :type args: argparse.Namespace
    :return: 0, both after a build and after only printing usage
    :rtype: int
    :raise: BuilderNotFoundError if the builder class cannot be found;
            ConfigurationError if the given parameters do not match the
            builder's parameters or a query engine cannot be created
    """
    # Get builder, optionally in module.
    bld_arg = args.builder
    if '.' in bld_arg:
        bld_path, bld_class = bld_arg.rsplit('.', 1)
    else:
        bld_path, bld_class = None, bld_arg
    builder_class = find_builder(bld_path, bld_class)
    if builder_class is None:
        raise BuilderNotFoundError(bld_arg)

    # Create builder
    builder = builder_class(ncores=args.num_cores)

    # Get parameter types
    params = builder.get_parameters()
    if params is None:
        # Parse builder's get_items() method docstring.
        _log.debug("parse builder docstring")
        params = core.parse_fn_docstring(builder_class.get_items)

    # Check args against the builder parameters
    if args.usage:
        _show_builder(builder_class, params=params)
        return 0
    user_params = [x[0] for x in args.params]
    meth_params = params.keys()
    if set(user_params) != set(meth_params):
        if len(user_params) != len(meth_params):
            # Index the pair of wordings with the boolean "too many?".
            adj, ineq = (("few", "<"), ("many", ">"))[
                len(user_params) > len(meth_params)]
            msg = "too {} arguments (got {:d} {} wanted {:d})".format(
                adj, len(user_params), ineq, len(meth_params))
        else:
            bad_params = [p for p in user_params if p not in meth_params]
            msg = "unknown argument(s): {}".format(csv_list(bad_params))
        _show_builder(builder_class, params=params)
        raise ConfigurationError(args.builder, msg)

    # Fill builder args using builder parameter types
    simple_types = {'int': int, 'float': float}  # converted by calling the type
    eval_types = ('dict', 'list')                # parsed as Python literals
    parsed_builder_args, query_engines = {}, []
    for key, value in args.params:
        _type = params[key]['type']
        if _type.endswith("QueryEngine"):
            # Replace path to query engine config with a QE instance
            try:
                qe = create_qe(value, args.incr)
                parsed_builder_args[key] = qe
                query_engines.append(qe)
            except ValueError as err:
                raise ConfigurationError("creating query engine", err)
        elif _type in simple_types:
            parsed_builder_args[key] = simple_types[_type](value)
        elif _type in eval_types:
            # NOTE(review): eval() executes arbitrary expressions taken from
            # the command line. ast.literal_eval() would be the safer choice
            # if only literal dicts/lists need to be accepted -- confirm
            # before changing, since it would reject non-literal expressions.
            parsed_builder_args[key] = eval(value)
        else:
            parsed_builder_args[key] = value

    # Run builder.
    _log.info("run: {}({})".format(builder_class.__name__,
                                   kvp_dict(parsed_builder_args)))
    count = builder.run(user_kw=parsed_builder_args)
    if count < 1:
        # Nothing processed is suspicious enough to be shown by default.
        _log.warning("Processed {:d} items".format(count))
    else:
        _log.info("Processed {:d} items".format(count))
    # Save current position, for all query engines
    for qe in query_engines:
        qe.set_mark()

    return 0

# Utility functions
# -----------------

def create_qe(path, incr=None):
    """Build a query engine from the configuration file at `path`.

    :param path: Configuration file name. In incremental mode, a leading
                 NOINCR_FLAG turns incremental mode off for this engine and
                 is removed from the name.
    :type path: str
    :param incr: None for a non-incremental build, otherwise a dict with
                 keys 'op' and 'field'
    :type incr: dict or None
    :return: A TrackedQueryEngine in incremental mode, otherwise an
             UnTrackedQueryEngine
    :raise: ValueError if the operation name is not a member of Operation,
            or if the connection to the database fails
    """
    # Opt this one engine out of incremental mode if it carries the flag.
    if incr is not None and path.startswith(NOINCR_FLAG):
        path, incr = path[1:], None

    settings = get_settings(path)

    # Pick the engine class, adding tracking settings where needed.
    if incr is None:
        _log.info("Non-incremental build: {}".format(path))
        engine_class = UnTrackedQueryEngine
    else:
        _log.info("Incremental build: {}".format(path))
        op_name = incr['op']        # both keys are set by incr_option()
        field_name = incr['field']
        engine_class = TrackedQueryEngine
        try:
            tracked_op = Operation[op_name]
        except KeyError:
            raise ValueError("Bad operation for incremental mode: '{}'"
                             .format(op_name))
        # The operation and field are what the tracked engine uses to
        # remember its position in the source collection.
        settings['track_operation'] = tracked_op
        settings['track_field'] = field_name

    # Create the engine, reporting connection problems as ValueError.
    _log.debug("QueryEngine.create settings={}".format(settings))
    try:
        return engine_class(**settings)
    except pymongo.errors.ConnectionFailure as err:
        raise ValueError("Cannot connect from settings in '{}': {}"
                         .format(path, err))

def tell_user(message):
    """Print something to the user.

    Unlike the log messages, this output is not subject to the logging
    level; it is written with print().

    :param message: Text to show
    """
    print(message)


def get_settings(config_file, allow_default=True):
    """Read settings from a configuration file.

    :param config_file: Name of a JSON file. If empty or None, the default
                        file (DEFAULT_CONFIG_FILE) is used, provided it is
                        allowed and exists.
    :type config_file: str or None
    :param allow_default: Whether falling back to the default file is allowed
    :type allow_default: bool
    :return: The parsed configuration, after user/password and aliases
             normalization
    :rtype: dict
    :raise: ConfigurationError if no file can be chosen, or the file cannot
            be opened or parsed
    """
    try:
        if config_file:
            path = config_file
        elif allow_default and os.path.exists(DEFAULT_CONFIG_FILE):
            path = DEFAULT_CONFIG_FILE
        else:
            raise ValueError("Default configuration '{}' not found".format(DEFAULT_CONFIG_FILE))
        # Context manager so the file is closed even when parsing fails.
        with open(path) as cfg_file:
            cfg = json.load(cfg_file)
    except Exception as err:
        # Any failure here (missing file, bad JSON, no default) is reported
        # uniformly as a configuration problem.
        raise ConfigurationError(config_file, err)
    normalize_userpass(cfg)
    normalize_aliases(cfg)
    return cfg

def find_builder(module_name, class_name):
    """Find a builder class.
    Return class obj, or None if not found.

    :param module_name: Dotted name of the module to import. None or an
                        empty string (no module part was given) counts as
                        "not found".
    :type module_name: str or None
    :param class_name: Name of the attribute to look up in that module
    :type class_name: str
    :return: The attribute named `class_name`, or None
    """
    if not module_name:
        # import_module() cannot handle None or '' (it raises AttributeError
        # or ValueError, not ImportError), so report "not found" instead of
        # crashing; the caller decides how to report a missing builder.
        return None
    try:
        md = importlib.import_module(module_name)
    except ImportError:
        _log.info("Failed to import module '{}'".format(module_name))
        return None
    if class_name not in dir(md):
        _log.info("Failed to find class '{}' in module '{}'"
                  .format(class_name, module_name))
        return None
    return getattr(md, class_name)


def normalize_userpass(cfg):
    """Fold prefixed credentials in a DB connection config into plain ones.

    For each of the prefixes 'readonly' and 'admin', if both the
    '<prefix>_user' and '<prefix>_password' keys are present, their values
    are stored under QueryEngine.USER_KEY and QueryEngine.PASSWORD_KEY and
    the prefixed keys are removed. 'admin' is handled last, so it takes
    precedence when both pairs are present.

    :param cfg: Configuration, modified in-place
    :type cfg: dict
    :return: None
    """
    # Lowest priority first: a later pair overwrites an earlier one.
    for role in ('readonly', 'admin'):
        user_key = role + '_user'
        password_key = role + '_password'
        if user_key not in cfg or password_key not in cfg:
            continue  # need the complete pair
        cfg[QueryEngine.USER_KEY] = cfg[user_key]
        cfg[QueryEngine.PASSWORD_KEY] = cfg[password_key]
        del cfg[user_key]
        del cfg[password_key]


def normalize_aliases(cfg):
    """Decide what aliases configuration the QueryEngine class will get.

    A value already present under QueryEngine.ALIASES_CONFIG_KEY is left
    untouched. Otherwise the collection name decides: for 'materials' and
    'tasks' nothing is added, so the QueryEngine defaults apply; for any
    other collection an empty aliases configuration is put in to override
    those defaults.

    :param cfg: Configuration read from JSON
    :type cfg: dict
    :return: None (cfg is modified in-place)
    """
    aliases_key = QueryEngine.ALIASES_CONFIG_KEY
    if aliases_key in cfg:
        # The user chose something explicitly; respect it.
        return
    if cfg[QueryEngine.COLLECTION_KEY] not in ('materials', 'tasks'):
        # Not a collection the defaults are meant for: blank them out.
        cfg[aliases_key] = {'aliases': {}, 'defaults': {}}

def kvp_option(arg):
    """Parse key=value pair into (key, value)

    Meant to be used as an argparse `type=` callable. Only the first '='
    separates key from value, so the value may itself contain '='.

    :param arg: Text of the form <key>=<value>
    :type arg: str
    :return: Pair (key, value)
    :rtype: tuple
    :raise: argparse.ArgumentTypeError if `arg` contains no '='
    """
    key, sep, value = arg.partition('=')
    if not sep:
        # ArgumentTypeError is what argparse turns into a usage error that
        # shows this message. ConfigurationError needs two arguments and
        # would not be converted by argparse into a usage error.
        raise argparse.ArgumentTypeError(
            "Bad key=value pair: {}".format(arg))
    return key, value

def incr_option(arg, default_field='_id'):
    """Turn an OPERATION[:FIELD] option value into a small dictionary.

    :param arg: Option text; everything after the first ':' is the field
    :type arg: str
    :param default_field: Field to use when `arg` contains no ':'
    :type default_field: str
    :return: Dictionary with the keys 'op' and 'field'
    :rtype: dict
    """
    operation, colon, field = arg.partition(':')
    if not colon:
        # No field part at all (as opposed to an empty one after a ':').
        field = default_field
    return {'op': operation, 'field': field}

# Main program
# ------------


def main():
    """Parse the command line, configure logging, and run the chosen command.

    :return: The value returned by the command function, or -1 if it raised
             ConfigurationError, BuilderNotFoundError or BuilderError. Any
             other exception is reported through the parser's error()
             method, which ends the program instead of returning.
    :rtype: int
    """
    global _log

    # Configure parent parser for shared args.
    parent_parser = argparse.ArgumentParser(add_help=False)
    parent_parser.add_argument('--quiet', '-q', dest='quiet', action="store_true", default=False,
                               help="Minimal verbosity.")
    parent_parser.add_argument('--verbose', '-v', dest='vb', action="count", default=0,
                               help="Print more verbose messages to standard error. Repeatable. (default=ERROR)")
    # Set up argument parsing.
    p = argparse.ArgumentParser(description="Build databases")
    subparsers = p.add_subparsers(description="Actions")

    # Merge action.
    subp = subparsers.add_parser("merge", help="Merge sandbox and core database",
                                 parents=[parent_parser])
    subp.set_defaults(func=command_merge)
    subp.add_argument("-c", "--config", dest="config_file", type=str, metavar='FILE',
                      default=DEFAULT_CONFIG_FILE,
                      help="Configure database connection from FILE (%(default)s)")
    subp.add_argument("-n", "--name", dest="sandbox_name", type=str, metavar="NAME", default=None,
                      help="Sandbox name, for prefixing merged task IDs. "
                           "If not given, try to use -p/--prefix, then default (sandbox)")
    subp.add_argument("-p", "--prefix", dest="coll_prefix", type=str, metavar='PREFIX', default=None,
                      help="Collection name prefix for input (and possibly output) collections")
    subp.add_argument("-s", "--sandbox", dest="sandbox_file", type=str, metavar='FILE', default=None,
                      help="Configure sandbox database from FILE (required)")
    subp.add_argument("-W", "--wipe", dest="wipe_target", action="store_true",
                      help="Wipe target collection, removing all data in it, before merge")

    # Build actions: called either 'run' or 'build'.
    iops = csv_list(Operation.__members__.keys())  # same for both actions
    for action in "build", "run":
        subp = subparsers.add_parser(action, help="run a builder",
                                     parents=[parent_parser])
        subp.set_defaults(func=command_build)
        subp.add_argument("builder", type=str, metavar="builder", default="",
                          help="Builder class, relative or absolute import "
                               "path, e.g. "
                               "'my.awesome.BigBuilder' or 'BigBuilder'.")
        subp.add_argument("params", nargs="*", metavar="parameter",
                          type=kvp_option,
                          help="Builder parameters, in format <name>=<value>. "
                               "If the parameter type is QueryEngine, the value"
                               " should be a JSON configuration file. "
                               "Prefix filename with a '{}' to ignore "
                               "incremental mode for this QueryEngine."
                               .format(NOINCR_FLAG))
        subp.add_argument("-i", "--incr", dest="incr", type=incr_option,
                          metavar="OPER[:FIELD]",
                          help="Incremental mode for operation and optional "
                               "sort-field name. OPER may be one of: "
                               "{}. Default FIELD is '_id'".format(iops))
        subp.add_argument("-n", "--ncores", dest="num_cores", type=int, default=1,
                          help="Number of cores or processes to run "
                               "in parallel (%(default)d)")
        subp.add_argument("-u", "--usage", action="store_true", dest="usage",
                          help="Print usage information on selected builder "
                               "and exit.")
    # Parse arguments.
    args = p.parse_args()

    # Without a sub-command the namespace may lack 'func' and also the
    # shared options ('quiet', 'vb') that only sub-parsers define, so check
    # for it before touching any of them.
    if getattr(args, 'func', None) is None:
        p.error("No action given")

    # Configure logging.
    _log = logging.getLogger("mg")  # parent
    _log.propagate = False  # don't send logs up
    hndlr = logging.StreamHandler()
    hndlr.setFormatter(logging.Formatter(
        "[%(levelname)-6s] %(asctime)s %(name)s :: %(message)s"))
    _log.addHandler(hndlr)
    if args.quiet:
        lvl = logging.CRITICAL
    elif args.vb > 1:
        lvl = logging.DEBUG
    elif args.vb > 0:
        lvl = logging.INFO
    else:
        lvl = logging.WARN
    _log.setLevel(lvl)
    # From here on, log through a child of the logger configured above.
    _log = logging.getLogger("mg.build")

    # Run function.
    try:
        result = args.func(args)
    except ConfigurationError as err:
        _log.error("Configuration error: {}".format(err))
        result = -1
    except BuilderNotFoundError as err:
        _log.error("Builder not found: {}".format(err))
        result = -1
    except BuilderError as err:
        _log.error("{}".format(err))
        result = -1
    except Exception as err:
        # Show the traceback only when the user asked for verbose output.
        if _log.getEffectiveLevel() <= logging.INFO:
            print("{}".format(traceback.format_exc()))
        # error() prints the usage and ends the program; nothing after this
        # call runs.
        p.error("{} error: {}".format(args.func.__name__, err))
    if result < 0:
        _log.error("Failure: {:d}".format(result))
    return result

# When run as a script, main()'s return value becomes the exit status.
if __name__ == '__main__':
    sys.exit(main())
