"""
Run derived collection builder.

See the -h option for details on the options.

Subcommands:

* merge - Merge two collections, e.g., a "sandbox" and core DB.
* list - List builder commands.
* build - Run a builder command.

"""
__author__ = 'Dan Gunter <dkgunter@lbl.gov>'
__date__ = '5/22/13'

# Imports
# -------

# System imports.
import argparse
import imp
import json
import logging
import os
import sys
import time
import traceback

# Third-party imports.
import pymongo

# Local imports.
from matgendb.builders import core
from matgendb.query_engine import QueryEngine

# Global variables.

# Module-level logger; stays None until main() sets up logging.
_log = None     # configured in main()

# Configuration file that get_settings() falls back to when no file is given.
DEFAULT_CONFIG_FILE = "db.json"

# Suffix for merged tasks collection
MERGED_SUFFIX = "merged"

# Exceptions
# ----------


class BuilderError(Exception):
    """Base class for errors raised while configuring or running a builder."""


class ConfigurationError(BuilderError):
    """Error raised when a configuration cannot be loaded or is invalid.

    :param where: What was being configured (e.g. a file name)
    :param why: Reason for the failure
    """
    def __init__(self, where, why):
        message = "Failed to load configuration {}: {}".format(where, why)
        super(ConfigurationError, self).__init__(message)


class BuilderNotFoundError(Exception):
    """Error raised when no Builder subclass is found in the requested module."""

# Commands
# --------


def command_merge(args):
    """Command: Merge tasks from two collections.
    Intended for merging a sandbox and core db.

    :param args: Parsed command-line arguments
    :type args: argparse.Namespace
    :return: 0 on success, -1 if MongoDB reports a duplicate key
    :rtype: int
    :raise: ConfigurationError if no sandbox file was given or a
            configuration file cannot be loaded
    """
    # Check args.
    if not args.sandbox_file:
        raise ConfigurationError("sandbox filename",
                                 "In sandbox mode, -s/--sandbox is required")

    # Connect to "core" collections.
    try:
        settings = get_settings(args.config_file)
    except ConfigurationError:
        _log.error("command_merge.configuration")
        raise
    core_db = QueryEngine(**settings)
    # 'merged_tasks' is not defined by every parser, so treat a missing
    # attribute the same as a false one.
    suffix = MERGED_SUFFIX if getattr(args, 'merged_tasks', False) else None
    core_collections = core.Collections(core_db, task_suffix=suffix)

    # Setup.
    sandbox_settings = get_settings(args.sandbox_file)
    sandbox_db = QueryEngine(**sandbox_settings)
    sdb = sandbox_settings['database']
    pfx = args.coll_prefix
    sandbox_collections = core.Collections(sandbox_db, prefix=pfx)
    # Task id prefix: explicit sandbox name, else collection prefix, else
    # the literal "sandbox".
    id_prefix = args.sandbox_name or pfx or "sandbox"
    # Target collection name.
    if pfx:
        target = "{}.tasks.{}".format(pfx, MERGED_SUFFIX)
    else:
        target = "tasks.{}".format(MERGED_SUFFIX)

    # Perform the merge.
    _log.debug("sandbox.merge.begin: sandbox={}".format(sdb))
    t0 = time.time()
    try:
        core.merge_tasks(core_collections, sandbox_collections, id_prefix, target,
                         wipe=args.wipe_target)
    except pymongo.errors.DuplicateKeyError as err:
        _log.error("sandbox.merge.end error=merge.duplicate_key msg={}".format(err))
        tell_user("\nDuplicate key error from MongoDB.\n"
                  "Use -W/--wipe to clear target collection before merge.\n")
        return -1
    _log.debug("sandbox.merge.end: sandbox={} duration_sec={:g}".format(sdb, time.time() - t0))
    tell_user("Merged tasks: db={} collection={}".format(sdb, target))

    return 0


def command_list(args):
    """Command: List all the builders in a given module dir.

    :param args: Parsed command-line arguments; only ``mod_path`` is used
    :type args: argparse.Namespace
    :return: Number of builders shown (0 if the parent module failed to load)
    :rtype: int
    """
    # Load parent module.
    module = load_module(args.mod_path)
    if module is None:
        print("Error loading module {}. Abort.".format(args.mod_path))
        return 0
    # Get all Python modules in directory; sorted so that the listing is
    # the same from run to run (os.listdir order is arbitrary).
    path = os.path.dirname(module.__file__)
    pyfiles = sorted(f for f in os.listdir(path)
                     if f.endswith('.py') and not f.startswith('__'))
    # Convert back to full module paths.
    pymods = ["{}.{}".format(args.mod_path, os.path.splitext(f)[0]) for f in pyfiles]
    # Find builders in the module paths. A real list is built (rather than
    # relying on filter/map) so len() works and the builders are actually
    # shown when filter/map return lazy iterators.
    builders = [b for b in (get_builder(m) for m in pymods) if b is not None]
    n = len(builders)
    if n > 0:
        print("Found {:d} builder{}:".format(n, 's' if n > 1 else ''))
        for builder in builders:
            _show_builder(builder)
    else:
        print("No builders found in module {}".format(args.mod_path))
    return n


def _show_builder(b):
    """Print a formatted version of builder info to the console.

    Shows the builder's module and class name with its docstring, then the
    keywords found as ``:param <name>:`` / ``:type <name>:`` lines in the
    docstring of its ``setup`` method. Other fields (e.g. ``:return:``) are
    ignored, and missing docstrings are treated as empty.

    :param b: Builder class to describe
    """
    indent = " " * 4
    modname = b.__module__.split(".")[-1]
    print("\n{m}".format(m=modname))
    print("{i}{name}: {desc}".format(i=indent, name=b.__name__,
                                     desc=(b.__doc__ or "").strip()))
    # name -> [description, type]; either slot stays None if not documented
    params = {}
    for line in (b.setup.__doc__ or "").split("\n"):
        words = line.strip().split()
        if len(words) < 2 or words[0] not in (":param", ":type"):
            continue  # not a keyword field, e.g. ':return:' or plain text
        param_name = words[1].split(":")[0]
        text = ' '.join(words[2:])
        # setdefault so that ':type' may come before (or without) ':param'
        entry = params.setdefault(param_name, [None, None])
        if words[0] == ":param":
            entry[0] = text  # desc goes first
        else:
            entry[1] = text  # type goes second
    print("{i}{i}Keywords:".format(i=indent))
    for key, (desc, type_) in params.items():
        print("{i}{i}{i}{name} = ({type}) {desc}".format(i=indent, name=key, type=type_,
                                                         desc=desc))


def command_build(args):
    """Command: Run builder, invoked from 'build' sub-command

    The builder class is looked up in ``<mod_path>.<builder>``. The
    docstring of its ``setup`` method is parsed to learn the expected
    keywords; keywords whose type name ends in "QueryEngine" are replaced
    by a QueryEngine built from ``<config_path>/<value>.json``.

    :param args: Parsed command-line arguments
    :type args: argparse.Namespace
    :return: 0
    :rtype: int
    :raise: BuilderNotFoundError if the module has no Builder subclass,
            ConfigurationError for bad or missing keywords or an
            unreadable DB configuration,
            BuilderError if a DB connection fails
    """
    # Get builder in module.
    full_mod_path = "{}.{}".format(args.mod_path, args.builder)
    builder_class = get_builder(full_mod_path)
    if builder_class is None:
        raise BuilderNotFoundError("{}".format(full_mod_path))

    where = "for builder '{}'".format(args.builder)

    # Get keywords from args.
    args_kw = {}
    for kwd in args.keywords:
        try:
            key, value = kwd.split('=', 1)
        except ValueError:
            raise ConfigurationError(where, "Bad key=value pair: {}".format(kwd))
        args_kw[key] = value

    # Settings of the last QueryEngine keyword configured below; stays
    # empty if the builder takes no QueryEngine keywords.
    db_settings = {}

    # Parse builder's setup() method docstring.
    _log.debug("parse builder docstring")
    params, _ = core.parse_fn_docstring(builder_class.setup)
    for name, info in params.items():
        if 'type' not in info:
            raise ConfigurationError(where,
                                     "Missing ':type {}: <type>' in docstring".format(name))
        wants_mqe = is_mqe(info['type'])
        if name in args_kw:
            value = args_kw[name]
        elif wants_mqe:
            value = name  # for collection foo, default to 'foo.json' config
        elif '(optional)' in info['desc']:
            _log.info("Use default value for parameter '{}'".format(name))
            continue
        else:
            raise ConfigurationError(where,
                                     "Missing value for parameter '{}' in {}"
                                     .format(name, full_mod_path))
        # take special action for some types
        if wants_mqe:  # QueryEngine
            # configure new DB connection
            config_file = os.path.join(args.config_path, value + ".json")
            _log.info("load DB configuration for '{}' from '{}'".format(name, config_file))
            db_settings = get_settings(config_file)
            # replace value with MQE obj
            _log.debug("QueryEngine.create settings={}".format(db_settings))
            try:
                args_kw[name] = QueryEngine(**db_settings)
            except pymongo.errors.ConnectionFailure as err:
                raise BuilderError("Cannot connect from settings in '{}': {}"
                                   .format(config_file, err))
            _log.debug("Configured query engine {}: {}".format(name, db_settings))

    # Run builder.
    _log.info("run builder")
    builder = builder_class(ncores=args.num_cores, config=db_settings)
    count = builder.run(setup_kw=args_kw)
    # Processing nothing is suspicious enough to show at the default level.
    if count < 1:
        _log.warning("Processed {:d} items".format(count))
    else:
        _log.info("Processed {:d} items".format(count))

    return 0


# Utility functions
# -----------------


def tell_user(message):
    """Print something to the user.

    Used for user-facing output, as opposed to the diagnostics that the
    commands send to the module logger.

    :param message: Text to print on standard output
    """
    print(message)


def get_settings(config_file, allow_default=True):
    """Read settings from a configuration file.

    After loading, user/password and alias settings are normalized in place
    by normalize_userpass() and normalize_aliases().

    :param config_file: Path to a JSON file; if empty or None, fall back to
                        DEFAULT_CONFIG_FILE in the current directory
    :type config_file: str
    :param allow_default: Whether falling back to the default is allowed
    :type allow_default: bool
    :return: Normalized settings
    :rtype: dict
    :raise: ConfigurationError if no file is available, or the file cannot
            be opened or parsed
    """
    if config_file:
        path = config_file
    elif allow_default and os.path.exists(DEFAULT_CONFIG_FILE):
        path = DEFAULT_CONFIG_FILE
    else:
        raise ConfigurationError(config_file,
                                 "Default configuration '{}' not found".format(DEFAULT_CONFIG_FILE))
    try:
        # 'with' guarantees the file is closed, also when parsing fails
        with open(path) as fp:
            cfg = json.load(fp)
    except Exception as err:
        # report the file actually read, which may be the default
        raise ConfigurationError(path, err)
    normalize_userpass(cfg)
    normalize_aliases(cfg)
    return cfg


def is_mqe(type_name):
    """Tell whether a type name from a docstring denotes a QueryEngine.

    :param type_name: Type name, possibly with a dotted module prefix
    :type type_name: str
    :return: True if the name ends with "QueryEngine"
    :rtype: bool
    """
    suffix = "QueryEngine"
    return type_name.endswith(suffix)


def load_module(module):
    """Extend imp to handle dotted module paths.

    Each package along the path is located and loaded in turn, and its
    ``__path__`` is used to search for the next component. Note that every
    component is loaded under its own short name, not the full dotted name.

    :param module: Dotted module path, e.g. "matgendb.builders"
    :type module: str
    :return: The loaded module, or None if loading the last component
             failed with ValueError or ImportError
    :raise: ImportError if any component cannot be found, or a parent
            package fails to load
    """
    _log.debug("Loading module: {}".format(module))
    parts = module.split('.')
    path = None
    # navigate packages
    for pkg in parts[:-1]:
        fp, pathname, description = imp.find_module(pkg, path)
        try:
            parent = imp.load_module(pkg, fp, pathname, description)
        finally:
            # find_module leaves closing the file to the caller
            # (it is None for packages)
            if fp is not None:
                fp.close()
        path = parent.__path__
    # load module
    leaf = parts[-1]
    fp, pathname, description = imp.find_module(leaf, path)
    try:
        mod = imp.load_module(leaf, fp, pathname, description)
    except (ValueError, ImportError) as err:
        _log.warning("Skipping module {} on error: {}".format(module, err))
        mod = None
    finally:
        if fp is not None:
            fp.close()
    return mod


def get_builder(module):
    """Return the first proper subclass of core.Builder in a module.

    Attributes are examined in the order given by dir().

    :param module: Dotted path of the module to load and search
    :type module: str
    :return: The Builder subclass, or None if the module has none
    """
    moduleobj = load_module(module)
    for attr in dir(moduleobj):
        candidate = getattr(moduleobj, attr)
        try:
            found = issubclass(candidate, core.Builder) and not candidate == core.Builder
        except TypeError:
            # issubclass() rejects anything that is not a class
            continue
        if found:
            _log.debug("{}.{} is a Builder".format(module, attr))
            return candidate
    return None


def normalize_userpass(cfg):
    """Fold 'readonly_*' / 'admin_*' credentials into the plain user and
    password keys (QueryEngine.USER_KEY / QueryEngine.PASSWORD_KEY).

    A prefix is only used when both its '_user' and '_password' keys are
    present; those two keys are then removed. The 'admin' pair is handled
    last, so it wins if both pairs are given.

    :param cfg: DB connection configuration (modified in-place)
    :type cfg: dict
    """
    for role in ('readonly', 'admin'):  # lowest priority first, so later ones overwrite
        user_key = role + '_user'
        password_key = role + '_password'
        if user_key not in cfg or password_key not in cfg:
            continue
        cfg[QueryEngine.USER_KEY] = cfg.pop(user_key)
        cfg[QueryEngine.PASSWORD_KEY] = cfg.pop(password_key)


def normalize_aliases(cfg):
    """Make sure the aliases configuration suits the configured collection.

    A value set by the user is never touched. Otherwise the 'materials' and
    'tasks' collections keep the QueryEngine defaults (nothing is added),
    and any other collection gets an explicitly empty aliases configuration.

    :param cfg: Configuration read from JSON
    :type cfg: dict
    :return: None (cfg is modified in-place)
    """
    aliases_key = QueryEngine.ALIASES_CONFIG_KEY
    if aliases_key in cfg:
        return  # user's choice stands
    collection = cfg[QueryEngine.COLLECTION_KEY]
    if collection not in ('materials', 'tasks'):
        # empty aliases override the QueryEngine defaults
        cfg[aliases_key] = {'aliases': {}, 'defaults': {}}

# Main program
# ------------


def _make_parser():
    """Build the argument parser with the merge, list and build sub-commands."""
    # Configure parent parser for shared args.
    parent_parser = argparse.ArgumentParser(add_help=False)
    parent_parser.add_argument('--quiet', '-q', dest='quiet', action="store_true", default=False,
                               help="Minimal verbosity.")
    parent_parser.add_argument('--verbose', '-v', dest='vb', action="count", default=0,
                               help="Print more verbose messages to standard error. Repeatable. (default=WARN)")
    # Set up argument parsing.
    p = argparse.ArgumentParser(description="Build databases")
    subparsers = p.add_subparsers(description="Actions")

    # Merge action.
    subp = subparsers.add_parser("merge", help="Merge sandbox and core database",
                                 parents=[parent_parser])
    subp.set_defaults(func=command_merge)
    subp.add_argument("-c", "--config", dest="config_file", type=str, metavar='FILE',
                      default=DEFAULT_CONFIG_FILE,
                      help="Configure database connection from FILE (%(default)s)")
    subp.add_argument("-n", "--name", dest="sandbox_name", type=str, metavar="NAME", default=None,
                      help="Sandbox name, for prefixing merged task IDs. "
                           "If not given, try to use -p/--prefix, then default (sandbox)")
    subp.add_argument("-p", "--prefix", dest="coll_prefix", type=str, metavar='PREFIX', default=None,
                      help="Collection name prefix for input (and possibly output) collections")
    subp.add_argument("-s", "--sandbox", dest="sandbox_file", type=str, metavar='FILE', default=None,
                      help="Configure sandbox database from FILE (required)")
    subp.add_argument("-W", "--wipe", dest="wipe_target", action="store_true",
                      help="Wipe target collection, removing all data in it, before merge")

    # List builders action.
    subp = subparsers.add_parser("list", help="list builders",
                                 parents=[parent_parser])
    subp.set_defaults(func=command_list)
    subp.add_argument("-m", "--module", dest="mod_path", type=str, metavar="MODULE",
                      default="matgendb.builders",
                      help="Find builder modules under MODULE (default=matgendb.builders)")

    # Build action.
    subp = subparsers.add_parser("build", help="run a builder",
                                 parents=[parent_parser])
    subp.set_defaults(func=command_build)
    subp.add_argument("-b", "--builder", dest="builder", type=str, metavar="NAME", default="",
                      help="Run builder NAME, which is relative to the module path in -m/--module")
    subp.add_argument("-C", "--config-path", dest="config_path", type=str, metavar='DIR', default=".",
                      help="Configure database connection from .json files in DIR (default=%(default)s)")
    subp.add_argument("-k", "--kvp", dest="keywords", action="append", default=[],
                      help="Key/value pairs, in format <key>=<value>, passed to builder function. "
                           "For QueryEngine arguments, the value should be the name of a DB configuration file, "
                           "relative to the path given by -C/--config-path, without the '.json' suffix; if not given, "
                           "a configuration file '<key>.json' will be assumed.")
    subp.add_argument("-m", "--module", dest="mod_path", type=str, metavar="MODULE",
                      default="matgendb.builders",
                      help="Find builder modules under MODULE (default=matgendb.builders)")
    subp.add_argument("-n", "--ncores", dest="num_cores", type=int, default=16,
                      help="Number of cores or processes to run in parallel (%(default)d)")
    subp.add_argument("-p", "--prefix", dest="coll_prefix", type=str, metavar='PREFIX', default=None,
                      help="Collection name prefix for input (and possibly output) collections")
    return p


def _configure_logging(quiet, verbosity):
    """Set up the parent "mg" logger and return its "mg.build" child.

    The level is CRITICAL when quiet; otherwise WARNING, INFO or DEBUG for
    a verbosity of 0, 1, or more than 1.
    """
    parent = logging.getLogger("mg")
    parent.propagate = False  # don't send logs up
    hndlr = logging.StreamHandler()
    hndlr.setFormatter(logging.Formatter("[%(levelname)-6s] %(asctime)s %(name)s :: %(message)s"))
    parent.addHandler(hndlr)
    if quiet:
        lvl = logging.CRITICAL
    elif verbosity > 1:
        lvl = logging.DEBUG
    elif verbosity > 0:
        lvl = logging.INFO
    else:
        lvl = logging.WARNING
    parent.setLevel(lvl)
    return logging.getLogger("mg.build")


def main():
    """Parse the command line, configure logging and run the chosen command.

    :return: The command's result (0 on success), or -1 if it raised a
             ConfigurationError, BuilderNotFoundError or BuilderError
    :rtype: int
    :raise: SystemExit (through the parser's error()) if no action was
            given or the command raised any other exception
    """
    global _log

    p = _make_parser()
    args = p.parse_args()
    _log = _configure_logging(args.quiet, args.vb)

    # Run function. When no sub-command is given, 'func' may be missing
    # from the namespace altogether, so do not access it directly.
    func = getattr(args, 'func', None)
    if func is None:
        p.error("No action given")
    try:
        result = func(args)
    except ConfigurationError as err:
        _log.error("Configuration error: {}".format(err))
        result = -1
    except BuilderNotFoundError as err:
        _log.error("Builder not found: {}".format(err))
        result = -1
    except BuilderError as err:
        _log.error("{}".format(err))
        result = -1
    except Exception as err:
        if _log.getEffectiveLevel() <= logging.INFO:
            print("{}".format(traceback.format_exc()))
        # error() prints the message and exits, so nothing after it runs
        p.error("{} error: {}".format(func.__name__, err))
    if result < 0:
        _log.error("Failure: {:d}".format(result))
    return result

# When run as a script, use the value returned by main() as the exit status.
if __name__ == '__main__':
    sys.exit(main())
