#!/usr/bin/env python
# -*- coding: utf8 -*-
"""
Run validation-related actions against (MongoDB) databases.
Database connections are configured from a file
and the constraints to validate against can come from a file or the command-line.
"""
__author__ = 'Dan Gunter <dkgunter@lbl.gov>'
__date__ = '3/29/13'

import argparse
import logging
import os
import pymongo
import smtplib
import socket
import sys
import time
import traceback
import yaml
# local module
from matgendb.util import get_settings, get_collection
from matgendb.vv.validate import ConstraintSpec, Validator, DBError
from matgendb.vv import report
from matgendb.vv.util import Timing, ElapsedTime, letter_num
from matgendb.vv.util import YamlConfig, args_kvp_nodup, args_list
from matgendb.vv import diff
#from matgendb.vv import mquery
from smoqe.query import to_mongo, BadExpression


# Logging
# -------

# Module-wide logger, named 'mg'. A stream handler (standard error by default)
# is attached at import time, with a compact "LEVEL [name] message" layout.
_log_name = 'mg'
_handler = logging.StreamHandler()
_handler.setFormatter(
    logging.Formatter('%(levelname)-7s [%(name)s] %(message)s'))
_log = logging.getLogger(_log_name)
_log.addHandler(_handler)


# Constants
# ---------

# Reserved top-level keys of the YAML constraints file.
# NOTE(review): DB_KEY is not referenced in the visible part of this module.
DB_KEY = '_db'              # in YAML file
ALIASES_KEY = '_aliases'    # in YAML file; read as a list of "alias=name" strings

# Top-level keys of the constraints mapping that start with this prefix are
# skipped during validation instead of being used as collection names.
PATTERN_KEY_PREFIX_IGNORE = '_'


# Exceptions
# ----------


class FunctionError(Exception):
    """Failure raised by one of the top-level (sub-command) functions.

    The exception message names the command that failed, followed by the
    reason.
    """
    def __init__(self, command, why):
        text = "In '{}' command: {}".format(command, why)
        super(FunctionError, self).__init__(text)


class ArgumentError(Exception):
    """Problem with a command-line argument.

    The message text is also available as the ``msg`` attribute.
    """

    def __init__(self, msg):
        self.msg = msg
        super(ArgumentError, self).__init__(msg)


# Utility functions
# -----------------


def wstrip(s):
    """Return `s` with leading and trailing whitespace removed."""
    stripped = s.strip()
    return stripped


def textlist(items):
    """Join the strings in `items` into one ', '-separated string."""
    separator = ', '
    return separator.join(items)


def mapdump(d):
    """Format a mapping as a single 'key=value, key=value' string.

    :param d: Mapping to format
    :type d: dict
    :return: The 'key=value' pairs joined with ', ', in the mapping's
             iteration order; empty string for an empty mapping
    :rtype: str
    """
    # items() exists on both Python 2 and 3, whereas iteritems() is
    # Python 2 only and raises AttributeError on Python 3.
    return ', '.join('{}={}'.format(k, v) for k, v in d.items())


def nvpstr(s):
    """Split a string of the form 'name=value' into a (name, value) tuple.

    Only the first '=' separates the two parts, and both parts have their
    surrounding whitespace removed. Used as type for argparse.

    :raise: ValueError if input string is invalid
    """
    name, sep, value = s.partition('=')
    if not sep:
        raise ValueError('expected "name=value", got "{}"'.format(s))
    return name.strip(), value.strip()


def _parse_email(s):
    """Parse an email specification into component parts.

    The specification has the form
    ``from:to[,to2...][:server[:port[/subject]]]``.

    Keys of the result:
        - sender
        - recipients (list of str, each stripped of whitespace)
        - server (only if given)
        - port (int, only if given)
        - subject (only if given, after the port)

    :param s: Email specification
    :type s: str
    :return: Parsed email information
    :rtype: dict
    :raise: ValueError if there are fewer than 2 or more than 4
            ':'-separated parts, or the port is not an integer
    """
    # parse command-line args
    items = s.split(':')
    if len(items) < 2:
        raise ValueError('from:to required in "{}"'.format(s))
    if len(items) > 4:
        raise ValueError('too many values in "{}"'.format(s))
    d = {'sender': items[0],
         'recipients': [x.strip() for x in items[1].split(',')]}
    if len(items) > 2:
        d['server'] = items[2]
    if len(items) > 3:
        # Split on the first '/' only, so that the subject itself may
        # contain '/' characters.
        port, slash, subject = items[3].partition('/')
        d['port'] = int(port)
        if slash:
            d['subject'] = subject
    return d


def _config_email(fname):
    """Get an email spec. from the "_email" key of a YAML config file.

    The "_email" value is a mapping with required keys "from" and "to"
    (a string or a list of strings) and optional keys "host", "port" and
    "subject".

    Keys of the result:
        - sender
        - recipients (list)
        - server (only if "host" is configured)
        - port (int, only if configured)
        - subject (only if configured)

    :param fname: Path of the YAML configuration file
    :type fname: str
    :return: Parsed email information, or None if the file has no "_email" key
    :rtype: dict
    :raise: ValueError if the file cannot be opened, or "from"/"to" is missing
    """
    try:
        f = open(fname)
    except (IOError, OSError):
        # Python 2 raises IOError here, Python 3 raises OSError.
        raise ValueError('cannot read from file "{}"'.format(fname))
    with f:  # make sure the file is closed again
        y = yaml.safe_load(f)
    # An empty file loads as None; treat anything but a mapping as "no email".
    info = y.get('_email', None) if isinstance(y, dict) else None
    if info is None:
        return None
    d = {}
    try:
        d['sender'] = info['from']
        d['recipients'] = info['to']
    except KeyError:
        raise ValueError('from/to not found in file {} mapping key "_email"'
                         .format(fname))
    if isinstance(d['recipients'], str):
        d['recipients'] = [d['recipients']]
    if 'port' in info:
        d['port'] = int(info['port'])
    if 'host' in info:
        # Stored under 'server', the key that the command-line email parsers
        # in this module produce for the same Emailer keyword arguments.
        d['server'] = info['host']
    if 'subject' in info:
        d['subject'] = info['subject']
    return d


def _splitup(s, sep, n1, n2):
    """Split `s` on `sep`, insisting on between `n1` and `n2` parts (inclusive).

    :return: The parts
    :rtype: list
    :raise: ValueError if the number of parts is outside [n1, n2]
    """
    pieces = s.split(sep)
    count = len(pieces)
    if count < n1 or count > n2:
        raise ValueError("Wrong number of '{}'-separated parts: {:d}".format(sep, count))
    return pieces


def parse_email_args(msg, server):
    """Build email settings from the message and server command-line values.

    :param msg: String of the form 'sender/recipient[,recipient2...][/subject]'
    :param server: String of the form 'hostname[:port]'
    :return: Mapping with keys "sender", "recipients" (list), "subject"
             (empty if not given), "server" and "port" (the standard SMTP
             port if not given)
    :rtype: dict
    :raise: ValueError if either string has the wrong number of parts,
            or the port is not an integer
    """
    msg_parts = _splitup(msg, '/', 2, 3)
    server_parts = _splitup(server, ':', 1, 2)
    recipients = [addr.strip() for addr in msg_parts[1].split(',')]
    if len(msg_parts) < 3:
        subject = ""
    else:
        subject = msg_parts[2]
    if len(server_parts) > 1:
        port = int(server_parts[1])
    else:
        port = smtplib.SMTP_PORT
    return {"sender": msg_parts[0],
            "recipients": recipients,
            "subject": subject,
            "server": server_parts[0],
            "port": port}


def _dbinfo(cfg):
    """Return a one-line 'host/collection.database' description of `cfg`.

    :param cfg: Mapping with at least 'host', 'collection' and 'database' keys
    :raise: KeyError if one of those keys is missing
    """
    # NOTE(review): the collection is printed before the database; confirm
    # that this order (rather than database.collection) is intended.
    template = "{host}/{collection}.{database}"
    return template.format(**cfg)


# Subcommands
# -----------


def command_diff(args):
    """Sub-command to calculate the difference of 2 collections.

    Compares the collections described by the `args.old` and `args.new`
    configuration files, then emails, prints and/or stores a report of the
    result. If no report destination is selected, the report is printed.

    :param args: Parsed command-line arguments of the "diff" sub-command
    :return: 0 on success
    :rtype: int
    :raise: FunctionError if a configuration file cannot be opened, the
            filter, numeric expressions or email arguments are invalid, the
            diff fails, or the report cannot be stored in the database
    """
    # Check that conf files exist. Each file is opened and closed again right
    # away, so no handle is leaked and the check really runs even where
    # map() is lazy (Python 3).
    try:
        for conf_file in (args.old, args.new):
            with open(conf_file):
                pass
    except IOError as err:
        raise FunctionError("diff", "Cannot open configuration file: {}".format(err))

    # Parse query/filter, if provided
    fltr = {}
    if args.fltr is not None:
        try:
            fltr = to_mongo(args.fltr)
        except BadExpression as err:
            raise FunctionError("diff", "Bad query filter: {}".format(err.details))
        _log.info("Filter (original => mongo): {} => {}".format(args.fltr, fltr))

    # Parse numeric change fields, if given
    deltas = {}
    if args.numprops:
        for field, expr in args.numprops.items():
            try:
                deltas[field] = diff.Delta(expr)
            except ValueError as err:
                raise FunctionError("diff", "Bad numeric expr for field '{}': {}"
                                    .format(field, err))

    # Parse email settings up front, so that a malformed value is reported
    # before the (possibly long-running) diff instead of after it.
    email_kw = None
    if args.rpt_email:
        try:
            email_kw = parse_email_args(args.rpt_email, args.email_server)
        except ValueError as err:
            raise FunctionError("diff", "Bad email arguments: {}".format(err))

    # Instantiate main class.
    df = diff.Differ(key=args.key, info=args.info, props=args.properties, fltr=fltr, deltas=deltas)

    # Run diff.
    t0 = time.time()
    try:
        r = df.diff(args.old, args.new, only_missing=args.missonly, only_values=args.changeonly)
    except Exception as err:
        # Include the traceback only when debugging.
        if _log.getEffectiveLevel() == logging.DEBUG:
            exc_str = traceback.format_exc()
            err = "{}\nTraceback:\n{}".format(err, exc_str)
        raise FunctionError("diff", "Runtime error: {}".format(err))
    t1 = time.time()

    # Build report.
    def stime(t):
        """Format a timestamp as local time, to the minute."""
        return time.strftime("%Y-%m-%d %H:%M", time.localtime(t))

    meta = {'start_time': stime(t0), 'end_time': stime(t1),
            'elapsed': t1 - t0,
            'filter': fltr,
            'db1': _dbinfo(get_settings(args.old)),
            'db2': _dbinfo(get_settings(args.new)),
            # leave out the dispatch attributes (func, func_args)
            'args': {k: v for k, v in vars(args).items()
                     if not k.startswith("func")}
    }
    meta.update(system_metadata())
    fmt_kwargs = dict(key=args.key)
    # NOTE(review): the HTML-specific options are only passed when --format
    # is explicitly "html", not when HTML is picked as the email default.
    if args.format == "html":
        fmt_kwargs['url'] = args.rest_url
        fmt_kwargs['email_mode'] = args.rpt_email

    def make_report(f):
        """Format the diff result with the report formatter named by `f`."""
        formatter_class = getattr(report, "Diff{}Formatter".format(f.title()))
        # fmt_kwargs is read at call time, so later additions are honored.
        return formatter_class(meta, **fmt_kwargs).format(r)

    # Email, print, and/or db-insert a report
    # (a) email
    if email_kw is not None:
        emailer = report.Emailer(**email_kw)
        fmt = args.format or "html"
        text = make_report(fmt)
        emailer.send(text, ("text/plain", "text/html")[fmt == "html"])
    # (b) print; also the fallback when no destination was selected
    if args.rpt_print or (not args.rpt_email and not args.rpt_print and not args.rpt_db):
        if args.format == "json":
            fmt_kwargs['pretty'] = True
        text = make_report(args.format or "text")
        print(text)
    # (c) DB
    if args.rpt_db:
        try:
            coll = get_collection(args.rpt_db, admin=True)
        except Exception as err:
            raise FunctionError("diff", "Connecting to report DB: {}".format(err))
        try:
            doc = report.DiffJsonFormatter(meta).document(r)
            coll.insert(doc)
        except pymongo.errors.OperationFailure as err:
            raise FunctionError("diff", "Inserting to report DB: {}".format(err))

    return 0


def system_metadata():
    """Return some simple system metadata.

    :return: Mapping with keys 'user' (login name, or 'unknown' if it cannot
             be determined), 'host' (host name) and 'cmdline' (the
             space-joined ``sys.argv``)
    :rtype: dict
    """
    try:
        user_name = os.getlogin()
    except OSError:
        # os.getlogin() needs a controlling terminal, which is missing under
        # e.g. cron; fall back to the environment / password database.
        import getpass
        try:
            user_name = getpass.getuser()
        except (KeyError, ImportError, OSError):
            user_name = 'unknown'
    host_name = socket.gethostname()
    cmdline = " ".join(sys.argv)
    return {'user': user_name,
            'host': host_name,
            "cmdline": cmdline}


def command_validate(args, formatters):
    """Run validation command.

    Connects to the database named in the configuration file, collects the
    constraints (from a YAML file or from the command line), validates each
    collection against them and emails or prints a report of the violations.

    :param args: Parsed command-line arguments of the "validate" sub-command
    :param formatters: Mapping of report format name to formatter class
    :type formatters: dict
    :return: 0 if a report was produced, 1 if the report was empty and
             therefore skipped
    :rtype: int
    :raise: ArgumentError for any problem with the configuration, the
            constraints, the aliases, the email settings or the report format
    """
    retcode = 0  # return code

    # Connect and authenticate
    fname = args.config_file if args.config_file else "db.json"
    try:
        config = get_settings(args.config_file)
    except ValueError as err:
        raise ArgumentError('Cannot parse configuration "{}": {}'.format(fname, err))
    try:
        conn = pymongo.MongoClient(config['host'], config['port'])
    except pymongo.errors.ConnectionFailure as err:
        raise ArgumentError('Cannot connect to server: {}'.format(err))
    db_key = 'database'
    if db_key not in config:
        raise ArgumentError('Configuration in "{}" is missing "{}"'.format(fname, db_key))
    database_name = config[db_key]
    db = conn[database_name]
    user, passwd = None, None
    # try both readonly and admin credentials; the admin ones win if both
    # are configured
    for utype in 'readonly', 'admin':
        if (utype + '_user') in config:
            try:
                user, passwd = config[utype + '_user'], config[utype + '_password']
            except KeyError:
                raise ArgumentError('Configuration error in file {0}: {1}_user but no {1}_password'
                                    .format(fname, utype))
    if user and passwd:
        if not db.authenticate(user, passwd):
            raise ArgumentError('Cannot authenticate to database as user {}'.format(user))
    else:
        _log.info('Connecting to DB "{}" without authentication'.format(config[db_key]))
    # get collection name if given
    db_coll_name = config['collection'] if 'collection' in config else None

    # Init constraints from file or command-line
    constraints_from_file = args.constfile is not None
    if constraints_from_file:
        try:
            f = open(args.constfile, 'rb')
        except IOError:
            if not os.path.exists(args.constfile):
                raise ArgumentError('constraints file "{}" not found'.format(args.constfile))
            else:
                raise ArgumentError('constraints file "{}" found, but unreadable'.format(args.constfile))
        with f:  # make sure the file is closed again
            with Timing('load.yaml', file=f.name):
                constraints = yaml.safe_load(f)
        # An empty file loads as None; anything but a mapping cannot be used.
        if constraints is None:
            raise ArgumentError('constraints file "{}" is empty'.format(args.constfile))
        if not isinstance(constraints, dict):
            raise ArgumentError('constraints file "{}" must contain a mapping, got {}'
                                .format(args.constfile, type(constraints).__name__))
    else:
        coll_name = args.coll
        if coll_name is None:
            if db_coll_name is None:
                raise ArgumentError('collection name not found')
            else:
                coll_name = db_coll_name
        if not args.constraint:
            raise ArgumentError('at least one constraint is required')
        # re-split command-line arguments on commas (not whitespace)
        plist = ' '.join(args.constraint).split(',')
        # make dict(key=collection) of list of the constraints, just like YAML config file
        constraints = {coll_name: [plist]}
        _log.debug("command-line constraints dump: {}".format(constraints))

    # Init aliases from file or command-line
    aliases = {}
    if constraints_from_file:
        astr = constraints.get(ALIASES_KEY, {})
        try:
            aliases = dict(map(nvpstr, astr))
        except (ValueError, AttributeError, TypeError) as err:
            # Not a list, or an item that is not a "name=value" string.
            raise ArgumentError('aliases should be list of name=value, got "{}": {}'.format(astr, err))
    elif args.aliases:
        aliases = dict(args.aliases)

    # Init email from file or command-line
    emailer, email_kw = None, None
    try:
        if args.email is not None:
            email_kw = _parse_email(args.email)
        elif args.constfile:
            email_kw = _config_email(args.constfile)
    except ValueError as err:
        # Report as a usage error rather than a traceback.
        raise ArgumentError('Bad email specification: {}'.format(err))
    if email_kw is not None:
        _kwd = {'subject': 'MP Validation Report'}
        _kwd.update(email_kw)
        _log.info("Will email report to: {}".format(mapdump(_kwd)))
        emailer = report.Emailer(**_kwd)

    # Initialize report
    sys_meta = system_metadata()
    rpt_hdr = report.ReportHeader("Materials Project Validation Report")
    rpt_hdr.add('Report time', time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime()))
    # The -u/--user option, if given, overrides the detected login name.
    rpt_hdr.add('Report user', getattr(args, 'user', None) or sys_meta['user'])
    rpt_hdr.add('Report host', sys_meta['host'])
    rpt_hdr.add('Database', database_name)
    rpt_hdr.add('Limit', args.limit)
    rpt = report.Report(rpt_hdr)
    # pick report formatter
    fmt = args.report_format.lower()
    formatter_class = formatters.get(fmt, None)
    if formatter_class is None:
        raise ArgumentError('Unknown format "{}" for --format, choose from: {}'
                            .format(fmt, textlist(formatters.keys())))

    # Run validation for each collection
    validator = Validator(aliases=aliases, max_violations=args.limit, max_dberrors=10, add_exists=args.must_exist)
    if args.progress > 0:
        validator.set_progress(args.progress)
    elapsed = ElapsedTime()
    with Timing("validate", log=_log, elapsed=elapsed):
        for coll_name, constraint_spec_cfg in constraints.items():
            _log.debug("validate {}".format(coll_name))
            # Reserved keys (e.g. aliases, email) are not collections.
            if coll_name.startswith(PATTERN_KEY_PREFIX_IGNORE):
                continue
            sect_hdr = report.SectionHeader(title='Collection "{}"'.format(coll_name))
            rpt_sect = report.ReportSection(sect_hdr)
            coll = db[coll_name]
            # Any error below stops the whole run; sections that were
            # already added to the report are kept.
            try:
                try:
                    cspec = ConstraintSpec(constraint_spec_cfg)
                except ValueError as err:
                    _log.error('processing constraints for {}: {}'.format(coll_name, err))
                    break
                vsect = 0  # number of non-empty violation groups so far
                for vgroup in validator.validate(coll, cspec, subject=coll_name):
                    if len(vgroup) == 0:
                        continue
                    vsect += 1
                    vletter = letter_num(vsect)
                    sect_hdr = report.SectionHeader(title='Constraint Violations {}'.format(vletter))
                    _log.debug('Collection "{}": {:d} violations'.format(coll_name, len(vgroup)))
                    sect_hdr.add('Condition', str(vgroup.condition))
                    table = report.Table(colnames=('Id', 'TaskId', 'Field', 'Constraint', 'Value'))
                    for viol, vrec in vgroup:
                        rec_id = vrec['_id']
                        task_id = vrec['task_id']
                        # show a type by its name, not its repr
                        if isinstance(viol.expected_value, type):
                            viol.expected_value = viol.expected_value.__name__
                        table.add((rec_id, task_id, viol.field,
                                   '{} {}'.format(viol.op, viol.expected_value),
                                   viol.got_value))
                    table.sortby('Id')
                    rpt_sect.add_section(report.ReportSection(sect_hdr, table))
            except DBError as err:
                _log.error('validating collecton {}: DB error: {}'.format(coll_name, err))
                break
            except ValueError as err:
                _log.error('validating collecton {}: {}'.format(coll_name, err))
                break
            rpt.add_section(rpt_sect)
    rpt.header.add('Elapsed time', '{:.2f}s'.format(elapsed.value))
    _log.debug('Run time: {:.2f}'.format(elapsed.value))

    # Report results.
    if not args.send_on_empty and rpt.is_empty():
        _log.warning("Report is empty, skipping output")
        retcode = 1
    else:
        formatter = formatter_class()
        text = formatter.format(rpt)
        if emailer:
            # Only HTML reports are sent as HTML; everything else is plain text.
            if isinstance(formatter, report.HTMLFormatter):
                msgfmt = 'text/html'
            else:
                msgfmt = 'text/plain'
            n = emailer.send(text, fmt=msgfmt)
            if n < 1:
                _log.error("Email report not sent")
        else:
            print(text)

    return retcode

# Main
# ----


def main(cmdline=None):
    """Main program entry point.

    Builds the argument parser with a "validate" and a "diff" sub-command
    (both share the -v/--verbose option), parses `cmdline`, sets the log
    level from the verbosity count and runs the selected sub-command.
    Usage errors end the program through ``parser.error()``.

    :param cmdline: command-line arguments, defaults to ``sys.argv[1:]``
    :return: Status, 0=ok otherwise an error
    :rtype : int
    """
    if cmdline is None:
        cmdline = sys.argv[1:]
    desc = ' '.join(__doc__.split())
    formatters = dict(json=report.JSONFormatter, html=report.HTMLFormatter,
                      md=report.MarkdownFormatter)

    # Configure parent parser for shared args.
    parent_parser = argparse.ArgumentParser(add_help=False)
    parent_parser.add_argument('--verbose', '-v', dest='vb', action="count", default=0,
                               help="Print more verbose messages to standard error. Repeatable. (default=ERROR)")

    parser = argparse.ArgumentParser(description=desc, parents=[parent_parser])
    subparsers = parser.add_subparsers(description="Actions")

    # Validation command.
    subp = subparsers.add_parser("validate", help="Validate a collection", parents=[parent_parser])
    subp.set_defaults(func=command_validate, func_args=(formatters,))
    subp.add_argument('constraint', nargs='*')
    subp.add_argument('--alias', '-a', dest='aliases', action='append', type=nvpstr, metavar="ALIAS", default=[],
                      help='Alias for a field used in a constraint or condition, in form alias=name_in_db')
    subp.add_argument('--config', '-c', dest='config_file', metavar='FILE', default=None,
                      help="Configuration file for database connection. "
                           "Generate one using mgdb init --config filename.json, if necessary. "
                           "Otherwise, the code searches for a db.json. If "
                           "none is found, a no-authentication "
                           "localhost:27017/vasp database is assumed.")
    subp.add_argument('--collection', '-C', dest='coll', metavar='NAME', default=None,
                      help='Collection name for constraints given on command-line')
    subp.add_argument('--email', '-e', dest='email', default=None,
                      help='Email report to EMAIL at {from}:{to}[:host[:port[/subject]]]. '
                           'The "from" and "to" are required; default host is localhost. '
                           'This information can also be in the main config file under the "_email" key, '
                           'which should be a mapping with keys "from", "to", "host", "port", and "subject".')
    subp.add_argument('--empty', dest='send_on_empty', action='store_true',
                      help="Send a report, even if it is empty (default=print warning and don't send)")
    subp.add_argument('--exonly', dest='must_exist', action='store_true', default=False,
                      help='Only show results where all fields in the constraints are also present in the record')
    subp.add_argument('--file', '-f', dest='constfile', metavar='FILE', default=None,
                      help='Main configuration file. Has constraints, and optionally email info.')
    subp.add_argument('--format', '-F', dest='report_format', metavar='FORMAT', default='html',
                      help='Report format: {} (default=html)'.format(', '.join(formatters.keys())))
    subp.add_argument('--limit', '-m', dest='limit', metavar='NUM', type=int, default=50,
                      help='Limit number of displayed constraint violations per-collection 0=no limit (50)')
    subp.add_argument('--progress', '-p', dest='progress', metavar='NUM', type=int, default=0,
                      help='Report progress every NUM invalid records found')
    subp.add_argument('--user', '-u', dest='user', metavar='NAME', default=None,
                      help='User name, for the report')

    # Diff command.
    subp = subparsers.add_parser("diff", help="Show difference in two collections", parents=[parent_parser])
    subp.set_defaults(func=command_diff, func_args=())
    subp.add_argument("-D", "--db", dest="rpt_db", default=None, metavar="CONFIG",
                      help="Record a JSON record of the report in the MongoDB collection configured by CONFIG, "
                           "which is a standard pymatgen-db configuration file.")
    subp.add_argument("-E", "--email", dest='rpt_email', default=None, metavar="ADDR",
                      help="Email report to ADDR, which is of the form:  "
                           "sender/receiver,[receiver2...][/subject].")
    subp.add_argument('-f', '--file', action=YamlConfig, metavar='FILE',
                      help="Read configuration from FILE instead of command line. "
                           "File is YAML (or JSON, a subset), with long option names as keys. Any "
                           "time the option takes a comma-separated list, the config file uses a real list, "
                           "and key/value pair lists are mappings. "
                           "e.g. --numeric='x=+-1.5,y=+-0.5' becomes {.., 'numeric':  {'x':'+-1.5', 'y':'+-0.5'} } "
                           "and --info=foo,bar becomes '{ .., info: ['foo', 'bar'] }'.")
    subp.add_argument("-F", "--format", dest="format", metavar="FORMAT", default=None,
                      help="Default report format: 'text', 'html', or 'json'. If not given, the format will "
                           "be determined by the output: text for console, html for email.",
                      choices=["text", "html", "json"])
    subp.add_argument("-s", "--email-server", dest="email_server", default="localhost", metavar="HOST",
                      help="Server HOST for an email report, in form hostname[:port]. Default is localhost")
    subp.add_argument("-i", "--info", help="Extra fields for records, as comma-separated list"
                                           ", e.g 'extra,fields,to_include'", dest="info",
                      default=None, type=args_list)
    subp.add_argument("-k", "--key", help="Key for matching records (default='key')", dest="key",
                      default="key")
    subp.add_argument("-m", "--missing", help="Only report keys that are in the 'old' collection, but "
                                              "not in the 'new' collection", action="store_true", dest="missonly")
    subp.add_argument("-n", "--numeric", dest="numprops", default=None, metavar="EXPR", type=args_kvp_nodup,
                      help="Fields with numeric values that must match, with a tolerance, "
                           "as a comma-separated list. "
                           "e.g., '<name1>=<expr1>, <name2>=<expr2>, ..'. "
                           "<name> is a field name, <expr> syntax is: '+-' (sign), '+-N', '+-N%%', "
                           "where N is a number. "
                           "Adding '=' (must go before '%%') compares >=, <= instead of >, <. "
                           "The '%%' means a percentage relative to the first collection's value.")
    subp.add_argument("-p", "--properties", help="Fields with properties that must match, as comma-separated list "
                                           ", e.g 'these_must,match'", dest="properties", default=None,
                      type=args_list)
    subp.add_argument("-P", "--print", help="Print report to the console", action="store_true", dest="rpt_print")
    subp.add_argument("-q", "--query", metavar="EXPR", help="Expression to filter records before diff. "
                                                            "Uses simplified constraint syntax, e.g., "
                                                            "'name = \"oscar\" and grouchiness > 3'",
                      dest="fltr")
    subp.add_argument("-u", "--url", metavar="URL", dest="rest_url",
                      help="In HTML reports, make the key into a hyperlink by prefixing with URL. "
                           "e.g., 'https://materialsproject.org/tasks/'.")
    subp.add_argument("-V", "--values", dest="changeonly", action="store_true",
                      help="Only report changes in values, not missing or added keys")
    subp.add_argument("old", help="pymatgen-db JSON config file for the 'old' collection")
    subp.add_argument("new", help="pymatgen-db JSON config file for the 'new' collection")

    args = parser.parse_args(cmdline)

    # Python 3's argparse does not require a sub-command, in which case no
    # 'func' default is set. Report that as a usage error instead of silently
    # doing nothing (Python 2's argparse already fails while parsing).
    if not hasattr(args, 'func'):
        parser.error("no command given, choose from: validate, diff")

    # Do some extra arg. checking for diff command, due to YAML config option.
    if args.func == command_diff:
        # Manually check types on some args
        for name, type_, opt in (("info", list, "-i/--info"), ("properties", list, "-p/--properties"),
                                 ("numprops", dict, "-n/--numeric")):
            val = getattr(args, name)
            if val is None:
                continue
            if not isinstance(val, type_):
                parser.error("Wrong type for {} argument, '{}' is not a {}"
                             .format(opt, val, type_.__name__))

    # Set logging level: no -v is ERROR, -v is INFO, -vv (or more) is DEBUG.
    verbosity = (logging.ERROR, logging.INFO, logging.DEBUG)[min(args.vb, 2)]
    _log.setLevel(verbosity)

    # Run subcommand.
    try:
        return args.func(args, *args.func_args)
    except ArgumentError as err:
        # prints usage plus the message, then exits
        parser.error(err.msg)
    except FunctionError as err:
        _log.critical("{}".format(err))
        return -1


if __name__ == '__main__':
    # Run as a script: the status returned by main() is the exit status.
    raise SystemExit(main())
