#!/usr/bin/env python
# -*- coding: utf8 -*-
"""
Run validation tests of (MongoDB) databases.
Database connections are configured from a file
and the constraints to validate against can come from a file or the command-line.
"""
__author__ = 'Dan Gunter <dkgunter@lbl.gov>'
__date__ = '3/29/13'

import argparse
import logging
import os
import pymongo
import smtplib
import socket
import sys
import time
import traceback
import yaml
# local module
from matgendb.util import get_settings
from matgendb.vv.validate import ConstraintSpec, Validator, DBError
from matgendb.vv import report
from matgendb.vv.util import Timing, ElapsedTime, letter_num
from matgendb.vv import diff
#from matgendb.vv import mquery
from smoqe.query import to_mongo, BadExpression

# Initialize module logging.
# This runs at import time: the 'mg' logger gets its own stream handler
# (a StreamHandler created without arguments writes to sys.stderr).
# No level is set here; main() sets it from the --verbose count.
_log_name = 'mg'
_log = logging.getLogger(_log_name)
_handler = logging.StreamHandler()
# Message layout: level name padded to 7 characters, logger name in brackets, message
_handler.setFormatter(logging.Formatter('%(levelname)-7s [%(name)s] %(message)s'))
_log.addHandler(_handler)

## Constants

# Key in the YAML file (not referenced by the code visible in this file)
DB_KEY = '_db'              # in YAML file
# Top-level key in the YAML constraints file whose value main() reads as a
# list of 'alias=name' strings
ALIASES_KEY = '_aliases'    # in YAML file

# Top-level keys of the constraints mapping that start with this prefix are
# skipped by main() instead of being treated as collection names
PATTERN_KEY_PREFIX_IGNORE = '_'

## Exceptions


class FunctionError(Exception):
    """Failure of a top-level (sub-)command function.

    The exception message names the command that failed, followed by
    the reason.
    """
    def __init__(self, command, why):
        text = "In '{}' command: {}".format(command, why)
        super(FunctionError, self).__init__(text)

## Functions


def wstrip(s):
    """Return `s` without its leading and trailing whitespace."""
    stripped = s.strip()
    return stripped


def textlist(items):
    """Join an iterable of strings into one comma-and-space separated string."""
    separator = ', '
    return separator.join(items)

def mapdump(d):
    """Format a mapping as a single 'key=value, key=value' string.

    :param d: Mapping to format
    :return: Comma-separated 'key=value' pairs, in the mapping's iteration order
    :rtype: str
    """
    # items() exists on both Python 2 and 3 dicts; iteritems() is Python 2 only.
    return ', '.join('{}={}'.format(k, v) for k, v in d.items())

def nvpstr(s):
    """Split a 'name=value' string into a (name, value) tuple.

    The split happens at the first '='; both halves are stripped of
    surrounding whitespace. Used as a `type` callable for argparse.

    :raise: ValueError if the input string contains no '='
    """
    name, eq, value = s.partition('=')
    if not eq:
        raise ValueError('expected "name=value", got "{}"'.format(s))
    return name.strip(), value.strip()


def _parse_email(s):
    """Parse an email specification into component parts.

    Keys:
        - sender
        - recipients
        - host
        - port

    :return: Parsed email information
    :rtype: dict
    """
    d = {}
    # parse command-line args
    items = s.split(':')
    if len(items) < 2:
        raise ValueError('from:to required in "{}"'.format(s))
    if len(items) > 4:
        raise ValueError('too many values in "{}"'.format(s))
    d['sender'] = items[0]
    d['recipients'] = [x.strip() for x in items[1].split(',')]
    if len(items) > 2:
        d['server'] = items[2]
    if len(items) > 3:
        if '/' in items[3]:
            p1, p2 = items[3].split('/')
            d['port'], d['subject'] = int(p1), p2
        else:
            d['port'] = int(items[3])
    return d


def _config_email(fname):
    """Get an email spec. from a config file.

    :return: Parsed email information
    :rtype: dict
    """
    d = {}
    try:
        f = open(fname)
    except OSError:
        raise ValueError('cannot read from file "{}"'.format(fname))
    y = yaml.safe_load(f)
    info = y.get('_email', None)
    if info is None:
        return None
    try:
        d['sender'] = info['from']
        d['recipients'] = info['to']
        if isinstance(d['recipients'], str):
            d['recipients'] = [d['recipients']]
    except KeyError, err:
        raise ValueError('from/to not found in file {} mapping key "_email"'
        .format(fname))
    if 'port' in info:
        d['port'] = int(info['port'])
    if 'host' in info:
        d['host'] = info['host']
    if 'subject' in info:
        d['subject'] = info['subject']
    return d


# --------
# Commands
# --------

def _splitup(s, sep, n1, n2):
    parts = s.split(sep)
    n = len(parts)
    if not (n1 <= n <= n2):
        raise ValueError("Wrong number of '{}'-separated parts: {:d}".format(sep, n))
    return parts


def parse_email_args(msg, server):
    """Build email settings from the "diff" sub-command's arguments.

    :param msg: Message spec. in the form 'sender/recipient[,recipient...][/subject]'
    :param server: Server spec. in the form 'hostname[:port]'
    :return: dict with keys "sender", "recipients", "subject", "host", "port";
             the subject defaults to "" and the port to `smtplib.SMTP_PORT`
    :raise: ValueError if either argument has the wrong number of parts
            or the port is not an integer
    """
    msg_parts = _splitup(msg, '/', 2, 3)
    server_parts = _splitup(server, ':', 1, 2)

    recipients = [addr.strip() for addr in msg_parts[1].split(',')]
    if len(msg_parts) < 3:
        subject = ""
    else:
        subject = msg_parts[2]
    if len(server_parts) > 1:
        port = int(server_parts[1])
    else:
        port = smtplib.SMTP_PORT

    return {"sender": msg_parts[0],
            "recipients": recipients,
            "subject": subject,
            "host": server_parts[0],
            "port": port}


def _dbinfo(cfg):
    return "{host}/{collection}.{database}".format(**cfg)


def command_diff(args):
    """Sub-command to calculate the difference of 2 collections.

    Builds a `diff.Differ` from the parsed options, runs it on the "old"
    and "new" configuration files, formats the result as text or HTML and
    either prints it or emails it.

    :param args: Parsed arguments of the "diff" sub-command
    :return: 0 on success
    :rtype: int
    :raise: FunctionError if a configuration file cannot be opened, the
            email argument, query filter or a numeric expression is
            invalid, or the diff itself fails
    """
    # Check that conf files exist. Each file is opened and closed again
    # right away, so no handle is left open.
    for conf in (args.old, args.new):
        try:
            with open(conf):
                pass
        except (IOError, OSError) as err:
            raise FunctionError("diff", "Cannot open configuration file: {}".format(err))

    # Parse email settings up front, so a malformed value is reported
    # before the (possibly long) diff is run.
    email_kw = None
    if args.email:
        try:
            email_kw = parse_email_args(args.email, args.email_server)
        except ValueError as err:
            raise FunctionError("diff", "Bad email argument: {}".format(err))

    # Parse info arg.
    info, props = [], []
    if args.info is not None:
        info = [s.strip() for s in args.info.split(',')]
    if args.props is not None:
        props = [s.strip() for s in args.props.split(',')]

    # Parse query/filter, if provided
    fltr = {}
    if args.fltr is not None:
        try:
            fltr = to_mongo(args.fltr)
        except BadExpression as err:
            raise FunctionError("diff", "Bad query filter: {}".format(err.details))
        _log.info("Filter (original => mongo): {} => {}".format(args.fltr, fltr))

    # Parse numeric change fields, if given
    deltas, numprops = {}, []
    if args.numprops:
        numprops = [e.strip() for e in args.numprops.split(",")]
    for numprop in numprops:
        # Split on the first '=' only: the expression itself may contain '='.
        try:
            field, expr = numprop.split("=", 1)
        except ValueError:
            raise FunctionError("diff", "numeric expression '{}' not in form 'field=expr'"
                                .format(numprop))
        # Tolerate whitespace around the '=' separator.
        field, expr = field.strip(), expr.strip()
        if field in deltas:
            raise FunctionError("diff", "Only one numeric expression allowed per field, '{}' has "
                                "at least 2: '{}', '{}'".format(field, deltas[field], expr))
        try:
            deltas[field] = diff.Delta(expr)
        except ValueError as err:
            raise FunctionError("diff", "Bad numeric expr for field '{}': {}".format(field, err))

    # Instantiate main class.
    df = diff.Differ(key=args.key, info=info, props=props, fltr=fltr, deltas=deltas)

    # Run diff.
    t0 = time.time()
    try:
        r = df.diff(args.old, args.new, only_missing=args.missonly)
    except Exception as err:
        detail = err
        # Include the traceback only when debugging output was requested.
        if _log.getEffectiveLevel() == logging.DEBUG:
            detail = "{}\nTraceback:\n{}".format(err, traceback.format_exc())
        raise FunctionError("diff", "Runtime error: {}".format(detail))
    t1 = time.time()

    # Build report.
    def stime(t):
        return time.strftime("%Y-%m-%d %H:%M", time.localtime(t))

    meta = {'start_time': stime(t0), 'end_time': stime(t1),
            'elapsed': t1 - t0,
            'filter': fltr,
            'db1': _dbinfo(get_settings(args.old)),
            'db2': _dbinfo(get_settings(args.new))}
    # Default format is HTML when emailing, plain text when printing.
    if args.format is None:
        args.format = "html" if args.email else "text"
    as_html = args.format == "html"
    fmt_class = report.DiffHtmlFormatter if as_html else report.DiffTextFormatter
    text = fmt_class(meta).format(r)

    # Print or send report.
    if email_kw is not None:
        emailer = report.Emailer(**email_kw)
        emailer.send(text, "text/html" if as_html else "text/plain")
    else:
        print(text)

    return 0

# ------------
# Main program
# ------------


def main(cmdline=None):
    """Main program entry point.

    Parses the command line, then either dispatches to a sub-command
    ("diff") or runs a validation: connect to the database, load the
    constraints and aliases from a file or the command line, validate each
    collection, and print or email the resulting report.

    Usage errors are reported through `parser.error`, which prints a
    message and exits the process.

    :param cmdline: command-line arguments; defaults to ``sys.argv[1:]``
    :return: Status, 0=ok otherwise an error (1 if the report was empty and
             was not sent, -1 if a sub-command failed)
    :rtype : int
    """
    retcode = 0

    if cmdline is None:
        cmdline = sys.argv[1:]
    desc = ' '.join(__doc__.split())
    formatters = dict(json=report.JSONFormatter, html=report.HTMLFormatter,
                      md=report.MarkdownFormatter)

    # Configure parent parser for shared args.
    parent_parser = argparse.ArgumentParser(add_help=False)
    parent_parser.add_argument('--verbose', '-v', dest='vb', action="count", default=0,
                               help="Print more verbose messages to standard error. Repeatable. (default=ERROR)")

    # Options for main command.
    parser = argparse.ArgumentParser(description=desc, parents=[parent_parser])
    parser.add_argument('constraint', nargs='*')
    parser.add_argument('--alias', '-a', dest='aliases', action='append', type=nvpstr, metavar="ALIAS", default=[],
                        help='Alias for a field used in a constraint or condition, in form alias=name_in_db')
    parser.add_argument('--config', '-c', dest='config_file', metavar='FILE', default=None,
                        help="Configuration file for database connection. "
                             "Generate one using mgdb init --config filename.json, if necessary. "
                             "Otherwise, the code searches for a db.json. If "
                             "none is found, a no-authentication "
                             "localhost:27017/vasp database is assumed.")
    parser.add_argument('--collection', '-C', dest='coll', metavar='NAME', default=None,
                        help='Collection name for constraints given on command-line')
    parser.add_argument('--email', '-e', dest='email', default=None,
                        help='Email report to EMAIL at {from}:{to}[:host[:port[/subject]]].'
                             'The "from" and "to" are required; default host is localhost. '
                             'This information can also be in the main config file under the "_email" key, '
                             'which should be a mapping with keys "from", "to", "host", "port", and "subject".')
    parser.add_argument('--empty', dest='send_on_empty', action='store_true',
                        help="Send a report, even if it is empty (default=print warning and don't send)")
    parser.add_argument('--exonly', dest='must_exist', action='store_true', default=False,
                        help='Only show results where all fields in the constraints are also present in the record')
    parser.add_argument('--file', '-f', dest='constfile', metavar='FILE', default=None,
                        help='Main configuration file. Has constraints, and optionally email info.')
    parser.add_argument('--format', '-F', dest='report_format', metavar='FORMAT', default='html',
                        help='Report format: {} (default=html)'.format(', '.join(formatters.keys())))
    parser.add_argument('--limit', '-m', dest='limit', metavar='NUM', type=int, default=50,
                        help='Limit number of displayed constraint violations per-collection 0=no limit (50)')
    parser.add_argument('--progress', '-p', dest='progress', metavar='NUM', type=int, default=0,
                        help='Report progress every NUM invalid records found')
    parser.add_argument('--user', '-u', dest='user', metavar='NAME', default=None,
                        help='User name, for the report')

    # Diff command.
    subparsers = parser.add_subparsers(description="Other actions")
    subp = subparsers.add_parser("diff", help="Show difference in two collections", parents=[parent_parser])
    subp.set_defaults(func=command_diff)
    subp.add_argument("-e", "--email", dest='email', default=None, metavar="ADDR",
                      help="Email report, instead of printing it to standard output. "
                           "ADDR is of the form: sender/receiver,[receiver2...][/subject].")
    subp.add_argument("-f", "--format", dest="format", metavar="FORMAT", default=None,
                      help="Report format: 'text' (default for screen) or 'html' (default for email)",
                      choices=["text", "html"])
    subp.add_argument("-s", "--email-server", dest="email_server", default="localhost", metavar="HOST",
                      help="Server HOST for an email report, in form hostname[:port]. Default is localhost")
    subp.add_argument("-i", "--info", help="Extra fields for records, as comma-separated list"
                                           ", e.g 'extra,fields,to_include'", dest="info", default=None)
    subp.add_argument("-k", "--key", help="Key for matching records", dest="key", required=True)
    subp.add_argument("-m", "--missing", help="Only report keys that are in the 'old' collection, but "
                                              "not in the 'new' collection", action="store_true", dest="missonly")
    subp.add_argument("-n", "--numeric", dest="numprops", default=None, metavar="EXPR",
                      help="Fields with numeric values that must match, with a tolerance, "
                           "as a comma-separated list. "
                           "e.g., '<name1>=<expr1>, <name2>=<expr2>, ..'. "
                           "<name> is a field name, <expr> syntax is: '+-' (sign), '+-N', '+-N%%', "
                           "where N is a number. "
                           "Adding '=' (must go before '%%') compares >=, <= instead of >, <. "
                           "The '%%' means a percentage relative to the first collection's value.")
    subp.add_argument("-p", "--prop", help="Fields with properties that must match, as comma-separated list "
                                           ", e.g 'these_must,match'", dest="props", default=None)
    subp.add_argument("-q", "--query", metavar="EXPR", help="Expression to filter records before diff. "
                                                            "Uses simplified constraint syntax, e.g., "
                                                            "'name = \"oscar\" and grouchiness > 3'",
                      dest="fltr")
    subp.add_argument("old", help="pymatgen-db JSON config file for the 'old' collection")
    subp.add_argument("new", help="pymatgen-db JSON config file for the 'new' collection")

    args = parser.parse_args(cmdline)

    # Set logging level: 0, 1, and 2 or more -v flags map to ERROR, INFO, DEBUG.
    verbosity = (logging.ERROR, logging.INFO, logging.DEBUG)[min(args.vb, 2)]
    _log.setLevel(verbosity)

    # Handle sub-commands ('func' is only set when a sub-command was selected).
    if hasattr(args, 'func'):
        try:
            return args.func(args)
        except FunctionError as err:
            _log.critical("{}".format(err))
            return -1

    # Connect and authenticate
    conn, config = None, None
    fname = args.config_file if args.config_file else "db.json"
    try:
        config = get_settings(args.config_file)
    except ValueError as err:
        parser.error('Cannot parse configuration "{}": {}'.format(fname, err))
    try:
        conn = pymongo.MongoClient(config['host'], config['port'])
    except pymongo.errors.ConnectionFailure as err:
        parser.error('Cannot connect to server: {}'.format(err))
    db_key = 'database'
    if db_key not in config:
        parser.error('Configuration in "{}" is missing "{}"'.format(fname, db_key))
    db = conn[config[db_key]]
    user, passwd = None, None
    # try both readonly and admin credentials; if both are configured, the
    # admin ones (checked last) are used
    for utype in 'readonly', 'admin':
        if (utype + '_user') in config:
            try:
                user, passwd = config[utype + '_user'], config[utype + '_password']
            except KeyError:
                parser.error('Configuration error in file {0}: {1}_user but no {1}_password'
                             .format(fname, utype))
    if user and passwd:
        if not db.authenticate(user, passwd):
            parser.error('Cannot authenticate to database as user {}'.format(user))
    else:
        _log.info('Connecting to DB "{}" without authentication'.format(config[db_key]))
    # get collection name if given
    db_coll_name = config.get('collection', None)

    # Init constraints from file or command-line
    constraints_from_file = args.constfile is not None
    if constraints_from_file:
        try:
            f = open(args.constfile, 'rb')
        except (IOError, OSError):
            if not os.path.exists(args.constfile):
                parser.error('constraints file "{}" not found'.format(args.constfile))
            else:
                parser.error('constraints file "{}" found, but unreadable'.format(args.constfile))
        # Close the file as soon as it has been parsed.
        with f:
            with Timing('load.yaml', file=f.name):
                constraints = yaml.safe_load(f)
        if constraints is None:
            parser.error('constraints file "{}" is empty'.format(args.constfile))
        if not isinstance(constraints, dict):
            parser.error('constraints file "{}" should contain a mapping of collection names '
                         'to constraints'.format(args.constfile))
    else:
        coll_name = args.coll
        if coll_name is None:
            if db_coll_name is None:
                parser.error('collection name not found')
            else:
                coll_name = db_coll_name
        if not args.constraint:
            parser.error('at least one constraint is required')
        # re-split command-line arguments on commas (not whitespace)
        plist = ' '.join(args.constraint).split(',')
        # make dict(key=collection) of list of the constraints, just like YAML config file
        constraints = {coll_name: [plist]}
        _log.debug("command-line constraints dump: {}".format(constraints))

    # Init aliases from file or command-line
    aliases = {}
    if constraints_from_file:
        astr = constraints.get(ALIASES_KEY, {})
        try:
            aliases = dict(map(nvpstr, astr))
        except (ValueError, AttributeError, TypeError) as err:
            # ValueError: an item has no '='; AttributeError/TypeError: the
            # value is not a list of strings at all.
            parser.error('aliases should be list of name=value, got "{}": {}'.format(astr, err))
    elif len(args.aliases) > 0:
        aliases = dict(args.aliases)

    # Init email from file or command-line
    emailer, email_kw = None, None
    try:
        if args.email is not None:
            email_kw = _parse_email(args.email)
        elif args.constfile:
            email_kw = _config_email(args.constfile)
    except ValueError as err:
        parser.error('Bad email specification: {}'.format(err))
    if email_kw is not None:
        # Explicit settings override the default subject.
        _kwd = {'subject': 'MP Validation Report'}
        _kwd.update(email_kw)
        _log.info("Will email report to: {}".format(mapdump(_kwd)))
        emailer = report.Emailer(**_kwd)

    # Initialize report
    try:
        user_name = os.getlogin()
    except OSError:
        # No login name available; fall back on --user.
        user_name = args.user or 'unknown'
    rpt_hdr = report.ReportHeader("Materials Project Validation Report")
    rpt_hdr.add('Report time', time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime()))
    rpt_hdr.add('Report user', user_name)
    rpt_hdr.add('Report host', socket.gethostname())
    rpt_hdr.add('Limit', args.limit)
    rpt = report.Report(rpt_hdr)
    # pick report formatter
    fmt = args.report_format.lower()
    formatter_class = formatters.get(fmt, None)
    if formatter_class is None:
        parser.error('Unknown format "{}" for --format, choose from: {}'
                     .format(fmt, textlist(formatters.keys())))

    # Run validation for each collection
    validator = Validator(aliases=aliases, max_violations=args.limit, max_dberrors=10, add_exists=args.must_exist)
    if args.progress > 0:
        validator.set_progress(args.progress)
    elapsed = ElapsedTime()
    with Timing("validate", log=_log, elapsed=elapsed):
        for coll_name, constraint_spec_cfg in constraints.items():
            _log.debug("validate {}".format(coll_name))
            sect_hdr = report.SectionHeader(title='Collection "{}"'.format(coll_name))
            rpt_sect = report.ReportSection(sect_hdr)
            # Keys such as "_aliases" and "_email" are settings, not collections.
            if coll_name.startswith(PATTERN_KEY_PREFIX_IGNORE):
                continue
            coll = db[coll_name]
            # Any error below stops validation of all remaining collections.
            try:
                try:
                    cspec = ConstraintSpec(constraint_spec_cfg)
                except ValueError as err:
                    _log.error('processing constraints for {}: {}'.format(coll_name, err))
                    break
                vsect = 0
                for vgroup in validator.validate(coll, cspec, subject=coll_name):
                    if len(vgroup) == 0:
                        continue
                    vsect += 1
                    vletter = letter_num(vsect)
                    sect_hdr = report.SectionHeader(title='Constraint Violations {}'.format(vletter))
                    _log.debug('Collection "{}": {:d} violations'.format(coll_name, len(vgroup)))
                    sect_hdr.add('Condition', str(vgroup.condition))
                    table = report.Table(colnames=('Id', 'TaskId', 'Field', 'Constraint', 'Value'))
                    for viol, vrec in vgroup:
                        rec_id = vrec['_id']
                        task_id = vrec['task_id']
                        # Show a type constraint by the type's name.
                        if isinstance(viol.expected_value, type):
                            viol.expected_value = viol.expected_value.__name__
                        table.add((rec_id, task_id, viol.field,
                                   '{} {}'.format(viol.op, viol.expected_value),
                                   viol.got_value))
                    table.sortby('Id')
                    rpt_sect.add_section(report.ReportSection(sect_hdr, table))
            except DBError as err:
                _log.error('validating collecton {}: DB error: {}'.format(coll_name, err))
                break
            except ValueError as err:
                _log.error('validating collecton {}: {}'.format(coll_name, err))
                break
            rpt.add_section(rpt_sect)
    rpt.header.add('Elapsed time', '{:.2f}s'.format(elapsed.value))
    _log.debug('Run time: {:.2f}'.format(elapsed.value))
    # Report results
    if not args.send_on_empty and rpt.is_empty():
        _log.warning("Report is empty, skipping output")
        retcode = 1
    else:
        formatter = formatter_class()
        text = formatter.format(rpt)
        if emailer:
            # Pick the MIME type from the formatter; default is plain text.
            msgfmt = 'text/plain'
            for mt in ((report.HTMLFormatter, 'text/html'),
                       (report.JSONFormatter, 'text/plain')):
                if isinstance(formatter, mt[0]):
                    msgfmt = mt[1]
                    break
            n = emailer.send(text, fmt=msgfmt)
            if n < 1:
                _log.error("Email report not sent")
        else:
            print(text)

    return retcode

# When run as a script, use main()'s return value as the process exit status.
if __name__ == '__main__':
    sys.exit(main())
