#!/usr/bin/env python
# -*- coding: utf8 -*-
"""
Run validation tests of (MongoDB) databases.
Database connections are configured from a file
and the constraints to validate against can come from a file or the command-line.
"""
__author__ = "Dan Gunter"
__copyright__ = "Copyright 2012-2013, The Materials Project"
__version__ = "1.0"
__maintainer__ = "Dan Gunter"
__email__ = "dkgunter@lbl.gov"
__status__ = "Development"
__date__ = "3/29/13"

import argparse
import logging
import os
import pymongo
import re
import socket
import sys
import time
import urlparse
import yaml
# local module
from matgendb.util import get_settings
from matgendb.vv.validate import ConstraintSpec, Validator, DBError
from matgendb.vv import report
from matgendb.vv.util import Timing, ElapsedTime, letter_num

# Initialize module logging.
# A single stream handler is attached to the 'mg' logger at import time;
# the logger's level is set later, in main(), from the --verbose option.
_log_name = 'mg'
_log = logging.getLogger(_log_name)
_handler = logging.StreamHandler()
# Message layout: left-justified level name, logger name in brackets, text
_handler.setFormatter(logging.Formatter('%(levelname)-7s [%(name)s] %(message)s'))
_log.addHandler(_handler)

## Constants

DB_KEY = '_db'              # in YAML file
ALIASES_KEY = '_aliases'    # in YAML file; main() reads field aliases from this key

# Top-level keys of the constraints mapping that start with this prefix
# are not collection names, so main() skips them when validating.
PATTERN_KEY_PREFIX_IGNORE = '_'

## Functions


def wstrip(s):
    """Return a copy of `s` without leading or trailing whitespace."""
    stripped = s.strip()
    return stripped


def textlist(items):
    """Join an iterable of strings into one comma-and-space separated string."""
    separator = ', '
    return separator.join(items)

def mapdump(d):
    """Render mapping `d` as a 'key=value, key=value' string.

    Items appear in the mapping's own iteration order.
    """
    pairs = ['{}={}'.format(key, value) for key, value in d.iteritems()]
    return textlist(pairs)

def nvpstr(s):
    """Split a 'name=value' string into a (name, value) tuple.

    Only the first '=' separates the two parts; whitespace around each
    part is removed. Used as a `type` callable for argparse.

    :raise: ValueError if the input contains no '='
    """
    name, equals, value = s.partition('=')
    if not equals:
        raise ValueError('expected "name=value", got "{}"'.format(s))
    return (name.strip(), value.strip())

def _parse_email(s):
    """Parse an email specification into component parts.

    Keys:
    - sender
    - recipients
    - host
    - port

    :return: Parsed email information
    :rtype: dict
    """
    d = {}
    # parse command-line args
    items = s.split(':')
    if len(items) < 2:
        raise ValueError('from:to required in "{}"'.format(s))
    if len(items) > 4:
        raise ValueError('too many values in "{}"'.format(s))
    d['sender'] = items[0]
    d['recipients'] = [x.strip() for x in items[1].split(',')]
    if len(items) > 2:
        d['server'] = items[2]
    if len(items) > 3:
	if '/' in items[3]:
	    p1, p2 = items[3].split('/')
        d['port'], d['subject'] = int(p1), p2
    else:
        d['port'] = int(items[3])
    return d

def _config_email(fname):
    """Read email settings from the "_email" mapping of a YAML file.

    Keys of the returned dict:
    - sender: value of "from" (required)
    - recipients: value of "to" (required); a single value is wrapped
      in a list
    - port: int (only if "port" is present)
    - host: only if "host" is present
    - subject: only if "subject" is present

    :param fname: Path of the YAML file
    :return: Email settings, or None if the file has no "_email" key
             (or is not a mapping at all, e.g. it is empty)
    :rtype: dict or None
    :raise: ValueError if the file cannot be read, or if "from" or "to"
            is missing from the "_email" mapping
    """
    try:
        # open() raises IOError on Python 2 and OSError on Python 3
        with open(fname) as f:
            y = yaml.safe_load(f)
    except (IOError, OSError):
        raise ValueError('cannot read from file "{}"'.format(fname))
    # An empty file loads as None, which has no "_email" section either
    if not isinstance(y, dict):
        return None
    info = y.get('_email', None)
    if info is None:
        return None
    d = {}
    try:
        d['sender'] = info['from']
        recipients = info['to']
    except KeyError:
        raise ValueError('from/to not found in file {} mapping key "_email"'
                         .format(fname))
    # A lone address may load as str or (on Python 2) unicode; wrap any
    # non-sequence value so callers always get a list of recipients.
    if not isinstance(recipients, (list, tuple)):
        recipients = [recipients]
    d['recipients'] = recipients
    if 'port' in info:
        d['port'] = int(info['port'])
    # NOTE(review): _parse_email() stores the mail server under "server",
    # this function under "host" -- confirm which name report.Emailer expects.
    if 'host' in info:
        d['host'] = info['host']
    if 'subject' in info:
        d['subject'] = info['subject']
    return d


def parse_mongo_url(url):
    """Parse mongodb:// URL, return dict of information.

    The URL must have the form ``mongodb://user:password@host[:port]/db``.

    XXX: Not used right now.

    :param url: The URL to parse
    :return: dict with keys 'host', 'user_name', 'password', 'db' and,
             when the URL has a port, 'port' (int)
    :rtype: dict
    :raise: ValueError if the network location of the URL is not of the
            form ``user:password@host[:port]``
    """
    parts = urlparse.urlsplit(url)
    netloc = parts[1]
    # The host may contain digits and hyphens (e.g. "db-1.example.com" or
    # an IPv4 address). The trailing "$" makes the pattern cover the whole
    # network location, so a host is never silently cut short.
    urlexpr = r'''
            (?P<user>\w+)           # user
            :(?P<password>\S+)      # password
            @(?P<host>[\w.\-]+)     # host
            (?::(?P<port>\d+))?     # optional port
            $'''
    m = re.match(urlexpr, netloc, flags=re.X)
    if m is None:
        raise ValueError("bad format: {}".format(url))
    g = m.groupdict()
    d = {'host': g['host'], 'user_name': g['user'],
         'password': g['password']}
    if g['port'] is not None:
        d['port'] = int(g['port'])
    # Database name is the URL path without its leading "/"
    d['db'] = parts[2][1:]
    return d

def main(cmdline=None):
    """Main program entry point.

    Parses the command line, connects to the configured MongoDB database,
    validates each collection against its constraints, then emails the
    formatted report or prints it to standard output.

    Command-line and configuration errors are reported with
    ``parser.error()``, which exits the program.

    :param cmdline: command-line arguments, default is ``sys.argv[1:]``
    :return: Status, 0=ok otherwise an error (1 if the report was empty
             and was therefore not output)
    :rtype : int
    """
    retcode = 0

    if cmdline is None:
        cmdline = sys.argv[1:]
    desc = ' '.join(__doc__.split())
    formatters = dict(json=report.JSONFormatter, html=report.HTMLFormatter,
                      md=report.MarkdownFormatter)
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('constraint', nargs='*')
    parser.add_argument('--alias', '-a', dest='aliases', action='append', type=nvpstr, metavar="ALIAS", default=[],
                        help='Alias for a field used in a constraint or condition, in form alias=name_in_db')
    parser.add_argument('--config', '-c', dest='config_file', metavar='FILE', default=None,
                        help="Configuration file for database connection. "
                             "Generate one using mgdb init --config filename.json, if necessary. "
                             "Otherwise, the code searches for a db.json. If "
                             "none is found, a no-authentication "
                             "localhost:27017/vasp database is assumed.")
    parser.add_argument('--collection', '-C', dest='coll', metavar='NAME', default=None,
                        help='Collection name for constraints given on command-line')
    parser.add_argument('--email', '-e', dest='email', default=None,
                        help='Email report to EMAIL at {from}:{to}[:host[:port[/subject]]]. '
                             'The "from" and "to" are required; default host is localhost. '
                             'This information can also be in the main config file under the "_email" key, '
                             'which should be a mapping with keys "from", "to", "host", "port", and "subject".')
    parser.add_argument('--empty', dest='send_on_empty', action='store_true',
                        help="Send a report, even if it is empty (default=print warning and don't send)")
    parser.add_argument('--exonly', dest='must_exist', action='store_true', default=False,
                        help='Only show results where all fields in the constraints are also present in the record')
    parser.add_argument('--file', '-f', dest='constfile', metavar='FILE', default=None,
                        help='Main configuration file. Has constraints, and optionally email info.')
    parser.add_argument('--format', '-F', dest='report_format', metavar='FORMAT', default='html',
                        help='Report format: {} (default=html)'.format(', '.join(formatters.keys())))
    parser.add_argument('--limit', '-m', dest='limit', metavar='NUM', type=int, default=50,
                        help='Limit number of displayed constraint violations per-collection 0=no limit (50)')
    parser.add_argument('--progress', '-p', dest='progress', metavar='NUM', type=int, default=0,
                        help='Report progress every NUM invalid records found')
    parser.add_argument('--user', '-u', dest='user', metavar='NAME', default=None,
                        help='User name, for the report')
    # default=0: without it the count is None when -v is absent, and the
    # numeric comparisons below rely on how None happens to compare to int
    parser.add_argument('-v', '--verbose', dest='vb', action='count', default=0,
                        help='Log message verbosity')

    args = parser.parse_args(cmdline)

    # Set logging level from --verbose option
    if args.vb > 1:
        _log.setLevel(logging.DEBUG)
    elif args.vb > 0:
        _log.setLevel(logging.INFO)
    else:
        _log.setLevel(logging.WARN)

    # Connect and authenticate
    conn, config = None, None
    # fname is only used in messages; get_settings() gets the raw option
    fname = args.config_file if args.config_file else "db.json"
    try:
        config = get_settings(args.config_file)
    except ValueError as err:
        parser.error('Cannot parse configuration "{}": {}'.format(fname, err))
    try:
        conn = pymongo.MongoClient(config['host'], config['port'])
    except pymongo.errors.ConnectionFailure as err:
        parser.error('Cannot connect to server: {}'.format(err))
    db_key = 'database'
    if db_key not in config:
        parser.error('Configuration in "{}" is missing "{}"'.format(fname, db_key))
    db = conn[config[db_key]]
    user, passwd = None, None
    # try both readonly and admin credentials; if both are configured the
    # admin pair is the one used, because it is assigned last
    for utype in 'readonly', 'admin':
        if (utype + '_user') in config:
            try:
                user, passwd = config[utype + '_user'], config[utype + '_password']
            except KeyError:
                parser.error('Configuration error in file {0}: {1}_user but no {1}_password'
                             .format(fname, utype))
    if user and passwd:
        if not db.authenticate(user, passwd):
            parser.error('Cannot authenticate to database as user {}'.format(user))
    else:
        _log.info('Connecting to DB "{}" without authentication'.format(config[db_key]))
    # get collection name if given
    db_coll_name = config['collection'] if 'collection' in config else None

    # Init constraints from file or command-line
    constraints_from_file = args.constfile is not None
    if constraints_from_file:
        try:
            f = open(args.constfile, 'rb')
        except IOError:
            if not os.path.exists(args.constfile):
                parser.error('constraints file "{}" not found'.format(args.constfile))
            else:
                parser.error('constraints file "{}" found, but unreadable'.format(args.constfile))
        # close the file as soon as it has been loaded
        with f:
            with Timing('load.yaml', file=f.name):
                constraints = yaml.safe_load(f)
    else:
        coll_name = args.coll
        if coll_name is None:
            if db_coll_name is None:
                parser.error('collection name not found')
            else:
                coll_name = db_coll_name
        if not args.constraint:
            parser.error('at least one constraint is required')
        # re-split command-line arguments on commas (not whitespace)
        plist = ' '.join(args.constraint).split(',')
        # make dict(key=collection) of list of the constraints, just like YAML config file
        constraints = {coll_name: [plist]}
        _log.debug("command-line constraints dump: {}".format(constraints))

    # Init aliases from file or command-line
    if constraints_from_file:
        if constraints is None:
            parser.error('constraints file "{}" is empty'.format(args.constfile))
        if not isinstance(constraints, dict):
            # e.g. a top-level YAML list or scalar; there is nothing to
            # look collections up in, so stop here with a clear message
            parser.error('constraints file "{}" must contain a mapping'.format(args.constfile))
        astr = constraints.get(ALIASES_KEY, [])
        try:
            aliases = dict(map(nvpstr, astr))
        except (ValueError, AttributeError, TypeError) as err:
            # ValueError: an item lacks "="; the other two: the value is
            # not a list of strings
            parser.error('aliases should be list of name=value, got "{}": {}'.format(astr, err))
    elif len(args.aliases) > 0:
        aliases = dict(args.aliases)
    else:
        aliases = {}

    # Init email from file or command-line
    emailer, email_kw = None, None
    try:
        if args.email is not None:
            email_kw = _parse_email(args.email)
        elif args.constfile:
            email_kw = _config_email(args.constfile)
    except ValueError as err:
        parser.error('Invalid email settings: {}'.format(err))
    if email_kw is not None:
        # default subject, replaced by any subject in the parsed settings
        _kwd = {'subject': 'MP Validation Report'}
        _kwd.update(email_kw)
        _log.info("Will email report to: {}".format(mapdump(_kwd)))
        emailer = report.Emailer(**_kwd)

    # Initialize report
    # An explicit --user wins; otherwise fall back to the login name
    user_name = args.user
    if not user_name:
        try:
            user_name = os.getlogin()
        except OSError:
            user_name = 'unknown'
    rpt_hdr = report.ReportHeader("Materials Project Validation Report")
    rpt_hdr.add('Report time', time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime()))
    rpt_hdr.add('Report user', user_name)
    rpt_hdr.add('Report host', socket.gethostname())
    rpt_hdr.add('Limit', args.limit)
    rpt = report.Report(rpt_hdr)
    # pick report formatter
    fmt = args.report_format.lower()
    formatter_class = formatters.get(fmt, None)
    if formatter_class is None:
        parser.error('Unknown format "{}" for --format, choose from: {}'
                     .format(fmt, textlist(formatters.keys())))

    # Run validation for each collection
    validator = Validator(aliases=aliases, max_violations=args.limit, max_dberrors=10, add_exists=args.must_exist)
    if args.progress > 0:
        validator.set_progress(args.progress)
    elapsed = ElapsedTime()
    with Timing("validate", log=_log, elapsed=elapsed):
        for coll_name, constraint_spec_cfg in constraints.items():
            # keys such as "_aliases" and "_email" are settings, not collections
            if coll_name.startswith(PATTERN_KEY_PREFIX_IGNORE):
                continue
            _log.debug("validate {}".format(coll_name))
            sect_hdr = report.SectionHeader(title='Collection "{}"'.format(coll_name))
            rpt_sect = report.ReportSection(sect_hdr)
            coll = db[coll_name]
            # Any error below stops validation of all remaining collections,
            # and the current collection's section is not added to the report.
            try:
                try:
                    cspec = ConstraintSpec(constraint_spec_cfg)
                except ValueError as err:
                    _log.error('processing constraints for {}: {}'.format(coll_name, err))
                    break
                vsect = 0
                for vgroup in validator.validate(coll, cspec, subject=coll_name):
                    if len(vgroup) == 0:
                        continue
                    # number only the non-empty violation groups
                    vsect += 1
                    vletter = letter_num(vsect)
                    sect_hdr = report.SectionHeader(title='Constraint Violations {}'.format(vletter))
                    _log.debug('Collection "{}": {:d} violations'.format(coll_name, len(vgroup)))
                    sect_hdr.add('Condition', str(vgroup.condition))
                    table = report.Table(colnames=('Id', 'TaskId', 'Field', 'Constraint', 'Value'))
                    for viol, vrec in vgroup:
                        rec_id = vrec['_id']
                        task_id = vrec['task_id']
                        # show a type constraint by the type's name
                        if isinstance(viol.expected_value, type):
                            viol.expected_value = viol.expected_value.__name__
                        table.add((rec_id, task_id, viol.field,
                                   '{} {}'.format(viol.op, viol.expected_value),
                                   viol.got_value))
                    table.sortby('Id')
                    rpt_sect.add_section(report.ReportSection(sect_hdr, table))
            except DBError as err:
                _log.error('validating collection {}: DB error: {}'.format(coll_name, err))
                break
            except ValueError as err:
                _log.error('validating collection {}: {}'.format(coll_name, err))
                break
            rpt.add_section(rpt_sect)
    rpt.header.add('Elapsed time', '{:.2f}s'.format(elapsed.value))
    _log.debug('Run time: {:.2f}'.format(elapsed.value))
    # Report results
    if not args.send_on_empty and rpt.is_empty():
        _log.warning("Report is empty, skipping output")
        retcode = 1
    else:
        formatter = formatter_class()
        text = formatter.format(rpt)
        if emailer:
            # HTML reports are sent as text/html, every other format as text/plain
            if isinstance(formatter, report.HTMLFormatter):
                msgfmt = 'text/html'
            else:
                msgfmt = 'text/plain'
            n = emailer.send(text, fmt=msgfmt)
            if n < 1:
                _log.error("Email report not sent")
        else:
            print(text)

    return retcode

if __name__ == '__main__':
    sys.exit(main())
