#!/usr/bin/env python
"""
Gristle_processor is used to run processes against files, one at a time.
Its functionality includes:
   - support for command line, config, and eventually env configuration items
   - extensive logging
   - support for built-in actions such as delete, list

To do:
   - add support for external actions - such as user scripts
   - add support for acting against files in their mtime or filename order
     in order to support restarts

This source code is protected by the BSD license.  See the file "LICENSE"
in the source code root directory for the full language or refer to it here:
  http://opensource.org/licenses/BSD-3-Clause

Copyright 2014 Ken Farmer
"""

#--- standard modules ------------------
from __future__ import division
import sys
import argparse
import os
import time
import re
import math
import operator
import shutil
import errno
from pprint import pprint as pp

#from pprint import pprint as pp
import validictory as valid
import appdirs
import yaml
import cletus.cletus_log          as log
import cletus.cletus_supp         as supp
import cletus.cletus_job          as job
import cletus.cletus_config       as config

#--- Ignore SIG_PIPE and don't throw exceptions on it
#--- (http://docs.python.org/library/signal.html)
from signal import signal, SIGPIPE, SIG_DFL
signal(SIGPIPE, SIG_DFL)

#--- gristle modules -------------------
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import gristle.file_type           as file_type
import gristle.common              as comm


# Module-wide logger.  Stays None until setup_logs() rebinds it to the
# cletus logger; abort() checks for None before logging.
logger = None
# Application name handed to the cletus logging, job-check and
# suppress-check helpers.
APP_NAME = 'datagristle'


def main():
    """Runs the program from start to finish.

       Set up:
            - gets args
            - gets config
            - sets up logging
            - ensures it isn't already running
            - ensures it isn't suppressed (temporarily blocked)
       For each directory section in config (everything except the
       'housekeeping' section):
            - processes directory
       Tear down:
            - closes the job check, even when a section fails or aborts

       Returns:
            - 0 on success; failures leave through sys.exit() or an exception
    """
    #----- set up ----
    args           = get_args()
    # Named cfg rather than config so the imported cletus config module is
    # not shadowed inside this function.
    cfg, config_fn = get_config(arg_config_file=args.config_file,
                                arg_config_name=args.config_name)

    # The command line wins over the config file.  A config without a
    # housekeeping.log_level falls back to 'debug', the setup_logs default,
    # rather than failing with a bare KeyError.
    housekeeping = cfg.get('housekeeping') or {}
    log_level    = args.log_level or housekeeping.get('log_level') or 'debug'
    setup_logs(args.log_dir, log_level)
    logger.info('program starting')
    logger.info('config_file used: %s' % config_fn)

    jobcheck = exit_if_already_running()
    try:
        exit_if_suppressed()

        for dir_sect_name in cfg:
            if dir_sect_name == 'housekeeping':
                continue
            logger.info('section starting for %s' % dir_sect_name)
            process_dir = ProcessDir(cfg[dir_sect_name],
                                     args.test_run)
            process_dir.walk()
            logger.info('section terminating for %s' % dir_sect_name)
    finally:
        # Always release the job check, including on abort()/sys.exit().
        jobcheck.close()

    logger.info('program terminating successfully')
    return 0



class ProcessDir(object):
    """
    Drives the processing of one config section: validates the section,
    walks the section's root directory and hands every qualifying directory
    or file to the configured action.
    """

    def __init__(self, kwargs, test_run=False):
        """ Inputs:
              - kwargs:   the config section (a dictionary) for one directory
              - test_run: passed through to the FileActioner
        """
        self.test_run = test_run

        self._validate_config_section(kwargs)

        self.name          = kwargs['name']
        self.gather_kwargs = kwargs['gathering_phase']
        # inclusion and exclusion phases are optional: default to no criteria
        self.in_kwargs     = (kwargs['inclusion_phase']
                              if 'inclusion_phase' in kwargs else {})
        self.ex_kwargs     = (kwargs['exclusion_phase']
                              if 'exclusion_phase' in kwargs else {})
        self.act_kwargs    = kwargs['action_phase']

        self.file_analyzer = FileAnalyzer(self.in_kwargs, self.ex_kwargs)
        self.file_actioner = FileActioner(self.act_kwargs, test_run)

    def _validate_config_section(self, kwargs):
        """ Runs the ConfigValidator against the section, then aborts if the
            gathering_phase dir is missing or is not a directory.
        """
        self.kwargs = kwargs
        ConfigValidator(kwargs)   # aborts the program on an invalid section

        gather_dir = self.kwargs['gathering_phase']['dir']
        if not (os.path.exists(gather_dir) and os.path.isdir(gather_dir)):
            abort('ERROR: invalid config file',
                  'The config file dir in gathering_phase.dir was not found '
                  'or is not a directory: %s' % gather_dir, 1)

    def walk(self):
        """ Walks the gathering_phase dir top-down.  Within each directory
            visited the sub-directories are handled first, then the files.
        """
        # how about a recursion on/off switch?
        # how to apply inclusion/exclusion criteria to dirs?
        # need to offer a sorted-walk, in order to pick up all files, then sort
        # by some timestamp
        for root, dirs, files in os.walk(self.gather_kwargs['dir']):
            logger.info('root: %s' % root)
            self._process_entries(root, dirs, 'dirname: %s')
            self._process_entries(root, files, 'filename: %s')

    def _process_entries(self, root, names, log_template):
        """ Logs each name, then runs the actions on those that qualify. """
        for entry_name in names:
            logger.info(log_template % entry_name)
            entry_path = os.path.join(root, entry_name)
            if self.file_analyzer.qualify_file(entry_name, entry_path):
                self.file_actioner.run_all_actions(entry_path)



class FileActioner(object):
    """
    Applies the configured action to a file or directory.

    Inputs:
       - action_kwargs: the action_phase section of the config, holding
         either 'action_builtin' or 'action_external'
       - test_run: when True the delete action only logs, nothing is removed
    """

    def __init__(self, action_kwargs, test_run=False):
        self.act_kwargs     = action_kwargs
        self.file_name      = None
        self.test_run       = test_run

    def run_all_actions(self, file_name):
        """ Runs the builtin action if one is configured, otherwise the
            external action.
        """
        self.file_name      = file_name
        if 'action_builtin' in self.act_kwargs:
            self._run_action_builtin()
        else:
            self._run_action_external()

    def _run_action_builtin(self):
        """ Runs built-in action: 'list' or 'delete'.
        """
        action = self.act_kwargs['action_builtin']
        if action == 'list':
            self._action_list(self.file_name)
        elif action == 'delete':
            self._action_delete(self.file_name)
        else:
            # e.g. 'compress': accepted by the config validation but has no
            # implementation here - say so instead of silently doing nothing.
            logger.warning('builtin action %s is not implemented, skipping: %s'
                           % (action, self.file_name))

    def _run_action_external(self):
        """ Placeholder for external actions: currently only logs the
            configured action and the file, no command is run.
            Raises:
              - KeyError if the config has no 'action_external' entry
        """
        action = self.act_kwargs['action_external']
        logger.debug('running external action %s on file %s' % (action, self.file_name))

    def _action_delete(self, fqfn):
        """ Deletes a file, or a directory and everything beneath it.
            - does nothing beyond logging when test_run is set
            - does nothing when fqfn is neither a file nor a directory
            - logs a warning if fqfn vanished before it could be removed
            - aborts the program on any other OSError
        """
        logger.debug('action_delete file: %s' % fqfn)
        if self.test_run:
            return

        if os.path.isfile(fqfn):
            remover, kind = os.remove, 'file'
        elif os.path.isdir(fqfn):
            remover, kind = shutil.rmtree, 'dir'
        else:
            return

        try:
            remover(fqfn)
        except OSError as e:
            # e.errno works on both python 2 and 3; indexing the exception
            # (e[0]) only works on python 2.
            if e.errno == errno.ENOENT:
                logger.warning('Delete failed - %s already gone: %s' % (kind, fqfn))
            else:
                abort('delete failed: %s' % fqfn, 'Delete msg: %s' % e)

    def _action_list(self, fqfn):
        """ Prints the fully-qualified name to stdout.
        """
        logger.debug('%s on file %s' % ('list', self.file_name))
        print(fqfn)





class FileAnalyzer(object):
    """
    Decides whether a file or directory qualifies for the action phase,
    based on the inclusion_phase and exclusion_phase config sections.

    Inputs:
       - in_kwargs: inclusion_phase section of the config (may be empty)
       - ex_kwargs: exclusion_phase section of the config (may be empty)
    """

    def __init__(self, in_kwargs, ex_kwargs):
        self.in_kwargs    = in_kwargs
        self.ex_kwargs    = ex_kwargs
        # seconds per abbreviated unit; months are 30 days, years 365 days
        self.seconds      = {'s':1, 'm':60, 'h':3600, 'd':86400,
                             'w':604800, 'M':2592000, 'y':31536000}
        # long unit names accepted by the 'current_time - n units' format
        self.seconds_translator = {'seconds':'s', 'second':'s',
                                   'minutes':'m', 'minute':'m',
                                   'hours':'h',   'hour':'h',
                                   'days':'d',    'day':'d',
                                   'weeks':'w',   'week':'w',
                                   'months':'M',  'month':'M',
                                   'years':'y',   'year':'y'}

    def qualify_file(self, fn, fqfn):
        """ Inputs:
              - fn:   name of the file or directory, without its path
              - fqfn: fully-qualified name of the same entry
            Returns:
              - True if the entry passes both the inclusion and the
                exclusion phase, False otherwise
        """
        if not self._check_inclusion_phase(fn, fqfn):
            logger.debug('file disqualified in inclusion_phase: %s' % fqfn)
            return False

        if not self._check_exclusion_phase(fn, fqfn):
            logger.debug('file disqualified in exclusion_phase: %s' % fqfn)
            return False

        logger.debug('file qualified: %s' % fqfn)
        return True

    def _check_inclusion_phase(self, file_name, fqfn):
        """ Applies the inclusion criteria:
              - fntime_time present: the date within the file name must
                pass the fntime comparison
              - else atime_time present: the file's atime must pass the
                atime comparison
              - neither present: everything is included
            When one of the time checks passed, the entry must also be a
            file if file_type is 'file', or a directory for any other
            file_type value.
            NOTE(review): a file_type that is absent from the config also
            ends up in the directory branch, assuming comm.dict_coalesce
            returns something other than 'file' in that case - confirm.
            Returns:
              - True if included, False otherwise
        """
        if 'fntime_time' in self.in_kwargs:
            if not self._filenamedate_op_argdate(
                    file_name,
                    comm.dict_coalesce(self.in_kwargs, 'fntime_filter_regex'),
                    comm.dict_coalesce(self.in_kwargs, 'fntime_extract_regex'),
                    comm.dict_coalesce(self.in_kwargs, 'fntime_format'),
                    comm.dict_coalesce(self.in_kwargs, 'fntime_op'),
                    comm.dict_coalesce(self.in_kwargs, 'fntime_time')):
                return False

        elif 'atime_time' in self.in_kwargs:
            # Argument order must match _atime_op_argdate(fn, op, argdate):
            # the operator first, then the time delta.
            if not self._atime_op_argdate(
                    fqfn,
                    comm.dict_coalesce(self.in_kwargs, 'atime_op'),
                    comm.dict_coalesce(self.in_kwargs, 'atime_time')):
                return False

        else:
            return True

        if comm.dict_coalesce(self.in_kwargs, 'file_type') == 'file':
            return os.path.isfile(fqfn)
        return os.path.isdir(fqfn)

    def _check_exclusion_phase(self, fn, fqfn):
        """ Placeholder: no exclusion criteria are implemented yet.
            Returns:
              - True, meaning the entry passed the (non-existent) checks
        """
        # exclude if symbolic link
        # exclude if root?
        return True   # true == passed checks

    def _ctime_op_argdate(self, fn, op, argdate):
        """ Performs op comparison of file ctime against argdate.
            Inputs:
             - fn: name of the file to inspect
             - op: 'gt' or 'lt'
             - argdate: time delta, see _get_delta for the formats
            Returns:
             - boolean: True if (file ctime) op (current time - argdate),
               False otherwise
        """
        return self._filetime_op_argdate(os.path.getctime, fn, op, argdate)

    def _mtime_op_argdate(self, fn, op, argdate):
        """ Performs op comparison of file mtime against argdate.
            Inputs:
             - fn: name of the file to inspect
             - op: 'gt' or 'lt'
             - argdate: time delta, see _get_delta for the formats
            Returns:
             - boolean: True if (file mtime) op (current time - argdate),
               False otherwise
        """
        return self._filetime_op_argdate(os.path.getmtime, fn, op, argdate)

    def _atime_op_argdate(self, fn, op, argdate):
        """ Performs op comparison of file atime against argdate.
            Inputs:
             - fn: name of the file to inspect
             - op: 'gt' or 'lt'
             - argdate: time delta, see _get_delta for the formats
            Returns:
             - boolean: True if (file atime) op (current time - argdate),
               False otherwise
        """
        return self._filetime_op_argdate(os.path.getatime, fn, op, argdate)

    def _filetime_op_argdate(self, time_getter, fn, op, argdate):
        """ Shared body of the ctime/mtime/atime comparisons.  time_getter
            is the os.path function that returns the timestamp of fn.
        """
        # The threshold is resolved first so that an invalid argdate is
        # reported before the file is touched.
        threshold_epoch = self._get_threshold_epoch(argdate)
        return dynamic_comparison(time_getter(fn), op, threshold_epoch)

    def _filenamedate_op_argdate(self, fn, filter_rgx, extract_rgx, date_format, op, arg_date):
        """ Compares the date embedded in a file name against a threshold.
            Inputs:
            - fn: file name that contains the date
            - filter_rgx: optional regex that isolates the part of fn in
              which extract_rgx is applied
            - extract_rgx: regex that matches exactly the date string
            - date_format: time.strptime format of the date string
            - op: 'gt' or 'lt'
            - arg_date: time delta, see _get_delta for the formats
            Outputs:
            - True if (file name date) op (current time - arg_date)
            - False if not
            - None if no date was found in fn, or if the date found does
              not fit date_format (a warning is logged)
            Raises:
            - ValueError (from get_regex_substring) if filter_rgx matched
              but extract_rgx found nothing within that match
        """
        assert op in ['gt','lt']

        #---- extract filename date string:
        file_substring = get_regex_substring(fn, extract_rgx, filter_rgx)
        if file_substring is None: # regex didn't find a date
            return None
        logger.debug('filenamedate_op_argdate file regex extracted date: %s' % file_substring)

        #---- turn filename date string into python epoch time:
        try:
            file_date = time.strptime(file_substring, date_format)
        except ValueError:
            logger.warning('invalid date format on: %s' % fn)
            return None

        threshold_epoch = self._get_threshold_epoch(arg_date)
        filename_epoch  = time.mktime(file_date)

        return dynamic_comparison(filename_epoch, op, threshold_epoch)

    def _get_threshold_epoch(self, time_delta):
        """ Returns the epoch time that lies time_delta before now. """
        return time.time() - self._get_delta(time_delta)

    def _get_delta(self, time_delta):
        """ Inputs:
                - time_delta: one of two formats:
                     a.  current_time - n units
                         - ex: 'current_time - 365 days' == 1 year
                     b.  n[unit] to subtract from current_time
                         - ex: '2s' == 2 seconds
                         - ex: '3m' == 3 minutes
                         - ex: '4h' == 4 hours
                         - ex: '5d' == 5 days
                         - ex: '6w' == 6 weeks
                         - ex: '7M' == 7 Months
                         - ex: '8y' == 8 years
            Returns:
                - number of seconds that time_delta translates to
            Raises:
                - ValueError if time_delta is shorter than 2 characters
            Any other malformed value ends the program through abort().
            Ex:
                - _get_delta('4s') returns 4
                - _get_delta('1m') returns 60
                - _get_delta('1h') returns 3600
                - _get_delta('1d') returns 86400
                - _get_delta('current_time - 1 day') returns 86400
        """
        if len(time_delta) < 2:
            raise ValueError('time delta is too short to be valid: %r'
                             % (time_delta,))

        #---- handle the current_time format
        if 'current_time' in time_delta:
            token = time_delta.split()
            if (len(token) != 4
                    or token[0] != 'current_time'
                    or token[1] != '-'
                    or not self._is_integer(token[2])
                    or token[3] not in self.seconds_translator):
                abort('ERROR: Config file is invalid',
                      'fntime_time should have format like: current_time - n days. '
                      'What was found:  %s' % time_delta, 1)
            measure = token[2]
            unit    = self.seconds_translator[token[3]]

        #---- handle the abbreviated format
        else:
            unit    = time_delta[-1:]
            measure = time_delta[:-1]
            if unit not in self.seconds or not self._is_integer(measure):
                abort('ERROR: Config file is invalid',
                      'fntime_time should have format like: n[unit] where n is an integer and '
                      'unit is a time abbreviation, one of: s,m,h,d,w,M,y. '
                      'What was found: %s' % time_delta, 1)

        return int(measure) * self.seconds[unit]

    @staticmethod
    def _is_integer(value):
        """ True if int(value) succeeds.  Stricter than a float test on
            purpose: '1.5' must be rejected here, because the measure is
            converted with int() afterwards.
        """
        try:
            int(value)
        except (TypeError, ValueError):
            return False
        return True






def get_config(arg_config_file=None, arg_config_name=None):
    """ Locates and loads the yaml config file.
        Inputs:
           - arg_config_file: name of a config file in a user-specified
             directory
           - arg_config_name: name (without the .yml extension) of a config
             in the gristle_processor directory beneath the datagristle
             user config dir
           Exactly one of the two must be provided.
        Returns:
           - tuple of (config contents as loaded by yaml, config file name)
        Aborts the program if neither argument was provided or if the
        file does not exist.
    """
    assert not (arg_config_file and arg_config_name)

    if arg_config_file:
        config_fn = arg_config_file
    elif arg_config_name:
        config_fn = os.path.join(appdirs.user_config_dir('datagristle'),
                                 'gristle_processor',
                                 '%s.yml' % arg_config_name)
    else:
        abort('ERROR: config file missing',
              'User must provide either a config-fn or config-name', 1)

    if not os.path.exists(config_fn):
        abort('ERROR: config file does not exist',
              'file not found: %s' % config_fn, 1)

    # The with-block closes the file handle once parsing is done.  The
    # result is named loaded_config so the imported cletus config module is
    # not shadowed.
    with open(config_fn) as config_fp:
        loaded_config = yaml.safe_load(config_fp)

    return loaded_config, config_fn




class ConfigValidator(object):
    """ Validates one directory section of the config file - to ensure that
        it is correctly structured.   This includes checks on its structure,
        keywords, and values.

        Validation runs at construction time.  Any error found ends the
        program through abort().
    """

    def __init__(self, config):
        self.config = config
        self._validate_document(config)

    def _validate_document(self, config):
        """ Validates the section - with a few high-level checks, and by
            running the phase-specific validation for each phase found.
            gathering_phase and action_phase are required, inclusion_phase
            is optional.
        """
        if 'gathering_phase' in self.config:
            self._validate_gathering_phase(self.config['gathering_phase'])
        else:
            abort('ERROR: invalid config file',
                  'The required gathering_phase was not in the config', 1)

        if 'inclusion_phase' in self.config:
            self._validate_inclusion_phase(self.config['inclusion_phase'])

        if 'action_phase' in self.config:
            self._validate_action_phase(self.config['action_phase'])
        else:
            abort('ERROR: invalid config file',
                  'The required action_phase was not in the config', 1)

    def _validate_against_schema(self, phase_config, schema):
        """ Runs validictory on one phase, aborts on a field error. """
        try:
            valid.validate(phase_config, schema)
        except valid.validator.FieldValidationError as e:
            abort('ERROR: Config file is invalid', str(e), 1)

    def _validate_gathering_phase(self, gathering_config):
        """ gathering_phase must provide the root directory as a string. """
        gathering_schema = {"type":"object",
                            "properties":{"dir": {"type": "string",
                                                  "required": True},
                                         }
                           }
        self._validate_against_schema(gathering_config, gathering_schema)

    def _validate_inclusion_phase(self, inclusion_config):
        """ Checks the types of the fntime_* and atime_* items, then makes
            sure that a partially provided set is complete.
        """
        inclusion_schema = {"type":"object",
                            "properties":{"fntime_time": {"type": "string",
                                                          "required": False},
                                          "fntime_filter_regex": {"type": "string",
                                                                  "required": False},
                                          "fntime_extract_regex": {"type": "string",
                                                                   "required": False},
                                          "fntime_format": {"type": "string",
                                                            "required": False},
                                          "fntime_op": {"type": "string",
                                                        "required": False,
                                                        "enum": ["gt", "lt"]},
                                          "atime_time": {"type": "string",
                                                         "required": False},
                                          "atime_op": {"type": "string",
                                                       "required": False,
                                                       "enum": ["gt", "lt"]},
                                         }
                           }
        self._validate_against_schema(inclusion_config, inclusion_schema)

        #--- if fntime set was included, make sure all necessary items are
        #--- included.  fntime_filter_regex is the only optional item.
        fntime_keys     = ('fntime_time', 'fntime_filter_regex',
                           'fntime_extract_regex', 'fntime_format',
                           'fntime_op')
        fntime_required = ('fntime_time', 'fntime_op', 'fntime_format',
                           'fntime_extract_regex')
        if any(key in inclusion_config for key in fntime_keys):
            for key in fntime_required:
                if key not in inclusion_config:
                    abort('ERROR: config is invalid',
                          '%s is missing from inclusion_config' % key, 1)

        #--- the atime check needs both its time and its operator
        atime_required = ('atime_time', 'atime_op')
        if any(key in inclusion_config for key in atime_required):
            for key in atime_required:
                if key not in inclusion_config:
                    abort('ERROR: config is invalid',
                          '%s is missing from inclusion_config' % key, 1)

    def _validate_action_phase(self, action_config):
        """ Exactly one of action_builtin and action_external must be
            provided.
        """
        action_schema = {"type":"object",
                         "properties":{"action_builtin":  {"type": "string",
                                                           "required": False,
                                                           "enum": ["list", "delete", "compress"]},
                                       "action_external": {"type": "string",
                                                           "required": False}
                                      }
                        }
        self._validate_against_schema(action_config, action_schema)

        has_builtin  = 'action_builtin'  in action_config
        has_external = 'action_external' in action_config
        if has_builtin and has_external:
            abort('ERROR: config is invalid',
                  'both builtin and external actions provided.', 1)
        if not has_builtin and not has_external:
            # Without this check the problem only shows up as a KeyError
            # when the first qualifying file reaches the action phase.
            abort('ERROR: config is invalid',
                  'neither a builtin nor an external action was provided.', 1)



def abort(summary, details, rc=1):
    """ Creates formatted error message within a box of = characters
        then exits.
        Inputs:
           - summary: short message, cut off after 69 characters
           - details: longer message, printed in chunks of 68 characters
           - rc: exit status of the program, defaults to 1
        Both messages are also logged at critical level once logging has
        been set up.
        Raises:
           - SystemExit, always
    """
    # Every boxed line is 79 characters wide, same as the border lines.
    # Each line goes out through a single-argument print() call, which
    # behaves identically on python 2 and python 3.
    box_line = '===  %-69.69s  ==='

    #---prints top line:
    print('=' * 79)

    #---prints message within = characters, assumes it is kinda short:
    print(box_line % summary)
    if logger:
        logger.critical(summary)

    #---prints exception msg, breaks it into multiple lines:
    for start in range(0, len(details), 68):
        print(box_line % details[start:start+68])
    if logger:
        logger.critical(details)

    #---prints bottom line:
    print('=' * 79)

    sys.exit(rc)






def setup_logs(arg_log_dir, log_level='debug'):
    """ Creates the cletus LogManager and binds its logger to the
        module-level 'logger' name.
        Inputs:
           - arg_log_dir: log directory passed to the LogManager as log_dir
           - log_level: one of debug, info, warning, error, critical - in
             any letter case
        The LogManager is also given APP_NAME as app_name and this module's
        __name__ as log_name.
    """
    global logger
    level_names = ('debug', 'info', 'warning', 'error', 'critical')
    assert log_level.lower() in level_names

    log_manager = log.LogManager(app_name=APP_NAME,
                                 log_name=__name__,
                                 log_dir=arg_log_dir)
    logger = log_manager.logger
    logger.setLevel(log_level.upper())




def get_regex_substring(string, extract_rgx, filter_rgx=None):
    r""" Gets a substring of a string based on two regexs.
        Inputs:
           - string: the data source for the regex operations
           - extract_rgx (required): defines exactly what will be
             returned.
           - filter_rgx (optional): can be used to isolate the part
             of string to apply the extract_rgx
        Returns:
           - result of extract_rgx - which should be the exact
             string, or None if there was no match.
        Raises:
           - ValueError if the filter_rgx finds a match but the extract_rgx
             does not find the match within the output of filter_rgx.
        Examples:
           - >>> get_regex_substring('300_123_b395r', r'\d\d\d')
             '300'
           - >>> get_regex_substring('300_123_b395r', r'\d\d\d', r'_\d\d\d_')
             '123'
        Limits:
           - It only performs the search once, so if there are multiple
             matches it will return the first.
    """
    # NOTE: the docstring is a raw string because it contains \d sequences.

    if filter_rgx is not None:
        filter_match = re.search(filter_rgx, string)
        if filter_match is None:
            return None
        filtered_string = filter_match.group()
    else:
        filtered_string = string

    ext_match = re.search(extract_rgx, filtered_string)
    if ext_match is not None:
        return ext_match.group()

    if filter_rgx:
        # Extract Regex is not intended to filter
        raise ValueError('extract regex %r found no match within %r, the part '
                         'of the string selected by filter regex %r'
                         % (extract_rgx, filtered_string, filter_rgx))
    return None



def isnumeric(number):
    """ Reports whether the input can be converted to a float.
        Args:
           - number:  typically a string holding an integer or a float,
             which may be positive, negative or zero.
        Returns:  True if float(number) succeeds, False if it fails with a
                  TypeError or ValueError
    """
    try:
        float(number)
    except (TypeError, ValueError):
        return False
    else:
        return True



def dynamic_comparison(v1, op, v2):
    """
    Compares two values with an operator that is chosen by name.
    Inputs
        - v1 is typically the actual file value
        - op is the name of the comparison operator - either gt or lt
        - v2 is typically the threshold value
    Returns
        - the result of v1 > v2 for gt, of v1 < v2 for lt
    Example
        - > dynamic_comparison(5, 'lt', 3)
          > False
    """
    assert op in ('gt', 'lt')

    compare = dict(gt=operator.gt, lt=operator.lt)[op]
    return compare(v1, v2)



def exit_if_suppressed():
    """ Asks the cletus SuppressCheck whether the application is
        suppressed.  If it is, logs a warning and exits with status 0;
        otherwise logs that the check passed and returns.
    """
    checker = supp.SuppressCheck(app_name=APP_NAME)
    if not checker.suppressed():
        logger.info('SuppCheck has passed')
        return

    logger.warning('Pgm has been suppressed - this instance will be terminated.')
    sys.exit(0)



def exit_if_already_running():
    """ Tries to lock the pidfile through the cletus JobCheck.
        Returns:
           - the JobCheck object if the lock was obtained
        If the lock was not obtained a warning is logged and the program
        exits with status 0.
    """
    pid_guard = job.JobCheck(app_name=APP_NAME)
    if not pid_guard.lock_pidfile():
        logger.warning('Pgm is already running - this instance will be terminated')
        sys.exit(0)

    logger.info('JobCheck has passed')
    return pid_guard



def get_args():
    """ gets args and returns them
        Input:
            - command line args & options
        Output:
            - argparse namespace with the attributes config_name,
              config_file, log_dir, log_level, test_run and long_help
        With --long-help the module docstring is printed and the program
        exits with status 0.
    """
    # argparse substitutes %(prog)s with the program name; the optparse
    # spelling %prog would be printed literally.
    use = ("%(prog)s is used to run user-specified actions against files. It is "
           "typically scheduled to run periodically, and uses a config file for "
           "criteria. \n"
           " \n"
           "   %(prog)s [file] [misc options]")

    parser = argparse.ArgumentParser(description=use,
                                     formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('--config-name',
           dest='config_name',
           default=None,
           help='Specifies the name of the config file in the standard xdg datagristle config')
    parser.add_argument('-c', '--config-file',
           dest='config_file',
           default=None,
           help='Specifies the name of the config file in a user-specified directory')
    parser.add_argument('--log-dir',
           dest='log_dir',
           default=None,
           help='Overrides the xdg-default logging directory.  XDG default on linux is $HOME/.cache/datagristle/gristle_process')
    parser.add_argument('--log-level',
           dest='log_level',
           choices=['debug', 'info', 'warning', 'error', 'critical'],
           help='Specifies level of detail in logs.  Default is None - which leaves it to the config file.')

    parser.add_argument('--testrun',
           default=False,
           action='store_true',
           dest='test_run',
           help=('Identify files to act upon - but just list them'))

    parser.add_argument('--long-help',
           default=False,
           action='store_true',
           help='Print more verbose help')

    args = parser.parse_args()

    if args.long_help:
        # single-argument print() works on both python 2 and python 3
        print(__doc__)
        sys.exit(0)

    return args



# Script entry point: the return value of main() becomes the exit status.
if __name__ == '__main__':
    sys.exit(main())

