#!/usr/bin/env python
'''
 The python version of pcazip!!

 Reproduces the functionality of the old fortran- and C-based versions.

 In essence it wraps a simple set of procedures provided by two modules:
       'cofasu' - trajectory file handling
       'pcz'    - PCA analysis

 Stripped down to the bare essentials, the complete procedure is:

 >>> import cofasu, pcz
 >>> f = cofasu.Fasu('topology.top','trajectory.traj')
 >>> c = cofasu.Cofasu(f)
 >>> p = pcz.Pcz(c)
 >>> p.write('compressed.pcz')

 Everything else is basically analysing and sanity-checking the arguments
 given on the command line.
'''

# General python libraries import.
import os.path as op
import sys
import logging as log
# @charlie
'''
The cofasu module provides simple abstractions of complex trajectory data.
The pcz module provides the core pca capabilities.
'''
from cofasu import Fasu, Cofasu
import pcz
# @charlie
'''
The enhanced Universe provided by MDPlus is now used by the cofasu module.
This version does however not use the MDPlus pcz file writer, but uses a
method provided by the pcz module.
from MDPlus import Universe, Writer
'''

# EPCC, Parallelization
import numpy as np
from time import time
import mpiRelated

# Module-level shortcuts for the two values read from the mpiRelated module;
# pcazip() below reads them as plain globals.
rank, size = mpiRelated.rank, mpiRelated.size

# @charlie
'''
Begin by defining the little utility function that parses trajectory
filename strings that have the "trajfile(start:stop:step)" structure.
'''


def input_parse(infile):
    '''
    Split a "trajfile(start:stop:step)" specifier into filename and slice.

    Args:
        infile: trajectory specifier string; the parenthesised slice part
            is optional.

    Returns:
        A two-item sequence (filename, slice string). When a parenthesised
        slice is present this is the tuple (text before "(", text between
        "(" and the first following ")"). When there is no "(" at all it is
        the list [infile, ':::'].

    Exits:
        Logs an error and calls sys.exit(-1) if a "(" is present but no
        ")" follows it.
    '''
    start = infile.find("(")
    if start < 0:
        # No slice given: hand back the default slice string.
        return [infile, ':::']

    # Look for the closing bracket only *after* the opening one, so that a
    # stray ")" earlier in the filename cannot be mistaken for the end of
    # the slice.
    stop = infile.find(")", start)
    if stop < 0:
        log.error('Malformed trajectory filename: {0}'.format(infile))
        sys.exit(-1)
    return infile[:start], infile[start + 1:stop]

# @charlie
'''
And now here is a little utility function to convert 'mask' pdb files into
MDAnalysis-style selection strings. Basically it reads the second column
(atom number) and uses it to construct 'bynum' selections. Runs
of consecutive numbers are expressed in start:stop form.
'''


def pdb2selection(pdbfile):
    '''
    Build a 'bynum' selection string from the atom numbers in a pdb file.

    Every line starting with ATOM or HETATM contributes its atom number.
    Runs of consecutive numbers are collapsed into 'bynum start:stop'
    terms, and the terms are joined with ' or'.

    Args:
        pdbfile: path of the pdb file to read.

    Returns:
        The selection string, e.g. ' bynum 1:3 or bynum 7:7' (note the
        leading space), or '' if the file holds no ATOM/HETATM lines.

    Raises:
        ValueError: if an ATOM/HETATM line carries no parsable atom number.
    '''
    runs = []            # completed (first, last) runs, in file order
    first = last = None  # current open run; None until the first atom is seen
    with open(pdbfile, 'r') as f:
        for line in f:
            if not line.startswith(('ATOM', 'HETATM')):
                continue
            try:
                # Usual case: the atom number is the second
                # whitespace-separated field.
                k = int(line.split()[1])
            except (IndexError, ValueError):
                # A HETATM record with a five-digit serial has no space
                # between record name and number ("HETATM12345"), so fall
                # back to the fixed columns 7-11 of the pdb format.
                k = int(line[6:11])
            if first is None:
                first = last = k
            elif k == last + 1:
                # still inside a run of consecutive numbers
                last = k
            else:
                # run broken: store it and open a new one
                runs.append((first, last))
                first = last = k
    # end-of-file reached. Make sure the last run is included:
    if first is not None:
        runs.append((first, last))
    return ' or'.join(' bynum {0}:{1}'.format(a, b) for a, b in runs)


#############################################################################
#                                                                           #
#                        PCAZIP main function (start)                       #
#                                                                           #
#############################################################################

def _one_or_indexed(values, index):
    '''Return values[0] if a single shared value was given, else values[index].'''
    if len(values) == 1:
        return values[0]
    return values[index]


def _check_option_counts(args, n_sources, source_kind):
    '''
    Exit with status -1 unless topology, selection and mask counts are valid.

    Each of args.topology, args.selection and args.mask must hold either
    one entry (shared by all sources) or exactly n_sources entries.
    source_kind ('album' or 'trajectory') is only used in the messages.
    '''
    valid = True
    n_top = len(args.topology)
    if n_top > 1 and n_top != n_sources:
        log.error(("Number of topology files must be one, or equal"
                   " to the number of {0} files.").format(source_kind))
        valid = False
    # Selections and masks are indexed independently further on, so each
    # list has to satisfy the rule on its own.
    for per_source in (args.selection, args.mask):
        if per_source is None:
            continue
        if len(per_source) > 1 and len(per_source) != n_sources:
            log.error(("Number of masks/selections must be one, or equal"
                       " to the number of {0} files.").format(source_kind))
            valid = False
            break
    if not valid:
        sys.exit(-1)


def _selection_for(args, index):
    '''Return the atom selection string for the index-th album/trajectory.'''
    if args.selection is None:
        sel = 'name *'
    else:
        sel = _one_or_indexed(args.selection, index)
    if args.mask is not None:
        mask_file = _one_or_indexed(args.mask, index)
        sel = sel + ' and (' + pdb2selection(mask_file) + ')'
    return sel


def pcazip(args):
    '''
    Run the complete pcazip procedure for a set of parsed arguments.

    The attributes read from args are: verbosity, input, album, topology,
    selection, mask, nopca, quality, evecs, file_version, preload, output
    and pdb_out.

    Exactly one of args.input (trajectory specifiers) and args.album
    (album files listing trajectory specifiers, one per line) must be
    given, together with args.topology. Topologies, selections and masks
    must each number one, or one per trajectory/album.

    Exits:
        sys.exit(-1) on missing/inconsistent arguments or when the
        trajectories cannot be combined; sys.exit(-2) on an I/O error
        while processing album files.
    '''
    # Time the complete run time
    time0start = time()

    if args.verbosity:
        log.basicConfig(format="%(levelname)s: %(message)s", level=log.INFO)
        log.info("Verbose output.")
    else:
        log.basicConfig(format="%(levelname)s: %(message)s")

    # Input (or album) filename and topology filename are mandatory, and
    # input and album are mutually exclusive.
    if (not ((args.input is None) ^ (args.album is None))) or (args.topology is None):
        log.error('')
        log.error('All or any of the mandatory command line arguments is missing. The correct usage is:')
        log.error(
            'python ./pcazip.py XOR[(-i|--input <input-file>),(-a|--album) <album-file>] -t|--topology <topology-file> [optional arguments]')
        log.error('')
        log.error('Type "python ./pcazip.py -h" or "python ./pcazip.py --help" for further details.')
        log.error('')
        sys.exit(-1)

    # @charlie
    # Multiple album files OR multiple trajectory files are permitted.
    # Either one topology file common to all of them, or one topology file
    # per album/trajectory file, must be given. The same rule operates
    # (independently) for the selection and masking options.
    from_albums = args.input is None
    if from_albums:
        _check_option_counts(args, len(args.album), 'album')
    else:
        _check_option_counts(args, len(args.input), 'trajectory')

    # We can now build the key data structure:
    #
    # uniStr[]:         a flat list of trajectory specifiers.
    # traj. specifier:  [topfile, trajfile, slice, filter] where trajfile is
    #                   the trajectory filename, topfile is the appropriate
    #                   topology file, slice is a string that defines which
    #                   snapshots are to be included (start:stop:step
    #                   syntax), and filter is the atom selection string.
    uniStr = []
    if from_albums:
        try:
            # @charlie
            # There are one or more album files to process. Within an album,
            # all trajectory files share the same topology file and filter
            # specification.
            for i, album_file in enumerate(args.album):
                log.debug('Reading album file {0}'.format(i))
                sel = _selection_for(args, i)
                top = _one_or_indexed(args.topology, i)

                # Text mode: EOLs are converted to '\n'.
                with open(album_file) as album:
                    for input_str in album:
                        # Blank lines would otherwise yield a "null"-name
                        # trajectory that breaks further processing.
                        if not input_str.strip():
                            continue
                        l = input_parse(input_str.rstrip('\n'))
                        uniStr.append([top, l[0], l[1], sel])
        except IOError as e:
            log.error("Problems while tried to process the album file.")
            log.error(("Check whether the album file does exist and whether"
                       " its name matches the name given in input.\n"))
            log.error("I/O error({0}): {1}".format(e.errno, e.strerror))
            sys.exit(-2)
    else:
        # @charlie
        # One or more trajectory files have been specified by the user,
        # rather than one or more album files.
        for i, input_str in enumerate(args.input):
            log.debug('Reading trajectory file {0}'.format(i))
            sel = _selection_for(args, i)
            top = _one_or_indexed(args.topology, i)
            l = input_parse(input_str)
            uniStr.append([top, l[0], l[1], sel])

    # @charlie
    # Now we can create the cofasu:
    f = []
    # Time reading/gathering of the trajectories in parallel
    time1start = time()
    for i, a in enumerate(uniStr):
        log.debug('Cofasu:{0} {1} {2} {3}'.format(a[0], a[1], a[2], a[3]))
        # Each Fasu gets an owner number, cycling through 0..size-1.
        f.append(Fasu(a[0], a[1], slice=a[2], filter=a[3], owner=i % size))
    time1end = time()
    try:
        cf = Cofasu(f)
    except(ValueError):
        log.error('Can\'t compile trajectory files - inconsistent sizes?')
        sys.exit(-1)

    # @charlie
    # Writing a filtered output trajectory (args.trj_output) is not
    # supported yet in this version.

    if args.nopca is False:
        # @charlie
        # run the pca analysis:
        if rank == 0:
            log.info('Running pca analysis')
        # Timing the pcz-analysis with trajectories distributed across processors
        time2start = time()
        p = pcz.Pcz(cf, quality=float(args.quality), req_evecs=args.evecs, rank=rank, version=args.file_version, preload=args.preload)
        time2end = time()
        if rank == 0:
            log.info("Writing compressed trajectory")

        if args.output is not None:
            output_file = args.output
        else:
            # Derive the output name from the first trajectory file, which
            # is element 1 of the first specifier in uniStr.
            first_traj = uniStr[0][1]
            out_dir = op.dirname(first_traj)
            name_out_compressed = op.splitext(op.basename(first_traj))[0]
            output_file = op.join(out_dir, name_out_compressed + "_outputPython.pcz")

        if rank == 0:
            time_write_output_0 = time()
            p.write(output_file)
            time_write_output_1 = time()

        if args.pdb_out is not None:
            cf.writepdb(args.pdb_out, cf.coords(0))
        if rank == 0:
            totTime = time() - time0start
            log.info('Time for appending cofasus: {:.2f} s, {:.1f}% total runtime\n'.format(time1end - time1start, (
            time1end - time1start) / totTime * 100))
            log.info('Time for pcz-analysis: {:.2f} s, {:.1f}% total runtime\n'.format(time2end - time2start, (
            time2end - time2start) / totTime * 100))
            log.info('Time to write the output file: {:.2f} s, {:.1f}% total runtime\n'.format(
                time_write_output_1 - time_write_output_0, (time_write_output_1 - time_write_output_0) / totTime * 100))
            log.info('Total run time:: {:.2f} s\n'.format(totTime))