#!/usr/bin/env python
'''
Automates submitting tasks to a job scheduler for multiple input files
'''
import sys
import os
import re

# When a "qtask" entry sits next to this script's parent directory (e.g. when
# running from a source checkout), make that parent directory importable so
# the "import qtask" below can find it.
_qtask_parent = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..')
if os.path.exists(os.path.join(_qtask_parent, 'qtask')):
    sys.path.append(_qtask_parent)

import qtask


def usage():
    '''
    Print the help text and exit with status 1.

    The output consists of the module docstring, the mqsub usage/placeholder
    description, the docstring of qtask.QTask, and the arguments handled by
    this script itself.
    '''
    # Single-argument, parenthesised print produces identical output as a
    # Python 2 print statement and also parses under Python 3.
    print(__doc__)
    print('''Usage: mqsub {job-args} commands... -- input_files

One command is submitted to the job scheduler for each input file. If
"{}" appears in a command argument (or job argument), it will be replaced
with the current filename. You can also specify suffixes to remove from a
filename. For example, "{^.txt}.gz" would remove .txt from the filename, and
add .gz. So, "filename.txt" would be replaced with "filename.gz". If the {}
arg is "{%}", then the basename of the file is used, as opposed to the full
path name. Similarly, {%^.txt} would use the basename and remove '.txt' from
the filename.

Example:
    mqsub -walltime 2:00:00 -ppn 4 process_file.sh {} \\> {^.txt}.processed.txt -- *.txt

    For files "input1.txt" and "input2.txt" this would run:
    $ process_file.sh input1.txt > input1.processed.txt
    $ process_file.sh input2.txt > input2.processed.txt

''')
    print(qtask.QTask.__doc__)

    print('''\
Other job arguments:
  -v        Verbose output (writes the submitted scripts to stdout)
  -dr       Dry-run - don't actually submit jobs, just generate the scripts

Monitoring arguments:
  -monitor uri    URI of a job-monitor (sqlite://filename.db)
  -project name   Project to associate with these tasks
  -sample  name   Sample name to associate with these tasks (can be {} form)

    ''')
    sys.exit(1)

def _var_repl(arg, fname):
    '''
    Expand a "{...}" filename placeholder inside a single argument.

    Recognised placeholder forms:

      {}          the filename as given
      {%}         the basename of the filename
      {^suffix}   the filename cut off at the last occurrence of suffix
      {%^suffix}  as above, but applied to the basename

    Only one placeholder per argument is expanded; when several brace groups
    are present, the last "{" in the argument starts the placeholder. Any
    other text between the braces is ignored and the filename is substituted
    as-is. If the suffix does not occur in the filename, the filename is left
    unchanged.

    Returns the expanded argument, or `arg` unchanged when it contains no
    placeholder.
    '''
    m = re.match(r'^(.*)\{(.*)\}(.*)$', arg)
    if not m:
        return arg

    before, spec, after = m.groups()

    if spec.startswith('%'):
        fname = os.path.basename(fname)
        spec = spec[1:]

    if spec.startswith('^'):
        cut = fname.rfind(spec[1:])
        # rfind() yields -1 when the suffix is absent; slicing with that
        # would silently chop the last character off the filename.
        if cut >= 0:
            fname = fname[:cut]

    return '%s%s%s' % (before, fname, after)



def submit(cmd_ar, infiles, resources=None, verbose=False, dryrun=False):
    '''
    Build one QTask per input file (or a single task when there are no input
    files), add them to qtask.pipeline and submit the pipeline.

    cmd_ar     list of command words; joined with spaces to form the command
    infiles    list of input filenames; may be empty
    resources  dict of job arguments passed on to qtask.QTask. The keys
               'monitor', 'project' and 'sample' are also copied onto
               qtask.pipeline, and 'jobname' (if present) names the task.
    verbose    passed through to qtask.pipeline.submit()
    dryrun     passed through to qtask.pipeline.submit()

    With input files, "{}" placeholders in every command word and in every
    string-valued resource (including 'jobname' and 'sample') are expanded
    for the current file. Without a 'jobname' resource the job is named after
    the first command word (plus ".<filename>" when there are input files).
    '''
    # A fresh dict per call; a shared mutable default would leak state
    # between calls if anything downstream modified it.
    if resources is None:
        resources = {}

    if 'monitor' in resources:
        qtask.pipeline.monitor = resources['monitor']
    if 'project' in resources:
        qtask.pipeline.project = resources['project']
    if 'sample' in resources:
        qtask.pipeline.sample = resources['sample']

    if not infiles:
        if 'jobname' in resources:
            jobname = resources['jobname']
        else:
            jobname = '%s' % (cmd_ar[0],)

        task = qtask.QTask(' '.join(cmd_ar), jobname, resources)
        qtask.pipeline.add_task(task)

    else:
        for fname in infiles:
            cmd_repl = [_var_repl(c, fname) for c in cmd_ar]

            # Per-file copy of the resources with placeholders expanded in
            # every string value; non-string values (flags) pass through.
            job_resources = {}
            for k, val in resources.items():
                if isinstance(val, str):
                    job_resources[k] = _var_repl(val, fname)
                else:
                    job_resources[k] = val

            if 'jobname' in job_resources:
                # Use the expanded value so a "{}" in -jobname yields a
                # distinct name per file, matching job_resources['jobname'].
                jobname = job_resources['jobname']
            else:
                jobname = '%s.%s' % (cmd_ar[0], fname)

            task = qtask.QTask(' '.join(cmd_repl), jobname, job_resources)

            # The pipeline-level sample is updated after the task is built
            # and before it is added (order kept as-is).
            if 'sample' in resources:
                qtask.pipeline.sample = _var_repl(resources['sample'], fname)

            qtask.pipeline.add_task(task)

    qtask.pipeline.submit(verbose=verbose, dryrun=dryrun)


if __name__ == '__main__':
    # Command line layout:  {job-args} command... [-- input_files]
    # The arguments are consumed in three consecutive phases; once a phase
    # has been left it is never re-entered.
    PHASE_ARGS, PHASE_CMD, PHASE_FILES = 'args', 'cmd', 'files'
    phase = PHASE_ARGS

    resources = {'env': True}
    cmd_ar = []
    infiles = []

    verbose = False
    dryrun = False

    # Name of a "-option" that is still waiting for its value.
    pending = None
    for arg in sys.argv[1:]:
        if phase == PHASE_ARGS:
            if pending is not None:
                # Strip the leading '-' to get the resource key.
                resources[pending[1:]] = arg
                pending = None
            elif arg == '-v':
                verbose = True
            elif arg == '-dr':
                dryrun = True
            elif arg in ('-holding', '-env'):
                # Flags that take no value.
                resources[arg[1:]] = True
            elif arg.startswith('-'):
                # startswith() rather than arg[0] so an empty-string
                # argument does not raise IndexError.
                pending = arg
            else:
                # First word that is not an option starts the command.
                phase = PHASE_CMD
                cmd_ar.append(arg)

        elif phase == PHASE_CMD:
            if arg == '--':
                phase = PHASE_FILES
            else:
                cmd_ar.append(arg)

        else:
            infiles.append(arg)

    if not cmd_ar:
        usage()

    submit(cmd_ar, infiles, resources, verbose, dryrun)
