#!/usr/bin/env python3

import os
import re
import shlex
from glob import glob

import argparse
## samflow work control
from samflow.command import ShellCommand, PythonCommand
from samflow.workflow import Workflow, attach_back

## config file parser
from gcap.funcs.config import Conf
from gcap.funcs.fragment import fragment_size

from gcap.funcs.library_complexity import library_complexity
from gcap.funcs.nsc_rsc import strand_cor
from gcap.funcs.pdf_report import report
from gcap.funcs.peaks_calling import call_peaks
from gcap.funcs.mapping import lib_contamination, reads_mapping
from gcap.funcs.sequence_quality import *

from pkg_resources import resource_filename
from gcap.funcs.union_dhs_overlap import union_DHS_overlap

def parse_args(args=None):
    """
    Build the GCAP command-line parser and parse ``args``.

    Four sub-commands are defined: ``run``, ``batch``, ``purge`` and ``clean``.
    ``run``/``batch`` accept the step selection options (--from/--to/--skip),
    every sub-command accepts the workflow options (-v, --dry-run,
    --allow-dangling, --resume, --remove), and every sub-command except
    ``batch`` requires -c/--config; ``batch`` requires -b/--batch-config.

    :param args: list of argument strings; if None, argparse parses sys.argv
    :return: argparse.Namespace whose ``sub_command`` names the chosen
        sub-command
    :raises SystemExit: (status 2, through ``parser.error``) on invalid
        arguments or when no sub-command is given
    """
    description = "GCAP :  Global Chromatin Accessibility Pipeline"
    parser = argparse.ArgumentParser(description=description)
    sub_parsers = parser.add_subparsers(help="sub-command help", dest="sub_command")

    parser_run = sub_parsers.add_parser("run", help="run pipeline using a config file",
        description="GCAP-run: Run GCAP pipeline using a config file")

    parser_batch = sub_parsers.add_parser("batch", help="run pipeline for multiple datasets")
    parser_batch.add_argument("-b", "--batch-config", dest="batch_config", required=True, help="batch file")
    parser_purge = sub_parsers.add_parser("purge",
        help="Delete entries of the target directory, keeping dataset folders")
    parser_clean = sub_parsers.add_parser("clean", help="Move result file into a new folder and delete other files",
        description="GCAP-clean: Move result files into a new folder")

    # step selection only makes sense for sub-commands that run the pipeline
    for p in (parser_run, parser_batch):
        p.add_argument("--from", dest="start_step", default=0, type=int,
            help="Only step after this number will be processed")
        p.add_argument("--to", dest="end_step", default=100, type=int,
            help="Only step before this number will be processed ")
        p.add_argument("--skip", dest="skip_step", default="",
            help="Steps to skip, use comma as separator")

    # workflow execution options shared by every sub-command
    for p in (parser_run, parser_batch, parser_purge, parser_clean):
        p.add_argument("-v", "--verbose-level", dest="verbose_level", type=int, default=2)
        p.add_argument("--dry-run", dest="dry_run", action="store_true", default=False)
        p.add_argument("--allow-dangling", dest="allow_dangling", action="store_true", default=False)
        p.add_argument("--resume", dest="resume", action="store_true", default=False)
        p.add_argument("--remove", dest="clean", action="store_true", default=False)

    # batch reads its config paths from the batch file instead
    for p in (parser_run, parser_purge, parser_clean):
        p.add_argument("-c", "--config", dest="config", required=True,
            help="specify the config file to use", )

    parsed = parser.parse_args(args)
    if parsed.sub_command is None:
        # Python 3 sub-parsers are optional by default; without this check the
        # program would accept an empty command line and silently do nothing.
        parser.error("a sub-command is required: run, batch, purge or clean")
    return parsed

class StepChecker:
    """Decide which numbered pipeline steps are allowed to run."""

    def __init__(self, start, end, skips):
        """
        Remember the step window and the steps to leave out.

        start: lowest step id that may run
        end: highest step id that may run
        skips: container of step ids that must not run
        """
        self.start, self.end, self.skips = start, end, skips

    def need_run(self, step_id):
        """Return True when step_id lies in [start, end] and is not skipped."""
        outside_window = step_id < self.start or step_id > self.end
        return not (outside_window or step_id in self.skips)

class GCAPBuilder:
    """Bundle a workflow with the config and LaTeX template its steps receive."""

    def __init__(self, workflow, conf, tex):
        """Keep workflow, conf and tex; start with empty fragment lists and no finished tags."""
        self.workflow, self.conf, self.tex = workflow, conf, tex
        self.LaTex_fragments = []
        self.plain_fragments = []
        self.finished = set()

    def build(self, prep_func, tag=None):
        """
        Call prep_func(workflow, conf, tex) so it can extend the workflow.

        A truthy tag is recorded in ``self.finished`` after the call.
        """
        prep_func(self.workflow, self.conf, self.tex)
        if not tag:
            return
        self.finished.add(tag)

    def attach_back(self, command):
        """Pass command and this builder's workflow to the module-level attach_back."""
        attach_back(self.workflow, command)

def prepare_clean_up(workflow, conf, tex):
    """
    Queue the commands that package the results of a run.

    One command creates ``<conf.target_dir>/dataset_<conf.id>``; then, for
    every result pattern that currently matches something in
    ``conf.target_dir``, one ``mv`` command moves the matches into that
    directory. Patterns are expanded now, while the workflow is being built,
    not when the commands run. A pattern without matches is printed and
    skipped. Nothing is deleted here.

    :param workflow: samflow workflow the commands are attached to
    :param conf: configuration providing ``target_dir`` and ``id``
    :param tex: unused; present so the function has the signature
        ``GCAPBuilder.build`` calls
    """
    p_list = ['*.xls', '*_summits.bed', '*_peaks.bed', '*.bw', '*treat*.bed.starch',  "*final",
              'json', '*_treat*-both-passes', "*.conf", "*report.tex", "*report.pdf"]

    # join like the patterns below, so an empty target_dir cannot turn the
    # destination into "/dataset_<id>" at the filesystem root
    final_dir = os.path.join(conf.target_dir, 'dataset_' + conf.id)
    attach_back(workflow,
        ShellCommand("if [ ! -d '{output}' ]; then mkdir -p {output}; fi",
            output=final_dir))

    for p in p_list:
        pf = os.path.join(conf.target_dir, p)
        # expand once: the emptiness check and the command see the same files
        matches = glob(pf)
        if not matches:
            print(pf)
            continue
        move = attach_back(workflow,
            ShellCommand('mv {param[preserve_files]} {output[dir]} \n# Pattern: {param[p_pattern]}',
                output={"dir": final_dir},
                # quote each path so names with spaces or shell
                # metacharacters stay single arguments
                param={"preserve_files": " ".join(shlex.quote(m) for m in matches),
                       "p_pattern": pf}, ))
        # a failed move of one pattern should not stop the remaining ones
        move.allow_fail = True

def prepare_purge(workflow, conf, tex):
    """
    Queue ``rm -r`` commands for the entries of ``conf.target_dir``.

    Every entry matched by ``<target_dir>/*`` gets its own command, except
    entries whose own name contains "dataset"; those are printed and kept.
    Entries are listed now, while the workflow is being built. Names starting
    with a dot are not matched by the ``*`` glob and are left untouched.

    :param workflow: samflow workflow the commands are attached to
    :param conf: configuration providing ``target_dir``
    :param tex: unused; present so the function has the signature
        ``GCAPBuilder.build`` calls
    """
    for df in glob(os.path.join(conf.target_dir, "*")):
        # look at the entry name only: a target_dir whose own path contains
        # "dataset" would otherwise protect every entry from being purged
        if "dataset" in os.path.basename(df):
            print(df)
            continue
        deleted = attach_back(workflow,
            ShellCommand("rm -r {param[deleted_files]}",
                # df is already a concrete path; quote it instead of globbing
                # it again, which can match nothing for names containing [ ]
                param={"deleted_files": shlex.quote(df)}))
        # a failed removal should not stop the remaining ones
        deleted.allow_fail = True

def create_workflow(args, conf, tex):
    """
    Assemble the samflow workflow for one dataset.

    ``clean`` and ``purge`` produce a workflow holding only their own
    commands. Otherwise the workflow first creates ``conf.target_dir`` and
    then receives pipeline steps 1-9, each only when its number is within
    ``args.start_step``..``args.end_step`` and is not listed in
    ``args.skip_step``.

    :param args: parsed command line (sub_command, start_step, end_step,
        skip_step)
    :param conf: dataset configuration
    :param tex: path of the LaTeX template handed to every step
    :return: the populated Workflow (not yet invoked)
    :raises ValueError: when ``args.skip_step`` contains a non-integer entry
    """
    workflow = Workflow(name="Main")
    bld = GCAPBuilder(workflow, conf, tex)

    if args.sub_command == "clean":
        bld.build(prepare_clean_up)
        return workflow

    if args.sub_command == "purge":
        bld.build(prepare_purge)
        return workflow

    # Ignore empty entries so "3,4," or "3,,4" do not crash on int('')
    skipped_steps = {int(token)
                     for token in (args.skip_step or "").split(",")
                     if token.strip()}

    step_checker = StepChecker(args.start_step, args.end_step, skipped_steps)

    need_run = step_checker.need_run

    bld.attach_back(ShellCommand(
        "if [ ! -d '{output}' ]; then mkdir -p {output}; fi",
        output=conf.target_dir))

    if need_run(1):
        ## funcs.sequence_quality
        bld.build(seq_quality)

    # conf.treatment_bam is only read when step 2 is selected; the step is
    # left out when the first treatment file name contains ".bam"
    if need_run(2) and not re.search(r"\.bam",conf.treatment_bam[0]):
        ## funcs.mapping
        bld.build(lib_contamination)

    if need_run(3):
        bld.build(reads_mapping)

    if need_run(4):
        bld.build(library_complexity)

    if need_run(5):
        # strand correlation is not built for seq types starting with "bed"
        if not conf.seq_type.startswith("bed"):
            bld.build(strand_cor)

    if need_run(6):
        bld.build(fragment_size)

    if need_run(7):
        bld.build(call_peaks)

    if need_run(8):
        bld.build(union_DHS_overlap)

    if need_run(9):
        bld.build(report)
    return workflow

def _invoke_workflow(args, conf, tex):
    """Build the workflow for one config, apply the command-line run options and execute it."""
    workflow = create_workflow(args, conf, tex)
    workflow.set_option(
        verbose_level=args.verbose_level,
        dry_run_mode=args.dry_run,
        resume=args.resume,
        allow_dangling=args.allow_dangling)
    workflow.invoke()

def main(args=None):
    """
    Command-line entry point.

    ``run``, ``clean`` and ``purge`` load the config named by ``--config`` and
    execute one workflow. ``batch`` reads ``--batch-config`` line by line,
    treats every non-blank line as a config file path and executes one
    workflow per line, in file order. Any other sub-command value does
    nothing beyond printing the parsed arguments.

    :param args: list of argument strings; if None, sys.argv is parsed
    """
    args = parse_args(args)
    print("Arguments:", args)

    ## latex template
    tex = resource_filename("gcap", "static/gcap_template.tex")

    if args.sub_command in ["run", "clean", "purge"]:
        _invoke_workflow(args, Conf(args.config), tex)

    elif args.sub_command == "batch":
        with open(args.batch_config) as batch_file:
            for a_conf in batch_file:
                a_conf = a_conf.strip()
                if not a_conf:
                    # blank or trailing empty line: there is no config to load
                    continue
                _invoke_workflow(args, Conf(a_conf), tex)

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
