"""Provide analysis of input files by chromosomal regions.

Handle splitting and analysis of files from chromosomal subsets separated by
no-read regions.
"""
import os

from bcbio.distributed.split import (parallel_split_combine,
                                     grouped_parallel_split_combine)
from bcbio import utils
from bcbio.variation import genotype, multi

# ## BAM preparation

def _split_by_regions(regions, dirname, out_ext, in_key):
    """Split a BAM file data analysis into chromosomal regions.
    """
    def _do_work(data):
        bam_file = data[in_key]
        part_info = []
        base_out = os.path.splitext(os.path.basename(bam_file))[0]
        nowork = [["nochrom"], ["noanalysis", regions["noanalysis"]]]
        for region in regions["analysis"] + nowork:
            out_dir = os.path.join(data["dirs"]["work"], dirname, data["name"][-1], region[0])
            if region[0] in ["nochrom", "noanalysis"]:
                region_str = region[0]
            else:
                region_str = "_".join([str(x) for x in region])
            region_outfile = os.path.join(out_dir, "%s-%s%s" %
                                          (base_out, region_str, out_ext))
            part_info.append((region, region_outfile))
        out_file = os.path.join(data["dirs"]["work"], dirname, data["name"][-1],
                                "%s%s" % (base_out, out_ext))
        return out_file, part_info
    return _do_work

def parallel_prep_region(samples, regions, run_parallel):
    """Run pre-variant calling BAM preparation on samples, split by region.

    Builds a region splitter keyed on each sample's "work_bam" entry, with
    outputs placed under a "bamprep" directory and suffixed "-prep.bam", then
    hands the work to ``parallel_split_combine`` and returns its result.
    """
    bam_key = "work_bam"
    splitter = _split_by_regions(regions, dirname="bamprep",
                                 out_ext="-prep.bam", in_key=bam_key)
    # NOTE(review): the None argument presumably means "no combine step here",
    # leaving the merge to delayed_bamprep_merge -- confirm against
    # bcbio.distributed.split.parallel_split_combine.
    return parallel_split_combine(samples, splitter, run_parallel,
                                  "piped_bamprep", None, bam_key, ["config"])

def delayed_bamprep_merge(samples, run_parallel):
    """Perform a delayed merge on regional prepared BAM files.

    The merge is dispatched as ``run_parallel("delayed_bam_merge", samples)``
    when at least one sample has a "combine" entry and has not disabled
    ``merge_bamprep`` in its algorithm configuration (treated as enabled when
    absent). Otherwise ``samples`` is returned untouched.

    samples -- sequence whose items are indexed at ``[0]`` to reach the
               sample dictionary.
    run_parallel -- callable invoked with a function name and the samples.
    """
    def _wants_merge(data):
        # Use `in` rather than dict.has_key, which was removed in Python 3.
        return (data["config"]["algorithm"].get("merge_bamprep", True)
                and "combine" in data)

    # any() stops at the first sample that requires a merge.
    if any(_wants_merge(xs[0]) for xs in samples):
        return run_parallel("delayed_bam_merge", samples)
    return samples

# ## Variant calling

def _split_by_ready_regions(output_ext, file_key, dir_ext_fn):
    """Organize splits into pre-built files generated by parallel_prep_region
    """
    def _do_work(data):
        if not data["region"][0] in ["nochrom", "noanalysis"]:
            bam_file = data[file_key]
            ext = output_ext
            chrom, start, end = data["region"]
            base = os.path.splitext(os.path.basename(bam_file))[0]
            noregion_base = base[:base.index("-%s_%s_%s" % (chrom, start, end))]
            out_dir = os.path.join(data["dirs"]["work"], dir_ext_fn(data))
            out_file = os.path.join(out_dir, "{noregion_base}{ext}".format(**locals()))
            out_parts = []
            if not utils.file_exists(out_file):
                out_region_dir = os.path.join(out_dir, chrom)
                out_region_file = os.path.join(out_region_dir, "{base}{ext}".format(**locals()))
                out_parts = [(data["region"], out_region_file)]
            return out_file, out_parts
        else:
            return None, []
    return _do_work

def parallel_variantcall_region(samples, run_parallel):
    """Run variant calling and post-analysis on samples, region by region.

    Every input sample is first expanded with
    ``genotype.handle_multiple_variantcallers``; the flattened items are then
    passed to ``grouped_parallel_split_combine`` together with a splitter that
    targets "-variants.vcf" outputs derived from each item's "work_bam" file.
    """
    expanded = [item
                for sample in samples
                for item in genotype.handle_multiple_variantcallers(sample)]
    splitter = _split_by_ready_regions(output_ext="-variants.vcf",
                                       file_key="work_bam",
                                       dir_ext_fn=genotype.get_variantcaller)
    return grouped_parallel_split_combine(
        expanded, splitter, multi.group_batches, run_parallel,
        "variantcall_sample", "split_variants_by_sample",
        "combine_variant_files", "vrn_file", ["sam_ref", "config"])

def clean_sample_data(samples):
    """Clean unnecessary information from sample data, reducing size for message passing.

    Each sample dictionary is trimmed in place:

    - "dirs" keeps only the "work" and "galaxy" entries.
    - "config" keeps only the "algorithm" and "resources" entries.
    - "info" keeps only "upload" and "analysis" (None when absent).
    - "config_file" and "regions" are dropped when present.

    Returns a list in which every cleaned sample is wrapped in its own
    single-item list.
    """
    out = []
    for data in samples:
        data["dirs"] = {"work": data["dirs"]["work"], "galaxy": data["dirs"]["galaxy"]}
        data["config"] = {"algorithm": data["config"]["algorithm"],
                          "resources": data["config"]["resources"]}
        data["info"] = {"upload": data["info"].get("upload"),
                        "analysis": data["info"].get("analysis")}
        for remove_attr in ["config_file", "regions"]:
            # pop with a default replaces the has_key/del pair; dict.has_key
            # was removed in Python 3.
            data.pop(remove_attr, None)
        out.append([data])
    return out
