#!/usr/bin/env python
# -*- coding: utf-8 -*-

from sequenza.sequenza import *


# Release metadata of this command line script.
# VERSION is interpolated into the epilog of the top-level help message.
VERSION = "0.0.1"
DATE = "13 March 2014"
# Maintainer name and contact address.
AUTHOR = "Favero Francesco"
MAIL = "favero@cbs.dtu.dk"

def _run_pileup2acgt(args):
   '''
   Convert the pileup file args.pileup to the ACGT format, written to args.output.

   Lines are handed to parse_pileup_str in chunks of args.chunk lines, either
   in this process or through a pool of args.nproc workers. A chunk size of 0
   disables both chunking and the pool. Unless args.quiet is set, progress
   messages are emitted through the logging module.
   '''
   import logging
   import multiprocessing
   import os
   import time
   from functools import partial
   from sequenza.misc import xopen, grouper
   from sequenza.pileup import parse_pileup_str
   if args.chunk == 0:
      args.chunk = 1
      args.nproc = 0
   pool = None
   if args.nproc >= 1:
      pool = multiprocessing.Pool(processes=args.nproc)
   try:
      if not args.quiet:
         logging.basicConfig(format='%(message)s')
         # Wall-clock time: CPU time of the parent says little when the
         # work is done by pool workers.
         start = time.time()
         if args.pileup != '-':
            # Float division, otherwise Python 2 truncates to whole megabytes.
            file_size = os.stat(args.pileup).st_size / (1024.0 * 1024.0)
            logging.warning("Converting " + args.pileup + " -- size = %0.1f MB --" % file_size + " to ACGT..." )
         else:
            logging.warning("Converting " + args.pileup + " from STDIN to ACGT..." )
         logging.warning("Using chunks of " + str(args.chunk) + " line(s), and splitting the job in " + str(args.nproc+1)  + " process(es).")
      with xopen(args.output, "wb") as fileout:
         with xopen(args.pileup, "rb") as f:
            fileout.write("\t".join(['chr', 'n_base', 'ref_base', 'read.depth', 'A', 'C', 'G', 'T']) + '\n')
            parse_line = partial(parse_pileup_str, min_depth=args.n, qlimit=args.qlimit, qformat=args.qformat)
            counter = 0
            for chunk in grouper(args.chunk, f):
               # NOTE(review): grouper presumably pads the last chunk with
               # None (itertools recipe style) - confirm. Dropping the padding
               # is a no-op if it does not.
               lines = [line for line in chunk if line is not None]
               try:
                  if pool is not None:
                     results = pool.map_async(parse_line, lines).get(99)
                  else:
                     results = [parse_line(line) for line in lines]
               except AttributeError:
                  # Best effort as before, but never fall through with the
                  # results of the previous chunk (they would be written twice).
                  logging.warning("Skipping a chunk of " + str(len(lines)) + " line(s) that could not be parsed")
                  results = []
               for r in results:
                  counter = counter + 1
                  if r:
                     fileout.write(r + '\n')
         if not args.quiet:
            seconds = time.time() - start
            logging.warning("Pileup to Mufreq: processed " + str(counter) + " lines in " + str(seconds) + " seconds")
   finally:
      # Release the worker processes on success and on failure alike.
      if pool is not None:
         pool.terminate()
         pool.join()


def _run_pileup2abfreq(args):
   '''
   Merge the normal pileup (args.reference), the tumor pileup (args.sample)
   and the GC file (args.gc), and write one abfreq row per accepted position
   to the stream opened by xopen('-').

   Rows are computed by line_worker, through a process pool when
   args.chunk > 1 or args.nproc > 0, otherwise sequentially.
   '''
   import multiprocessing
   from functools import partial
   from sequenza.misc import xopen
   from sequenza.pileup import multiPileups, GCmultiPileups
   from sequenza.utils import line_worker
   out_header = ["chromosome", "n.base", "base.ref", "depth.normal", "depth.sample", "depth.ratio", "Af", "Bf", "ref.zygosity", "GC.percent", "good.s.reads", "AB.germline", "AB.sample"]
   line_worker_partial = partial(line_worker, depth_sum=args.n, qlimit=args.qlimit, qformat=args.qformat, hom_t=args.hom, het_t=args.het)
   with xopen('-', "wb") as fileout:

      def write_rows(rows):
         # Skip the empty results line_worker returns for rejected positions.
         for res in rows:
            if res:
               fileout.write('\t'.join(map(str, res)) + '\n')

      with xopen(args.reference, 'rb') as normal, xopen(args.sample, 'rb') as tumor, xopen(args.gc, 'rb') as gc_file:
         pup = GCmultiPileups(multiPileups(normal, tumor), gc_file)
         fileout.write("\t".join(out_header) + '\n')
         if args.chunk > 1 or args.nproc > 0:
            # Pool() refuses a worker count of 0, which is reachable here
            # when only chunking was requested; use a single worker then.
            n_workers = 1 if args.nproc == 0 else args.nproc
            pool = multiprocessing.Pool(processes=n_workers)
            try:
               write_rows(pool.imap(line_worker_partial, pup, chunksize=args.chunk))
            finally:
               pool.terminate()
               pool.join()
         else:
            write_rows(line_worker_partial(line) for line in pup)


def _run_gc_windows(args):
   '''
   Stream the fasta file args.fasta, in windows of args.window, into a queue
   consumed by process_gc_from_pipe running in a child process.
   '''
   import multiprocessing
   from multiprocessing.queues import SimpleQueue
   from sequenza.misc import xopen
   from sequenza.utils import stream_fasta, process_gc_from_pipe
   gc_queue = SimpleQueue()
   gc_process = multiprocessing.Process(target=process_gc_from_pipe, args=(gc_queue, args.window))
   # The child is deliberately left non-daemonic (the former "deamon = True"
   # assignment was a misspelled no-op): a daemonic child would be terminated
   # as soon as the parent exits, possibly before it has consumed the queue.
   gc_process.start()
   with xopen(args.fasta, 'rb') as fa_file:
      stream_fasta(fa_file, args.window, gc_queue)
   # The interpreter waits for non-daemonic children at exit anyway;
   # joining here only makes that wait explicit.
   gc_process.join()


def _run_merge_pileups(args):
   '''
   Print the positions shared by the pileups args.p1 and args.p2, one
   tab-separated line per item yielded by multiPileups.
   '''
   from sequenza.misc import xopen
   from sequenza.pileup import multiPileups
   with xopen(args.p1, 'rb') as pileup1, xopen(args.p2, 'rb') as pileup2:
      for line in multiPileups(pileup1, pileup2):
         print('\t'.join(map(str, line)))


def _run_abfreq_binning(args):
   '''
   Print the abfreq file args.abfreq reduced by abfreReduce with window args.w.

   For every non-empty bin the 'top' row, the 'middle' rows and the 'end' row
   are printed; the 'end' row is skipped when it equals the 'top' row.
   '''
   from sequenza.misc import xopen
   from sequenza.utils import abfreReduce
   with xopen(args.abfreq, 'rb') as abfreq:
      abfred = abfreReduce(abfreq, args.w)
      print(abfred._header)
      for a in abfred:
         if not a:
            continue
         print('\t'.join(a['top']))
         for mid in a['middle']:
            print('\t'.join(mid))
         # Compare the rows themselves; the former test compared the two
         # literal lists ['end'] and ['top'] and was therefore always true.
         if a['end'] != a['top']:
            print('\t'.join(a['end']))


def main():
   '''
   Execute the function with args

   Builds the top-level parser and one sub-parser per module, then runs the
   module named by the first command line argument. With no argument, or
   with an unknown module name, the parsing is left to the top-level parser.

   Returns the parsed namespace when the module name is unknown,
   None otherwise.
   '''
   import sys
   parser = DefaultHelpParser(
      prog = __file__,
      formatter_class=lambda prog: SubcommandHelpFormatter(prog, max_help_position=20, width=75),
      description='Sequenza Utils is an ensemble of tools capable of perform various tasks, primarily aimed to convert bam/pileup files to a format usable by the sequenza R package',
      usage= '%(prog)s module [options]',
      epilog = 'This is version {0} - Francesco Favero - 2013'.format(VERSION))
   subparsers = parser.add_subparsers(dest='module')
   subparsers.metavar = None
   parser_pileup2abfreq = subparsers.add_parser(
      'pileup2abfreq',
      help = ' given a paired set of pileup (normal and matching tumor), and GC-content genome-wide information returns the common positions with A and B alleles frequencies',
      formatter_class=lambda prog: SubcommandHelpFormatter(prog,max_help_position=39, width=90))
   parser_reduce_abfreq = subparsers.add_parser(
      'abfreq-binning',
      help = 'Binning the abfreq file to reduce file size and memory requirement for the analysis.')
   parser_pup2mu = subparsers.add_parser(
      'pileup2acgt',
      help = 'convert pileup format to ACGT format',
      formatter_class=lambda prog: SubcommandHelpFormatter(prog,max_help_position=30, width=90))
   parser_gc_window = subparsers.add_parser(
      'GC-windows',
      help = 'Given a fasta file and a window size it computes the GC percentage across the sequences, and returns a file in the same format as gc5Base from UCSC')
   parser_merge_pileups = subparsers.add_parser(
      'merge-pileups',
      help = 'Merging two pileups, it finds the common positions and return an mpileup file adding the second pilep as last 3 columns.')
   # Only the lookup of the module name is guarded: an IndexError raised
   # while a module is running must surface instead of being mistaken for
   # a missing command line argument.
   try:
      used_module = sys.argv[1]
   except IndexError:
      parser.parse_args()
      return
   if used_module == "pileup2acgt":
      _run_pileup2acgt(pileup2acgt(parser, parser_pup2mu))
   elif used_module == "pileup2abfreq":
      _run_pileup2abfreq(pileup2abfreq(parser, parser_pileup2abfreq))
   elif used_module == "GC-windows":
      _run_gc_windows(GC_windows(parser, parser_gc_window))
   elif used_module == "merge-pileups":
      _run_merge_pileups(merge_pileups(parser, parser_merge_pileups))
   elif used_module == "abfreq-binning":
      _run_abfreq_binning(reduce_abfreq(parser, parser_reduce_abfreq))
   else:
      return parser.parse_args()

# Run the command line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
   main()
