#!/usr/bin/env python

"""
Create formatted sequence alignments with optional pdf output.
"""

import sys
import argparse
import csv

from alnvu import util, pdf, html
from alnvu import __version__, exit_on_sigint, exit_on_sigpipe

def get_range(rawrange):
    """Parse the value of ``-r/--range`` into a ``[start, stop]`` list.

    Args:
        rawrange: string holding exactly two integers separated by a
            comma (eg ``'10,200'``). Whitespace around each integer is
            accepted because ``int()`` tolerates it.

    Returns:
        A two-element list ``[start, stop]`` of ints.

    Exits:
        Writes an error message to stderr and calls ``sys.exit(1)`` if
        ``rawrange`` is not a string or does not contain exactly two
        integers.
    """

    try:
        # ValueError covers both a non-integer field and the wrong
        # number of fields (unpacking failure); AttributeError covers a
        # non-string argument that has no .split().
        start, stop = [int(x) for x in rawrange.split(',')]
    except (ValueError, AttributeError):
        # Report on stderr so that the message cannot end up mixed into
        # alignment output that is being piped or redirected.
        sys.stderr.write(
            'Error in "-r/--range %s": argument requires two integers '
            'separated by a comma.\n' % (rawrange,))
        sys.exit(1)

    return [start, stop]

def _build_parser():
    """Return the ``argparse.ArgumentParser`` describing the command line."""

    parser = argparse.ArgumentParser(description=__doc__)

    # Registered explicitly (and first, so the help layout is unchanged)
    # instead of via the deprecated ``version=`` constructor keyword.
    parser.add_argument(
        "-v", "--version", action = 'version', version = __version__)

    parser.add_argument(
        "infile", type = argparse.FileType('rU'), nargs = '?',
        default = sys.stdin,
        help = "Input file in fasta format (reads stdin if missing)")

    parser.add_argument("-q", "--quiet", dest="quiet",
        help="Suppress output of alignment to screen.",
        action='store_true', default = False)

    parser.add_argument("--empty-ok",
        help="Exit with zero status if infile contains no sequences",
        action='store_true', default = False)

    ## layout
    layout_options = parser.add_argument_group('Layout')

    layout_options.add_argument("-w", "--width", dest="ncol",
        help="Width of sequence to display in each block in characters [%(default)s]",
        metavar="NUMBER", type = int, default = 115)

    layout_options.add_argument("-L", "--lines-per-block", dest="nrow",
        help="Sequences (lines) per block. [%(default)s]",
        metavar="NUMBER", type = int, default = 75)

    ## columns
    column_options = parser.add_argument_group('Column selection')

    column_options.add_argument("-x", "--exclude-invariant", dest="exclude_invariant",
        help="""Show only columns with at least N non-consensus
bases (set N using the '-s/--min-subs')""",
        action='store_true', default = False)

    column_options.add_argument("-g", "--include-gapcols", dest="include_gapcols",
        help="Show columns containing only gap characters.",
        action='store_true', default = False)

    column_options.add_argument(
        "-r", "--range", dest="rawrange",
        help="Range of columns to display (eg '-r start,stop')",
        metavar='INTERVAL')

    column_options.add_argument("-s", "--min-subs", dest="min_subs",
        help="Minimum NUMBER of substitutions required to define a position as variable. [%(default)s]",
        metavar="NUMBER", type = int, default = 1)

    ## consensus
    consensus_options = parser.add_argument_group('Consensus display and sequence appearance')

    consensus_options.add_argument("-c", "--consensus", dest="add_consensus",
        help="Show the consensus sequence [%(default)s]",
        action='store_true', default = False)

    consensus_options.add_argument(
        "-d", "--compare-to", dest="compare_to",
        help="""Identify the reference sequence by name. Nucleotide
        positions identical to the reference will be replaced with
        `--simchar`. The default behavior is to use the consensus
        sequence as a reference.""", metavar='NAME')

    consensus_options.add_argument(
        "-D", "--no-comparison", action = 'store_false', dest='compare',
        default = True,
        help = 'Show all bases (ie, suppress comparison with the reference sequence).')

    consensus_options.add_argument(
        "-G", "--ignore-gaps", dest="include_gaps", action = "store_false",
        default = True, help = 'Ignore gaps in the calculation of a consensus.')

    consensus_options.add_argument("--simchar",
        help="""Character representing a base identical to the consensus. Specify
        the empty string to identify differences using lower case [default
        '%(default)s']""", metavar='CHARACTER', default = '.')

    consensus_options.add_argument('-t', '--reference-top', action='store_true', default=False,
        help="Would like the reference/consensus sequence on the top or the bottom?")

    ## annotation
    name_options = parser.add_argument_group('Sequence annotation')

    name_options.add_argument("-i", "--number-sequences", dest="seqnums",
        help="Show sequence number to left of name.",
        action='store_true', default = False)

    name_options.add_argument("-n", "--name-max", dest="name_max",
        help="Maximum width of sequence name in characters [%(default)s]",
        metavar="NUMBER", type = int, default = 35)

    name_options.add_argument("-N", "--name-split", dest="name_split",
        help="""Specify a character delimiting sequence names. By
        default, the name of each sequence is the first
        whitespace-delimited word. '--name-split=none' causes the entire line after the '>' to be displayed.""",
        metavar="CHARACTER")

    name_options.add_argument(
        "-S", "--sort-by-name", type = argparse.FileType('rU'), metavar = 'FILE',
        help = "File containing sequence names defining the sort-order of the sequences in the alignment.")

    name_options.add_argument(
        "--rename-from-file", type = argparse.FileType('rU'), metavar = 'FILE',
        help = "headerless csv file with columns 'old-name','new-name' to use for renaming the input sequences. If provided, renaming occurs immediately after reading the input sequences.")

    # Only offered when util.treeorder is truthy; main() therefore reads
    # this option with getattr() because the attribute may be absent.
    if util.treeorder:
        name_options.add_argument(
            "-T", "--sort-by-tree", type = argparse.FileType('rU'), metavar = 'FILE',
            help = "File containing a newick-format tree defining the sort-order of the sequences in the alignment (requires biopython).")


    ## Output options for both pdf and html
    output_options = parser.add_argument_group('Output options for pdf or html output')

    output_options.add_argument("-o", "--outfile", dest="outfile",
        help="Write output to a file.")

    output_options.add_argument("-F", "--fontsize", dest="fontsize",
        help="Font size for pdf output [%(default)s]",
        metavar="NUMBER", default=7, type = int)

    # Decide which output format to use. All three flags share
    # dest='output_format'; argparse applies the default of the first
    # action registered for a dest, so 'stdout' is the value seen when
    # none of the flags is given.
    out_format_group = output_options.add_mutually_exclusive_group()
    out_format_group.add_argument('--stdout', dest='output_format', action='store_const', const='stdout',
            default='stdout', help='write to standard out (default)')
    out_format_group.add_argument('-P', '--pdf', dest='output_format', action='store_const', const='pdf',
        help="Write out to pdf")
    out_format_group.add_argument('-H', '--html', dest='output_format', action='store_const', const='html',
        help="Write out to html")

    ## Specifically for html output:
    # NOTE(review): main() never reads args.annotate or args.table_only;
    # confirm whether they should be forwarded to html.print_html.
    html_options = parser.add_argument_group('HTML output')
    html_options.add_argument('-a', '--annotate')
    html_options.add_argument('--annotation-file', type=argparse.FileType('r'))
    html_options.add_argument('--table-only', action='store_true', default=False,
            help="""Don't spit out a full html document, just the alignment table and style tags. Handy if
            you'd like to include in another document.""")

    ## pdf options (only if reportlab is installed)
    if pdf.print_pdf:
        pdf_options = parser.add_argument_group(
            'PDF output',
            'These options require reportlab.')

        pdf_options.add_argument(
            "-O", "--orientation", dest="orientation",
            help="Set page orientation; choose from portrait, landscape [%(default)s]",
            metavar="ORIENTATION", default = 'portrait',
            choices = ('portrait','landscape'))

        pdf_options.add_argument(
            "-b", "--blocks-per-page", dest="blocks_per_page",
            metavar="NUMBER", type = int, default = 1,
            help="Number of aligned blocks of sequence per page [%(default)s]")

    return parser


def main(arguments):
    """Read an alignment, reformat it and write it to screen, html or pdf.

    Args:
        arguments: list of command line arguments, excluding the program
            name (eg ``sys.argv[1:]``).

    Returns:
        When the input contains no sequences: 0 if ``--empty-ok`` was
        given, otherwise 1. Returns None after a normal run.

    Exits:
        Via ``parser.error`` (status 2) for invalid arguments, including
        ``--pdf`` when pdf support is unavailable, and via ``get_range``
        (status 1) for a malformed ``--range``.
    """

    parser = _build_parser()
    args = parser.parse_args(arguments)

    # The pdf-specific options (and the printing function itself) only
    # exist when pdf.print_pdf is available; fail with a usage error up
    # front instead of crashing after the alignment has been processed.
    if args.output_format == 'pdf' and not pdf.print_pdf:
        parser.error('PDF output (-P/--pdf) is not available; it requires reportlab.')

    # Ignore SIGPIPE, for head support
    exit_on_sigpipe()
    exit_on_sigint()

    name_split = False if args.name_split == 'none' else args.name_split
    seqs = list(util.readfasta(args.infile, name_split = name_split))

    if not seqs:
        if not args.quiet:
            print('No sequences in input')
        return 0 if args.empty_ok else 1

    if args.rename_from_file:
        # csv.reader yields an empty list for a blank line; skip those
        # rather than letting dict() fail on them.
        namedict = dict(row for row in csv.reader(args.rename_from_file) if row)
        for seq in seqs:
            seq.name = namedict.get(seq.name, seq.name)

    if args.sort_by_name:
        sortdict = dict((line.strip(), i) for i, line in enumerate(args.sort_by_name))
    elif getattr(args, 'sort_by_tree', None):
        names = util.treeorder(args.sort_by_tree)
        sortdict = dict((line.strip(), i) for i, line in enumerate(names))
    else:
        sortdict = {}

    if sortdict:
        # Names missing from the sort file get rank -1 so they come
        # first (ordered by name). Listed ranks start at 0, so this is
        # the same order the former None default produced, without
        # relying on None being comparable to integers.
        seqs.sort(key = lambda seq: (sortdict.get(seq.name, -1), seq.name))

    formatted_seqs, vnumstrs, mask = util.reformat(
        seqs,
        add_consensus = args.add_consensus,
        compare = args.compare,
        compare_to = args.compare_to,
        exclude_gapcols = not args.include_gapcols,
        exclude_invariant = args.exclude_invariant,
        min_subs = args.min_subs,
        simchar = args.simchar,
        countGaps = args.include_gaps,
        seqrange = get_range(args.rawrange) if args.rawrange else None,
        reference_top = args.reference_top)

    if args.output_format == 'html':
        if args.annotation_file:
            annotations = html.AnnotationSet.from_mapping_file(args.annotation_file, mask)
        else:
            annotations = None

        html.print_html(formatted_seqs, vnumstrs, mask,
            outfile = args.outfile,
            annotations = annotations,
            fontsize = args.fontsize,
            seqnums = args.seqnums)

    all_numstrs = args.exclude_invariant or not args.include_gapcols

    pages = util.pagify(formatted_seqs, vnumstrs,
        nrow = args.nrow,
        ncol = args.ncol,
        name_min = 10,
        name_max = args.name_max,
        seqnums = args.seqnums,
        all_numstrs=all_numstrs)

    # The text rendering is shown for every output format unless
    # --quiet is given.
    if not args.quiet:
        for page in pages:
            for line in page:
                print(line.rstrip())
            print('')

    if args.output_format == 'pdf':
        pdf.print_pdf(
            pages,
            outfile = args.outfile,
            fontsize = args.fontsize,
            orientation = args.orientation,
            blocks_per_page = args.blocks_per_page
            )

# Script entry point: run main() on the command line arguments (program
# name excluded) and use its return value as the process exit status.
if __name__ == '__main__':
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)
