#!/usr/bin/env python
#
# Copyright John Reid 2012
#

"""
Calculates statistics for scan results.
"""

import logging
#
# Configure root logging at INFO level with timestamped messages and create
# this script's logger. This runs before the remaining imports below.
# NOTE(review): presumably done this early so that anything logged while the
# later modules are imported is already formatted - confirm.
#
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

from optparse import OptionParser
import os
import pyicl
import numpy
import pylab
import cPickle
from stempy.scan import load_occurrences, add_options, create_html_output, \
    get_sites_by_motif, write_seq_centric_stats


#
# Build the command line parser: the options registered by stempy.scan's
# add_options() plus a --dataset-name option specific to this script,
# then parse the command line.
#
parser = OptionParser(usage="%prog",
                      description="Calculates statistics for scan results.")
add_options(parser)
parser.add_option(
    "--dataset-name",
    metavar="DATASETNAME",
    default='unknown data',
    help="Use DATASETNAME as the name of the input "
         "sequences in the HTML output.",
)
options, args = parser.parse_args()


#
# The options have been parsed, so the results directory is known: attach a
# handler to the root logger that writes INFO and above to a log file there.
# The file is opened with mode 'w', so any previous log is overwritten.
#
log_filename = os.path.join(options.results_dir, 'steme-scan-stats.log')
# This message is emitted before the file handler is attached.
logger.info('Writing log file to %s', log_filename)
formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
log_file_handler = logging.FileHandler(log_filename, mode='w')
log_file_handler.setLevel(logging.INFO)
log_file_handler.setFormatter(formatter)
root_logger = logging.getLogger()
root_logger.addHandler(log_file_handler)


#
# Load the occurrences, sequence infos and motifs, put the motifs into
# sorted order and group the occurrences by motif.
#
occurrences, seq_infos, motifs = load_occurrences(options)
motifs = sorted(motifs)
by_motif = get_sites_by_motif(occurrences)


#
# Generate the HTML output for the scan, labelled with the dataset name
# given on the command line.
#
create_html_output(
    options.dataset_name,
    motifs,
    occurrences,
    by_motif,
    seq_infos,
    options,
)

#
# Write sequence-centric statistics to scan-seq-centric.csv in the
# results directory.
#
seq_centric_filename = os.path.join(options.results_dir, 'scan-seq-centric.csv')
# Log through this script's own logger (as the log-file message does) rather
# than the root-level logging.info() convenience function.
logger.info('Writing sequence centric statistics to: %s',
            seq_centric_filename)
# The with-block guarantees the file is closed even if writing fails.
with open(seq_centric_filename, 'w') as out:
    write_seq_centric_stats(out, motifs, occurrences, seq_infos, options)