#!/usr/bin/env python
#
# Copyright John Reid 2012
#

"""
Finds occurrences of pairs of motifs of a given spacing.
"""

import logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
from optparse import OptionParser
from collections import defaultdict
import os, pyicl, numpy, pylab, cPickle
from stempy.spacing import parse_occurrences, footprint, handle_pairs, parse_spacings, \
    spacing_str, load_occurrences, load_spacings, add_options, spacing_header, \
    PairOccurrence, make_pair_handler_from_primary_handler


def found_pair_occurrence(spacing, primary):
    """Report and record one occurrence of a motif pair at a given spacing.

    The occurrence is logged at INFO level, written as one line to the
    module-level ``output`` file when that is set, and appended to the
    module-level ``pair_occurrences`` list as a ``PairOccurrence``.

    :param spacing: The spacing that this occurrence matches.
    :param primary: The occurrence of the primary motif. Its ``seq``, ``pos``,
        ``strand`` and ``wmer`` attributes are read.
    """
    line = '%s: %6d %7d %s' % (
        spacing_str(spacing), primary.seq, primary.pos, primary.strand
    )
    logging.info(line)
    # NOTE(review): the primary W-mer is logged twice here; the second value
    # was presumably meant to be the secondary W-mer, but the secondary
    # occurrence is not passed to this function - confirm with the caller.
    logging.debug('%s %s', primary.wmer, primary.wmer)
    if output:
        output.write(line + '\n')
    record = PairOccurrence(
        spacing=spacing,
        seq=primary.seq,
        pos=primary.pos,
        strand=primary.strand
    )
    pair_occurrences.append(record)


def handle_primary(max_distance, primary, secondary, distance, upstream, same_strand):
    """Report the pair if it matches one of the spacings being searched for.

    Every spacing registered in the module-level ``spacings`` mapping under
    the key ``(primary.motif, secondary.motif)`` is compared against the
    observed distance, orientation and strand relationship; each one that
    matches is passed to ``found_pair_occurrence`` together with the primary
    occurrence.

    :param max_distance: Not used by this handler.
    :param primary: The occurrence of the primary motif.
    :param secondary: The occurrence of the secondary motif.
    :param distance: The observed distance between the two occurrences.
    :param upstream: Compared for equality with ``spacing.upstream``.
    :param same_strand: Compared for equality with ``spacing.same_strand``.
    """
    motif_pair = (primary.motif, secondary.motif)
    for candidate in spacings[motif_pair]:
        is_match = (
            candidate.distance == distance
            and candidate.same_strand == same_strand
            and candidate.upstream == upstream
        )
        if is_match:
            found_pair_occurrence(candidate, primary)


        
# Column titles for the per-occurrence part of each reported line; the field
# widths mirror the '%6d %7d %s' layout used in found_pair_occurrence.
pair_header = ': %6s %7s %s' % ('Seq', 'Pos', 'Strand')


#
# Parse the command line: the shared spacing options plus our own -o option
#
parser = OptionParser()
add_options(parser)
parser.add_option(
    "-o",
    default=None,
    dest='output',
    help="Write output to FILE.",
    metavar="FILE"
)
options, args = parser.parse_args()

# The first positional argument is used further down as the file of spacings
# to look for. Fail here with a usage message rather than with an IndexError
# traceback after the occurrences have already been loaded.
if not args:
    parser.error('No spacings file given: expected it as the first positional argument.')


#
# Now we know where we will write output to, start a log file
#
# The log lives in the results directory and is named after the maximum
# distance option; it is truncated on each run (mode 'w').
log_filename = os.path.join(
    options.results_dir,
    'steme-find-spacings-max=%d.log' % options.max_distance
)
formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
log_file_handler = logging.FileHandler(log_filename, mode='w')
log_file_handler.setLevel(logging.INFO)
log_file_handler.setFormatter(formatter)
# Attach to the root logger so that the module-level logging.info() calls
# reach the file as well as the handler set up by basicConfig().
root_logger = logging.getLogger('')
root_logger.addHandler(log_file_handler)



#
# Load the occurrences and associated sequence lengths
#
loaded = load_occurrences(options)
occurrences, seq_infos, motifs = loaded

#
# Load the file containing the spacings to look for
# (named by the first positional command line argument)
#
spacings_filename = args[0]
spacings = load_spacings(spacings_filename, motifs, options)
# An empty log record separates the loading messages from what follows.
logging.info('')



#
# Open output file (only when one was requested with -o)
#
# 'output' stays None when no file was requested; found_pair_occurrence()
# checks it before writing.
output = None
if options.output:
    output_filename = os.path.join(options.results_dir, options.output)
    logging.info('Writing pairs to %s', output_filename)
    output = open(output_filename, 'w')


#
# Iterate through the occurrences finding spacings
#
try:
    logging.info(
        'Examining spacings of up to %d b.p. between %d occurrences of %d motifs in %d sequences',
        options.max_distance, len(occurrences), len(motifs), len(seq_infos)
    )
    # Filled in by found_pair_occurrence() as matching pairs are discovered.
    pair_occurrences = []
    logging.info(spacing_header + pair_header)
    logging.info('*' * (len(spacing_header) + len(pair_header)))
    pair_handler = make_pair_handler_from_primary_handler(
        handle_primary, ignore_close_to_end=False, options=options
    )
    handle_pairs(occurrences, seq_infos, pair_handler, options)
finally:
    # Close the output file even if the search fails part-way through, so
    # that the pairs already written are flushed and the handle is released.
    if output is not None:
        output.close()



