#!/usr/bin/env python
#
# Copyright John Reid 2012
#

"""
Uses STEME to scan sequences for PWMs.
"""

import logging
logging.basicConfig(level=logging.INFO)
import stempy, numpy, sys
from Bio import Motif



#
# Parse options and arguments from command line
#
# Re-encode every command-line argument using the encoding reported by stdin,
# falling back to ASCII when stdin reports none.
# NOTE(review): presumably stempy.parse_options reads sys.argv - confirm.
sys.argv = [ a.encode(sys.stdin.encoding or 'ascii') for a in sys.argv ]
options, args = stempy.parse_options(stempy.add_options)


#
# Check we have the correct number of arguments
#
if len(args) != 2:
    # Interpolate the script name with '%'. Passing it as a second argument
    # to RuntimeError leaves the '%s' placeholder unformatted in the message.
    raise RuntimeError('USAGE: %s <options> motifs-file fasta-file' % sys.argv[0])
# The two positional arguments are consumed in order: motifs first, then FASTA.
motifs_file = args.pop(0)
fasta_file = args.pop(0)


#
# Load the motifs
#
logging.info('Loading motifs from: %s', motifs_file)
# Parse inside a context manager so the file handle is closed deterministically.
# list() forces the whole parse to complete while the handle is still open.
with open(motifs_file) as motifs_handle:
    motifs = list(Motif.parse(motifs_handle, "MEME"))


#
# Load the sequences
#
# Build the sequence set from the FASTA file and the parsed options.
input_sequences = stempy.SequenceSet(fasta_file, options)


#
# Build the background model from the sequences, then use it to
# calculate likelihoods over those same sequences
#
background = input_sequences.build_model_of()
mm, freqs, freqs_with_pseudo_counts = background
input_sequences.calculate_likelihoods(mm)



#
# For each motif
#
for i, motif in enumerate(motifs):
    # Report which motif is being handled.
    logging.info('Motif %2d consensus: %s', i, motif.consensus())

    #
    # Gather the finder's inputs: the sequence data and a model created
    # from the motif's length
    #
    sequence_data = input_sequences.data
    motif_model = input_sequences.create_model(motif.length)

    #
    # Create the instance finder and run it
    # NOTE(review): the trailing .3 is presumably a threshold - confirm
    # against stempy.FindInstances.
    #
    instance_finder = stempy.FindInstances(sequence_data, motif_model, .3)
    instance_finder()
    
