#!/usr/bin/env python
# -*- coding: utf-8 -*-

from py_rstr_max import rstr_max
import itertools, operator
'''
    findsubstrings
    ~~~~~~~~~~~~~~

    Part of the Python-dna package: finds the substrings shared by two
    strings, using the rstr_max repeat finder.


'''

def common_sub_strings(stringx, stringy, limit=25):
    '''
    common_sub_strings(stringx, stringy, limit=25)

    Finds the common substrings between stringx and stringy that are
    at least limit characters long. This function is case sensitive.

    The two strings are joined with a "&" separator and handed to
    rstr_max; every repeat it reports that occurs in both strings
    gives one tuple per (position in x, position in y) pair.

    returns a list of tuples describing the substrings
    The list is sorted longest -> shortest; substrings of the same
    length are ordered by their start position in stringx.

    Example:

    [(startx1,starty1,length1),(startx2,starty2,length2), ...]

    startx1 = position in x where substring 1 starts
    starty1 = position in y where substring 1 starts
    length1 = length of substring 1

    '''
    lenx = len(stringx)
    rstr = rstr_max.Rstr_max()
    rstr.add_str(stringx + "&" + stringy)
    repeats = rstr.go()
    match = []
    # items() instead of iteritems(): works on both Python 2 and Python 3.
    for (offset_end, nb), (l, start_plage) in repeats.items():
        if l < limit:
            continue
        startsx = []
        startsy = []
        # The nb occurrences of this repeat are res[start_plage:start_plage+nb].
        for o in range(start_plage, start_plage + nb):
            offset = rstr.idxPos[rstr.res[o]]
            if offset > lenx:
                # Past the separator: convert to a position inside stringy.
                startsy.append(offset - lenx - 1)
            elif offset + l <= lenx:
                startsx.append(offset)
            # Otherwise the occurrence touches the separator, which can only
            # happen when an input itself contains "&"; it is not a substring
            # of stringx, so it is dropped.
        for a, b in itertools.product(startsx, startsy):
            match.append((a, b, l))
    # One stable sort: length descending, then start in x ascending
    # (same order as sorting by start first and by length second).
    match.sort(key=lambda m: (-m[2], m[0]))
    return match

if __name__ == "__main__":
    # Longer sample sequences for manual experiments, left disabled
    # (the print line uses Python 2 statement syntax):
    #    x='atctgaacgctttgaatgttgtctctattccacgaggcattcaaaaactggttaccgaacctcaagactaaaagattcttgaccaactctttacccaagtaatggtcaattctgtacaactcttcttctttaaagaggggccccaggtttttttgcagctccctggcagaggccaggtcgtggccgaaaggtttctctacgattacacgggtgatgccattctctgcgtacacacgactcttgatctgcttggccaccgtcaaaaaaacgcttggcggcaaggccagatagaagagacggtgtgggacatcgacgttggcacttttctcgaatttctcgatctgcgttcttaattcgtcgaagccttcatctgtgtcgtaatttcccgaaatgtagctgaccatcttgaagaactgttcgaccttagagtcatcggcttcaccgtgaggttttttcaagtggggtaggacacgggacttcaggtcctcctccatggacaatttggaccgggcataaccgaagatcttggtagatggatcaaggtaaccttctctgaaaagcccaaataaggcgggaaaagtcttcttctttgccagatcacctgacgcaccaaagacagatatgacggtatttttttcgaatttgacggggccttcactcatctgcagcccgggggatccactagttctagaa'
    #    y='atcgataagcttgatatcgaattcctgcagctaattatccttcgtatcttctggcttagtcacgggccaagcgtaagggtgcttttcgggcataacatacttgtgtttttggtaatggtcaattctgtacaactcttcatatattccttcaatccctttggacctcttgatccgtaggggtaaatttccggtgttggaccgtccggacgctctatgtgcttcagtaatggggtgaatatgccccaactgatatccaattcgtcatctctgacaaagttggaatggtcacccagtagggcgtctcttatcaacacctcgtaagcctctggaatccaaaagtcttggtacctgcttgcgtaagttagattcagatctgtgacttgggtagcatttgacagaccaggggtcttagcattaaactttaggtacacagcggcatcgggctgcactctgatgaccagttcgttatttggaatgtctttgaagacacccgatgcgaccgctttgtactgcagtctgatctccaccttggactcattcaaagccttaccggcacgcatcatgatggggacgccctcccaacgctcgttttcgatgttgaaagtcattgctgcaaaagtgacacatttagagtccttgtctacagtgtcatcatccacgtaggcgggcttagacccgtcctcagatttaccgtactggcccaagaggacgtcgtccgtgtcgatgatctgaacgctttgaatgttgtctctattccacgaggcattcaaaaaggggccacggcctttagaaccttaaccttttcgtcacgaatagattccgggtcaaaagacaccggtctttccatagtcaagagagtcatgatttgtaacagatggttctgcatcacgtctctgattatgcctatagagtcgaaatagccgccacggccttcggtgccgaacctctctttaaacgaaatctgaacgctttgaatgttgtctctattccacgaggcattcaaaaa'
    #    print common_sub_strings(x,y)
    # Short sample strings used by the disabled call below.
    a = "taaatc"
    b = "aaataa"
    #print common_sub_strings(a+a, b, limit = min(25, 25*(len(a)/25)+1))


'''
(1, 1, 2)
(2, 0, 2)
(7, 1, 2)
(8, 0, 2)
(2, 4, 2)
(8, 4, 2)

(1, 0, 2)
(1, 1, 2) 1
(1, 4, 2)
(2, 0, 2) 2
(2, 1, 2)
(2, 4, 2) 3
(7, 0, 2)
(7, 1, 2) 4
(7, 4, 2)
(8, 0, 2) 5
(8, 1, 2)
(8, 4, 2) 6
'''
