from array import array
# from collections import Counter
from collections import namedtuple
from fuzzysearch.common import Match

from fuzzysearch.custom_search import _prepare_init_candidates_dict


def kevin(str1, str2, max_substitutions, max_insertions):
    """Tell whether str1 can be turned into str2 within the given limits.

    Only two kinds of edits are considered: inserting new characters (at most
    max_insertions of them) and substituting characters (at most
    max_substitutions of them).  Returns True or False.
    """
    len1 = len(str1)
    len2 = len(str2)
    n_extra = len2 - len1

    # With insertions only, str2 can be neither shorter than str1 nor more
    # than max_insertions longer.
    if n_extra < 0 or len2 > len1 + max_insertions:
        return False

    # best[k] is the fewest substitutions found so far when the processed
    # prefix of str1 is lined up against a prefix of str2 that is k characters
    # longer, i.e. after k insertions.
    best = array('L', [0] * (n_extra + 1))
    scratch = best[:]

    for offset, wanted in enumerate(str1):
        # Start above any reachable score so that the first comparison of the
        # inner loop always picks the pairing cost.
        running = len2
        window = str2[offset:offset + n_extra + 1]
        for k, got in enumerate(window):
            pairing_cost = best[k] if wanted == got else best[k] + 1
            if pairing_cost < running:
                running = pairing_cost
            # "running" carries the best score over to the next k, which
            # models inserting one more character of str2.
            scratch[k] = running

        # the freshly computed row becomes the current one; the old row is
        # reused as the scratch buffer
        best, scratch = scratch, best

    return min(best) <= max_substitutions


def super_kevin(str1, str2, max_substitutions, max_insertions, max_deletions):
    """check if it is possible to transform str1 into str2 given limitations

    The limitations are the maximum allowed number of new characters inserted,
    the maximum allowed number of character substitutions and the maximum
    allowed number of character deletions.

    Returns True if such a transformation exists, False otherwise.
    """
    # quick answers for simple scenarios
    if max_deletions == 0:
        if max_insertions == 0:
            # substitutions only: lengths must agree and the number of
            # differing positions must fit the budget
            return len(str1) == len(str2) and \
                   sum(a != b for a, b in zip(str1, str2)) <= max_substitutions
        else:
            return kevin(str1, str2, max_substitutions, max_insertions)
    elif max_insertions == 0:
        # deleting from str1 to get str2 is the same as inserting into str2
        # to get str1, so swap the arguments
        return kevin(str2, str1, max_substitutions, max_deletions)
    else:
        candidates = _super_kevin(str1, str2, max_substitutions,
                                  max_insertions, max_deletions)
        return len(candidates) > 0


# Edit counts used so far by one way of aligning a prefix of str1 with a
# prefix of str2.
SuperKevinCandidate = namedtuple('Candidate', ['subs', 'dels', 'ins'])


def _super_kevin(str1, str2, max_substitutions, max_insertions, max_deletions):
    """internal function implementing the actual algorithm for super_kevin()

    Returns a list of SuperKevinCandidate tuples, one for each way found to
    transform all of str1 into all of str2 without exceeding any of the three
    limits.  The list is empty if no such way exists; it may contain
    duplicates.
    """
    len1 = len(str1)

    # candidates[j] holds the ways to turn str1[:j] into the part of str2
    # consumed so far.  Before any str2 character is consumed, the only way to
    # reach column j is to delete the first j characters of str1, which is
    # allowed only while j <= max_deletions.  The table always has exactly
    # len1 + 1 columns so that candidates[-1] means "all of str1".
    candidates = [
        [SuperKevinCandidate(subs=0, dels=n_dels, ins=0)]
        if n_dels <= max_deletions else []
        for n_dels in range(len1 + 1)
    ]

    for str2_char in str2:
        new_candidates = [[] for _i in range(len1 + 1)]

        # Column 0: no str1 characters are used, so the new str2 character
        # can only be an insertion.  The insertion budget must be enforced
        # here too, otherwise over-budget candidates leak into later columns.
        new_candidates[0] = [
            c._replace(ins=c.ins + 1)
            for c in candidates[0]
            if c.ins < max_insertions
        ]

        for str1_idx, str1_char in enumerate(str1):
            reached = new_candidates[str1_idx + 1]

            # insertion: same str1 prefix, one more str2 character
            for candidate in candidates[str1_idx + 1]:
                if candidate.ins < max_insertions:
                    reached.append(
                        candidate._replace(ins=candidate.ins + 1)
                    )

            # match or substitution: pair str1_char with str2_char
            subs_delta = (0 if str1_char == str2_char else 1)
            for candidate in candidates[str1_idx]:
                if candidate.subs + subs_delta <= max_substitutions:
                    reached.append(
                        candidate._replace(subs=candidate.subs + subs_delta)
                    )

            # deletion: drop str1_char, staying at the same str2 position,
            # hence this reads from the row currently being built
            for candidate in new_candidates[str1_idx]:
                if candidate.dels < max_deletions:
                    reached.append(
                        candidate._replace(dels=candidate.dels + 1)
                    )

        candidates = new_candidates

    return candidates[-1]


# State of one partial match while scanning the sequence:
#   start        - index in the sequence where the partial match begins
#   subseq_index - index of the next subsequence item expected
#   l_dist       - distance accumulated so far
#   n_subs / n_ins / n_dels - substitutions / insertions / deletions used
SuperKevinSearchCandidate = namedtuple(
    'SuperKevinSearchCandidate',
    ['start', 'subseq_index', 'l_dist', 'n_subs', 'n_ins', 'n_dels'],
)


def super_kevin_search(subsequence, sequence,
                       max_substitutions, max_insertions, max_deletions,
                       max_l_dist=None):
    """search for near-matches of subsequence in sequence

    This searches for near-matches, where the nearly-matching parts of the
    sequence must meet the following limitations:
    * the maximum allowed number of new characters inserted
      (sequence items skipped)
    * the maximum allowed number of character deletions
      (subsequence items skipped)
    * the maximum allowed number of character substitutions
    * optionally, max_l_dist: the maximum total distance of a match

    This is a generator.  Each near-match is yielded as
    Match(start, end, dist), where sequence[start:end] is the matching part
    and dist is the distance accumulated for that match.  Overlapping and
    repeated matches may be yielded.

    Raises ValueError if subsequence is empty.  Since this is a generator,
    the error is raised when iteration starts, not at call time.
    """
    if not subsequence:
        raise ValueError('Given subsequence is empty!')

    # optimization: prepare some often used things in advance
    _subseq_len = len(subsequence)

    # If max_l_dist cannot be exceeded without first exceeding one of the
    # per-operation limits, it is redundant and filtering on it is skipped.
    _max_l_dist = max_l_dist
    if max_l_dist is not None:
        if max_l_dist >= max_substitutions + max_insertions + max_deletions:
            _max_l_dist = None

    candidates = []
    for index, char in enumerate(sequence):
        # a match may begin at every position of the sequence
        candidates.append(SuperKevinSearchCandidate(index, 0, 0, 0, 0, 0))
        new_candidates = []

        for cand in candidates:
            # if this sequence char is the candidate's next expected char
            if char == subsequence[cand.subseq_index]:
                # if reached the end of the subsequence, return a match
                if cand.subseq_index + 1 == _subseq_len:
                    yield Match(cand.start, index + 1, cand.l_dist)
                # otherwise, update the candidate's subseq_index and keep it
                else:
                    new_candidates.append(cand._replace(
                        subseq_index=cand.subseq_index + 1,
                    ))

            # if this sequence char is *not* the candidate's next expected char
            else:
                # we can try skipping a sequence or sub-sequence char (or both),
                # unless this candidate has already skipped the maximum allowed
                # number of characters
                if cand.l_dist == max_l_dist:
                    continue

                if cand.n_ins < max_insertions:
                    # add a candidate skipping a sequence char
                    new_candidates.append(cand._replace(
                        n_ins=cand.n_ins + 1,
                        l_dist=cand.l_dist + 1,
                    ))

                if cand.subseq_index + 1 < _subseq_len:
                    if cand.n_subs < max_substitutions:
                        # add a candidate skipping both a sequence char and a
                        # subsequence char
                        new_candidates.append(cand._replace(
                            n_subs=cand.n_subs + 1,
                            subseq_index=cand.subseq_index + 1,
                            l_dist=cand.l_dist + 1,
                        ))
                    elif cand.n_dels < max_deletions and cand.n_ins < max_insertions:
                        # add a candidate skipping both a sequence char and a
                        # subsequence char, paid for with one insertion and
                        # one deletion instead of a substitution
                        # NOTE(review): this uses two operations but raises
                        # l_dist by only one - confirm this is intended
                        new_candidates.append(cand._replace(
                            n_ins=cand.n_ins + 1,
                            n_dels=cand.n_dels + 1,
                            subseq_index=cand.subseq_index + 1,
                            l_dist=cand.l_dist + 1,
                        ))
                elif (cand.n_subs < max_substitutions and
                        cand.n_dels >= max_deletions):
                    # substituting the last sub-sequence char completes a
                    # match.  When a deletion is still available, the skip
                    # loop below yields this very same match, so it is only
                    # yielded here when no deletion is left.
                    yield Match(cand.start, index + 1, cand.l_dist + 1)

                # try skipping subsequence chars, without exceeding either
                # the deletions limit or the total distance limit
                max_skip = max_deletions - cand.n_dels
                if _max_l_dist is not None:
                    max_skip = min(max_skip, _max_l_dist - cand.l_dist)
                for n_skipped in range(1, max_skip + 1):
                    # if skipping n_skipped sub-sequence chars reaches the
                    # end of the sub-sequence, yield a match
                    if cand.subseq_index + n_skipped == _subseq_len:
                        yield Match(cand.start, index + 1, cand.l_dist + n_skipped)
                        break
                    # otherwise, if skipping n_skipped sub-sequence chars
                    # reaches a sub-sequence char identical to this sequence
                    # char, consume that char as well
                    elif subsequence[cand.subseq_index + n_skipped] == char:
                        if cand.subseq_index + n_skipped + 1 == _subseq_len:
                            # that was the last sub-sequence char, so this is
                            # a complete match; keeping it as a candidate
                            # would leave subseq_index out of range
                            yield Match(cand.start, index + 1,
                                        cand.l_dist + n_skipped)
                        else:
                            # add a candidate skipping n_skipped subsequence
                            # chars
                            new_candidates.append(cand._replace(
                                n_dels=cand.n_dels + n_skipped,
                                subseq_index=cand.subseq_index + 1 + n_skipped,
                                l_dist=cand.l_dist + n_skipped,
                            ))
                        break
                # note: if the above loop ends without a break, that means that
                # no candidate could be added / yielded by skipping sub-sequence
                # chars

        candidates = new_candidates
        if _max_l_dist is not None:
            candidates = [c for c in candidates if c.l_dist <= _max_l_dist]

    # The sequence is exhausted: a remaining candidate still yields a match if
    # the rest of the sub-sequence can be skipped within the limits.
    for cand in candidates:
        # note: index + 1 == length(sequence)
        n_skipped = _subseq_len - cand.subseq_index
        if cand.n_dels + n_skipped <= max_deletions and \
           (_max_l_dist is None or cand.l_dist + n_skipped <= _max_l_dist):
            yield Match(cand.start, index + 1, cand.l_dist + n_skipped)
