"""
Filter operations.
"""


import itertools
import random
import os
import os.path
import fnmatch
import collections
import sys
import types
import math

try:
    from ._core import filters
except ValueError:
    from _core import filters
import _rank_treap
import _csv_utils
import dagpype_c

# pylint: disable-msg=C0103


# Public API of this module. Each public filter appends its own name to this
#   list immediately after its definition below.
__all__ = []


def filt(trans = None, pre = None, post = None):
    """
    Filter (transform elements and / or suppress them).

    Each incoming element is first checked with pre (if given) and dropped if
        the check fails. A surviving element is then mapped through trans (if
        given), and the mapped value is passed on only if post (if given)
        accepts it.

    Keyword Arguments:
    trans -- Transformation function for each element (default None).
    pre -- Suppression function checked against each element before
        transformation function, if any (default None).
    post -- Suppression function checked against each element after
        transformation function, if any (default None).

    See Also:
        sink
        grep

    Example:

    >>> # square-root of non-negative elements
    >>> filt(trans = lambda x : math.sqrt(x), pre = lambda x : x >= 0)
    """

    @filters
    def _dagpype_internal_fn_act(target):
        try:
            while True:
                item = (yield)
                if pre is None or pre(item):
                    result = item if trans is None else trans(item)
                    if post is None or post(result):
                        target.send(result)
        except GeneratorExit:
            # The stream ended; propagate the close downstream.
            target.close()

    return _dagpype_internal_fn_act
__all__ += ['filt']


def grep(what):
    """
    Filters strings based on the occurrence of a substring or a regular expression.

    Arguments:
        what -- Either a string or a compiled regular expression. Anything
            exposing a search() method is treated as a compiled regular
            expression; anything else is looked up as a substring through
            the find() method of each element.

    See Also:
        filt

    Examples:

    >>> source(['aa', 'aab', 'b']) | grep('b') | to_list()
    ['aab', 'b']

    >>> source(['aa', 'aab', 'b']) | grep(re.compile(r'(a+)b')) | to_list()
    ['aab']
    """

    # Decide once how to match, instead of re-checking the type of what for
    #   every element. Dispatching on the search() method (rather than on
    #   isinstance(what, str)) keeps non-str string types on the substring path.
    if hasattr(what, 'search'):
        return filt(pre = lambda s : what.search(s) is not None)

    return filt(pre = lambda s : s.find(what) != -1)
__all__ += ['grep']


def select_inds(inds):
    """
    Selects, from each indexable element, the items at the given indices.

    Arguments:
    inds -- either an integer, or an iterable of integers.

    If inds is an integer, this filter will pass on a single item for
        each element passed through it. Otherwise, it will pass a tuple
        (also when inds holds zero or one indices).

    Examples:

    >>> source([(1, 2, 3), (4, 5, 6)]) | select_inds(2) | to_list()
    [3, 6]

    >>> source([(1, 2, 3), (4, 5, 6)]) | select_inds((0, 2)) | to_list()
    [(1, 3), (4, 6)]

    >>> source([(1, 2, 3), (4, 5, 6)]) | select_inds(()) | to_list()
    [(), ()]
    """

    # isinstance (rather than an exact type comparison) also accepts
    #   subclasses of int.
    if isinstance(inds, int):
        @filters
        def _dagpype_internal_fn_act_i(target):
            try:
                while True:
                    target.send((yield)[inds])
            except GeneratorExit:
                target.close()

        return _dagpype_internal_fn_act_i

    # Materialize once, so that one-shot iterables can be reused per element.
    inds = tuple(inds)

    # Pairs and triplets get dedicated versions which avoid a generator
    #   expression per element.
    if len(inds) == 2:
        @filters
        def _dagpype_internal_fn_act_2(target):
            i0, i1 = inds
            try:
                while True:
                    e = (yield)
                    target.send( (e[i0], e[i1]) )
            except GeneratorExit:
                target.close()

        return _dagpype_internal_fn_act_2

    if len(inds) == 3:
        @filters
        def _dagpype_internal_fn_act_3(target):
            i0, i1, i2 = inds
            try:
                while True:
                    e = (yield)
                    target.send( (e[i0], e[i1], e[i2]) )
            except GeneratorExit:
                target.close()

        return _dagpype_internal_fn_act_3

    @filters
    def _dagpype_internal_fn_act(target):
        try:
            while True:
                e = (yield)
                target.send( tuple(e[i] for i in inds) )
        except GeneratorExit:
            target.close()

    return _dagpype_internal_fn_act
__all__ += ['select_inds']


def relay():
    """
    Passes each element on unchanged.

    Example:

    >>> # Find the rain auto-correlation relative to the signal 5 time units in the future.
    >>> csv_vals(open('meteo.csv'), 'rain') | relay() + skip(5) | corr()
    """

    @filters
    def _dagpype_internal_fn_act(target):
        try:
            while True:
                item = (yield)
                target.send(item)
        except GeneratorExit:
            # The stream ended; propagate the close downstream.
            target.close()

    return _dagpype_internal_fn_act
__all__ += ['relay']


def window_simple_ave(wnd_len):
    """
    Transforms a sequence into a simple moving average of its values
        within some window.

    If the input sequence is x[0], x[1], ..., then the output sequence is
        {{{
        y[i] = (x[max(0, i - wnd_len + 1)] + ... + x[i]) / min(i + 1, wnd_len)
        }}}

    Arguments:
        wnd_len -- Averaging window length; must be positive (asserted once
            the filter starts running).

    See Also:
        cum_ave
        exp_ave

    Examples:

    >>> source([1., 2., 3., 4.]) | window_simple_ave(2) | to_list()
    [1.0, 1.5, 2.5, 3.5]
    """

    @filters
    def _dagpype_internal_fn_act(target):
        assert wnd_len > 0
        # vals is a ring buffer of the last wnd_len elements; sum_ is their
        #   running sum.
        vals, i, sum_ = [0] * wnd_len, 0, 0
        try:
            # Warm-up: fewer than wnd_len elements seen, so average over
            #   what was seen so far.
            while i < wnd_len:
                vals[i] = (yield)
                sum_ += vals[i]
                target.send(sum_ / float(i + 1))
                i += 1
            w_ = float(wnd_len)
            # Steady state: replace the oldest element in the ring by the
            #   newest, and adjust the running sum accordingly.
            while True:
                if i == wnd_len:
                    i = 0
                sum_ -= vals[i]
                vals[i] = (yield)
                sum_ += vals[i]
                target.send(sum_ / w_)
                i += 1
        except GeneratorExit:
            target.close()

    return _dagpype_internal_fn_act
__all__ += ['window_simple_ave']


def cum_ave():
    """
    Transforms a sequence into a cumulative moving average of it.

    If the input sequence is x[0], x[1], ..., then the output sequence is
        {{{
        y[i] = (x[0] + ... + x[i]) / (i + 1)
        }}}

    See Also:
        window_simple_ave
        exp_ave
        np.cum_ave

    Examples:

    >>> source([1., 2., 3., 4.]) | cum_ave() | to_list()
    [1.0, 1.5, 2.0, 2.5]
    """

    @filters
    def _dagpype_internal_fn_act(target):
        try:
            # The first element is waited for inside the try block, so that
            #   closing an empty stream still closes the target.
            y, i = (yield), 1
            target.send(y)
            while True:
                # Incremental mean update: y_new = y + (x - y) / (i + 1).
                y += ((yield) - y) / float(i + 1)
                target.send(y)
                i += 1
        except GeneratorExit:
            target.close()

    return _dagpype_internal_fn_act
__all__ += ['cum_ave']


def exp_ave(alpha):
    """
    Transforms a sequence into an exponential moving average of it.

    If the input sequence is x[0], x[1], ..., then the output sequence is
        {{{
        y[0] = x[0]
        y[i] = alpha * x[i] + (1 - alpha) * y[i - 1]
        }}}

    All but an epsilon of relevant weight is stored at each point in the last
        log(epsilon) / log(1 - alpha) time units.

    Arguments:
    alpha -- Responsiveness factor, should be between 0 and 1 (asserted once
        the filter starts running).

    See Also:
        window_simple_ave
        cum_ave
        np.exp_ave

    Example:

    >>> source([1., 2., 3., 4.]) | exp_ave(0.75) | to_list()
    [1.0, 1.75, 2.6875, 3.671875]
    """

    @filters
    def _dagpype_internal_fn_act(target):
        assert 0 <= alpha <= 1
        alpha_tag = 1 - alpha
        try:
            # The first element is waited for inside the try block, so that
            #   closing an empty stream still closes the target.
            y = (yield)
            target.send(y)
            while True:
                y = alpha * (yield) + alpha_tag * y
                target.send(y)
        except GeneratorExit:
            target.close()

    return _dagpype_internal_fn_act
__all__ += ['exp_ave']


def _window_min_max_imp(wnd_len, cmp_):
    """
    Builds a filter sending on, for each element, the extremum (according to
        cmp_) of the last wnd_len elements.

    Arguments:
    wnd_len -- Window length.
    cmp_ -- Comparison function taking two elements and returning a negative,
        zero, or positive number; the element considered smallest by it is the
        one sent on.
    """

    # Created once per filter rather than on every run. The 'verbose' keyword
    #   is deliberately not passed: it defaults to False where supported, and
    #   was removed altogether in Python 3.7.
    _Pair = collections.namedtuple('_Pair', ['val', 'death'])

    @filters
    def _dagpype_internal_fn_act(target):
        # ring is a circular buffer of candidate extrema, each paired with the
        #   element index at which it leaves the window ('death'). min_pair is
        #   the position of the current extremum, last that of the most
        #   recently stored candidate.
        ring, end, last, min_pair = [None] * wnd_len, wnd_len, 0, 0
        try:
            # The first element is waited for inside the try block, so that
            #   closing an empty stream still closes the target.
            ring[0] = _Pair((yield), wnd_len)
            target.send(ring[0].val)
            i = 1
            while True:
                # Retire the current extremum if it has just left the window.
                if ring[min_pair].death == i:
                    min_pair += 1
                    if min_pair >= end:
                        min_pair = 0
                e = (yield)
                if cmp_(e, ring[min_pair].val) <= 0:
                    # The new element beats every candidate: it replaces them all.
                    ring[min_pair] = _Pair(e, i + wnd_len)
                    last = min_pair
                else:
                    # Walk back over the candidates the new element beats, and
                    #   store it right after the first one it does not.
                    while cmp_(ring[last].val, e) >= 0:
                        if last == 0:
                            last = end
                        last -= 1
                    last += 1
                    if last == end:
                        last = 0
                    ring[last] = _Pair(e, i + wnd_len)
                target.send(ring[min_pair].val)
                i += 1
        except GeneratorExit:
            target.close()

    return _dagpype_internal_fn_act
    
    
def window_min(wnd_len, cmp_ = lambda x, y: cmp(x, y)):
    """
    Transforms a sequence into its minimum within some window.
    Uses an algorithm from http://home.tiac.net/~cri/2001/slidingmin.html

    If the input sequence is x[0], x[1], ..., then the output sequence is
        {{{
        y[i] = min(x[i], x[i - 1], ..., x[max(0, i - wnd_len + 1)])
        }}}
    (i.e., the minimum is over the last wnd_len elements, as in the example below).

    Arguments:
    wnd_len -- Window length.

    Keyword Arguments:
    cmp_ -- Comparison function (default: cmp) used for deciding which is smaller than which.

    See Also:
        window_max

    Examples:

    >>> source([1, 2, 3, 4, 1, 0, 4, 4]) | window_min(2) | to_list()
    [1, 1, 2, 3, 1, 0, 0, 4]
    """

    # The sliding-window logic itself is in _window_min_max_imp.
    return _window_min_max_imp(wnd_len, cmp_)
__all__ += ['window_min']


def window_max(wnd_len, cmp_ = lambda x, y: cmp(x, y)):
    """
    Transforms a sequence into its maximum within some window.

    Arguments:
    wnd_len -- Window length.

    Keyword Arguments:
    cmp_ -- Comparison function (default: cmp) used for deciding which is smaller than which.

    See Also:
        window_min

    Examples:

    >>> source([1, 2, 3, 4, 1, 0, 4, 4]) | window_max(2) | to_list()
    [1, 2, 3, 4, 4, 1, 4, 4]
    """

    # Same implementation as window_min, with the arguments of the comparison
    #   function swapped.
    return _window_min_max_imp(wnd_len, lambda x, y: cmp_(y, x))
__all__ += ['window_max']


def window_quantile(wnd_len, quantile = 0.5, cmp_ = lambda x, y: cmp(x, y)):
    """
    Transforms a sequence into its quantiles within some window.

    If the input sequence is x[0], x[1], ..., then the output sequence is
        {{{
        y[i] = q_{quantile}(x[i], x[i - 1], ..., x[max(0, i - wnd_len + 1)])
        }}}
    where q_{p}(A) is the smallest element larger than a p-th of A's elements (e.g., 0.5 is the
        median). A quantile of 1 gives the largest element in the window.

    Arguments:
        wnd_len -- Window length; must be positive.

    Keyword Arguments:
        quantile -- Quantile fraction; should be between 0 and 1 (default 0.5, which is the median).
        cmp_ -- Comparison function (default: cmp) used for deciding which is smaller than which.

    Examples:

    >>> source([1, 4, 2, 4, 6, 9, 2, 4, 5]) | window_quantile(2, 0.5) | to_list()
    [1, 4, 4, 4, 6, 9, 9, 4, 5]
    >>> source([1, 4, 2, 4, 6, 9, 2, 4, 5]) | window_quantile(3, 0.5) | to_list()
    [1, 4, 2, 4, 4, 6, 6, 4, 4]
    """

    @filters
    def _dagpype_internal_fn_act(target):
        assert wnd_len > 0
        assert 0 <= quantile <= 1
        # es holds, in ring order, whatever tr.insert returned for each element
        #   in the window, so that the oldest one can later be erased.
        es, tr = [], _rank_treap.Treap(cmp_)
        try:
            # Warm-up: the window grows until it holds wnd_len elements.
            while True:
                es.append(tr.insert((yield)))
                # int(quantile * n) is n when quantile is 1, which is one past
                #   the last rank; clamp it. (Ranks are taken to be 0-based,
                #   as the documented examples imply.)
                k = min(int(quantile * len(es)), len(es) - 1)
                target.send(tr.kth(k))
                if len(es) == wnd_len:
                    break
            # Steady state: the window size (and hence k) no longer changes;
            #   each new element replaces the oldest one.
            i = 0
            while True:
                tr.erase(es[i])
                es[i] = tr.insert((yield))
                i = (i + 1) % wnd_len
                target.send(tr.kth(k))
        except GeneratorExit:
            target.close()

    return _dagpype_internal_fn_act
__all__ += ['window_quantile']


def cast(types_):
    """
    Returns a cast of elements.

    Arguments:
        types_ -- either a single callable (typically a type), or an iterable of
            callables. This corresponds to whether each element is a single item
            or a tuple of items. In the former case each element is passed on
            converted; in the latter, a tuple of converted items is passed on
            (items without a matching converter, and vice versa, are dropped).

    Examples:

    >>> source(['1']) | cast(float) | to_list()
    [1.0]

    >>> source([('1', '2')]) | cast((int, float)) | to_list()
    [(1, 2.0)]
    """

    # callable (rather than an exact comparison to type) also accepts classes
    #   with a custom metaclass and plain conversion functions.
    if callable(types_):
        @filters
        def _dagpype_internal_fn_act_1(target):
            try:
                while True:
                    target.send(types_((yield)))
            except GeneratorExit:
                target.close()

        return _dagpype_internal_fn_act_1

    # Materialize once, so that one-shot iterables can be reused per element.
    types_ = tuple(types_)

    # Pairs and triplets get dedicated versions which avoid a generator
    #   expression per element.
    if len(types_) == 2:
        @filters
        def _dagpype_internal_fn_act_2(target):
            t0, t1 = types_
            try:
                while True:
                    e = (yield)
                    target.send( (t0(e[0]), t1(e[1])) )
            except GeneratorExit:
                target.close()

        return _dagpype_internal_fn_act_2

    if len(types_) == 3:
        @filters
        def _dagpype_internal_fn_act_3(target):
            t0, t1, t2 = types_
            try:
                while True:
                    e = (yield)
                    target.send( (t0(e[0]), t1(e[1]), t2(e[2])) )
            except GeneratorExit:
                target.close()

        return _dagpype_internal_fn_act_3

    @filters
    def _dagpype_internal_fn_act(target):
        try:
            while True:
                e = (yield)
                target.send( tuple(t(ee) for t, ee in zip(types_, e)) )
        except GeneratorExit:
            target.close()

    return _dagpype_internal_fn_act
__all__ += ['cast']


def prepend(what):
    """
    Sends on a given value ahead of all elements.

    The value is sent as soon as the filter starts running, before any element
        is received.

    Arguments:
        what -- What to prepend.

    See Also:
        append

    Example:

    >>> source([1, 2, 3, 4]) | prepend(0) | to_list()
    [0, 1, 2, 3, 4]
    """

    @filters
    def _dagpype_internal_fn_act(target):
        target.send(what)
        try:
            while True:
                item = (yield)
                target.send(item)
        except GeneratorExit:
            target.close()

    return _dagpype_internal_fn_act
__all__ += ['prepend']

    
def append(e):
    """
    Sends on a given value after all elements.

    The value is sent once the stream is closed, just before the close is
        passed on.

    Arguments:
        e -- What to append.

    See Also:
        prepend

    Example:

    >>> source([1, 2, 3, 4]) | append(5) | to_list()
    [1, 2, 3, 4, 5]
    """

    @filters
    def _dagpype_internal_fn_act(target):
        try:
            while True:
                item = (yield)
                target.send(item)
        except GeneratorExit:
            # End of stream: emit the extra value, then close downstream.
            target.send(e)
            target.close()

    return _dagpype_internal_fn_act
__all__ += ['append']


def filename_filt(pattern, skip_files = False, skip_dirs = True):
    """
    Filters filenames - checks if they pass some criteria.

    A name is passed on unless it is a regular file and skip_files is set, or
        it is a directory and skip_dirs is set, or it fails to match the
        pattern. File and directory checks consult the file system.

    Arguments:
        pattern -- Glob type of pattern; None means that any name matches.

    Keyword Arguments:
        skip_files -- Whether to skip regular files (default False)
        skip_dirs -- Whether to skip directories (default True)

    See Also:
        os_walk

    Example:

    >>> # Counts the number of files of the form 'data?.csv'
    >>> print os_walk() | filename_filt('data?.csv') | count()
    """

    def _accepts(f_name):
        rejected = (
            (skip_files and os.path.isfile(f_name)) or
            (skip_dirs and os.path.isdir(f_name)) or
            (pattern is not None and not fnmatch.fnmatch(f_name, pattern)))
        return not rejected

    return filt(pre = _accepts)
__all__ += ['filename_filt']


def prob_rand_sample(prob):
    """
    Randomly passes some of the elements, with a given probability.

    Each element is passed or dropped independently of its value, according to
        a fresh draw of random.random().

    Arguments:
        prob -- Probability an element will pass; must be between 0 and 1.

    See Also:
        size_rand_sample

    Example:

    >>> n = 9999
    >>> print (source(xrange(n)) | prob_rand_sample(0.7) | count()) / float(n)
    0.702870287029
    """

    assert 0 <= prob <= 1

    def _drawn(_):
        return random.random() < prob

    return filt(pre = _drawn)
__all__ += ['prob_rand_sample']


def skip(n):
    """
    Skips n elements.

    Arguments:
    n - If non-negative, skips the first n elements; otherwise, skips the
        last -n elements.

    See Also:
        nth
        np.skip

    Example:

    >>> source([1, 2, 3, 4]) | skip(2) | to_list()
    [3, 4]

    >>> source([1, 2, 3, 4]) | skip(-2) | to_list()
    [1, 2]
    """

    if n >= 0:
        @filters
        def _dagpype_internal_fn_act_p(target):
            # Number of leading elements still to be dropped.
            pending = n
            try:
                while True:
                    item = (yield)
                    if pending > 0:
                        pending -= 1
                        continue
                    target.send(item)
            except GeneratorExit:
                target.close()

        return _dagpype_internal_fn_act_p

    @filters
    def _dagpype_internal_fn_act_n(target):
        # Elements are held back in a bounded queue; an element is released
        #   only once the queue is full, so whatever is still queued when the
        #   stream closes is never sent on.
        capacity = 1 - n
        held = collections.deque([], capacity)
        try:
            while True:
                if len(held) == capacity:
                    target.send(held.popleft())
                item = (yield)
                held.append(item)
        except GeneratorExit:
            target.close()

    return _dagpype_internal_fn_act_n
__all__ += ['skip']


def trace(stream = sys.stdout, enum = True, format_ = lambda e : str(e)):
    """
    Traces elements to a stream. Useful for debugging problematic streams.

    Each element is written (and flushed) as a line of its own before being
        passed on unchanged.

    Keyword Arguments:
        stream -- Stream to which to trace (default sys.stdout)
        enum -- Whether to enumerate each element by its order (default True)
        format_ -- Format function for elements (default lambda e : str(e))

    Example:

    >>> source([1, 2, 3, 'd']) | trace() | sum()
    0 : 1
    1 : 2
    2 : 3
    3 : d
    Traceback (most recent call last):
    ...
    """

    @filters
    def _dagpype_internal_fn_act(target):
        try:
            count = 0
            while True:
                item = (yield)
                if enum:
                    line = str(count) + ' : '
                else:
                    line = ''
                line += format_(item) + '\n'
                stream.write(line)
                # Flush per element, so the trace is visible even if a later
                #   stage fails.
                stream.flush()
                count += 1
                target.send(item)
        except GeneratorExit:
            target.close()

    return _dagpype_internal_fn_act
__all__ += ['trace']
    
    
def split(delimit = ','):
    """
    Splits a stream of strings to a stream of tuples resulting from the strings
        being split by a delimiter.

    Keyword Arguments:
        delimit -- Delimiting string (default ',')

    See Also:
        csv_split

    Example:

    >>> source(['a,b', 'c,d,e']) | split() | to_list()
    [('a', 'b'), ('c', 'd', 'e')]
    """

    # TODO(Ami): add to unit test, add optional types
    # str.split gives a list; convert it, so that tuples are passed on as documented.
    return filt(trans = lambda l : tuple(l.split(delimit)))
__all__ += ['split']


def csv_split(
    cols = None, 
    types_ = None, 
    delimit = b',', 
    comment = b'#', 
    skip_init_space = True):

    """
    Splits the values in a delimited stream (e.g., by commas for CSV files, or by tabs for TAB files) as tuples.

    The first element received is used only for working out the column
        attributes; it is not itself parsed and passed on.

    Keyword Arguments:
        cols -- Indication of which columns to read. If either an integer or a tuple of integers,
            the corresponding columns will be read. If either a string or a tuple of strings, the 
            columns whose first rows have these string names (excluding the first row) will be 
            read. If None (which is the default), everything will be read. 
        types_ -- Either a type or a tuple of types. If this is given, the read values will
            be cast to these types. Otherwise, if this is None (which is the default) the read values
            will be cast into floats.
        delimit -- Delimiting binary character (default b',').
        comment -- Comment-starting binary character (default b'#'). Any character starting from this
            one until the line end will be ignored.
        skip_init_space -- Whether spaces starting a field will be ignored (default True).

    See Also:
        split
        stream_vals

    Examples:

    >>> # Assume the file 'junky.txt' contains lines, those containing the string
    >>> # 'moshe' are split by tabs, and we wish to find the correlation between the
    >>> # 2nd and 5th values in these lines.
    >>> stream_lines('junky.txt') | grep('moshe') | csv_split( (2, 5), delimit = b'\\t' ) | corr()
    0.23
    """

    # Alias for the types_ argument: the inner function rebinds the name types_
    #   as a local (in the tuple unpacking below), so it cannot read the
    #   argument under its own name.
    types__ = types_
    @filters
    def _dagpype_internal_fn_act(target):
        try:
            # The first element is consumed here, and only handed to
            #   _csv_utils._csv_attribs for deriving the column attributes.
            e = (yield)
            (cols_, single, inds, uniques, copies, max_ind, types_, c_types, cast_back) = \
                _csv_utils._csv_attribs((e, ), cols, types__, delimit, comment, skip_init_space)
            # A single reader object is built, and reused for parsing all
            #   further elements.
            r = dagpype_c.col_reader(
                [], 
                str.encode(delimit), str.encode(comment), 1 if skip_init_space else 0, 
                1 if single else 0,
                inds, uniques, copies, max_ind,
                c_types)
            while True:
                e = dagpype_c.col_reader_parse_string(r, (yield))
                if not cast_back:
                    # The parsed result is passed on as is.
                    target.send(e)
                elif single:
                    # A single value: cast it through the first type.
                    target.send( _csv_utils._cast(e, types_[0]) )
                else:
                    # Several values: cast each through its respective type.
                    target.send( tuple(_csv_utils._cast(a, b) for a, b in itertools.izip_longest(e, types_)) )
        except GeneratorExit:
            target.close()

    return _dagpype_internal_fn_act
__all__ += ['csv_split']




