"""
This module defines the SArray class which provides the
ability to create, access and manipulate a remote scalable array object.

SArray acts similarly to pandas.Series but without indexing.
The data is immutable, homogeneous, and is stored on the GraphLab Server side.
"""
import graphlab.connect as _mt
import graphlab.connect.main as glconnect
from graphlab.cython.cy_sarray import UnitySArrayProxy
from graphlab.cython.context import debug_trace as cython_context
from graphlab.util import make_internal_url
import inspect
import numpy
import pandas
import time

__all__ = ['SArray']

class SArray(object):
    """
    SArray is an array object scaled to big data.
    The data in SArray is immutable, homogeneously typed, and is stored column wise.
    SArray is also used as column in the :py:class:`graphlab.SFrame`.

    SArray can be constructed in various ways:

    Construct an SArray from list.

    >>> from graphlab import SArray
    >>> sa = SArray(data=[1,2,3,4,5], dtype=int)

    Construct an SArray from numpy.ndarray.

    >>> sa = gl.SArray(data=numpy.asarray([1,2,3,4,5]), dtype=int)

    Construct an SArray from pandas.Series.

    >>> sf = gl.SArray(data=pd.Series([1,2,3,4,5]), dtype=int)

    If the type is not specified, automatic inference is attempted via
    conversion to a Pandas Series

    >>> gl.SArray(data=pd.Series([1,2,3,4,5])).dtype()
    int
    >>> gl.SArray(data=pd.Series([1,2,3,4,5.0])).dtype()
    float

    The Datatypes that are supported are, integer, float, string, and
    list (a list of doubles). For instance:

    Creates an SArray of strings:

    >>> sa = SArray(data=['a','b'])

    Creates an SArray of list:

    >>> sa = SArray(data=[[1.0,1.0], [2.0,2.0]])

    Construct an SArray from local text file. (Only works for local server).

    >>> sf = SArray('/tmp/a_to_z.txt.gz')

    Construct an SArray from a text file downloaded from a URL.

    >>> sf = SArray('http://s3-us-west-2.amazonaws.com/testdatasets/a_to_z.txt.gz')

    Construct an SArray from a text file stored on the server side.

    >>> sf = SArray('remote:///tmp/a_to_z.txt.gz')

    SArrays support a large number of vectorized operations. For instance:

    >>> sa = SArray([1,1,1,1,1])
    >>> sb = SArray([2,2,2,2,2])
    >>> sc = sa + sb
    >>> sc
    dtype: int
    Rows: 5
    [3, 3, 3, 3, 3]
    >>> sc + 2
    dtype: int
    Rows: 5
    [5, 5, 5, 5, 5]

    Operators which are supported include all numeric operators (+,-,*,/), as
    well as comparison operators (>, >=, <, <=), and logical operators (&, |).

    For instance:

    >>> sa = SArray([1,2,3,4,5])
    >>> (sa >= 2) & (sa <= 4)
    dtype: int
    Rows: 5
    [0, 1, 1, 1, 0]

    We can also perform logical filtering operations:

    >>> sa = SArray([1,2,3,4,5])
    >>> sa[(sa >= 2) & (sa <= 4)]
    dtype: int
    Rows: 3
    [2, 3, 4]

    The numeric operators (+,-,*,/) also work on list types:

    >>> sa = SArray(data=[[1.0,1.0], [2.0,2.0]])
    >>> sa + 1
    dtype: list
    Rows: 2
    [[2.0, 2.0], [3.0, 3.0]]
    >>> sa + sa
    dtype: list
    Rows: 2
    [[2.0, 2.0], [4.0, 4.0]]

    The addition operator (+) can also be used for string concatenation:

    >>> sa = SArray(data=['a','b'])
    >>> sa + "x"
    dtype: str
    Rows: 2
    ['ax', 'bx']

    The SArray is also iterable, but not efficiently since this involves a
    streaming transmission of data from the server to the client. This should
    not be used for large data.

    >>> sa = SArray([1,2,3,4,5])
    >>> [i + 1 for i in sa]
    [2, 3, 4, 5, 6]

    Parameters
    ----------
    data : list | numpy.ndarray | pandas.Series | string
        The input data. If this is a list, numpy.ndarray, or pandas.Series
        the data in the list is converted and stored in an SArray.
        Alternatively if this is a string, it is interpreted as a path (or
        url) to a text file.  Each line of the text file is loaded as a
        separate row. If data is a file name ending with ".sidx", this is
        loaded as a SArray file.

    dtype : type
        The data type of the SArray. Supported types are: {int,
        float, str}.  If not specified (None), we attempt to evaluate
        it from the input.  If it is a numpy array, or a Pandas
        series, the dtype of the array/series is used. If it is a
        list, it is first converted to a Pandas series, and the
        dtype of that is used. If it is a URL or path to a text file,
        we default to strings.

    ignore_cast_failure : bool
        If true, ignores casting failures, but warns
        when the elements that cannot be casted into the dtype
        specified.

    Notes
    -----
    When working with the graphlab EC2 instance, e.g. :py:func:`graphlab.aws.launch_EC2()`,
    SArray cannot be constructed using local file path, because it involves
    potentially large amount of data transfer from client to server.
    However, it is still ok to use a remote file path.

    >>> graphlab.aws.launch_EC2('m1.large')
    >>> sf = SArray('~/mydata/foo.csv') # throws exception
    >>> sf = SArray('remote:///mydata/foo.csv') # works
    >>> sf = SArray("http://testdatasets.s3-website-us-west-2.amazonaws.com/users.csv.gz") # works
    >>> sf = SArray("s3://mybucket/foo.csv") # works
    >>> graphlab.aws.terminate_EC2()

    Similar restriction applies to :py:class:`graphlab.Graph` and :py:class:`graphlab.SFrame`.
    """

    def __init__(self, data=[], dtype=None, ignore_cast_failure=False, _proxy=None):
        """__init__(data=list(), dtype=None, ignore_cast_failure=False)
        Construct a new SArray from a list, numpy.ndarray, pandas.Series, or
        a path / url string.

        Parameters
        ----------
        data : list | numpy.ndarray | pandas.Series | string
            The input data. A list, numpy.ndarray, or pandas.Series is
            loaded element by element into the SArray.
            A string is interpreted as a path (or url): if it ends with
            ".sidx" it is loaded as a saved SArray file, otherwise it is
            loaded as a text file (per the original documentation, each line
            becomes a separate row).

        dtype : type
            The data type of the SArray. Supported types are: {int,
            float, str, list}.  If not specified (None), it is inferred from
            the input: for a list, from the types of its elements; for a
            numpy array or pandas series, from the dtype of the array/series
            (an ``object`` dtype is refined by inspecting the elements); for
            a path or url string, it defaults to str.

        ignore_cast_failure : bool
            If true, ignores casting failures, but warns
            when the elements that cannot be casted into the dtype
            specified. Only used when data is a list, array, or series.

        _proxy : None
            Internal, do not use.

        Raises
        ------
        TypeError
            If data is not one of the supported source types.

        Notes
        -----
        If data is pandas.Series, the index will be ignored.
        """
        _mt._get_metric_tracker().track('sarray.init')
        if (_proxy):
            # wrap an already existing proxy; all other arguments are ignored
            self.__proxy__ = _proxy
            return

        self.__proxy__ = UnitySArrayProxy(glconnect.get_client())

        is_sequence = isinstance(data, (pandas.Series, numpy.ndarray, list))
        is_path = isinstance(data, str)

        # type inference is only needed when the caller gave no dtype
        if dtype is None:
            if isinstance(data, list):
                # inspect the element types of the list
                dtype = self.__infer_dtype_of_list__(data)
            elif is_sequence:
                # pandas series / numpy array: start from their own dtype
                dtype = data.dtype
                if dtype == object:
                    # "object" is too coarse, look at the actual elements
                    dtype = self.__infer_dtype_of_list__(data)
            elif is_path:
                # files default to string
                dtype = str

        if is_sequence:
            with cython_context():
                self.__proxy__.load_from_iterable(data, dtype, ignore_cast_failure)
        elif is_path:
            is_sarray_file = data.endswith('.sidx')
            internal_url = make_internal_url(data)
            with cython_context():
                if is_sarray_file:
                    self.__proxy__.load_from_sarray_index(internal_url)
                else:
                    self.__proxy__.load_from_url(internal_url, dtype)
        else:
            raise TypeError("Unexpected data source. "
                            "Possible data source types are: list, "
                            "numpy.ndarray, pandas.Series, and string(url)")

    def __infer_dtype_of_list__(self, data):
        # default is float
        if len(data) == 0:
            return float

        unique_types = set([type(x) for x in data if x is not None])
        if len(unique_types) == 0:
            return float
        # if dtype is list, recheck to make sure all values are numeric
        elif len(unique_types) == 1:
            dtype = unique_types.pop()
            if dtype != list:
                return dtype
            else:
                isvalid = [all([isinstance(elem, (int,long,float)) for elem in innerlist]) for innerlist in data]
                if all(isvalid):
                    return list
                else:
                    raise TypeError("Array type should have all entries as doubles")

        elif len(unique_types) == 2:
            # we can do both ints, longs, floats as a float
            if unique_types.issubset(set([int,long,float])):
                return float
            else:
                raise TypeError("Cannot infer Array type. Not all elements of array are the same type.")
        else:
            raise TypeError("Cannot infer Array type. Not all elements of array are the same type.")

    def save(self, targetfile):
        """
        Saves the SArray to file.

        Parameters
        ----------
        filename : string
            The location to save the SFrame. Either a local path or a remote
            URL. The filename will get the suffix '.sidx' if it does not already have it.
        """
        if not targetfile.endswith('.sidx'):
            raise ValueError("Save target must end with sidx")
        else:
            with cython_context():
                self.__proxy__.save(targetfile)

    def __repr__(self):
        """
        Returns a string description of the Array.
        """
        ret =       "dtype: " + str(self.dtype().__name__) + "\n"
        ret = ret + "Rows: " + str(self.size()) + "\n"
        ret = ret + str(self)
        return ret


    def __str__(self):
        """
        Returns a string containing the first 100 elements of the array.
        """
        headln = str(self.head(100))
        if (self.size() > 100):
            # cut the last close bracket
            # and replace it with ...
            headln = headln[0:-1] + ", ... ]"
        return headln


    def __nonzero__(self):
        """
        Returns true if the array is not empty.
        """
        return self.size() != 0

    def __len__(self):
        """
        Returns the length of the array
        """
        return self.size()

    def __iter__(self):
        """
        Provides an iterator to the contents of the array.
        """
        def generator():
            elems_at_a_time = 262144
            self.__proxy__.begin_iterator()
            ret = self.__proxy__.iterator_get_next(elems_at_a_time)
            while(True):
                for j in ret:
                    yield j

                if len(ret) == elems_at_a_time:
                    ret = self.__proxy__.iterator_get_next(elems_at_a_time)
                else:
                    break

        return generator()

    def __add__(self, other):
        """
        Implements ``self + other``.

        If other is an SArray, the two arrays are combined with the proxy's
        vector operator '+' (element-wise addition). Any other value is
        treated as a scalar and added to the array. Returns a new SArray.
        """
        with cython_context():
            if type(other) is not SArray:
                result_proxy = self.__proxy__.left_scalar_operator(other, '+')
            else:
                result_proxy = self.__proxy__.vector_operator(other.__proxy__, '+')
            return SArray(_proxy=result_proxy)

    def __sub__(self, other):
        """
        Implements ``self - other``.

        If other is an SArray, the two arrays are combined with the proxy's
        vector operator '-' (element-wise subtraction). Any other value is
        treated as a scalar and subtracted from the array. Returns a new
        SArray.
        """
        with cython_context():
            if type(other) is not SArray:
                result_proxy = self.__proxy__.left_scalar_operator(other, '-')
            else:
                result_proxy = self.__proxy__.vector_operator(other.__proxy__, '-')
            return SArray(_proxy=result_proxy)

    def __mul__(self, other):
        """
        Implements ``self * other``.

        If other is an SArray, the two arrays are combined with the proxy's
        vector operator '*' (element-wise multiplication). Any other value
        is treated as a scalar multiplier. Returns a new SArray.
        """
        with cython_context():
            if type(other) is not SArray:
                result_proxy = self.__proxy__.left_scalar_operator(other, '*')
            else:
                result_proxy = self.__proxy__.vector_operator(other.__proxy__, '*')
            return SArray(_proxy=result_proxy)

    def __div__(self, other):
        """
        Implements ``self / other``.

        If other is an SArray, the two arrays are combined with the proxy's
        vector operator '/' (element-wise division). Any other value is
        treated as a scalar divisor. Returns a new SArray.
        """
        with cython_context():
            if type(other) is not SArray:
                result_proxy = self.__proxy__.left_scalar_operator(other, '/')
            else:
                result_proxy = self.__proxy__.vector_operator(other.__proxy__, '/')
            return SArray(_proxy=result_proxy)

    def __lt__(self, other):
        """
        Implements ``self < other``.

        If other is an SArray, the arrays are compared element-wise with
        the proxy's vector operator '<'. Any other value is treated as a
        scalar that each element is compared against. Returns a new SArray
        holding the comparison results.
        """
        with cython_context():
            if type(other) is not SArray:
                result_proxy = self.__proxy__.left_scalar_operator(other, '<')
            else:
                result_proxy = self.__proxy__.vector_operator(other.__proxy__, '<')
            return SArray(_proxy=result_proxy)

    def __gt__(self, other):
        """
        Implements ``self > other``.

        If other is an SArray, the arrays are compared element-wise with
        the proxy's vector operator '>'. Any other value is treated as a
        scalar that each element is compared against. Returns a new SArray
        holding the comparison results.
        """
        with cython_context():
            if type(other) is not SArray:
                result_proxy = self.__proxy__.left_scalar_operator(other, '>')
            else:
                result_proxy = self.__proxy__.vector_operator(other.__proxy__, '>')
            return SArray(_proxy=result_proxy)


    def __le__(self, other):
        """
        Implements ``self <= other``.

        If other is an SArray, the arrays are compared element-wise with
        the proxy's vector operator '<='. Any other value is treated as a
        scalar that each element is compared against. Returns a new SArray
        holding the comparison results.
        """
        with cython_context():
            if type(other) is not SArray:
                result_proxy = self.__proxy__.left_scalar_operator(other, '<=')
            else:
                result_proxy = self.__proxy__.vector_operator(other.__proxy__, '<=')
            return SArray(_proxy=result_proxy)


    def __ge__(self, other):
        """
        Implements ``self >= other``.

        If other is an SArray, the arrays are compared element-wise with
        the proxy's vector operator '>='. Any other value is treated as a
        scalar that each element is compared against. Returns a new SArray
        holding the comparison results.
        """
        with cython_context():
            if type(other) is not SArray:
                result_proxy = self.__proxy__.left_scalar_operator(other, '>=')
            else:
                result_proxy = self.__proxy__.vector_operator(other.__proxy__, '>=')
            return SArray(_proxy=result_proxy)


    def __radd__(self, other):
        """
        Reflected addition, i.e. ``other + self`` with a scalar on the left.
        Delegates to the proxy's right scalar operator '+'.
        Returned array has the same type as the array on the right hand side
        """
        with cython_context():
            result_proxy = self.__proxy__.right_scalar_operator(other, '+')
            return SArray(_proxy=result_proxy)


    def __rsub__(self, other):
        """
        Reflected subtraction, i.e. ``other - self`` with a scalar on the
        left. Delegates to the proxy's right scalar operator '-'.
        Returned array has the same type as the array on the right hand side
        """
        with cython_context():
            result_proxy = self.__proxy__.right_scalar_operator(other, '-')
            return SArray(_proxy=result_proxy)


    def __rmul__(self, other):
        """
        Reflected multiplication, i.e. ``other * self`` with a scalar on the
        left. Delegates to the proxy's right scalar operator '*'.
        Returned array has the same type as the array on the right hand side
        """
        with cython_context():
            result_proxy = self.__proxy__.right_scalar_operator(other, '*')
            return SArray(_proxy=result_proxy)


    def __rdiv__(self, other):
        """
        Reflected division, i.e. ``other / self``: a scalar divided by each
        element in the array. Delegates to the proxy's right scalar
        operator '/'.
        Returned array has the same type as the array on the right hand side
        """
        with cython_context():
            result_proxy = self.__proxy__.right_scalar_operator(other, '/')
            return SArray(_proxy=result_proxy)


    def __eq__(self, other):
        """
        Implements ``self == other`` as an element-wise comparison.

        If other is an SArray, the arrays are compared element-wise with
        the proxy's vector operator '=='. Any other value is treated as a
        scalar that each element is compared against. Returns a new SArray
        holding the comparison results (not a single bool).
        """
        with cython_context():
            if type(other) is not SArray:
                result_proxy = self.__proxy__.left_scalar_operator(other, '==')
            else:
                result_proxy = self.__proxy__.vector_operator(other.__proxy__, '==')
            return SArray(_proxy=result_proxy)


    def __ne__(self, other):
        """
        Implements ``self != other`` as an element-wise comparison.

        If other is an SArray, the arrays are compared element-wise with
        the proxy's vector operator '!='. Any other value is treated as a
        scalar that each element is compared against. Returns a new SArray
        holding the comparison results (not a single bool).
        """
        with cython_context():
            if type(other) is not SArray:
                result_proxy = self.__proxy__.left_scalar_operator(other, '!=')
            else:
                result_proxy = self.__proxy__.vector_operator(other.__proxy__, '!=')
            return SArray(_proxy=result_proxy)


    def __and__(self, other):
        """
        Perform a logical element-wise 'and' against another SArray.
        """
        if type(other) is SArray:
            with cython_context():
                return SArray(_proxy = self.__proxy__.vector_operator(other.__proxy__, '&'))
        else:
            raise TypeError("SArray can only perform logical and against another SArray")


    def __or__(self, other):
        """
        Perform a logical element-wise 'or' against another SArray.
        """
        if type(other) is SArray:
            with cython_context():
                return SArray(_proxy = self.__proxy__.vector_operator(other.__proxy__, '|'))
        else:
            raise TypeError("SArray can only perform logical or against another SArray")


    def __getitem__(self, other):
        """
        If other is an SArray of identical length, this function performs a
        logical filter: i.e. it subselects all the elements in this array
        where the corresponding value in the other array evaluates to true.
        """
        if type(other) is SArray:
            if len(other) != len(self):
                raise IndexError("Cannot perform logical indexing on arrays of different length.")
            with cython_context():
                return SArray(_proxy = self.__proxy__.logical_filter(other.__proxy__))
        else:
            raise IndexError("Invalid type to use for indexing")

    def size(self):
        """
        Returns the size of the sarray.
        """
        return self.__proxy__.size()

    def dtype(self):
        """
        Returns the data type of the sarray.

        Returns
        -------
        out : type
            The type of the sarray.
        """
        return self.__proxy__.dtype()


    def head(self, n=10):
        """
        Returns a pandas.DataFrame which contains the first n rows of the SFrame.

        This operation will construct a pandas.DataFrame in memory. Care must be taken
        when size of the returned object is big.

        Parameters
        ----------
        n : int
            The number of rows to fetch.

        Returns
        -------
        out : pandas.DataFrame
            The dataframe which contains the first n rows of the SFrame.
        """
        return self.__proxy__.head(n)

    def vector_slice(self, start, end=None):
        """
        If this SArray contains vectors, this returns a new SArray containing
        each individual vector sliced, between start and end, exclusive.

        For instance:

        >>> g
        dtype: list
        Rows: 2
        [[1.0, 2.0, 3.0],
         [2.0, 3.0, 4.0]]

        >>> g.vector_slice(0) # extracts the first element of each vector
        dtype: float
        Rows: 2
        [1.0, 2.0]

        >>> g.vector_slice(0, 2) # extracts the first two elements of each vector
        dtype: list
        Rows: 2
        [[1.0, 2.0], [2.0, 3.0]]

        If a vector cannot be sliced, the result will be None.

        For instance:

        >>> g
        dtype: list
        Rows: 3
        [[1.0], [1.0, 2.0], [1.0, 2.0, 3.0]]

        >>> g.vector_slice(2)
        dtype: float
        Rows: 3
        [None, None, 3.0]

        >>> g.vector_slice(0,2)
        dtype: list
        Rows: 3
        [None, [1.0, 2.0], [1.0, 2.0]]

        Parameters
        ----------
        start :  The start position of the slice.

        end :  Optional. The end position of the slice. Note that the end
        position is NOT included in the slice. Thus a g.vector_slice(1,3) will
        extract entries in position 1 and 2.
        """
        if self.dtype() != list:
            raise RuntimeError("Only Vector type can be sliced")
        if end == None:
            end = start + 1

        with cython_context():
            return SArray(_proxy=self.__proxy__.vector_slice(start, end))

    def apply(self, fn, dtype=None, skip_undefined=True, seed=None):
        """
        Returns a new SArray of dtype where each element in this SArray is
        transformed by fn(x).
        The fn should return a value which can be cast into dtype.

        If dtype is not specified, the first 100 elements of the Array are
        used to make a guess of the target datatype.

        Parameters
        ----------
        fn : function
            The function to transform each element.

        dtype : dtype
            The dtype of the new SArray. If None, the first 100
            elements of the array are used to guess the target
            data type.

        skip_undefined : bool, optional
            If True, will not apply fn to any undefined values.

        seed : int, optional
            Used as the seed if a random number generator is included in fn.

        Returns
        -------
        out : SArray
            The SArray transformed by fn.  Each element of the SArray is of
            type ``dtype``
        """
        assert inspect.isfunction(fn), "Input must be a function"
        dryrun = [fn(i) for i in self.head(100)]
        if dtype == None:
            dtype = self.__infer_dtype_of_list__(dryrun)

        if not seed:
            seed = time.time()

        _mt._get_metric_tracker().track('sarray.apply')

        with cython_context():
            return SArray(_proxy=self.__proxy__.transform(fn, dtype, skip_undefined, seed))


    def filter(self, fn, skip_undefined=True, seed=None):
        """
        Returns a new SArray which is filtered by the given function.
        If the lambda evaluates an element to true, this element is copied to the
        new SArray.  If not, it isn't.  Throws an exception if the return type
        of the lambda is not castable to a boolean value.

        Parameters
        ----------
        fn : function
            Function that filters the SArray. Must evaluate to bool or int.

        skip_undefined : bool, optional
            If True, will not apply fn to any undefined values.

        seed : int, optional
            Used as the seed if a random number generator is included in fn.

        Returns
        -------
        out : SArray
        """
        assert inspect.isfunction(fn), "Input must be a function"
        if not seed:
            seed = time.time()

        _mt._get_metric_tracker().track('sarray.filter')

        with cython_context():
            return SArray(_proxy=self.__proxy__.filter(fn, skip_undefined, seed))


    def sample(self, fraction, seed=None):
        """
        Returns an SArray which contains a subsample of the current SArray.

        Parameters
        ----------
        fraction : float
            The fractionage of the rows to fetch. Must be between 0 and 1.

        seed : int
            The random seed for the random number generator.

        Returns
        -------
        out : SArray
            The new SArray which contains the subsampled rows.
        """
        if (fraction > 1 or fraction < 0):
            raise ValueError('Invalid sampling rate: ' + str(fraction))
        if (self.size() == 0):
            return SArray()
        if not seed:
            seed = time.time()

        _mt._get_metric_tracker().track('sarray.sample')

        with cython_context():
            return SArray(_proxy=self.__proxy__.sample(fraction, seed))

    def _save_as_text(self, url):
        """
        Save the SArray to disk as text file.
        """
        raise NotImplementedError



    def all(self):
        """
        Returns True if all of the elements evaluate to True.
        The empty array returns True.

        Returns
        -------
        out : bool
            The result of the proxy's all().
        """
        with cython_context():
            every_true = self.__proxy__.all()
            return every_true


    def any(self):
        """
        Returns True if any of the elements evaluate to True.
        The empty array returns False.

        Returns
        -------
        out : bool
            The result of the proxy's any().
        """
        with cython_context():
            some_true = self.__proxy__.any()
            return some_true


    def max(self):
        """
        Returns the maximum value in the SArray.  The empty SArray returns
        None.
        Throws an exception if called on an SArray with non-numeric type.

        Returns
        -------
        out : type of SArray
            Maximum value of SArray
        """
        with cython_context():
            largest = self.__proxy__.max()
            return largest


    def min(self):
        """
        Returns the minimum value in the SArray.  The empty SArray returns
        None.
        Throws an exception if called on an SArray with non-numeric type.

        Returns
        -------
        out : type of SArray
            Minimum value of SArray
        """
        with cython_context():
            smallest = self.__proxy__.min()
            return smallest


    def sum(self):
        """
        Returns the sum of all the values in the sarray.  The empty SArray
        returns None.
        If the array contains strings, this will throw an exception.
        If the array contains vectors, and all the vectors are the same length,
        the sum over all the vectors will be returned. Otherwise, if the vectors
        have different lengths, this will throw an exception.

        For large values, this may overflow without warning.

        Returns
        -------
        out : type of SArray
            Sum of all values in SArray
        """
        with cython_context():
            total = self.__proxy__.sum()
            return total


    def mean(self):
        """
        Returns the mean of the values in the sarray.  The empty SArray
        returns None.  Throws an exception if called on an sarray with
        non-numeric type.

        Returns
        -------
        out : float
            Mean of all values in SArray
        """
        with cython_context():
            average = self.__proxy__.mean()
            return average


    def std(self, ddof=0):
        """
        Returns the standard deviation of all the values in the sarray as
        a float. The empty array returns None.

        Parameters
        ----------
        ddof : int
            "delta degrees of freedom" in the variance calculation.

        Raises
        ------
        Exception
            If ddof >= sarray size or if the sarray is a non-numeric type.

        Returns
        -------
        out : float
            The standard deviation of all the values.
        """
        with cython_context():
            deviation = self.__proxy__.std(ddof)
            return deviation


    def var(self, ddof=0):
        """
        Returns the variance of all the values in the sarray as
        a float. The empty array returns None.

        Parameters
        ----------
        ddof : int
            "delta degrees of freedom" in the variance calculation.

        Raises
        ------
        Exception
            If ddof >= sarray size or if the sarray is a non-numeric type.

        Returns
        -------
        out : float
            Variance of all values in SArray.
        """
        with cython_context():
            variance = self.__proxy__.var(ddof)
            return variance

    def num_missing(self):
        """
        Returns the number of missing elements in the SArray.  Returns 0 if
        the sarray is empty.

        Returns
        -------
        out : int
            Number of missing values.
        """
        with cython_context():
            missing_count = self.__proxy__.num_missing()
            return missing_count

    def nnz(self):
        """
        Returns the number of non-zero elements in the SArray.  Returns 0 if
        the sarray is empty.

        Returns
        -------
        out : int
            Number of non-zero elements.
        """
        with cython_context():
            nonzero_count = self.__proxy__.nnz()
            return nonzero_count

    def astype(self, dtype, undefined_on_failure=False):
        """
        Returns a new SArray with all of the current values casted to the
        given type.

        Throws an exception if the types are not castable to the given type.

        Parameters
        ----------
        dtype : type
            The type to cast the elements to in SArray

        undefined_on_failure : bool
            If set to true, runtime cast failures will be emitted as
            missing values rather than failing.

        Returns
        -------
        out : SArray (of type dtype)
            The SArray converted to the dtype
        """
        # record which target type was requested
        metric_details = {'type': str(dtype)}
        _mt._get_metric_tracker().track('sarray.astype', metric_details)

        with cython_context():
            converted_proxy = self.__proxy__.astype(dtype, undefined_on_failure)
            return SArray(_proxy=converted_proxy)

    def clip(self, lower=numpy.nan, upper=numpy.nan):
        """
        Returns a new SArray with the clipped values of this SArray.
        Each value outside of the given bounds is replaced by the bound it
        exceeds.

        This function can operate on numeric arrays, as well as vector arrays,
        in which case each individual element in each vector is clipped.

        If lower or upper are given numpy.nan as the argument, this is
        interpreted as a non-existent bound.

        Parameters
        ----------
        lower : int
            The lower bound to clip to. Ignored if equal to numpy.nan

        upper : int
            The upper bound to clip to. Ignored if equal to numpy.nan

        Raises
        ------
        Exception
            If the SArray is empty, the types are non-numeric, or if the
            upper bound is less than the lower bound

        Returns
        -------
        out : SArray
            The clipped SArray.
        """
        with cython_context():
            clipped_proxy = self.__proxy__.clip(lower, upper)
            return SArray(_proxy=clipped_proxy)

    def clip_lower(self, threshold):
        """
        Build a new SArray in which every value is clipped to the given
        lower bound.

        Works on numeric arrays and on vector arrays; for vector arrays each
        individual element of each vector is clipped.

        Parameters
        ----------
        threshold : float
            Lower bound that values are clipped to.

        Raises
        ------
        Exception
            If the SArray is empty or the types are non-numeric.

        Returns
        -------
        out : SArray
            A new SArray holding the clipped values.
        """
        # numpy.nan in the upper slot leaves the upper side unbounded.
        no_upper_bound = numpy.nan
        with cython_context():
            clipped_proxy = self.__proxy__.clip(threshold, no_upper_bound)
            return SArray(_proxy=clipped_proxy)


    def clip_upper(self, threshold):
        """
        Build a new SArray in which every value is clipped to the given
        upper bound.

        Works on numeric arrays and on vector arrays; for vector arrays each
        individual element of each vector is clipped.

        Parameters
        ----------
        threshold : float
            Upper bound that values are clipped to.

        Raises
        ------
        Exception
            If the SArray is empty or the types are non-numeric.

        Returns
        -------
        out : SArray
            A new SArray holding the clipped values.
        """
        # numpy.nan in the lower slot leaves the lower side unbounded.
        no_lower_bound = numpy.nan
        with cython_context():
            clipped_proxy = self.__proxy__.clip(no_lower_bound, threshold)
            return SArray(_proxy=clipped_proxy)

    def tail(self, n=10):
        """
        Fetch the last n elements of the SArray as a list.

        Does not throw. Be careful when the returned object would be large
        (bigger than the available memory).

        Parameters
        ----------
        n : int
            How many trailing elements to fetch.

        Returns
        -------
        out : list
            The final n elements of the SArray.
        """
        with cython_context():
            # The proxy result is handed back as-is, not wrapped in an SArray.
            last_items = self.__proxy__.tail(n)
            return last_items


    def dropna(self):
        """
        Build a new SArray that keeps only the non-missing values of this
        one. The result is never longer than the original.

        A missing value appears in an SArray as 'None'.

        Returns
        -------
        out : SArray
            A new SArray with the missing values removed.
        """
        # Record the usage metric before doing the actual filtering.
        tracker = _mt._get_metric_tracker()
        tracker.track('sarray.dropna')

        with cython_context():
            filtered_proxy = self.__proxy__.drop_missing_values()
            return SArray(_proxy=filtered_proxy)


    def topk_index(self, topk=10, reverse=False):
        """
        Mark which elements of this SArray belong to its topk elements
        (ranked in descending order by default).

        The result is a new SArray of type 'int' with the same size as this
        one. An entry is '1' when the corresponding element of this SArray is
        one of the topk elements and '0' otherwise.

        Parameters
        ----------
        topk : int
            How many elements count as 'top'.

        reverse : bool
            If True, the topk elements are taken in ascending order.

        Returns
        -------
        out : SArray (of type int)
            The 0/1 membership indicator described above.

        Notes
        -----
        Mostly used internally by SFrame's topk function.
        """
        with cython_context():
            indicator_proxy = self.__proxy__.topk_index(topk, reverse)
            return SArray(_proxy=indicator_proxy)

    def sketch_summary(self):
        """
        Produce a graphlab.Sketch object for this SArray, which can then be
        queried for many descriptive statistics. Many of those statistics are
        approximate; see the :class:`~graphlab.Sketch` documentation for
        details.

        Returns
        -------
        out : Sketch
            Sketch object holding descriptive statistics for this SArray.
            Many of the statistics are approximate.
        """
        # Imported inside the method rather than at module level.
        from graphlab.data_structures.sketch import Sketch

        tracker = _mt._get_metric_tracker()
        tracker.track('sarray.sketch_summary')

        summary = Sketch(self)
        return summary
