"""
This module defines the SFrame class which provides the
ability to create, access and manipulate a remote scalable dataframe object.

SFrame acts similarly to pandas.DataFrame, but the data is completely immutable
and is stored column wise on the GraphLab Server side.
"""
import graphlab.connect as _mt
import graphlab.connect.main as glconnect
from graphlab.cython.context import debug_trace as cython_context
from graphlab.cython.cy_sframe import UnitySFrameProxy
from graphlab.util import make_internal_url
from graphlab.data_structures.sarray import SArray
import graphlab.aggregate

import inspect
import pandas
import time

_has_matplotlib = False
try:
    import matplotlib.pyplot as plt
    from mpl_toolkits.mplot3d import Axes3D
    from matplotlib import rcParams
    _has_matplotlib = True
except:
    pass


__all__ = ['SFrame']


def load_sframe(filename):
    """
    Load an SFrame from ``filename``.

    This is a thin convenience wrapper: the location is handed to the SFrame
    constructor, which infers the format from the filename extension. It is
    mostly useful for SFrames previously saved in binary format, in which case
    ``filename`` is the directory that was created when the SFrame was saved.
    For CSV imports, SFrame.read_csv offers greater control over parsing.

    Parameters
    ----------
    filename : string
        Location of the file to load. Can be a local path or a remote URL.

    Returns
    -------
    sf : SFrame
    """
    return SFrame(data=filename)


class SFrame(object):
    """
    SFrame is a dataframe object (as in pandas or R) that can scale to big data.
    The data in SFrame is stored column-wise on the GraphLab Server side,
    and is stored on disk to avoid being constrained by memory size.

    While the columns in the SFrame are immutable, SFrame itself is mutable in
    that columns can be added and subtracted from an SFrame with ease. Each
    column of an SFrame is actually an :py:class:`graphlab.SArray`, so an
    SFrame essentially acts as an ordered dict of SArrays.

    SFrame can be constructed in various ways. Currently, we support
    constructing an SFrame from:

    * pandas.DataFrame
    * csv file (comma separated, first line is header)
    * sframe directory archive (A directory where an sframe was saved previously)
    * general text file (with csv parsing options, See :py:meth:`read_csv()`)

    >>> import graphlab
    >>> from graphlab import SFrame

    **Construction**

    Construct an SFrame from a dataframe and transfers the dataframe object
    across the network.

    >>> df = pandas.DataFrame()
    >>> sf = SFrame(data=df)

    Construct an SFrame from a local csv file (only works for local server).

    >>> sf = SFrame(data='~/mydata/foo.csv')

    Construct an SFrame from a csv file on Amazon S3. This requires the
    environment variables: *AWS_ACCESS_KEY_ID* and *AWS_SECRET_ACCESS_KEY* to be
    set before the python session started. Alternatively, you can use
    :py:func:`graphlab.aws.set_credentials()` to set the credentials after
    python is started and :py:func:`graphlab.aws.get_credentials()` to verify
    these environment variables.

    >>> sf = SFrame(data='s3://mybucket/foo.csv')

    Construct an SFrame from a server side gzipped csv file.

    >>> sf = SFrame(data='remote:///mydata/foo.csv.gz')

    Construct an SFrame from a csv file downloaded from a URL using more csv
    parsing parameters.

    >>> sf = SFrame.read_csv('http://testdatasets.s3-website-us-west-2.amazonaws.com/users.csv.gz',
                             delimiter=',',
                             header=False,
                             comment_char="#",
                             column_type_hints={'user_id': int})

    An SFrame can be constructed from a dictionary of values or SArrays:

    >>> sf = gl.SFrame({'id':[1,2,3],'val':['A','B','C']})
    >>> sf
    Columns:
        id  int 
        val str
    Rows: 3
    Data:
       id  val
    0  1   A
    1  2   B
    2  3   C

    Or equivalently:

    >>> ids = SArray([1,2,3])
    >>> vals = SArray(['A','B','C'])
    >>> sf = SFrame({'id':ids,'val':vals})

    It can also be constructed from an array of SArrays in which case column 
    names are automatically assigned.

    >>> ids = SArray([1,2,3])
    >>> vals = SArray(['A','B','C'])
    >>> sf = SFrame([ids, vals])
    >>> sf
    Columns:
        X1 int 
        X2 str
    Rows: 3
    Data:
       X1  X2
    0  1   A
    1  2   B
    2  3   C

    If the SFrame is constructed from a list of values, an SFrame of a single
    column is constructed.

    >>> sf = SFrame([1,2,3])
    >>> sf
    Columns:
        X1 int 
    Rows: 3
    Data:
       X1  
    0  1
    1  2 
    2  3

    **Saving**

    Save and load the sframe in native format.

    >>> sf.save('remote:///mysframedir')
    >>> sf2 = graphlab.load_sframe('remote:///mysframedir')

    **Column Manipulation**

    An SFrame is composed of a collection of columns of SArrays, and individual
    SArrays can be extracted easily. For instance given an SFrame:

    >>> sf = SFrame({'id':[1,2,3],'val':['A','B','C']})
    >>> sf
    Columns:
        id  int 
        val str
    Rows: 3
    Data:
       id  val
    0  1   A
    1  2   B
    2  3   C

    The "id" column can be extracted using:

    >>> sf["id"]
    dtype: int
    Rows: 3
    [1, 2, 3]

    And can be deleted using:

    >>> del sf["id"]

    Multiple columns can be selected by passing a list of column names:

    >>> sf = SFrame({'id':[1,2,3],'val':['A','B','C'],'val2':[5,6,7]})
    >>> sf
    Columns:
        id   int 
        val  str
        val2 int 
    Rows: 3
    Data:
       id  val val2
    0  1   A   5
    1  2   B   6
    2  3   C   7
    >>> sf2 = sf[['id','val']]
    >>> sf2
    Columns:
        id  int 
        val str
    Rows: 3
    Data:
       id  val
    0  1   A
    1  2   B
    2  3   C

    The same mechanism can be used to re-order columns:

    >>> sf = SFrame({'id':[1,2,3],'val':['A','B','C']})
    >>> sf
    Columns:
        id  int 
        val str
    Rows: 3
    Data:
       id  val
    0  1   A
    1  2   B
    2  3   C
    >>> sf = sf[['val','id']]
    >>> sf
    Columns:
        val str
        id  int 
    Rows: 3
    Data:
       val id  
    0  A   1   
    1  B   2   
    2  C   3   


    **Logical Filter**
    
    An SFrame can be filtered using

    >>> sframe[binary_filter] 

    where sframe is an SFrame and binary_filter is an SArray of the same length.
    The result is a new SFrame which contains only rows of the SFrame where its
    matching row in the binary_filter is non zero.

    This permits the use of boolean operators that can be used to perform
    logical filtering operations. For instance, given an SFrame

    >>> sf
    Columns:
        id  int 
        val	str
    Rows: 3
    Data:
       id  val
    0  1   A
    1  2   B
    2  3   C

    >>> sf[(sf['id'] >= 1) & (sf['id'] <= 2)]
    Columns:
        id  int 
        val	str
    Rows: 2
    Data:
       id  val
    0  1   A
    1  2   B

    See :class:`~graphlab.SArray` for more details on the use of the logical 
    filter.

    This can also be used more generally to provide filtering capability which
    is otherwise not expressible with simple boolean functions. For instance:

    >>> sf[sf['id'].apply(lambda x: math.log(x) <= 1)]
    Columns:
        id  int 
        val	str
    Rows: 2
    Data:
       id  val
    0  1   A
    1  2   B

    Or alternatively:

    >>> sf[sf.apply(lambda x: math.log(x['id']) <= 1)]

    Parameters
    ----------
    data : Array | pandas.DataFrame | string
        The actual interpretation of this field is dependent on the "format"
        parameter. If data is an Array or a Pandas DataFrame, the contents are
        stored in the SFrame. If the contents is a string, it is interpreted as
        a file. Files can be read from local file system, or urls (local://,
        hdfs://, s3://, http://, or remote://)

    format : {'auto', 'array', 'dataframe', 'csv', 'sframe'}
        The format of the data. Default 'auto' will automatically infer the
        input data format. The inference rules are simple: If the data is an
        array/dict or a dataframe, it is associated with 'array' and 'dataframe'
        respectively. If the data is a string, it is interpreted as a file, and
        the file extension is used to infer the file format.


    Notes
    -----
    When working with the graphlab EC2 instance, e.g.
    :py:func:`graphlab.aws.launch_EC2()`, SFrame cannot be constructed using
    a local file path, because it involves a potentially large amount of data
    transfer from client to server. However, it is still ok to use a remote
    file path.

    >>> graphlab.aws.launch_EC2('m1.large')
    >>> sf = SFrame('~/mydata/foo.csv') # throws exception
    >>> sf = SFrame('remote:///mydata/foo.csv') # works
    >>> sf = SFrame('http://testdatasets.s3-website-us-west-2.amazonaws.com/users.csv.gz') # works
    >>> sf = SFrame('s3://mybucket/foo.csv') # works
    >>> graphlab.aws.terminate_EC2()

    Similar restriction applies to :py:class:`graphlab.SGraph` and
    :py:class:`graphlab.SArray`.
    """

    __slots__ = ['shape', '__proxy__']


    def __init__(self, data=None,
                 format='auto',
                 _proxy=None):
        """__init__(data=list(), format='auto')
        Construct a new SFrame from in-memory data, a file or a URL.

        Parameters
        ----------
        data : Array | dict | pandas.DataFrame | string | None
            The actual interpretation of this field is dependent on the
            "format" parameter. The contents of an in-memory container (a
            pandas.DataFrame, a dict-like object or another iterable) are
            stored in the SFrame. A string is interpreted as a file. Files can
            be read from local file system, or urls (local://, hdfs://, s3://,
            http://, or remote://). None yields an empty SFrame.

        format : {'auto', 'array', 'dataframe', 'csv', 'sframe'}
            The format of the data. The default 'auto' infers the format from
            "data":

            - a pandas.DataFrame is loaded as 'dataframe';
            - a string is a file whose extension picks the format: .csv and
              .csv.gz are comma separated, .tsv and .tsv.gz are tab separated,
              .txt and .txt.gz are assumed to be csv (a notice is printed),
              and anything else is treated as a saved sframe;
            - an object with ``iteritems`` is loaded as a dict of columns,
              added in sorted key order;
            - any other iterable is loaded as an array;
            - None gives an empty SFrame.

            Any other value is used as the format without inference.

        _proxy : None
            Internal, do not use. When set, it becomes the backing proxy and
            "data" and "format" are not consulted.

        Raises
        ------
        ValueError
            If the format cannot be inferred or is unknown, or if an array
            mixes SArrays with regular values.

        Notes
        -----
        For CSV files, the preferred constructor is SFrame.read_csv since
        that has a lot more options which can be used to control the parser.
        """
        # Metrics are emitted for the data location (memory, local, s3, ...),
        # the resolved format and the resulting number of rows and columns.
        tracker = _mt._get_metric_tracker()
        if not _proxy:
            self.__proxy__ = UnitySFrameProxy(glconnect.get_client())
            _format = None
            csv_delimiter = None
            if format == 'auto':
                if isinstance(data, pandas.DataFrame):
                    _format = 'dataframe'
                    tracker.track('sframe.location.memory', value=1)
                elif isinstance(data, (str, unicode)):
                    # Whatever precedes '://' names the location type; a
                    # string without it is a local path.
                    if '://' in data:
                        location = data.split('://')[0]
                    else:
                        location = 'local'
                    tracker.track(('sframe.location.%s' % location), value=1)

                    # The file extension decides how the file is parsed.
                    if data.endswith(('.csv', '.csv.gz')):
                        _format, csv_delimiter = 'csv', ','
                    elif data.endswith(('.tsv', '.tsv.gz')):
                        _format, csv_delimiter = 'tsv', '\t'
                    elif data.endswith(('.txt', '.txt.gz')):
                        print("Assuming file is csv. For other delimiters, "
                              "please use `SFrame.read_csv`.")
                        _format = 'csv'
                    else:
                        _format = 'sframe'
                elif hasattr(data, 'iteritems'):
                    _format = 'dict'
                    tracker.track('sframe.location.memory', value=1)
                elif hasattr(data, '__iter__'):
                    _format = 'array'
                    tracker.track('sframe.location.memory', value=1)
                elif data is None:
                    _format = 'empty'
                else:
                    raise ValueError('Cannot infer input type for data ' + str(data))
            else:
                _format = format

            tracker.track(('sframe.format.%s' % _format), value=1)

            with cython_context():
                if _format == 'dataframe':
                    self.__proxy__.load_from_dataframe(data)
                elif _format == 'array':
                    if len(data) > 0:
                        # None entries do not count towards the element types.
                        element_types = set(type(x) for x in data if x is not None)
                        if SArray not in element_types:
                            # Plain values: the whole array is a single column.
                            self.__proxy__.add_column(SArray(data).__proxy__, "")
                        elif len(element_types) == 1:
                            # Only SArrays: each one becomes a column.
                            for column in data:
                                self.add_column(column)
                        else:
                            raise ValueError("Cannot create SFrame from mix of regular values and SArrays")
                elif _format == 'dict':
                    for name, column in sorted(data.iteritems()):
                        if type(column) is not SArray:
                            column = SArray(column)
                        self.__proxy__.add_column(column.__proxy__, name)
                elif _format in ('csv', 'tsv'):
                    url = make_internal_url(data)
                    # Parser options are only set when the delimiter was
                    # inferred from the file extension.
                    csv_config = {}
                    if csv_delimiter is not None:
                        csv_config = {'delimiter': csv_delimiter,
                                      'use_header': True}
                    self.__proxy__.load_from_csv(url, csv_config, {})
                elif _format == 'sframe':
                    self.__proxy__.load_from_sframe_index(make_internal_url(data))
                elif _format == 'empty':
                    pass
                else:
                    raise ValueError('Unknown input type: ' + format)
        else:
            self.__proxy__ = _proxy

        self.shape = (self.num_rows(), self.num_cols())
        tracker.track('sframe.row.size', value=self.num_rows())
        tracker.track('sframe.col.size', value=self.num_cols())

    @classmethod
    def read_csv(cls,
                 url,
                 delimiter=',',
                 header=True,
                 error_bad_lines=True,
                 comment_char='',
                 escape_char='\\',
                 double_quote=True,
                 quote_char='\"',
                 skip_initial_space=True,
                 column_type_hints=str,
                 na_values=[], 
                 nrows=None,
                 verbose=True):
        """
        Constructs an SFrame from a CSV file.

        Parameters
        ----------
        delimiter : string
            This describes the delimiter used for parsing csv files. Must be a
            single character.

        header : bool
            If true, uses the first row as the column names.
            Otherwise use the default column names:'X1, X2,...'.

        error_bad_lines: bool
            If true, will fail upon encountering a bad line. If false, will
            continue parsing skipping lines which fail to parse correctly.
            A sample of the first 10 encountered bad lines will be printed.
            Defaults to True.

        comment_char : string
            The character which denotes that the
            remainder of the line is a comment.

        escape_char : string
            Character which begins a C escape sequence

        double_quote : bool
            If two consecutive quotes in a string parses to
            to a single quote.

        skip_initial_space : bool
            If extra spaces at the start of a field is ignored

        column_type_hints : type, list[type], dict[string, type]
            This provides type hints for each column.
            Supported types are int, float, str
            - If a single type is provided, the type will be
            applied to all columns. For instance, column_type_hints=float will
            force all columns to be parsed as float.
            - If a list of types is provided, the types applies
            to each column in order, e.g.[int, float, str]
            will parse the first column as int, second as float and third as string.
            - If a dictionary of column name to type is provided,
            each type value in the dictonary is applied to the key it belongs to.
            For instance {'user':int} will hint that the column
            called "user" should be parsed as an integer, and the rest will default
            to string.

        na_values: A list of strings to be interpreted as missing values.

        nrows: integer
            If set, only this many rows will be read from the file.

        verbose : bool
            If True, print the progress.
        """
        parsing_config = dict()
        parsing_config["delimiter"] = delimiter
        parsing_config["use_header"] = header
        parsing_config["continue_on_failure"] = not error_bad_lines
        parsing_config["comment_char"] = comment_char
        parsing_config["escape_char"] = escape_char
        parsing_config["double_quote"] = double_quote
        parsing_config["quote_char"] = quote_char
        parsing_config["skip_initial_space"] = skip_initial_space
        if na_values is not None and len(na_values) > 0:
            parsing_config["na_values"] = na_values 

        if nrows != None:
          parsing_config["row_limit"] = nrows

        proxy = UnitySFrameProxy(glconnect.get_client())
        internal_url = make_internal_url(url)

        if (not verbose):
            glconnect.get_client().set_log_progress(False)

        if type(column_type_hints) is type:
            type_hints = {'__all_columns__': column_type_hints}
        elif type(column_type_hints) is list:
            type_hints = dict(zip(['__X%d__' % i for i in range(len(column_type_hints))], column_type_hints))
        elif type(column_type_hints) is dict:
            type_hints = column_type_hints
        else:
            raise TypeError("Invalid type for column_type_hints. Must be a dictionary, list or a single type.")


        _mt._get_metric_tracker().track('sframe.csv.parse')

        suffix=''
        if url.find('://') == -1:
            suffix = 'local'
        else:
            suffix = url.split('://')[0]

        _mt._get_metric_tracker().track(('sframe.location.%s' % (suffix)), value=1)

        with cython_context():
            proxy.load_from_csv(internal_url, parsing_config, type_hints)
        glconnect.get_client().set_log_progress(True)

        return cls(_proxy=proxy)

    def __repr__(self):
        """
        Returns a string description of the frame
        """
        colnames = self.column_names()
        coltypes = self.column_types()
        ret = "Columns:\n"
        if len(colnames) > 0:
            for i in range(len(colnames)):
                ret = ret + "\t" + colnames[i] + "\t" + coltypes[i].__name__ + "\n"
            ret = ret + "\n"
        else:
            ret = ret + "\tNone\n\n"
        ret = ret + "Rows: " + str(len(self)) + "\n\n"
        ret = ret + "Data:\n"
        if (len(self) > 0):
            ret = ret + str(self)
        else:
            ret = ret + "\t[]"
        return ret

    def __str__(self):
        """
        Returns a string containing the first 10 elements of the frame, along
        with a description of the frame.
        """
        headln = str(self.head(10))
        ## pandas dataframe displays a [#row * #col] at the end which causes confusion.
        ## replace the last line with the actual row and columns
        headln_lines = headln.split("\n")
        if (len(self) > 10):
            headln_lines[-1] = "...\n[%d rows x %d columns]\n" % self.shape
        else:
            headln_lines[-1] = "[%d rows x %d columns]\n" % self.shape
        headln = "\n".join(headln_lines)
        return headln

    def __nonzero__(self):
        """
        Returns true if the frame is not empty.
        """
        return self.num_rows() != 0

    def __len__(self):
        """
        Returns the number of rows of the sframe
        """
        return self.num_rows()

    def __copy__(self):
        """
        Returns a shallow copy of the sframe, built by selecting all of its
        columns.
        """
        all_columns = self.column_names()
        return self.select_columns(all_columns)

    def _row_selector(self, other):
        """
        Select the rows of this SFrame whose matching entry in ``other`` is
        non-zero.

        ``other`` must be an SArray with as many elements as this SFrame has
        rows; an IndexError is raised when the lengths differ. Any argument
        that is not an SArray yields None.
        """
        if type(other) is not SArray:
            return None
        if len(other) != len(self):
            raise IndexError("Cannot perform logical indexing on arrays of different length.")
        with cython_context():
            filtered = self.__proxy__.logical_filter(other.__proxy__)
            return SFrame(_proxy=filtered)

    def dtype(self):
        """
        Returns the column types. Same as :py:meth:`column_types`.

        Returns
        -------
        out : list[type]
            Column types of the SFrame.
        """

        return self.column_types()

    def num_rows(self):
        """
        Returns the number of rows.

        Returns
        -------
        out : int
            Number of rows in the SFrame.
        """
        return self.__proxy__.num_rows()

    def num_cols(self):
        """
        Returns the number of columns.

        Returns
        -------
        out : int
            Number of columns in the SFrame.
        """
        return self.__proxy__.num_columns()

    def num_columns(self):
        """
        Returns the number of columns.

        Returns
        -------
        out : int
            Number of columns in the SFrame.
        """
        return self.__proxy__.num_columns()

    def column_names(self):
        """
        Returns the column names.

        Returns
        -------
        out : list[string]
            Column names of the SFrame.
        """
        return self.__proxy__.column_names()

    def column_types(self):
        """
        Returns the column types.

        Returns
        -------
        out : list[type]
            Column types of the SFrame.
        """
        return self.__proxy__.dtype()

    def head(self, n=10):
        """
        Returns a pandas.DataFrame which contains the first n rows of the
        SFrame.

        This operation will construct a pandas.DataFrame in memory. Care must
        be taken when size of the returned object is big.

        Parameters
        ----------
        n : int
            The number of rows to fetch.

        Returns
        -------
        out : pandas.DataFrame
            the dataframe which contains the first n rows of SFrame
        """
        return self.__proxy__.head(n)

    def to_dataframe(self):
        """
        Returns a pandas.DataFrame which contains the all rows of the
        SFrame.

        This operation will construct a pandas.DataFrame in memory. Care must
        be taken when size of the returned object is big.

        Returns
        -------
        out : pandas.DataFrame
            The dataframe which contains all rows of SFrame
        """
        return self.head(self.num_rows())

    def tail(self, n=10):
        """
        Returns a pandas.DataFrame which contains the last n rows of the
        SFrame.

        This operation will construct a pandas.DataFrame in memory. Care must
        be taken when size of the returned object is big.

        Parameters
        ----------
        n : int
            The number of rows to fetch.

        Returns
        -------
        out : pandas.DataFrame
            The dataframe which contains the last n rows of SFrame
        """
        return self.__proxy__.tail(n)

    def apply(self, fn, dtype=None, seed=None):
        """
        Returns a new SArray of dtype where each element in this SArray is
        transformed by fn(x) where x is a row in the sframe, as a dictionary.
        The fn should return a value which can be cast into dtype.

        If dtype is not specified, the first 100 rows of the SFrame are
        used to make a guess of the target datatype.

        Parameters
        ----------
        fn : function
            The function to transform each row of the sframe. The return
            type should be convertible to dtype if dtype is not None.

        dtype : dtype
            The dtype of the new SArray. If None, the first 100
            elements of the array are used to guess the target
            data type.

        seed : int, optional
            Used as the seed if a random number generator is included in fn.

        Returns
        -------
        out : SArray
            The SArray transformed by fn.  Each element of the SArray is of
            type ``dtype``


        Example
        -------

        >>> import graphlab
        >>> sf = graphlab.SFrame.read_csv('netflix.csv')

        The following code create a new SArray where each element is the
        string concatination of the 'user_id', 'movie_id' and 'rating' columns.

        >>> sa = sf.apply(lambda x: str(x['user_id']) + str(x['movie_id']) + str(x['rating']))

        """
        assert inspect.isfunction(fn), "Input must be a function"
        dryrun = [fn(dict(zip(self.column_names(), i))) for i in self.head(10).values]
        if dtype is None:
            dtype = SArray(dryrun).dtype()

        if not seed:
            seed = int(time.time())

        _mt._get_metric_tracker().track('sframe.apply')

        with cython_context():
            return SArray(_proxy=self.__proxy__.transform(fn, dtype, seed))

    def sample(self, fraction, seed=None):
        """
        Return an SFrame with a sample of the current SFrame's rows.

        Parameters
        ----------
        fraction : float
            Approximate fraction of the rows to fetch. Must be between 0 and 1.
            The number of rows returned is approximately the fraction times the
            number of rows.

        seed : int, optional
            Seed for the random number generator.

        Returns
        -------
        out : SFrame
            A new SFrame containing sampled rows of the current SFrame.

        Examples
        --------
        Suppose we have an SFrame called my_sf with 6,145 rows.

        >>> print my_sf.num_rows()
        6145

        Retrieve about 30% of the SFrame rows.

        >>> sub_sf = my_sf.sample(fraction=0.3, seed=None)
        >>> print sub_sf.num_rows()
        1886

        For prototyping and testing, it is important to be able to set the seed
        so the same subset of rows can be retrieved each time.

        >>> sub_sf = my_sf.sample(fraction=0.3, seed=5)

        Notes
        -----
        For more examples of SFrame.sample, see the `Introduction to SFrames
        <http://graphlab.com/learn/notebooks/introduction_to_sframes.html#Splitting-and-Sampling>`_
        notebook.
        """
        if not seed:
            seed = int(time.time())

        if (fraction > 1 or fraction < 0):
            raise ValueError('Invalid sampling rate: ' + str(fraction))

        _mt._get_metric_tracker().track('sframe.sample')

        if (self.num_rows() == 0 or self.num_cols() == 0):
            return self
        else:
            with cython_context():
                return SFrame(_proxy=self.__proxy__.sample(fraction, seed))

    def random_split(self, fraction, seed=None):
        """
        Return a pair of SFrames by random splitting the current one. The first
        SFrame contains *M* rows, sampled uniformly randomly (without
        replacement) from the original SFrame. *M* is approximately the fraction
        times the original number of rows. The second SFrame contains the
        remaining rows of the original SFrame.

        Parameters
        ----------
        fraction : float
            Approximate fraction of the rows to fetch for the first returned
            SFrame. Must be between 0 and 1.

        seed : int, optional
            Seed for the random number generator.

        Returns
        -------
        out : tuple [SFrame]
            Two new SFrames.

        Examples
        --------
        Suppose we have an SFrame called my_sf with 6,145 rows.

        >>> print my_sf.num_rows()
        6145

        Randomly split my_sf into training and testing datasets with about a 70%/30%
        split.

        >>> sf_train, sf_test = my_sf.random_split(fraction=0.7, seed=None)
        >>> print sf_train.num_rows(), sf_test.num_rows()
        4294 1851

        For prototyping and testing, it is important to be able to set the seed
        so the same split can be reproduced in each call.

        >>> sf_train, sf_test = my_sf.sample(fraction=0.7, seed=5)

        Notes
        -----
        For more examples of SFrame.random_split, see the `Introduction to SFrames
        <http://graphlab.com/learn/notebooks/introduction_to_sframes.html#Splitting-and-Sampling>`_
        notebook.
        """
        if (fraction > 1 or fraction < 0):
            raise ValueError('Invalid sampling rate: ' + str(fraction))
        if (self.num_rows() == 0 or self.num_cols() == 0):
            return (SFrame(), SFrame())

        if not seed:
            seed = int(time.time())

        # The server side requires this to be an int, so cast if we can
        try:
            seed = int(seed)
        except ValueError:
            raise ValueError('The \'seed\' parameter must be of type int.')

        _mt._get_metric_tracker().track('sframe.random_split')

        with cython_context():
            proxy_pair = self.__proxy__.random_split(fraction, seed)
            return (SFrame(data=[], _proxy=proxy_pair[0]), SFrame(data=[], _proxy=proxy_pair[1]))

    def topk(self, column_name, k=10, reverse=False):
        """
        Returns the topk rows sorted by the column_name in descending order.

        Parameters
        ----------
        column_name : string
            The column to sort on

        k : int
            The number of rows to return

        reverse : bool
            If True, return the topk rows in ascending order.

        Returns
        -------
        out : pandas.DataFrame
            A pandas.DataFrame containing topk rows sorted by column_name.
        """
        if type(column_name) is not str:
            raise TypeError("column_name must be a string")

        _mt._get_metric_tracker().track('sframe.topk')

        df = self[self[column_name].topk_index(k, reverse)].to_dataframe()
        df = df.sort(column_name, ascending=reverse)
        df.index = range(len(df))
        return df

    def save(self, filename, format=None):
        """
        Save the SFrame to a file or directory.

        Parameters
        ----------
        filename : string
            The location to save the SFrame. Either a local directory or a
            remote URL. If the format is 'binary', a directory will be created
            at the location which will contain the sframe. If the format is
            'csv' and the name does not end in '.csv' or '.csv.gz', '.csv' is
            appended to it.

        format : {'binary', 'csv'}, optional
            Format in which to save the SFrame. Binary saved sframes can be
            re-loaded much faster and without any format conversion losses.
            When omitted, 'csv' is used if filename ends in '.csv' or
            '.csv.gz', and 'binary' otherwise.

        Raises
        ------
        ValueError
            If format is neither 'csv' nor 'binary'.
        """
        _mt._get_metric_tracker().track('sframe.save', properties={'format':format})

        csv_suffixes = ('.csv', '.csv.gz')
        # Formats are compared with == rather than ``is``: identity only
        # holds for interned literals, so an equal string built at runtime
        # (or a unicode string) would be rejected as an invalid format.
        if format is None:
            format = 'csv' if filename.endswith(csv_suffixes) else 'binary'
        elif format == 'csv':
            if not filename.endswith(csv_suffixes):
                filename = filename + '.csv'
        elif format != 'binary':
            raise ValueError("Invalid format: {}. Supported formats are 'csv' and 'binary'".format(format))

        ## Save the SFrame
        url = make_internal_url(filename)

        with cython_context():
            if format == 'binary':
                self.__proxy__.save(url)
            else:
                # Only 'csv' can remain, and filename has a csv suffix here.
                self.__proxy__.save_as_csv(url, {})

    def select_column(self, key):
        """
        Fetch a single column of this SFrame as an SArray.

        Parameters
        ----------
        key : str
            The column name

        Returns
        -------
        out : graphlab.SArray
            The sarray that is referred by 'key'

        Raises
        ------
        TypeError
            If 'key' is not a str. Handling of a name that does not match any
            column is left to the underlying proxy.
        """
        if isinstance(key, str):
            with cython_context():
                column_proxy = self.__proxy__.select_column(key)
                return SArray(data=[], _proxy=column_proxy)
        raise TypeError("Invalid key type: must be str")

    def select_columns(self, keylist):
        """
        Build a new SFrame holding only the columns named in 'keylist'.

        Parameters
        ----------
        keylist : list
            The list of column names

        Returns
        -------
        out : graphlab.SFrame
            A new SFrame that is made up of the columns
            referred to in 'keylist' in this current SFrame

        Raises
        ------
        TypeError
            If keylist has no '__iter__' attribute, or if any of its elements
            is not a str.
        """
        if not hasattr(keylist, '__iter__'):
            raise TypeError("keylist must be an iterable")

        non_string_keys = [k for k in keylist if not isinstance(k, str)]
        if non_string_keys:
            raise TypeError("Invalid key type: must be str")

        with cython_context():
            selected_proxy = self.__proxy__.select_columns(keylist)
            return SFrame(data=[], _proxy=selected_proxy)

    def add_column(self, data, name=""):
        """
        Append one column to this SFrame in place and refresh self.shape.
        The number of elements in the data given must match every other
        column of the SFrame.

        Parameters
        ----------
        data : SArray
            The 'column' of data.

        name : string
            The name of the column. If no name is given, a default name is chosen.

        Raises
        ------
        TypeError
            If data is not an SArray or name is not a str.
        """
        # Only SArray input is accepted here; no conversion is attempted.
        if not isinstance(data, SArray):
            raise TypeError("Must give column as SArray")
        if not isinstance(name, str):
            raise TypeError("Invalid column name: must be str")

        with cython_context():
            self.__proxy__.add_column(data.__proxy__, name)
            row_count = self.num_rows()
            column_count = self.num_cols()
            self.shape = (row_count, column_count)

    def add_columns(self, datalist, namelist):
        """
        Append several columns to this SFrame in place and refresh self.shape.
        The number of elements in all columns must match every other column of
        the SFrame.

        Parameters
        ----------
        datalist : list of SArray
            A list of columns

        namelist : list of string
            A list of column names. All names must be specified.

        Raises
        ------
        TypeError
            If either argument has no '__iter__' attribute, if any element of
            datalist is not an SArray, or if any element of namelist is not a
            str.
        """
        for argument, label in ((datalist, "datalist"), (namelist, "namelist")):
            if not hasattr(argument, '__iter__'):
                raise TypeError(label + " must be an iterable")

        non_sarray_columns = [c for c in datalist if not isinstance(c, SArray)]
        if non_sarray_columns:
            raise TypeError("Must give column as SArray")

        non_string_names = [n for n in namelist if not isinstance(n, str)]
        if non_string_names:
            raise TypeError("Invalid column name in list: must all be str")

        with cython_context():
            column_proxies = [column.__proxy__ for column in datalist]
            self.__proxy__.add_columns(column_proxies, namelist)
            self.shape = (self.num_rows(), self.num_cols())

    def remove_column(self, name):
        """
        Drop the named column from this SFrame in place and refresh
        self.shape.

        Parameters
        ----------
        name : string
            The name of the column to remove.
        """
        # The proxy addresses columns by position, so translate the name
        # first. The lookup uses .index() on the column name sequence, which
        # fails if the name is absent.
        position = self.column_names().index(name)
        with cython_context():
            self.__proxy__.remove_column(position)
            self.shape = (self.num_rows(), self.num_cols())

    def swap_columns(self, column_1, column_2):
        """
        Exchange the positions of two columns, identified by name.

        Parameters
        ----------
        column_1 : string
            Name of column to swap

        column_2 : string
            Name of other column to swap
        """
        # Resolve both names to positions against a single snapshot of the
        # column names.
        names = self.column_names()
        first_id, second_id = names.index(column_1), names.index(column_2)
        with cython_context():
            self.__proxy__.swap_columns(first_id, second_id)

    def rename(self, names):
        """
        Rename columns in place according to the 'names' dict. Each key is an
        existing column name and each value is the name that replaces it.

        Parameters
        ----------
        names : dict[string, string]
            Dictionary of [old_name, new_name]

        Raises
        ------
        TypeError
            If names is not exactly a dict.
        """
        if type(names) is not dict:
            raise TypeError('names must be a dictionary: oldname -> newname')

        with cython_context():
            for old_name, new_name in names.items():
                # Re-read the column names every time: an earlier rename in
                # this loop may have changed them.
                position = self.column_names().index(old_name)
                self.__proxy__.set_column_name(position, new_name)

    def __getitem__(self, key):
        """
        Index into the SFrame, dispatching on the exact type of 'key':

        - str: returns that column (see select_column).
        - list: returns an SFrame of those columns (see select_columns).
        - SArray: passed to _row_selector.

        Any other key type raises TypeError.
        """
        key_type = type(key)
        if key_type is str:
            return self.select_column(key)
        if key_type is list:
            return self.select_columns(key)
        if key_type is SArray:
            return self._row_selector(key)
        raise TypeError("Invalid index type: must be SArray, list, or str")

    def __setitem__(self, key, value):
        """
        A wrapper around add_column(s).  Key can be either a list or a str.  If
        value is an SArray, it is added to the SFrame as a column.  If it is a
        constant value (int, str, or float), then a column is created where
        every entry is equal to the constant value.  Existing columns can also
        be replaced using this wrapper.
        """
        if type(key) is list:
            self.add_columns(value, key)
        elif type(key) is str:
            # set new column
            if not key in self.column_names():
                if (type(value) is SArray):
                    self.add_column(value, key)
                elif hasattr(value, '__iter__'):
                    self.add_column(SArray(value), key)
                else:
                    with cython_context():
                        self.__proxy__.add_column_from_const(key, value)
                        self.shape = (self.num_rows(), self.num_cols())
            else:
                # special case if replacing the only column.
                # server would fail the replacement if the new column has different
                # length than current one, which doesn't make sense if we are replacing
                # the only column. To support this, we first take out the only column
                # and then put it back if exception happens
                single_column = (self.num_cols() == 1)
                if (single_column):
                    tmpname = key
                    saved_column = self.select_column(key)
                    self.remove_column(key)
                else:
                    # add the column to a unique column name.
                    tmpname = '__' + '-'.join(self.column_names())

                try:
                    if (type(value) is SArray):
                        self.add_column(value, tmpname)
                    elif hasattr(value, '__iter__'):
                        self.add_column(SArray(value), tmpname)
                    else:
                        with cython_context():
                            self.__proxy__.add_column_from_const(tmpname, value)
                            self.shape = (self.num_rows(), self.num_cols())
                except:
                    if (single_column):
                        self.add_column(saved_column, key)
                    raise

                if (not single_column):
                    # if add succeeded, remove the column name and rename tmpname->columnname.
                    self.swap_columns(key, tmpname)
                    self.remove_column(key)
                    self.rename({tmpname: key})
        else:
            raise TypeError('Cannot set column with value type: ' + type(value))

    def __delitem__(self, key):
        """
        Remove the column named 'key', enabling ``del sframe[key]``.

        Delegates directly to remove_column.
        """
        self.remove_column(key)

    def __materialize__(self):
        """
        For a SFrame that is lazily evaluated, force persist of all sarrays
        underneath the SFrame to enable benchmarking or other usage.

        Delegates to the proxy's materialize() and returns nothing.
        """
        with cython_context():
            self.__proxy__.materialize()

    def __is_materialized__(self):
        """
        Returns whether or not the sframe has been materialized. A materialized
        SFrame has all underneath sarrays saved on disk.

        The result is whatever the proxy's is_materialized() reports.
        """
        # NOTE(review): unlike the other proxy calls in this class, this one is
        # not wrapped in cython_context() -- confirm that is intentional.
        return self.__proxy__.is_materialized()

    def __iter__(self):
        """
        Provides an iterator to the rows of the sframe.

        Each row is yielded as a dict that maps column name to the value in
        that row. Rows are fetched from the proxy in fixed-size batches.
        """

        _mt._get_metric_tracker().track('sframe.__iter__')

        def row_dicts():
            batch_size = 262144
            self.__proxy__.begin_iterator()
            batch = self.__proxy__.iterator_get_next(batch_size)
            names = self.column_names()
            while True:
                for row in batch:
                    yield dict(zip(names, row))

                # A batch shorter than requested means nothing is left.
                if len(batch) != batch_size:
                    return
                batch = self.__proxy__.iterator_get_next(batch_size)

        return row_dicts()

    def append(self, other):
        """
        Append the rows of another SFrame to the rows of this one and return
        the result as a new SFrame.

        Parameters
        ----------
        other : SFrame
            Another SFrame whose rows are appended to current SFrame

        Returns
        -------
        out_sf : SFrame
            A new SFrame that contains the rows from both SFrames. If exactly
            one of the two SFrames has no columns, the result holds the
            columns of the other one; if both have none, an empty SFrame is
            returned.

        Raises
        ------
        RuntimeError
            If other is not an SFrame, if the two SFrames differ in their
            number of columns, if a column of this SFrame is missing from
            other, or if a shared column has a different type.

        Notes
        -----
        Both SFrames have to have the same set of columns with the same column
        names and column types. The order of the columns may differ.
        """
        _mt._get_metric_tracker().track('sframe.append')
        if type(other) is not SFrame:
            raise RuntimeError("SFrame append can only work with SFrame")

        mine_has_no_columns = len(self.column_names()) == 0
        theirs_has_no_columns = len(other.column_names()) == 0

        if mine_has_no_columns:
            if theirs_has_no_columns:
                return SFrame()
            return other.select_columns(other.column_names())
        if theirs_has_no_columns:
            return self.select_columns(self.column_names())

        my_column_names = self.column_names()
        my_column_types = self.column_types()
        other_column_names = other.column_names()
        # Fetched but not consulted below; each column's own dtype() is
        # compared instead.
        other_column_types = other.column_types()
        if len(my_column_names) != len(other_column_names):
            raise RuntimeError("Two SFrames have to have the same number of columns")

        # The column order of the two sframes may differ, so rebuild "other"
        # with its columns arranged in this SFrame's order.
        reordered_other = SFrame()
        for position, col_name in enumerate(my_column_names):
            if col_name not in other_column_names:
                raise RuntimeError("Column " + col_name + " does not exist in second SFrame")

            other_column = other.select_column(col_name)
            reordered_other.add_column(other_column, col_name)

            # check column type
            other_type = other_column.dtype()
            if my_column_types[position] != other_type:
                raise RuntimeError("Column " + col_name + " type is not the same in two SFrames, one is " + str(my_column_types[position]) + ", the other is " + str(other_type))

        with cython_context():
            return SFrame(_proxy=self.__proxy__.append(reordered_other.__proxy__))

    def group(self, column):
        """
        Return a new SFrame whose rows are grouped according to the value of
        the provided column.

        Parameters
        ----------
        column : string
            Name of column to group.

        Returns
        -------
        out_sf : SFrame
            A new SFrame having the same shape and data, but the rows
            are grouped by the given column.

        Raises
        ------
        RuntimeError
            If column is not one of this SFrame's column names.

        Notes
        -----
        The group method does not sort on the selected column.
        """
        _mt._get_metric_tracker().track('sframe.group')
        if column in self.column_names():
            with cython_context():
                grouped_proxy = self.__proxy__.group(column)
                return SFrame(_proxy=grouped_proxy)
        raise RuntimeError("Column " + column + " does not exist in SFrame")

    def groupby(self, key_columns, operations, *args):
        """
        Perform a group on the key_columns followed by aggregations on the
        columns listed in operations.

        The operations parameter is a dictionary that indicates which
        aggregation operators to use and which columns to use them on. The
        available operators are SUM, MAX, MIN, COUNT, AVG, VAR, STDV, and
        QUANTILE. For convenience, aggregators MEAN, STD, and VARIANCE are
        available as synonyms for AVG, STDV, and VAR. See
        :mod:`~graphlab.aggregate` for more detail on the aggregators.

        The columns of the output SFrame are named "[operator] of
        [aggregation column name]". For example, if key_columns is 'user_id' and
        operations is
        {'rating_sum': gl.aggregate.SUM('rating_sum'),
         'rating_count':gl.aggregate.COUNT()},
        the output is an SFrame with three columns: 'user_id', 'rating_sum',
        and 'rating_count'.

        Parameters
        ----------
        key_columns : string | list[string]
            Column(s) to group by.

        operations: dict, list
            Dictionary of columns and aggregation operations. Each key is a
            output column name and each value is an aggregator. This can also
            be a list of aggregators, in which case column names will be
            automatically assigned. A single aggregator that is neither a dict
            nor a list is treated as a one-element list.

        *args: All other remaining arguments will be interpreted in the same
               way as the operations argument.

        Returns
        -------
        out_sf : SFrame
            A new SFrame, with a column for each groupby column and each
            aggregation operation.

        Raises
        ------
        TypeError
            If a key column or aggregation column name is not a string, if an
            aggregator is neither a tuple nor graphlab.aggregate.COUNT, or if
            an operator is not a string.
        KeyError
            If a key column, or the column of any aggregator other than COUNT,
            does not exist in this SFrame.

        Examples
        --------
        >>> import graphlab as gl
        >>> import graphlab.aggregate as agg
        >>> sf = gl.load_sframe('netflix')

        Compute the number of occurrences of each user.

        >>> user_count = sf.groupby(key_columns='user_id',
                                    operations={'count': agg.COUNT()})

        Compute the number of occurrences of each user, automatically assigning
        a column name.

        >>> user_count = sf.groupby(key_columns='user_id',
                                    operations=agg.COUNT())

        Compute the mean and standard deviation of ratings per user.

        >>> user_rating_stats = sf.groupby(key_columns='user_id',
                    operations={'mean_rating': agg.MEAN('rating'),
                                    'std_rating':agg.STD('rating')})

        Compute the count, mean, and standard deviation of ratings per (user,
        time), automatically assigning output column names.

        >>> user_rating_stats = sf.groupby(['user_id', 'time'],
                                           [agg.COUNT(),
                                             agg.AVG('rating'),
                                             agg.STDV('rating')])

        The groupby function can take a variable length list of aggregation
        specifiers so if we want the count and the 0.25 and 0.75 quantiles of
        ratings:

        >>> user_rating_stats = sf.groupby(['user_id', 'time'], agg.COUNT(),
                                           {'rating_quantiles': agg.QUANTILE('rating',[0.25, 0.75])})
        """
        # Accept a single column name as shorthand for a one-element list.
        if isinstance(key_columns, str):
            key_columns = [key_columns]

        # check that every key column is a string, and is a valid column name
        my_column_names = self.column_names()
        key_columns_array = []
        for key_column in key_columns:
            if not isinstance(key_column, str):
                raise TypeError("Column name must be a string")
            if key_column not in my_column_names:
                raise KeyError("Column " + key_column + " does not exist in SFrame")
            key_columns_array.append(key_column)

        group_output_columns = []
        group_columns = []
        group_ops = []

        for op_entry in [operations] + list(args):
            # Normalize every entry into (output column name, aggregator)
            # pairs. Only dict entries carry explicit output names; an empty
            # name asks for an automatically assigned one.
            if isinstance(op_entry, dict):
                has_names = True
                aggregators = [(name, op_entry[name]) for name in op_entry]
            elif isinstance(op_entry, list):
                has_names = False
                aggregators = [("", val) for val in op_entry]
            else:
                # A bare aggregator (probably COUNT).
                has_names = False
                aggregators = [("", op_entry)]

            for (output_name, val) in aggregators:
                if type(val) is not tuple:
                    if val == graphlab.aggregate.COUNT:
                        # COUNT was passed uncalled; call it to get its tuple.
                        val = graphlab.aggregate.COUNT()
                    elif has_names:
                        raise TypeError("Unexpected type in aggregator definition of output column: " + output_name)
                    else:
                        raise TypeError("Unexpected type in aggregator definition.")
                (op, agg_column) = val
                group_output_columns.append(output_name)
                group_columns.append(agg_column)
                group_ops.append(op)

        # let's validate group_columns and group_ops are valid
        count_op = graphlab.aggregate.COUNT()[0]
        for (col, op) in zip(group_columns, group_ops):
            if not isinstance(col, str):
                raise TypeError("Column name must be a string")
            if not isinstance(op, str):
                raise TypeError("Operation type not recognized.")
            _mt._get_metric_tracker().track('sframe.groupby', properties={'operator':op})
            # COUNT is the only operator exempt from the column existence
            # check. Compare operators by value, and report the column that
            # actually failed.
            if op != count_op and col not in my_column_names:
                raise KeyError("Column " + col + " does not exist in SFrame")

        with cython_context():
            return SFrame(_proxy=self.__proxy__.groupby_aggregate(key_columns_array, group_columns,
                                                                  group_output_columns, group_ops))

    def join(self, right, on=None, how='inner'):
        """
        Merges the current (left) SFrame with the given (right) SFrame using a
        SQL-style equi-join operation by columns.

        Parameters
        ----------
        right : SFrame
            The SFrame to join

        on : None, str, list, or dict
            The column name(s) representing the set of join keys.  Each row
            that has the same value in this set of columns will be merged
            together.

            If 'None' is given, join will use all columns that have
            the same name as the set of join keys.

            If a str is given, this is interpreted as a join using one column,
            where both frames have the same column name.

            If a list is given, this is interpreted as a join using one or
            more column names, where each column name given exists in both
            SFrames.

            If a dict is given, each dict key is taken as a column name in the
            left SFrame, and each dict value is taken as the column name in
            right SFrame that will be joined together. e.g.
            {'left_col_name':'right_col_name'}.


        how : str in {'left','right','outer','inner'}
            The type of join to perform.  'inner' is default.

            - inner: Equivalent to a SQL inner join.  Result consists of the
              rows from the two frames whose join key values match exactly,
              merged together into one SFrame.

            - left: Equivalent to a SQL left outer join. Result is the union
              between the result of an inner join and the rest of the rows from
              the left SFrame, merged with missing values.

            - right: Equivalent to a SQL right outer join.  Result is the union
              between the result of an inner join and the rest of the rows from
              the right SFrame, merged with missing values.

            - outer: Equivalent to a SQL full outer join. Result is
              the union between the result of a left outer join and a right
              outer join.

        Returns
        -------
        out : SFrame

        Raises
        ------
        TypeError
            If right is not an SFrame, if 'on' is not None, a str, a list, or
            a dict, or if 'on' is a list with an element that is not a str.
        ValueError
            If 'how' is not one of the supported join types.

        Examples
        --------
        Assume sf_left and sf_right are two SFrames, each with a column called 'itemid' and 'region'.

        An inner join on columns of the same name:
        >>> result = sf_left.join(sf_right, on=['itemid','region'])

        A full outer join on columns with (possibly) different names:
        >>> result = sf_left.join(sf_right, how='outer', on={'thingid':'stuffid','some_col':'some_col2', 'itemid':'itemid'})

        As a more concrete example, let's create some SFrames to join:
        >>> import graphlab as gl

        >>> animals = gl.SFrame({'id':[1,2,3,4], 'name':['dog','cat','sheep','cow']})

        >>> sounds = gl.SFrame({'id':[1,3,4,5], 'sound':['woof','baa','moo','oink']})

        And then use each available type of join and see what the data looks like:
        >>> inner_result = animals.join(sounds, on='id', how='inner')

        >>> print inner_result
        (...output truncated...)
        Rows: 3
        Data:
           id   name sound
        0   1    dog  woof
        1   3  sheep   baa
        2   4    cow   moo

        >>> left_result = animals.join(sounds, on='id', how='left')

        >>> print left_result
        (...output truncated...)
        Rows: 4
        Data:
           id   name sound
        0   3  sheep   baa
        1   1    dog  woof
        2   4    cow   moo
        3   2    cat  None

        >>> right_result = animals.join(sounds, on='id', how='right')

        >>> print right_result
        (...output truncated...)
        Rows: 4
        Data:
           id   name sound
        0   3  sheep   baa
        1   1    dog  woof
        2   4    cow   moo
        3   5   None  oink

        >>> outer_result = animals.join(sounds, on='id', how='outer')

        >>> print outer_result
        (...output truncated...)
        Rows: 5
        Data:
           id   name sound
        0   1    dog  woof
        1   3  sheep   baa
        2   5   None  oink
        3   4    cow   moo
        4   2    cat  None
        """
        _mt._get_metric_tracker().track('sframe.join', properties={'type':how})

        if type(right) is not SFrame:
            raise TypeError("Can only join two SFrames")

        if how not in ['left','right','outer','inner']:
            raise ValueError("Invalid join type")

        # Normalize every accepted form of 'on' into a dict that maps a left
        # column name to the right column name it is matched against.
        if on is None:
            left_names = self.column_names()
            right_names = right.column_names()
            join_keys = dict((name, name) for name in left_names if name in right_names)
        elif type(on) is str:
            join_keys = {on: on}
        elif type(on) is list:
            join_keys = {}
            for name in on:
                if type(name) is not str:
                    raise TypeError("Join keys must each be a str.")
                join_keys[name] = name
        elif type(on) is dict:
            # Already in left -> right form; handed to the proxy as given.
            join_keys = on
        else:
            raise TypeError("Must pass a str, list, or dict of join keys")

        with cython_context():
            joined_proxy = self.__proxy__.join(right.__proxy__, how, join_keys)
            return SFrame(_proxy=joined_proxy)

    def filter_by(self, values, column_name):
        """
        Filters an SFrame by values inside an SArray. Result is an SFrame that
        only includes the rows that have a column with the given 'column_name'
        which holds one of the values in the given 'values' SArray. If 'values'
        is not an SArray, we attempt to convert it to one before filtering.

        Parameters
        ----------
        values : SArray | list | numpy.ndarray | pandas.Series | str
            The values to use to filter the SFrame.  The resulting SFrame
            will only include rows that have one of these values.

        column_name : str
            The column of the SFrame to match with the given 'values'.

        Returns
        -------
        out : SFrame

        Raises
        ------
        TypeError
            If column_name is not a str, or if the type of the given values
            does not match the type of the column.
        KeyError
            If column_name is not a column of this SFrame.

        Examples
        --------
        Let's create an SFrame of animals I own:

        >>> import graphlab as gl
        >>> sf = gl.SFrame({'id':[1,2,3,4], 'animal_type':['dog','cat','cow','horse'], 'name':['bob','jim','jimbob','bobjim']})

        The filter_by function lets you filter an SFrame by the actual values in a
        column.  So, for example, I have a list of common household pets:

        >>> common_household_pets = ['cat','hamster','dog','fish','bird','snake']

        I can use this to filter out household pets from barnyard animals, perhaps:

        >>> household_sf = sf.filter_by(common_household_pets, 'animal_type')
        >>> household_sf
        Columns:
        animal_type     str
        id      int
        name    str
        Rows: 2
        Data:
          animal_type  id name
        0         cat   2  jim
        1         dog   1  bob
        [2 rows x 3 columns]
        """
        _mt._get_metric_tracker().track('sframe.filter_by')
        if type(column_name) is not str:
            raise TypeError("Must pass a str as column_name")

        if type(values) is not SArray:
            # A value without '__iter__' is taken to be a single element and
            # wrapped in a list before conversion.
            candidates = values if hasattr(values, '__iter__') else [values]
            values = SArray(candidates)

        lookup_sf = SFrame()
        lookup_sf.add_column(values, column_name)

        # Group with no aggregators so each value appears once; duplicate
        # values would otherwise keep the join from acting as a filter.
        lookup_sf = lookup_sf.groupby(column_name, {})

        existing_columns = self.column_names()
        if column_name not in existing_columns:
            raise KeyError("Column '" + column_name + "' not in SFrame.")

        existing_type = self.column_types()[existing_columns.index(column_name)]
        given_type = lookup_sf.column_types()[0]
        if given_type != existing_type:
            raise TypeError("Type of given values does not match type of column '" +
                column_name + "' in SFrame.")

        with cython_context():
            filtered_proxy = self.__proxy__.join(lookup_sf.__proxy__,
                                                 'inner',
                                                 {column_name:column_name})
            return SFrame(_proxy=filtered_proxy)
