import graphlab.connect as _mt
import graphlab as _graphlab
import random as _random
import array

import graphlab.aggregate as _Aggregate
from graphlab.data_structures.sarray import SArray as _SArray
from graphlab.data_structures.sframe import SFrame as _SFrame
from graphlab.deps import pandas as _pandas, HAS_PANDAS as _HAS_PANDAS
from graphlab.deps import numpy as _numpy, HAS_NUMPY as _HAS_NUMPY

# matplotlib is an optional dependency. `_has_pyplot` records whether it could
# be imported; `_pp` and `_rcParams` are only defined when it is True.
_has_pyplot = False
try:
    import matplotlib.pyplot as _pp
    from matplotlib import rcParams as _rcParams
    _has_pyplot = True
except Exception:
    # Best-effort import: any ordinary failure (missing package, unusable
    # backend, ...) simply disables plotting.  `Exception` is used instead of
    # a bare `except` so KeyboardInterrupt/SystemExit are not swallowed.
    pass


def create(observation_data,
           user_id='user_id', item_id='item_id', target=None,
           user_data=None, item_data=None,
           verbose=True,
           method='auto', **method_options):
    r"""
    A unified interface for training recommender models.

    A trained model can be used to predict ratings and make recommendations.

    Parameters
    ----------
    observation_data : SFrame
        The dataset to use for training the model. It must contain a column of
        user ids and a column of item ids. Each row represents an observed
        interaction between the user and the item.  The (user, item) pairs
        are stored with the model so that they can later be excluded from
        recommendations if desired. It can optionally contain a target ratings
        column. All other columns are interpreted by the underlying model as
        side features for the observations. A pandas.DataFrame is also
        accepted and is converted to an SFrame before training.

        The user id and item id columns must be of type 'int' or 'str'. The
        target column must be of type 'int' or 'float'.

    user_id : string, optional
        The name of the column in `observation_data` that corresponds to the
        user id.

    item_id : string, optional
        The name of the column in `observation_data` that corresponds to the
        item id.

    target : string, optional
        The `observation_data` can optionally contain a column of scores
        representing ratings given by the users. If present, the name of this
        column may be specified with the `target` parameter. This parameter is
        required when ``method`` is 'matrix_factorization',
        'factorization_model', or 'linear_model', or when 'item_similarity' is
        chosen with a 'similarity_type' of 'cosine' or 'pearson'.

    user_data : SFrame, optional
        Side information for the users.  This SFrame must have a column with
        the same name as what is specified by the `user_id` input parameter.
        `user_data` can provide any amount of additional user-specific
        information.

    item_data : SFrame, optional
        Side information for the items.  This SFrame must have a column with
        the same name as what is specified by the `item_id` input parameter.
        `item_data` can provide any amount of additional item-specific
        information.

    verbose : bool, optional
        Enables verbose output.

    method : string, optional
        Name of the method to be used. Available methods include:

        - *auto* (default): automatically choose the method based on the type
            of input data. When a `target` column is present, the default
            method is 'matrix_factorization'. Otherwise, the default method is
            'item_similarity'.

        - *item_means*: create a :class:`~graphlab.recommender.ItemMeansModel`.

        - *item_similarity*: create a
          :class:`~graphlab.recommender.ItemSimilarityModel`.

        - *factorization_model*: create a
          :class:`~graphlab.recommender.FactorizationModel`.

        - *linear_model*: create a
          :class:`~graphlab.recommender.LinearRegressionModel`.

        - *matrix_factorization*: create a
          :class:`~graphlab.recommender.MatrixFactorizationModel`.

        - *popularity*: create a :class:`~graphlab.recommender.PopularityModel`.

        See the individual model APIs for more details on other create options.

    **method_options : optional
        Options to pass to the chosen method. See the documentation for
        individual models for details.

    Returns
    -------
    out : RecommenderModel
        A trained model, one of
        :class:`~graphlab.recommender.ItemMeansModel`,
        :class:`~graphlab.recommender.ItemSimilarityModel`,
        :class:`~graphlab.recommender.FactorizationModel`,
        :class:`~graphlab.recommender.LinearRegressionModel`,
        :class:`~graphlab.recommender.MatrixFactorizationModel`,
        or
        :class:`~graphlab.recommender.PopularityModel`.

    Raises
    ------
    TypeError
        If `observation_data` is neither an SFrame nor a pandas.DataFrame.

    ValueError
        If `binary_targets` is enabled together with a positive
        `ranking_regularization`.

    RuntimeError
        If `method` is not one of the recognized method names.

    See Also
    --------
    ItemMeansModel
    ItemSimilarityModel
    FactorizationModel
    LinearRegressionModel
    MatrixFactorizationModel
    PopularityModel

    Examples
    --------
    **Basic usage**

    Given basic user-item observation data, an
    :class:`~graphlab.recommender.ItemSimilarityModel` is created:

    >>> sf = graphlab.SFrame({'user_id': ['0', '0', '0', '1', '1', '2', '2', '2'],
    ...                       'item_id': ['a', 'b', 'c', 'a', 'b', 'b', 'c', 'd']})
    >>> m = graphlab.recommender.create(sf)
    >>> recs = m.recommend()

    **Creating a model for ratings data**

    This trains a :class:`~graphlab.recommender.MatrixFactorizationModel` that
    can predict target ratings:

    >>> sf2 = graphlab.SFrame({'user_id': ['0', '0', '0', '1', '1', '2', '2', '2'],
    ...                        'item_id': ['a', 'b', 'c', 'a', 'b', 'b', 'c', 'd'],
    ...                        'rating': [1, 3, 2, 5, 4, 1, 4, 3]})
    >>> m2 = graphlab.recommender.create(sf2, target="rating")

    **Creating models with additional method options**

    Most models allow for a number of additional options during create. For
    the complete list of acceptable options, please refer to the documentation
    for individual models. Such options can be passed to the underlying model
    just like any other parameter. For example, the following code creates
    an :class:`~graphlab.recommender.ItemSimilarityModel` with a space-saving
    option called `only_top_k`. The returned model stores only the 2 most
    similar items for each item:

    >>> graphlab.recommender.create(sf, method = "item_similarity", only_top_k=2)
    """

    _mt._get_metric_tracker().track('toolkit.recsys.create')
    _mt._get_metric_tracker().track('toolkit.recsys.create.' + method)

    from item_similarity import ItemSimilarityModel
    from matrix_factorization import MatrixFactorizationModel
    from popularity import PopularityModel
    from item_means import ItemMeansModel
    from linear_regression import LinearRegressionModel
    from factorization_model import FactorizationModel

    _mt._get_metric_tracker().track('toolkit.recsys.default.create')

    # Use the same isinstance() test for validation and for conversion, so a
    # DataFrame subclass that passes validation is also converted below.
    is_dataframe = _HAS_PANDAS and isinstance(observation_data,
                                              _pandas.DataFrame)
    if not (isinstance(observation_data, _SFrame) or is_dataframe):
        raise TypeError('observation_data input must be a pandas.DataFrame '
                        'or SFrame')

    if method == 'auto':
        if target is None:
            method = 'item_similarity'
        else:
            method = 'matrix_factorization'

    # Python wrapper class for each public method name.
    model_classes = {'item_similarity': ItemSimilarityModel,
                     'popularity': PopularityModel,
                     'item_means': ItemMeansModel,
                     'matrix_factorization': MatrixFactorizationModel,
                     'linear_model': LinearRegressionModel,
                     'factorization_model': FactorizationModel}

    opts = {'model_name': method}

    # 'popularity' and 'item_means' are both initialized under the
    # 'item_counts' model name; they differ only in the wrapper class below.
    if method in ['popularity', 'item_means']:
        opts['model_name'] = 'item_counts'

    response = _graphlab.toolkits.main.run("recsys_init", opts)
    model_proxy = response['model']

    if is_dataframe:
        observation_data = _SFrame(observation_data)
    if user_data is None:
        user_data = _graphlab.SFrame()
    if item_data is None:
        item_data = _graphlab.SFrame()

    if 'nearest_items' not in method_options:
        method_options['nearest_items'] = _graphlab.SFrame()

    opts = {'dataset': observation_data,
            'user_id': user_id,
            'item_id': item_id,
            'target': target,
            'user_data': user_data,
            'item_data': item_data,
            'model': model_proxy,
            'random_seed': 1}
    # User-supplied options take precedence over the defaults above.
    opts.update(method_options)
    plot = False

    # Disable ranking_regularization together with binary_targets
    if (opts.get('binary_targets', False) and
            opts.get('ranking_regularization', 0) > 0):
        raise ValueError('Using binary_targets together with ranking_regularization is not yet supported.')

    response = _graphlab.toolkits.main.run('recsys_train', opts, verbose, plot)

    # Final safeguard, kept after training exactly as before.
    if method not in model_classes:
        raise RuntimeError("Provided method not recognized.")
    return model_classes[method](response['model'])


def __plot_histogram(measurements, means, names=None, metric_name=None):
    """
    Plot histograms of the measurements, overlaid with vertical lines
    representing the means of the measurements.

    Nothing is drawn when `measurements` is empty.

    Parameters
    ----------
    measurements : list
        List of measurements (recall, precision or RMSE), one list-like
        series per model.

    means : list
        List of doubles, intended to be the mean of each list in
        'measurements'.

    names : list
        List of model name strings. The legend is only drawn when names are
        given.

    metric_name : string
        Name of the metric, used as the x-axis label.
    """
    num_measurements = len(measurements)
    if num_measurements == 0:
        return

    # A list of colors for plotting; cycled when there are more series than
    # colors.
    COLORS_LIST = ['b', 'g', 'r', 'k', 'm', 'c']
    colors = [COLORS_LIST[i % len(COLORS_LIST)] for i in range(num_measurements)]

    # The `hold` keyword is intentionally not passed: newer matplotlib
    # releases no longer accept it, and holding was already the default.
    hist_handle = _pp.hist(measurements, bins=20,
                           color=colors,
                           label=names)
    if names is not None:
        _pp.legend()

    # hist() returns (counts, bin edges, patches).  For several series
    # `counts` holds one array per series; for a single series matplotlib
    # returns one flat array instead.  We need the overall maximum count so we
    # know how tall the vertical lines should be.
    counts = hist_handle[0]
    try:
        max_count = max(max(series_counts) for series_counts in counts)
    except TypeError:
        # Flat array of scalars: the single-series case.
        max_count = max(counts)

    _pp.vlines(means, 0, max_count, colors=colors)
    _pp.xlabel(metric_name)
    _pp.ylabel('Counts')


def __plot_overlap_hists(results, label, names, bins=20, alpha=0.3):
    """
    Draw several un-normalized histograms on top of each other in one new
    figure, each with a step outline in the same color as its bars.

    The first series determines the bin edges; every later series reuses
    them so the histograms are directly comparable.

    Parameters
    ----------
    results : list
        List of list-like objects, one histogram per element. Must contain at
        least one series.

    label : string
        Text for the x-axis.

    names : list
        Legend entry for each series in `results`.

    bins : int
        Number of bins used for the first series. Default is 20.

    alpha : float
        Opacity of the histogram patches. Default is 0.3.
    """
    assert _HAS_NUMPY, 'Cannot find numpy'
    fig, ax = _pp.subplots()

    def draw_outline(heights, edges, bars, width):
        # Trace the histogram silhouette in the bars' own face color.  A
        # leading zero is prepended so there is one height per bin edge.
        face = bars[0].get_facecolor()
        padded = _numpy.insert(heights, 0, 0)
        ax.step(edges, padded, color=face, lw=width)

    # First series: its histogram fixes the bin edges for everything else.
    heights, bins, bars = ax.hist(results[0], bins=bins, alpha=alpha, lw=0.1,
                                  label=names[0])
    draw_outline(heights, bins, bars, 5)

    # Remaining series reuse the edges returned by the previous call.
    for series, name in zip(results[1:], names[1:]):
        heights, bins, bars = ax.hist(series, bins=bins, alpha=alpha, lw=0.03,
                                      label=name, fill=True)
        draw_outline(heights, bins, bars, 4)

    ax.ticklabel_format(style='sci', scilimits=(0, 0), axis='y')
    ax.set_xlabel(label)
    ax.set_ylabel('Frequency')
    ax.legend()
    fig.show()


def _compare_results_precision_recall(results, model_names=None):
    """
    Plot one precision-recall curve per model in a single new figure.

    For every model the per-user results are averaged at each cutoff, and the
    resulting (recall, precision) points are drawn in order of increasing
    recall. Nothing is drawn when `results` is empty.

    Parameters
    ----------
    results : list
        List of SFrames, one per model, holding 'cutoff', 'precision' and
        'recall' columns.

    model_names : list
        Legend entries, one per model. Defaults to "model 0", "model 1", ...
    """
    palette = ['b', 'g', 'r', 'k', 'm', 'c']
    model_count = len(results)

    if model_count < 1:
        return

    if model_names is None:
        model_names = ["model {}".format(idx) for idx in range(model_count)]

    # Mean recall and precision over users, grouped by cutoff.
    mean_ops = {'recall': _Aggregate.MEAN('recall'),
                'precision': _Aggregate.MEAN('precision')}
    curves = [frame.groupby('cutoff', mean_ops) for frame in results]
    fig, ax = _pp.subplots()

    for idx, curve in enumerate(curves):
        ordered = curve.sort('recall')
        series_name = model_names[idx]
        # Colors repeat once there are more models than palette entries.
        line_color = palette[idx % len(palette)]
        ax.plot(list(ordered['recall']), list(ordered['precision']),
                line_color, label=series_name)

    ax.set_title('Precision-Recall Averaged Over Users')
    ax.set_xlabel('Recall')
    ax.set_ylabel('Precision')
    ax.legend()
    fig.show()


def _compare_results_rmse2(results, model_names=None):
    """
    Compare models that output root-mean-squared error (RMSE).

    Draws overlapping histograms of the per-user and per-item RMSE, followed
    by a bar chart of the overall RMSE of each model. Nothing is drawn when
    `results` is empty or when there are more models than colors in
    matplotlib's default color cycle.

    Parameters
    ----------
    results : list
        List of evaluation results, one per model. Each element is indexed
        with 'rmse_by_user', 'rmse_by_item' (both exposing an 'rmse' column)
        and 'rmse_overall'.

    model_names : list
        List of model name strings. Defaults to "model 0", "model 1", ...
    """
    assert _HAS_NUMPY, 'Cannot find numpy'
    ## Validate the results
    num_models = len(results)

    if num_models < 1:
        return

    # 'axes.color_cycle' is the legacy rcParam name; matplotlib releases that
    # dropped it expose the cycle as 'axes.prop_cycle' instead.
    try:
        num_colors = len(_rcParams['axes.color_cycle'])
    except KeyError:
        num_colors = len(_rcParams['axes.prop_cycle'])

    if num_models > num_colors:
        return

    if model_names is None:
        model_names = ["model {}".format(i) for i in range(num_models)]

    ## Histograms of per-user and per-item RMSE
    user_rmse_by_model = [list(elem['rmse_by_user']['rmse']) for elem in results]
    __plot_overlap_hists(user_rmse_by_model, 'Per-User RMSE', model_names, bins=100)

    item_rmse_by_model = [list(elem['rmse_by_item']['rmse']) for elem in results]
    __plot_overlap_hists(item_rmse_by_model, 'Per-Item RMSE', model_names, bins=100)

    ## Bar chart of Overall RMSE by model
    overall_rmse_by_model = [elem['rmse_overall'] for elem in results]

    fig, ax = _pp.subplots()
    BAR_WIDTH = 0.3
    left_edges = _numpy.arange(num_models) + BAR_WIDTH
    # The tick positions below assume each bar starts at its x value.  That
    # was matplotlib's old default; newer releases center bars by default, so
    # the alignment is requested explicitly.
    ax.bar(left_edges, overall_rmse_by_model, BAR_WIDTH, align='edge')
    ax.set_xticks(left_edges + BAR_WIDTH / 2)
    ax.set_xticklabels(model_names)
    ax.set_title('Overall RMSE')
    fig.show()


def compare_models(dataset, models, model_names=None, user_sample=1.0,
                   metric='auto',
                   target=None,
                   exclude_known_for_precision_recall=True,
                   make_plot=True,
                   verbose=True,
                   **kwargs):
    """
    Compare the prediction or recommendation performance of recommender models
    on a common test dataset.

    Models that are trained to predict ratings are compared separately from
    models that are trained without target ratings.  The ratings prediction
    models are compared on root-mean-squared error, and the rest are compared on
    precision-recall.

    Parameters
    ----------
    dataset : SFrame
        The dataset to use for model evaluation.

    models : list[RecommenderModel]
        List of trained recommender models.

    model_names : list[str], optional
        List of model name strings for display. Defaults to 'M0', 'M1', ...

    user_sample : float, optional
        Sampling proportion of unique users to use in estimating model
        performance. Defaults to 1.0, i.e. use all users in the dataset.
        When sampling, the user id column name is taken from the first model.

    metric : str, {'auto', 'rmse', 'precision_recall'}, optional
        Metric for the evaluation. The default automatically splits
        models into two groups with their default evaluation metric respectively:
        'rmse' for models trained with a target, and 'precision_recall'
        otherwise.

    target : str, optional
        The name of the target column for evaluating rmse. If the model is
        trained with a target column, the default is to use the same column.
        If the model is trained without a target column and `metric='rmse'`,
        then this option must be provided by the user.

    exclude_known_for_precision_recall : bool, optional
        A useful option when `metric='precision_recall'`. Recommender models
        automatically exclude items seen in the training data from the
        final recommendation list. If the input evaluation `dataset` is the
        same as the data used for training the models, set this option to False.

    make_plot : bool, optional
        If true, a plot is made using matplotlib. If matplotlib could not be
        imported, a warning is printed instead.

    verbose : bool, optional
        If true, print the progress.

    **kwargs : optional
        Additional keyword arguments forwarded to each model's ``evaluate``.

    Returns
    -------
    out : list
        A list with one evaluation result per model, in the same order as
        `models`, as returned by each model's ``evaluate`` method.

    Raises
    ------
    ValueError
        If `models` is empty, if the number of `model_names` differs from the
        number of models, or if user sampling is requested and the first model
        has no 'user_id' set.

    Examples
    --------
    If you have created two ItemSimilarityModels ``m1`` and ``m2`` and have
    an :class:`~graphlab.SFrame` ``test_data``, then you may compare the
    performance of the two models on test data using:

    >>> train_data = graphlab.SFrame({'user_id': ["0", "0", "0", "1", "1", "2", "2", "2"],
    ...                               'item_id': ["a", "c", "e", "b", "f", "b", "c", "d"]})
    >>> test_data = graphlab.SFrame({'user_id': ["0", "0", "1", "1", "1", "2", "2"],
    ...                              'item_id': ["b", "d", "a", "c", "e", "a", "e"]})
    >>> m1 = graphlab.recommender.create(train_data, method='item_similarity')
    >>> m2 = graphlab.recommender.create(train_data, method='item_similarity', only_top_k=1)
    >>> graphlab.recommender.compare_models(test_data, [m1, m2], model_names=["m1", "m2"])

    The evaluation metric is automatically set to 'precision_recall', and the
    evaluation will be based on recommendations that exclude items seen in the
    training data.

    If you want to evaluate on the original training set:

    >>> graphlab.recommender.compare_models(train_data, [m1, m2],
    ...                                     exclude_known_for_precision_recall=False)

    Suppose you have four models, two trained with a target rating column, and
    the other two trained without a target. By default, the models are put into
    two different groups with "rmse", and "precision-recall" as the evaluation
    metric respectively.

    >>> train_data2 = graphlab.SFrame({'user_id': ["0", "0", "0", "1", "1", "2", "2", "2"],
    ...                                'item_id': ["a", "c", "e", "b", "f", "b", "c", "d"],
    ...                                'rating': [1, 3, 4, 5, 3, 4, 2, 5]})
    >>> test_data2 = graphlab.SFrame({'user_id': ["0", "0", "1", "1", "1", "2", "2"],
    ...                               'item_id': ["b", "d", "a", "c", "e", "a", "e"],
    ...                               'rating': [3, 5, 4, 4, 3, 5, 2]})
    >>> m3 = graphlab.recommender.create(train_data2, target='rating',
    ...                                  method='matrix_factorization')
    >>> m4 = graphlab.recommender.create(train_data2, target='rating',
    ...                                  method='factorization_model')
    >>> graphlab.recommender.compare_models(test_data2, [m1, m2, m3, m4])

    To compare all four models using the same 'precision_recall' metric, you can
    do:

    >>> graphlab.recommender.compare_models(test_data2, [m1, m2, m3, m4],
    ...                                     metric='precision_recall')
    """

    _mt._get_metric_tracker().track('toolkit.recsys.compare_models')

    num_models = len(models)

    if num_models < 1:
        raise ValueError("Must pass in at least one recommender model to "
                         "evaluate")

    if model_names is None:
        model_names = ['M' + str(i) for i in range(num_models)]
    elif len(model_names) != num_models:
        raise ValueError("Must pass in the same number of model names as "
                         "models")

    # if we are asked to sample the users, come up with a list of unique users
    if user_sample < 1.0:
        user_id_name = models[0].get('user_id')
        if user_id_name is None:
            raise ValueError("user_id not set in model(s)")
        user_sa = dataset[user_id_name]
        unique_users = list(user_sa.unique())
        nusers = len(unique_users)
        ntake = int(round(user_sample * nusers))

        _random.shuffle(unique_users)

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("compare_models: using " + str(ntake) +
              " users to estimate model performance")
        users = frozenset(unique_users[:ntake])
        # Keep only the rows that belong to one of the sampled users.
        ix = [u in users for u in dataset[user_id_name]]
        dataset_subset = dataset[_SArray(ix) == True]
    else:
        dataset_subset = dataset

    results = []
    for (m, mname) in zip(models, model_names):
        if verbose:
            print('PROGRESS: Evaluate model %s' % mname)
        r = m.evaluate(dataset_subset,
                       metric,
                       exclude_known_for_precision_recall,
                       target,
                       verbose=verbose,
                       cutoffs=list(range(2, 50, 2)), **kwargs)
        results.append(r)

    if make_plot:
        if _has_pyplot is True:
            # separate into two sets of models: those that evaluate rmse by
            # default, and those that evaluate precision-recall.
            ## The following relies on the fact that evaluate_rmse return a dict
            ## while evaluate_precision_recall returns an SFrame with column
            ## names of user_id, 'cutoff', 'precision', 'recall', 'count'
            is_pr = [isinstance(r, _SFrame) and ('precision' in r.column_names())
                     for r in results]
            results_pr = [r for (r, pr) in zip(results, is_pr) if pr]
            results_rmse = [r for (r, pr) in zip(results, is_pr) if not pr]
            model_names_pr = [n for (n, pr) in zip(model_names, is_pr) if pr]
            model_names_rmse = [n for (n, pr) in zip(model_names, is_pr)
                                if not pr]

            if len(results_pr) > 0:
                _compare_results_precision_recall(results_pr, model_names_pr)
            if len(results_rmse) > 0:
                _compare_results_rmse2(results_rmse, model_names_rmse)
            _pp.show()
        else:
            # Previously a bare string expression, so it was never shown.
            print("Warning: Matplotlib could not be imported - no plot output.")

    return results


def precision_recall_by_user(observed_user_items,
                             recommendations,
                             cutoffs=[10]):
    r"""
    Compute precision and recall at a given cutoff for each user. In information
    retrieval terms, precision represents the ratio of relevant, retrieved items
    to the number of retrieved items. Recall represents the ratio of relevant,
    retrieved items to the number of relevant items.

    Let :math:`p_k` be a vector of the first :math:`k` elements in the
    recommendations for a particular user, and let :math:`a` be the set of items
    in ``observed_user_items`` for that user. The "precision at cutoff k" for
    this user is defined as

    .. math::
        P(k) = \frac{ | a \cap p_k | }{k},

    while "recall at cutoff k" is defined as

    .. math::
        R(k) = \frac{ | a \cap p_k | }{|a|}

    The order of the elements in the recommendations affects the returned
    precision and recall scores.

    Parameters
    ----------
    observed_user_items : SFrame
        An SFrame containing observed user item pairs. It must be non-empty
        and contain the user and item columns named by the first two columns
        of `recommendations`, with matching types. A pandas.DataFrame is
        converted to an SFrame first.

    recommendations : SFrame
        An SFrame containing columns pertaining to the user id, the item id,
        the score given to that pair, and the rank of that item among the
        recommendations made for user id. The first column is taken as the
        user id column and the second as the item id column. For example, see
        the output of recommend() produced by any graphlab.recommender model.

    cutoffs : list[int], optional
        The cutoffs to use when computing precision and recall. All cutoffs
        must be positive. The default list is never modified.

    Returns
    -------
    out : SFrame
        An SFrame containing columns user id, cutoff, precision, recall, and
        count where  the precision and recall are reported for each user at
        each requested cutoff, and count is the number of observations for
        that user id.

    Raises
    ------
    AssertionError
        If any of the input requirements described above is violated.

    Notes
    -----
    The corner cases that involve empty lists were chosen to be consistent
    with the feasible set of precision-recall curves, which start at
    (precision, recall) = (1,0) and end at (0,1). However, we do not believe
    there is a well-known consensus on this choice.

    Examples
    --------
    Given SFrames ``train_data`` and ``test_data`` with columns user_id
    and item_id:

    >>> m = graphlab.recommender.create(train_data)
    >>> recs = m.recommend()
    >>> m.precision_recall_by_user(test_data, recs, cutoffs=[5, 10])
    """
    _mt._get_metric_tracker().track('toolkit.recsys.precision_recall_by_user')

    if _HAS_PANDAS and isinstance(observed_user_items, _pandas.DataFrame):
        observed_user_items = _SFrame(observed_user_items)

    # Validation deliberately stays as assert statements so that callers keep
    # seeing AssertionError for invalid input.
    assert type(observed_user_items) == _SFrame
    assert type(recommendations) == _SFrame
    assert type(cutoffs) == list
    assert min(cutoffs) > 0, "All cutoffs must be positive integers."
    assert recommendations.num_columns() >= 2
    user_id = recommendations.column_names()[0]
    item_id = recommendations.column_names()[1]

    assert observed_user_items.num_rows() > 0, \
        "Evaluating precision and recall requires a non-empty " \
        "observed_user_items."
    assert user_id in observed_user_items.column_names(), \
        "User column required in observed_user_items."
    assert item_id in observed_user_items.column_names(), \
        "Item column required in observed_user_items."
    assert observed_user_items[user_id].dtype() == \
        recommendations[user_id].dtype(), \
        "The user column in the two provided SFrames must have the same type."
    assert observed_user_items[item_id].dtype() == \
        recommendations[item_id].dtype(), \
        "The item column in the two provided SFrames must have the same type."

    # The toolkit call receives the cutoffs as a typed float array; `cutoffs`
    # is rebound here, so the caller's list is left untouched.
    cutoffs = array.array('f', cutoffs)

    opts = {'data': observed_user_items,
            'recommendations': recommendations,
            'cutoffs': cutoffs}

    response = _graphlab.toolkits.main.run('evaluation_precision_recall_by_user', opts)
    return _SFrame(None, _proxy=response['pr'])


def random_split_by_user(dataset,
                         user_id='user_id',
                         item_id='item_id',
                         max_num_users=1000,
                         item_test_proportion=.2,
                         random_seed=0):
    """
    Create a recommender-friendly train-test split of the provided data set.

    The test dataset is generated by first choosing `max_num_users` out of the
    total number of users in `dataset`. Then, for each of the chosen test users,
    a portion of the user's items (determined by `item_test_proportion`) is
    randomly chosen to be included in the test set. This split allows the
    training data to retain enough information about the users in the testset,
    so that adequate recommendations can be made. The total number of users
    in the test set may be fewer than `max_num_users` if a user was chosen for
    the test set but none of their items are selected.

    Parameters
    ----------
    dataset : SFrame
        An SFrame containing (user, item) pairs. A pandas.DataFrame is
        converted to an SFrame first.

    user_id : str, optional
        The name of the column in ``dataset`` that contains user ids.

    item_id : str, optional
        The name of the column in ``dataset`` that contains item ids.

    max_num_users : int, optional
        The maximum number of users to use to construct the test set. If
        set to None or to the string 'all', then use all available users.

    item_test_proportion : float, optional
        The desired probability that a test user's item will be chosen
        for the test set.

    random_seed : int, optional
        The random seed to use for randomization.

    Returns
    -------
    train, test : SFrame
        A tuple with two datasets to be used for training and testing.

    Raises
    ------
    AssertionError
        If `user_id` or `item_id` is not a column of `dataset`.

    Examples
    --------
    >>> sf = graphlab.SFrame('s3://GraphLab-Datasets/audioscrobbler')
    >>> train, test = graphlab.recommender.random_split_by_user(sf, max_num_users=100)
    """

    _mt._get_metric_tracker().track('toolkit.recsys.random_split_by_user')

    # isinstance() (rather than an exact type match) also converts DataFrame
    # subclasses.
    if _HAS_PANDAS and isinstance(dataset, _pandas.DataFrame):
        dataset = _SFrame(dataset)

    assert user_id in dataset.column_names(), \
        'Provided user column "{0}" not found in data set.'.format(user_id)
    assert item_id in dataset.column_names(), \
        'Provided item column "{0}" not found in data set.'.format(item_id)

    # 'all' is accepted as an alias for None.
    if max_num_users == 'all':
        max_num_users = None

    opts = {'dataset': dataset,
            'user_id': user_id,
            'item_id': item_id,
            'max_num_users': max_num_users,
            'item_test_proportion': item_test_proportion,
            'random_seed': random_seed}

    response = _graphlab.toolkits.main.run('recsys_train_test_split', opts)
    train = _SFrame(None, _proxy=response['train'])
    test = _SFrame(None, _proxy=response['test'])
    return train, test
