import graphlab.connect as _mt
import graphlab as _graphlab
import random as _random
import array

import graphlab.aggregate as _Aggregate
from graphlab.data_structures.sarray import SArray as _SArray
from graphlab.data_structures.sframe import SFrame as _SFrame
from graphlab.deps import pandas as _pandas, HAS_PANDAS as _HAS_PANDAS
from graphlab.deps import numpy as _numpy, HAS_NUMPY as _HAS_NUMPY

# matplotlib is an optional dependency: plotting helpers in this module are
# only usable when the import below succeeds.
_has_pyplot = False
try:
    import matplotlib.pyplot as _pp
    from matplotlib import rcParams as _rcParams
    _has_pyplot = True
except Exception:
    # Best effort: any ordinary failure while importing matplotlib simply
    # disables plotting.  Unlike a bare ``except:``, this does not swallow
    # KeyboardInterrupt or SystemExit.
    pass


def create(observation_data,
           user_column='user_id', item_column='item_id', target_column=None,
           user_data=None, item_data=None,
           verbose=True,
           method='auto', **method_options):
    """
    Trains a GraphLab RecommenderModel.

    A trained model can be used to score (user, item) pairs and make
    recommendations.

    Parameters
    ----------
    observation_data : SFrame
        The dataset to use for training the model. The (user, item) pairs
        are stored with the model so that they can be excluded from
        recommendations if desired. A pandas.DataFrame is also accepted and
        is converted to an SFrame before training.

    user_column : string
        The column name in observation_data that corresponds to the user id.

    item_column : string
        The column name in observation_data that corresponds to the item id.

    target_column : string
        If a score is present in the information -- such as a rating -- then
        this specifies the name of the column in observation_data to use as
        the target variable. This argument is required when ``method`` is
        ``matrix_factorization``, ``factorization_model``, or
        ``linear_model``, or when ``item_similarity`` is chosen with a
        ``similarity_type`` of ``cosine``.

    user_data : SFrame
        Side information for the users.  This SFrame must have a column named
        with the  user column name given above. It provides any amount of
        additional user-specific information.

    item_data : SFrame
        Side information for the items. This SFrame must have a column named
        with the item column name given above. It provides any amount of
        additional item-specific information.

    method : string, optional
        Name of the method to be used. See the references for more details on
        each. Available methods include:

        - auto: automatically chooses the method based on the type of input
          data (``item_similarity`` when no ``target_column`` is given,
          ``matrix_factorization`` otherwise).
        - matrix_factorization: creates a
          :class:`~graphlab.recommender.MatrixFactorizationModel`
        - factorization_model: creates a
          :class:`~graphlab.recommender.FactorizationModel`
        - item_similarity: creates a
          :class:`~graphlab.recommender.ItemSimilarityModel`
        - linear_model: creates a
          :class:`~graphlab.recommender.LinearRegressionModel`
        - popularity: creates a :class:`~graphlab.recommender.PopularityModel`
        - item_means: creates a :class:`~graphlab.recommender.ItemMeansModel`

    **method_options : optional
        Options to pass to the chosen method.

        When ``method=linear_model``, the following ``method_options`` are
        available.

        +---------------------------------+---------+------------------------------------+
        |        Options                  | Default |   Description                      |
        +=================================+=========+====================================+
        | regularization                  | 0.0001  | L2 regularization for any          |
        |                                 |         | interaction terms.                 |
        +---------------------------------+---------+------------------------------------+
        | binary_targets                  | False   | Assume the target column is        |
        |                                 |         | binary. If True, use logistic      |
        |                                 |         | regression.                        |
        +---------------------------------+---------+------------------------------------+

        When ``method=matrix_factorization``, the following ``method_options``
        are available.  See :class:`MatrixFactorizationModel
        <graphlab.recommender.MatrixFactorizationModel>` for more information.

        +---------------------------------+---------+------------------------------------+
        |        Options                  | Default |   Description                      |
        +=================================+=========+====================================+
        | n_factors                       | 8       | Number of latent factors.          |
        +---------------------------------+---------+------------------------------------+
        | regularization                  | 0.0001  | Regularization for interaction     |
        |                                 |         | terms. (L2)                        |
        +---------------------------------+---------+------------------------------------+
        | linear_regularization           | 0.0     | Regularization for linear term     |
        +---------------------------------+---------+------------------------------------+
        | nmf                             | False   | Use nonnegative matrix             |
        |                                 |         | factorization, which forces the    |
        |                                 |         | factors to be nonnegative. Disables|
        |                                 |         | linear and intercept terms.        |
        +---------------------------------+---------+------------------------------------+
        | unobserved_rating_regularization| 0       | If nonzero, then penalize the      |
        |                                 |         | predicted value of user-item pairs |
        |                                 |         | not in the training set.  Larger   |
        |                                 |         | values increase this penalization. |
        +---------------------------------+---------+------------------------------------+
        | unobserved_rating_value         | None    | If unobserved_rating_regularization|
        |                                 |         | is greater than 0, then penalize   |
        |                                 |         | unobserved items predicted to be   |
        |                                 |         | scored larger than this value.     |
        |                                 |         | If None, the rating mean is used.  |
        +---------------------------------+---------+------------------------------------+
        | binary_targets                  | False   | Assume the target column is        |
        |                                 |         | binary. If True, use logistic      |
        |                                 |         | loss to fit the model.             |
        +---------------------------------+---------+------------------------------------+



        When ``method=factorization_model``, the following ``method_options``
        are available.  See :class:`FactorizationModel
        <graphlab.recommender.FactorizationModel>` for more information.

        +---------------------------------+---------+------------------------------------+
        |        Options                  | Default |   Description                      |
        +=================================+=========+====================================+
        | n_factors                       | 8       | Number of latent factors.          |
        +---------------------------------+---------+------------------------------------+
        | regularization                  | 0.0001  | Regularization for interaction     |
        |                                 |         | terms. (L2)                        |
        +---------------------------------+---------+------------------------------------+
        | linear_regularization           | 0.0     | Regularization for linear          |
        |                                 |         | terms.                             |
        +---------------------------------+---------+------------------------------------+
        | unobserved_rating_regularization| 0       | If nonzero, then penalize the      |
        |                                 |         | predicted value of user-item pairs |
        |                                 |         | not in the training set.  Larger   |
        |                                 |         | values increase this penalization. |
        +---------------------------------+---------+------------------------------------+
        | unobserved_rating_value         | None    | If unobserved_rating_regularization|
        |                                 |         | is greater than 0, then penalize   |
        |                                 |         | unobserved items predicted to be   |
        |                                 |         | scored larger than this value.     |
        |                                 |         | If None, the rating mean is used.  |
        +---------------------------------+---------+------------------------------------+
        | binary_targets                  | False   | Assume the target column is        |
        |                                 |         | binary. If True, use logistic      |
        |                                 |         | loss to fit the model.             |
        +---------------------------------+---------+------------------------------------+

        When ``method=item_similarity``, the following ``method_options``
        are available.  See :class:`ItemSimilarityModel
        <graphlab.recommender.ItemSimilarityModel>` for more information.

        +---------------------------+---------+-------------------------------+
        |        Options            | Default |   Description                 |
        +===========================+=========+===============================+
        | similarity_type           | jaccard | Metric to use (jaccard,       |
        |                           |         | cosine, or pearson).          |
        +---------------------------+---------+-------------------------------+
        | threshold                 | 0.001   | Predictions ignore items      |
        |                           |         | below this similarity value.  |
        +---------------------------+---------+-------------------------------+
        | only_top_k                | 100     | Number of similar items to    |
        |                           |         | store for each item. Default  |
        |                           |         | value is the number of items. |
        +---------------------------+---------+-------------------------------+
        | nearest_items             | None    | See notes.                    |
        +---------------------------+---------+-------------------------------+

    verbose : bool, optional
        Enables verbose output. Default is verbose.

    Returns
    -------
    out : RecommenderModel
        A trained model.

    Raises
    ------
    TypeError
        If ``observation_data`` is neither an SFrame nor a pandas.DataFrame.

    ValueError
        If ``binary_targets`` is combined with a positive
        ``unobserved_rating_regularization`` or ``unobserved_rating_value``.

    RuntimeError
        If ``method`` is not one of the names listed above.

    Examples
    --------
    Fit a recommender using an :class:`~graphlab.SFrame` containing a
    ``user_id`` column and an ``item_id`` column and get recommendations
    for some users::

      >>> import graphlab
      >>> data = graphlab.SFrame('s3://GraphLab-Datasets/audioscrobbler')
      >>> m = graphlab.recommender.create(data)
      >>> recs = m.recommend(users=users)

    If there is some target of interest, such as a "rating" column, one can
    provide the column names manually. By default this fits a matrix
    factorization model.
    One can fit a matrix factorization model with a target column called
    "rating"::

      >>> gl.recommender.create(data,
                                user_column="user",
                                item_column="item",
                                target_column="rating")

    Other options are available. For example one can modify the
    regularization used during matrix factorization::

      >>> gl.recommender.create(data,
                                user_column="user",
                                item_column="item",
                                target_column="rating",
                                method = "matrix_factorization",
                                regularization = 0.1)

    **Examples for Item Similarity**

    One can also fit item similarity models with additional options, such as
    restricting the model to only store each item's 25 closest items::

      >>> gl.recommender.create(data, "user", "item",
                                method = "item_similarity",
                                only_top_k = 25)

    The default item similarity method is "jaccard". There are another two
    methods ("cosine" or "pearson") to use if one wants to compute the
    similarities between pairs of items by the ratings they received::

      >>> gl.recommender.create(data,
                                user_column="user",
                                item_column="item",
                                target_column="rating",
                                method = "item_similarity",
                                similarity_type = "pearson",
                                threshold = 0.01,
                                only_top_k = 50)

    When ``threshold`` is set, items below this similarity score are ignored
    when making predictions.
    For "pearson" similarity, the threshold is for the absolute values.

    **Examples for Factorization Models**

    The factorization models, matrix_factorization and
    factorization_model, both find a mix of linear and interaction
    terms that attempt to predict the ratings of user and item pairs
    as accurately as possible.  The models differ in how user and item
    side features are treated -- the factorization_model fits
    interaction factors to these side features, while the
    matrix_factorization only fits the linear terms.

    The following examples are given for "matrix_factorization", but
    "factorization_model" works in exactly the same way.

    The basic use is simple::

       >>> gl.recommender.create(data, "user", "item", "rating",
                                 method = "matrix_factorization")

    To penalize items that are not in the training set, set
    `unobserved_rating_regularization` to a value greater than 0.  This
    expresses the thought that if a user is not observed interacting
    with an item, then that user has implied a weak preference against
    that item.  The algorithm attempts to find a model that predicts
    all unseen user-item pairs to score below
    `unobserved_rating_value`, with `unobserved_rating_regularization`
    controlling how it balances this objective with predicting the
    actual scores of the model.  When `unobserved_rating_regularization ==
    1`, then penalizing unseen items scoring higher than
    `unobserved_rating_value` is given equal weight to fitting the
    actual scores.

    For example, if the ratings are given between 1 and 5, you might
    want to push unseen items to somewhere close to the mean::

       >>> gl.recommender.create(data, "user", "item", "rating",
                                 method = "matrix_factorization",
                                 unobserved_rating_regularization = 0.1,
                                 unobserved_rating_value = 3)

    Notes
    -----

    **Categorical vs. Numeric Features**

    In all the recommender models, users and items are treated as categorical
    variables.  In this context, each distinct value is assigned separate
    coefficients.  As such, only integer and string types are allowed for the
    user and item columns.  With side data or additional observation columns,
    the type of the column determines how it's treated -- strings are treated
    as categorical variables and integers and floats are treated as numerical
    variables.


    """

    _mt._get_metric_tracker().track('toolkit.recsys.create')
    _mt._get_metric_tracker().track('toolkit.recsys.create.' + method)

    from item_similarity import ItemSimilarityModel
    from matrix_factorization import MatrixFactorizationModel
    from popularity import PopularityModel
    from item_means import ItemMeansModel
    from linear_regression import LinearRegressionModel
    from factorization_model import FactorizationModel

    _mt._get_metric_tracker().track('toolkit.recsys.default.create')

    # Use isinstance for both the validation and the conversion below so that
    # DataFrame subclasses that pass validation are also converted.
    is_dataframe = _HAS_PANDAS and isinstance(observation_data,
                                              _pandas.DataFrame)
    if not (isinstance(observation_data, _SFrame) or is_dataframe):
        raise TypeError('observation_data input must be a pandas.DataFrame '
                        'or SFrame')

    if method == 'auto':
        if target_column is None:
            method = 'item_similarity'
        else:
            method = 'matrix_factorization'

    # 'popularity' and 'item_means' are both initialized under the
    # 'item_counts' model name; they differ only in the Python wrapper class
    # chosen at the end of this function.
    if method in ['popularity', 'item_means']:
        init_opts = {'model_name': 'item_counts'}
    else:
        init_opts = {'model_name': method}

    response = _graphlab.toolkits.main.run("recsys_init", init_opts)
    model_proxy = response['model']

    if is_dataframe:
        observation_data = _SFrame(observation_data)
    if user_data is None:
        user_data = _graphlab.SFrame()
    if item_data is None:
        item_data = _graphlab.SFrame()

    # An empty SFrame is passed when the caller supplies no nearest_items.
    if 'nearest_items' not in method_options:
        method_options['nearest_items'] = _graphlab.SFrame()

    opts = {'dataset': observation_data,
            'user_column': user_column,
            'item_column': item_column,
            'target_column': target_column,
            'user_data': user_data,
            'item_data': item_data,
            'model': model_proxy,
            'random_seed': 1}
    # Caller-supplied options take precedence over the defaults above.
    opts.update(method_options)
    plot = False

    # Disable unobserved_rating_regularization together with binary_targets
    if opts.get('binary_targets', False) and \
            (opts.get('unobserved_rating_regularization', 0) > 0 or
             opts.get('unobserved_rating_value', 0) > 0):
        raise ValueError('Using binary_targets together with unobserved_rating regularization is not yet supported.')

    response = _graphlab.toolkits.main.run('recsys_train', opts, verbose, plot)

    # Map each method name onto the Python class wrapping the trained model.
    model_classes = {
        'item_similarity': ItemSimilarityModel,
        'popularity': PopularityModel,
        'item_means': ItemMeansModel,
        'matrix_factorization': MatrixFactorizationModel,
        'linear_model': LinearRegressionModel,
        'factorization_model': FactorizationModel,
    }
    if method not in model_classes:
        raise RuntimeError("Provided method not recognized.")
    return model_classes[method](response['model'])


def __plot_histogram(measurements, means, names=None, metric_name=None):
    """
    Draw one histogram per measurement series on the current pyplot figure
    and mark each series' mean with a vertical line of the same color.

    Parameters
    ----------
    measurements : list
        List of measurements (recall, precision or RMSE), one entry per
        series.

    means : list
        List of doubles, intended to be the mean of each list in
        'measurements'.

    names : list
        List of model name strings, used as legend labels.

    metric_name : string
        Name of the metric; used as the x-axis label.
    """
    # Cycle through a fixed palette so every series gets a color.
    palette = ['b', 'g', 'r', 'k', 'm', 'c']
    series_colors = [palette[idx % len(palette)]
                     for idx in range(len(measurements))]

    hist_result = _pp.hist(measurements, bins=20, color=series_colors,
                           label=names, hold=True)
    _pp.legend()

    # The first element of the value returned by hist() holds the bin counts
    # of each series.  The tallest bin over all series determines how high
    # the vertical mean markers need to reach.
    tallest_bin = max(max(series_counts) for series_counts in hist_result[0])
    _pp.vlines(means, 0, tallest_bin, colors=series_colors)

    _pp.xlabel(metric_name)
    _pp.ylabel('Counts')


def __plot_overlap_hists(results, label, names, bins=20, alpha=0.3):
    """
    Draw several un-normalized histograms on top of each other in a new
    figure, one per one-dimensional series, each with a step outline.

    Parameters
    ----------
    results : list
        List of list-like objects. Each element is plotted as a separate
        histogram.

    label : string
        Label for the x-axis of the histogram.

    names : list
        Names for each series in `results`, used as legend labels.

    bins : int
        Number of bins. Default is 20.

    alpha : float
        Opacity of the histogram patches. Default is 0.3.
    """
    assert _HAS_NUMPY, 'Cannot find numpy'
    fig, ax = _pp.subplots()

    # The first series is drawn with the requested bin count; the bins that
    # hist() hands back are then reused for every following series.
    first_counts, bins, first_patches = ax.hist(results[0], bins=bins,
                                                alpha=alpha, lw=0.1,
                                                label=names[0])
    outline_color = first_patches[0].get_facecolor()
    # Prepend a zero so the counts line up one-to-one with the bins for step().
    ax.step(bins, _numpy.insert(first_counts, 0, 0),
            color=outline_color, lw=5)

    # Remaining series (no-op when there is only one).
    for series, series_name in zip(results[1:], names[1:]):
        series_counts, bins, series_patches = ax.hist(series, bins=bins,
                                                      alpha=alpha, lw=0.03,
                                                      label=series_name,
                                                      fill=True)
        outline_color = series_patches[0].get_facecolor()
        ax.step(bins, _numpy.insert(series_counts, 0, 0),
                color=outline_color, lw=4)

    ax.ticklabel_format(style='sci', scilimits=(0, 0), axis='y')
    ax.set_xlabel(label)
    ax.set_ylabel('Frequency')
    ax.legend()
    fig.show()


def _compare_results_precision_recall(results, model_names=None):
    """
    Plot, in a new figure, one precision-recall curve per model, where each
    point is the mean precision and mean recall at one cutoff.

    Does nothing when `results` is empty.

    Parameters
    ----------
    results : list
        List of SFrames. Each SFrame describes the evaluation results for a
        separate model and is grouped here by its 'cutoff' column, averaging
        the 'recall' and 'precision' columns.

    model_names : list
        List of model name strings used as legend labels. Defaults to
        "model 0", "model 1", ...
    """

    model_count = len(results)
    palette = ['b', 'g', 'r', 'k', 'm', 'c']

    if model_count < 1:
        return

    if model_names is None:
        model_names = ["model {}".format(idx) for idx in range(model_count)]

    # Average recall and precision over all rows sharing a cutoff.
    mean_aggregators = {'recall': _Aggregate.MEAN('recall'),
                        'precision': _Aggregate.MEAN('precision')}
    curves = [frame.groupby('cutoff', mean_aggregators) for frame in results]
    fig, ax = _pp.subplots()

    for idx, curve in enumerate(curves):
        # Order the points by recall so the line is drawn left to right.
        ordered = curve.sort('recall')
        ax.plot(list(ordered['recall']), list(ordered['precision']),
                palette[idx % len(palette)], label=model_names[idx])

    ax.set_title('Precision-Recall Averaged Over Users')
    ax.set_xlabel('Recall')
    ax.set_ylabel('Precision')
    ax.legend()
    fig.show()


def _compare_results_rmse2(results, model_names=None):
    """
    Plot a comparison of models evaluated with root-mean-squared error
    (RMSE): overlapping histograms of per-user and per-item RMSE, followed
    by a bar chart of the overall RMSE of each model.

    Returns without plotting when `results` is empty or holds more entries
    than matplotlib's 'axes.color_cycle' setting has colors.

    Parameters
    ----------
    results : list
        List of evaluation results, one per model. Each entry is indexed
        with 'rmse_by_user' and 'rmse_by_item' (each of which is indexed
        with 'rmse') and with 'rmse_overall'.

    model_names : list
        List of model name strings. Defaults to "model 0", "model 1", ...
    """
    assert _HAS_NUMPY, 'Cannot find numpy'

    model_count = len(results)

    # The chained comparison short-circuits, so the color cycle is only
    # consulted when there is at least one model.
    if not (1 <= model_count <= len(_rcParams['axes.color_cycle'])):
        return

    if model_names is None:
        model_names = ["model {}".format(idx) for idx in range(model_count)]

    # Histograms of per-user RMSE, then per-item RMSE.
    histogram_specs = (('rmse_by_user', 'Per-User RMSE'),
                       ('rmse_by_item', 'Per-Item RMSE'))
    for result_key, axis_label in histogram_specs:
        rmse_by_model = [list(entry[result_key]['rmse']) for entry in results]
        __plot_overlap_hists(rmse_by_model, axis_label, model_names, bins=100)

    # Bar chart of the overall RMSE of each model.
    overall_rmse = [entry['rmse_overall'] for entry in results]

    fig, ax = _pp.subplots()
    bar_width = 0.3
    bar_positions = _numpy.arange(model_count) + bar_width
    ax.bar(bar_positions, overall_rmse, bar_width)
    # Offset the tick labels by half a bar width relative to the positions.
    ax.set_xticks(bar_positions + bar_width / 2)
    ax.set_xticklabels(model_names)
    ax.set_title('Overall RMSE')
    fig.show()


def compare_models(dataset, models, model_names=None, user_sample=1.0,
                   metric='auto',
                   target_column=None,
                   exclude_known_for_precision_recall=True,
                   make_plot=True,
                   verbose=True,
                   **kwargs):
    """
    Compare models with respect to a common validation set.
    Models that are trained to predict ratings are compared separately from
    models that are trained without target ratings.  The ratings prediction
    models are compared on RMSE, and the rest are compared on precision-recall.

    Parameters
    ----------
    dataset : pandas.DataFrame/SFrame
        Validation dataset.

    models : list
        List of trained RecommenderModels.

    model_names : list
        List of model name strings for display. Defaults to "M0", "M1", ...

    user_sample : double
        Sampling proportion of unique users to use in estimating model
        performance. Defaults to 1.0, i.e. use all users in the dataset.
        When less than 1.0, the user column name is taken from the first
        model.

    metric : str, {'auto', 'rmse', 'precision_recall'}
        Metric for the evaluation. The default automatically splits
        models into two groups with their default evaluation metric respectively:
        'rmse' for models trained with a target_column, and 'precision_recall'
        otherwise.

    target_column : str, optional
        The target column for evaluating rmse.
        If the model is trained with a target column, the default
        is to use the same column.
        If the model is trained without a target column, this option
        must be provided by the user.

    exclude_known_for_precision_recall : bool
        A useful option for evaluating precision-recall.
        Recommender models have the option to exclude items seen in the training
        data from the final recommendation list.
        Set this option to True when evaluating on test data,
        and False when evaluating precision-recall on training data.

    make_plot : bool
        If true, a plot is made using matplotlib. If matplotlib could not be
        imported, a warning is printed instead.

    verbose : bool
        If true, print the progress.

    **kwargs : optional
        Additional keyword arguments forwarded to the ``evaluate`` method of
        every model. ``cutoffs`` defaults to ``range(2, 50, 2)`` unless given
        here.

    Returns
    -------
    out : list
        A list with one evaluation result per model, in the order of
        ``models``. Each entry is whatever that model's ``evaluate`` method
        returned for the given dataset (an SFrame for precision-recall
        results).

    Raises
    ------
    ValueError
        If ``models`` is empty, if ``model_names`` does not have one name per
        model, or if ``user_sample`` is below 1.0 and the first model has no
        ``user_column`` set.

    Examples
    --------
    If you have created two ItemSimilarityModels ``m1`` and ``m2`` and have
    an :class:`~graphlab.SFrame` ``test_data``, then you may compare the
    performance of the two models on test data using:

    >>> import graphlab
    >>> m1 = graphlab.recommender.create(train_data, method='item_similarity')
    >>> m2 = graphlab.recommender.create(train_data, method='item_similarity')
    >>> graphlab.recommender.compare_models(test_data, [m1, m2])

    The evaluation metric is automatically set to 'precision_recall', and the evaluation
    will be based on recommendations that exclude items from the training data.

    If you want to evaluate on training set:

    >>> graphlab.recommender.compare_models(train_data, [m1, m2], exclude_known_for_precision_recall=False)

    If you have four models, two trained with matrix_factorization and a target rating column, and the other
    two trained with item similarity, by default, the models are put into two different groups with
    "rmse", and "precision_recall" as the evaluation metric respectively.

    >>> m3 = graphlab.recommender.create(train_data, target_column='rating', method='matrix_factorization')
    >>> m4 = graphlab.recommender.create(train_data, target_column='rating', method='matrix_factorization')
    >>> graphlab.recommender.compare_models(test_data, [m1, m2, m3, m4])

    To compare the above 4 models using the same "precision_recall" metric, you can do:

    >>> graphlab.recommender.compare_models(test_data, [m1, m2, m3, m4], metric='precision_recall')
    """
    _mt._get_metric_tracker().track('toolkit.recsys.compare_models')

    num_models = len(models)

    if num_models < 1:
        raise ValueError("Must pass in at least one recommender model to "
                         "evaluate")

    if model_names is None:
        model_names = ['M' + str(i) for i in range(num_models)]
    elif len(model_names) != num_models:
        raise ValueError("Must pass in the same number of model names as "
                         "models")

    # if we are asked to sample the users, come up with a list of unique users
    if user_sample < 1.0:
        user_column_name = models[0].get('user_column')
        if user_column_name is None:
            raise ValueError("user_column not set in model(s)")
        user_sa = dataset[user_column_name]
        unique_users = list(user_sa.unique())
        nusers = len(unique_users)
        ntake = int(round(user_sample * nusers))

        _random.shuffle(unique_users)

        # Single-argument parenthesized print behaves the same on Python 2.
        print("compare_models: using " + str(ntake) +
              " users to estimate model performance")
        # frozenset gives constant-time membership tests in the filter below.
        users = frozenset(unique_users[:ntake])
        ix = [u in users for u in dataset[user_column_name]]
        dataset_subset = dataset[_SArray(ix) == True]
    else:
        dataset_subset = dataset

    # Let callers override the default cutoffs instead of failing with a
    # duplicate-keyword TypeError.
    kwargs.setdefault('cutoffs', range(2, 50, 2))

    results = []
    for (m, mname) in zip(models, model_names):
        if verbose:
            print('PROGRESS: Evaluate model %s' % mname)
        r = m.evaluate(dataset_subset,
                       metric,
                       exclude_known_for_precision_recall,
                       target_column,
                       verbose=verbose,
                       **kwargs)
        results.append(r)

    if make_plot:
        if _has_pyplot:
            # Split the results by metric.  This relies on rmse evaluation
            # returning a dict while precision-recall evaluation returns an
            # SFrame.
            is_pr = [isinstance(r, _SFrame) for r in results]
            results_pr = [r for r, pr in zip(results, is_pr) if pr]
            results_rmse = [r for r, pr in zip(results, is_pr) if not pr]
            model_names_pr = [model_names[i] for i in range(num_models)
                              if is_pr[i]]
            model_names_rmse = [model_names[i] for i in range(num_models)
                                if not is_pr[i]]

            if len(results_pr) > 0:
                _compare_results_precision_recall(results_pr, model_names_pr)
            if len(results_rmse) > 0:
                _compare_results_rmse2(results_rmse, model_names_rmse)
            _pp.show()
        else:
            # Previously a bare string expression that was never shown.
            print("Warning: Matplotlib could not be imported - no plot output.")

    return results


def precision_recall_by_user(observed_user_items,
                             recommendations,
                             cutoffs=[10]):
    """
    Compute precision and recall at a given cutoff for each user.

    In information retrieval terms, precision represents the ratio of relevant,
    retrieved items to the number of retrieved items. Recall represents the
    ratio of relevant, retrieved items to the number of relevant items.

    Let :math:`p_k` be a vector of the first :math:`k` elements of the
    recommendations for a particular user, and let :math:`a` be the set of
    items in ``observed_user_items`` for that user. The "precision at cutoff k"
    is defined as

    .. math::
        P(k) = \\frac{ | a \\cap p_k | }{k}

    while "recall at cutoff k" is defined as

    .. math::
        R(k) = \\frac{ | a \\cap p_k | }{|a|}

    The order of the elements in the recommendations affects the returned
    score. Only unique predicted values contribute to the score.

    Parameters
    ----------
    observed_user_items : SFrame or pandas.DataFrame
        The observed (user, item) pairs. It must be non-empty and must contain
        columns with the same names and the same dtypes as the user and item
        columns of ``recommendations``. A pandas DataFrame is converted to an
        SFrame first.

    recommendations : SFrame
        An SFrame containing columns pertaining to the user id, the item id,
        the score given to that pair, and the rank of that item among the
        recommendations made for user id. The first column is used as the
        user column and the second column as the item column. For example,
        see the output of recommend() produced by any graphlab.recommender
        model.

    cutoffs : list of int
        The cutoffs to use when computing precision and recall. Must be a
        non-empty list of positive values.

    Returns
    -------
    out: SFrame
        An SFrame containing columns user id, cutoff, precision, recall, and
        count where  the precision and recall are reported for each user at
        each requested cutoff, and count is the number of observations for
        that user id.

    Raises
    ------
    AssertionError
        If any of the argument checks described above fails.

    Examples
    --------

    Given SFrames ``train_data`` and ``test_data`` with columns user_id
    and item_id:

    >>> m = graphlab.recommender.create(train_data)
    >>> recs = m.recommend()
    >>> precision_recall_by_user(test_data, recs, cutoffs=[5, 10])

    Notes
    -----
    The corner cases that involve empty lists were chosen to be consistent
    with the feasible set of precision-recall curves, which start at
    (precision, recall) = (1,0) and end at (0,1). However, we do not believe
    there is a well-known consensus on this choice.

    """
    _mt._get_metric_tracker().track('toolkit.recsys.precision_recall_by_user')

    if _HAS_PANDAS and type(observed_user_items) is _pandas.DataFrame:
        observed_user_items = _SFrame(observed_user_items)

    assert type(observed_user_items) == _SFrame, \
           "observed_user_items must be an SFrame."
    assert type(recommendations) == _SFrame, \
           "recommendations must be an SFrame."
    assert type(cutoffs) == list, "cutoffs must be a list."
    # Check emptiness explicitly; otherwise min() fails with an opaque
    # "arg is an empty sequence" error.
    assert len(cutoffs) > 0, "At least one cutoff must be provided."
    assert min(cutoffs) > 0, "All cutoffs must be positive integers."
    assert recommendations.num_columns() >= 2

    # The column names are dictated by the recommendations SFrame; the
    # observed data only has to contain columns with matching names.
    user_column = recommendations.column_names()[0]
    item_column = recommendations.column_names()[1]

    assert observed_user_items.num_rows() > 0, \
           "Evaluating precision and recall requires a non-empty " \
           "observed_user_items."
    assert user_column in observed_user_items.column_names(), \
            "User column required in observed_user_items."
    assert item_column in observed_user_items.column_names(), \
            "Item column required in observed_user_items."
    assert observed_user_items[user_column].dtype() == \
           recommendations[user_column].dtype(), \
           "The user column in the two provided SFrames must have the same type."
    assert observed_user_items[item_column].dtype() == \
           recommendations[item_column].dtype(), \
           "The item column in the two provided SFrames must have the same type."

    # Rebinding (not mutating) keeps the shared default list untouched.
    # The cutoffs are handed to the toolkit as a float array.
    cutoffs = array.array('f', cutoffs)

    opts = {'data': observed_user_items,
            'recommendations': recommendations,
            'cutoffs': cutoffs}

    response = _graphlab.toolkits.main.run('evaluation_precision_recall_by_user', opts)
    return _SFrame(None, _proxy=response['pr'])


def random_split_by_user(data,
                         user_column='user_id',
                         item_column='item_id',
                         max_num_users=1000,
                         min_items_per_user=5,
                         item_test_proportion=.2,
                         random_seed=0):
    """
    Randomly partition a data set of (user, item) pairs into a 'train' and
    a 'test' set on a per-user basis.

    Users with fewer than ``min_items_per_user`` observations are dropped
    first. The remaining data is handed to the ``recsys_train_test_split``
    toolkit, which chooses at most ``max_num_users`` users and randomly
    assigns each of their items to the test set according to
    ``item_test_proportion``. The resulting partitions are useful for
    evaluating recommendation systems per user.

    Parameters
    ----------
    data : SFrame
        An SFrame containing (user, item) pairs. A pandas DataFrame is
        converted to an SFrame first.

    user_column : str
        The column name of ``data`` that contains user ids.

    item_column : str
        The column name of ``data`` that contains item ids.

    max_num_users : int, optional
        The maximum number of users to use to construct the test set. If
        None (or the string 'all'), uses all available users.  Default is
        1000; increase this for more accuracy at the expense of speed.

    min_items_per_user : int, optional
        Sampled users must have at least this many observations.

    item_test_proportion : float, optional
        The desired probability that a given user's item will be chosen
        for the test set.

    random_seed : int, optional
        The random seed to use for randomization.

    Returns
    -------
    (train, test): (SFrame, SFrame)
        ``train`` and ``test`` represent a random partition

    Examples
    --------

        >>> import graphlab
        >>> from graphlab.toolkits.recommender.util import random_split_by_user
        >>> sf = graphlab.SFrame('s3://GraphLab-Datasets/audioscrobbler')
        >>> train, test = random_split_by_user(sf,
                                               max_num_users=100,
                                               min_items_per_user=10)

    """

    _mt._get_metric_tracker().track('toolkit.recsys.random_split_by_user')

    # Accept pandas input by converting it up front.
    if _HAS_PANDAS and type(data) is _pandas.DataFrame:
        data = _SFrame(data)

    assert user_column in data.column_names(), \
        'Provided user column "{0}" not found in data set.'.format(user_column)
    assert item_column in data.column_names(), \
        'Provided item column "{0}" not found in data set.'.format(item_column)

    # 'all' is an alias for "no limit", which the toolkit expects as None.
    user_limit = None if max_num_users == 'all' else max_num_users

    # Keep only the rows of users that have enough observations.
    per_user_counts = data.groupby(user_column, _graphlab.aggregate.COUNT)
    has_enough_items = per_user_counts['Count'] >= min_items_per_user
    eligible_users = per_user_counts[has_enough_items][user_column]
    eligible_data = data.filter_by(eligible_users, user_column)

    split_options = {
        'dataset': eligible_data,
        'user_column': user_column,
        'item_column': item_column,
        'max_num_users': user_limit,
        'item_test_proportion': item_test_proportion,
        'random_seed': random_seed,
    }
    split = _graphlab.toolkits.main.run('recsys_train_test_split',
                                        split_options)

    # Wrap the toolkit's proxies in SFrames, train first.
    train = _SFrame(None, _proxy=split['train'])
    test = _SFrame(None, _proxy=split['test'])
    return train, test
