"""
Methods for creating models that rank items according to their similarity
to other items.
"""
import graphlab.connect as _mt
import graphlab as _graphlab
from graphlab.toolkits.recommender.recommender import RecommenderModel
from graphlab.data_structures.sframe import SFrame as _SFrame
from pandas import DataFrame as _DataFrame


def create(dataset, user, item, target=None, similarity_type='jaccard',
           only_top_k=0, holdout_probability=0.0, threshold=0.0,
           threshold_degree=None, threshold_window=None,
           verbose=True, plot=False, random_seed=0, **kwargs):
    """
    Create an :class:`~graphlab.recommender.item_similarity.ItemSimilarityModel`.
    This scores an item according to its similarity to other items observed for the
    user in question. The similarity can be computed based on the observed users
    for each item, e.g. the rating given to both items by users who have rated
    both items.

    Parameters
    ----------
    dataset : pandas.DataFrame/SFrame
        The dataset to use for training the model. A pandas.DataFrame is
        converted to an SFrame before training.

    user : string
        The column name of the dataset that corresponds to user id.

    item : string
        The column name of the dataset that corresponds to item id.

    target : string, optional
        The column name of the dataset that corresponds to the target of
        interest, e.g. the rating a user gave a particular item. When None,
        an empty string is passed to the training routine.

    similarity_type : {'jaccard', 'cosine'}, optional
        Measure of similarity between two items. The allowed functions are:

        - 'jaccard' (default)
        - 'cosine'

    only_top_k : int, optional
        If this value is greater than 0, only the distances to each item's
        k most similar items are stored and used for predictions.
        When there are relatively few items given the number of observations,
        it can be faster to precompute and store all item-to-item similarity
        scores. When many unique items exist, using k>0 can help reduce
        the amount of memory required.

    holdout_probability : float, optional
        Proportion of the dataset held out of model training. The held-out
        subset is used to estimate the model's error rate when making
        predictions with new data; the estimate is available in the model's
        training_stats field.

    threshold : float, optional
        Any items that have similarity below this threshold are ignored during
        prediction.

    threshold_degree : int, optional
        Skip all items with fewer than ``threshold_degree`` observations
        during prediction. This option is only available if ``only_top_k`` > 0.
        It is only forwarded to the training routine when not None.

    threshold_window : float, optional
        Experimental: When ``only_top_k`` > 0, this allows one to skip
        computing similarities for item pairs (i, j) whenever
        max(d_i, d_j) / min(d_i, d_j) > ``threshold_window``, where
        d_i is the number of observations for item i.
        It is only forwarded to the training routine when not None.

    verbose : bool, optional
        If True, print progress updates.

    plot : bool, optional
        If True, display the progress plot. Plotting is not yet implemented
        for item similarity: any truthy value prints a notice and training
        proceeds with plotting disabled.

    random_seed : integer, optional
        The random seed used to choose the training and validation
        split if holdout_probability is nonzero.

    **kwargs : optional
        Additional training options. They are merged into the option
        dictionary built from the named arguments above (overriding entries
        with the same key) before ``threshold_degree`` and
        ``threshold_window`` are applied.

    Returns
    -------
    out : ItemSimilarityModel
        A trained ItemSimilarityModel.

    Raises
    ------
    TypeError
        If ``dataset`` is neither a pandas.DataFrame nor an SFrame.

    Examples
    --------
    If given an :class:`~graphlab.SFrame` ``sf`` with columns ``user_id`` and
    ``item_id``, then we can create a
    :class:`~graphlab.recommender.item_similarity.ItemSimilarityModel` as
    follows:

    >>> from graphlab import recommender
    >>> m = recommender.item_similarity.create(sf, 'user_id', 'item_id')

    With this model object one can make recommendations for the unique users in ``sf``:

    >>> recs = m.recommend(sf)

    The model can be saved to disk as follows:

    >>> m.save(filename)

    For more, see the documentation for
    :class:`~graphlab.recommender.item_similarity.ItemSimilarityModel`.

    References
    ----------
    - Sarwar, B., et al. (2001) `Item-Based Collaborative Filtering
      Recommendation Algorithms
      <http://files.grouplens.org/papers/www10_sarwar.pdf>`_. Proceedings of the
      10th International Conference on World Wide Web. pp. 285-295.
    - `Wikipedia - Jaccard index <http://en.wikipedia.org/wiki/Jaccard_index>`_
    - `Wikipedia - cosine similarity <http://en.wikipedia.org/wiki/Cosine_similarity>`_
    """

    _mt._get_metric_tracker().track('toolkit.recsys.item_similarity.create')

    if not isinstance(dataset, (_DataFrame, _SFrame)):
        raise TypeError('dataset input must be a pandas.DataFrame or SFrame')

    # Only a pandas.DataFrame reaches the conversion; SFrame instances
    # (including subclasses) are passed through untouched.
    if not isinstance(dataset, _SFrame):
        dataset = _SFrame(dataset)

    # Truthiness rather than ``is True`` so that flags such as 1 or
    # numpy.bool_ cannot slip an unsupported plot request through.
    # The single-argument call form prints identically on Python 2 and 3.
    if plot:
        print("The plot functionality for item similarity is not yet implemented.")
        plot = False

    # The training options carry the target column as a string; an empty
    # string stands in for "no target column".
    if target is None:
        target = ''

    # Step 1: ask the toolkit to instantiate an (untrained) item-similarity
    # model and hand back its proxy.
    opts = {'model_name': 'itemcf',
            'similarity_type': similarity_type}
    response = _graphlab.toolkits.main.run("recsys_init", opts)
    model_proxy = response['model']

    # Step 2: assemble the training options for that proxy.
    opts = {'user_column': user,
            'item_column': item,
            'target_column': target,
            'training_data': dataset,
            'model': model_proxy,
            'random_seed': random_seed,
            'only_top_k': only_top_k,
            'threshold': threshold,
            'holdout_probability': holdout_probability}
    opts.update(kwargs)

    # Add threshold options if provided by the user. These are applied after
    # kwargs, so the explicit arguments take precedence.
    if threshold_degree is not None:
        opts['threshold_lower_degree'] = threshold_degree

    if threshold_window is not None:
        opts['threshold_window_degree'] = threshold_window

    # The model is wrapped from the proxy obtained in step 1, so the response
    # of the training call is not needed.
    _graphlab.toolkits.main.run("recsys_train", opts, verbose, plot)

    return ItemSimilarityModel(model_proxy)


class ItemSimilarityModel(RecommenderModel):
    """
    Recommender model that ranks an item by how similar it is to the other
    items observed for the user in question. Similarity can be derived from
    the users observed for each item, e.g. the ratings given to both items by
    users who have rated both of them.
    """

    def __init__(self, model_proxy):
        """
        Wrap an existing model proxy.

        Parameters
        ----------
        model_proxy : object
            The proxy object to wrap; it is stored on ``__proxy__``.
        """
        self.__proxy__ = model_proxy

    def _get_wrapper(self):
        """
        Return a one-argument callable that builds an ItemSimilarityModel
        from a model proxy.
        """
        return lambda model_proxy: ItemSimilarityModel(model_proxy)
