"""
Methods for creating models that rank items according to their similarity
to other items.
"""
import graphlab.connect as _mt
import graphlab as _graphlab
from graphlab.toolkits.recommender.util import _Recommender
from graphlab.data_structures.sframe import SFrame as _SFrame
from graphlab.data_structures.sarray import SArray as _SArray
import json as _json

# Default range for each hyperparameter of this model, keyed by the
# hyperparameter name; each value lists the candidate settings.
DEFAULT_HYPER_PARAMETER_RANGE = dict(
    similarity_type=['jaccard', 'cosine', 'pearson'],
)
from graphlab.toolkits._model import _get_default_options_wrapper


def create(observation_data,
           user_id='user_id', item_id='item_id', target=None,
           user_data=None, item_data=None,
           nearest_items=None,
           similarity_type='jaccard',
           training_method='auto',
           threshold=0.001,
           only_top_k=100,
           random_seed=0,
           verbose=True):
    """
    Create a recommender that uses item-item similarities based on
    users in common.

    Parameters
    ----------
    observation_data : SFrame
        The dataset to use for training the model. It must contain a column of
        user ids and a column of item ids. Each row represents an observed
        interaction between the user and the item.  The (user, item) pairs
        are stored with the model so that they can later be excluded from
        recommendations if desired. It can optionally contain a target ratings
        column. All other columns are interpreted by the underlying model as
        side features for the observations.

        The user id and item id columns must be of type 'int' or 'str'. The
        target column must be of type 'int' or 'float'.

    user_id : string, optional
        The name of the column in `observation_data` that corresponds to the
        user id.

    item_id : string, optional
        The name of the column in `observation_data` that corresponds to the
        item id.

    target : string, optional
        The `observation_data` can optionally contain a column of scores
        representing ratings given by the users. If present, the name of this
        column may be specified with the `target` argument.

    user_data : SFrame, optional
        Side information for the users.  This SFrame must have a column with
        the same name as what is specified by the `user_id` input parameter.
        `user_data` can provide any amount of additional user-specific
        information. (NB: This argument is currently ignored by this model.)

    item_data : SFrame, optional
        Side information for the items.  This SFrame must have a column with
        the same name as what is specified by the `item_id` input parameter.
        `item_data` can provide any amount of additional item-specific
        information. (NB: This argument is currently ignored by this model.)

    similarity_type : {'jaccard', 'cosine', 'pearson'}, optional
        Similarity metric to use. See ItemSimilarityRecommender for details.
        Default: 'jaccard'.

    threshold : float, optional
        Predictions ignore items below this similarity value.
        Default: 0.001.

    only_top_k : int, optional
        Number of similar items to store for each item.
        Default: 100.

    nearest_items : SFrame, optional
        A set of each item's nearest items. When provided, this overrides
        the similarity computed above.
        See Notes in the documentation for ItemSimilarityRecommender.
        Default: None.

    training_method : {'auto', 'in_memory', 'sgraph'}, optional
        Two internal implementations are available: an in-memory version that
        stores a symmetric matrix containing item-item similarities, and an
        on-disk version that scales to more items. These can be chosen using
        the arguments 'in_memory' and 'sgraph', respectively. By default, the
        method automatically chooses 'in_memory' for datasets containing
        20,000 or fewer unique items in the observation_data.

    random_seed : int, optional
        Random seed forwarded to the training routine.
        Default: 0.

    verbose : bool, optional
        Forwarded to the training call; presumably toggles progress output
        while the model is trained.
        Default: True.

    Returns
    -------
    out : ItemSimilarityRecommender
        A recommender wrapping the trained model.

    Examples
    --------
    Given basic user-item observation data, an
    :class:`~graphlab.recommender.item_similarity_recommender.ItemSimilarityRecommender` is created:

    >>> sf = graphlab.SFrame({'user_id': ['0', '0', '0', '1', '1', '2', '2', '2'],
    ...                       'item_id': ['a', 'b', 'c', 'a', 'b', 'b', 'c', 'd']})
    >>> m = graphlab.item_similarity_recommender.create(sf)
    >>> recs = m.recommend()

    When a target is available, one can specify the desired similarity. For
    example we may choose to use a cosine similarity, and use it to make
    predictions or recommendations.

    >>> sf2 = graphlab.SFrame({'user_id': ['0', '0', '0', '1', '1', '2', '2', '2'],
    ...                        'item_id': ['a', 'b', 'c', 'a', 'b', 'b', 'c', 'd'],
    ...                        'rating': [1, 3, 2, 5, 4, 1, 4, 3]})
    >>> m2 = graphlab.item_similarity_recommender.create(sf2, target="rating",
    ...                                                  similarity_type='cosine')
    >>> m2.predict(sf)
    >>> m2.recommend()

    Notes
    -----
    Currently, :class:`~graphlab.recommender.item_similarity_recommender.ItemSimilarityRecommender`
    does not leverage the use of side features `user_data` and `item_data`.

    **Incorporating pre-defined similar items**

    For item similarity models, one may choose to provide a user-specified
    nearest neighbors graph using the keyword argument `nearest_items`. This is
    an SFrame containing, for each item, the nearest items and the similarity
    score between them. If provided, these item similarity scores are used for
    recommendations. The SFrame must contain the columns
    'similar_item' and 'score' as well as one with the same name used for
    the items in `observation_data`, e.g. 'item_id'. For example, suppose
    you first create an ItemSimilarityRecommender and use
    :class:`~graphlab.recommender.ItemSimilarityRecommender.get_similar_items`:

    >>> sf = graphlab.SFrame({'user_id': ["0", "0", "0", "1", "1", "2", "2", "2"],
    ...                       'item_id': ["a", "b", "c", "a", "b", "b", "c", "d"]})
    >>> m = graphlab.item_similarity_recommender.create(sf)
    >>> nn = m.get_similar_items()
    >>> m2 = graphlab.item_similarity_recommender.create(sf, nearest_items=nn)

    With the above code, the item similarities computed for model `m` can be
    used to create a new recommender object, `m2`. Note that we could have
    created `nn` from some other means, but now use `m2` to make
    recommendations via `m2.recommend()`.


    See Also
    --------
    ItemSimilarityRecommender

    """

    _mt._get_metric_tracker().track('toolkit.recsys.item_similarity_recommender.create')

    # Step 1: ask the toolkit for a fresh, untrained model of this type.
    init_opts = {'model_name': 'item_similarity'}
    response = _graphlab.toolkits._main.run("recsys_init", init_opts)
    model_proxy = response['model']

    # Optional SFrame inputs are sent as empty SFrames rather than None.
    if user_data is None:
        user_data = _graphlab.SFrame()
    if item_data is None:
        item_data = _graphlab.SFrame()
    if nearest_items is None:
        nearest_items = _graphlab.SFrame()

    # Step 2: train the model on the observation data.
    opts = {'dataset': observation_data,
            'user_id': user_id,
            'item_id': item_id,
            'target': target,
            'user_data': user_data,
            'item_data': item_data,
            'nearest_items': nearest_items,
            'model': model_proxy,
            # Honor the caller's seed; a hard-coded 1 used to be sent here,
            # which silently ignored the `random_seed` argument.
            'random_seed': random_seed,
            'similarity_type': similarity_type,
            'training_method': training_method,
            'threshold': threshold,
            'only_top_k': only_top_k}

    response = _graphlab.toolkits._main.run('recsys_train', opts, verbose)
    return ItemSimilarityRecommender(response['model'])



# Module-level `get_default_options` callable, produced by the shared wrapper
# factory from this model's internal name, module path and class name.
get_default_options = _get_default_options_wrapper(
    'item_similarity',
    'recommender.item_similarity',
    'ItemSimilarityRecommender',
)

class ItemSimilarityRecommender(_Recommender):
    r"""
    A model that ranks an item according to its similarity to other items
    observed for the user in question.

    **Creating an ItemSimilarityRecommender**

    This model cannot be constructed directly.  Instead, use
    :func:`graphlab.recommender.item_similarity_recommender.create`
    to create an instance
    of this model. A detailed list of parameter options and code samples
    are available in the documentation for the create function.

    Notes
    -----
    **Model Definition**

    This model first computes the similarity
    between items using the observations of users who have interacted with both
    items. Given a similarity between item :math:`i` and :math:`j`,
    :math:`S(i,j)`, it scores an item :math:`j` for user :math:`u` using a
    weighted average of the user's previous observations :math:`I_u`.

    There are three choices of similarity metrics to use: 'jaccard',
    'cosine' and 'pearson'.

    `Jaccard similarity
    <http://en.wikipedia.org/wiki/Jaccard_index>`_
    is used to measure the similarity between two sets of elements.
    In the context of recommendation, the Jaccard similarity between two
    items is computed as

    .. math:: \mbox{JS}(i,j)
            = \frac{|U_i \cap U_j|}{|U_i \cup U_j|}

    where :math:`U_{i}` is the set of users who rated item :math:`i`.
    Jaccard is a good choice when one only has implicit feedback of items
    (e.g., people rated them or not), or when one does not care about how
    many stars items received.

    If one needs to compare the ratings of items, Cosine and Pearson similarity
    are recommended.

    The Cosine similarity between two items is computed as

    .. math:: \mbox{CS}(i,j)
            = \frac{\sum_{u\in U_{ij}} r_{ui}r_{uj}}
                {\sqrt{\sum_{u\in U_{i}} r_{ui}^2}
                 \sqrt{\sum_{u\in U_{j}} r_{uj}^2}}

    where :math:`U_{i}` is the set of users who rated item :math:`i`,
    and :math:`U_{ij}` is the set of users who rated both items :math:`i` and
    :math:`j`. A problem with Cosine similarity is that it does not consider
    the differences in the mean and variance of the ratings made to
    items :math:`i` and :math:`j`.

    Another popular measure that compares ratings where the effects of means and
    variance have been removed is Pearson Correlation similarity:

    .. math:: \mbox{PS}(i,j)
            = \frac{\sum_{u\in U_{ij}} (r_{ui} - \bar{r}_i)
                                        (r_{uj} - \bar{r}_j)}
                {\sqrt{\sum_{u\in U_{ij}} (r_{ui} - \bar{r}_i)^2}
                 \sqrt{\sum_{u\in U_{ij}} (r_{uj} - \bar{r}_j)^2}}

    The predictions of items depend on whether `target` is specified.
    When the `target` is absent, a prediction for item :math:`j` is made via

    .. math:: y_{uj}
            = \frac{\sum_{i \in I_u} \mbox{SIM}(i,j)  }{|I_u|}


    Otherwise, predictions for ``jaccard`` and ``cosine`` similarities are made via

    .. math:: y_{uj}
            = \frac{\sum_{i \in I_u} \mbox{SIM}(i,j) r_{ui} }{\sum_{i \in I_u} \mbox{SIM}(i,j)}

    Predictions for ``pearson`` similarity are made via

    .. math:: y_{uj}
            = \bar{r}_j + \frac{\sum_{i \in I_u} \mbox{SIM}(i,j) (r_{ui} - \bar{r}_i) }{\sum_{i \in I_u} \mbox{SIM}(i,j)}


    For more details of item similarity methods, please see, e.g.,
    Chapter 4 of [Ricci_et_al]_.

    See Also
    --------
    create

    References
    ----------
    .. [Ricci_et_al] Francesco Ricci, Lior Rokach, and Bracha Shapira.
        `Introduction to recommender systems handbook
        <http://www.ics.uci.edu/~smyth/courses/cs27
        7/papers/intro-rec-sys-handbook.pdf>`_. Springer US, 2011.
    """

    # NOTE(review): the one-line docstring below looks like a documentation
    # signature override that hides `model_proxy` from rendered docs --
    # confirm before rewording it.
    # The constructor only stores the given model proxy on the instance.
    def __init__(self, model_proxy):
        '''__init__(self)'''
        self.__proxy__ = model_proxy


    def _get_wrapper(self):
        """
        Return a one-argument callable that wraps a model proxy in a new
        ItemSimilarityRecommender.
        """
        def model_wrapper(model_proxy):
            return ItemSimilarityRecommender(model_proxy)
        return model_wrapper



