"""
Methods for creating models that rank items according to their similarity
to other items.
"""
import graphlab.connect as _mt
import graphlab as _graphlab
from graphlab.toolkits.recommender.recommender import RecommenderModel
from graphlab.data_structures.sframe import SFrame as _SFrame
from graphlab.data_structures.sarray import SArray as _SArray
from graphlab.deps import pandas as _pandas, HAS_PANDAS as _HAS_PANDAS
import logging

class ItemSimilarityModel(RecommenderModel):
    r"""
    A model that ranks an item according to its similarity to other items
    observed for the user in question.

    **Creating an ItemSimilarityModel**

    This model can be created using
    :func:`graphlab.recommender.create(..., method='item_similarity') <graphlab.recommender.create>`.
    Do NOT instantiate this model class directly.

    When creating an ItemSimilarityModel, the following options may be
    specified:

    +---------------------------+---------+-------------------------------+
    |        Options            | Default |   Description                 |
    +===========================+=========+===============================+
    | similarity_type           | jaccard | Similarity metric to use.     |
    |                           |         | One of 'jaccard', 'cosine',   |
    |                           |         | or 'pearson'.                 |
    +---------------------------+---------+-------------------------------+
    | threshold                 | 0.001   | Predictions ignore items      |
    |                           |         | below this similarity value.  |
    +---------------------------+---------+-------------------------------+
    | only_top_k                | 100     | Number of similar items to    |
    |                           |         | store for each item. Default  |
    |                           |         | value is the number of items. |
    +---------------------------+---------+-------------------------------+
    | nearest_items             | None    | See notes.                    |
    +---------------------------+---------+-------------------------------+

    **Side information**

    Currently, ItemSimilarityModel does not support side features.

    Notes
    -----
    **Model Definition**

    A model with ``method=item_similarity`` first computes the similarity
    between items using the observations of users who have interacted with both
    items. Given a similarity between item :math:`i` and :math:`j`,
    :math:`S(i,j)`, it scores an item :math:`j` for user :math:`u` using a
    weighted average of the user's previous observations :math:`I_u`.

    There are three choices of similarity metrics to use: 'jaccard',
    'cosine' and 'pearson'.

    `Jaccard similarity
    <http://en.wikipedia.org/wiki/Jaccard_index>`_
    is used to measure the similarity between two sets of elements.
    In the context of recommendation, the Jaccard similarity between two
    items is computed as

    .. math:: \mbox{JS}(i,j)
            = \frac{|U_i \cap U_j|}{|U_i \cup U_j|}

    where :math:`U_{i}` is the set of users who rated item :math:`i`.
    Jaccard is a good choice when one only has implicit feedback on items
    (e.g., people rated them or not), or when one does not care about how
    many stars items received.

    If one needs to compare the ratings of items, Cosine and Pearson similarity
    are recommended.

    The Cosine similarity between two items is computed as

    .. math:: \mbox{CS}(i,j)
            = \frac{\sum_{u\in U_{ij}} r_{ui}r_{uj}}
                {\sqrt{\sum_{u\in U_{i}} r_{ui}^2}
                 \sqrt{\sum_{u\in U_{j}} r_{uj}^2}}

    where :math:`U_{i}` is the set of users who rated item :math:`i`,
    and :math:`U_{ij}` is the set of users who rated both items :math:`i` and
    :math:`j`. A problem with Cosine similarity is that it does not consider
    the differences in the mean and variance of the ratings made to
    items :math:`i` and :math:`j`.

    Another popular measure that compares ratings where the effects of means and
    variance have been removed is Pearson Correlation similarity:

    .. math:: \mbox{PS}(i,j)
            = \frac{\sum_{u\in U_{ij}} (r_{ui} - \bar{r}_i)
                                       (r_{uj} - \bar{r}_j)}
                {\sqrt{\sum_{u\in U_{ij}} (r_{ui} - \bar{r}_i)^2}
                 \sqrt{\sum_{u\in U_{ij}} (r_{uj} - \bar{r}_j)^2}}

    The predictions of items depend on whether `target` is specified.
    When the `target` is absent, a prediction for item :math:`j` is made via

    .. math:: y_{uj}
            = \frac{\sum_{i \in I_u} \mbox{SIM}(i,j)  }{|I_u|}


    Otherwise, predictions for ``jaccard`` and ``cosine`` similarities are made via

    .. math:: y_{uj}
            = \frac{\sum_{i \in I_u} \mbox{SIM}(i,j) r_{ui} }{\sum_{i \in I_u} \mbox{SIM}(i,j)}

    Predictions for ``pearson`` similarity are made via

    .. math:: y_{uj}
            = \bar{r}_j + \frac{\sum_{i \in I_u} \mbox{SIM}(i,j) (r_{ui} - \bar{r}_i) }{\sum_{i \in I_u} \mbox{SIM}(i,j)}


    For more details of item similarity methods, please see, e.g.,
    Chapter 4 of [Ricci_et_al]_.

    **Incorporating pre-defined similar items**

    For item similarity models, one may choose to provide a user-specified
    nearest neighbors graph using the keyword argument `nearest_items`. This is
    an SFrame containing, for each item, the nearest items and the similarity
    score between them. If provided, these item similarity scores are used for
    recommendations. The SFrame must contain the columns
    'similar_item' and 'score' as well as one with the same name used for
    the items in `observation_data`, e.g. 'item_id'. For example, suppose
    you first create an ItemSimilarityModel and use
    :meth:`~graphlab.recommender.ItemSimilarityModel.get_similar_items`:

    >>> sf = graphlab.SFrame({'user_id': ["0", "0", "0", "1", "1", "2", "2", "2"],
    ...                       'item_id': ["a", "b", "c", "a", "b", "b", "c", "d"]})
    >>> m = graphlab.recommender.create(sf, method='item_similarity')
    >>> nn = m.get_similar_items()
    >>> m2 = graphlab.recommender.create(sf,
    ...                                  method='item_similarity',
    ...                                  nearest_items=nn)

    With the above code, the item similarities computed for model `m` can be
    used to create a new recommender object, `m2`. Note that we could have
    created `nn` from some other means, but now use `m2` to make
    recommendations via `m2.recommend()`.

    References
    ----------
    .. [Ricci_et_al] Francesco Ricci, Lior Rokach, and Bracha Shapira.
        `Introduction to recommender systems handbook
        <http://www.ics.uci.edu/~smyth/courses/cs277/papers/intro-rec-sys-handbook.pdf>`_.
        Springer US, 2011.
    """

    def __init__(self, model_proxy):
        '''__init__(self)

        Wrap an existing model proxy.

        Parameters
        ----------
        model_proxy : object
            Handle to the underlying model; it is stored on the instance as
            ``__proxy__`` and passed to the toolkit when the model is queried.
        '''
        self.__proxy__ = model_proxy


    def _get_wrapper(self):
        """
        Return a factory that wraps a model proxy in an ItemSimilarityModel.

        Returns
        -------
        out : function
            A one-argument callable ``model_wrapper(model_proxy)`` that
            returns ``ItemSimilarityModel(model_proxy)``.
        """
        def model_wrapper(model_proxy):
            return ItemSimilarityModel(model_proxy)
        return model_wrapper


    def get_similar_items(self, items=None, k=10):
        """
        Get the k most similar items for each item in items.

        Parameters
        ----------
        items : SArray or list; optional
            An :class:`~graphlab.SArray` or list of item ids for which to get
            similar items. If 'None', then return the `k` most similar items for
            all items in the training set.

        k : int, optional
            The number of similar items for each item.

        Returns
        -------
        out : SFrame
            A SFrame with the top ranked similar items for each item. The
            columns are `item_id`, 'similar_item', 'score' and 'rank', where
            `item_id` matches the item column name specified at training time.
            The 'rank' is between 1 and `k` and 'score' gives the similarity
            score of that item. The value of the score depends on the method
            used for computing item similarities.

        Raises
        ------
        TypeError
            If `k` is not an int, or if `items` is neither an SArray nor a
            list (nor None).

        Examples
        --------

        >>> sf = graphlab.SFrame({'user_id': ["0", "0", "0", "1", "1", "2", "2", "2"],
        ...                       'item_id': ["a", "b", "c", "a", "b", "b", "c", "d"]})
        >>> m = graphlab.recommender.create(sf, method='item_similarity')
        >>> nn = m.get_similar_items()
        """

        _mt._get_metric_tracker().track('toolkit.recsys.get_similar_items')

        def type_error(arg, arg_name, allowed_types):
            # Build the TypeError shared by all argument checks in this method.
            return TypeError("Parameter " + arg_name + " must be of type(s) "
                             + (", ".join(allowed_types))
                             + "; Type '" + str(type(arg)) + "' not recognized.")

        # Validate with an explicit exception instead of `assert`: asserts are
        # removed when Python runs with -O, which would let a bad `k` through.
        # bool is a subclass of int, so it is rejected explicitly to keep the
        # original exact-int requirement.
        if isinstance(k, bool) or not isinstance(k, int):
            raise type_error(k, "k", ["int"])

        # An empty SArray is what gets sent to the toolkit when no items are
        # requested; lists are converted so the toolkit always sees an SArray.
        if items is None:
            items = _SArray()
        elif isinstance(items, list):
            items = _SArray(items)

        if not isinstance(items, _SArray):
            raise type_error(items, "items", ["SArray", "list"])

        opt = {'model': self.__proxy__,
               'items': items,
               'top_k': k}

        response = _graphlab.toolkits.main.run('recsys_get_similar_items', opt)

        # The toolkit returns a proxy to the result table under 'data'.
        neighbors = _SFrame(None, _proxy=response['data'])

        return neighbors


