"""
Methods for creating models that rank items according to their similarity
to other items.
"""
import graphlab.connect as _mt
import graphlab as _graphlab
from graphlab.toolkits.recommender.recommender import RecommenderModel
from graphlab.data_structures.sframe import SFrame as _SFrame
from graphlab.data_structures.sarray import SArray as _SArray
from graphlab.deps import pandas as _pandas, HAS_PANDAS as _HAS_PANDAS
import logging


def create(dataset,
           user_column='user_id', item_column='item_id', target_column=None,
           user_data=None, item_data=None,
           similarity_type='jaccard',
           only_top_k=0, threshold=0.0,
           threshold_degree=None, threshold_window=None,
           verbose=True, plot=False, **method_options):
    """
    *Deprecated*. Please use

    ``graphlab.recommender.create(..., method="item_similarity")``

    instead.

    This is a thin wrapper: it validates ``dataset``, collects the
    item-similarity options into a single dictionary and forwards everything
    to ``graphlab.recommender.create`` with ``method='item_similarity'``.

    Parameters
    ----------
    dataset : SFrame or pandas.DataFrame
        The observation data. A pandas DataFrame is converted to an SFrame
        before it is passed on.

    user_column : str, optional
        Forwarded unchanged as ``user_column``.

    item_column : str, optional
        Forwarded unchanged as ``item_column``.

    target_column : str, optional
        Forwarded unchanged as ``target_column``.

    user_data : optional
        Forwarded unchanged as ``user_data``.

    item_data : optional
        Forwarded unchanged as ``item_data``.

    similarity_type : str, optional
        Forwarded as the ``similarity_type`` option. The
        ``ItemSimilarityModel`` documentation lists ``jaccard``, ``cosine``
        and ``pearson``.

    only_top_k : int, optional
        Forwarded as the ``only_top_k`` option.

    threshold : float, optional
        Forwarded as the ``threshold`` option.

    threshold_degree : optional
        If not None, forwarded as the ``threshold_lower_degree`` option.

    threshold_window : optional
        If not None, forwarded as the ``threshold_window_degree`` option.

    verbose : bool, optional
        Forwarded unchanged as ``verbose``.

    plot : bool, optional
        Plotting is not implemented. When True, a notice is printed and the
        flag is otherwise ignored.

    **method_options
        Any further keyword options (for example ``nearest_items``). They are
        forwarded to ``graphlab.recommender.create``; the explicitly named
        parameters above take precedence on a key collision.

    Returns
    -------
    out
        Whatever ``graphlab.recommender.create`` returns for
        ``method='item_similarity'`` (presumably an ``ItemSimilarityModel``).

    Raises
    ------
    TypeError
        If ``dataset`` is neither an SFrame nor a pandas DataFrame.
    """

    _mt._get_metric_tracker().track('toolkit.recsys.item_similarity.create')

    logging.warning("item_similarity.create will be deprecated soon. Please switch to recommender.create")

    if not (isinstance(dataset, _SFrame) or (_HAS_PANDAS and isinstance(dataset, _pandas.DataFrame))):
        raise TypeError('dataset input must be a pandas.DataFrame or SFrame')

    if not isinstance(dataset, _SFrame):
        dataset = _SFrame(dataset)

    if plot is True:
        # Parenthesised single-argument form prints the same text under both
        # the Python 2 print statement and the Python 3 print function.
        print("The plot functionality for item similarity is not yet implemented.")

    # Start from the caller's extra options so they are no longer dropped,
    # then overlay the explicitly named parameters.
    options = dict(method_options)

    # The method is fixed by this wrapper; discard a stray 'method' key so it
    # cannot clash with the explicit keyword passed below.
    options.pop('method', None)

    options.update({'similarity_type': similarity_type,
                    'only_top_k': only_top_k,
                    'threshold': threshold})

    # Add threshold options if provided by the user.
    if threshold_degree is not None:
        options['threshold_lower_degree'] = threshold_degree

    if threshold_window is not None:
        options['threshold_window_degree'] = threshold_window

    return _graphlab.recommender.create(
        dataset,
        user_column=user_column, item_column=item_column,
        target_column=target_column,
        user_data=user_data, item_data=item_data,
        verbose=verbose,
        method='item_similarity',
        **options)


class ItemSimilarityModel(RecommenderModel):
    r"""
    A model that ranks an item according to its similarity to other items
    observed for the user in question. The similarity can be computed based on
    the observed users for each item, e.g. the rating given to both items by
    users who have rated both items.

    An instance of this model can be created using
    :func:`create(..., method='item_similarity') <graphlab.recommender.create>`.
    Do NOT construct the model directly.

    **Model Definition**

    A model with ``method=item_similarity`` first computes the similarity
    between items using the observations of users who have rated those two
    items. Given a similarity between item :math:`i` and :math:`j`,
    :math:`S(i,j)`, we score an item :math:`j` for user :math:`u` using a
    weighted average of the user's previous observations :math:`I_u`.

    There are three choices of similarity metrics to use: ``jaccard``,
    ``cosine`` and ``pearson``.

    `Jaccard similarity
    <http://en.wikipedia.org/wiki/Jaccard_index>`_
    is used to measure the similarity between two sets of elements.
    In the context of recommendation, the Jaccard similarity between two
    items is computed as

    .. math:: \mbox{JS}(i,j)
            = \frac{|U_i \cap U_j|}{|U_i \cup U_j|}

    where :math:`U_{i}` is the set of users who rated item :math:`i`.
    Jaccard is a good choice when one only has implicit feedback on items
    (e.g., people rated them or not), or when one does not care about how
    many stars items received. A prediction for item :math:`j` is made via

    .. math:: y_{uj}
            = \frac{\sum_{i \in I_u} \mbox{JS}(i,j)  }{|I_u|}

    If one needs to compare the ratings of items, Cosine and Pearson similarity
    are recommended.

    The Cosine similarity between two items is computed as

    .. math:: \mbox{CS}(i,j)
            = \frac{\sum_{u\in U_{ij}} r_{ui}r_{uj}}
                {\sqrt{\sum_{u\in U_{i}} r_{ui}^2}
                 \sqrt{\sum_{u\in U_{j}} r_{uj}^2}}

    where :math:`U_{i}` is the set of users who rated item :math:`i`,
    and :math:`U_{ij}` is the set of users who rated both items :math:`i` and
    :math:`j`. A problem with Cosine similarity is that it does not consider
    the differences in the mean and variance of the ratings made to
    items :math:`i` and :math:`j`. Predictions are made via

    .. math:: y_{uj}
            = \frac{\sum_{i \in I_u} \mbox{CS}(i,j) r_{ui} }{\sum_{i \in I_u} \mbox{CS}(i,j)}

    A popular measure that compares ratings where the effects of means and
    variance have been removed is Pearson Correlation similarity:

    .. math:: \mbox{PS}(i,j)
            = \frac{\sum_{u\in U_{ij}} (r_{ui} - \bar{r}_i)
                                        (r_{uj} - \bar{r}_j)}
                {\sqrt{\sum_{u\in U_{ij}} (r_{ui} - \bar{r}_i)^2}
                 \sqrt{\sum_{u\in U_{ij}} (r_{uj} - \bar{r}_j)^2}}

    Predictions are made via

    .. math:: y_{uj}
            = \frac{\sum_{i \in I_u} \mbox{PS}(i,j) r_{ui} }{\sum_{i \in I_u} |\mbox{PS}(i,j)|}

    For more details of item similarity methods, please see, e.g.,
    Chapter 4 of [1]_

    For item similarity models, one may choose to provide a user-specified
    nearest neighbors graph using the keyword argument `nearest_items`. This is
    an SFrame containing, for each item, the nearest items and the similarity
    score between them. If provided, these item similarity scores are used for
    recommendations. The SFrame must contain the columns
    'similar_item' and 'score' as well as one with the same name used for
    the items in `observation_data`, e.g. 'item_id'. For example, suppose
    you first create an ItemSimilarityModel and use
    :meth:`~graphlab.recommender.ItemSimilarityModel.get_similar_items`:

    >>> m = gl.recommender.create(data, 'user', 'item',
    ...                           method='item_similarity')
    >>> nn = m.get_similar_items()
    >>> m2 = gl.recommender.create(data, 'user', 'item',
    ...                            method='item_similarity',
    ...                            nearest_items=nn)

    With the above code, the item similarities computed for model `m` can be
    used to create a new recommender object, `m2`. Note that we could have
    created `nn` from some other means, but now use `m2` to make
    recommendations via `m2.recommend()`.

    .. [1] Ricci, Francesco, Lior Rokach, and Bracha Shapira.
            Introduction to recommender systems handbook. Springer US, 2011.

    """

    def __init__(self, model_proxy):
        # NOTE(review): the first docstring line looks like a Sphinx
        # signature override hiding ``model_proxy`` from the rendered docs;
        # it is kept as the first line for that reason -- confirm.
        '''__init__(self)

        Wrap an existing model proxy object. Not meant to be called
        directly; see the class documentation.
        '''
        self.__proxy__ = model_proxy

    def _get_wrapper(self):
        """
        Return a callable that wraps a model proxy in an ItemSimilarityModel.
        """
        def model_wrapper(model_proxy):
            return ItemSimilarityModel(model_proxy)
        return model_wrapper

    def get_similar_items(self, items=None, k=10):
        """
        Get the k most similar items for each item in items.

        Parameters
        ----------
        items : SArray or list, optional
            An :class:`~graphlab.SArray` (or list) of items for which to get
            similar items. If None, similar items are generated for all items
            in the training set.

        k : int, optional
            The number of similar items for each item.

        Returns
        -------
        out : SFrame
            An SFrame with the top ranked similar items for each item.
            The columns are ``item_column``, *similar_item*, *score* and
            *rank*, where ``item_column`` matches the item column name
            specified at training time. The *rank* is between 1 and ``k`` and
            *score* gives the similarity score of that item. The value of the
            score depends on the method used for computing item similarities.

        Raises
        ------
        TypeError
            If ``k`` is not an int, or ``items`` is not None, a list or an
            SArray.
        """

        _mt._get_metric_tracker().track('toolkit.recsys.get_similar_items')

        # Validate with an explicit raise rather than ``assert`` so the check
        # survives ``python -O``. bool is a subclass of int and was rejected
        # by the previous exact-type check, so keep rejecting it.
        if isinstance(k, bool) or not isinstance(k, int):
            raise TypeError("Parameter k must be of type(s) int"
                            + "; Type '" + str(type(k)) + "' not recognized.")

        if items is None:
            # An empty SArray is what is sent when no items are given.
            items = _SArray()
        elif isinstance(items, list):
            items = _SArray(items)

        if not isinstance(items, _SArray):
            raise TypeError("Parameter items must be of type(s) "
                            + ", ".join(["SArray", "list"])
                            + "; Type '" + str(type(items)) + "' not recognized.")

        opt = {'model': self.__proxy__,
               'items': items,
               'top_k': k}

        response = _graphlab.toolkits.main.run('recsys_get_similar_items', opt)

        return _SFrame(None, _proxy=response['data'])


