"""
Methods for performing logistic regression.  See
graphlab.recommender.logistic_regression.create for additional documentation.
"""

import graphlab.connect as _mt
import graphlab as _graphlab
from graphlab.toolkits.recommender.recommender import RecommenderModel
from graphlab.data_structures.sframe import SFrame as _SFrame
from pandas import DataFrame as _DataFrame


def create(dataset, user, item, target, reg=1e-5,
           holdout_probability=0.0, sgd_step_size=0.1, max_iterations=1000,
           verbose=True, plot=False,
           random_seed=0,
           **kwargs):
    """
    Create a
    :class:`~graphlab.recommender.logistic_regression.LogisticRegressionModel`.
    This model is appropriate when the target of interest is binary. A trained
    model can be used to score (user, item) pairs and make recommendations.

    Logistic regression creates a generalized linear function that
    predicts a binary value given the user, item, and any additional
    features.  This model can use any number of additional features
    such as user or item attributes to help make an accurate
    prediction.

    The parallel optimization method used to build this model is
    inherently random, so different calls to `create()` may return
    slightly different models.

    In addition to the `user` and `item` features,
    LogisticRegressionModel also uses any additional columns in
    `dataset` to build the model and make predictions.  These features
    can include additional attributes of the user or the item in the
    observation.  Additionally, observation-specific information such
    as the time of day the user rated the item can also be included.
    Values for these side features must be present both when the model
    is created and when predictions are made.

    The exact function of these features in the regression model is
    determined by the type of the SFrame column holding the feature
    values.  If these features are strings or integer types, they are
    treated as categorical variables alongside users and items; if
    they are typed as floating point numbers, then they are treated as
    continuous values.

    Parameters
    ----------
    dataset : SFrame/DataFrame
        The dataset to use for training the model. A pandas DataFrame is
        converted to an SFrame before training. (required)

    user : string
        The column name of the dataset that corresponds to user id. (required)

    item : string
        The column name of the dataset that corresponds to item id. (required)

    target : string
        The column name of the target column. The model will be
        trained to predict the values in this column. The values must
        be binary (0 or 1).  (required)

    reg : float, optional
        Regularization weight.

    holdout_probability : float, optional
        Proportion of the dataset held out of model training. The held-out
        subset is used to estimate the model's error rate when making
        predictions with new data; the prediction is available in the model's
        training_stats field.

    sgd_step_size : float, optional
        Step size for stochastic gradient descent. Smaller values may
        add accuracy but cause the model to converge more slowly.
        NOTE(review): this function does not currently place this value
        in the options sent to training, so changing it has no effect
        here -- confirm the backend option name before wiring it in.

    max_iterations : int, optional
        Maximum number of iterations through the data.

    verbose : bool, optional
        If True, print progress updates.

    plot : bool, optional
        Requests a progress plot.  Plotting is not implemented for this
        model; a true value only prints a notice.

    random_seed : integer, optional
        The random seed used to choose the training and validation
        split if holdout_probability is nonzero.  It is also used to
        choose a random starting point with which to initialize the model.

    **kwargs : optional
        Additional training options.  These are merged into the options
        built from the arguments above and take precedence over them
        when keys collide.

    Returns
    -------
    out : LogisticRegressionModel
        A trained model.

    Raises
    ------
    TypeError
        If `dataset` is neither a pandas.DataFrame nor an SFrame.

    Examples
    --------
    If given an :class:`~graphlab.SFrame` ``sf`` with columns ``user_id`` and ``item_id`` and
    a column of 1's and 0's that we would like to predict called ``target``, then we can
    create a :class:`~graphlab.recommender.logistic_regression.LogisticRegressionModel` as follows:

    >>> from graphlab import recommender
    >>> m = recommender.logistic_regression.create(sf, 'user_id', 'item_id', target='target')

    With this model object one can make recommendations for the unique users in ``sf``:

    >>> recs = m.recommend(sf)

    The model can be saved to disk as follows:

    >>> m.save(filename)

    For more, see the documentation for
    :class:`~graphlab.recommender.logistic_regression.LogisticRegressionModel`.

    References
    ----------
    - `Wikipedia - logistic regression (general)
      <http://en.wikipedia.org/wiki/Logistic_regression>`_
    """

    _mt._get_metric_tracker().track('toolkit.recsys.logistic_regression.create')

    if not isinstance(dataset, (_DataFrame, _SFrame)):
        raise TypeError('dataset input must be a pandas.DataFrame or SFrame')

    # Training is handed an SFrame, so convert pandas input up front.
    if not isinstance(dataset, _SFrame):
        dataset = _SFrame(dataset)

    if plot:
        # Plotting is unsupported here; notify the caller and keep training.
        # The parenthesised single-argument form prints identically under
        # both the Python 2 print statement and the Python 3 print function.
        print("The plot functionality for logistic regression is not yet "
              "implemented.")

    # Step 1: ask the toolkit for an untrained model of the right kind.
    opts = {'model_name': 'logistic_regression'}
    response = _graphlab.toolkits.main.run("recsys_init", opts)
    model_proxy = response['model']

    # Step 2: train that model in place.
    # NOTE(review): sgd_step_size is intentionally absent below because the
    # original code never forwarded it; see the docstring.
    opts = {'user_column': user,
            'item_column': item,
            'target_column': target,
            'training_data': dataset,
            'linear_regularization': reg,
            'random_seed': random_seed,
            'max_iterations': max_iterations,
            'model': model_proxy,
            'holdout_probability': holdout_probability}
    # Caller-supplied options override the defaults assembled above.
    opts.update(kwargs)
    _graphlab.toolkits.main.run("recsys_train", opts, verbose)
    return LogisticRegressionModel(model_proxy)


class LogisticRegressionModel(RecommenderModel):
    """
    Recommender built on logistic regression.

    The model learns one coefficient per user, one coefficient per item,
    and a global offset term, and uses them to predict binary (0 or 1)
    user-item ratings.  The "user_id" and "item_id" columns in the input
    data are treated as categorical variables.

    See Wikipedia for a description of logistic regression:
    http://en.wikipedia.org/wiki/Logistic_regression
    """

    def __init__(self, model_proxy):
        """Wrap `model_proxy`, the underlying model handle."""
        self.__proxy__ = model_proxy

    def _get_wrapper(self):
        """Return a callable that wraps a model proxy in this class."""
        return lambda model_proxy: LogisticRegressionModel(model_proxy)
