"""
This package contains methods that can be useful for evaluating the performance
of machine learning models.
"""
import graphlab.connect as _mt
import graphlab as _graphlab
from graphlab.data_structures.sframe import SFrame as _SFrame
from graphlab.data_structures.sarray import SArray as _SArray

def max_error(targets, predictions):
    r"""
    Compute the maximum absolute deviation between two vectors, defined as:

    .. math::

        max_error = \max_{i \in 1,\ldots,N} |\widehat{y}_i - y_i|

    Parameters
    ----------
    targets: SArray of floats
        An SArray of observed values.

    predictions: SArray of floats
        The prediction that corresponds to each target value.
        This vector must have the same length as ``targets``.

    Returns
    -------
    out : float
        The maximum absolute deviation error between the two vectors.

    Raises
    ------
    AssertionError
        If either input is not an SArray, or if the two inputs differ in size.
    """
    _mt._get_metric_tracker().track('evaluation.max_error')

    # isinstance (rather than an exact type comparison) also admits SArray
    # subclasses; everything the original check accepted is still accepted.
    assert isinstance(targets, _SArray), 'Input targets must be an SArray'
    assert isinstance(predictions, _SArray), 'Input predictions must be an SArray'
    assert targets.size() == predictions.size(), \
        'Input targets and predictions must have the same size'

    # NOTE(review): the lambda wrapper around abs is kept on purpose; confirm
    # that SArray.apply accepts built-in callables before simplifying it.
    absolute_errors = (targets - predictions).apply(lambda x: abs(x))
    return absolute_errors.max()

def rmse(targets, predictions):
    r"""
    Compute root mean squared error between two vectors, defined as:

    .. math::

        RMSE = \sqrt{\frac{1}{N} \sum_{i=1}^N (\widehat{y}_i - y_i)^2}

    The computation itself is delegated to the ``evaluation_rmse`` toolkit.

    Parameters
    ----------
    targets: SArray of floats
        An SArray of observed values.

    predictions: SArray of floats
        The prediction that corresponds to each target value.
        This vector must have the same length as ``targets``.

    Returns
    -------
    out : float
        The RMSE between the two vectors.

    Raises
    ------
    AssertionError
        If either input is not an SArray, or if the two inputs differ in size.

    References
    ----------
    - `Wikipedia - root-mean-square deviation
      <http://en.wikipedia.org/wiki/Root-mean-square_deviation>`_
    """
    _mt._get_metric_tracker().track('evaluation.rmse')

    # isinstance (rather than an exact type comparison) also admits SArray
    # subclasses; everything the original check accepted is still accepted.
    assert isinstance(targets, _SArray), 'Input targets must be an SArray'
    assert isinstance(predictions, _SArray), 'Input predictions must be an SArray'
    assert targets.size() == predictions.size(), \
        'Input targets and predictions must have the same size'

    opts = {'targets': targets,
            'predictions': predictions}
    response = _graphlab.toolkits.main.run("evaluation_rmse", opts)
    return response["rmse"]

def confusion_matrix(targets, predictions, threshold=0.5):
    r"""
    Compute the confusion matrix for classification predictions, i.e.
    the number of

    - true positives: target is 1 and prediction above threshold
    - false positives: target is 0 and prediction above threshold
    - true negatives: target is 0 and prediction at or below threshold
    - false negatives: target is 1 and prediction at or below threshold

    The counting itself is delegated to the ``evaluation_confusion_matrix``
    toolkit.

    Parameters
    ----------
    targets: SArray
        Must be of type int and contain only 0s and 1s.

    predictions: SArray of floats
        The prediction that corresponds to each target value.
        This vector must have the same length as ``targets``.

    threshold: float, optional (default .5)
        Predictions greater than threshold will be considered predictions
        of 1. Predictions less than or equal to threshold will be considered
        predictions of 0.

    Returns
    -------
    out: dict
        A dictionary containing each element of the confusion matrix, exactly
        as returned by the toolkit. ``accuracy`` in this module reads the
        ``'true_positive'`` and ``'true_negative'`` entries.

    Raises
    ------
    AssertionError
        If either input is not an SArray, or if the two inputs differ in size.
    """
    _mt._get_metric_tracker().track('evaluation.confusion_matrix')

    # isinstance (rather than an exact type comparison) also admits SArray
    # subclasses; everything the original check accepted is still accepted.
    assert isinstance(targets, _SArray), 'Input targets must be an SArray'
    assert isinstance(predictions, _SArray), 'Input predictions must be an SArray'
    assert targets.size() == predictions.size(), \
        'Input targets and predictions must have the same size'

    opts = {'targets': targets,
            'predictions': predictions,
            'threshold': threshold}
    response = _graphlab.toolkits.main.run("evaluation_confusion_matrix", opts)
    return response

def accuracy(targets, predictions, threshold=0.5):
    """
    Compute the proportion of correct predictions.

    A prediction counts as correct when it is a true positive or a true
    negative in the confusion matrix computed at ``threshold``.

    Parameters
    ----------
    targets: SArray
        Must be of type int and contain only 0s and 1s.

    predictions: SArray of floats
        Predictions to compare against target.
        This vector must have the same length as ``targets``.

    threshold: float, optional (default .5)
        Predictions greater than threshold will be considered predictions
        of 1. Predictions less than or equal to threshold will be considered
        predictions of 0.

    Returns
    -------
    out: float
        The number of true positives plus true negatives, divided by the
        number of targets.
    """
    _mt._get_metric_tracker().track('evaluation.accuracy')

    # Input validation happens inside confusion_matrix.
    counts = confusion_matrix(targets, predictions, threshold)
    num_correct = counts['true_positive'] + counts['true_negative']

    # float() keeps the division from truncating when both operands are ints.
    return float(num_correct) / targets.size()


