import graphlab.canvas.views.base
import graphlab.connect as _mt

import array
import math
import operator
import threading

# Number of leading rows covered by the quick "initial" sketch in
# SArrayView.get_sketch; columns with at most this many rows never use the
# initial sketch because the full sketch covers the same rows.
# keep in sync with _INITIAL_SKETCH_ROWS in sframe.jsx
_INITIAL_SKETCH_ROWS = 10000

def _frange(start, stop, step):
    """ Helper: Range function that can take float start/stop/step """
    while start < stop:
        yield start
        start += step

def _compose(f, g):
    # returns f of g as a function
    def f_of_g(arg):
        return f(g(arg))
    return f_of_g

def _sanitize_number(n):
    # get rid of non-JSON-serializable numeric values
    if n == float('inf') or \
       n == float('-inf') or \
       math.isnan(n):
      return None
    return n

class SArrayView(graphlab.canvas.views.base.BaseView):
    """
    Canvas view over a single SArray (accessed as self.obj).

    Serves sketch summaries of the column through the 'sketch' and
    'subsketch' GET handlers, and caches the sketch objects it creates so
    that repeated requests reuse them.
    """

    def __init__(self, obj):
        super(SArrayView, self).__init__(obj)
        _mt._get_metric_tracker().track('canvas.sarray.row.size', value=len(obj))
        self.register_handler('get', 'sketch', self.__get_sketch)
        self.register_handler('get', 'subsketch', self.__get_subsketch)
        # re-entrant lock: __get_subsketch calls __max_element_length while
        # already holding it
        self.__sketch_lock = threading.RLock()
        self.__cached_sketch = None
        self.__cached_sketch_initial = None
        self.__cached_sketch_with_subsketches = None

    def get_metadata(self):
        """
        Returns the row count, the dtype name, and the maximum sub-column
        length (see __max_element_length) of the wrapped SArray.
        """
        return {
            'descriptives': {
                'rows': len(self.obj)
            },
            'dtype': self.obj.dtype().__name__,
            'max_subcolumn_length': self.__max_element_length()
        }

    def get_staticdata(self):
        """
        Returns the sketch of this column, requested with background=False.
        """
        return {
            'sketch': self.get_sketch(background=False)
        }

    def __get_sketch(self, url, handler):
        """
        Gives a sketch summary for this column.
        """
        handler.write(self.get_sketch())

    def __max_element_length(self):
        """
        If this view wraps an SArray with dtype array.array and a full sketch
        has already been cached, returns the maximum element length reported
        by that sketch. Returns 0 otherwise.
        """
        with self.__sketch_lock:
            length = 0
            if self.__cached_sketch is not None:
                if self.obj.dtype() == array.array:
                    # if we have a cached sketch, and this dtype is array.array,
                    # get the max value from that
                    length = int(self.__cached_sketch.element_length_summary().max())
            return length

    def __get_subsketch(self, url, handler):
        """
        Gives a sketch summary for one sub-column of this column: a key when
        the dtype is dict, or an element index when the dtype is array.array.

        The sub-column is read from the second '/'-separated component of
        url. If the full sketch has not been created yet or is not ready, a
        progress-only response with 'complete': False is written instead.
        """
        subcol = url.split('/')[1]
        with self.__sketch_lock:
            sk = self.__cached_sketch
            dtype = self.obj.dtype()
            if dtype == array.array:
                subcol = int(subcol)
            if sk is None or not sk.sketch_ready():
                # the response has to go through the handler like every other
                # response of this view; __sketch_progress reports 0 for None
                handler.write({
                    'progress': SArrayView.__sketch_progress(sk),
                    'complete': False
                })
                return
            if self.__cached_sketch_with_subsketches is None:
                # figure out which subsketch keys to use
                sub_sketch_keys = None
                if dtype == dict:
                    # most frequent keys first
                    sorted_frequent_items = sorted(
                        sk.dict_key_summary().frequent_items().iteritems(),
                        key=operator.itemgetter(1),
                        reverse=True)
                    sorted_frequent_keys = [key for key, _ in sorted_frequent_items]
                    sub_sketch_keys = sorted_frequent_keys[:50] # keep this limit in sync with maxValues in DictView
                elif dtype == array.array:
                    sub_sketch_keys = range(self.__max_element_length())

                # create the sketch with subsketches
                self.__cached_sketch_with_subsketches = self.obj.sketch_summary(
                    background=True,
                    sub_sketch_keys=sub_sketch_keys
                    )

            # get element sub sketch and write response
            subsketch = self.__cached_sketch_with_subsketches.element_sub_sketch(subcol)
            ret = SArrayView.__format_sketch(subsketch)
            handler.write(ret)

    @staticmethod
    def __sketch_progress(sk):
        """
        Returns the fraction of elements the sketch has processed so far:
        0. when sk is None, and 1. when the sketch covers zero elements.
        """
        if sk is None:
            return 0.
        return float(sk.num_elements_processed()) / float(sk.size()) if sk.size() > 0 else 1.

    @staticmethod
    def __format_frequent_items(items):
        """
        Returns a copy of the frequent-items dict with values passed through
        _encode_value. Byte-string keys are decoded to unicode (undecodable
        bytes are replaced); keys are not truncated.
        """
        # format values using _encode_value, do not truncate key
        ret = {}
        for k, v in items.iteritems():
            if isinstance(k, str):
                k = unicode(k, errors='replace')
            ret[k] = graphlab.canvas.views.base._encode_value(v)
        return ret

    @staticmethod
    def __format_sketch(sk, initial=False):
        """
        Converts a sketch object into a plain dictionary.

        The result always has the keys 'numeric', 'size', 'num_undefined',
        'num_unique', 'frequent_items', 'progress', 'complete' and 'initial'.
        When the sketch is numeric it additionally has 'min', 'max', 'mean',
        'var', 'std' and 'quantile' (the quantiles at 0, 0.01, ..., 1, or an
        empty list for a sketch of size 0), with infinities and NaN replaced
        by None.
        """
        numeric = False
        try:
            sk.min()
            numeric = True
        except Exception:
            # TODO is there a better way to detect whether the sketch is over
            # numeric data than to use exceptions for control flow?
            # (only Exception is caught so that KeyboardInterrupt and
            # SystemExit still propagate)
            pass
        data = {
            'numeric': numeric,
            'size': sk.size(),
            'num_undefined': sk.num_undefined(),
            'num_unique': sk.num_unique(),
            'frequent_items': SArrayView.__format_frequent_items(sk.frequent_items()),
            'progress': SArrayView.__sketch_progress(sk),
            'complete': sk.sketch_ready(),
            'initial': initial
        }
        if numeric:
            if sk.size() > 0:
                quantiles = [_sanitize_number(sk.quantile(q))
                             for q in _frange(0, 1.01, 0.01)]
            else:
                quantiles = []
            data.update({
                'min': _sanitize_number(sk.min()),
                'max': _sanitize_number(sk.max()),
                'mean': _sanitize_number(sk.mean()),
                'var': _sanitize_number(sk.var()),
                'std': _sanitize_number(sk.std()),
                'quantile': quantiles
            })
        return data

    def get_sketch(self, background=True, initial=False):
        """
        Returns a dictionary representation of a sketch summary. For vector
        types, this will collapse the summary down to an element-wise or
        value-wise (aggregate) summary.

        For dict columns the result holds separate 'keys' and 'values'
        summaries plus 'progress', 'complete', 'initial' and 'num_undefined'.
        For list and array.array columns it is the formatted element summary.
        For every other dtype it is the formatted sketch itself.

        Parameters
        ----------
        background : bool
            Run the sketch on a background thread (if not already started) and return results immediately.

        initial: bool
            Perform the sketch on only the first 10,000 rows of the dataset to give results quickly.
            Ignored (treated as False) when the column has no more rows than that.
        """
        with self.__sketch_lock:
            sa = self.obj

            if len(self.obj) <= _INITIAL_SKETCH_ROWS:
                # if the initial sketch is complete (covers 100% of the rows of this SArray), call it good
                # and do not bother with a full sketch (the browser will interpret initial=False as done)
                initial = False

            # each sketch is created at most once and then reused
            if initial:
                if self.__cached_sketch_initial is None:
                    self.__cached_sketch_initial = sa[:_INITIAL_SKETCH_ROWS].sketch_summary(background=background)
                sk = self.__cached_sketch_initial
            else:
                if self.__cached_sketch is None:
                    self.__cached_sketch = sa.sketch_summary(background=background)
                sk = self.__cached_sketch

            if issubclass(sa.dtype(), dict):
                key_sketch = SArrayView.__format_sketch(sk.dict_key_summary(), initial=initial)
                value_sketch = SArrayView.__format_sketch(sk.dict_value_summary(), initial=initial)
                return {
                    'keys': key_sketch,
                    'values': value_sketch,
                    'progress': SArrayView.__sketch_progress(sk),
                    'complete': sk.sketch_ready(),
                    'initial': initial,
                    'num_undefined': sk.num_undefined()
                }

            if issubclass(sa.dtype(), (list, array.array)):
                return SArrayView.__format_sketch(sk.element_summary(), initial=initial)

            return SArrayView.__format_sketch(sk, initial=initial)

    def get_js_file(self):
        """
        Returns 'sarray', the name of the JS file for this view.
        """
        return 'sarray'
