import graphlab.canvas
import graphlab.canvas.views as views
import graphlab.canvas.views.base
import graphlab.connect as _mt
import graphlab.data_structures.gframe

import re
import textwrap
import threading

class SFrameView(graphlab.canvas.views.base.BaseView):
    """Canvas view for a tabular object (``self.obj``) with named columns.

    Registers 'get' handlers for the first rows ('head'), arbitrary row
    ranges ('rows'), per-column sketches ('sketch/<column>') and a
    progressively filled set of initial sketches ('initialsketch').

    One child view per column is created through ``views.get_view`` and
    cached; the cache is rebuilt whenever the set of
    (column name, content identifier) pairs changes.
    """

    def __init__(self, obj, params):
        """Record size metrics, register URL handlers and set up caches.

        obj    -- the object being viewed; must support ``len()``,
                  ``column_names()``, slicing and column lookup by name.
        params -- passed through unchanged to the base view.
        """
        super(SFrameView, self).__init__(obj, params)
        _mt._get_metric_tracker().track('canvas.sframe.row.size', value=len(obj))
        _mt._get_metric_tracker().track('canvas.sframe.column.size', value=len(obj.column_names()))
        self.register_handler('get', 'head', self.__get_head)
        self.register_handler('get', 'rows', self.__get_rows)
        self.register_handler('get', 'sketch/.*', self.__get_sketch)
        self.register_handler('get', 'initialsketch', self.__get_initial_sketches)
        # column name -> child view, rebuilt by __update_child_views
        self.__child_views = {}
        # set of (column name, content identifier) the child views were built for
        self.__child_identifiers = None
        # guards writes to / snapshots of the initial-sketch dict
        self.__initial_sketch_lock = threading.Lock()
        self.__initial_sketch_thread = None
        # column name -> initial sketch; None means "no run started / invalidated"
        self.__initial_sketches = None

    def __expand_columns(self):
        """Describe every column as a list of dicts, in column order:

            [{'name': 'col1', 'dtype': 'str'}, {'name': 'col2', 'dtype': 'int'}]
        """
        return [
            {'name': name, 'dtype': self.obj[name].dtype().__name__}
            for name in self.obj.column_names()
        ]

    def __get_row_values(self, start, end):
        """Return rows [start:end] as a table (list of lists) of encoded values.

        Similar to orient='values' in Pandas to_json; each cell is passed
        through ``_encode_value`` (per the original note, vector types such
        as array, list and dict are replaced by placeholder strings there).
        """
        encode = graphlab.canvas.views.base._encode_value
        # look the column names up once instead of once per row
        column_names = self.obj.column_names()
        return [[encode(row[col]) for col in column_names]
                for row in self.obj[start:end]]

    def __get_head_values(self):
        """Return the encoded values of the first 10 rows."""
        return self.__get_row_values(0, 10)

    def __get_rows(self, url, handler):
        """Handle 'rows/<start>/<end>': write the requested row range.

        A url that does not match the pattern raises AttributeError (no
        match object); non-integer bounds raise ValueError from int().
        """
        m = re.match('rows/(.*)/(.*)', url)
        start = int(m.group(1))
        end = int(m.group(2))
        handler.write({
            'values': self.__get_row_values(start, end)
        })

    def __get_head(self, url, handler):
        """Handle 'head': write the first rows of the table."""
        handler.write({
            'values': self.__get_head_values()
        })

    def __get_sketch(self, url, handler):
        """Handle 'sketch/<column>': write the sketch of a single column.

        Raises KeyError when no child view exists for the requested column.
        """
        m = re.match('sketch/(.*)', url)
        # the column name taken from the url is encoded as UTF-8 before lookup
        col = m.group(1).encode('utf-8')
        child_view = self.child_views()[col]
        handler.write(child_view.get_sketch())

    def __run_initial_sketches(self, sketches, child_views):
        """Background worker: fill ``sketches`` with one initial sketch per column.

        sketches    -- the dict this run fills (column name -> sketch). It is
                       passed in explicitly so that an invalidation of
                       ``self.__initial_sketches`` while the run is in
                       progress cannot make the worker write into ``None``.
        child_views -- column name -> child view, captured by the caller.
        """
        for name in self.obj.column_names():
            if self.__initial_sketches is not sketches:
                # the cache was invalidated (or replaced) while we were
                # running; these results would be discarded anyway
                return
            # compute outside the lock so progress polls are not blocked
            # for the duration of a sketch
            sketch = child_views[name].get_sketch(initial=True, background=False)
            with self.__initial_sketch_lock:
                sketches[name] = sketch

    def __get_initial_sketches(self, url, handler):
        """Handle 'initialsketch': report the initial sketches computed so far.

        Starts the background computation if it has not been started yet or
        if the child identifiers have changed since the last run. Writes a
        dict with 'sketches' (column name -> sketch), 'progress' (fraction
        of columns done, 0.0-1.0) and 'complete' (bool).
        """
        # refresh the child views first: if the columns changed this resets
        # self.__initial_sketches to None and a new run is started below
        self.__update_child_views()

        with self.__initial_sketch_lock:
            sketches = self.__initial_sketches
            if sketches is None:
                # check-and-start happens under the lock so that concurrent
                # requests cannot start two workers
                sketches = {}
                self.__initial_sketches = sketches
                self.__initial_sketch_thread = threading.Thread(
                    target=self.__run_initial_sketches,
                    args=(sketches, self.__child_views))
                self.__initial_sketch_thread.start()
            # copy so the response is not affected by the worker adding
            # entries after the lock is released
            snapshot = dict(sketches)

        total = len(self.obj.column_names())
        done = len(snapshot)
        # with no columns there is nothing to compute: report it as finished
        # instead of dividing by zero
        progress = float(done) / float(total) if total else 1.0
        handler.write({
          'sketches': snapshot,
          'progress': progress,
          'complete': done == total
        })

    def __get_child_identifiers(self):
        """Return the set of (column name, content identifier) pairs.

        For a GFrame the special columns '__id', '__src_id' and '__dst_id'
        are left out.
        """
        self.obj.__materialize__() # materialize this sframe so __get_content_identifier__ returns valid results
        column_names = set(self.obj.column_names())
        if isinstance(self.obj, graphlab.data_structures.gframe.GFrame):
            # ignore the special GFrame column names
            column_names -= set(['__id', '__src_id', '__dst_id'])
        return set((name, self.obj[name].__get_content_identifier__())
                   for name in column_names)

    def __update_child_views(self):
        """Rebuild the child views if the column identifiers have changed.

        On a change this also drops the cached initial sketches and pushes
        the new child views into the canvas state.
        """
        identifiers = self.__get_child_identifiers()
        if self.__child_identifiers == identifiers:
            return
        # if the set of column names changes, regenerate all views
        self.__child_identifiers = identifiers
        self.__child_views = {k: views.get_view(self.obj[k]) for k in self.obj.column_names()}
        # invalidate existing cached data
        self.__initial_sketches = None
        # update in canvas State
        graphlab.canvas.get_target().state.update_child_views(self)

    def child_views(self):
        """Return the dict of column name -> child view, refreshing it first.

        The child views are created lazily and stored here so that cached
        sketches can be shared with the per-column views.
        """
        self.__update_child_views()
        return self.__child_views

    def get_metadata(self):
        """Return row/column counts, column descriptions and column identifiers."""
        self.__update_child_views()
        return {
            'descriptives': {
                'rows': len(self.obj),
                'columns': len(self.obj.column_names())
            },
            'columns': self.__expand_columns(),
            'column_identifiers': list(self.__child_identifiers)
        }

    def get_staticdata(self):
        """Return sketches for every column, column descriptions and head rows.

        Sketches are computed synchronously (``background=False``).
        """
        # TODO when the JS is refactored to allow per-view data (as opposed to global sketch/head)
        # then this should include the metadata from get_metadata as well
        # refresh once up front; calling child_views() per column would
        # re-materialize and recompute every column identifier each time
        child_views = self.child_views()
        columns = self.obj.column_names()
        data = {
            'sketch': dict((name, child_views[name].get_sketch(background=False))
                           for name in columns),
            'columns': self.__expand_columns(),
            'rows': self.__get_head_values()
        }
        return data

    def get_js_file(self):
        """Return the name of the JS file for this view."""
        return 'sframe'
