"""
Query (etc.) Periscope MongoDBs
"""
import bson
import pymongo
import pprint
import csv
import sys

# Special constants
# Key names for sample data. Neither constant is referenced by the code
# visible in this module; presumably they are consumed by importing
# modules -- TODO confirm before removing or renaming.
SAMPLE_RANGE_KEY = "_samples"
SAMPLE_KEY = "samples"

# Utility functions

def meta_parts(m):
    """Extract the 'params' and 'subject' sections of a metadata dict.

    Returns a new dict with exactly those two keys. A section that is
    absent from `m` is represented by an empty dict; a section that is
    present is returned as the same object (not copied).
    """
    parts = {}
    for section in ('params', 'subject'):
        parts[section] = m.get(section, {})
    return parts

def bin_to_str(d):
    """Recursively force all BinData items to be a string.

    Walks nested dicts and lists and replaces every
    ``bson.binary.Binary`` value with ``str(value)``.

    Dicts are updated in place and the same dict object is returned.
    Lists are rebuilt, so a top-level list argument is NOT modified;
    callers must use the return value. Any other value is returned
    unchanged.
    """
    if isinstance(d, dict):
        # dict.items() exists on both Python 2 and 3 (iteritems() is
        # Python 2 only). Iterate over a snapshot so that reassigning
        # values can never interfere with the iteration.
        for key, value in list(d.items()):
            d[key] = bin_to_str(value)
    elif isinstance(d, list):
        d = [bin_to_str(v) for v in d]
    elif isinstance(d, bson.binary.Binary):
        d = str(d)
    return d

def stringize(items):
    """Render each item as a string and return the results as a list.

    Integers are formatted with ``{0:d}``, floats with ``{0:f}``, the
    empty string becomes the quoted marker ``"NA"`` and everything else
    goes through ``str()``.
    """
    def _render(item):
        # The int test comes first, so bools (a subclass of int) are
        # formatted as integers.
        if isinstance(item, int):
            return "{0:d}".format(item)
        if isinstance(item, float):
            return "{0:f}".format(item)
        if item == "":
            return '"NA"'
        return str(item)

    return [_render(item) for item in items]

# --- #

class PeriscopeCollection:
    """MongoDB collection with Periscope data.

    Wraps a collection object (only its ``find`` method is used) and
    flattens its metadata and data documents into a table: a header
    (see get_table_hdr) and one body row per data value (see
    get_table_body).
    """
    # Names of the document fields read by this class.
    META_FIELD = 'meta'
    DATA_FIELD = 'data'
    ID_FIELD = '_id'
    PID_FIELD = '_pid'
    META_ID_FIELD = '_mid'
    TYPE_FIELD = 'event_type'
    SUBJ_FIELD = 'subject'
    PARAM_FIELD = 'params'
    DATA_VALUES_FIELD = 'values'
    TS_FIELD = 'ts'
    TS_VALUE_FIELD = '_ts'

    def __init__(self, collection):
        """Wrap `collection`; nothing is queried until it is needed."""
        self._coll = collection
        # Both are filled lazily by init_meta() / init_table_hdr().
        self._meta, self._hdr = None, None
        self._reqmeta = set()

    def set_req_meta(self, items):
        """A list of required metadata items
        All entries missing these items will be skipped.
        """
        self._reqmeta = set(items)

    def init_meta(self):
        """Retrieve and store metadata id's internally.

        Sets ``self._meta`` to a dict mapping every metadata id to the
        list of its ancestor ids, nearest parent first.

        Raises KeyError if a metadata entry names a parent id that was
        not returned by the query.
        """
        d = { } # result dict
        # Get all non-empty metadata sections
        spec, fields = {'meta.subject':{'$exists':True}}, {'meta':1}
        for obj in self._coll.find(spec, fields):
            # create dictionary mapping { meta id : [ parent-id ] }
            for m in obj[self.META_FIELD]:
                # `in` works on Python 2 and 3; dict.has_key() is
                # Python 2 only.
                if self.PID_FIELD in m:
                    pid = [m[self.PID_FIELD]]
                else:
                    pid = [ ]
                d[m[self.ID_FIELD]] = pid
        # Follow chain of parents for each leaf, add pid's for
        # all parents. `chain` is the list stored in `d`, so extending
        # it updates the result in place.
        # NOTE: a cyclic parent chain would never terminate; the data is
        # assumed to be acyclic.
        for chain in d.values():
            pids = chain
            # Walk up chain of parents
            while pids:
                parent = pids[-1] # select last one = furthest up, so far
                pids = d[parent]
                chain.extend(pids) # add all parents
        self._meta = d

    def init_table_hdr(self):
        """Retrieve and store hdr info internally.

        Sets ``self._hdr`` to a list of (event_type, part, key) tuples:
        three fixed data columns (id, event, mid), then one column per
        distinct params/subject key in first-seen order (the 'ts' key is
        excluded), then three fixed data columns (ts, name, value).
        Fixed data columns carry None as their event type.
        """
        hdr, hdr_seen = [ ], set()
        spec, fields = {'meta.subject':{'$exists':True}}, {'meta':1}
        for obj in self._coll.find(spec, fields):
            for m in obj[self.META_FIELD]:
                event_type = m[self.TYPE_FIELD]
                for part in self.PARAM_FIELD, self.SUBJ_FIELD:
                    # The query matches whole documents, so an individual
                    # metadata entry may lack a section; treat it as empty.
                    for k in m.get(part, { }):
                        # Keys are de-duplicated by name only, so a key
                        # appearing in both sections yields one column.
                        if k != self.TS_FIELD and k not in hdr_seen:
                            hdr.append((event_type, part, k))
                        hdr_seen.add(k)
        self._hdr = [(None, 'data', 'id'), (None, 'data', 'event'), (None, 'data', 'mid')] + \
                    list(hdr) + \
                    [(None, 'data','ts'), (None, 'data','name'), (None, 'data','value')]

    def get_table_hdr(self, ns=0):
        """Return the list of column names for the table.

        `ns` selects how much namespace is put in front of each name:
        ``ns >= 2`` gives "event.part.key", ``ns == 1`` gives "part.key"
        and any other value gives the bare key. The header is loaded
        from the collection on first use.
        """
        if self._hdr is None:
            self.init_table_hdr()
        hdr_cols = [ ]
        for (e, p, k) in self._hdr:
            if ns >= 2:
                s = "{0}.{1}.{2}".format(e, p, k)
            elif ns == 1:
                s = "{0}.{1}".format(p, k)
            else:
                s = k
            hdr_cols.append(s)
        return hdr_cols

    def get_table_body(self):
        """Get the rows of the table body, as a list of lists.

        Each row has the same column order as the header:
        [data id, event type, metadata id] + metadata values +
        [timestamp, value name, value]. Metadata values are the merged
        params/subject values of the metadata entry and all of its
        ancestors; a missing value is the empty string. Metadata entries
        that lack any item named via set_req_meta() produce no rows.

        The timestamp of a row is the matching entry of the data item's
        '_ts' list when that list is present and non-empty, otherwise
        the 'ts' value found in the metadata.

        Raises AssertionError ("No timestamp") when no truthy timestamp
        is available for a value.
        """
        rows = [ ]
        if self._meta is None:
            self.init_meta()
        if self._hdr is None:
            self.init_table_hdr()
        # Loop over all metadata items
        for meta_id, pids in self._meta.items():
            # Pull metadata values,
            # including the values from the parents.
            subj, param = { }, { }
            all_ids = [meta_id] + pids
            spec = {'meta._id':{'$in':all_ids}}
            for obj in self._coll.find(spec):
                for meta in obj[self.META_FIELD]:
                    if meta[self.ID_FIELD] in all_ids:
                        # A section may be absent from an entry.
                        subj.update(meta.get(self.SUBJ_FIELD, { }))
                        param.update(meta.get(self.PARAM_FIELD, { }))
            # 'ts' is not a metadata column: pull it out as the default
            # timestamp. A value in params overrides one in subject.
            meta_ts = None
            if self.TS_FIELD in subj:
                meta_ts = subj.pop(self.TS_FIELD)
            if self.TS_FIELD in param:
                meta_ts = param.pop(self.TS_FIELD)
            # Build the metadata part of the row once, in header order.
            row_meta, row_meta_keys = [ ], set()
            for (et, part, key) in self._hdr:
                if et is None:
                    continue # fixed data column, filled in below
                if part == self.SUBJ_FIELD:
                    source = subj
                elif part == self.PARAM_FIELD:
                    source = param
                else:
                    continue
                v = source.get(key, '')
                row_meta.append(v)
                # Only truthy values count as "present" for the
                # required-metadata check.
                if v:
                    row_meta_keys.add(key)
            # make sure all required metadata exists
            if not self._reqmeta.issubset(row_meta_keys):
                continue # missing required items
            # Join with data items
            spec = {'data.' + self.META_ID_FIELD : meta_id}
            for obj in self._coll.find(spec):
                # NOTE(review): every entry of the document's data array
                # is emitted, not only those whose '_mid' equals meta_id;
                # confirm a document never mixes metadata ids.
                for datum in obj[self.DATA_FIELD]:
                    # The metadata id comes third, matching the position
                    # of the 'mid' column in the header.
                    row_head = [datum[self.ID_FIELD],
                                datum[self.TYPE_FIELD],
                                meta_id] + row_meta
                    values_dict = datum[self.DATA_VALUES_FIELD]
                    ts_values = values_dict.get(self.TS_VALUE_FIELD, None)
                    for name, values in values_dict.items():
                        if name == self.TS_VALUE_FIELD:
                            continue # skip '_ts', etc.
                        for i, value in enumerate(values):
                            # Decide the timestamp per value so that one
                            # item's '_ts' never carries over to a later
                            # item that has none.
                            ts = ts_values[i] if ts_values else meta_ts
                            assert ts,"No timestamp"
                            rows.append(row_head + [ts, name, value])
        return rows

def __test(db, coll):
    """Smoke test: connect to the default MongoDB server and wrap the
    collection `coll` of database `db` in a PeriscopeCollection.

    Nothing is queried; this only checks that the connection and the
    wrapper can be created.
    """
    # pymongo.Connection was removed in PyMongo 3.0; MongoClient is its
    # replacement. Fall back to Connection for installations that
    # predate MongoClient.
    client_cls = getattr(pymongo, 'MongoClient', None) or pymongo.Connection
    mongo = client_cls()
    PeriscopeCollection(mongo[db][coll])

def usage():
    """Write a one-line usage message to stderr and exit with status 1.

    The program name shown is the base name of sys.argv[0].
    """
    from os.path import basename
    message = "usage: {prog} DATABASE COLLECTION\n".format(
        prog=basename(sys.argv[0]))
    sys.stderr.write(message)
    sys.exit(1)
    
if __name__ == '__main__':
    # Script entry point: exactly two arguments (DATABASE, COLLECTION)
    # are expected; anything else prints the usage message and exits.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        usage()
    __test(*cli_args)
    sys.exit(0)
