"""
Load input into a CouchDB database

See http://couchdb.apache.org for CouchDB
See http://couchdbkit.org for the Python API details
"""
import sys
import time
#
import couchdbkit
#
from netlogger.analysis.modules._base import Analyzer as BaseAnalyzer
from netlogger.analysis.modules._base import ConnectionException
from netlogger import util

class Analyzer(BaseAnalyzer):
    """Load records into a CouchDB database. !!EXPERIMENTAL!!

    Parameters: 
      - host {string,localhost*}: Server host
      - port {integer,5984*}: Server port
      - database {string,application*}: Name of database to create/use
      - batch_size {integer,1*}: Number of records to 'batch up' for
        one insert. A value of 0 or 1 means insert each record as it comes.
        For high-throughput, a large number like 10000 is appropriate.
      - batch_sec {float,0*}: If batch_size is greater than 1, this is
        the maximum amount of time between (batch) inserts when the data
        is continuously flowing. Note that interruptions in the input may
        cause the last partial batch of items to wait indefinitely.
        A value less than or equal to zero means to ignore this parameter.
    """
    def __init__(self, host="localhost", port=5984,
                 database='application',
                 batch_size=1, batch_sec=0,
                 perf='no', **kw):
        """Validate options, connect to the server and open the database.

        'perf' is an undocumented option: when it evaluates to true
        (via util.as_bool), insert timings are accumulated and logged
        by finish(). Remaining keywords go to the base class.

        Raises ValueError for an illegal 'port', 'batch_size' or
        'batch_sec', and ConnectionException if the server info request
        fails.
        """
        BaseAnalyzer.__init__(self, **kw)
        # Validate everything before touching the server, so that a bad
        # option cannot leave a freshly created database behind.
        # NOTE(review): presumably options can arrive as strings from
        # configuration (batch_size/batch_sec were already coerced);
        # '%d' would reject a string port, so coerce it as well.
        try:
            port = int(port)
        except ValueError:
            raise ValueError("Illegal port")
        try:
            batch_size = int(batch_size)
            # 0 is documented as "insert each record as it comes"
            if batch_size < 0:
                raise ValueError()
        except ValueError:
            raise ValueError("Illegal batch_size")
        try:
            batch_sec = float(batch_sec)
        except ValueError:
            raise ValueError("Illegal batch_sec")
        # connect
        url = "http://%s:%d" % (host, port)
        self._server = couchdbkit.Server(url)
        try:
            self._server.info()
        except couchdbkit.RequestFailed:
            raise ConnectionException("Connection failed to CouchDB server "
                                      "running on %s:%d" % (host, port))
        # create/use database
        self._db = self._server.get_or_create_db(database)
        # set up batching; only sizes above 1 need a Batch at all
        if batch_size > 1:
            self._batch = Batch(batch_size, batch_sec)
        else:
            self._batch = None
        # undocumented performance option
        self._perf = util.as_bool(perf)
        # counters always exist, but are only updated when perf is on
        self._insert_time, self._insert_num = 0, 0

    def fix_key_formats(self, data):
        """Return a copy of 'data' whose key names are not illegal.

        * a '.' anywhere in a key is replaced with '_'
        * leading '$' characters are removed

        If two keys become identical after fixing, only one of their
        values is kept.
        """
        fixed_data = {}
        for key, value in data.items():
            # lstrip() is a no-op without a leading '$' and, unlike
            # indexing key[0], is safe for an empty key
            fixed_data[key.replace('.', '_').lstrip('$')] = value
        return fixed_data

    def process(self, data):
        """Insert 'data' into database

        With batching enabled the record is only queued; a batch that is
        ready (full, or older than the timeout) is written out just
        before the new record is queued.
        """
        if self._dbg:
            self.log.debug("process_data.start")
        # fix keys
        data = self.fix_key_formats(data)
        # check on batched data first
        if self._batch is not None and self._batch.ready():
            self._flush_batch()
        # try to set status to int
        if 'status' in data:
            try:
                data['status'] = int(data['status'])
            except (TypeError, ValueError):
                # TypeError covers values such as None; the record is
                # still stored with its original status value
                self.log.warn("bad_status", value=data['status'],
                              msg="not integer")
        # add '_id' to avoid round-trip to server
        data['_id'] = util.uuid1()
        # insert data
        self._insert(data)
        # done
        if self._dbg:
            self.log.debug("process_data.end")

    def _save(self, save_fn, payload, count):
        """Call save_fn(payload); if perf is on, add the elapsed time
        and 'count' records to the performance counters.
        """
        if not self._perf:
            save_fn(payload)
            return
        t = time.time()
        save_fn(payload)
        self._insert_time += (time.time() - t)
        self._insert_num += count

    def _flush_batch(self):
        """Bulk-save every batched record and empty the batch.
        Does nothing when the batch holds no records.
        """
        items = self._batch.get_all()
        if items:
            self._save(self._db.bulk_save, items, len(items))

    def _insert(self, data):
        """Insert data by either adding to batch or really
        inserting it into the database.
        """
        if self._batch is not None:
            self._batch.add(data)
        else:
            self._save(self._db.save_doc, data, 1)

    def finish(self):
        """Write out any remaining batched records, then print perf
        on cleanup. Perf is only shown if verbosity is at 'INFO'
        (one -v) or above.
        """
        if self._batch is not None:
            self._flush_batch()
        if self._perf:
            # avoid dividing by zero when no record was ever inserted
            if self._insert_num > 0:
                mean_time = self._insert_time / self._insert_num
            else:
                mean_time = 0.0
            self.log.info("performance", insert_time=self._insert_time,
                          insert_num=self._insert_num,
                          mean_time=mean_time)

class Batch:
    """Collects items until there are enough of them, or until the
    oldest one has been waiting long enough.
    """
    def __init__(self, size, timeout):
        """Create an empty batch.

        'size' is the number of items at which the batch counts as full.
        'timeout' is the age in seconds (measured from the first item
                  added) at which a non-empty batch counts as "ready"
                  even though it is not full; negative values are
                  clamped to 0, which disables the age check.
        """
        self._size = size
        self._timeout = max(timeout,0)
        self._start_time = None
        self._items = []

    def add(self, item):
        """Append an item, starting the age clock if the batch was empty.
        """
        was_empty = not self._items
        if was_empty:
            self._start_time = time.time()
        self._items.append(item)

    def ready(self):
        """Tell whether the batch should be emptied now: True when it is
        full, or when it is non-empty and the timeout has elapsed.
        """
        # an empty batch is never ready
        if not self._items:
            return False
        # full
        if len(self._items) >= self._size:
            return True
        # old enough (the clock is only read when a timeout is set)
        if self._timeout > 0:
            return time.time() - self._start_time >= self._timeout
        return False

    def get_all(self):
        """Hand back the collected items and reset the batch to empty.
        """
        drained, self._items = self._items, []
        self._start_time = None
        return drained
