#!/usr/bin/env python

import optparse, sys, os, pprint
import MetagenomeDB as mdb

# Command-line parser; the two mandatory arguments are declared first.
p = optparse.OptionParser(
	description = """Part of the MetagenomeDB toolkit.
Annotate sequences with quality scores from a .QUAL file."""
)

p.add_option(
	"-i", "--input",
	metavar = "FILENAME",
	dest = "input_fn",
	help = "Path to a .QUAL file (mandatory)."
)

p.add_option(
	"-C", "--collection",
	metavar = "STRING",
	dest = "collection_name",
	help = "Name of the collection to annotate (mandatory)."
)

# Group that will receive the MongoDB connection options
g = optparse.OptionGroup(p, "Connection")

# Connection settings explicitly given on the command line, keyed by option
# name without its leading dashes (e.g. "--host" is stored under "host").
connection_parameters = {}
def declare_connection_parameter (option, opt, value, parser):
	"""optparse callback: remember the value passed to a connection option.

	option -- the Option instance that triggered the callback (unused)
	opt    -- the option string as seen on the command line, e.g. "--host"
	value  -- the argument given to this option
	parser -- the OptionParser driving the parsing (unused)
	"""
	key = opt[2:] # drop the two leading characters ("--")
	connection_parameters[key] = value

# The five connection options are all string-valued callbacks handled by
# declare_connection_parameter(); they differ only by name, placeholder and
# help text, so they are declared from a table.
connection_options = (
	("host", "HOSTNAME", """Host name or IP address of the MongoDB server (optional). Default:
'host' property in ~/.MetagenomeDB, or 'localhost' if not found."""),

	("port", "INTEGER", """Port of the MongoDB server (optional). Default: 'port' property
in ~/.MetagenomeDB, or 27017 if not found."""),

	("db", "STRING", """Name of the database in the MongoDB server (optional). Default:
'db' property in ~/.MetagenomeDB, or 'MetagenomeDB' if not found."""),

	("user", "STRING", """User for the MongoDB server connection (optional). Default:
'user' property in ~/.MetagenomeDB, or none if not found."""),

	("password", "STRING", """Password for the MongoDB server connection (optional). Default:
'password' property in ~/.MetagenomeDB, or none if not found."""),
)

for (name, metavar, help_text) in connection_options:
	g.add_option(
		"--" + name,
		dest = "connection_" + name,
		metavar = metavar,
		type = "string",
		action = "callback",
		callback = declare_connection_parameter,
		help = help_text
	)

p.add_option_group(g)

# Miscellaneous switches
g = optparse.OptionGroup(p, "Other options")

g.add_option(
	"-v", "--verbose",
	action = "store_true", default = False,
	dest = "verbose",
	help = "Increase the verbosity of MetagenomeDB."
)

g.add_option(
	"--no-progress-bar",
	action = "store_false", default = True,
	dest = "display_progress_bar",
	help = "Hide the progress bar."
)

g.add_option(
	"--dry-run",
	action = "store_true", default = False,
	dest = "dry_run",
	help = "Display current operation on screen rather than performing it."
)

g.add_option(
	"--version",
	action = "store_true", default = False,
	dest = "display_version",
	help = "Display the version of the MetagenomeDB toolkit."
)

p.add_option_group(g)

# From here on 'p' no longer refers to the parser but to the parsed option
# values; 'a' receives the remaining positional arguments.
p, a = p.parse_args()

def error (msg):
	"""Report a fatal error on standard error, then exit with status 1.

	msg -- message (or exception) to report. It is converted with str(); one
		trailing period, if present, is removed before the message is
		formatted as "ERROR: <msg>." so that the period is not doubled.

	This function does not return: it always calls sys.exit(1).
	"""
	msg = str(msg)
	if msg.endswith('.'):
		msg = msg[:-1]
	# sys.stderr.write() emits the same text as the 'print >>sys.stderr'
	# chevron form, without depending on the Python 2 print statement
	sys.stderr.write("ERROR: %s.\n" % msg)
	sys.exit(1)

# --version is honoured before any argument validation
if p.display_version:
	print(mdb.version)
	sys.exit(0)

# The input file must be both provided and present on disk
if p.input_fn is None:
	error("An input .QUAL file must be provided")
elif not os.path.exists(p.input_fn):
	error("File '%s' does not exist" % p.input_fn)

# Rejects a missing as well as an empty collection name
if not p.collection_name:
	error("A collection name must be provided")

#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

if (p.verbose):
	mdb.max_verbosity()

# Connection options given on the command line are forwarded as keyword
# arguments; any failure to connect is reported and ends the script.
try:
	mdb.connect(**connection_parameters)
except Exception as msg:
	error(msg)

# Look up the collection to annotate. The identity test avoids going through
# whatever equality operator the returned object may define.
collection = mdb.Collection.find_one({"name": p.collection_name})
if (collection is None):
	error("Unknown collection '%s'" % p.collection_name)

# Only a failed import is reported as a missing library; a bare 'except' would
# also have relabelled a keyboard interrupt or any other unrelated error.
try:
	from Bio import SeqIO
except ImportError:
	error("The BioPython library is not installed.\nTry 'easy_install biopython'")

def read():
	"""Iterate over the records of the input .QUAL file.

	Yields the records produced by SeqIO.parse() for the file named by
	p.input_fn, read in "qual" format.

	SeqIO.parse() is lazy: a malformed file is only detected while iterating,
	not when the parser is created. The iteration therefore takes place inside
	the try block, so that a ValueError raised halfway through the file is
	reported through error() instead of surfacing as a traceback. The file is
	closed when the iteration ends, whether it completes or not.
	"""
	try:
		with open(p.input_fn, 'rU') as handle:
			for record in SeqIO.parse(handle, "qual"):
				yield record

	except ValueError as msg:
		error(msg)

print("Importing '%s' ..." % p.input_fn)

# First pass: every record of the input must match exactly one sequence of
# the collection, by name. Nothing is modified during this pass.
print("  validating sequences ...")

n = 0
for record in read():
	sequence_name = record.id
	matches = list(collection.list_sequences({"name": sequence_name}))

	if not matches:
		error("Unknown query sequence '%s'" % sequence_name)
	elif len(matches) > 1:
		error("Duplicate query sequence '%s'" % sequence_name)

	n += 1

# n now holds the number of records found in the input
if not n:
	error("No information in the input")

print("  importing annotations ...")

class ProgressBar:
	"""Minimal text progress bar written to standard output.

	The bar is drawn as two spaces, a run of dots (80 of them at 100%) and a
	percentage, terminated by a carriage return so that the next call
	overwrites the same line.
	"""

	def __init__ (self, upper = None):
		"""Set the range of displayed values to [0, upper].

		upper -- value standing for 100%. None (the default) is treated as 0,
			i.e. an empty range.
		"""
		self.__min = 0.0
		# Adding 0.0 forces a float; None is special-cased since it cannot
		# take part in the addition
		self.__max = 0.0 if (upper is None) else upper + 0.0

	def display (self, value):
		"""Draw the bar for 'value' on the current line."""
		span = self.__max - self.__min
		# An empty range has no work left: show it as complete rather than
		# dividing by zero
		f = ((value - self.__min) / span) if (span != 0) else 1.0 # fraction
		p = 100 * f # percentage
		s = int(round(80 * f)) # bar size

		sys.stdout.write(' ' * 2 + ('.' * s) + " %4.2f%%\r" % p)
		sys.stdout.flush()

	def clear (self):
		"""Blank out the line used by the bar."""
		# 2 leading spaces + 80 dots + 8 characters for the percentage
		sys.stdout.write(' ' * (2 + 80 + 8) + "\r")
		sys.stdout.flush()

# Second pass: attach the quality scores to each sequence. 'n' (the number of
# records counted during validation) sizes the progress bar, then is reset to
# count the sequences actually annotated.
pb = ProgressBar(n)
n = 0

for record in read():
	try:
		# the validation pass ensured there is exactly one match per record
		sequence_o = list(collection.list_sequences({"name": record.id}))[0]

		sequence_o["quality"] = {
			"values": record.letter_annotations["phred_quality"],
			"scale": "PHRED"
		}

		# Explicit check instead of an assert statement: assertions are
		# removed when Python runs with -O, which would let a mismatched
		# record be committed.
		if (len(sequence_o["quality"]["values"]) != sequence_o["length"]):
			error("Sequence '%s' has %s quality scores but a length of %s" % (
				record.id,
				len(sequence_o["quality"]["values"]),
				sequence_o["length"]
			))

		# In dry-run mode the annotated properties are shown and nothing is
		# committed; such records are not counted as annotated.
		if (p.dry_run):
			print(pprint.pformat(sequence_o.get_properties()))
			continue

		sequence_o.commit()

	except (mdb.errors.DBConnectionError, mdb.errors.DBOperationError) as msg:
		error(msg)

	n += 1
	if (p.display_progress_bar):
		pb.display(n)

if (p.display_progress_bar):
	pb.clear()

print("    %s sequence%s annotated." % (n, 's' if (n > 1) else ''))

if (p.dry_run):
	print("(dry run)")
