#!/usr/bin/env python

import optparse, sys, os, pprint

# Command-line interface. Note that `p` is the parser only until parse_args()
# is called at the end of this section; from then on `p` holds the parsed
# option values (p.collection_name, p.output_fn, ...).
p = optparse.OptionParser(description = """Part of the MetagenomeDB toolkit.
Export nucleotide or aminoacid sequences from the database. Those sequences
can be in any format supported by Biopython (see http://biopython.org/wiki/SeqIO).""")

p.add_option("-C", "--collection-name", dest = "collection_name", metavar = "STRING",
	help = "Name of the collection to retrieve the sequences from (mandatory).")

p.add_option("-r", "--recursive", dest = "recursive", action = "store_true", default = False,
	help = """By default only the sequences belonging to the collection provided
are exported. If set, this option will force all sequences belonging to sub-collections
to be exported as well.""")

p.add_option("-o", "--output", dest = "output_fn", metavar = "FILENAME",
	help = "Destination for the sequences (optional). Default: standard output.")

p.add_option("-f", "--format", dest = "output_format", metavar = "STRING", default = "fasta",
	help = "Format of the sequences (optional). Default: %default")

p.add_option("-v", "--verbose", dest = "verbose", action = "store_true", default = False)

# NOTE(review): --dry-run is accepted but p.dry_run is not read anywhere in
# this script -- confirm whether it should disable the export.
p.add_option("--dry-run", dest = "dry_run", action = "store_true", default = False)

g = optparse.OptionGroup(p, "Connection")

g.add_option("--host", dest = "connection_host", metavar = "HOSTNAME", default = "localhost",
	help = "Host name or IP address of the MongoDB server (optional). Default: %default")

# type = "int" so that a port given on the command line has the same type as
# the default (an integer) instead of being passed along as a string.
g.add_option("--port", dest = "connection_port", metavar = "INTEGER", type = "int", default = 27017,
	help = "Port of the MongoDB server (optional). Default: %default")

g.add_option("--db", dest = "connection_db", metavar = "STRING", default = "MetagenomeDB",
	help = "Name of the database in the MongoDB server (optional). Default: '%default'")

g.add_option("--user", dest = "connection_user", metavar = "STRING", default = '',
	help = "User for the MongoDB server connection (optional). Default: '%default'")

g.add_option("--password", dest = "connection_password", metavar = "STRING", default = '',
	help = "Password for the MongoDB server connection (optional). Default: '%default'")

p.add_option_group(g)

# Rebinds `p` to the parsed options; `a` receives the leftover positional
# arguments.
(p, a) = p.parse_args()

def error (msg):
	"""Report a fatal error on standard error and exit with status 1.

	msg -- message or exception to report; it is converted with str().
	A single trailing period is dropped so that the period added by the
	output format is not doubled.

	Never returns: raises SystemExit through sys.exit(1).
	"""
	text = str(msg)
	if text.endswith('.'):
		text = text[:-1]

	# Written through the stream rather than with a 'print >>' statement so
	# that the function behaves the same under Python 2 and Python 3.
	sys.stderr.write("ERROR: %s.\n" % text)
	sys.exit(1)

if (not p.collection_name):
	error("A collection name must be provided")

#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

# Imported only once the command line has been validated.
import MetagenomeDB as mdb

if (p.verbose):
	mdb.max_verbosity()

# Connect with the host, port, database, user and password given on the
# command line (or their defaults).
if (p.connection_host or p.connection_port or p.connection_db or p.connection_user or p.connection_password):
	try:
		mdb.connect(p.connection_host, p.connection_port, p.connection_db, p.connection_user, p.connection_password)
	except Exception as msg:
		error(msg)

# Look the requested collection up by name.
try:
	collection = mdb.Collection.find_one({"name": p.collection_name})

except Exception as msg:
	error(msg)

if (collection is None):
	error("Unknown collection '%s'" % p.collection_name)

# Collections to export, the requested one first.
collections = [collection]

if (p.recursive):
	def walk (c):
		"""Append every sub-collection of c to `collections`, depth first,
		each sub-collection being listed before its own sub-collections."""
		for c_ in c.list_sub_collections():
			collections.append(c_)
			walk(c_)

	walk(collection)

#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

# Only a failed import means that the library is missing; any other exception
# is left to propagate rather than being reported as "not installed".
try:
	from Bio import SeqIO
	from Bio.Seq import Seq
	from Bio.SeqRecord import SeqRecord
except ImportError:
	error("The BioPython library is not installed.\nTry 'easy_install biopython'")

# The destination is opened last so that an existing file is not truncated
# when one of the checks above aborts the script.
if (p.output_fn is None):
	output_fh = sys.stdout
else:
	try:
		output_fh = open(p.output_fn, 'w')
	except IOError as msg:
		error(msg)

# When the sequences go to standard output the banner is sent to standard
# error, so that it does not end up in the middle of the exported data.
if (p.output_fn is None):
	sys.stderr.write("Exporting to standard output (%s) ...\n" % p.output_format)
else:
	sys.stdout.write("Exporting '%s' (%s) ...\n" % (p.output_fn, p.output_format))

class ProgressBar (object):
	"""Text progress bar redrawn in place on a single line.

	The bar is made of a 2-column indent, up to 80 dots and a percentage,
	and ends with a carriage return so that the next call overwrites it.

	upper  -- value corresponding to 100% (the lower bound is always 0).
	          None, the default, is treated like 0.
	stream -- file-like object to draw on; None (the default) means
	          whatever sys.stdout is at the time of drawing.
	"""

	def __init__ (self, upper = None, stream = None):
		self.__min = 0.0
		# '+ 0.0' forces a float so that display() never divides integers.
		self.__max = 0.0 if upper is None else upper + 0.0
		self.__stream = stream

	def __out (self):
		# Resolved on every call so that a later replacement of sys.stdout
		# is honoured.
		if self.__stream is None:
			return sys.stdout
		return self.__stream

	def display (self, value):
		"""Draw the bar for `value`, expected between 0 and `upper`.

		Values outside that range are clamped so that the bar never grows
		past the width erased by clear(). When `upper` is 0 (or was not
		given) there is nothing to measure against and a full bar is drawn
		instead of dividing by zero.
		"""
		span = self.__max - self.__min

		if span > 0:
			f = (value - self.__min) / span # fraction
			f = min(max(f, 0.0), 1.0)
		else:
			f = 1.0

		p = 100 * f # percentage
		s = int(round(80 * f)) # bar size

		out = self.__out()
		out.write(' ' * 2 + ('.' * s) + " %4.2f%%\r" % p)
		out.flush()

	def clear (self):
		"""Overwrite the bar with spaces and return to the start of the line."""
		out = self.__out()
		out.write(' ' * (2 + 80 + 8) + "\r")
		out.flush()

N = 0 # number of sequences exported so far, all collections included

# When the sequences themselves are written to standard output, the status
# messages go to standard error and no progress bar is drawn (it writes to
# standard output), so that the exported data is not corrupted.
to_stdout = (output_fh is sys.stdout)
status_fh = sys.stderr if to_stdout else sys.stdout

def _records (collection, progress, counter):
	"""Yield one SeqRecord per sequence of `collection`.

	counter[0] is incremented, and `progress` (if not None) redrawn, each
	time the consumer comes back for the next record. `progress` is cleared
	once all the sequences have been yielded.
	"""
	for sequence in collection.list_sequences():
		yield SeqRecord(
			id = sequence["name"],
			seq = Seq(sequence["sequence"]),
			description = sequence.get_property("description", ''),
		)

		# Execution resumes here only when the next record is requested.
		counter[0] += 1
		if progress is not None:
			progress.display(counter[0])

	if progress is not None:
		progress.clear()

try:
	for index, collection in enumerate(collections):
		n_sequences = collection.count_sequences()

		# Only the first entry is the collection requested by the user. The
		# label is based on the position in the list, not on the running
		# total, which is still 0 when the first collection is empty.
		status_fh.write("  from %scollection '%s' (%s sequences)\n" % (
			'' if index == 0 else "sub", collection["name"], n_sequences))

		pb = None if to_stdout else ProgressBar(n_sequences)
		counter = [0]

		# NOTE(review): SeqIO.write is called once per collection on the same
		# handle. Presumably fine for formats that are a plain succession of
		# records such as FASTA -- confirm for formats that have a file-level
		# header or footer.
		SeqIO.write(_records(collection, pb, counter), output_fh, p.output_format)

		n = counter[0]
		status_fh.write("    %s sequences exported.\n" % n)
		N += n

	status_fh.write("done (%s sequences exported total).\n" % N)

except Exception as msg:
	error(msg)

finally:
	# Runs on success as well as when error() exits the script; standard
	# output is left open.
	if not to_stdout:
		output_fh.close()
