#!/usr/bin/env python

import optparse, sys, os, pprint
import MetagenomeDB as mdb

# Command-line interface. Options are declared group by group so that the
# generated --help output lists them under matching headings.
p = optparse.OptionParser(
	description = """Part of the MetagenomeDB toolkit.
Import objects (Sequence and/or Collection) into the database. Those
objects are provided as JSON- or CSV-formatted descriptions."""
)

# -- where the object descriptions come from, and in which format
input_group = optparse.OptionGroup(p, "Input")

input_group.add_option(
	"-i", "--input",
	dest = "input_fn",
	metavar = "FILENAME",
	help = "Name of the file containing a description of the objects to import, or '-' to read from the standard input (mandatory)."
)

input_group.add_option(
	"-f", "--format",
	dest = "input_format",
	choices = ("json", "csv"),
	metavar = "STRING",
	default = "csv",
	help = "Format of the input file, either 'json' or 'csv' (optional). Default: %default"
)

p.add_option_group(input_group)

# -- which recoverable problems are downgraded from errors to warnings
errors_group = optparse.OptionGroup(p, "Errors handling")

errors_group.add_option(
	"--ignore-duplicates",
	dest = "ignore_duplicates",
	action = "store_true",
	default = False,
	help = "If set, ignore duplicate objects errors."
)

errors_group.add_option(
	"--ignore-missing",
	dest = "ignore_missing",
	action = "store_true",
	default = False,
	help = "If set, ignore relationships that refer to missing objects."
)

p.add_option_group(errors_group)

# -- general-purpose switches, attached directly to the parser
p.add_option("-v", "--verbose", dest = "verbose", action = "store_true", default = False)
p.add_option("--dry-run", dest = "dry_run", action = "store_true", default = False)
p.add_option("--version", dest = "display_version", action = "store_true", default = False)

# -- the connection group is created here; its options are declared further
# down and must be added to the object bound to 'g'
g = optparse.OptionGroup(p, "Connection")

# Connection settings explicitly given on the command line, keyed by option
# name without its two leading dashes (e.g. "host"). Options that were not
# given are simply absent; the mapping is later expanded as keyword
# arguments to mdb.connect().
connection_parameters = {}

def declare_connection_parameter (option, opt, value, parser):
	"""optparse callback recording one connection option.

	'opt' is the option string that triggered the callback (e.g. "--host")
	and 'value' its argument; 'option' and 'parser' are part of the
	callback signature but are not used.
	"""
	parameter_name = opt[2:]  # drop the leading "--"
	connection_parameters.update({parameter_name: value})

# All connection options share the same shape: a string-valued "--<name>"
# flag whose value is recorded by declare_connection_parameter(). They are
# therefore declared from a table of (name, metavar, help text).
for _name, _metavar, _help in (
	("host", "HOSTNAME", """Host name or IP address of the MongoDB server (optional). Default:
'host' property in ~/.MetagenomeDB, or 'localhost' if not found."""),

	("port", "INTEGER", """Port of the MongoDB server (optional). Default: 'port' property
in ~/.MetagenomeDB, or 27017 if not found."""),

	("db", "STRING", """Name of the database in the MongoDB server (optional). Default:
'db' property in ~/.MetagenomeDB, or 'MetagenomeDB' if not found."""),

	("user", "STRING", """User for the MongoDB server connection (optional). Default:
'user' property in ~/.MetagenomeDB, or none if not found."""),

	("password", "STRING", """Password for the MongoDB server connection (optional). Default:
'password' property in ~/.MetagenomeDB, or none if not found."""),
):
	g.add_option(
		"--" + _name,
		dest = "connection_" + _name,
		metavar = _metavar,
		type = "string",
		action = "callback",
		callback = declare_connection_parameter,
		help = _help
	)

p.add_option_group(g)

# From here on 'p' no longer names the parser but the parsed option values;
# 'a' receives the leftover positional arguments.
(p, a) = p.parse_args()

def error (msg):
	"""Report a fatal error on the standard error stream and exit.

	'msg' may be a string or any object convertible with str() (typically
	an exception). The message is printed as "ERROR: <msg>." and the
	process then exits with status 1 (sys.exit raises SystemExit).

	All trailing periods are removed before the final one is appended, so
	a message that already ends with one or more periods (for example an
	exception text embedded by a caller that adds its own period) does not
	end up with ".." in the output.
	"""
	text = str(msg).rstrip('.')
	# sys.stderr.write() rather than the print-chevron statement, so that
	# this function behaves the same regardless of how 'print' is defined
	sys.stderr.write("ERROR: %s.\n" % text)
	sys.exit(1)

# --version takes precedence over everything else, including the check for
# a mandatory input file
if p.display_version:
	print(mdb.version)
	sys.exit(0)

if p.input_fn is None:
	error("An input file must be provided")

# '-' stands for the standard input, so there is no file to look for
reading_stdin = (p.input_fn == '-')
if not (reading_stdin or os.path.exists(p.input_fn)):
	error("File '%s' does not exist" % p.input_fn)

#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

if p.verbose:
	mdb.max_verbosity()

# Only the connection settings given on the command line are passed on
try:
	mdb.connect(**connection_parameters)
except Exception as failure:
	error(failure)

# Maps the lower-cased '_type' of an entry to the class used to build it
builder = dict(
	collection = mdb.Collection,
	sequence = mdb.Sequence
)

class NotFound (Exception):
	"""Raised by fetch() when a referenced sequence cannot be found.

	Kept distinct from plain Exception so that the import loop can turn it
	into a warning when --ignore-missing is set.
	"""

# Sequences created earlier during this run, keyed by name. Lets an entry
# refer to a sequence that was just imported without naming its collection.
previous_sequences = {}

def fetch (xref):
	"""Resolve a cross-reference to an existing object.

	'xref' is a mapping with the keys:
	  "_type"       -- "collection" or "sequence" (case-insensitive)
	  "name"        -- name of the object referred to
	  "_collection" -- optional, sequences only: name of a collection the
	                   sequence must be linked to

	A collection is looked up by name in the database. A sequence is looked
	up in the database when "_collection" is given, and among the sequences
	created earlier in this run (previous_sequences) otherwise.

	Returns the object found.

	Raises NotFound if a sequence cannot be found, and Exception if a
	mandatory key is missing, the type is not a key of 'builder', a
	collection does not exist, or several sequences with this name are
	linked to the requested collection.
	"""
	# Only the two mandatory lookups are guarded: a KeyError raised further
	# down (e.g. inside a database call) is not a malformed entry and must
	# not be reported as one.
	try:
		kind = xref["_type"].lower()
		name = xref["name"]
	except KeyError as msg:
		raise Exception("Invalid entry: %s" % msg)

	if kind not in builder:
		raise Exception("Unknown object type '%s'" % kind)

	# the xref refers to a Collection
	if kind == "collection":
		collection = mdb.Collection.find_one({"name": name})
		if collection is None:
			raise Exception("Unknown collection '%s'" % name)

		return collection

	# the xref refers to a Sequence
	if kind == "sequence":
		# first case: the xref refers to a Sequence associated to a collection
		if "_collection" in xref:
			collection = xref["_collection"]

			if mdb.Collection.find_one({"name": collection}) is None:
				raise Exception("Unknown collection '%s'" % collection)

			# among all sequences having this name, keep those that are
			# linked to this collection; built as a list so that it can be
			# counted and indexed whatever find() returns
			candidates = [
				candidate for candidate in mdb.Sequence.find({"name": name})
				if candidate.count_collections({"name": collection}) > 0
			]

			if not candidates:
				raise NotFound("Unknown sequence '%s' in collection '%s'" % (name, collection))

			if len(candidates) > 1:
				raise Exception("Duplicate sequence '%s' in collection '%s'" % (name, collection))

			return candidates[0]

		# second case: we just defined this sequence
		if name in previous_sequences:
			return previous_sequences[name]

		raise NotFound("Unknown sequence '%s'" % name)

def pull (map, key):
	"""Remove 'key' from 'map' and return the value it was bound to.

	'map' is modified in place. Raises KeyError if 'key' is absent.
	"""
	return map.pop(key)

def _show_dry_run (kind, entry):
	"""Print what would be imported for one entry when --dry-run is set."""
	print("import: %s" % kind)
	for line in pprint.pformat(entry).split('\n'):
		print("  %s" % line)

# Main loop: read the entries one by one, create the corresponding objects
# or relationships, then print how many entries of each type were processed.
try:
	# number of entries successfully processed, per (lower-cased) type
	counts = {}

	for entry in mdb.tools.parser(p.input_fn, p.input_format):
		# What is displayed if this entry gets rejected. Kept separate from
		# 'entry' because the latter loses its '_type', '_source' and
		# '_target' keys while being processed, which would make the
		# diagnostic omit the very fields most likely to be at fault.
		shown = entry

		try:
			# we check the operator; must be REPLACE only. The operator is
			# dropped on the way, leaving plain key/value pairs.
			plain = {}
			for key, (value, command) in mdb.tree.items(entry):
				if command != mdb.tools.REPLACE:
					raise Exception("Only REPLACE commands are accepted")

				mdb.tree.set(plain, key, value)

			entry = plain
			# shallow copy: enough to survive the top-level pull() calls
			shown = dict(entry)

			# we extract the object type
			kind = pull(entry, "_type").lower()
			counts.setdefault(kind, 0)

			# if the object to build is a relationship, we
			# first retrieve its source and target objects
			if kind == "relationship":
				if p.dry_run:
					_show_dry_run("relationship", entry)
				else:
					source = fetch(pull(entry, "_source"))
					target = fetch(pull(entry, "_target"))

					# whatever remains in the entry is passed along with
					# the relationship
					source._connect_to(target, entry)
					source.commit()

			else:
				if kind not in builder:
					raise Exception("Unknown object type '%s'" % kind)

				if p.dry_run:
					_show_dry_run(kind, entry)
				else:
					instance = builder[kind](entry)
					instance.commit()

					# remembered so that later entries can refer to this
					# sequence by name only (see fetch())
					if kind == "sequence":
						previous_sequences[entry["name"]] = instance

			# only reached when the entry was processed without error;
			# entries skipped with a warning are not counted
			counts[kind] += 1

		except (mdb.errors.DBConnectionError, mdb.errors.DBOperationError) as msg:
			error(msg)

		except mdb.errors.DuplicateObjectError as msg:
			if p.ignore_duplicates:
				sys.stderr.write("WARNING: %s\n" % msg)
			else:
				error(msg)

		except NotFound as msg:
			if p.ignore_missing:
				sys.stderr.write("WARNING: %s\n" % msg)
			else:
				error(msg)

		except Exception as msg:
			error("Invalid entry: %s. Entry was:\n %s" % (msg, pprint.pformat(shown)))

	for kind in sorted(counts):
		total = counts[kind]
		# singular for exactly one object, plural otherwise (including zero)
		print("%s %s%s added" % (total, kind, '' if total == 1 else 's'))

	if p.dry_run:
		print("(dry run)")

except Exception as msg:
	# no trailing period here: error() appends one itself
	error("Error when processing the input: %s" % msg)
