#!/usr/bin/env python

# Parameters for the script:

# name of the collection containing the reads; the collection is looked up
# by this exact name, and the script exits with an error if none is found
reads_collection_name = "NL10_0910_vRNA_MSU_WTA:reads-clust"

# name of the file to receive the graph in Cytoscape format; one line is
# written per edge, i.e. per pair of contigs sharing at least one read
# see http://cytoscape.org/manual/Cytoscape2_8Manual.html#SIF%20Format
network_edges_fn = "network.sif"

# name of the file to receive the graph's edge annotations in Cytoscape format;
# each edge is annotated with the number of reads shared by its two contigs
# see http://www.cytoscape.org/manual/Cytoscape2_8Manual.html#Cytoscape%20Attribute%20File%20Format
network_edges_annotations_fn = "network.eda"

#:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

import MetagenomeDB as mdb
import sys

# connect to the database holding the reads
mdb.connect(db = "myDB:R1:bbolduc", user = '')

# look the collection of reads up by its name
read_collection_o = mdb.Collection.find_one({"name": reads_collection_name})

# a None result is treated as "no such collection". The test uses identity
# ('is None') rather than equality so that it cannot be affected by any
# custom comparison defined on the returned object
if read_collection_o is None:
	# written directly to stderr (same output as the former print statement)
	sys.stderr.write("ERROR: no collection found with name '%s'\n" % reads_collection_name)
	sys.exit(1)

# we create a dictionary to store the graph's edges; keys are ordered pairs
# of contig names, values are the number of reads connecting the two contigs
edges = {}

def order(a, b):
	"""Return the pair (a, b) with the smaller element first.

	The two values are swapped only when a compares strictly greater
	than b; otherwise they are returned in the order they were given.
	"""
	return (b, a) if (a > b) else (a, b)

# walk over every read of the collection; each read that is related to two
# or more contigs counts once toward every pair of those contigs
for read_o in read_collection_o.list_sequences():

	# sequences of class 'contig' reached from this read through its
	# outgoing relationships
	linked_contigs = list(read_o.list_related_sequences(mdb.Direction.OUTGOING, {"class": "contig"}))

	# with fewer than two contigs there is no pair to record
	if (len(linked_contigs) < 2):
		continue

	contig_names = [contig_o["name"] for contig_o in linked_contigs]

	# pairing each contig only with the ones that follow it in the list
	# visits every pair exactly once
	for position, name_a in enumerate(contig_names):
		for name_b in contig_names[position + 1:]:

			# the pair is ordered so that (a, b) and (b, a) share one
			# entry in 'edges'; the value stored is the number of reads
			# seen so far that connect those two contigs
			pair = order(name_a, name_b)
			edges[pair] = edges.get(pair, 0) + 1

# finally we store those edges and their annotations. Both files are opened
# in 'with' blocks so that they are flushed and closed as soon as writing is
# done (or if an error interrupts it), rather than left open until the
# interpreter exits
with open(network_edges_fn, 'w') as n:
	with open(network_edges_annotations_fn, 'w') as e:

		# the annotation file starts with the name of the edge attribute
		e.write("NumberOfSharedReads\n")

		for (node_a, node_b), count in edges.items():
			# one tab-separated edge per line in the network file ...
			n.write("%s\tshare_reads\t%s\n" % (node_a, node_b))

			# ... and the matching read count in the annotation file
			e.write("%s (share_reads) %s = %s\n" % (node_a, node_b, count))
