#!/usr/bin/env python2
#
# Copyright John Reid 2013
#

"""
Read a BED file and get the regions in it.
"""

import logging
# Configure the root logger as soon as the script starts: messages at INFO
# level and above are emitted, each prefixed with a timestamp and the level
# name. The module-level logger is created for use by the code below.
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)-15s:%(levelname)s: %(message)s')
logger = logging.getLogger(__name__)

import pybedtools
import sys
from biopsy.data import ucsc
from optparse import OptionParser
from Bio import SeqIO
from itertools import imap


#
# Set up options and parse them
#
# Two positional arguments are required (the species and the BED file of
# regions). An optional third positional argument names the FASTA output
# file; the usage text advertises it so that `--help` and error messages
# match what the script actually accepts.
usage = 'USAGE: %prog [options] <species> <regions> [<fasta output>]'
parser = OptionParser(usage=usage)
options, args = parser.parse_args()
# parser.error() prints the usage message and exits the process.
if len(args) not in (2, 3):
    parser.error('Incorrect number of arguments')
# `args` is kept intact: the output section inspects args[2] when present.
species, regions_file = args[:2]


#
# Get the genome of the species of interest
# if we have not already loaded it.
#
# The guard checks whether the name `genome` is already bound in the
# namespace the script is executing in; only when it is absent is
# ucsc.Genome(species) constructed.
# NOTE(review): presumably this lets the script be re-run inside an
# interactive session without reloading the genome - confirm. Be aware that
# an existing `genome` is reused even if `species` has changed.
#
if 'genome' not in locals():
    genome = ucsc.Genome(species)


def region_as_str(region):
    """Return a string identifier for a region.

    The identifier has the form ``chrom:start-end:name:score``. The start
    and end coordinates are formatted as integers, zero-padded to a minimum
    width of eight characters.

    :param region: object exposing ``chrom``, ``start``, ``end``, ``name``
        and ``score`` attributes.
    :returns: the formatted identifier string.
    """
    chromosome = region.chrom
    # Pad the coordinates first, then embed them between the other fields.
    span = '%08d-%08d' % (region.start, region.end)
    return '%s:%s:%s:%s' % (chromosome, span, region.name, region.score)


def record_for_region(region):
    """Extract the sequence for a region and label it.

    Looks up ``region.chrom`` in the module-level ``genome``, slices it from
    ``region.start`` to ``region.stop`` and annotates the resulting record:

    - ``id`` is the identifier built by :func:`region_as_str`,
    - ``name`` is the region's name,
    - ``description`` is the region's score.

    :param region: object exposing ``chrom``, ``start``, ``stop``, ``name``
        and ``score`` (plus whatever :func:`region_as_str` reads).
    :returns: the annotated slice. Presumably a Biopython ``SeqRecord``,
        since the result is handed to ``SeqIO.write`` - confirm against
        ``ucsc.Genome``.
    """
    # NOTE(review): no bounds check is made against the chromosome length
    # here; how an out-of-range stop is handled depends on the slice
    # semantics of the object returned by genome[...].
    sequence = genome[region.chrom][region.start:region.stop]
    sequence.id = region_as_str(region)
    sequence.name = region.name
    sequence.description = region.score
    return sequence


#
# Parse the regions and write the sequences
#
# Choose the destination: standard output when no third argument was given
# or when it is '-', otherwise the named file (truncated and opened for
# writing).
if 2 == len(args) or '-' == args[2]:
    out = sys.stdout
else:
    fasta_out = args[2]
    out = open(fasta_out, 'w')
try:
    # imap keeps the conversion lazy, so records are produced one at a time
    # as SeqIO.write consumes them rather than all being built up front.
    SeqIO.write(
        imap(record_for_region, pybedtools.BedTool(regions_file)),
        out,
        'fasta')
finally:
    # Close (and thereby flush) a file we opened ourselves, even if writing
    # failed part-way through; never close the interpreter's stdout.
    if out is not sys.stdout:
        out.close()
