#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2013 by Björn Johansson.  All rights reserved.
# This code is part of the Python-dna distribution and governed by its
# license.  Please see the LICENSE.txt file that should have been included
# as part of this package.

import re
import os
import urllib2
from urlparse import urlparse
from urlparse import urlunparse
from Bio import SeqIO
from Bio import Entrez
from Bio.SeqUtils.CheckSum import seguid

class Genbank(object):
    '''
    Small client for downloading nucleotide records from NCBI Genbank
    through Bio.Entrez.

    Genbank(users_email,
            proxy = None,
            tool="biopython")

    users_email  e-mail address handed to NCBI with every request
    proxy        optional proxy URL such as "http://host:3128"
    tool         name of the calling tool handed to NCBI

    Creating an instance installs a urllib2 opener globally
    (urllib2.install_opener), so the proxy choice applies to every later
    urllib2 request made by the process.
    '''

    # re.match anchors the start of the string; the trailing \Z anchors the
    # end so that text following a valid address is rejected as well.
    _email_pattern = re.compile(r"[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\Z",
                                re.IGNORECASE)

    def __init__(self, users_email, proxy = None, tool="biopython"):
        '''
        Validate the e-mail address and install the urllib2 opener.

        Raises ValueError if users_email does not look like an e-mail
        address, or if a proxy is given and its host cannot be contacted
        within one second.
        '''
        if not self._email_pattern.match(users_email):
            raise ValueError("Not a valid user email!")
        self.email = users_email  # Always tell NCBI who you are
        self.tool = tool          # forwarded to Entrez by nucleotide()

        if proxy:
            parsed = urlparse(proxy)
            scheme = parsed.scheme
            # NOTE(review): the probe URL is built from scheme and hostname
            # only, so the proxy's own port is not the one being contacted.
            # Left as is; confirm whether this is intended.
            probe_url = urlunparse((scheme, parsed.hostname, '', '', '', ''))
            try:
                # Only reachability matters; close the response right away.
                urllib2.urlopen(probe_url, timeout=1).close()
            except urllib2.URLError:
                raise ValueError("could not contact proxy server.")
            self.proxy = urllib2.ProxyHandler({scheme: parsed.geturl()})
        else:
            os.environ['http_proxy'] = ''
            self.proxy = urllib2.ProxyHandler()
        self.opener = urllib2.build_opener(self.proxy)
        urllib2.install_opener(self.opener)

    def test(self):
        '''
        Download L09137 (pUC19) and compare the seguid checksum of its
        sequence with a known value.

        Returns True on success and raises AssertionError on a mismatch.
        Needs network access.
        '''
        result = self.nucleotide("L09137")  # pUC19
        # Explicit raise instead of an assert statement, so the check is
        # still performed when Python runs with -O.
        if seguid(result.seq) != "71B4PwSgBZ3htFjJXwHPxtUIPYE":
            raise AssertionError("unexpected seguid checksum for L09137")
        return True

    def nucleotide(self, item):
        '''
        nucleotide(item)

        item is a string containing one genbank accession number
        for a nucleotide file:

        A12345   = 1 letter  + 5 numerals
        AB123456 = 2 letters + 6 numerals

        http://www.dsimb.inserm.fr/~fuchs/M2BI/AnalSeq/Annexes/Sequences/Accession_Numbers.htm

        Returns the record that SeqIO.read parses from the downloaded
        genbank text.
        '''
        # Bio.Entrez reads these module-level settings on every request.
        Entrez.email = self.email
        Entrez.tool = self.tool

        handle = Entrez.efetch(db="nucleotide",
                               id=item,
                               rettype="gb",
                               retmode="text")
        try:
            return SeqIO.read(handle, "genbank")
        finally:
            # Release the connection even when parsing fails.
            handle.close()

if __name__ == "__main__":
    # Executed as a script: run whatever doctests this module's docstrings hold.
    from doctest import testmod
    testmod()

    # Manual smoke test (needs network access):
    #a=Genbank("bjornjobb@gmail.com")
    #a.test()


#    Manual smoke tests with and without a proxy (need network access):
#    a=Genbank("bjornjobb@gmail.com","http://proxy.uminho.pt:3128")
#    a.test()
#    print a.proxy.proxies
#    a=Genbank("bjornjobb@gmail.com","http://username:password@proxy.uminho.pt:1000")
#    print a.proxy.proxies
#    a=Genbank("bjornjobb@gmail.com")
#    a.test()

