import tables
import os
from os import path
import viset.download
from viset.dataset import CategorizationViset, UrlViset

class ImageNet(CategorizationViset, UrlViset):
    """Build a viset database from an ImageNet URL list.

    Concrete subclasses supply the class attributes read by ``export``:
    ``URL``, ``SHA1``, ``TXTFILE`` and ``dbname``.  ``dbtype``, ``dbimtype``
    and ``dbversion`` are read as well but are not defined in this class
    (presumably inherited from the base classes -- confirm).
    """

    @staticmethod
    def _parse_line(line):
        """Return ``(id_wordnet, url)`` parsed from one line of the URL list.

        The line must consist of exactly two tab-separated fields, a name and
        a URL, and the name must consist of exactly two ``_``-separated
        parts (the WordNet id and a suffix).

        Raises:
            ValueError: if the line does not have that shape.
        """
        name, url = line.rstrip().split('\t')
        id_wordnet, _suffix = name.rstrip().split('_')
        return id_wordnet, url

    def export(self, redo=False):
        """Create the database, fill it from the URL list and return its file.

        Args:
            redo: when true, download and extract the URL list again even if
                the text file is already present in the cache directory.

        Returns:
            The ``dbfile`` value returned by ``self.create``.
        """
        # Create new database
        (self.dbobj, dbname, dbfile, dburl, cachedir) = self.create(
            self.dbname, self.dbtype, self.dbimtype, self.dbversion)

        # Fetch textfile for construction
        txtfile = path.join(cachedir, self.TXTFILE)
        if redo or not path.exists(txtfile):
            viset.download.download_and_extract(self.URL, cachedir, sha1=self.SHA1)

        # Write images (chunked)
        id_img = 0
        with open(txtfile, 'r') as urlfile:  # closed even if a line blows up
            for line in urlfile:
                try:
                    _id_wordnet, url = self._parse_line(line)
                    self.add_image(id_img, url, 1, id_img)  # id_anno=id_img
                except Exception:
                    # Best-effort import: skip the line, but do not swallow
                    # KeyboardInterrupt/SystemExit the way a bare except does.
                    print('Warning: Ignoring malformed line ' + line)
                else:
                    id_img += 1
        self.write()

        # Write annotations (chunked).  Ids are assigned with the same
        # skip-on-error rule as the image pass so that they line up with it.
        id_anno = 0
        with open(txtfile, 'r') as urlfile:
            for line in urlfile:
                try:
                    id_wordnet, _url = self._parse_line(line)
                    self.add_categorization(int(id_anno), id_wordnet, id_anno)
                except Exception:
                    print('Warning: Ignoring malformed line ' + line)
                else:
                    id_anno += 1
        self.write()

        # Cleanup
        self.close()
        return dbfile


class ImageNetFall2011(ImageNet):
    """Settings for the ImageNet Fall 2011 URL list, consumed by ``ImageNet.export``."""

    # Database name handed to ``create`` when exporting.
    dbname = 'imagenet_fall2011'

    # Archive holding the URL list, and the SHA1 value passed along with it
    # to the download helper.
    URL = 'http://www.image-net.org/archive/imagenet_fall11_urls.tgz'
    SHA1 = 'f5fd118232b871727fe333778be81df6c6fec372'

    # Text file looked up in the cache directory and parsed line by line.
    TXTFILE = 'fall11_urls.txt'

  
