import tables
import os
from os import path
import viset.download
from viset.dataset import CategorizationViset, UrlViset
from viset.util import wascached

class ImageNet(CategorizationViset, UrlViset):
    """Exporter that builds a database from an ImageNet URL list.

    ``export`` reads the class attributes ``URL``, ``SHA1`` and ``TXTFILE``
    together with ``self.cachedir``; concrete subclasses are expected to
    provide them.
    """

    def export(self, redo=False):
        """Download the URL list and write one image and one annotation per line.

        Each line of the text file is expected to look like
        ``<wordnet id>_<suffix>\\t<url>``.  Lines that cannot be processed are
        reported on stdout and skipped.

        Args:
            redo: Accepted for interface compatibility; not used by this
                implementation.

        Returns:
            ``self.dbfile``.
        """
        # Create empty database
        self.create()

        # Fetch textfile for construction
        viset.download.cache_and_extract(self.URL, self.cachedir, sha1=self.SHA1)

        # The returned view is not used below; the call is kept in case it has
        # side effects.
        # NOTE(review): the annotation pass goes through self.dbview while the
        # image pass calls self.add_image directly -- presumably self.view()
        # sets up self.dbview; confirm.
        self.view()

        txtfile = path.join(self.cachedir, self.TXTFILE)

        # Write images
        id_img = 0
        with open(txtfile, 'r') as urlfile:  # closed even if a line fails
            for line in urlfile:
                try:
                    name, url = line.rstrip().split('\t')
                    # Unpacking validates the "<wordnet id>_<suffix>" shape so
                    # that both passes skip exactly the same lines.
                    id_wordnet, suffix = name.rstrip().split('_')
                    self.add_image(id_img, url, 1, id_img)  # id_anno=id_img
                    id_img += 1
                except Exception:
                    # Best effort: skip the line, but let KeyboardInterrupt and
                    # SystemExit propagate (a bare except would swallow them).
                    print('[viset.imagenet]: Warning: Ignoring malformed line "' + line[0:64] + ' ..."')
        self.write()

        # Write annotations
        id_anno = 0
        with open(txtfile, 'r') as urlfile:
            for line in urlfile:
                try:
                    name, url = line.rstrip().split('\t')
                    id_wordnet, suffix = name.rstrip().split('_')
                    # The counter only advances on success, mirroring the image
                    # pass, so annotation ids line up with image ids.
                    self.dbview.add_categorization(id_anno, id_wordnet, id_anno)
                    id_anno += 1
                except Exception:
                    print('[viset.imagenet]: Warning: Ignoring malformed line ' + line[0:64] + ' ...')
        self.write()

        # Cleanup
        self.close()
        return self.dbfile


class ImageNetFall2011(ImageNet):
    """Settings for the Fall 2011 ImageNet URL list."""

    # Name of the database for this release.
    _dbname = 'imagenet_fall2011'

    # Archive to download and the SHA1 digest passed along with it.
    URL = 'http://www.image-net.org/archive/imagenet_fall11_urls.tgz'
    SHA1 = 'f5fd118232b871727fe333778be81df6c6fec372'

    # Text file, relative to the cache directory, that lists the image URLs.
    TXTFILE = 'fall11_urls.txt'

  
