import tables
import os
from os import path
#import viset.download  
from viset.dataset import Viset
import httplib2
from BeautifulSoup import BeautifulSoup, SoupStrainer # EXTERNAL
import urlparse
import string
import viset.util

class LabelMe(Viset):
  """Scraper for datasets published as two-level HTTP directory listings.

  This class reads several attributes it does not define itself:
  ``dbname``, ``IMURL`` and ``ANNOURL`` (expected as class attributes on a
  concrete subclass) and ``cacheroot`` (presumably supplied by ``Viset`` --
  TODO confirm).
  """

  # Link captions that belong to the index page chrome (column headers and
  # the parent-directory link) rather than to a real entry.
  _SKIP_LABELS = ('Name', 'Last modified', 'Size', 'Description', 'Parent Directory')

  def __init__(self):
    """Create the cache directory and record the database file location.

    NOTE(review): the base-class initializer is not invoked here; confirm
    that ``Viset`` does not require it.
    """
    self.cachedir = path.join(self.cacheroot, self.dbname)
    if not path.exists(self.cachedir):
      os.makedirs(self.cachedir)
    self.dbfile = path.join(self.cacheroot, self.dbname+'.h5')
    self.imstoragetype = 'url'

  def _listing_links(self, http, url):
    """Yield the absolute URL of every entry link on the index page at url.

    http -- object whose ``request(url)`` returns a (response, content) pair
    url  -- address of the index page; relative links are resolved against it
    """
    content = http.request(url)[1]
    for link in BeautifulSoup(content, parseOnlyThese=SoupStrainer('a')):
      # hasattr(link, 'href') is true for every BeautifulSoup tag because
      # unknown Python attributes are resolved through find(); read the HTML
      # attribute itself so anchors without href are skipped instead of
      # raising KeyError. Nodes without a get() method (plain strings) are
      # skipped as well.
      href = link.get('href') if hasattr(link, 'get') else None
      if href is None or link.text in self._SKIP_LABELS:
        continue
      yield urlparse.urljoin(url, href)

  def scrape(self, baseurl, keeptest):
    """Collect file URLs from a two-level directory listing.

    Every entry link on the page at ``baseurl`` is treated as a
    sub-directory; each sub-directory page is fetched and its entry links
    are offered to ``keeptest``.

    baseurl  -- URL of the top-level index page
    keeptest -- callable taking an absolute URL; a truthy result keeps it

    Returns the list of kept absolute URLs, in the order encountered.
    """
    http = httplib2.Http()
    filelist = []
    for dirurl in self._listing_links(http, baseurl):
      for fileurl in self._listing_links(http, dirurl):
        if keeptest(fileurl):
          filelist.append(fileurl)
    return filelist

  def export(self, redo=False):
    """Scrape image and annotation URLs, print both lists, return the db path.

    redo -- accepted for interface compatibility; currently unused.

    Returns ``self.dbfile``. No database is written yet (see TODO below).
    """
    imlist = self.scrape(self.IMURL, viset.util.isimg)
    annolist = self.scrape(self.ANNOURL, viset.util.isxml)
    # Single-argument print(...) produces the same output as the Python 2
    # print statement and also parses under Python 3.
    print(imlist)
    print(annolist)

    # TODO: format the dataset. The original placeholder was:
    #super(ImageNet,self).format(self.dbfile, self.dbname, 'DetectionPolygon', self.imstoragetype)

    return self.dbfile


class LabelMe3(LabelMe):
  """LabelMe configuration: dataset name plus the image and annotation index URLs."""
  dbname = 'labelme3'
  SHA1 = None
  ANNOURL = 'http://people.csail.mit.edu/brussell/research/LabelMe/Annotations/'
  IMURL = 'http://people.csail.mit.edu/brussell/research/LabelMe/Images/'




  ## Each image has many annotations, given in polygon XML format;
  ## store each as a fixed-length list of (initially empty) columns, for a maximum of 100 points

  ## Each annotation is scraped from xml document
  ## images are defined by URL from scraping entry

  ## add a DetectionPolygonTable 

  ## http://stackoverflow.com/questions/1080411/retrieve-links-from-web-page-using-python-and-beautiful-soup


  ## matplotlib supports polygons
  ## http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.fill

  ## https://github.com/mpitid/pylabelme
