#-*- coding:utf-8 -*-
'''
File: profile.py
Author: yuokada
Description: Get voice actor/actress information from http://lain.gr.jp/voicedb/
'''

from BeautifulSoup import BeautifulSoup
import urllib
import urllib2
import pickle
import os.path

class VoiceDB(object):
    """Scrape voice-actor profiles from VoiceDB and cache them with pickle.

    A profile is fetched from ``base_url + <voice_id>`` and converted into a
    plain dict (see ``get_voicedb``).  A previously collected database can be
    stored in / restored from ``data/<VDB_FILE>`` next to this module.

    Attributes:
        base_url: URL prefix of a profile page; the voice id is appended.
        vdb_dic: The loaded database, or ``False`` while nothing is loaded.
    """
    VDB_FILE = "voicedb.pick"
    PICKLE_PROTO = 2

    # Tags inside the profile block whose text is collected, in document
    # order, as alternating header / value entries.
    PROFILE_TAGS = ("dd", "dt")

    # Page header (Japanese label) -> key used in the returned profile dict.
    # NOTE: "hight" (sic) is kept as-is because consumers and already pickled
    # data may rely on that spelling.
    _HEADER2KEY = {
        u"名前": "name",
        u"旧名": "oldname",
        u"よみ": "yomi",
        u"本名": "real_name",
        u"ローマ字": "roma",
        u"ニックネーム": "nickname",
        u"誕生日": "birthday",
        u"年齢": "age",
        u"出身地": "prefecture",
        u"身長": "hight",
        u"体重": "weight",
        u"スリーサイズ": "three_size",
        u"血液型": "blood_type",
        u"星座": "asterism",
        u"所属": "office",
        u"趣味": "hobby",
        u"特技": "skill",
        u"好きなもの": "like",
        u"嫌いなもの": "dislike",
        u"ファンクラブ": "fan_club",
        u"タグ": "tag",
        u"命日": "death",
        u"享年": "age_at_death",
        u"別名": "another_name",
        u"詳細": "detail",
        u"トラックバックURL": "trackback",
    }

    def __init__(self, load=False):
        """Create a scraper.

        Args:
            load: When exactly ``True``, the pickled database is read
                immediately; otherwise ``vdb_dic`` stays ``False`` until
                ``get_max_voiceid`` loads it on demand.
        """
        super(VoiceDB, self).__init__()
        self.base_url = "http://lain.gr.jp/voicedb/profile/"
        if load is True:
            self.vdb_dic = self.load_from_pickle()
        else:
            # False (not None) is the historical "not loaded" marker.
            self.vdb_dic = False

    def get_voicedb(self, voice_id):
        """Fetch and parse the profile page of one voice actor.

        Args:
            voice_id: Identifier appended to ``base_url`` (converted with
                ``str``).

        Returns:
            A dict of profile fields plus ``"media"`` (see
            ``_get_media_info``) and ``"costar"`` (see ``_get_costars``).
            An empty dict is returned when the server answers with an HTTP
            error or the page has no ``<div id="db">`` profile block.

        Raises:
            KeyError: If the page contains a header missing from the
                header-to-key table.
        """
        target_page = self.base_url + str(voice_id)
        # Only the network call can raise HTTPError, so keep the try narrow.
        try:
            response = urllib2.urlopen(target_page)
            try:
                html = response.read()
            finally:
                response.close()
        except urllib2.HTTPError:
            return {}

        soup = BeautifulSoup(html)
        person = soup.find("div", attrs={"id": "db"})
        if person is None:
            return {}

        # Membership in a real tuple: the former ("dd,dt") was a plain
        # string, which made this a substring test.
        item_list = [item.getText() for item in person.findAll()
                     if item.name in self.PROFILE_TAGS]
        actor = self._mapping_l2d(item_list)
        # bind media info
        actor["media"] = self._get_media_info(soup)
        actor["costar"] = self._get_costars(soup)
        return actor

    def _get_media_info(self, soup):
        """Collect the media section of a profile page.

        The element following ``#toc_media`` inside ``<div id="detail-item">``
        is scanned; the texts of all descendants except ``img`` and ``a`` tags
        are taken as alternating key / value entries.

        Args:
            soup: Parsed profile page.

        Returns:
            A dict mapping each key text to the list of value texts found
            for it, or an empty dict when the section is absent.
        """
        detail = soup.find("div", attrs={"id": "detail-item"})
        if detail is None:
            return {}
        toc_media = detail.find("", attrs={"id": "toc_media"})
        if toc_media is None:
            return {}
        container = toc_media.findNext()
        if container is None:
            return {}

        texts = [node.getText() for node in container.findAll()
                 if node.name not in ("img", "a")]

        media_info = {}
        for key, value in zip(texts[0::2], texts[1::2]):
            media_info.setdefault(key, []).append(value)
        return media_info

    def _get_costars(self, soup):
        """Collect the frequent co-stars listed on a profile page.

        Args:
            soup: Parsed profile page.

        Returns:
            A list of ``(voice_id, actor_name)`` tuples, one per linked
            ``<li>`` of ``<ul id="costar">``; empty when the list is absent.
            ``voice_id`` is the last path segment of the link target as int.

        Raises:
            ValueError: If the last path segment of a link is not an integer.
        """
        costar_block = soup.find("ul", attrs={"id": "costar"})
        if costar_block is None:
            return []

        costars_list = []
        for node in costar_block.findAll("li"):
            link = node.find("a")
            if link is None:
                # An entry without a link carries no voice id; skip it.
                continue
            voice_id = dict(link.attrs)['href'].split('/')[-1]
            costars_list.append((int(voice_id), link.text))
        return costars_list

    def _mapping_l2d(self, item_list):
        """(deprecated) Turn a flat header/value list into a profile dict.

        Args:
            item_list: Texts in the order header, value, header, value, ...
                A trailing unpaired entry is ignored.

        Returns:
            A dict keyed by the translated header (see ``_mapping_h2k``).
            When a header occurs more than once the last value wins.

        Raises:
            KeyError: If a header is not in the header-to-key table.
        """
        actor = {}
        for header, value in zip(item_list[0::2], item_list[1::2]):
            actor[self._mapping_h2k(header)] = value
        return actor

    def _mapping_h2k(self, key):
        """Translate a page header into the key used in the profile dict.

        Args:
            key: Header text as it appears on the page (unicode).

        Returns:
            The corresponding dict key.

        Raises:
            KeyError: If the header is unknown.
        """
        return self._HEADER2KEY[key]

    def get_max_voiceid(self):
        """Return the largest key of the database.

        The database is loaded from the pickle file first when ``vdb_dic``
        is falsy (not loaded yet, or empty).

        Raises:
            ValueError: If the database is empty.
        """
        if not self.vdb_dic:
            self.vdb_dic = self.load_from_pickle()
        return max(self.vdb_dic)

    # The three methods below might be better placed in a separate class.
    def _get_data_dirpath(self):
        """Return the path of the ``data`` directory beside this module.

        The returned string ends with a path separator.
        """
        parentdir = os.path.dirname(os.path.abspath(__file__))
        # Joining with "" appends the trailing separator callers expect.
        return os.path.join(parentdir, "data", "")

    def load_from_pickle(self):
        """Load and return the object stored in the database pickle file.

        SECURITY: ``pickle.load`` can execute arbitrary code; the data file
        must come from a trusted source.

        Raises:
            IOError: If the file cannot be opened.
        """
        vdb_filepath = os.path.join(self._get_data_dirpath(), self.VDB_FILE)
        # Pickle protocol 2 is binary, so the file must be opened in "rb".
        with open(vdb_filepath, "rb") as fh:
            return pickle.load(fh)

    def dump_by_pickle(self, obj):
        """Write ``obj`` to the database pickle file using ``PICKLE_PROTO``.

        Args:
            obj: Any picklable object (normally the database dict).

        Returns:
            ``None`` (the result of ``pickle.dump``).

        Raises:
            IOError: If the file cannot be opened for writing.
        """
        vdb_filepath = os.path.join(self._get_data_dirpath(), self.VDB_FILE)
        # Pickle protocol 2 is binary, so the file must be opened in "wb".
        with open(vdb_filepath, "wb") as fh:
            return pickle.dump(obj, fh, self.PICKLE_PROTO)