#!/usr/bin/env python
# *-* coding: utf-8 *-*

"""subdl - download movie/tv-series subtitles from the command line.

Subtitles are looked up on opensubtitles.org (and, for searches whose name
contains "yify", on yifysubtitles.com as well), downloaded as a zip archive
and extracted next to the media files.
"""

# Package metadata.
__appname__ = "subdl"
__version__ = "1.0.9"
__author__ = "Dhiraj Thakur <dhirajt@github.com>"
__license__ = "GPLv3"

import argparse
import os
import re
import requests
import sys
import zipfile


from BeautifulSoup import BeautifulSoup
from operator import itemgetter
from HTMLParser import HTMLParser 
from requests.exceptions import ConnectionError


# File extensions (without the leading dot) that identify a directory entry
# as a media file when scanning a folder for something to find subtitles for.
media_extensions = [
    "3g2", "3gp", "3gp2", "3gpp", "60d", "ajp", "asf", "asx", "avchd", "avi",
    "bik", "bix", "box", "cam", "dat", "divx", "dmf", "dv", "dvr-ms", "evo",
    "flc", "fli", "flic", "flv", "flx", "gvi", "gvp", "h264", "m1v", "m2p",
    "m2ts", "m2v", "m4e", "m4v", "mjp", "mjpeg", "mjpg", "mkv", "moov", "mov",
    "movhd", "movie", "movx", "mp4", "mpe", "mpeg", "mpg", "mpv", "mpv2",
    "mxf", "nsv", "nut", "ogg", "ogm", "omf", "ps", "qt", "ram", "rm", "rmvb",
    "swf", "ts", "vfw", "vid", "video", "viv", "vivo", "vob", "vro", "wm",
    "wmv", "wmx", "wrap", "wvx", "wx", "x264", "xvid",
]

# Release/quality tags joined into a regex alternation; clean_name() uses it
# to drop these words from a file name before searching.
words_exclude = '|'.join((
    'xvid', 'divx',
    '480p', '480i', '320p', '320i', '720p', '720i', '1080p', '1080i',
    'dvdscr', 'ts', 'pdtv', 'hdtv', 'dvdrip', 'x264',
    'subs', 'subtitles',
))

# Extensions of the archive members that save_subs() extracts.
subs_extensions = ['srt', 'sub', 'smi', 'txt', 'ssa', 'ass', 'mpl']

# Maps the three-letter language codes accepted by --lang to the language
# name. "all" is a special entry; the remaining codes are listed
# alphabetically.
supported_langs = {
    "all": "ALL",

    "afr": "Afrikaans",
    "alb": "Albanian",
    "ara": "Arabic",
    "arm": "Armenian",
    "baq": "Basque",
    "ben": "Bengali",
    "bos": "Bosnian",
    "bre": "Breton",
    "bul": "Bulgarian",
    "bur": "Burmese",
    "cat": "Catalan",
    "chi": "Chinese",
    "cze": "Czech",
    "dan": "Danish",
    "dut": "Dutch",
    "ell": "Greek",
    "eng": "English",
    "epo": "Esperanto",
    "est": "Estonian",
    "fin": "Finnish",
    "fre": "French",
    "geo": "Georgian",
    "ger": "German",
    "glg": "Galician",
    "heb": "Hebrew",
    "hin": "Hindi",
    "hrv": "Croatian",
    "hun": "Hungarian",
    "ice": "Icelandic",
    "ind": "Indonesian",
    "ita": "Italian",
    "jpn": "Japanese",
    "kaz": "Kazakh",
    "khm": "Khmer",
    "kor": "Korean",
    "lav": "Latvian",
    "lit": "Lithuanian",
    "ltz": "Luxembourgish",
    "mac": "Macedonian",
    "mal": "Malayalam",
    "may": "Malay",
    "mon": "Mongolian",
    "nor": "Norwegian",
    "oci": "Occitan",
    "per": "Farsi",
    "pob": "Portuguese-BR",
    "pol": "Polish",
    "por": "Portuguese",
    "rum": "Romanian",
    "rus": "Russian",
    "scc": "Serbian",
    "sin": "Sinhalese",
    "slo": "Slovak",
    "slv": "Slovenian",
    "spa": "Spanish",
    "swa": "Swahili",
    "swe": "Swedish",
    "syr": "Syriac",
    "tam": "Tamil",
    "tel": "Telugu",
    "tgl": "Tagalog",
    "tha": "Thai",
    "tur": "Turkish",
    "ukr": "Ukrainian",
    "urd": "Urdu",
    "vie": "Vietnamese",
}


def main():
    """Parse the command-line options and run the subtitle search.

    Ctrl-C during the search is caught and reported as a plain "Exit"
    message instead of a traceback.
    """
    parser = argparse.ArgumentParser(
                        description='Download movie/tv-series subtitles',
                        prog='subdl')
    # With nargs='?' a flag may appear without a value, in which case argparse
    # stores `const` (None unless given). Setting const to the default makes a
    # bare flag behave like an omitted one instead of passing None downstream
    # (a bare "-d" used to crash in os.listdir(None)).
    parser.add_argument('-d','--dir', default='.', const='.', nargs='?',
                        help='folder to search for movies')
    parser.add_argument('-l','--lang', default='eng', const='eng',
                        type=unicode, nargs='?',
                        help='language of the subtitles')
    parser.add_argument('-n','--name', default='', const='',
                        type=unicode, nargs='?',
                        help='specify movie/tv-series name')
    parser.add_argument('-c','--choose', action='store_true',
                        help='choose subs from a list')
    args = parser.parse_args()

    try:
        find_opensubtitles(name=args.name,
                           lang=args.lang,
                           directory=args.dir,
                           choose=args.choose)
    except KeyboardInterrupt:
        print('\nExit')

def find_opensubtitles(name,lang,directory,choose):
    """Find and download subtitles for a title or for local media files.

    name      -- title to search for (a single string, or a list/tuple of
                 titles). When empty, `directory` is scanned for media files
                 and their cleaned-up file names are searched instead.
    lang      -- language code; must be a key of `supported_langs`.
    directory -- folder scanned for media files; downloaded subtitles are
                 saved there.
    choose    -- forwarded to find_subs(); when true the user picks the
                 subtitle to download from a list.

    Exits the process (status 0) when the language is not supported or when
    no name to search for could be determined.
    """
    if lang not in supported_langs:
        print('Error: language not supported.')
        sys.exit(0)

    # A title from the command line is one string. It must be wrapped in a
    # list: looping over the bare string would search once per character.
    if isinstance(name, (list, tuple)):
        names = list(name)
    elif name:
        names = [name]
    else:
        names = []

    if not names:
        # Extension check is case-insensitive so "Movie.MKV" is recognised.
        media_files = [ f for f in os.listdir(directory)
                        if os.path.isfile(os.path.join(directory,f))
                        and f.split('.')[-1].lower() in media_extensions ]

        # Zero or one media file needs no question; otherwise ask.
        selected = media_files
        if len(media_files) > 1:
            print ('Found multiple media files. Choose the file to search subs for:\n'
                   '(You can use a * as a choice to download subs for all these files)\n')
            for index,item in enumerate(media_files):
                print('%s. %s' % (index+1,item))

            selected = None
            while selected is None:
                answer = raw_input('Choice (number or *) : ')
                if answer == '*':
                    print('\nSubtitles for all media files will be downloaded ...')
                    selected = media_files
                    break
                try:
                    number = int(answer)
                except ValueError:
                    continue    # not a number: ask again
                if 1 <= number <= len(media_files):
                    selected = [ media_files[number-1] ]

        names = [ clean_name(movie.lower()) for movie in selected ]

    if not names:
        print ("Error: Couldn't find a name to search for. Try adding a name like \n"
               "subdl --name='the help' \n")
        sys.exit(0)

    for movie in names:
        sub_link = find_subs(name=movie,lang=lang,choose=choose)
        if sub_link:
            save_subs(sub_link,directory)

def get_yify_links(imdb_id,lang):
    """Return the best-rated yifysubtitles.com entries for a movie.

    imdb_id -- IMDb title id appended to the yifysubtitles movie URL.
    lang    -- language code; must be a key of `supported_langs`. Only
               download links containing the lower-cased language name
               are kept.

    Returns a list of [title, download_url, title] entries (the same
    three-element shape find_subs() builds for its own results), one for
    every subtitle that shares the highest rating. Returns an empty list when
    the site cannot be reached, the page has no subtitle list, or nothing
    matches the language.
    """
    print('Searching yifysubtitles.com ...')
    try:
        response = requests.get('http://www.yifysubtitles.com/movie-imdb/'+imdb_id)
    except ConnectionError:
        # This source is optional: report and let the caller carry on with
        # whatever it already found.
        print('Could not reach yifysubtitles.com, skipping.')
        return []

    soup = BeautifulSoup(response.text)

    # The list is absent when the movie is unknown to the site (or the page
    # layout differs); treat that as "no results" rather than crashing.
    subs_container = soup.find('ul',attrs={'class':'other-subs'})
    if subs_container is None:
        return []

    language = supported_langs[lang].lower()
    subs_rating = {}
    movie_text = u''

    for subs_li in subs_container.findAll('li'):
        rating_tag = subs_li.find('span',attrs={'title':'rating'})
        link_tag = subs_li.find('a',attrs={'class':'subtitle-download'})
        if rating_tag is None or link_tag is None:
            continue
        try:
            rating = int(rating_tag.text)
        except ValueError:
            continue

        # Turn the subtitle page link into the link of its zip download.
        down_link = (link_tag['href'] + '.zip').replace('subtitles','subtitle')

        if not movie_text:
            desc_tag = subs_li.find('span',attrs={'class':'subdesc'})
            if desc_tag is not None:
                # Kept as unicode: str() would fail on non-ASCII titles.
                movie_text = desc_tag.text.replace('subtitle ','') + ' YIFY'

        if language in down_link:
            subs_rating[down_link] = rating

    if not subs_rating:
        return []

    max_rating = max(subs_rating.values())
    link_prefix = 'http://www.yifysubtitles.com'
    return [ [movie_text, link_prefix+down_link, movie_text]
             for down_link, rating in subs_rating.items()
             if rating == max_rating ]

    
def find_subs(name,lang='eng',link=None,choose=False):
    """Search opensubtitles.org and return the download URL of a subtitle.

    name   -- title to search for.
    lang   -- three-letter language code; only result rows whose language
              link ends with this code are considered.
    link   -- optional site-relative path; when given, that page is fetched
              instead of running a search.
    choose -- when true, list the matching subtitles and let the user pick
              one; otherwise the first entry is used.

    Returns the download URL, False when the page has no result table, or
    None when no row matches the language. Exits the process (status 0) when
    the site cannot be reached.
    """
    base_url = 'http://www.opensubtitles.org'
    search_url = 'http://www.opensubtitles.org/en/search2'

    print('\nSearching opensubtitles.org ...\n')
    try:
        if link:
            pg_response = requests.get(base_url+link)
        else:
            # Query the search endpoint; the site root does not run a search.
            pg_response = requests.get(search_url,params={'MovieName':name,
                                                          'SubLanguageID':lang})
    except ConnectionError:
        print("\nError: Can't connect to the internet")
        sys.exit(0)

    soup = BeautifulSoup(pg_response.text)

    result_table = soup.find('table',id='search_results')
    if not result_table:
        print(("""Sorry couldn't find any subs for \n"%s"\nMaybe check the filename or try"""
               """ adding a name like subdl --name='the help'\n""") % name)
        return False

    result_rows = [ row for row in result_table.findAll('tr')
                    if 'name' in row.get('id','') ]

    html = HTMLParser()
    imdb_id = ''
    subs_links = []
    # Title, link and release name are collected in one pass so that they
    # always belong to the same row, even when rows of other languages are
    # skipped in between.
    for index, row in enumerate(result_rows):
        cells = row.findAll('td')
        if cells[1].a['href'][-3:] != lang:
            continue

        if index == 0:
            # The IMDb id is only taken from the very first result row.
            imdb_link = cells[7].a['href']
            imdb_link = imdb_link.replace('/redirect/http://www.imdb.com/title','')
            imdb_id = imdb_link.strip('/')

        text = cells[0].findAll(text=True)
        if len(text) == 4:
            release = text[1].replace('\n','').replace('\t','')
        else:
            release = u''

        subs_links.append([ html.unescape(cells[0].a.string or u''),
                            base_url+cells[4].a['href'],
                            html.unescape(release) ])

    yify_links = []
    if 'yify' in name.lower() and imdb_id:
        yify_links = get_yify_links(imdb_id,lang)

    subs_links = yify_links + subs_links
    if not subs_links:
        # Nothing to offer; also keeps the prompt below from looping forever.
        return None

    choice = 0
    if choose:
        print('\nChoose a subtitle to download for "%s" :' % name)

        for index,item in enumerate(subs_links):
            print('%s. %10s - %s' % (index+1,
                                     item[0].replace('\n','').replace('\t',''),
                                     item[2]))

        while choice not in range(1,len(subs_links)+1):
            try:
                choice = int(raw_input('Choice (number) : '))
            except ValueError:
                choice = 0
        choice -= 1

    return subs_links[choice][1]

def clean_name(name, exclude_pattern=None):
    """Turn a media file name into a search-friendly title.

    name            -- file name including its extension, e.g.
                       'the.help.2011.720p.x264.mkv'.
    exclude_pattern -- regex alternation of words to drop; defaults to the
                       module-level `words_exclude`.

    The extension (everything after the last dot) is removed, the separators
    '.', '_' and '-' become spaces, and words matched by the pattern are
    dropped. Returns the remaining words joined by single spaces; a name
    without a dot yields an empty string.
    """
    if exclude_pattern is None:
        exclude_pattern = words_exclude

    # Cut the extension off before touching separators, so extensions that
    # contain one (e.g. "dvr-ms") are removed as a whole.
    base = name.rpartition('.')[0]
    for separator in '._-':
        base = base.replace(separator, ' ')

    # split() without arguments discards the empty strings that repeated
    # separators ("a - b") would otherwise leave behind.
    words = base.split()
    excluded = set(re.findall(exclude_pattern, ' '.join(words)))
    return ' '.join(word for word in words if word not in excluded)

def save_subs(link,directory):
    """Download the subtitle archive at `link` and unpack it into `directory`.

    link      -- URL of a zip archive; a false value makes the call a no-op.
    directory -- folder that receives the archive and the extracted files.

    The archive is stored under the file name announced in the response's
    Content-Disposition header. Only members whose extension is listed in
    `subs_extensions` are extracted.

    Returns False when there is no link, the response names no file, or the
    download is not a valid zip archive; otherwise returns None.
    """
    if not link:
        return False
    print('\nDownloading subtitle ...')
    pg_response = requests.get(link)

    disposition = pg_response.headers.get('content-disposition') or ''
    match = re.search(r'filename="?([^";]+)', disposition)
    # The name comes from the server: keep only its last path component so
    # the archive cannot be written outside `directory`.
    archive_name = os.path.basename(match.group(1).strip()) if match else ''
    if not archive_name:
        print('\nError: the server did not return a subtitle archive.')
        return False
    filename = os.path.join(directory,archive_name)

    # Binary mode: a zip written in text mode is corrupted on platforms that
    # translate line endings.
    with open(filename,'wb') as zipf:
        zipf.write(pg_response.content)
    print('Downloaded file %s' % filename)

    print('\nExtracting file(s) ...')

    try:
        with zipfile.ZipFile(filename, "r") as zipf:
            members = zipf.namelist()
            if not members:
                print('\nSorry, no subtitle found in zip file!')
                return

            for item in members:
                if item.split('.')[-1].lower() in subs_extensions:
                    zipf.extract(item,path=directory)
                    print(item)
    except zipfile.BadZipfile:
        print('\nError: downloaded file is not a valid zip archive.')
        return False
    print('Done.')


# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == '__main__':
    main()
