
import re
import sys
import os

import requests
from bs4 import BeautifulSoup as soup

from .database import SQLiteStorage as Database

# Import the correct urllib, depending on python version
if sys.version_info[0] == 3:
    # Python 3
    from urllib.parse import quote_plus
    from urllib.parse import unquote_plus
else:
    # Python 2
    from urllib import quote_plus
    from urllib import unquote_plus

# Define the correct method to load arbitrary python modules
# See: http://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path
if sys.version_info[0] == 3 and sys.version_info[1] >= 3:
    # Python >= 3.3: importlib provides a loader for plain source files
    from importlib.machinery import SourceFileLoader

    def load_config(path):
        '''Load the python file at `path` and return it as a module.

        The module name and the user-expanded file location both come
        from get_config_path().
        '''
        module_name, location = get_config_path(path)
        return SourceFileLoader(module_name, location).load_module()
else:
    # Any other interpreter version: fall back to the legacy imp module
    import imp

    def load_config(path):
        '''Load the python file at `path` and return it as a module.

        The module name and the user-expanded file location both come
        from get_config_path().
        '''
        module_name, location = get_config_path(path)
        return imp.load_source(module_name, location)


class Scraper(object):
    '''Collect magnet links for the episodes of a show from a search page.

    `base_url` is a format string with a ``{SEARCH}`` placeholder; run()
    fills it with the URL-quoted search term before fetching the page.
    '''

    def __init__(self, base_url):
        self.base_url = base_url
        # Captures (title, season, episode) from the magnet "dn" parameter,
        # e.g. "dn=Show.Name.S01E02" -> ("Show.Name.", "01", "02").
        self.episode_re = re.compile(r'dn=(.+)S(\d+)E(\d+)')

    def good_link(self, url):
        '''Return the episode regexp match for a usable link, else False.

        A link is usable when it is a magnet link, mentions "x264" or
        "hdtv" (case-insensitive) and contains a "dn=<title>S<n>E<n>"
        part. A missing (None) or empty url is rejected.
        '''
        # Anchors without an href attribute give None; reject them here
        # instead of failing on .lower().
        if not url:
            return False

        tmp = url.lower()
        if not tmp.startswith('magnet:'):
            return False

        # TODO: replace with regexp, that can be customized
        if 'x264' not in tmp and 'hdtv' not in tmp:
            return False

        # The pattern is case-sensitive, so match the original url
        regexp = self.episode_re.search(url)
        if regexp is None:
            return False
        return regexp

    def clean_title(self, title):
        '''Turn a URL-quoted, dot-separated title into plain text.'''
        tmp = unquote_plus(title)
        tmp = tmp.replace('.', ' ')
        return tmp.strip()

    def run(self, search):
        '''Fetch the search page for `search` and return matching episodes.

        Returns a list of dicts with the keys "title", "season",
        "episode" and "url". Only links whose cleaned title equals
        `search` (ignoring case) are kept.
        '''
        request_url = self.base_url.format(SEARCH=quote_plus(search))
        raw_data = requests.get(request_url).content
        parsed_data = soup(raw_data)
        matched_links = []

        body = parsed_data.body
        if body is None:
            # No <body> in the response (e.g. an empty page): nothing to scan
            return matched_links

        wanted = search.lower()
        for link in body.find_all('a'):
            href = link.get('href')
            regexp = self.good_link(href)
            if not regexp:
                continue

            # Prevents duplicates, which may have used a similar name
            title = self.clean_title(regexp.group(1))
            if title.lower() != wanted:
                continue

            matched_links.append(dict(
                title=title,
                season=int(regexp.group(2)),
                episode=int(regexp.group(3)),
                url=href.strip(),
            ))

        return matched_links


def get_config_path(path):
    '''Return (name, full_path) for the config file at `path`.

    full_path is `path` with a leading "~" expanded; name is the file's
    base name with its last extension removed.
    '''
    expanded = os.path.expanduser(path)
    stem = os.path.splitext(os.path.basename(expanded))[0]
    return stem, expanded


def yank_episodes(config):
    '''Scrape every configured show and store the episodes found.

    Reads DEBUG, DATABASE_FILE, BASE_URL, TV_SHOWS, INITIATE_DB and
    NEW_ITEM from `config`. Each episode is inserted under the key
    "<title>-<season>-<episode>"; when the insert returns a true value
    the episode is counted and, unless INITIATE_DB is set,
    NEW_ITEM(key, url) is called. The database is committed once per
    show.
    '''
    if config.DEBUG:
        print('Checking for new episodes...')
    storage = Database(config.DATABASE_FILE)
    scraper = Scraper(config.BASE_URL)

    for show in config.TV_SHOWS:
        added = 0
        for found in scraper.run(show):
            key = '-'.join([
                found['title'],
                str(found['season']),
                str(found['episode']),
            ])
            link = found['url']
            # A false result is presumably an already-stored episode
            # (confirm against the Database class); skip it.
            if not storage.insert(key, link):
                continue
            added += 1
            if not config.INITIATE_DB:
                config.NEW_ITEM(key, link)
        storage.commit()
        if config.DEBUG:
            print('{} new items for: {}'.format(added, show))

