"""
This module defines top level utility functions for GraphLab.
"""
import urllib
import urllib2
import sys
import os
from zipfile import ZipFile
import bz2
import tarfile
import decorator

from graphlab.connect.aws._ec2 import get_credentials as _get_aws_credentials
import graphlab.connect.main as glconnect
import graphlab.connect.server as server

import graphlab.version_info
from pkg_resources import parse_version


import logging

# Module-level logger, named after this module; used for debug output below.
__LOGGER__ = logging.getLogger(__name__)


def make_internal_url(url):
    """
    Takes a user input url string and translates into url relative to the server process.
    - URL to a local location begins with "local://" or has no "*://" modifier.
      If the server is local, returns the absolute path of the url.
      For example: "local:///tmp/foo" -> "/tmp/foo" and "./foo" -> os.path.abspath("./foo").
      If the server is not local, raise ValueError.
    - URL to a server location begins with "remote://".
      Returns the absolute path after the "remote://" modifier.
      For example: "remote:///tmp/foo" -> "/tmp/foo".
    - URL to a s3 location begins with "s3://":
      Returns the s3 URL with credentials filled in using _get_aws_credentials(),
      unless the URL already carries an "id:secret:" prefix.
      For example: "s3://mybucket/foo" -> "s3://$AWS_ACCESS_KEY_ID:$AWS_SECRET_ACCESS_KEY:mybucket/foo".
    - URL to a hdfs location begins with "hdfs://" and is returned as is. When
      connected to a local server, Hadoop must be discoverable.
    - URL to other remote locations (http://, https://) will remain as is.

    Only the first "://" in the input is treated as the protocol separator, so
    URLs that contain "://" again later on (for example inside an http query
    string) are accepted.

    Parameters
    ----------
    url : string
        A URL (as described above).

    Returns
    -------
    string
        The URL or absolute file path to hand to the server process.

    Raises
    ------
    ValueError
        If a bad url is provided, if the protocol is not supported, if a local
        URL is used while connected to a remote server, or if an hdfs URL is
        used with a local server and Hadoop cannot be found.
    """
    if not url:
        raise ValueError('Invalid url: %s' % url)

    # Split on the FIRST "://" only; everything after it belongs to the path.
    protocol, separator, path = url.partition("://")

    if not separator:
        # No "*://" modifier at all: the whole string is a client-local path.
        protocol, path = 'local', url
    elif not path:
        # Something like "http://" with nothing after the modifier.
        raise ValueError('Invalid url: %s' % url)

    if protocol in ('http', 'https'):
        # Remote url that does not live on the server; hand it over untouched.
        return url

    if protocol == 'hdfs':
        if isinstance(glconnect.get_server(), server.LocalServer) and not server._get_hadoop_class_path():
            raise ValueError("HDFS URL is not supported because Hadoop not found. Please make hadoop available from PATH or set the environment variable HADOOP_HOME and try again.")
        return url

    if protocol == 's3':
        if path.count(":") == 2:
            # s3 url already contains the secret key/id pair, just return it.
            return url
        # s3 url does not contain the key/id pair, look the credentials up.
        (k, v) = _get_aws_credentials()
        return 's3://' + k + ':' + v + ':' + path

    if protocol == 'remote':
        # url for files on the server
        path_on_server = path
    elif protocol == 'local':
        # url for files on the local client; only meaningful when the server
        # runs locally as well.
        if not isinstance(glconnect.get_server(), server.LocalServer):
            raise ValueError('Cannot use local URL when connecting to a remote server.')
        path_on_server = path
    else:
        raise ValueError('Invalid url protocol %s. Supported url protocols are: remote://, local://, s3://, https:// and hdfs://' % protocol)

    return os.path.abspath(os.path.expanduser(path_on_server))


def download_dataset(url_str, extract=True, force=False, output_dir="."):
    """Download a remote dataset and extract the contents.

    The file is stored in `output_dir` under the last path component of
    `url_str`. Extraction only happens right after a download; a file that is
    already present (and not re-downloaded via `force`) is left untouched.

    Parameters
    ----------

    url_str : string
        The URL to download from

    extract : bool
        If true, tries to extract compressed file (zip/tar.gz/tar.bz2/bz2)

    force : bool
        If true, forces to retry the download even if the downloaded file already exists.

    output_dir : string
        The directory to dump the file. Defaults to current directory.
    """
    fname = os.path.join(output_dir, url_str.split("/")[-1])

    if os.path.isfile(fname) and not force:
        print("File is already downloaded.")
        return

    # download the file from the web
    print("Downloading file from:  %s" % url_str)
    urllib.urlretrieve(url_str, fname)
    if not extract:
        return

    # NOTE(review): extractall() trusts the member paths stored in the archive;
    # only use this with archives from sources you trust.
    if fname.endswith("zip"):
        print("Decompressing zip archive %s" % fname)
        archive = ZipFile(fname)
        try:
            archive.extractall(output_dir)
        finally:
            archive.close()
    elif fname.endswith(".tar.gz") or fname.endswith(".tar.bz2"):
        if fname.endswith(".tar.gz"):
            print("Decompressing tar.gz archive %s" % fname)
        else:
            print("Decompressing tar.bz2 archive %s" % fname)
        # tarfile.open() detects the compression; the bare TarFile constructor
        # can only read uncompressed tar files.
        archive = tarfile.open(fname)
        try:
            archive.extractall(output_dir)
        finally:
            archive.close()
    elif fname.endswith(".bz2"):
        # Require the literal ".bz2" suffix so the output name always differs
        # from the input name and the download is never overwritten.
        print("Decompressing bz2 archive:  %s" % fname)
        out_name = fname[:-len(".bz2")]
        print("Output file:  %s" % out_name)
        compressed = bz2.BZ2File(fname, "r")
        try:
            # Binary mode: the decompressed payload is arbitrary bytes.
            with open(out_name, "wb") as outfile:
                for chunk in iter(lambda: compressed.read(1 << 16), b""):
                    outfile.write(chunk)
        finally:
            compressed.close()


# Default location that is queried for the latest released version string.
__GLCREATE_CURRENT_VERSION_URL__ = "http://graphlab.com/files/glcreate_current_version"


def get_newest_version(timeout=5, _url=__GLCREATE_CURRENT_VERSION_URL__):
    """
    Returns the version of GraphLab Create currently available from graphlab.com.
    Will raise an exception if we are unable to reach the graphlab.com servers.

    Parameters
    ----------
    timeout : int
        How many seconds to wait for the remote server to respond

    _url : string
        The URL to go to to check the current version.

    Returns
    -------
    string
        The raw response body read from `_url`, not stripped.
    """
    response = urllib2.urlopen(url=_url, timeout=timeout)
    try:
        version = response.read()
    finally:
        # Always release the connection, even if reading fails.
        response.close()
    __LOGGER__.debug("current_version read %s", version)
    return version


def perform_version_check(configfile=(os.path.join(os.path.expanduser("~"), ".graphlab", "config")),
                          _url=__GLCREATE_CURRENT_VERSION_URL__,
                          _outputstream=sys.stderr):
    """
    Checks if currently running version of GraphLab is less than the version
    available from graphlab.com. Prints a message if the graphlab.com servers
    are reachable, and the current version is out of date. Does nothing
    otherwise.

    If the configfile contains a key "skip_version_check" in the Product
    section with a true value (as parsed by ConfigParser.getboolean), this
    function does nothing.

    This is a best-effort check: errors while reading the config file or while
    contacting the server are logged at debug level and otherwise ignored.

    Parameters
    ----------
    configfile : string
        Path of the config file to inspect for "skip_version_check".

    _url : string
        The URL to go to to check the current version.

    _outputstream : file-like object
        Stream the upgrade message is written to.

    Returns
    -------
    bool
        True if a message is printed, False otherwise.
    """
    skip_version_check = False
    try:
        if os.path.isfile(configfile):
            import ConfigParser
            config = ConfigParser.ConfigParser()
            config.read(configfile)
            skip_version_check = config.getboolean('Product', 'skip_version_check')
            __LOGGER__.debug("skip_version_check=%s", skip_version_check)
    except Exception:
        # A missing section/key or an unreadable file simply means "do not
        # skip". Unlike a bare except, this lets KeyboardInterrupt and
        # SystemExit propagate.
        __LOGGER__.debug("Could not read skip_version_check from %s", configfile, exc_info=True)

    # The user opted out of the version check.
    if skip_version_check:
        return False

    try:
        latest_version = get_newest_version(timeout=1, _url=_url).strip()
        if parse_version(latest_version) > parse_version(graphlab.version_info.version):
            msg = ("A newer version of GraphLab Create (v%s) is available! "
                   "Your current version is v%s.\n"
                   "You can use pip to upgrade the graphlab-create package. "
                   "For more information see http://graphlab.com/products/create/upgrade.") % (latest_version, graphlab.version_info.version)
            _outputstream.write(msg)
            return True
    except Exception:
        # Network problems or an unparsable version must never break the caller.
        __LOGGER__.debug("Version check failed", exc_info=True)
    return False


# Decorator which can be applied to any API function to avoid huge stack trace.
@decorator.decorator
def shallow_throw(func, *args, **kargs):
    """
    Call `func` with the given arguments and return its result.

    Any exception derived from Exception that `func` raises is caught and the
    same exception object is raised again from this wrapper.
    """
    try:
        result = func(*args, **kargs)
    except Exception as error:
        raise error
    return result
