#!/usr/bin/env python
import StringIO
import codecs
import gzip
import sys

try:
    from urlparse import parse_qs  # Python 2.6+
except ImportError:
    from cgi import parse_qs

import web    


# Public API: the names exported by ``from <module> import *``.
# gzip_decompress_string, url_unescape and parse_qs_bytes are defined in
# this file but are not listed here.
__all__ = [
    "cut",
    "strip_bom",
    "split_by_token",

    "get_wsgi_arg",
]


def cut(src, start_token, end_token=None):
    """ Return the part of src between start_token and end_token.

    The result starts right after the first occurrence of start_token.
    If end_token is given (and non-empty), the result stops right before
    the first occurrence of end_token that follows; when end_token does
    not occur there, everything after start_token is returned.

    If start_token does not occur in src, src is returned unchanged.

      >>> src = "blah=foobar;"
      >>> cut(src, "blah=", ";")
      'foobar'
      >>> cut('tel:13800013800', 'tel:')
      '13800013800'
      >>> cut("X: 600", "", ": ")
      'X'
      >>> cut("X: 600", ": ")
      '600'
      >>> cut("blah=foobar", "blah=", ";")
      'foobar'
    """
    start_pos = src.find(start_token)
    if start_pos == -1:
        return src

    remainder = src[start_pos + len(start_token):]
    if not end_token:
        return remainder

    # partition() yields the whole remainder as its head when end_token is
    # missing, so a missing end token no longer chops off the last character
    # (the old ``buf[:buf.find(end_token)]`` sliced with -1 in that case).
    head, _sep, _tail = remainder.partition(end_token)
    return head


def strip_bom(text):
    """Drop a leading UTF-8 byte order mark from text, if there is one.

    The first three items of text are passed through web.safestr and
    compared with codecs.BOM_UTF8; on a match those three items are
    removed, otherwise text is returned unchanged.
    """
    prefix = web.safestr(text[:3])
    if prefix != codecs.BOM_UTF8:
        return text
    return text[3:]


def gzip_decompress_string(res):
    """Decompress a gzip-compressed byte string and return the raw bytes.

    res is the complete gzip stream held in memory as a byte string.

    NOTE: the original author marked this helper as not yet tested.
    """
    # Local import keeps this block self-contained; io.BytesIO is the
    # binary in-memory buffer available on Python 2.6+ and Python 3,
    # unlike the Python 2-only StringIO module.
    import io

    gz = gzip.GzipFile(fileobj=io.BytesIO(res), mode="rb")
    try:
        return gz.read()
    finally:
        # Always release the GzipFile, even when the data is corrupt.
        gz.close()


def split_by_token(str, token):
    """Split str on every occurrence of token and strip each piece.

    Returns a list of the whitespace-stripped pieces. A string that does
    not contain token yields a one-element list. token may be longer than
    one character. An empty token raises ValueError.

    >>> split_by_token('uname -a', '|')
    ['uname -a']

    >>> split_by_token("file --mime-type /tmp/CFXn.jpeg | awk '{print $2}'", '|')
    ['file --mime-type /tmp/CFXn.jpeg', "awk '{print $2}'"]

    >>> split_by_token("make && make install", '&&')
    ['make', 'make install']
    """
    # ``str`` is the caller-visible parameter name (it shadows the builtin),
    # so it is kept as is. The built-in split consumes the whole token,
    # whereas the old manual loop always skipped exactly one character and
    # left the tail of a multi-character token in the next piece.
    return [piece.strip() for piece in str.split(token)]

# Adapted from Tornado 2.3, tornado/escape.py
# Python 3 changed things around enough that we need two separate
# implementations of url_unescape.  We also need our own implementation
# of parse_qs since Python 3's version insists on decoding everything.
if sys.version_info[0] < 3:
    import urllib  # Python 2 home of unquote_plus

    def url_unescape(value, encoding='utf-8'):
        """Decodes the given value from a URL.

        The argument may be either a byte or unicode string.

        If encoding is None, the result will be a byte string.  Otherwise,
        the result is a unicode string in the specified encoding.
        """
        # unquote_plus should operate on bytes here, so unicode input is
        # encoded as UTF-8 first.
        if isinstance(value, unicode):
            value = value.encode('utf-8')
        unquoted = urllib.unquote_plus(value)
        if encoding is None:
            return unquoted
        return unicode(unquoted, encoding)

    # On Python 2 the stock parse_qs is used as-is.
    parse_qs_bytes = parse_qs
else:
    import urllib.parse  # Python 3 home of the unquote helpers and parse_qs

    def url_unescape(value, encoding='utf-8'):
        """Decodes the given value from a URL.

        The argument may be either a byte or unicode string.

        If encoding is None, the result will be a byte string.  Otherwise,
        the result is a unicode string in the specified encoding.
        """
        # The urllib.parse helpers used below want text, so byte input is
        # decoded as UTF-8 first.
        if isinstance(value, bytes):
            value = value.decode('utf-8')
        if encoding is None:
            # unquote_to_bytes has no "_plus" variant; translate '+' by
            # hand so this path matches the Python 2 implementation.
            return urllib.parse.unquote_to_bytes(value.replace('+', ' '))
        return urllib.parse.unquote_plus(value, encoding=encoding)

    def parse_qs_bytes(qs, keep_blank_values=False, strict_parsing=False):
        """Parses a query string like urlparse.parse_qs, but returns the
        values as byte strings.

        Keys still become type str (interpreted as latin1 in python3!)
        because it's too painful to keep them as byte strings in
        python3 and in practice they're nearly always ascii anyway.
        """
        # This is gross, but python3 doesn't give us another way.
        # Latin1 is the universal donor of character encodings.
        result = urllib.parse.parse_qs(qs, keep_blank_values, strict_parsing,
                                       encoding='latin1', errors='strict')
        encoded = {}
        # dict.iteritems() does not exist on Python 3; items() does.
        for key, values in result.items():
            encoded[key] = [item.encode('latin1') for item in values]
        return encoded

def get_wsgi_arg(env, name):
    """Return the last value of query parameter name from a WSGI environ.

    env is the WSGI environ dictionary; its QUERY_STRING entry is parsed
    with parse_qs_bytes. When the parameter occurs several times the last
    occurrence wins. Returns None when the parameter is missing or when
    the environ carries no query string.
    """
    # PEP 3333 allows QUERY_STRING to be absent from the environ, so a
    # missing key is treated as an empty query string instead of raising
    # KeyError.
    args = parse_qs_bytes(env.get("QUERY_STRING", ""))
    values = args.get(name)
    if not values:
        return None
    return values[-1]


if __name__ == "__main__":
    # Executed as a script: run the doctests embedded in this module.
    from doctest import testmod
    testmod()
