#!/usr/bin/env python3

# http://inamidst.com/saxo/
# Created by Sean B. Palmer

import re
import time

import saxo

# A bare screen name: 1-15 ASCII letters, digits, or underscores.
regex_twitter_username = re.compile(r"^[a-zA-Z0-9_]{1,15}$")
# Any http(s) link on twitter.com; the dot is escaped so that it only
# matches a literal "." rather than an arbitrary character.
regex_twitter_link = re.compile(r"^https?://twitter\.com/\S+$")
# The two markup shapes that tweet text is extracted from.
regex_twitter_p = re.compile(r"(?ims)(<p class=\"js-tweet-text.*?</p>)")
regex_twitter_div = re.compile(r'(?ims)<div class="tweet-text".*?</div></div>')
# Any single HTML tag.
regex_twitter_tag = re.compile(r"(?ims)<[^>]+>")
# A whole anchor element, and the data-url attribute value inside one.
regex_twitter_anchor = re.compile(r"(?ims)(<a.*?</a>)")
regex_twitter_exp = re.compile(r"(?ims)data-url=[\"'](.*?)[\"']")
# Spaces/tabs followed by line breaks, and runs of line breaks on their own.
regex_twitter_whiteline = re.compile(r"(?ims)[ \t]+[\r\n]+")
regex_twitter_breaks = re.compile(r"(?ims)[\r\n]+")
# Contents of a <b> element.
regex_twitter_b = re.compile(r"(?ims)<b>(.+?)</b>")
# Two places a page may expose the account name: a follow form's action and
# the canonical link.
regex_twitter_follow = re.compile(r'action="/([^/]+)/follow"')
regex_twitter_canonical = \
    re.compile(r'rel="canonical" href="https?://twitter\.com/([^/\">]+)')
# Ellipsis markup that is removed from the page text before any parsing.
tco = '<span class="tco-ellipsis"><span class="invisible">\xA0</span>…</span>'

class NotFound(Exception):
    """Raised by page() when the requested tweet page has status 404."""

def page(url, username=None):
    """Fetch a tweet page and scrape its author, retweet source, and text.

    Args:
        url: Address of the tweet page, requested through saxo.request with
            follow=True.
        username: Screen name of the author, if already known. When it is
            omitted (or empty) the name is scraped from the page's canonical
            link, then from a follow form, and finally defaults to "?".

    Returns:
        A (username, retweeted, tweet) tuple. retweeted is the last <b>
        element found in the 512 characters before the tweet paragraph whose
        content differs from username, or None. tweet is the tweet text
        reduced to a single line with markup removed.

    Raises:
        NotFound: The response status was 404.
        Exception: No tweet text could be extracted from the page.
    """
    response = saxo.request(url, follow=True)
    if response["status"] == 404:
        raise NotFound("No such tweet")
    # Remove the tco-ellipsis span so it never ends up in the tweet text.
    text = response["text"].replace(tco, "")

    # Only scrape the author when the caller did not supply one.
    if not username:
        found = (regex_twitter_canonical.search(text)
                 or regex_twitter_follow.search(text))
        username = found.group(1) if found is not None else "?"

    # Discard everything up to each known container marker, so the searches
    # below only see what follows it.
    shims = ('<div class="main-tweet-container">',
             '<div class="content clearfix">')
    for shim in shims:
        if shim in text:
            text = text.split(shim, 1)[1]

    def expand(fragment):
        """Replace each anchor by its data-url, or else by its bare text."""
        def replacement(match):
            anchor = match.group(1)
            target = regex_twitter_exp.search(anchor)
            if target is not None:
                return target.group(1)
            return regex_twitter_tag.sub("", anchor)
        return regex_twitter_anchor.sub(replacement, fragment)

    def clean(fragment):
        """Reduce an HTML fragment to one line of plain text."""
        plain = regex_twitter_tag.sub("", expand(fragment)).strip()
        plain = regex_twitter_whiteline.sub(" ", plain)
        return regex_twitter_breaks.sub(" ", plain)

    retweeted = None
    tweet = None

    paragraph = regex_twitter_p.search(text)
    if paragraph is not None:
        # A <b> element just ahead of the tweet text that is not the author
        # is taken as the retweeted account (presumably the original poster
        # -- confirm against live markup).
        preamble = text.split('p class="js-tweet-text', 1)[0][-512:]
        for rt in regex_twitter_b.findall(preamble):
            if rt != username:
                retweeted = rt
        tweet = clean(paragraph.group(1))

    # The div form, when present, takes precedence over the paragraph form.
    div = regex_twitter_div.search(text)
    if div is not None:
        # Drop the opening <div ...> tag itself before cleaning.
        tweet = clean(div.group(0).split(">", 1)[1])

    if not tweet:
        raise Exception("Couldn't get a tweet from %s" % response["url"])
    return username, retweeted, tweet

def format_tweet(username, retweeted, tweet):
    """Render a tweet as "text (@author)", adding "RT @source" if retweeted.

    Args:
        username: Screen name of the author.
        retweeted: Screen name shown after "RT @", or a falsy value to omit
            the retweet part.
        tweet: The tweet text.

    Returns:
        The formatted one-line string.
    """
    if not retweeted:
        return "%s (@%s)" % (tweet, username)
    return "%s (@%s, RT @%s)" % (tweet, username, retweeted)

def get_tweet_basic(username=None, tweet=None, url=None):
    """Look up a tweet by username, tweet id, or URL and return display text.

    The first truthy argument wins, in the order username, tweet, url.

    Args:
        username: Screen name; the text returned by the dpk.io tw service for
            that name is passed through unchanged.
        tweet: Tweet id as a string; it is turned into a twitter.com status
            URL and scraped with page().
        url: Tweet page URL, scraped with page().

    Returns:
        The service response text (username lookup) or the result of
        format_tweet() (id and URL lookups).

    Raises:
        ValueError: None of the three arguments was supplied.
        NotFound: Propagated from page() when the tweet page is a 404.
    """
    if username:
        # Keep the response under its own name: assigning to ``page`` here
        # would make that name local to the whole function and turn the
        # page() calls below into UnboundLocalError.
        response = saxo.request("http://dpk.io/services/tw/" + username)
        return response["text"]

    if tweet:
        # The account segment is a fixed placeholder; the status URL is built
        # from the id alone (presumably the request is redirected to the real
        # author -- confirm).
        url = "https://twitter.com/twitter/status/" + tweet

    if url:
        return format_tweet(*page(url))

    raise ValueError("Needed username, id, or url")

def get_tweet(username=None, tweet=None, url=None):
    """Like get_tweet_basic(), but report a missing tweet as a message.

    Returns:
        Whatever get_tweet_basic() returns, or the string "No such tweet"
        when it raises NotFound. Other exceptions propagate.
    """
    try:
        result = get_tweet_basic(username=username, tweet=tweet, url=url)
    except NotFound:
        result = "No such tweet"
    return result

@saxo.pipe
def tw(arg):
    """Show a tweet given a link, a username, or a tweet id.

    Args:
        arg: The command argument. When empty, saxo.env("url") is used
            instead. A leading "@" is stripped.

    Returns:
        The tweet text from get_tweet(), or a usage/error message when the
        argument is missing or is not a tweet id, username, or twitter.com
        link.
    """
    if not arg:
        arg = saxo.env("url")
        if arg is None:
            return "Show a tweet from a link, username, or tweet id"

    if arg.startswith("@"):
        arg = arg[1:]

    # A bare "@" (or an empty environment value) leaves nothing to look up.
    # Without this guard the empty string passes the digits check below and
    # ends in an uncaught ValueError from get_tweet_basic().
    if not arg:
        return "Expected a link, a username, or a tweet id"

    # All-digit arguments are tweet ids; this is checked before the username
    # pattern, which would otherwise also accept them.
    if set(arg) <= set("0123456789"):
        return get_tweet(tweet=arg)
    if regex_twitter_username.match(arg):
        return get_tweet(username=arg)
    if regex_twitter_link.match(arg):
        return get_tweet(url=arg)
    return "Expected a link, a username, or a tweet id"
