#!/usr/bin/env python3

# Copyright 2012-4, Sean B. Palmer
# Source: http://inamidst.com/saxo/

import re
import saxo
import time

# Patterns used to classify command input and to scrape tweet text out of
# Twitter's HTML. "(?ims)" turns on case-insensitive, multi-line and
# dot-matches-newline matching for the scraping patterns.

# A bare account name: 1 to 15 ASCII letters, digits or underscores.
regex_twitter_username = re.compile(r"^[a-zA-Z0-9_]{1,15}$")
# An http(s) link on twitter.com. The dot is escaped so that only the literal
# host matches, not any character in that position.
regex_twitter_link = re.compile(r"^https?://twitter\.com/\S+$")
# Tweet paragraph whose class attribute starts with "js-tweet-text".
regex_twitter_p = re.compile(r"(?ims)(<p class=\"js-tweet-text.*?</p>)")
# Tweet container with class "tweet-text", up to its doubled closing </div>.
regex_twitter_div = re.compile(r'(?ims)<div class="tweet-text".*?</div></div>')
# Any single HTML tag; substituted away to leave plain text.
regex_twitter_tag = re.compile(r"(?ims)<[^>]+>")
# A whole anchor element, captured so it can be replaced by its target.
regex_twitter_anchor = re.compile(r"(?ims)(<a.*?</a>)")
# The value of a data-url attribute inside an anchor.
regex_twitter_exp = re.compile(r"(?ims)data-url=[\"'](.*?)[\"']")
# Trailing spaces/tabs together with the line break(s) that follow them.
regex_twitter_whiteline = re.compile(r"(?ims)[ \t]+[\r\n]+")
# Any remaining run of line breaks.
regex_twitter_breaks = re.compile(r"(?ims)[\r\n]+")
# Bold text; scanned just before a tweet to spot a retweeted account name.
regex_twitter_b = re.compile(r"(?ims)<b>(.+?)</b>")
# Account name taken from a follow form's action attribute.
regex_twitter_follow = re.compile(r'action="/([^/]+)/follow"')
# Account name taken from the page's canonical link (host dot escaped).
regex_twitter_canonical = re.compile(
    r'rel="canonical" href="https?://twitter\.com/([^/\">]+)'
)

def page(url, username=None):
    """Fetch a Twitter page and scrape the first tweet out of its HTML.

    URLs containing "/status" are rewritten to the mobile.twitter host before
    the request is made.

    Args:
        url: Address of the Twitter page to fetch.
        username: Fallback account name. It is used only when the fetched
            HTML yields no name from its canonical link or follow form; the
            name found in the page always takes precedence.

    Returns:
        A ``(username, retweeted, tweet)`` tuple. ``username`` is "?" when no
        name could be determined. ``retweeted`` is None, or the last bold
        name found shortly before the tweet paragraph that differs from
        ``username``. ``tweet`` is the tweet text with links expanded, tags
        removed and line breaks collapsed to spaces.

    Raises:
        Exception: If no tweet text could be extracted from the page.
    """
    if "/status" in url:
        url = url.replace("//twitter", "//mobile.twitter")

    response = saxo.request(url, follow=True)
    text = response["text"]

    # Take the name from the page when it offers one (canonical link first,
    # then the follow form); otherwise keep the caller's value, or "?".
    found = (regex_twitter_canonical.search(text)
             or regex_twitter_follow.search(text))
    if found:
        username = found.group(1)
    elif not username:
        username = "?"

    # Discard everything up to and including each known container marker so
    # that the patterns below start searching at the main tweet.
    shims = ['<div class="main-tweet-container">',
             '<div class="content clearfix">']
    for shim in shims:
        if shim in text:
            text = text.split(shim, 1).pop()

    def expand(fragment):
        """Replace each anchor by its data-url, or else by its bare text."""
        def replacement(match):
            anchor = match.group(1)
            link = regex_twitter_exp.search(anchor)
            if link:
                return link.group(1)
            return regex_twitter_tag.sub("", anchor)
        return regex_twitter_anchor.sub(replacement, fragment)

    def flatten(markup):
        """Expand links, strip tags, and fold the text onto a single line."""
        plain = regex_twitter_tag.sub("", expand(markup)).strip()
        plain = regex_twitter_whiteline.sub(" ", plain)
        return regex_twitter_breaks.sub(" ", plain)

    retweeted = None
    tweet = None

    paragraph = regex_twitter_p.search(text)
    if paragraph:
        # Look at the 512 characters before the first tweet paragraph for a
        # bold name other than the author's; the last such name wins.
        preamble = text.split('p class="js-tweet-text', 1)[0][-512:]
        for rt in regex_twitter_b.findall(preamble):
            if rt != username:
                retweeted = rt
        tweet = flatten(paragraph.group(1))

    # A "tweet-text" div, when present, overrides the paragraph result.
    div = regex_twitter_div.search(text)
    if div:
        # Drop the div's own opening tag before cleaning up its contents.
        tweet = flatten(div.group(0).split(">", 1).pop())

    if not tweet:
        raise Exception("Couldn't get a tweet from %s" % response["url"])
    return username, retweeted, tweet

def format_tweet(username, retweeted, tweet):
    """Render a tweet as one line of text followed by its attribution.

    The attribution is "(@username)", extended with ", RT @retweeted" when
    ``retweeted`` is truthy.
    """
    if not retweeted:
        return "%s (@%s)" % (tweet, username)
    return "%s (@%s, RT @%s)" % (tweet, username, retweeted)

def get_tweet(username=None, tweet=None, url=None):
    """Return a formatted tweet for a username, a tweet id, or a URL.

    The first truthy argument, checked in that order, selects the page:
    the account's profile (with the current time appended as a query
    string), a status URL built from the id, or the URL as given.

    Raises:
        ValueError: If none of the three arguments is truthy.
    """
    if username:
        address = "https://twitter.com/" + username + "?" + str(time.time())
        scraped = page(address, username=username)
    elif tweet:
        scraped = page("https://twitter.com/twitter/status/" + tweet)
    elif url:
        scraped = page(url)
    else:
        raise ValueError("Needed username, id, or url")

    author, retweeted, body = scraped
    return format_tweet(author, retweeted, body)

@saxo.pipe
def tw(arg):
    """Show a tweet for a link, a username, or a numeric tweet id.

    When ``arg`` is empty the value of ``saxo.env("url")`` is used instead;
    if that is None a usage message is returned. A leading "@" is dropped
    before the input is classified.

    Returns:
        The formatted tweet, or a short help/error message when there is no
        input or the input is not a tweet id, username, or twitter.com link.
    """
    # NOTE(review): saxo.pipe is defined outside this file; presumably it
    # exposes this function as a bot command -- confirm against saxo.
    if not arg:
        arg = saxo.env("url")
        if arg is None:
            return "Show a tweet from a link, username, or tweet id"

    if arg.startswith("@"):
        arg = arg[1:]

    # The empty string (e.g. from a lone "@") is a subset of any set, so it
    # must be excluded here or it would be passed on as an empty tweet id and
    # make get_tweet raise ValueError instead of reaching the message below.
    if arg and set(arg) <= set("0123456789"):
        return get_tweet(tweet=arg)
    if regex_twitter_username.match(arg):
        return get_tweet(username=arg)
    if regex_twitter_link.match(arg):
        return get_tweet(url=arg)
    return "Expected a link, a username, or a tweet id"
