#!/usr/bin/env python3

# Copyright 2009-13, Sean B. Palmer
# Source: http://inamidst.com/saxo/

import re
import urllib.parse
import saxo

# URL template for the printable view of an English Wiktionary page; the
# %s slot receives the URL-quoted word being looked up.
uri = "http://en.wiktionary.org/w/index.php?title=%s&printable=yes"
# Matches a single markup tag; used to strip tags from an HTML fragment.
r_tag = re.compile(r"<[^>]+>")
# Matches a complete <ul>...</ul> list (case-insensitive, spanning lines,
# non-greedy); such lists are removed from the page before it is scanned.
r_ul = re.compile(r"(?ims)<ul>.*?</ul>")

def text(html):
    """Return *html* as a single line of plain text.

    Tags are removed with ``r_tag``, surrounding whitespace is stripped,
    newlines become spaces, carriage returns are dropped, and the
    "(transitive" / "(intransitive" labels are abbreviated.
    """
    substitutions = (
        ("\n", " "),
        ("\r", ""),
        ("(intransitive", "(intr."),
        ("(transitive", "(trans."),
    )
    cleaned = r_tag.sub("", html).strip()
    # Applied in order, exactly as listed above.
    for old, new in substitutions:
        cleaned = cleaned.replace(old, new)
    return cleaned

def wiktionary(word):
    """Fetch the Wiktionary page for *word* and extract its definitions.

    The printable page is requested through ``saxo.request``, ``<ul>``
    lists are removed, and the remaining HTML is scanned line by line.
    A line carrying one of the recognised section ids switches the current
    section; any other line containing ``id="`` ends it. Scanning stops
    after the first line that contains ``<hr``.

    Returns:
        A tuple ``(etymology, definitions)``. ``etymology`` is the cleaned
        text of the last ``<p>`` line seen inside an etymology section, or
        ``None`` if there was none. ``definitions`` maps a section name
        (e.g. ``"noun"``) to the list of cleaned ``<li>`` lines found while
        that section was current.
    """
    # Checked in this order; the first marker found on a line wins.
    sections = (
        ('id="Etymology"', "etymology"),
        ('id="Noun"', "noun"),
        ('id="Verb"', "verb"),
        ('id="Adjective"', "adjective"),
        ('id="Adverb"', "adverb"),
        ('id="Interjection"', "interjection"),
        ('id="Particle"', "particle"),
        ('id="Preposition"', "preposition"),
    )

    page = saxo.request(uri % urllib.parse.quote(word))
    body = r_ul.sub("", page["text"])

    mode = None
    etymology = None
    definitions = {}
    for line in body.splitlines():
        heading = next(
            (name for marker, name in sections if marker in line), None)
        if heading is not None:
            mode = heading
        elif 'id="' in line:
            # Some other identified element: leave the current section.
            mode = None
        elif (mode == "etymology") and ("<p>" in line):
            # The mode name was previously misspelt here ("etmyology"),
            # so the etymology was never captured.
            etymology = text(line)
        elif (mode is not None) and ("<li>" in line):
            definitions.setdefault(mode, []).append(text(line))

        if "<hr" in line:
            break
    return etymology, definitions

# Sections that may appear in formatted output, in the order they are shown.
parts = (
    "preposition",
    "particle",
    "noun",
    "verb",
    "adjective",
    "adverb",
    "interjection",
)

def format(word, definitions, number=2):
    """Build a one-line summary of *word* from its *definitions*.

    For each entry of ``parts`` present in *definitions*, at most *number*
    definitions are listed, numbered from 1 and trimmed of surrounding
    spaces and full stops. Sections are separated by an em dash, and the
    final string has leading/trailing spaces, full stops and commas removed.
    """
    pieces = [word]
    for part in parts:
        if part not in definitions:
            continue
        numbered = []
        for index, entry in enumerate(definitions[part][:number], start=1):
            numbered.append("%s. %s" % (index, entry.strip(" .")))
        pieces.append(" \u2014 %s: %s" % (part, ", ".join(numbered)))
    return "".join(pieces).strip(" .,")

@saxo.pipe
def w(word):
    """Return a short Wiktionary summary line for *word*.

    An empty *word* or a lookup without definitions yields an explanatory
    message. Otherwise the summary starts with the default number of
    definitions per section and is widened to 3 and then 5 while it stays
    under 150 characters. Summaries longer than 300 characters are cut to
    295 characters followed by ``[...]``.
    """
    if not word:
        return "Nothing to define"

    # The etymology is returned by the lookup but not used in the reply.
    _, definitions = wiktionary(word)
    if not definitions:
        return "Couldn't get any definitions for %s" % word

    summary = format(word, definitions)
    for count in (3, 5):
        if len(summary) >= 150:
            break
        summary = format(word, definitions, count)

    if len(summary) <= 300:
        return summary
    return summary[:295] + '[...]'
