#!/usr/bin/env python3

# http://inamidst.com/saxo/
# Created by Sean B. Palmer

import os
import saxo

# Hex codes of the surrogate rows in UnicodeData.txt. That file lists only the
# first and last code point of each surrogate range, so six entries cover
# them all. Rows whose hex code is in this set are stored with an empty
# character instead of chr(codepoint).
surrogates = {
    "D800", "DB7F",
    "DB80", "DBFF",
    "DC00", "DFFF",
}

def create_table(db):
    """Create the ``saxo_unicode`` table through *db*.

    The table object is looked up as ``db["saxo_unicode"]`` and its
    ``create`` method receives one ``(column_name, python_type)`` pair per
    column, in this order: hexcode, codepoint, name, current, ancient,
    category, character, display.
    """
    schema = [
        ("hexcode", str),
        ("codepoint", int),
    ]
    # Every remaining column holds text.
    text_columns = (
        "name", "current", "ancient", "category", "character", "display")
    schema.extend((column, str) for column in text_columns)

    table = db["saxo_unicode"]
    table.create(*schema)

def populate_table_web(db):
    """Download UnicodeData.txt and insert one row per record into saxo_unicode.

    The file is fetched with ``saxo.request`` and its ``"text"`` entry is
    parsed line by line. Each record yields a row of
    ``(hexcode, codepoint, name, current, ancient, category, character,
    display)``:

    * ``current`` is the record's name field and ``ancient`` is its eleventh
      field (the Unicode 1.0 name, per the UAX #44 file layout).
    * ``name`` is ``current``, except for ``<control>`` records, where
      ``ancient`` is preferred when it is non-empty.
    * ``character`` is ``chr(codepoint)``, or ``""`` for surrogate rows.
    * ``display`` is a printable stand-in: combining marks are placed on
      U+25CC, C0 controls map to their U+24xx control pictures, other
      ``C*`` categories except ``Co`` become ``<category>``, and anything
      else is the character itself.

    Rows are inserted with ``commit=False`` and a single ``db.commit()`` is
    made once all lines have been processed.

    Blank lines are skipped. Any other line that does not have exactly 15
    semicolon-separated fields raises ValueError, as before.
    """
    url = "http://www.unicode.org/Public/UNIDATA/UnicodeData.txt"
    page = saxo.request(url)

    for line in page["text"].splitlines():
        # Tolerate blank or whitespace-only lines instead of failing on the
        # strict 15-way unpack below.
        if not line.strip():
            continue

        # Only the code point, name, general category and the eleventh field
        # are used; the strict unpack still rejects malformed records.
        (hexcode, current, category,
         _, _, _, _, _, _, _,
         ancient,
         _, _, _, _) = line.split(";")

        codepoint = int(hexcode, 16)

        # Surrogate rows get no character; the row itself is still stored.
        character = "" if hexcode in surrogates else chr(codepoint)

        if category.startswith("M"):
            # TODO: Just Mn?
            # Show marks on a dotted circle (U+25CC) so they have a base.
            display = "\u25CC" + character
        elif category.startswith("C") and not category.endswith("o"):
            # Co is Private_Use, allow those
            if 0 <= codepoint <= 0x1F:
                # C0 controls: use the matching Control Pictures symbol.
                display = chr(codepoint + 0x2400)
            else:
                display = "<%s>" % category
        else:
            display = character

        if current != "<control>":
            name = current
        else:
            # Control records carry their useful name in the old-name field.
            name = ancient or current

        db["saxo_unicode"].insert((hexcode, codepoint, name, current,
            ancient, category, character, display), commit=False)

    db.commit()

def delete_table(db):
    """Remove the ``saxo_unicode`` entry from *db* via item deletion."""
    table_name = "saxo_unicode"
    del db[table_name]

def say(msg):
    """Send *msg* as a PRIVMSG to the current ``sender``.

    The recipient is read from ``saxo.env("sender")`` and the message is
    dispatched through ``saxo.client``.
    """
    saxo.client("send", "PRIVMSG", saxo.env("sender"), msg)

@saxo.command(authorised=True, private=True)
def update_unicode_data(irc):
    """Rebuild the ``saxo_unicode`` table from a fresh UnicodeData.txt.

    Opens ``database.sqlite3`` under the directory given by
    ``saxo.env("base")``, drops the existing ``saxo_unicode`` table if there
    is one, recreates it, tells the sender that the download is starting,
    and then populates the table from the web.

    The *irc* argument is not used. Returns a confirmation message string.

    NOTE(review): the ``authorised=True, private=True`` flags presumably
    restrict this command to authorised users in private messages; confirm
    against ``saxo.command``.
    """
    base_directory = saxo.env("base")
    database_file = os.path.join(base_directory, "database.sqlite3")

    with saxo.database(database_file) as db:
        already_present = "saxo_unicode" in db
        if already_present:
            delete_table(db)
        create_table(db)

        # Announce before the download starts.
        say("Downloading UnicodeData.txt from unicode.org...")
        populate_table_web(db)

    return "Updated saxo_unicode in database.sqlite3"
