import StringIO
import base64
import cgi
import httplib
import re
import urllib
import urllib2

class Document(object):
    """A document made of attributes, visible texts, hidden texts,
    a keyword vector and a score, convertible to and from the "draft"
    text format.

    Draft layout as read and written by this class:

    * header lines ``name=value`` (attributes), ``%VECTOR\\tkey\\tvalue...``
      (keywords) and ``%SCORE\\tN``;
    * one empty line;
    * body lines: a line starting with a tab is a hidden text, any other
      non-empty line is a visible text.
    """
    __slots__ = ["attrs", "dtexts", "htexts", "kwords", "score",
                 "id"]

    def __init__(self, draft = None):
        """Create an empty document, or parse ``draft`` when it is given.

        A draft without the empty separator line is treated as consisting
        of header lines only.
        """
        self.attrs = {}
        self.dtexts = []
        self.htexts = []
        self.kwords = {}
        self.score = -1
        self.id = -1

        if draft:
            self._parse_draft(draft)

    def _parse_draft(self, draft):
        """Fill this document from the draft text ``draft``."""
        lines = draft.splitlines()
        try:
            # The search starts at index 1, so the first line always
            # belongs to the header section.
            split_pos = lines.index("", 1)
        except ValueError:
            # No separator: there is no body section at all.
            split_pos = len(lines)

        for line in lines[:split_pos]:
            if line.startswith("%"):
                if line.startswith("%VECTOR\t"):
                    # Fields alternate: key, value, key, value, ...
                    fields = line[len("%VECTOR\t"):].split("\t")
                    self.kwords = dict(zip(fields[::2], fields[1::2]))
                elif line.startswith("%SCORE\t"):
                    self.score = int(line.split("\t")[1])
            else:
                fields = _normalize(line).split("=", 1)
                if len(fields) == 2:
                    self.attrs[fields[0]] = fields[1]

        for line in lines[split_pos + 1:]:
            if not line:
                continue
            if line[0] == "\t":
                self.htexts.append(line[1:])
            else:
                self.dtexts.append(line)

    def add_attr(self, name, value):
        """Set attribute ``name`` to ``value`` (both whitespace-normalized)."""
        self.attrs[_normalize(name)] = _normalize(value)

    def add_text(self, text):
        """Append a visible text; an empty result after normalizing is skipped."""
        text = _normalize(text)
        if text:
            self.dtexts.append(text)

    def add_hidden_text(self, text):
        """Append a hidden text; an empty result after normalizing is skipped."""
        text = _normalize(text)
        if text:
            self.htexts.append(text)

    def set_keywords(self, kwords):
        """Replace the keyword vector with the mapping ``kwords``."""
        self.kwords = kwords

    def set_score(self, score):
        """Set the score; a negative score is not written by dump_draft()."""
        self.score = score

    def attr(self, name):
        """Return the value of attribute ``name``; raises KeyError if absent."""
        return self.attrs[name]

    def attr_names(self):
        """Return the attribute names as a sorted list."""
        return sorted(self.attrs)

    def cat_texts(self):
        """Return all visible texts joined by single spaces."""
        return " ".join(self.dtexts)

    def dump_draft(self):
        """Serialize the document to draft text (see the class docstring)."""
        parts = ["%s=%s\n" % (key, self.attrs[key])
                 for key in self.attr_names()]
        if self.kwords:
            # A single '%' so that the parser's "%VECTOR\t" check matches.
            parts.append("%VECTOR")
            for key, value in self.kwords.items():
                parts.append("\t%s\t%s" % (key, value))
            parts.append("\n")
        if self.score >= 0:
            parts.append("%%SCORE\t%d\n" % self.score)
        # Empty line separating the header section from the body.
        parts.append("\n")
        if self.dtexts:
            parts.append("\n".join(self.dtexts))
            parts.append("\n")
        if self.htexts:
            # Every hidden text line is marked by a leading tab.
            parts.append("\t")
            parts.append("\n\t".join(self.htexts))
            parts.append("\n")
        return "".join(parts)



class Condition(object):
    """Container for the parameters of one search request.

    The class-level constants are bit flags meant to be combined through
    set_options().
    """

    # N-gram key checking: every key / skip one / skip two / skip three.
    SURE = 1 << 0
    USUAL = 1 << 1
    FAST = 1 << 2
    AGITO = 1 << 3
    # Disable TF-IDF tuning.
    NOIDF = 1 << 4
    # Phrase interpretation: simplified / rough / union / intersection.
    SIMPLE = 1 << 10
    ROUGH = 1 << 11
    UNION = 1 << 15
    ISECT = 1 << 16

    def __init__(self):
        """Start with no phrase, no attribute expressions and default limits."""
        self.phrase = None
        self.attrs = []
        self.order = None
        self.distinct = None
        self.max = -1
        self.skip = 0
        self.options = 0
        self.mask = 0
        self.auxiliary = 32

    def set_phrase(self, phrase):
        """Store the whitespace-normalized search phrase."""
        self.phrase = _normalize(phrase)

    def add_attr(self, expr):
        """Append a whitespace-normalized attribute expression."""
        self.attrs.append(_normalize(expr))

    def set_order(self, expr):
        """Store the whitespace-normalized ordering expression."""
        self.order = _normalize(expr)

    def set_max(self, max):
        """Set the maximum number of results; negative values are ignored."""
        if not (max >= 0):
            return
        self.max = max

    def set_skip(self, skip):
        """Set the number of results to skip; negative values are ignored."""
        if not (skip >= 0):
            return
        self.skip = skip

    def set_options(self, options):
        """OR the given flag bits into the current options."""
        self.options = self.options | options

    def set_auxiliary(self, min):
        """Store the auxiliary value as given."""
        self.auxiliary = min

    def set_distinct(self, name):
        """Store the whitespace-normalized attribute name used for distinct."""
        self.distinct = _normalize(name)

    def set_mask(self, mask):
        """Store the mask value as given."""
        self.mask = mask

class ResultDocument(object):
    """One entry of a search result: URI, attributes, snippet and keywords."""

    def __init__(self, uri, attrs, snippet, keywords):
        """Keep the four given values as instance attributes."""
        self.uri = uri
        self.attrs = attrs
        self.snippet = snippet
        self.keywords = keywords

    def attr_names(self):
        """Return the attribute names as a new sorted list."""
        return sorted(self.attrs.keys())

    def attr(self, name):
        """Look up attribute ``name``; a missing name raises KeyError."""
        value = self.attrs[name]
        return value

class NodeResult(object):
    """Result of a search: a list of documents plus a dict of hints."""

    def __init__(self, docs, hints):
        """Keep ``docs`` (a sequence) and ``hints`` (a mapping) as given."""
        self.docs = docs
        self.hints = hints

    def doc_num(self):
        """Return the number of documents in the result."""
        return len(self.docs)

    def get_doc(self, index):
        """Return the document at ``index``, or None when it is out of range.

        Negative indexes are treated as out of range rather than counted
        from the end.
        """
        if index < 0 or index >= self.doc_num():
            return None
        return self.docs[index]

    def hint(self, key):
        """Return the hint stored under ``key``; raises KeyError if absent."""
        return self.hints[key]


class Transport(object):
    """Sends commands to ``url`` over HTTP and reports (status, body) pairs.

    Attributes:
        url: base URL; the command name is appended after a "/".
        pxhost, pxport: proxy host and port; used only when both are set.
        timeout: timeout in seconds; values <= 0 leave the default in place.
        auth: "name:password" string for HTTP Basic authentication, or None.
    """

    def __init__(self):
        self.url = None
        self.pxhost = None
        self.pxport = 0
        self.timeout = 0
        self.auth = None

    def send(self, command, headers = None, body=None, autoContentType = True):
        """Send ``command`` and return ``(status_code, response_body)``.

        Failures are reported through the status code instead of being
        raised: an HTTP error yields ``(its code, None)`` and any other
        failure yields ``(-1, None)``.  The ``headers`` mapping passed in is
        never modified.
        """
        try:
            return self._send(command, headers, body, autoContentType)
        except urllib2.HTTPError as e:
            import logging
            logging.getLogger(__name__).warning(
                "command %r failed: %s", command, e)
            return e.code, None
        except Exception as e:
            # Boundary of the status-code based API: callers compare the
            # status with 200, so every failure is mapped to -1 here.
            import logging
            logging.getLogger(__name__).warning(
                "command %r failed: %s", command, e)
            return -1, None

    def _send(self, command, headers = None, body=None, autoContentType=True):
        """Perform the request; returns ``(-1, None)`` when no URL is set.

        Unlike send(), errors raised by urllib2 propagate to the caller.
        """
        if not self.url:
            return -1, None
        url = self.url + "/" + command

        # Work on a private copy so that neither the caller's mapping nor a
        # shared default accumulates headers from one request to the next.
        headers = dict(headers or {})
        if autoContentType and "Content-Type" not in headers:
            headers["Content-Type"] = "application/x-www-form-urlencoded"
        if self.auth:
            headers["Authorization"] = "Basic " + base64.b64encode(self.auth)
        if body:
            headers["Content-Length"] = "%d" % len(body)
        headers["User-Agent"] = "HyperEstraierForPython/1.0.0"

        request = urllib2.Request(url, body, headers)
        if self.pxhost and self.pxport:
            # set_proxy() takes "host:port" and the scheme used to reach
            # the proxy, not a separate port number.
            request.set_proxy("%s:%s" % (self.pxhost, self.pxport), "http")

        if self.timeout and self.timeout > 0:
            response = urllib2.urlopen(request, timeout=self.timeout)
        else:
            response = urllib2.urlopen(request)
        try:
            result = response.read()
            code = response.code
        finally:
            response.close()

        return code, result

class Node(object):
    """Client for a single remote node, talking to it through a Transport.

    Every request method resets ``status`` to -1, performs the request and
    stores the status code reported by the transport in ``status``; a
    status of 200 is treated as success.
    """

    def __init__(self):
        self.transport = Transport()
        # Status code of the most recent request.
        self.status = 0
        # Node information, filled in by set_info().
        self.name = None
        self.label = None
        self.dnum = -1
        self.wnum = -1
        self.size = -1.0
        self.admins = None
        self.users = None
        self.links = None
        # Widths sent along with every search (see set_snippet_width()).
        self.wwidth = 480
        self.hwidth = 96
        self.awidth = 96

    # ----------------------------------------------------------- helpers

    @staticmethod
    def _quote(value):
        """Percent-encode ``value`` for a form-urlencoded request body."""
        if not isinstance(value, str):
            # Python 2 unicode text: quote_plus() only handles byte
            # strings reliably, so encode to UTF-8 first.
            value = value.encode("utf-8")
        return urllib.quote_plus(value)

    def _request(self, command, headers=None, body=None,
                 autoContentType=True):
        """Send ``command``, record the status and return the response body."""
        self.status = -1
        if headers is None:
            # A fresh mapping per call, so nothing is shared between calls.
            headers = {}
        self.status, result = self.transport.send(command, headers, body,
                                                  autoContentType)
        return result

    @staticmethod
    def _parse_kwords(result):
        """Turn ``key<TAB>value`` lines into a dict; other lines are skipped."""
        kwords = {}
        for line in result.splitlines():
            pair = line.split("\t")
            if len(pair) > 1:
                kwords[pair[0]] = pair[1]
        return kwords

    # ----------------------------------------------------- configuration

    def set_url(self, url):
        """Set the base URL of the node."""
        self.transport.url = url

    def set_proxy(self, host, port):
        """Set the proxy host and port used for requests."""
        self.transport.pxhost = host
        self.transport.pxport = port

    def set_timeout(self, sec):
        """Set the request timeout in seconds."""
        self.transport.timeout = sec

    def set_auth(self, name, password):
        """Set the credentials, stored as ``name:password``."""
        self.transport.auth = ":".join((name, password))

    def set_snippet_width(self, wwidth, hwidth, awidth):
        """Set the widths sent with searches.

        ``wwidth`` is always stored; negative ``hwidth`` / ``awidth`` leave
        the current value untouched.
        """
        self.wwidth = wwidth
        if hwidth >= 0:
            self.hwidth = hwidth
        if awidth >= 0:
            self.awidth = awidth

    # ------------------------------------------------ database commands

    def sync(self):
        """Send the ``sync`` command; True when the status is 200."""
        self._request("sync")
        return self.status == 200

    def optimize(self):
        """Send the ``optimize`` command; True when the status is 200."""
        self._request("optimize")
        return self.status == 200

    def put_doc(self, doc):
        """Send the draft of ``doc`` with ``put_doc``; True on status 200."""
        self._request("put_doc",
                      {"Content-Type": "text/x-estraier-draft"},
                      doc.dump_draft())
        return self.status == 200

    def out_doc(self, id):
        """Send ``out_doc`` for the document ``id``; True on status 200."""
        self._request("out_doc", body="id=" + str(id))
        return self.status == 200

    def out_doc_by_uri(self, uri):
        """Send ``out_doc`` for the document ``uri``; True on status 200."""
        self._request("out_doc", body="uri=" + self._quote(uri))
        return self.status == 200

    def edit_doc(self, doc):
        """Send the draft of ``doc`` with ``edit_doc``; True on status 200."""
        self._request("edit_doc",
                      {"Content-Type": "text/x-estraier-draft"},
                      doc.dump_draft())
        return self.status == 200

    def get_doc(self, id):
        """Return the Document with ``id``, or None unless the status is 200."""
        result = self._request("get_doc", body="id=" + str(id))
        if self.status == 200:
            return Document(result)
        return None

    def get_doc_by_uri(self, uri):
        """Return the Document at ``uri``, or None unless the status is 200."""
        result = self._request("get_doc", body="uri=" + self._quote(uri))
        if self.status == 200:
            return Document(result)
        return None

    def get_doc_attr(self, id, name):
        """Return attribute ``name`` of document ``id`` (stripped), or None."""
        body = "id=" + str(id) + "&attr=" + self._quote(name)
        result = self._request("get_doc_attr", body=body)
        if self.status == 200:
            return result.strip()
        return None

    def get_doc_attr_by_uri(self, uri, name):
        """Return attribute ``name`` of document ``uri`` (stripped), or None."""
        body = "uri=" + self._quote(uri) + "&attr=" + self._quote(name)
        result = self._request("get_doc_attr", body=body)
        if self.status == 200:
            return result.strip()
        return None

    def etch_doc(self, id):
        """Return the keyword dict of document ``id``, or None on failure."""
        result = self._request("etch_doc", body="id=" + str(id))
        if self.status == 200:
            return self._parse_kwords(result)
        return None

    def etch_doc_by_uri(self, uri):
        """Return the keyword dict of document ``uri``, or None on failure."""
        result = self._request("etch_doc", body="uri=" + self._quote(uri))
        if self.status == 200:
            return self._parse_kwords(result)
        return None

    def uri_to_id(self, uri):
        """Return the integer ID for ``uri``, or None unless the status is 200."""
        result = self._request("uri_to_id", body="uri=" + self._quote(uri))
        if self.status == 200:
            return int(result)
        return None

    # ------------------------------------------------- node information

    def get_name(self):
        """Return the node name, fetching the node information if needed."""
        if not self.name:
            self.set_info()
        return self.name

    def get_label(self):
        """Return the node label, fetching the node information if needed."""
        if not self.label:
            self.set_info()
        return self.label

    def get_doc_num(self):
        """Return the document count, fetching the information if needed."""
        if self.dnum < 0:
            self.set_info()
        return self.dnum

    def get_word_num(self):
        """Return the word count, fetching the information if needed."""
        if self.wnum < 0:
            self.set_info()
        return self.wnum

    def get_size(self):
        """Return the size, fetching the node information if needed."""
        if self.size < 0:
            self.set_info()
        return self.size

    def get_cache_usage(self):
        """Return the ``cacheusage`` reply as a float, or -1.0 on failure."""
        result = self._request("cacheusage", autoContentType=False)
        if self.status != 200 or not result:
            return -1.0
        return float(result)

    def get_admins(self):
        """Return the administrator list, fetching the information if needed."""
        # "is None" rather than truthiness: an empty list is a valid,
        # already fetched answer and must not trigger another request.
        if self.admins is None:
            self.set_info()
        return self.admins

    def get_users(self):
        """Return the user list, fetching the node information if needed."""
        if self.users is None:
            self.set_info()
        return self.users

    def get_links(self):
        """Return the link list, fetching the node information if needed."""
        if self.links is None:
            self.set_info()
        return self.links

    # ------------------------------------------------------------ search

    def search(self, condition, depth = 0):
        """Run a search and return a NodeResult, or None on failure.

        The reply is parsed as: a border line; ``key<TAB>value`` hint lines;
        then, per document, a border line, header lines (``name=value`` and
        ``%VECTOR<TAB>...``) up to an empty line, and snippet lines up to
        the next border line.  The reply must finish with the border
        followed by ``:END``; otherwise None is returned.
        """
        body = _condToQuery(condition, depth,
                            self.wwidth, self.hwidth, self.awidth)
        result = self._request("search", body=body)
        if self.status != 200 or not result:
            return None

        lines = result.splitlines()
        total = len(lines)
        border = lines[0]
        pos = 1

        # Hint section: everything up to the first border line.
        hints = {}
        while pos < total and not lines[pos].startswith(border):
            elems = lines[pos].split("\t", 1)
            if len(elems) == 2:
                hints[elems[0]] = elems[1]
            pos += 1

        docs = []
        isend = False
        # Invariant: lines[pos], when it exists, is a border line.
        while pos < total:
            line = lines[pos]
            pos += 1
            if line[len(border):] == ":END":
                isend = True
                break

            rdattrs = {}
            rdvector = ""
            while pos < total:
                rdline = lines[pos].strip()
                pos += 1
                if not rdline:
                    break
                if rdline.startswith("%"):
                    vecelem = rdline.split("\t", 1)
                    if len(vecelem) == 2 and vecelem[0] == "%VECTOR":
                        rdvector = vecelem[1]
                else:
                    elems = rdline.split("=", 1)
                    if len(elems) == 2:
                        rdattrs[elems[0]] = elems[1]

            # NOTE(review): an empty line inside a snippet marks an omitted
            # passage in the Hyper Estraier snippet format, so the snippet
            # runs up to the next border line instead of stopping at the
            # first empty line.
            sb = []
            while pos < total and not lines[pos].startswith(border):
                sb.append(lines[pos])
                pos += 1

            rduri = rdattrs.get("@uri")
            if rduri:
                docs.append(ResultDocument(rduri, rdattrs,
                                           "\n".join(sb), rdvector))

        if isend:
            return NodeResult(docs, hints)
        return None

    # ---------------------------------------------------- administration

    def set_user(self, user, mode):
        """Send ``_set_user`` for ``user`` with ``mode``; True on status 200."""
        body = "name=" + self._quote(user) + "&mode=" + str(mode)
        self._request("_set_user", body=body)
        return self.status == 200

    def set_link(self, url, label, credit):
        """Send ``_set_link``; ``credit`` is only sent when it is >= 0."""
        body = "url=" + self._quote(url) + "&label=" + self._quote(label)
        if credit >= 0:
            body += "&credit=" + str(credit)
        self._request("_set_link", body=body)
        return self.status == 200

    def set_info(self):
        """Fetch the node information with ``inform`` and store it.

        The first reply line must hold five tab-separated fields: name,
        label, document count, word count and size.  The remaining lines
        form three sections separated by empty lines: administrators,
        users and links (each link line is split on tabs).  Nothing is
        changed when the request fails or the first line is malformed.
        """
        result = self._request("inform", autoContentType=False)
        if self.status != 200 or not result:
            return
        lines = result.splitlines()
        elems = lines[0].split("\t")
        if len(elems) != 5:
            return
        self.name = elems[0]
        self.label = elems[1]
        self.dnum = int(elems[2])
        self.wnum = int(elems[3])
        self.size = float(elems[4])

        self.admins = []
        self.users = []
        self.links = []

        rest = lines[1:]
        # NOTE(review): the reference Hyper Estraier clients skip one empty
        # line between the header line and the administrator section;
        # without this the three lists would be shifted by one section.
        if rest and not rest[0]:
            rest = rest[1:]

        section = 0  # 0: admins, 1: users, 2: links
        for line in rest:
            if not line:
                section += 1
                if section > 2:
                    break
            elif section == 0:
                self.admins.append(line)
            elif section == 1:
                self.users.append(line)
            else:
                self.links.append(line.split("\t", 3))


SNIP_PATTERN = re.compile("[ \t\r\n\v\f]+")
SPC_PATTERN = re.compile(" +")
def _normalize(value):
    value = SNIP_PATTERN.sub(" ", value)
    value = SPC_PATTERN.sub(" ", value)
    return value

def _condToQuery(cond, depth, wwidth, hwidth, awidth):
    buf = []
    if cond.phrase:
        buf.append(("phrase", cgi.escape(cond.phrase)))
    for i in range(0, len(cond.attrs)):
        buf.append(("attr" + str(i+1), cgi.escape(cond.attrs[i])))
    if cond.order:
        buf.append(("order", cgi.escape(cond.order)))
    if cond.max >= 0:
        buf.append(("max", str(cond.max)))
    else:
        buf.append(("max", str(1<<30)))
    if cond.options > 0:
        buf.appends(("options", str(cond.options)))
    buf.append(("auxiliary", str(cond.auxiliary)))
    if cond.distinct:
        buf.append(("distinct", cgi.escape(cond.distinct)))
    if depth>0:
        buf.append(("depth", str(depth)))
    buf.append(("wwidth", str(wwidth)))
    buf.append(("hwidth", str(hwidth)))
    buf.append(("awidth", str(awidth)))
    buf.append(("skip", str(cond.skip)))
    buf.append(("mask", str(cond.mask)))
    
    return "&".join(["%s=%s" % (e[0], e[1]) for e in buf])
