#include <Python.h>
#include <math.h>
#include "jellyfish.h"

/* Per-module state for the extension. */
struct jellyfish_state {
    /* The "normalize" attribute of the unicodedata module, looked up once
     * during module initialisation and called by normalize(). */
    PyObject *unicodedata_normalize;
};

/* Version-dependent helpers.
 *
 * GETSTATE(m)   - pointer to the struct jellyfish_state for module m.
 *                 Python >= 3 reads it from the module object; older
 *                 versions use a single file-scope instance and ignore m.
 * UTF8_BYTES(s) - char pointer to the contents of a PyBytes (python >= 3)
 *                 or PyString (python < 3) object, without type checking.
 */
#if PY_MAJOR_VERSION >= 3
#define GETSTATE(m) ((struct jellyfish_state*)PyModule_GetState(m))
#define UTF8_BYTES(s) (PyBytes_AS_STRING(s))
#else
static struct jellyfish_state _state;
#define GETSTATE(m) (&_state)
#define UTF8_BYTES(s) (PyString_AS_STRING(s))
#endif

/* Turn a Python string argument into a byte-string object.
 *
 * mod   - the extension module; its state supplies unicodedata.normalize.
 * pystr - the object to convert.
 *
 * Returns a new reference to a PyString (python < 3) or PyBytes
 * (python >= 3.0), or NULL with an exception set.
 *
 * A PyString / PyBytes argument is returned as-is (with its reference count
 * incremented). A PyUnicode argument is NFKD-normalised and then encoded as
 * UTF-8. Any other type raises TypeError.
 */
static inline PyObject* normalize(PyObject *mod, PyObject *pystr) {
    PyObject *nfkd;
    PyObject *encoded;

#if PY_MAJOR_VERSION < 3
    const int is_byte_string = PyString_Check(pystr);
#else
    const int is_byte_string = PyBytes_Check(pystr);
#endif

    /* Byte strings pass through untouched; the caller still owns a
     * reference it must release. */
    if (is_byte_string) {
        Py_INCREF(pystr);
        return pystr;
    }

    if (!PyUnicode_Check(pystr)) {
        PyErr_SetString(PyExc_TypeError, "expected str or unicode");
        return NULL;
    }

    /* Equivalent to unicodedata.normalize("NFKD", pystr). */
    nfkd = PyObject_CallFunction(GETSTATE(mod)->unicodedata_normalize,
                                 "sO", "NFKD", pystr);
    if (nfkd == NULL) {
        return NULL;
    }

    /* encoded may be NULL here; the exception is already set in that case. */
    encoded = PyUnicode_AsUTF8String(nfkd);
    Py_DECREF(nfkd);
    return encoded;
}

/* Python entry point for jaro_winkler(string1, string2).
 *
 * Parses two string arguments and returns the value computed by
 * jaro_winkler() as a Python float. A NaN result is reported as MemoryError
 * (presumably the helper's out-of-memory signal -- confirm in jellyfish.h).
 * Returns NULL with an exception set on failure.
 */
static PyObject * jellyfish_jaro_winkler(PyObject *self, PyObject *args)
{
    const char *left;
    const char *right;
    double score;

    if (!PyArg_ParseTuple(args, "ss", &left, &right)) {
        return NULL;
    }

    score = jaro_winkler(left, right, false);
    if (!isnan(score)) {
        return Py_BuildValue("d", score);
    }

    PyErr_NoMemory();
    return NULL;
}

/* Python entry point for jaro_distance(string1, string2).
 *
 * Parses two string arguments and returns the value computed by
 * jaro_distance() as a Python float. A NaN result is reported as MemoryError
 * (presumably the helper's out-of-memory signal -- confirm in jellyfish.h).
 * Returns NULL with an exception set on failure.
 */
static PyObject * jellyfish_jaro_distance(PyObject *self, PyObject *args)
{
    const char *left;
    const char *right;
    double score;

    if (!PyArg_ParseTuple(args, "ss", &left, &right)) {
        return NULL;
    }

    score = jaro_distance(left, right);
    if (!isnan(score)) {
        return Py_BuildValue("d", score);
    }

    PyErr_NoMemory();
    return NULL;
}

/* Python entry point for hamming_distance(string1, string2).
 *
 * Parses two string arguments and returns the value computed by
 * hamming_distance() as a Python integer. Returns NULL with an exception
 * set if argument parsing fails.
 */
static PyObject * jellyfish_hamming_distance(PyObject *self, PyObject *args)
{
    const char *left;
    const char *right;
    unsigned distance;

    if (!PyArg_ParseTuple(args, "ss", &left, &right)) {
        return NULL;
    }

    /* Stored in an unsigned local first so the value handed to the "I"
     * format is exactly an unsigned int. */
    distance = hamming_distance(left, right);
    return Py_BuildValue("I", distance);
}

/* Python entry point for levenshtein_distance(string1, string2).
 *
 * Parses two string arguments and returns the value computed by
 * levenshtein_distance() as a Python integer. Returns NULL with an
 * exception set on failure.
 */
static PyObject* jellyfish_levenshtein_distance(PyObject *self, PyObject *args)
{
    const char *left;
    const char *right;
    int distance;

    if (!PyArg_ParseTuple(args, "ss", &left, &right)) {
        return NULL;
    }

    distance = levenshtein_distance(left, right);
    if (distance != -1) {
        return Py_BuildValue("i", distance);
    }

    // levenshtein_distance only returns failure code (-1) on
    // failed malloc
    PyErr_NoMemory();
    return NULL;
}

/* Python entry point for damerau_levenshtein_distance(string1, string2).
 *
 * Parses two string arguments and returns the value computed by
 * damerau_levenshtein_distance() as a Python integer. A result of -1 is
 * reported as MemoryError. Returns NULL with an exception set on failure.
 */
static PyObject* jellyfish_damerau_levenshtein_distance(PyObject *self,
                                                     PyObject *args)
{
    const char *left;
    const char *right;
    int distance;

    if (!PyArg_ParseTuple(args, "ss", &left, &right)) {
        return NULL;
    }

    distance = damerau_levenshtein_distance(left, right);
    if (distance != -1) {
        return Py_BuildValue("i", distance);
    }

    PyErr_NoMemory();
    return NULL;
}

/* Python entry point for soundex(string).
 *
 * Accepts one object, converts it to a byte string with normalize(), and
 * returns the code produced by soundex() as a Python string. Returns NULL
 * with an exception set on failure.
 */
static PyObject* jellyfish_soundex(PyObject *self, PyObject *args)
{
    PyObject *input;
    PyObject *bytes;
    PyObject *py_code;
    char *code;

    if (!PyArg_ParseTuple(args, "O", &input)) {
        return NULL;
    }

    bytes = normalize(self, input);
    if (bytes == NULL) {
        return NULL;
    }

    /* The byte string is only needed for the duration of the call. */
    code = soundex(UTF8_BYTES(bytes));
    Py_DECREF(bytes);

    if (code == NULL) {
        // soundex only fails on bad malloc
        PyErr_NoMemory();
        return NULL;
    }

    /* The C buffer is released once the Python string has been built from
     * it (or building has failed). */
    py_code = Py_BuildValue("s", code);
    free(code);

    return py_code;
}

/* Python entry point for metaphone(string).
 *
 * Accepts one object, converts it to a byte string with normalize(), and
 * returns the representation produced by metaphone() as a Python string.
 * Returns NULL with an exception set on failure.
 */
static PyObject* jellyfish_metaphone(PyObject *self, PyObject *args)
{
    PyObject *input;
    PyObject *bytes;
    PyObject *py_code;
    char *code;

    if (!PyArg_ParseTuple(args, "O", &input)) {
        return NULL;
    }

    bytes = normalize(self, input);
    if (bytes == NULL) {
        return NULL;
    }

    /* The byte string is only needed for the duration of the call. */
    code = metaphone((const char*)UTF8_BYTES(bytes));
    Py_DECREF(bytes);

    if (code == NULL) {
        // metaphone only fails on bad malloc
        PyErr_NoMemory();
        return NULL;
    }

    /* The C buffer is released once the Python string has been built from
     * it (or building has failed). */
    py_code = Py_BuildValue("s", code);
    free(code);

    return py_code;
}

/* Python entry point for match_rating_codex(string).
 *
 * Parses one string argument and returns the buffer produced by
 * match_rating_codex() as a Python string. A NULL result is reported as
 * MemoryError. Returns NULL with an exception set on failure.
 */
static PyObject* jellyfish_match_rating_codex(PyObject *self, PyObject *args)
{
    const char *input;
    char *codex;
    PyObject *py_codex;

    if (!PyArg_ParseTuple(args, "s", &input)) {
        return NULL;
    }

    codex = match_rating_codex(input);
    if (codex == NULL) {
        PyErr_NoMemory();
        return NULL;
    }

    /* The C buffer is released once the Python string has been built from
     * it (or building has failed). */
    py_codex = Py_BuildValue("s", codex);
    free(codex);

    return py_codex;
}

/* Python entry point for match_rating_comparison(string1, string2).
 *
 * Parses two string arguments and maps the integer returned by
 * match_rating_comparison() to a Python bool: -1 is reported as MemoryError,
 * 0 becomes False and any other value becomes True. Returns NULL with an
 * exception set on failure.
 */
static PyObject* jellyfish_match_rating_comparison(PyObject *self,
                                                   PyObject *args)
{
    const char *left;
    const char *right;
    int verdict;

    if (!PyArg_ParseTuple(args, "ss", &left, &right)) {
        return NULL;
    }

    verdict = match_rating_comparison(left, right);
    if (verdict == -1) {
        PyErr_NoMemory();
        return NULL;
    }

    if (!verdict) {
        Py_RETURN_FALSE;
    }

    Py_RETURN_TRUE;
}

/* Python entry point for nysiis(string).
 *
 * Parses one string argument and returns the buffer produced by nysiis()
 * as a Python string. A NULL result is reported as MemoryError. Returns
 * NULL with an exception set on failure.
 */
static PyObject* jellyfish_nysiis(PyObject *self, PyObject *args)
{
    const char *input;
    char *code;
    PyObject *py_code;

    if (!PyArg_ParseTuple(args, "s", &input)) {
        return NULL;
    }

    code = nysiis(input);
    if (code == NULL) {
        PyErr_NoMemory();
        return NULL;
    }

    /* The C buffer is released once the Python string has been built from
     * it (or building has failed). */
    py_code = Py_BuildValue("s", code);
    free(code);

    return py_code;
}

/* Python entry point for porter_stem(string).
 *
 * Parses one string argument, runs stem() over a private, writable copy of
 * it and returns the truncated copy as a Python string. Failure to create
 * the stemmer or to copy the input is reported as MemoryError. Returns NULL
 * with an exception set on failure.
 */
static PyObject* jellyfish_porter_stem(PyObject *self, PyObject *args)
{
    const char *str;
    char *result;
    PyObject *ret;
    struct stemmer *z;
    size_t len;
    int end;

    if (!PyArg_ParseTuple(args, "s", &str)) {
        return NULL;
    }

    z = create_stemmer();
    if (!z) {
        PyErr_NoMemory();
        return NULL;
    }

    /* The argument buffer belongs to Python and is const, so the stemmer
     * works on a heap copy that can be truncated afterwards. */
    result = strdup(str);
    if (!result) {
        free_stemmer(z);
        PyErr_NoMemory();
        return NULL;
    }

    /* Do the subtraction in int: computing strlen() - 1 in size_t wraps to
     * SIZE_MAX for an empty string before being narrowed to int. With this
     * form an empty input passes -1 explicitly. */
    len = strlen(result);
    end = stem(z, result, (int)len - 1);

    /* The buffer is terminated right after the index stem() returned
     * (presumably the last character of the stem -- confirm against the
     * stemmer's documentation). */
    result[end + 1] = '\0';

    ret = Py_BuildValue("s", result);

    free(result);
    free_stemmer(z);

    return ret;
}

/* Method table for the module. Each entry gives the Python-visible name,
 * the C function implementing it, the calling convention, and the
 * docstring. The table is terminated by an all-NULL sentinel entry. */
static PyMethodDef jellyfish_methods[] = {
    {"jaro_winkler", jellyfish_jaro_winkler, METH_VARARGS,
     "jaro_winkler(string1, string2)\n\n"
     "Do a Jaro-Winkler string comparison between string1 and string2."},

    {"jaro_distance", jellyfish_jaro_distance, METH_VARARGS,
     "jaro_distance(string1, string2)\n\n"
     "Get a Jaro string distance metric for string1 and string2."},

    {"hamming_distance", jellyfish_hamming_distance, METH_VARARGS,
     "hamming_distance(string1, string2)\n\n"
     "Compute the Hamming distance between string1 and string2."},

    {"levenshtein_distance", jellyfish_levenshtein_distance, METH_VARARGS,
     "levenshtein_distance(string1, string2)\n\n"
     "Compute the Levenshtein distance between string1 and string2."},

    {"damerau_levenshtein_distance", jellyfish_damerau_levenshtein_distance,
     METH_VARARGS,
     "damerau_levenshtein_distance(string1, string2)\n\n"
     "Compute the Damerau-Levenshtein distance between string1 and string2."},

    {"soundex", jellyfish_soundex, METH_VARARGS,
     "soundex(string)\n\n"
     "Calculate the soundex code for a given name."},

    {"metaphone", jellyfish_metaphone, METH_VARARGS,
     "metaphone(string)\n\n"
     "Calculate the metaphone representation of a given string."},

    {"match_rating_codex", jellyfish_match_rating_codex, METH_VARARGS,
     "match_rating_codex(string)\n\n"
     "Calculate the Match Rating Approach representation of a given string."},

    /* Adjacent string literals are concatenated verbatim, so the separating
     * space must be written inside one of them. */
    {"match_rating_comparison", jellyfish_match_rating_comparison, METH_VARARGS,
     "match_rating_comparison(string1, string2)\n\n"
     "Compute the Match Rating Approach similarity between string1 and "
     "string2."},

    {"nysiis", jellyfish_nysiis, METH_VARARGS,
     "nysiis(string)\n\n"
     "Compute the NYSIIS (New York State Identification and Intelligence\n"
     "System) code for a string."},

    {"porter_stem", jellyfish_porter_stem, METH_VARARGS,
     "porter_stem(string)\n\n"
     "Return the result of running the Porter stemming algorithm on "
     "a single-word string."},

    {NULL, NULL, 0, NULL}
};

#if PY_MAJOR_VERSION >= 3
#define INITERROR return NULL

/* Module definition for Python >= 3. The name matches the PyInit_jellyfish
 * entry point and the name used by the Python 2 branch; m_size reserves
 * room for one struct jellyfish_state per module object. */
static struct PyModuleDef moduledef = {
    PyModuleDef_HEAD_INIT,
    "jellyfish",                        /* m_name */
    NULL,                               /* m_doc */
    sizeof(struct jellyfish_state),     /* m_size */
    jellyfish_methods,                  /* m_methods */
    NULL,
    NULL,
    NULL,
    NULL
};

/* Module entry point.
 *
 * Creates the module, then looks up unicodedata.normalize and stores it in
 * the module state for normalize() to call. On Python >= 3 returns the new
 * module, or NULL with an exception set; on Python < 3 returns nothing and
 * leaves the exception set on failure.
 *
 * PyMODINIT_FUNC is used on both branches so the symbol carries the
 * return type and export decoration the interpreter expects.
 */
PyMODINIT_FUNC PyInit_jellyfish(void)
#else

#define INITERROR return

PyMODINIT_FUNC initjellyfish(void)
#endif
{
    PyObject *unicodedata;
    PyObject *normalize_func;

#if PY_MAJOR_VERSION >= 3
    PyObject *module = PyModule_Create(&moduledef);
#else
    PyObject *module = Py_InitModule("jellyfish", jellyfish_methods);
#endif

    if (module == NULL) {
        INITERROR;
    }

    unicodedata = PyImport_ImportModule("unicodedata");
    if (!unicodedata) {
        goto fail;
    }

    normalize_func = PyObject_GetAttrString(unicodedata, "normalize");
    Py_DECREF(unicodedata);
    if (!normalize_func) {
        /* Never store NULL in the state: normalize() would later call it. */
        goto fail;
    }

    /* The state keeps the reference returned by PyObject_GetAttrString. */
    GETSTATE(module)->unicodedata_normalize = normalize_func;

#if PY_MAJOR_VERSION >= 3
    return module;
#else
    return;
#endif

fail:
#if PY_MAJOR_VERSION >= 3
    /* PyModule_Create returned a new reference that we own; Py_InitModule
     * (Python < 3) returns a borrowed one, so nothing to release there. */
    Py_DECREF(module);
#endif
    INITERROR;
}
