/**
 * C version of Pegasus invocation record parser
 * Implementation file.
 */

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include "ks.h"

#ifndef WITH_MAIN
#define WITH_MAIN 0
#endif

/*
 * global returned event string: parse_one_event() builds its output
 * here and parseBuffer() returns a pointer to it on success
 */
char ks_event[4096];

/* error handling variables */
/* message text formatted by NL_KS_ERROR; handed out by err_message() */
static char err_buf[4096];
/* set to 1 by NL_KS_ERROR, reset to 0 by parseBuffer(); see err_occurred() */
static int err_flag = 0;

/*
 * Report error and jump to 'error' label.
 *
 * Formats "Parse error: MSG\n" into err_buf (truncating rather than
 * overflowing if MSG is too long), sets err_flag, and then does
 * 'goto error', so it may only be used inside a function that defines
 * an 'error' label.
 */
#define NL_KS_ERROR(MSG) do {                                           \
        snprintf(err_buf, sizeof(err_buf), "Parse error: %s\n", (MSG)); \
        err_flag = 1;                                                   \
        goto error;                                                     \
    } while(0)


/* function declarations */
/* forward declaration: parseBuffer() calls this before its definition */
static int parse_one_event(const char *buf);

/**
 * Parse one invocation record held in buf.
 *
 * Clears the module error flag, then runs parse_one_event() on buf.
 * Returns a pointer to the global ks_event string when parsing
 * succeeded, or NULL when it failed.
 */
char *parseBuffer(const char *buf) 
{
    /* start every call with a clean error state */
    err_flag = 0;

    if (parse_one_event(buf) != 0) {
        return NULL;
    }
    return ks_event;
}

/*
 * Find position and length of value part of
 * key="value" in buf.
 *
 * An occurrence of key only counts when it
 *   - starts the buffer or follows whitespace (so "name" does not
 *     match inside "hostname"), and
 *   - is immediately followed by '=' and an opening '"'.
 * Occurrences that fail these tests (for instance key text that
 * appears inside another attribute's value) are skipped and the
 * search continues.
 *
 * Return NULL if not found (or if the value has no closing quote, or
 * key is empty), else return pointer to start of value (len will have
 * length of value). *len is left untouched when NULL is returned.
 */
static inline
char *find_nvp_value(const char *buf, const char *key, int *len)
{
    const size_t klen = strlen(key);
    const char *s = buf;

    /* an empty key would match everywhere; treat it as "not found" */
    if (klen == 0)
        return NULL;

    while ((s = strstr(s, key)) != NULL) {
        const char *after = s + klen;
        int at_name_start = (s == buf) ||
            s[-1] == ' ' || s[-1] == '\t' ||
            s[-1] == '\n' || s[-1] == '\r';

        /* after[1] is only read when after[0] is '=', i.e. not the NUL */
        if (at_name_start && after[0] == '=' && after[1] == '"') {
            const char *vstart = after + 2;
            const char *vend = strchr(vstart, '"');

            if (vend == NULL)
                return NULL; /* unterminated value */
            *len = (int)(vend - vstart);
            return (char *)vstart;
        }
        /* not an attribute name here; look for a later occurrence */
        s++;
    }
    return NULL;
}

/*
 * Copy the value of attribute KEY found in BUF into SLOT as a
 * NUL-terminated string; when KEY is absent SLOT becomes the empty
 * string. Uses the variable value_len from the expanding scope.
 * SLOT must be large enough for the value plus terminator; no size
 * check is made here.
 */
#define ADD_ATTR_OLD(BUF, KEY, SLOT) do {                               \
    char *x = find_nvp_value(BUF, KEY, &value_len);                     \
    if (x == NULL) {                                                    \
        (SLOT)[0] = '\0';                                               \
    }                                                                   \
    else {                                                              \
        memcpy((SLOT), x, value_len);                                   \
        (SLOT)[value_len] = '\0';                                       \
    }                                                                   \
} while(0)


/* 
 * If KEY is found on the left-hand side of an XML attribute
 * KEY="VALUE" in BUF then add to ks_event at offs the pair
 * "NAME=VALUE " and advance offs to the new end of buffer.
 *
 * save_offs is always set to the value offs had before the attempt,
 * so after the macro `offs == save_offs` means "nothing was added".
 * On success attrval_p points at the copy of VALUE inside ks_event
 * and attrval_len holds its length.
 *
 * If not found, nothing is written.
 *
 * If the pair does not fit in ks_event (two bytes are kept free for
 * the trailing separator and NUL written by the caller), the error is
 * reported through NL_KS_ERROR, which jumps to the 'error' label.
 *
 * Uses these variables from the expanding scope: offs, save_offs,
 * value_len, attrval_p, attrval_len. NAMELEN must equal strlen(NAME).
 */
#define ADD_ATTR(BUF, KEY, NAME, NAMELEN) do {                          \
        char *x;                                                        \
        save_offs = offs;                                               \
        if ((x = find_nvp_value(BUF, KEY, &value_len)) != NULL) {       \
            char *b;                                                    \
            if (offs < 0 || value_len < 0 ||                            \
                (size_t)offs + (size_t)(NAMELEN) + (size_t)value_len    \
                + 4 > sizeof(ks_event)) {                               \
                NL_KS_ERROR("event too long for output buffer");        \
            }                                                           \
            b = ks_event + offs;                                        \
            memcpy(b, (NAME), (NAMELEN));                               \
            b[(NAMELEN)] = '=';                                         \
            memcpy(b + (NAMELEN) + 1, x, (size_t)value_len);            \
            b[(NAMELEN) + value_len + 1] = ' ';                         \
            offs += (NAMELEN) + value_len + 2;                          \
            attrval_p = b + (NAMELEN) + 1;                              \
            attrval_len = value_len;                                    \
        }                                                               \
} while(0)

/*
 * Extract one-event data from a buffer.
 *
 * Returns 0 if it went OK, and no failed statcalls
 * Returns -1 on error.
 */
int parse_one_event(const char *buf)
{
    int statcall_err; /* flag for error under statcall element */
    int offs, save_offs; /* offset in output buffer */
    char *attrval_p; /* value of last attribute added with ADD_ATTR macro */
    int attrval_len; /* length of attrval_p value */
    char *p; /* generic position in buffer */
    char *ts_p; /* timestamp value pointer */
    int ts_len;
    char *root_p; /* position of attrs after <invocation> tag */
    char *root_end_p; /* position of end of <invocation> tag */
    char *attr_p; /* position of an XML attribute */
    int value_len; /* generic value length */
    char *mainjob_p; /* position of attrs after 'mainjob' tag */
    char *status_p; /* position of attrs after 'status' tag */
    char *usage_p; /* position of attrs after 'usage' tag */
    char *regular_p; /* position of attrs after 'regular' tag */
    char *statcall_p; /* position of statcall tag */

    offs = 0;
    /* find outer tag */
    if ((p = strstr(buf, "<invocation")) == NULL)
        NL_KS_ERROR("no invocation element");
    root_p = p + 11;
    /* NUL-terminate tag to speed up not-found attr. searches */
    if ((root_end_p = strchr(root_p, '>')) == NULL)
        NL_KS_ERROR("no '>' at end of root tag");
    *root_end_p = '\0';
    /* get value for 'ts' */
    ADD_ATTR(root_p, "start", "ts", 2);
    if (offs == save_offs)
        NL_KS_ERROR("no 'start={DATE}' attr");    
    /* save timestamp */
    ts_p = attrval_p, ts_len = attrval_len;
    /* add event name and level */
    memcpy(ks_event + offs, "event=pegasus.invocation level=Info ", 36);
    offs += 36;
    /* add duration attribute */
    ADD_ATTR(root_p, "duration", "duration", 8);
    if (offs == save_offs)
        NL_KS_ERROR("no duration attribute under root element");
    /* add optional attributes */    
    ADD_ATTR(root_p, "hostname", "host", 4);
    ADD_ATTR(root_p, "user", "user", 4);
    ADD_ATTR(root_p, "transformation", "type", 4);
    ADD_ATTR(root_p, "wf-label", "workflow.id", 11);
    /* find mainjob tag */
    if ((p = strstr(root_end_p + 1, "<mainjob")) == NULL)
        NL_KS_ERROR("no mainjob element");
    mainjob_p = p + 8;
    /* find mainjob/usage tag */
    if ((p = strstr(mainjob_p, "<usage")) == NULL)
        NL_KS_ERROR("no usage element under mainjob element");
    usage_p = p + 6;
    /* add nsignals */
     ADD_ATTR(mainjob_p, "nsignals", "nsignals", 8);
    if (offs == save_offs)
        NL_KS_ERROR("no 'nsignals' attribute under mainjob/usage element");
    /* find mainjob/status tag */
    if ((p = strstr(mainjob_p, "<status")) == NULL)
        NL_KS_ERROR("no status element under mainjob element");
    status_p = p + 7;
    /* find mainjob/status/exitcode tag */
    if ((p = strstr(status_p, "<regular")) == NULL)
        NL_KS_ERROR("no regular element under mainjob/status elements");
    regular_p = p + 8;
    /* add exitcode */
    ADD_ATTR(mainjob_p, "exitcode", "status", 6);
    if (offs == save_offs)
        NL_KS_ERROR("no exitcode attribute under mainjob/status/regular "
                    "element");
    /* add statcall errors, if any */
    statcall_p = regular_p + 1;
    statcall_err = 0;
    p = strstr(statcall_p, "<statcall");
    while (p) {
        char *err;
        int tmp;
        char *next_p = strstr(p + 9, "<statcall");
        if (err = find_nvp_value(p + 9, "error", &tmp)) {
            if (*err != '0') {
                char *pp; /* statcall sub-elements */
                statcall_err = 1;
                ks_event[offs++] = '\n';
                memcpy(ks_event + offs, "ts=", 3);
                offs += 3;
                memcpy(ks_event + offs, ts_p, ts_len);
                offs += ts_len;
                ks_event[offs++] = ' ';
                /* add event name and level */
                memcpy(ks_event + offs, 
                       "event=pegasus.invocation.stat.error level=Info ", 47);
                offs += 47;
                /* add error code */
                memcpy(ks_event + offs, "status=", 7);
                offs += 7;
                memcpy(ks_event + offs, err, tmp);
                offs += tmp;
                ks_event[offs++] = ' ';                
                /* add file name or fifo name */                
                if ((pp = strstr(err, "<file")) &&
                    (!next_p || (pp < next_p))) {
                    ADD_ATTR(pp, "name", "file", 4);
                }
                else if ((pp = strstr(err, "<fifo")) &&  
                         (!next_p || (pp < next_p))) {
                    ADD_ATTR(pp, "name", "file", 4);
                }
                /* look for statinfo subelement */
                if ((pp = strstr(err, "<statinfo")) && 
                    (!next_p || (pp < next_p))) {
                    /* add statinfo attrs */
                    ADD_ATTR(pp, "user", "user", 4);
                    ADD_ATTR(pp, "group", "group", 5);
                }
                else {
                    /* add some default values for attrs */
                    memcpy(ks_event + offs, "user=unknown group=unknown ", 28);
                    offs += 28;
                }
            }
        }
        p = next_p;
    }
    if (statcall_err) {
        /* extra trailing space */
        ks_event[offs++] = ' ';
    }
    /* ~ cleanup ~ */
    /* change last space to '\n' */
    ks_event[offs-1] = '\n';
    ks_event[offs] = '\0';
    /* reset NUL-terminated tag */
    *root_end_p = '>';

    return 0;
 error:
    return -1;
}

/* Error-handling functions */

/*
 * Return the current value of the module error flag: 1 after
 * NL_KS_ERROR has fired, 0 after parseBuffer() has reset it.
 */
int err_occurred(void)
{
    return err_flag;
}

/*
 * Return the static buffer that NL_KS_ERROR formats its message into.
 * The buffer is not cleared by parseBuffer(), so its content is only
 * meaningful when err_occurred() is non-zero.
 */
const char *err_message(void)
{
    return err_buf;
}

/* main(), for testing */

#if WITH_MAIN
/*
 * Test/benchmark driver: read the file named by argv[1] into memory,
 * run parse_one_event() on it N times, and print timing plus
 * "success" or "error" on stderr.
 *
 * Returns 0 on success, 1 on usage or I/O problems, -1 if the last
 * parse failed.
 */
int
main(int argc, char **argv)
{
    static char buf[65536]; /* whole input file, NUL-terminated */
    FILE *fp;
    size_t nread;
    int result = 0;
    int i, N=1000;
    struct timeval tv1, tv2;
    double usec;

    if (argc < 2) {
        fprintf(stderr, "usage: %s input-file\n", argv[0]);
        return 1;
    }
    /* slurp data file */
    fp = fopen(argv[1], "r");
    if (fp == NULL) {
        perror(argv[1]);
        return 1;
    }
    /* leave one byte for the terminating NUL */
    nread = fread(buf, 1, sizeof(buf) - 1, fp);
    if (ferror(fp)) {
        perror(argv[1]);
        fclose(fp);
        return 1;
    }
    /* a full buffer with more data pending means the file is too big */
    if (nread == sizeof(buf) - 1 && fgetc(fp) != EOF) {
        fprintf(stderr, "%s: input larger than %lu bytes\n",
                argv[1], (unsigned long)(sizeof(buf) - 1));
        fclose(fp);
        return 1;
    }
    fclose(fp);
    buf[nread] = '\0';

    /* parse data */
    gettimeofday(&tv1,0);
    for (i=0; i < N; i++) {
        result = parse_one_event(buf);
    }
    gettimeofday(&tv2,0);
    usec = (tv2.tv_sec - tv1.tv_sec)*1e6 + (tv2.tv_usec - tv1.tv_usec);

    /* report result */
    fprintf(stderr, "usec/iter = %lf, iter/sec = %lf\n", usec/N, 1e6*N/usec);
    if (result < 0) {
        fprintf(stderr, "error\n");
        goto error;
    }
    fprintf(stderr,"success\n");
    return 0;

 error:    
    return -1;
}
#endif /* WITH_MAIN */
