'''
Routines to construct the finite state machine for the LEMON parser
generator.
'''

from ccruft import iterlinks

from action import *
from configlist import *
from error import *
from plink import *
from set import *
from struct import *
from table import *



def FindRulePrecedences(xp):
    '''Assign a precedence symbol to every rule that lacks one.

    Rules whose precsym field is already set are left untouched.  For
    every other rule the right-hand side is scanned left to right and
    the first symbol whose "prec" is non-negative becomes the rule's
    precedence symbol.  For a MULTITERMINAL the sub-symbols are scanned
    in order instead and the first one with non-negative "prec" is
    used.  If no such symbol exists, precsym stays None.
    '''

    for rule in iterlinks(xp.rule):
        if rule.precsym is not None:
            # Already has a precedence symbol; nothing to infer.
            continue

        pos = 0
        # Stop as soon as a precedence symbol has been found.
        while pos < rule.nrhs and rule.precsym is None:
            symbol = rule.rhs[pos]
            pos += 1

            if symbol.type != MULTITERMINAL:
                if symbol.prec >= 0:
                    rule.precsym = symbol
                continue

            # A multiterminal contributes its first sub-symbol that
            # carries a precedence, if any.
            for k in range(symbol.nsubsym):
                candidate = symbol.subsym[k]
                if candidate.prec >= 0:
                    rule.precsym = candidate
                    break

    return


def FindFirstSets(lemp):
    '''Find all nonterminals which will generate the empty string.
    Then go back and compute the first sets of every nonterminal.  The
    first set is the set of all terminal symbols which can begin a
    string generated by that nonterminal.

    Every symbol in lemp.symbols gets its "_lambda" flag reset, and
    every symbol from index lemp.nterminal onward gets a fresh
    "firstset".  Both computations iterate over the rule list until a
    full pass makes no further change.
    '''

    for i in range(lemp.nsymbol):
        lemp.symbols[i]._lambda = False

    for i in range(lemp.nterminal, lemp.nsymbol):
        lemp.symbols[i].firstset = SetNew()

    # First compute all lambdas.  A left-hand side can generate the
    # empty string when every symbol on the right-hand side can (which
    # is trivially the case for an empty right-hand side).
    progress = 1
    while progress:
        progress = 0
        for rp in iterlinks(lemp.rule):
            if rp.lhs._lambda:
                continue
            for i in range(rp.nrhs):
                sp = rp.rhs[i]
                # Terminals and multiterminals never generate the empty
                # string; any other symbol does only once its _lambda
                # flag has been set.  The type test comes first so that
                # _lambda is never read on a multiterminal.
                if sp.type in (TERMINAL, MULTITERMINAL) or not sp._lambda:
                    break
            else:
                rp.lhs._lambda = True
                progress = 1

    # Now compute all first sets
    progress = 1
    while progress:
        progress = 0
        for rp in iterlinks(lemp.rule):
            s1 = rp.lhs
            for i in range(rp.nrhs):
                s2 = rp.rhs[i]
                if s2.type == TERMINAL:
                    # A terminal begins the string; nothing after it
                    # can contribute.
                    progress += SetAdd(s1.firstset, s2.index)
                    break
                elif s2.type == MULTITERMINAL:
                    # Any of the alternatives may begin the string.
                    for j in range(s2.nsubsym):
                        progress += SetAdd(s1.firstset, s2.subsym[j].index)
                    break
                elif s1 == s2:
                    # Left recursion adds nothing new; look past the
                    # symbol only if it can be empty.
                    if not s1._lambda:
                        break
                else:
                    progress += SetUnion(s1.firstset, s2.firstset)
                    # Keep scanning only while the prefix can vanish.
                    if not s2._lambda:
                        break
    return


def FindStates(lemp):
    '''Build the LR(0) states of the grammar.

    The start symbol is looked up from lemp.start; when it is unset or
    unknown, the left-hand side of the first rule is used (an unknown
    name is also reported as an error).  Every rule of the start symbol
    seeds the basis of the first state, and getstate() then builds that
    state together with all of its successors.
    '''

    Configlist_init()

    # Resolve the start symbol, remembering whether a lookup failed.
    start = None
    if lemp.start:
        start = Symbol_find(lemp.start)
        if start is None:
            ErrorMsg(lemp.filename, 0,
                     'The specified start symbol "%s" '
                     'is not in a nonterminal of the grammar.  '
                     '"%s" will be used as the start symbol instead.',
                     lemp.start, lemp.rule.lhs.name)
            lemp.errorcnt += 1
    if start is None:
        start = lemp.rule.lhs

    # The start symbol must not appear on any right-hand side.  Each
    # occurrence is reported as a separate error.  (YACC would generate
    # a new start symbol in this case.)
    # FIX ME:  Deal with multiterminals
    for rule in iterlinks(lemp.rule):
        for pos in range(rule.nrhs):
            if rule.rhs[pos] == start:
                ErrorMsg(lemp.filename, 0,
                         'The start symbol "%s" '
                         'occurs on the right-hand side of a rule. '
                         'This will result in a parser '
                         'which does not work properly.',
                         start.name)
                lemp.errorcnt += 1

    # Seed the basis of the first state with every rule whose
    # left-hand side is the start symbol, dot at position 0.
    for rule in iterlinks(start.rule, 'nextlhs'):
        rule.lhsStart = 1
        seed = Configlist_addbasis(rule, 0)
        SetAdd(seed.fws, 0)

    # Building the first state builds every other state as well; the
    # returned state is not needed here.
    getstate(lemp)
    return


def getstate(lemp):
    '''Return the state described by the current configuration list.

    The basis must have been assembled beforehand through calls to
    Configlist_addbasis().  If a state with the same basis is already
    known it is returned (after merging propagation links into it);
    otherwise a new state is created, numbered, registered, and its
    successors are built via buildshifts().
    '''

    # Sort and fetch the basis that identifies the state.
    Configlist_sortbasis()
    basis = Configlist_basis()

    known = State_find(basis)
    if not known:
        # This really is a new state.  Construct all the details.
        Configlist_closure(lemp)          # Compute the closure
        Configlist_sort()                 # Sort the closure
        closure = Configlist_return()     # Take the config list
        created = State_new()
        created.bp = basis                # Configuration basis
        created.cfp = closure             # Configuration closure
        created.statenum = lemp.nstate    # Sequence number
        lemp.nstate += 1
        created.ap = None                 # No actions, yet.
        State_insert(created, created.bp) # Add to the state table
        buildshifts(lemp, created)        # Recursively build successors
        return created

    # A state with the same basis already exists.  Walk both basis
    # chains in lockstep, copying the backward propagation links of
    # the state under construction into the existing state and
    # clearing the links on the discarded configurations.
    discarded = basis
    kept = known.bp
    while discarded and kept:
        kept.bplp = Plink_copy(kept.bplp, discarded.bplp)
        discarded.fplp = discarded.bplp = None
        discarded = discarded.bp
        kept = kept.bp

    Configlist_return()
    return known


def same_symbol(a, b):
    '''Return True when "a" and "b" denote the same symbol.

    Two symbols match when they compare equal, or when both are
    MULTITERMINALs with the same number of sub-symbols and pairwise
    equal sub-symbols in the same order.
    '''
    if a == b:
        return True

    # Only a pair of multiterminals can still match structurally.
    if a.type != MULTITERMINAL or b.type != MULTITERMINAL:
        return False

    count = a.nsubsym
    if count != b.nsubsym:
        return False

    return not any(a.subsym[k] != b.subsym[k] for k in range(count))


def buildshifts(lemp, stp):
    '''Build every state reachable from "stp" by a single shift.

    Configurations of "stp" are grouped by the symbol following their
    dot.  Each group forms the basis of a successor state (dot moved
    one position right), which is obtained through getstate(), and a
    SHIFT action on the symbol is added to "stp".
    '''

    # A configuration becomes COMPLETE once it has contributed to a
    # successor state.  Initially, all configurations are incomplete.
    for config in iterlinks(stp.cfp):
        config.status = INCOMPLETE

    for cfp in iterlinks(stp.cfp):
        # Skip configurations already consumed by an earlier group and
        # those whose dot is at the end (nothing to shift).
        if cfp.status == COMPLETE or cfp.dot >= cfp.rp.nrhs:
            continue

        Configlist_reset()               # Start a new basis set
        sp = cfp.rp.rhs[cfp.dot]         # Symbol after the dot

        # Collect, from "cfp" onward, every unused configuration with
        # the same symbol after its dot and add it to the new basis
        # with the dot advanced by one.
        for bcfp in iterlinks(cfp):
            if bcfp.status == COMPLETE or bcfp.dot >= bcfp.rp.nrhs:
                continue
            if not same_symbol(bcfp.rp.rhs[bcfp.dot], sp):
                continue
            bcfp.status = COMPLETE       # Mark this config as used
            shifted = Configlist_addbasis(bcfp.rp, bcfp.dot + 1)
            shifted.bplp = Plink_add(shifted.bplp, bcfp)

        # The state described by the basis built above.
        newstp = getstate(lemp)

        # "newstp" is reached from "stp" by shifting "sp"; a
        # multiterminal yields one shift per sub-symbol.
        if sp.type == MULTITERMINAL:
            lookaheads = [sp.subsym[i] for i in range(sp.nsubsym)]
        else:
            lookaheads = [sp]
        for lookahead in lookaheads:
            stp.ap = Action_add(stp.ap, SHIFT, lookahead, newstp)

    return


def FindLinks(lemp):
    '''Construct the forward propagation links.

    Each configuration first receives a pointer to the state that owns
    it.  Then every backward link (bplp) is mirrored as a forward link
    (fplp) on the configuration it points to.
    '''

    # Housekeeping: record the owning state on every configuration.
    for index in range(lemp.nstate):
        owner = lemp.sorted[index]
        for cfp in iterlinks(owner.cfp):
            cfp.stp = owner

    # Turn every backlink into a forward link on its target.  Only
    # the forward links are used in the follow-set computation.
    for index in range(lemp.nstate):
        for cfp in iterlinks(lemp.sorted[index].cfp):
            for backlink in iterlinks(cfp.bplp):
                source = backlink.cfp
                source.fplp = Plink_add(source.fplp, cfp)

    return


def FindFollowSets(lemp):
    '''Propagate follow sets along the forward links until stable.

    A followset is the set of all symbols which can come immediately
    after a configuration.  Each configuration's set (fws) is merged
    into the sets of the configurations it links to; a configuration
    whose set grew is marked INCOMPLETE so it is processed again.
    '''

    states = [lemp.sorted[i] for i in range(lemp.nstate)]

    # Everything needs at least one propagation pass.
    for state in states:
        for cfp in iterlinks(state.cfp):
            cfp.status = INCOMPLETE

    changed = True
    while changed:
        changed = False
        for state in states:
            for cfp in iterlinks(state.cfp):
                if cfp.status == COMPLETE:
                    continue
                for link in iterlinks(cfp.fplp):
                    target = link.cfp
                    if SetUnion(target.fws, cfp.fws):
                        # The target grew, so it must propagate again.
                        target.status = INCOMPLETE
                        changed = True
                cfp.status = COMPLETE

    return


def FindActions(lemp):
    '''Add reduce and accept actions, resolve conflicts, and report
    rules that are never reduced.

    Unresolved conflicts are accumulated in lemp.nconflict and each
    rule without any REDUCE action is counted in lemp.errorcnt.
    '''

    # Reduce actions: for each configuration with the dot at the
    # extreme right, add a REDUCE by its rule on every terminal that
    # is a member of the configuration's followset.
    for index in range(lemp.nstate):
        state = lemp.sorted[index]
        for cfp in iterlinks(state.cfp):
            if cfp.rp.nrhs != cfp.dot:
                continue
            for term in range(lemp.nterminal):
                if SetFind(cfp.fws, term):
                    state.ap = Action_add(state.ap, REDUCE,
                                          lemp.symbols[term], cfp.rp)

    # Determine the start symbol: the named one if it can be found,
    # otherwise the left-hand side of the first rule.
    start = Symbol_find(lemp.start) if lemp.start else None
    if start is None:
        start = lemp.rule.lhs

    # The first state (always the starting state of the finite state
    # machine) ACCEPTs when the lookahead is the start nonterminal.
    first = lemp.sorted[0]
    first.ap = Action_add(first.ap, ACCEPT, start, None)

    # Resolve conflicts: after sorting, compare each action with the
    # run of following actions that share its lookahead symbol.
    for index in range(lemp.nstate):
        state = lemp.sorted[index]
        state.ap = Action_sort(state.ap)
        current = state.ap
        while current and current.next:
            other = current.next
            while other and other.sp == current.sp:
                lemp.nconflict += resolve_conflict(current, other)
                other = other.next
            current = current.next

    # Report an error for each rule that can never be reduced.
    for rule in iterlinks(lemp.rule):
        rule.canReduce = False
    for index in range(lemp.nstate):
        for action in iterlinks(lemp.sorted[index].ap):
            if action.type == REDUCE:
                action.x.rp.canReduce = True
    for rule in iterlinks(lemp.rule):
        if not rule.canReduce:
            ErrorMsg(lemp.filename, rule.ruleline,
                     "This rule can not be reduced.\n")
            lemp.errorcnt += 1

    return


def resolve_conflict(apx, apy):
    """Resolve a conflict between two actions on the same lookahead.

    The type of one of the two actions is rewritten to record the
    outcome.  Returns the number of conflicts that could not be
    resolved (0 or 1).

    If either action is a SHIFT, it must be apx; the combination
    apx REDUCE / apy SHIFT is not handled.
    """

    assert apx.sp == apy.sp # Otherwise there would be no conflict
    unresolved = 0

    if apx.type == SHIFT and apy.type == SHIFT:
        apy.type = SSCONFLICT
        unresolved += 1
        # apy is now SSCONFLICT, so control reaches the final "else"
        # branch below, whose assertion then holds.

    if apx.type == SHIFT and apy.type == REDUCE:
        shift_sym = apx.sp
        rule_sym = apy.x.rp.precsym
        if rule_sym is None or shift_sym.prec < 0 or rule_sym.prec < 0:
            # Not enough precedence information.
            apy.type = SRCONFLICT
            unresolved += 1
        elif shift_sym.prec != rule_sym.prec:
            # Different precedences decide directly.
            if shift_sym.prec > rule_sym.prec:
                apy.type = RD_RESOLVED
            else:
                apx.type = SH_RESOLVED
        elif shift_sym.assoc == RIGHT:
            # Equal precedence: associativity of the shifted symbol
            # breaks the tie.
            apy.type = RD_RESOLVED
        elif shift_sym.assoc == LEFT:
            apx.type = SH_RESOLVED
        else:
            assert shift_sym.assoc == NONE
            apy.type = SRCONFLICT
            unresolved += 1

    elif apx.type == REDUCE and apy.type == REDUCE:
        first_sym = apx.x.rp.precsym
        second_sym = apy.x.rp.precsym
        undecidable = (first_sym is None or second_sym is None or
                       first_sym.prec < 0 or second_sym.prec < 0 or
                       first_sym.prec == second_sym.prec)
        if undecidable:
            apy.type = RRCONFLICT
            unresolved += 1
        elif first_sym.prec > second_sym.prec:
            apy.type = RD_RESOLVED
        else:
            apx.type = RD_RESOLVED

    else:
        # The REDUCE/SHIFT case cannot happen because SHIFTs come
        # before REDUCEs on the list.  Reaching this point means the
        # conflict had already been resolved (or flagged).
        settled = (SH_RESOLVED, RD_RESOLVED,
                   SSCONFLICT, SRCONFLICT, RRCONFLICT)
        assert (apx.type in settled) or (apy.type in settled)

    return unresolved

