/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.ling.CategoryWordTag;
import edu.stanford.nlp.ling.CategoryWordTagFactory;
import edu.stanford.nlp.ling.HasTag;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.parser.lexparser.TreebankLangParserParams;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.trees.BobChrisTreeNormalizer;
import edu.stanford.nlp.trees.DiskTreebank;
import edu.stanford.nlp.trees.HeadFinder;
import edu.stanford.nlp.trees.LabeledScoredTreeFactory;
import edu.stanford.nlp.trees.ModCollinsHeadFinder;
import edu.stanford.nlp.trees.PennTreeReader;
import edu.stanford.nlp.trees.PennTreebankLanguagePack;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeFactory;
import edu.stanford.nlp.trees.TreeReader;
import edu.stanford.nlp.trees.TreeReaderFactory;
import edu.stanford.nlp.trees.TreeTransformer;
import edu.stanford.nlp.trees.Treebank;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

/**
 * A {@link TreeTransformer} that binarizes the local trees of a parse tree
 * around the head child chosen by a {@link HeadFinder}.
 *
 * <p>Every node of the output carries a {@link CategoryWordTag} label holding
 * the category plus the head word and head tag. Intermediate nodes introduced
 * by binarization get synthetic labels, which always contain the character
 * {@code '@'} (see {@link #isSynthetic(String)}).
 *
 * <p>Four binarization schemes are available, selected by the
 * {@code insideFactor} and {@code markovFactor} constructor flags; see
 * {@link #binarizeLocalTree(Tree, int, TaggedWord)}.
 */
public class TreeBinarizer
implements TreeTransformer {
    private static final boolean DEBUG = false;
    private final HeadFinder hf;
    private final TreeFactory tf;
    private final TreebankLanguagePack tlp;
    private final boolean insideFactor;
    private final boolean markovFactor;
    private final int markovOrder;
    private final boolean useWrappingLabels;
    private final double selectiveSplitThreshold;
    private final boolean markFinalStates;
    private final boolean unaryAtTop;
    private boolean doSelectiveSplit = false;
    // How often each label was produced by markovInsideBinarizeLocalTreeNew
    // while selective splitting was switched off.
    private ClassicCounter<String> stateCounter = new ClassicCounter<String>();

    /**
     * Switches selective splitting on or off.
     *
     * <p>While it is off, the markov inside binarization counts every label it
     * produces. Once it is on, those counts are consulted instead of updated:
     * a label counted fewer than {@code selectiveSplitThreshold} times is
     * replaced by one built with one order less of markov context. Turning the
     * flag off discards all counts collected so far.
     *
     * @param doSelectiveSplit whether to apply selective splitting
     */
    public void setDoSelectiveSplit(boolean doSelectiveSplit) {
        this.doSelectiveSplit = doSelectiveSplit;
        if (!doSelectiveSplit) {
            this.stateCounter = new ClassicCounter<String>();
        }
    }

    /**
     * Joins the label values of the given trees with single spaces.
     *
     * @param treeList trees whose labels are concatenated, in list order
     * @return the space-separated label values; empty for an empty list
     */
    private static String join(List<Tree> treeList) {
        StringBuilder sb = new StringBuilder();
        for (Iterator<Tree> it = treeList.iterator(); it.hasNext(); ) {
            sb.append(it.next().label().value());
            if (it.hasNext()) {
                sb.append(" ");
            }
        }
        return sb.toString();
    }

    /**
     * Appends an indented, parenthesized rendering of {@code t} to {@code sb}.
     * Children are only rendered for the root of the rendering (level 0) and
     * for nodes with synthetic labels.
     *
     * @param t the tree to render
     * @param sb the buffer to append to
     * @param level nesting depth; each level indents by two spaces
     */
    private static void localTreeString(Tree t, StringBuilder sb, int level) {
        sb.append("\n");
        for (int i = 0; i < level; ++i) {
            sb.append("  ");
        }
        sb.append("(").append(t.label());
        if (level == 0 || TreeBinarizer.isSynthetic(t.label().value())) {
            for (int c = 0; c < t.numChildren(); ++c) {
                TreeBinarizer.localTreeString(t.getChild(c), sb, level + 1);
            }
        }
        sb.append(")");
    }

    /**
     * Tells whether a label was introduced by binarization.
     *
     * @param label the label string to inspect
     * @return {@code true} if the label contains {@code '@'}
     */
    protected static boolean isSynthetic(String label) {
        return label.indexOf('@') > -1;
    }

    /**
     * Binarizes one local tree using the scheme selected at construction time.
     *
     * <p>With {@code markovFactor} set, the label of {@code t} is first replaced
     * (in place) by a {@link CategoryWordTag} carrying the head word and tag, and
     * one of the two markov variants is applied. Otherwise the plain inside or
     * outside variant is applied.
     *
     * @param t the local tree to binarize; its children are used as they are
     * @param headNum index of the head child among the children of {@code t}
     * @param head head word and tag to store on the produced labels
     * @return the binarized local tree
     */
    Tree binarizeLocalTree(Tree t, int headNum, TaggedWord head) {
        if (this.markovFactor) {
            String topCat = t.label().value();
            t.setLabel(new CategoryWordTag(topCat, head.word(), head.tag()));
            if (this.insideFactor) {
                return this.markovInsideBinarizeLocalTreeNew(t, headNum, 0, t.numChildren() - 1, true);
            }
            return this.markovOutsideBinarizeLocalTree(t, head, headNum, topCat, new LinkedList<Tree>(), false);
        }
        if (this.insideFactor) {
            return this.insideBinarizeLocalTree(t, headNum, head, 0, 0);
        }
        String cat = t.label().value();
        return this.outsideBinarizeLocalTree(t, cat, cat, headNum, head, 0, "", 0, "");
    }

    /**
     * Markov-factored outside binarization. Children left of the head are
     * peeled off first (left to right); once the head is the first remaining
     * child, children are peeled off from the right end.
     *
     * @param t the (partial) local tree still to be binarized
     * @param head head word and tag stored on synthetic labels
     * @param headLoc index of the head among the remaining children of {@code t}
     * @param topCat category of the original local tree, embedded in every synthetic label
     * @param ll window of the most recently peeled-off siblings, capped at {@code markovOrder} entries
     * @param doneLeft whether the left side has already been closed off
     * @return the binarized tree; {@code t} itself if {@code headLoc} is negative
     */
    private Tree markovOutsideBinarizeLocalTree(Tree t, TaggedWord head, int headLoc, String topCat, LinkedList<Tree> ll, boolean doneLeft) {
        String word = head.word();
        String tag = head.tag();
        List<Tree> newChildren = new ArrayList<Tree>(2);
        if (headLoc == 0) {
            if (!doneLeft) {
                // Left side is exhausted: restart with a fresh sibling window for the right side.
                if (this.tlp.isStartSymbol(topCat)) {
                    return this.markovOutsideBinarizeLocalTree(t, head, headLoc, topCat, new LinkedList<Tree>(), true);
                }
                // Insert a unary synthetic node that marks the left boundary.
                String headStr = t.getChild(headLoc).label().value();
                String subLabelStr = "@" + topCat + ": " + headStr + " ]";
                CategoryWordTag subLabel = new CategoryWordTag(subLabelStr, word, tag);
                Tree subTree = this.tf.newTreeNode(subLabel, t.getChildrenAsList());
                newChildren.add(this.markovOutsideBinarizeLocalTree(subTree, head, headLoc, topCat, new LinkedList<Tree>(), true));
                return this.tf.newTreeNode(t.label(), newChildren);
            }
            int len = t.numChildren();
            if (len == 1) {
                // Only the head is left: nothing more to split.
                return this.tf.newTreeNode(t.label(), Collections.singletonList(t.getChild(0)));
            }
            // Peel off the rightmost child and remember it in the sibling window.
            ll.addFirst(t.getChild(len - 1));
            if (ll.size() > this.markovOrder) {
                ll.removeLast();
            }
            String headStr = t.getChild(headLoc).label().value();
            String rightStr = (len > this.markovOrder - 1 ? "... " : "") + TreeBinarizer.join(ll);
            String subLabelStr = "@" + topCat + ": " + headStr + " " + rightStr;
            CategoryWordTag subLabel = new CategoryWordTag(subLabelStr, word, tag);
            Tree subTree = this.tf.newTreeNode(subLabel, t.getChildrenAsList().subList(0, len - 1));
            newChildren.add(this.markovOutsideBinarizeLocalTree(subTree, head, headLoc, topCat, ll, true));
            newChildren.add(t.getChild(len - 1));
            return this.tf.newTreeNode(t.label(), newChildren);
        }
        if (headLoc > 0) {
            // Peel off the leftmost child and remember it in the sibling window.
            ll.addLast(t.getChild(0));
            if (ll.size() > this.markovOrder) {
                ll.removeFirst();
            }
            String headStr = t.getChild(headLoc).label().value();
            String leftStr = TreeBinarizer.join(ll) + (headLoc > this.markovOrder - 1 ? " ..." : "");
            String subLabelStr = "@" + topCat + ": " + leftStr + " " + headStr + " ]";
            CategoryWordTag subLabel = new CategoryWordTag(subLabelStr, word, tag);
            Tree subTree = this.tf.newTreeNode(subLabel, t.getChildrenAsList().subList(1, t.numChildren()));
            newChildren.add(t.getChild(0));
            newChildren.add(this.markovOutsideBinarizeLocalTree(subTree, head, headLoc - 1, topCat, ll, false));
            return this.tf.newTreeNode(t.label(), newChildren);
        }
        return t;
    }

    /**
     * Markov-factored inside binarization of the children of {@code t} in the
     * index range {@code [left, right]}. Left siblings of the head are split
     * off first, then right siblings, until only the head remains.
     *
     * @param t the local tree whose children are being grouped
     * @param headLoc index of the head child
     * @param left index of the first child still to be covered
     * @param right index of the last child still to be covered
     * @param starting {@code true} for the outermost call, which keeps the label of {@code t}
     * @return the node covering children {@code left..right}
     */
    private Tree markovInsideBinarizeLocalTreeNew(Tree t, int headLoc, int left, int right, boolean starting) {
        Tree[] children = t.children();
        if (starting) {
            if (left == headLoc && right == headLoc) {
                // A unary local tree needs no binarization.
                return t;
            }
            if (this.unaryAtTop) {
                // Keep the original label on a unary node above the synthetic chain.
                Tree below = this.markovInsideBinarizeLocalTreeNew(t, headLoc, left, right, false);
                return this.tf.newTreeNode(t.label(), Collections.singletonList(below));
            }
        }
        List<Tree> newChildren = null;
        if (left == headLoc && right == headLoc) {
            newChildren = Collections.singletonList(children[headLoc]);
        } else if (left < headLoc) {
            newChildren = new ArrayList<Tree>(2);
            newChildren.add(children[left]);
            newChildren.add(this.markovInsideBinarizeLocalTreeNew(t, headLoc, left + 1, right, false));
        } else if (right > headLoc) {
            newChildren = new ArrayList<Tree>(2);
            newChildren.add(this.markovInsideBinarizeLocalTreeNew(t, headLoc, left, right - 1, false));
            newChildren.add(children[right]);
        } else {
            // The head lies outside [left, right]; newChildren stays null and is
            // handed to the tree factory as is.
            System.err.println("UHOH, bad parameters passed to markovInsideBinarizeLocalTree");
        }
        Label label = starting ? t.label() : this.makeSyntheticLabel(t, left, right, headLoc, this.markovOrder);
        if (this.doSelectiveSplit) {
            // Rarely seen labels are backed off to one order less of context.
            double stateCount = this.stateCounter.getCount(label.value());
            if (stateCount < this.selectiveSplitThreshold) {
                label = starting && !this.unaryAtTop ? t.label() : this.makeSyntheticLabel(t, left, right, headLoc, this.markovOrder - 1);
            }
        } else {
            this.stateCounter.incrementCount(label.value(), 1.0);
        }
        return this.tf.newTreeNode(label, newChildren);
    }

    /**
     * Builds the synthetic label for the node covering children
     * {@code left..right} of {@code t}, in the style selected by
     * {@code useWrappingLabels}.
     *
     * @param t the local tree being binarized
     * @param left index of the first covered child
     * @param right index of the last covered child
     * @param headLoc index of the head child
     * @param markovOrder maximum number of sibling labels to include
     * @return the synthetic label
     */
    private Label makeSyntheticLabel(Tree t, int left, int right, int headLoc, int markovOrder) {
        if (this.useWrappingLabels) {
            return this.makeSyntheticLabel2(t, left, right, headLoc, markovOrder);
        }
        return TreeBinarizer.makeSyntheticLabel1(t, left, right, headLoc, markovOrder);
    }

    /**
     * Builds a synthetic label of the form
     * {@code @CAT| <left siblings>[HEAD]<right siblings>}. At most
     * {@code markovOrder} sibling labels are listed, left ones first; omitted
     * siblings are shown as {@code "..."}, and {@code "[ "} / {@code " ]"} mark
     * a span that starts at the first / ends at the last child.
     *
     * @param t the local tree being binarized; its label must implement {@link HasWord} and {@link HasTag}
     * @param left index of the first covered child
     * @param right index of the last covered child
     * @param headLoc index of the head child
     * @param markovOrder maximum number of sibling labels to include
     * @return the synthetic label carrying the word and tag of {@code t}'s label
     */
    private static Label makeSyntheticLabel1(Tree t, int left, int right, int headLoc, int markovOrder) {
        String topCat = t.label().value();
        Tree[] children = t.children();
        String leftString = left == 0 ? "[ " : " ";
        String rightString = right == children.length - 1 ? " ]" : " ";
        for (int i = 0; i < markovOrder; ++i) {
            if (left < headLoc) {
                leftString = leftString + children[left].label().value() + " ";
                ++left;
            } else if (right > headLoc) {
                rightString = " " + children[right].label().value() + rightString;
                --right;
            } else {
                break;
            }
        }
        if (right > headLoc) {
            rightString = "..." + rightString;
        }
        if (left < headLoc) {
            leftString = leftString + "...";
        }
        String labelStr = "@" + topCat + "| " + leftString + "[" + t.getChild(headLoc).label().value() + "]" + rightString;
        String word = ((HasWord) t.label()).word();
        String tag = ((HasTag) t.label()).tag();
        return new CategoryWordTag(labelStr, word, tag);
    }

    /**
     * Builds a synthetic label of the form {@code @CAT| HEAD_<siblings>}, where
     * each listed left sibling is suffixed with {@code "<"} and each right
     * sibling with {@code ">"}, and omitted siblings are shown as {@code " ..."}.
     * With {@code markFinalStates} set, the outermost child of a complete span
     * is appended last, suffixed with {@code "["} (leftmost child) or
     * {@code "]"} (rightmost child), and counts towards {@code markovOrder}.
     *
     * @param t the local tree being binarized; its label must implement {@link HasWord} and {@link HasTag}
     * @param left index of the first covered child
     * @param right index of the last covered child
     * @param headLoc index of the head child
     * @param markovOrder maximum number of sibling labels to include
     * @return the synthetic label carrying the word and tag of {@code t}'s label
     */
    private Label makeSyntheticLabel2(Tree t, int left, int right, int headLoc, int markovOrder) {
        String topCat = t.label().value();
        Tree[] children = t.children();
        String finalPiece = "";
        int i = 0;
        if (this.markFinalStates) {
            if (headLoc != 0 && left == 0) {
                finalPiece = " " + children[left].label().value() + "[";
                ++left;
                ++i;
            } else if (headLoc == 0 && right > headLoc && right == children.length - 1) {
                finalPiece = " " + children[right].label().value() + "]";
                --right;
                ++i;
            }
        }
        String middlePiece = "";
        while (i < markovOrder) {
            if (left < headLoc) {
                middlePiece = " " + children[left].label().value() + "<" + middlePiece;
                ++left;
            } else if (right > headLoc) {
                middlePiece = " " + children[right].label().value() + ">" + middlePiece;
                --right;
            } else {
                break;
            }
            ++i;
        }
        if (right > headLoc || left < headLoc) {
            middlePiece = " ..." + middlePiece;
        }
        String headStr = t.getChild(headLoc).label().value();
        int leng = 4 + topCat.length() + headStr.length() + middlePiece.length() + finalPiece.length();
        StringBuilder sb = new StringBuilder(leng);
        sb.append("@").append(topCat).append("| ").append(headStr).append("_").append(middlePiece).append(finalPiece);
        String word = ((HasWord) t.label()).word();
        String tag = ((HasTag) t.label()).tag();
        return new CategoryWordTag(sb.toString(), word, tag);
    }

    /**
     * Plain inside binarization. Intermediate nodes are labeled {@code "@ "}
     * followed by the space-separated labels of all children they span; the
     * outermost node keeps the label of {@code t}.
     *
     * @param t the local tree being binarized
     * @param headNum index of the head child
     * @param head head word and tag stored on every produced label
     * @param leftProcessed number of children already split off on the left
     * @param rightProcessed number of children already split off on the right
     * @return the node covering the children not yet split off
     */
    private Tree insideBinarizeLocalTree(Tree t, int headNum, TaggedWord head, int leftProcessed, int rightProcessed) {
        String word = head.word();
        String tag = head.tag();
        List<Tree> newChildren = new ArrayList<Tree>(2);
        boolean outermost = leftProcessed == 0 && rightProcessed == 0;
        if (t.numChildren() <= leftProcessed + rightProcessed + 2) {
            // One or two children remain: build the innermost node directly.
            Tree leftChild = t.getChild(leftProcessed);
            newChildren.add(leftChild);
            if (t.numChildren() == leftProcessed + rightProcessed + 1) {
                String finalCat = t.label().value();
                return this.tf.newTreeNode(new CategoryWordTag(finalCat, word, tag), newChildren);
            }
            Tree rightChild = t.getChild(leftProcessed + 1);
            newChildren.add(rightChild);
            String labelStr = t.label().value();
            if (!outermost) {
                labelStr = "@ " + leftChild.label().value() + " " + rightChild.label().value();
            }
            return this.tf.newTreeNode(new CategoryWordTag(labelStr, word, tag), newChildren);
        }
        if (headNum > leftProcessed) {
            // Split off the next left child; the rest goes into a synthetic right child.
            Tree leftChild = t.getChild(leftProcessed);
            Tree rightChild = this.insideBinarizeLocalTree(t, headNum, head, leftProcessed + 1, rightProcessed);
            newChildren.add(leftChild);
            newChildren.add(rightChild);
            // substring(2) strips the "@ " prefix of the nested synthetic label.
            String labelStr = "@ " + leftChild.label().value() + " " + rightChild.label().value().substring(2);
            if (outermost) {
                labelStr = t.label().value();
            }
            return this.tf.newTreeNode(new CategoryWordTag(labelStr, word, tag), newChildren);
        }
        // Head is the first remaining child: split off the next right child.
        Tree leftChild = this.insideBinarizeLocalTree(t, headNum, head, leftProcessed, rightProcessed + 1);
        Tree rightChild = t.getChild(t.numChildren() - rightProcessed - 1);
        newChildren.add(leftChild);
        newChildren.add(rightChild);
        // substring(2) strips the "@ " prefix of the nested synthetic label.
        String labelStr = "@ " + leftChild.label().value().substring(2) + " " + rightChild.label().value();
        if (outermost) {
            labelStr = t.label().value();
        }
        return this.tf.newTreeNode(new CategoryWordTag(labelStr, word, tag), newChildren);
    }

    /**
     * Plain outside binarization. Intermediate nodes are labeled
     * {@code "@" + finalCat + " :" + <left siblings> + " ..." + <right siblings>},
     * i.e. by the siblings that have already been split off around them.
     *
     * @param t the local tree being binarized
     * @param labelStr category string for the node built by this call
     * @param finalCat category of the original local tree
     * @param headNum index of the head child
     * @param head head word and tag stored on every produced label
     * @param leftProcessed number of children already split off on the left
     * @param leftStr labels of the children split off on the left, each preceded by a space
     * @param rightProcessed number of children already split off on the right
     * @param rightStr labels of the children split off on the right, each preceded by a space
     * @return the node covering the children not yet split off
     */
    private Tree outsideBinarizeLocalTree(Tree t, String labelStr, String finalCat, int headNum, TaggedWord head, int leftProcessed, String leftStr, int rightProcessed, String rightStr) {
        List<Tree> newChildren = new ArrayList<Tree>(2);
        CategoryWordTag label = new CategoryWordTag(labelStr, head.word(), head.tag());
        int remaining = t.numChildren() - leftProcessed - rightProcessed;
        if (remaining <= 2) {
            newChildren.add(t.getChild(leftProcessed));
            if (remaining == 2) {
                newChildren.add(t.getChild(leftProcessed + 1));
            }
            return this.tf.newTreeNode(label, newChildren);
        }
        if (headNum > leftProcessed) {
            // Split off the next left child.
            Tree leftChild = t.getChild(leftProcessed);
            String childLeftStr = leftStr + " " + leftChild.label().value();
            String childLabelStr = "@" + finalCat + " :" + childLeftStr + " ..." + rightStr;
            Tree rightChild = this.outsideBinarizeLocalTree(t, childLabelStr, finalCat, headNum, head, leftProcessed + 1, childLeftStr, rightProcessed, rightStr);
            newChildren.add(leftChild);
            newChildren.add(rightChild);
            return this.tf.newTreeNode(label, newChildren);
        }
        // Head is the first remaining child: split off the next right child.
        Tree rightChild = t.getChild(t.numChildren() - rightProcessed - 1);
        String childRightStr = " " + rightChild.label().value() + rightStr;
        String childLabelStr = "@" + finalCat + " :" + leftStr + " ..." + childRightStr;
        Tree leftChild = this.outsideBinarizeLocalTree(t, childLabelStr, finalCat, headNum, head, leftProcessed, leftStr, rightProcessed + 1, childRightStr);
        newChildren.add(leftChild);
        newChildren.add(rightChild);
        return this.tf.newTreeNode(label, newChildren);
    }

    /**
     * Returns a binarized copy of {@code t}.
     *
     * <p>Leaves become {@link Word}-labeled leaves and preterminals get a
     * {@link CategoryWordTag} whose tag is their own category. For every other
     * node the children are transformed recursively and the node is then
     * binarized around its head child, except for nodes whose category starts
     * with the language pack's start symbol, which keep their label and are
     * not binarized.
     *
     * @param t the tree to transform; may be {@code null}
     * @return the transformed tree, or {@code null} if {@code t} is {@code null}
     * @throws NullPointerException if the head finder returns no head for a
     *         phrasal node that is not a start-symbol node
     * @throws ClassCastException if a start-symbol node is not labeled with a {@link CategoryWordTag}
     */
    @Override
    public Tree transformTree(Tree t) {
        if (t == null) {
            return null;
        }
        String cat = t.label().value();
        if (t.isLeaf()) {
            return this.tf.newLeaf(new Word(cat));
        }
        if (t.isPreTerminal()) {
            Tree childResult = this.transformTree(t.getChild(0));
            String word = childResult.value();
            List<Tree> newChildren = new ArrayList<Tree>(1);
            newChildren.add(childResult);
            return this.tf.newTreeNode(new CategoryWordTag(cat, word, cat), newChildren);
        }
        Tree headChild = this.hf.determineHead(t);
        boolean atStartSymbol = cat.startsWith(this.tlp.startSymbol());
        if (headChild == null && !atStartSymbol) {
            System.err.println("### No head found for:");
            t.pennPrint();
        }
        int headNum = -1;
        Tree[] kids = t.children();
        List<Tree> newChildren = new ArrayList<Tree>(kids.length);
        for (int childNum = 0; childNum < kids.length; ++childNum) {
            Tree child = kids[childNum];
            // Identity comparison: the head finder hands back one of t's own children.
            if (child == headChild) {
                headNum = childNum;
            }
            newChildren.add(this.transformTree(child));
        }
        if (atStartSymbol) {
            CategoryWordTag label = (CategoryWordTag) t.label();
            return this.tf.newTreeNode(label, newChildren);
        }
        if (headChild == null) {
            // Previously this dereferenced the null head and died with a
            // message-less NullPointerException; the exception type is kept
            // for compatibility but now says what went wrong.
            throw new NullPointerException("No head found for local tree with category " + cat);
        }
        String word = ((HasWord) headChild.label()).word();
        String tag = ((HasTag) headChild.label()).tag();
        Tree result = this.tf.newTreeNode(new CategoryWordTag(cat, word, tag), newChildren);
        return this.binarizeLocalTree(result, headNum, new TaggedWord(word, tag));
    }

    /**
     * Creates a binarizer.
     *
     * @param hf head finder used to pick the head child of each local tree
     * @param tlp language pack supplying the start symbol
     * @param insideFactor use the inside variants of binarization instead of the outside ones
     * @param markovFactor use the markov-factored variants instead of the plain ones
     * @param markovOrder maximum number of sibling labels kept in markov-factored synthetic labels
     * @param useWrappingLabels for markov inside binarization, use the {@code HEAD_ sibling< sibling>} label style
     * @param unaryAtTop for markov inside binarization, keep the original label on a unary node above the synthetic chain
     * @param selectiveSplitThreshold minimum label count below which selective splitting backs off
     * @param markFinalStates with wrapping labels, mark the outermost child with {@code "["} or {@code "]"}
     */
    public TreeBinarizer(HeadFinder hf, TreebankLanguagePack tlp, boolean insideFactor, boolean markovFactor, int markovOrder, boolean useWrappingLabels, boolean unaryAtTop, double selectiveSplitThreshold, boolean markFinalStates) {
        this.hf = hf;
        this.tlp = tlp;
        this.tf = new LabeledScoredTreeFactory(new CategoryWordTagFactory());
        this.insideFactor = insideFactor;
        this.markovFactor = markovFactor;
        this.markovOrder = markovOrder;
        this.useWrappingLabels = useWrappingLabels;
        this.unaryAtTop = unaryAtTop;
        this.selectiveSplitThreshold = selectiveSplitThreshold;
        this.markFinalStates = markFinalStates;
    }

    /**
     * Instantiates {@code className} through its no-argument constructor.
     * Any failure (class not found, no usable constructor, wrong type, ...)
     * is reported on stderr and rethrown wrapped in a {@link RuntimeException}.
     *
     * @param className fully qualified name of the class to instantiate
     * @param type the type the new instance must have
     * @return the new instance
     */
    private static <T> T instantiate(String className, Class<T> type) {
        try {
            // getDeclaredConstructor().newInstance() replaces the deprecated Class.newInstance().
            return type.cast(Class.forName(className).getDeclaredConstructor().newInstance());
        }
        catch (Exception e) {
            System.err.println("Couldn't instantiate: " + className);
            throw new RuntimeException(e);
        }
    }

    /**
     * Command-line driver: loads a treebank and prints every tree next to its
     * binarized form.
     *
     * <p>Options (case-insensitive, must precede the path):
     * {@code -tlp class}, {@code -tlpp class}, {@code -insideFactor},
     * {@code -markovOrder int}. The first argument not starting with
     * {@code "-"} is the treebank path.
     *
     * @param args options followed by the treebank path
     */
    public static void main(String[] args) {
        TreebankLangParserParams tlpp = null;
        TreeReaderFactory trf = new TreeReaderFactory(){

            @Override
            public TreeReader newTreeReader(Reader in) {
                return new PennTreeReader(in, new LabeledScoredTreeFactory(new CategoryWordTagFactory()), new BobChrisTreeNormalizer());
            }
        };
        String fileExt = "mrg";
        HeadFinder hf = new ModCollinsHeadFinder();
        TreebankLanguagePack tlp = new PennTreebankLanguagePack();
        boolean insideFactor = false;
        // NOTE(review): markovFactor is fixed to false here and markovOrder is only
        // read on the markov-factored paths, so -markovOrder currently has no
        // visible effect from this entry point.
        boolean mf = false;
        int mo = 1;
        boolean uwl = false;
        boolean uat = false;
        double sst = 20.0;
        boolean mfs = false;
        int i;
        for (i = 0; i < args.length && args[i].startsWith("-"); ++i) {
            boolean hasValue = i + 1 < args.length;
            if (args[i].equalsIgnoreCase("-tlp") && hasValue) {
                tlp = TreeBinarizer.instantiate(args[i + 1], TreebankLanguagePack.class);
                ++i;
            } else if (args[i].equalsIgnoreCase("-tlpp") && hasValue) {
                tlpp = TreeBinarizer.instantiate(args[i + 1], TreebankLangParserParams.class);
                ++i;
            } else if (args[i].equalsIgnoreCase("-insideFactor")) {
                insideFactor = true;
            } else if (args[i].equalsIgnoreCase("-markovOrder") && hasValue) {
                mo = Integer.parseInt(args[++i]);
            } else {
                System.err.println("Unknown option:" + args[i]);
            }
        }
        if (i >= args.length) {
            // No treebank path left after the options.
            System.err.println("usage: java TreeBinarizer [-tlpp class|-markovOrder int|...] treebankPath");
            System.exit(0);
        }
        Treebank treebank;
        if (tlpp != null) {
            // Parser params override the language pack, file extension and head finder.
            treebank = tlpp.memoryTreebank();
            tlp = tlpp.treebankLanguagePack();
            fileExt = tlp.treebankFileExtension();
            hf = tlpp.headFinder();
        } else {
            treebank = new DiskTreebank(trf);
        }
        treebank.loadPath(args[i], fileExt, true);
        TreeBinarizer tt = new TreeBinarizer(hf, tlp, insideFactor, mf, mo, uwl, uat, sst, mfs);
        for (Tree t : treebank) {
            Tree newT = tt.transformTree(t);
            System.out.println("Original tree:");
            t.pennPrint();
            System.out.println("Binarized tree:");
            newT.pennPrint();
            System.out.println();
        }
    }
}

