/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.pipeline;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.Annotator;
import edu.stanford.nlp.pipeline.DeprecatedAnnotations;
import edu.stanford.nlp.util.XMLUtils;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Stack;
import java.util.regex.Pattern;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Annotator that removes XML tag tokens from a token list.
 *
 * <p>A token is treated as a tag when {@code XMLUtils.parseTag} returns a non-null result for its
 * trimmed word. Tag tokens never appear in the output. Every other ("text") token has its word
 * passed through {@code XMLUtils.unescapeStringForXML}, receives the list of currently open tag
 * names as its {@code XmlContextAnnotation}, and is kept or dropped according to the tag-name
 * pattern supplied at construction time.
 *
 * <p>The tokens are modified in place; only the list that holds them is new.
 */
public class CleanXmlAnnotator
implements Annotator {
    /** Default tag-name pattern: matches every tag name (and the empty string). */
    public static final String DEFAULT_XML_TAGS = ".*";
    /** Default sentence-ending pattern: empty, so no sentence-ending matcher is compiled. */
    public static final String DEFAULT_SENTENCE_ENDERS = "";
    /** Default for tolerating mismatched or unclosed tags. */
    public static final boolean DEFAULT_ALLOW_FLAWS = true;

    /** Pattern applied to tag names; {@code null} disables tag nesting tracking entirely. */
    private final Pattern xmlTagMatcher;
    /** Pattern for tags that force a sentence end on the preceding kept token; may be {@code null}. */
    private final Pattern sentenceEndingTagMatcher;
    /** When {@code false}, mismatched and unclosed tags raise {@link IllegalArgumentException}. */
    private final boolean allowFlawedXml;

    /** Creates an annotator configured with the three {@code DEFAULT_*} constants. */
    public CleanXmlAnnotator() {
        this(DEFAULT_XML_TAGS, DEFAULT_SENTENCE_ENDERS, DEFAULT_ALLOW_FLAWS);
    }

    /**
     * Creates an annotator with explicit settings.
     *
     * @param xmlTagsToRemove regular expression matched against tag names. Text tokens are kept
     *     while they are nested inside at least one matching open tag, or unconditionally when the
     *     pattern also matches the empty string. {@code null} keeps every text token and disables
     *     nesting tracking.
     * @param sentenceEndingTags regular expression for tag names that mark the previously kept
     *     token as a forced sentence end; {@code null} or empty disables this. Ignored when
     *     {@code xmlTagsToRemove} is {@code null}.
     * @param allowFlawedXml whether mismatched close tags and unclosed tags are tolerated
     */
    public CleanXmlAnnotator(String xmlTagsToRemove, String sentenceEndingTags, boolean allowFlawedXml) {
        this.allowFlawedXml = allowFlawedXml;
        if (xmlTagsToRemove == null) {
            this.xmlTagMatcher = null;
            this.sentenceEndingTagMatcher = null;
            return;
        }
        this.xmlTagMatcher = Pattern.compile(xmlTagsToRemove);
        boolean hasSentenceEnders = sentenceEndingTags != null && sentenceEndingTags.length() > 0;
        this.sentenceEndingTagMatcher = hasSentenceEnders ? Pattern.compile(sentenceEndingTags) : null;
    }

    /**
     * Cleans the tokens stored on the annotation.
     *
     * <p>If the annotation has a {@code WordsPLAnnotation}, all of its sentences are flattened
     * into one token list (the last token of each non-empty sentence is flagged with
     * {@code ForcedSentenceEndAnnotation}), cleaned, and stored back as a single-element list of
     * lists. Otherwise, if it has a {@code TokensAnnotation}, that list is cleaned and replaced.
     * Annotations with neither key are left untouched.
     *
     * @param annotation the annotation to clean in place
     */
    @Override
    public void annotate(Annotation annotation) {
        if (annotation.has(DeprecatedAnnotations.WordsPLAnnotation.class)) {
            List<List<? extends CoreLabel>> wordsList = annotation.get(DeprecatedAnnotations.WordsPLAnnotation.class);
            List<CoreLabel> allTokens = new ArrayList<CoreLabel>();
            for (List<? extends CoreLabel> words : wordsList) {
                // An empty sentence has no last token to flag; indexing size() - 1 would throw.
                if (words.isEmpty()) {
                    continue;
                }
                allTokens.addAll(words);
                words.get(words.size() - 1).set(CoreAnnotations.ForcedSentenceEndAnnotation.class, true);
            }
            List<CoreLabel> newTokens = this.process(allTokens);
            List<List<? extends CoreLabel>> newWordsList = new ArrayList<List<? extends CoreLabel>>();
            newWordsList.add(newTokens);
            annotation.set(DeprecatedAnnotations.WordsPLAnnotation.class, newWordsList);
        } else if (annotation.has(CoreAnnotations.TokensAnnotation.class)) {
            List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
            List<CoreLabel> newTokens = this.process(tokens);
            annotation.set(CoreAnnotations.TokensAnnotation.class, newTokens);
        }
    }

    /**
     * Filters XML tag tokens out of {@code tokens}.
     *
     * <p>Text tokens are mutated (word unescaped, {@code XmlContextAnnotation} set, and
     * before/after text adjusted when those annotations are present). The text carried by removed
     * tag tokens is accumulated and attached to neighbouring kept tokens.
     *
     * @param tokens the tokens to filter; not modified as a list, but its elements are
     * @return a new list holding the text tokens that pass the tag-name filter
     * @throws IllegalArgumentException if a close tag has no matching open tag, or, when flawed
     *     XML is not allowed, if tags are mismatched or left unclosed
     */
    public List<CoreLabel> process(List<CoreLabel> tokens) {
        // Names of the tags that are currently open, outermost first.
        Stack<String> enclosingTags = new Stack<String>();
        // Snapshot of enclosingTags shared by consecutive text tokens; reset when nesting changes.
        List<String> currentTagSet = null;
        // Number of currently open tags whose name matches xmlTagMatcher.
        int matchDepth = 0;
        List<CoreLabel> newTokens = new ArrayList<CoreLabel>();
        // Before/current/after text of tag tokens removed since the last text token.
        StringBuilder removedText = new StringBuilder();

        // Loop invariants: whether text outside any matching tag is kept, and the last input token.
        boolean keepOutsideMatchingTags = this.xmlTagMatcher == null || this.xmlTagMatcher.matcher("").matches();
        CoreLabel finalToken = tokens.isEmpty() ? null : tokens.get(tokens.size() - 1);

        for (CoreLabel token : tokens) {
            XMLUtils.XMLTag tag = XMLUtils.parseTag(token.word().trim());

            if (tag == null) {
                token.setWord(XMLUtils.unescapeStringForXML(token.word()));
                if (matchDepth > 0 || keepOutsideMatchingTags) {
                    newTokens.add(token);
                }
                this.reattachRemovedText(token, newTokens, removedText);
                if (currentTagSet == null) {
                    // Unmodifiable because the same list instance is shared by many tokens.
                    currentTagSet = Collections.unmodifiableList(new ArrayList<String>(enclosingTags));
                }
                token.set(CoreAnnotations.XmlContextAnnotation.class, currentTagSet);
                continue;
            }

            // The tag token is dropped; remember the text it carried.
            appendIfPresent(removedText, token.get(CoreAnnotations.BeforeAnnotation.class));
            appendIfPresent(removedText, token.get(CoreAnnotations.CurrentAnnotation.class));
            if (token == finalToken) {
                appendIfPresent(removedText, token.get(CoreAnnotations.AfterAnnotation.class));
            }

            if (this.sentenceEndingTagMatcher != null
                    && this.sentenceEndingTagMatcher.matcher(tag.name).matches()
                    && newTokens.size() > 0) {
                newTokens.get(newTokens.size() - 1).set(CoreAnnotations.ForcedSentenceEndAnnotation.class, true);
            }

            // Without a tag pattern nesting is not tracked; single tags never change nesting.
            if (this.xmlTagMatcher == null || tag.isSingleTag) {
                continue;
            }
            currentTagSet = null;
            if (tag.isEndTag) {
                matchDepth = this.closeTag(tag, enclosingTags, matchDepth);
            } else {
                enclosingTags.push(tag.name);
                if (this.xmlTagMatcher.matcher(tag.name).matches()) {
                    ++matchDepth;
                }
            }
        }

        if (enclosingTags.size() > 0 && !this.allowFlawedXml) {
            throw new IllegalArgumentException("Unclosed tags, starting with " + enclosingTags.pop());
        }

        // Text removed after the last text token becomes the "after" text of the last kept token,
        // but only when that token carries a CurrentAnnotation.
        if (newTokens.size() > 0 && removedText.length() > 0) {
            CoreLabel lastToken = newTokens.get(newTokens.size() - 1);
            if (lastToken.get(CoreAnnotations.CurrentAnnotation.class) != null) {
                lastToken.set(CoreAnnotations.AfterAnnotation.class, removedText.toString());
            }
        }
        return newTokens;
    }

    /** Appends {@code text} to {@code buffer} unless it is {@code null}. */
    private static void appendIfPresent(StringBuilder buffer, String text) {
        if (text != null) {
            buffer.append(text);
        }
    }

    /**
     * Moves pending removed text onto the current text token's "before" text and the preceding
     * kept token's "after" text, then clears the buffer. Nothing is attached when the token has
     * no {@code BeforeAnnotation}, but the buffer is cleared regardless.
     */
    private void reattachRemovedText(CoreLabel token, List<CoreLabel> newTokens, StringBuilder removedText) {
        if (removedText.length() == 0) {
            return;
        }
        String before = token.get(CoreAnnotations.BeforeAnnotation.class);
        if (before != null) {
            token.set(CoreAnnotations.BeforeAnnotation.class, removedText + before);
            // NOTE(review): size() - 2 presumes `token` was just appended to newTokens; when the
            // token was filtered out this addresses a different token -- confirm intended.
            if (newTokens.size() > 1) {
                CoreLabel previous = newTokens.get(newTokens.size() - 2);
                String after = previous.get(CoreAnnotations.AfterAnnotation.class);
                String updatedAfter = after != null ? after + removedText : removedText.toString();
                previous.set(CoreAnnotations.AfterAnnotation.class, updatedAfter);
            }
        }
        removedText.setLength(0);
    }

    /**
     * Pops open tags until the one named by the close tag {@code tag} is found.
     *
     * @return the match depth after accounting for every popped tag
     * @throws IllegalArgumentException if no open tag matches, or if the innermost open tag
     *     differs from {@code tag} and flawed XML is not allowed
     */
    private int closeTag(XMLUtils.XMLTag tag, Stack<String> enclosingTags, int matchDepth) {
        int depth = matchDepth;
        while (true) {
            if (enclosingTags.isEmpty()) {
                throw new IllegalArgumentException("Got a close tag " + tag.name + " which does not match any open tag");
            }
            String lastTag = enclosingTags.pop();
            if (this.xmlTagMatcher.matcher(lastTag).matches()) {
                --depth;
            }
            if (lastTag.equals(tag.name)) {
                break;
            }
            if (!this.allowFlawedXml) {
                throw new IllegalArgumentException("Mismatched tags... " + tag.name + " closed a " + lastTag + " tag.");
            }
        }
        if (depth < 0) {
            // Depth is only decremented for tags that were counted when pushed.
            throw new AssertionError("Programming error?  We think there have been more close tags than open tags");
        }
        return depth;
    }
}

