/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.dcoref;

import edu.stanford.nlp.dcoref.CoNLL2011DocumentReader;
import edu.stanford.nlp.dcoref.Dictionaries;
import edu.stanford.nlp.dcoref.Document;
import edu.stanford.nlp.dcoref.Mention;
import edu.stanford.nlp.dcoref.MentionExtractor;
import edu.stanford.nlp.dcoref.SieveCoreferenceSystem;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.semgraph.SemanticGraph;
import edu.stanford.nlp.trees.semgraph.SemanticGraphCoreAnnotations;
import edu.stanford.nlp.trees.semgraph.SemanticGraphFactory;
import edu.stanford.nlp.util.CollectionValuedMap;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Pair;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import java.util.logging.Level;
import java.util.logging.Logger;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
public class CoNLLMentionExtractor
extends MentionExtractor {
    private final CoNLL2011DocumentReader reader;
    private final String corpusPath;
    private final boolean replicateCoNLL;
    private static final Logger logger = SieveCoreferenceSystem.logger;
    private final boolean collapse = true;
    private final boolean ccProcess = false;
    private final boolean includeExtras = false;
    private final boolean lemmatize = true;
    private final boolean threadSafe = true;

    public CoNLLMentionExtractor(LexicalizedParser p, Dictionaries dict, Properties props, SieveCoreferenceSystem.Semantics semantics) throws Exception {
        super(dict, semantics);
        this.corpusPath = props.getProperty("dcoref.conll2011");
        this.replicateCoNLL = Boolean.parseBoolean(props.getProperty("dcoref.replicate.conll", "false"));
        CoNLL2011DocumentReader.Options options = new CoNLL2011DocumentReader.Options();
        options.annotateTokenCoref = false;
        options.annotateTokenSpeaker = true;
        options.annotateTokenNer = this.replicateCoNLL;
        options.annotateTokenPos = this.replicateCoNLL;
        options.setFilter(".*_auto_conll$");
        this.reader = new CoNLL2011DocumentReader(this.corpusPath, options);
        this.stanfordProcessor = this.loadStanfordProcessor(props);
    }

    @Override
    public Document nextDoc() throws ClassNotFoundException {
        ArrayList<List<CoreLabel>> allWords = new ArrayList<List<CoreLabel>>();
        ArrayList<Tree> allTrees = new ArrayList<Tree>();
        CoNLL2011DocumentReader.Document conllDoc = this.reader.getNextDocument();
        if (conllDoc == null) {
            return null;
        }
        Annotation anno = conllDoc.getAnnotation();
        List sentences = (List)anno.get(CoreAnnotations.SentencesAnnotation.class);
        for (CoreMap sentence : sentences) {
            if (!this.replicateCoNLL) {
                sentence.remove(CoreAnnotations.TreeAnnotation.class);
                continue;
            }
            Tree tree = (Tree)sentence.get(CoreAnnotations.TreeAnnotation.class);
            try {
                SemanticGraph deps = SemanticGraphFactory.makeFromTree(tree, true, false, false, true, true);
                sentence.set(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class, deps);
            }
            catch (Exception e) {
                logger.log(Level.WARNING, "Exception caught during extraction of Stanford dependencies. Will ignore and continue...", e);
            }
        }
        String preSpeaker = null;
        String curSpeaker = null;
        int utterance = -1;
        for (CoreLabel token : (List)anno.get(CoreAnnotations.TokensAnnotation.class)) {
            if (!token.containsKey(CoreAnnotations.SpeakerAnnotation.class)) {
                token.set(CoreAnnotations.SpeakerAnnotation.class, "");
            }
            if (!(curSpeaker = (String)token.get(CoreAnnotations.SpeakerAnnotation.class)).equals(preSpeaker)) {
                ++utterance;
                preSpeaker = curSpeaker;
            }
            token.set(CoreAnnotations.UtteranceAnnotation.class, utterance);
        }
        this.stanfordProcessor.annotate(anno);
        for (CoreMap sentence : (List)anno.get(CoreAnnotations.SentencesAnnotation.class)) {
            allWords.add((List<CoreLabel>)sentence.get(CoreAnnotations.TokensAnnotation.class));
            allTrees.add((Tree)sentence.get(CoreAnnotations.TreeAnnotation.class));
        }
        List<List<Mention>> allGoldMentions = this.extractGoldMentions(conllDoc);
        List<List<Mention>> allPredictedMentions = this.mentionFinder.extractPredictedMentions(anno, this.maxID, this.dictionaries);
        try {
            CoNLLMentionExtractor.recallErrors(allGoldMentions, allPredictedMentions, anno);
        }
        catch (IOException e) {
            throw new RuntimeException(e);
        }
        Document doc = this.arrange(anno, allWords, allTrees, allPredictedMentions, allGoldMentions, true);
        doc.conllDoc = conllDoc;
        return doc;
    }

    private static void recallErrors(List<List<Mention>> goldMentions, List<List<Mention>> predictedMentions, Annotation doc) throws IOException {
        List coreMaps = (List)doc.get(CoreAnnotations.SentencesAnnotation.class);
        int numSentences = goldMentions.size();
        for (int i = 0; i < numSentences; ++i) {
            CoreMap coreMap = (CoreMap)coreMaps.get(i);
            List words = (List)coreMap.get(CoreAnnotations.TokensAnnotation.class);
            Tree tree = (Tree)coreMap.get(CoreAnnotations.TreeAnnotation.class);
            List<Mention> goldMentionsSent = goldMentions.get(i);
            List<Pair<Integer, Integer>> goldMentionsSpans = CoNLLMentionExtractor.extractSpans(goldMentionsSent);
            for (Pair<Integer, Integer> mentionSpan : goldMentionsSpans) {
                logger.finer("RECALL ERROR\n");
                logger.finer(coreMap + "\n");
                for (int x = ((Integer)mentionSpan.first).intValue(); x < (Integer)mentionSpan.second; ++x) {
                    logger.finer(((CoreLabel)words.get(x)).value() + " ");
                }
                logger.finer("\n" + tree + "\n");
            }
        }
    }

    private static List<Pair<Integer, Integer>> extractSpans(List<Mention> listOfMentions) {
        ArrayList<Pair<Integer, Integer>> mentionSpans = new ArrayList<Pair<Integer, Integer>>();
        for (Mention mention : listOfMentions) {
            Pair<Integer, Integer> mentionSpan = new Pair<Integer, Integer>(mention.startIndex, mention.endIndex);
            mentionSpans.add(mentionSpan);
        }
        return mentionSpans;
    }

    public List<List<Mention>> extractGoldMentions(CoNLL2011DocumentReader.Document conllDoc) {
        List sentences = (List)conllDoc.getAnnotation().get(CoreAnnotations.SentencesAnnotation.class);
        ArrayList<List<Mention>> allGoldMentions = new ArrayList<List<Mention>>();
        CollectionValuedMap<String, CoreMap> corefChainMap = conllDoc.getCorefChainMap();
        for (int i = 0; i < sentences.size(); ++i) {
            allGoldMentions.add(new ArrayList());
        }
        int maxCorefClusterId = -1;
        for (String corefIdStr : corefChainMap.keySet()) {
            int id = Integer.parseInt(corefIdStr);
            if (id <= maxCorefClusterId) continue;
            maxCorefClusterId = id;
        }
        int newMentionID = maxCorefClusterId + 1;
        for (String corefIdStr : corefChainMap.keySet()) {
            int id = Integer.parseInt(corefIdStr);
            int clusterMentionCnt = 0;
            Iterator i$ = corefChainMap.get(corefIdStr).iterator();
            while (i$.hasNext()) {
                CoreMap m = (CoreMap)i$.next();
                Mention mention = new Mention();
                mention.goldCorefClusterID = id;
                if (++clusterMentionCnt == 1) {
                    mention.mentionID = id;
                    mention.originalRef = -1;
                } else {
                    mention.mentionID = newMentionID++;
                    mention.originalRef = id;
                }
                if (this.maxID < mention.mentionID) {
                    this.maxID = mention.mentionID;
                }
                int sentIndex = (Integer)m.get(CoreAnnotations.SentenceIndexAnnotation.class);
                CoreMap sent = (CoreMap)sentences.get(sentIndex);
                mention.startIndex = (Integer)m.get(CoreAnnotations.TokenBeginAnnotation.class) - (Integer)sent.get(CoreAnnotations.TokenBeginAnnotation.class);
                mention.endIndex = (Integer)m.get(CoreAnnotations.TokenEndAnnotation.class) - (Integer)sent.get(CoreAnnotations.TokenBeginAnnotation.class);
                mention.originalSpan = (List)m.get(CoreAnnotations.TokensAnnotation.class);
                mention.dependency = (SemanticGraph)((Object)((CoreMap)sentences.get(sentIndex)).get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class));
                ((List)allGoldMentions.get(sentIndex)).add(mention);
            }
        }
        return allGoldMentions;
    }
}

