/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.sequences;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.objectbank.XMLBeginEndIterator;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.WordToSentenceProcessor;
import edu.stanford.nlp.sequences.DocumentReaderAndWriter;
import edu.stanford.nlp.sequences.SeqClassifierFlags;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Reader/writer for true-casing data.
 *
 * <p>On input ({@link #getIterator(Reader)}) the text found inside
 * {@code TEXT} and {@code TXT} elements is tokenized, every token is
 * lower-cased, and the casing the token originally had is stored as its
 * {@code AnswerAnnotation} ({@code LOWER}, {@code UPPER}, {@code INIT_UPPER}
 * or {@code O}).
 *
 * <p>On output ({@link #printAnswers(List, PrintWriter)}) the casing named by
 * the {@code AnswerAnnotation} is re-applied to each word.
 *
 * <p>NOTE(review): {@link #knownWords} is a public static field that is
 * replaced on every call to {@link #getIterator(Reader)}; nothing in this
 * class synchronizes access to it.
 */
public class TrueCasingDocumentReaderAndWriter
implements DocumentReaderAndWriter<CoreLabel> {
    private static final long serialVersionUID = 1731527027473052481L;

    /** A token that consists of exactly one SGML/XML-style tag. */
    private static final Pattern sgml = Pattern.compile("<[^>]*>");
    /** At least one ASCII lower-case letter and no ASCII upper-case letter. */
    private static final Pattern allLower = Pattern.compile("[^A-Z]*?[a-z]+[^A-Z]*?");
    /** At least one ASCII upper-case letter and no ASCII lower-case letter. */
    private static final Pattern allUpper = Pattern.compile("[^a-z]*?[A-Z]+[^a-z]*?");
    /** Begins with an ASCII upper-case letter. */
    private static final Pattern startUpper = Pattern.compile("[A-Z].*");

    /** Applied to each run of tokens between tag tokens; presumably splits it into sentences. */
    private static final WordToSentenceProcessor<CoreLabel> wts = new WordToSentenceProcessor<CoreLabel>();

    /**
     * Lower-cased form of every token seen by the most recent call to
     * {@link #getIterator(Reader)}; {@code null} until that method has run.
     */
    public static Set<String> knownWords;

    /**
     * No-op: this reader takes no configuration from the flags.
     *
     * @param flags ignored
     */
    @Override
    public void init(SeqClassifierFlags flags) {
    }

    /**
     * Tells whether a word was seen in the most recently read input.
     *
     * @param s the word to look up; it is lower-cased before the lookup
     * @return {@code true} if the lower-cased word is in {@link #knownWords};
     *         {@code false} otherwise, including when nothing has been read yet
     */
    public static boolean known(String s) {
        Set<String> words = knownWords;
        // knownWords is only populated by getIterator(); before that there is nothing to know.
        return words != null && words.contains(s.toLowerCase());
    }

    /**
     * Reads the whole input, tokenizes the contents of every {@code TEXT}
     * element and then every {@code TXT} element, and returns the resulting
     * token sequences with casing labels attached.
     *
     * <p>Each returned token is a fresh {@link CoreLabel} carrying:
     * <ul>
     *   <li>the lower-cased word,</li>
     *   <li>{@code AnswerAnnotation}: the original casing class,</li>
     *   <li>{@code UnknownAnnotation}: {@code "true"} if the lower-cased word
     *       occurred in exactly one element of the input, else {@code "false"},</li>
     *   <li>{@code PositionAnnotation}: the token's 0-based index in its sequence.</li>
     * </ul>
     *
     * <p>Side effects: replaces {@link #knownWords} and writes diagnostic
     * output (each sequence, and each token flagged as unknown) to
     * {@code System.err}.
     *
     * @param r the input to read; it is consumed completely
     * @return an iterator over the labelled token sequences
     */
    @Override
    public Iterator<List<CoreLabel>> getIterator(Reader r) {
        String s = IOUtils.slurpReader(r);
        List<List<CoreLabel>> documents = new ArrayList<List<CoreLabel>>();
        Set<String> wordsSeenOnce = new HashSet<String>();
        Set<String> wordsSeenMultiple = new HashSet<String>();

        // The input is scanned twice because either element name may hold the text.
        collectSections(s, "TEXT", documents, wordsSeenOnce, wordsSeenMultiple);
        collectSections(s, "TXT", documents, wordsSeenOnce, wordsSeenMultiple);

        // Build the union first so that the published set is never partially filled.
        Set<String> allWords = new HashSet<String>(wordsSeenMultiple);
        allWords.addAll(wordsSeenOnce);
        knownWords = allWords;

        List<List<CoreLabel>> docs = new ArrayList<List<CoreLabel>>();
        for (List<CoreLabel> sentence : documents) {
            System.err.println(sentence);
            List<CoreLabel> doc = new ArrayList<CoreLabel>();
            int pos = 0;
            for (CoreLabel original : sentence) {
                String word = original.word();
                String lowered = word.toLowerCase();
                String unknown = wordsSeenOnce.contains(lowered) ? "true" : "false";
                String position = Integer.toString(pos);

                CoreLabel wi = new CoreLabel();
                wi.set(CoreAnnotations.AnswerAnnotation.class, casingLabel(word));
                wi.setWord(lowered);
                wi.set(CoreAnnotations.UnknownAnnotation.class, unknown);
                wi.set(CoreAnnotations.PositionAnnotation.class, position);
                if (unknown.equals("true")) {
                    System.err.println(wi.word() + " :: " + unknown + " :: " + position);
                }
                doc.add(wi);
                ++pos;
            }
            System.err.println();
            docs.add(doc);
        }
        return docs.iterator();
    }

    /**
     * Tokenizes the contents of every {@code tag} element in {@code text},
     * appends the processed token runs to {@code sentences}, and updates the
     * two occurrence sets.
     *
     * <p>A token that is itself a tag ends the current run and is dropped
     * from the output, but its lower-cased form is still counted. Counting is
     * per element: a word moves from {@code seenOnce} to {@code seenMultiple}
     * when it shows up in a second element, not on a second occurrence within
     * the same element.
     */
    private static void collectSections(String text, String tag, List<List<CoreLabel>> sentences,
            Set<String> seenOnce, Set<String> seenMultiple) {
        XMLBeginEndIterator xmlIter = new XMLBeginEndIterator(new StringReader(text), tag);
        while (xmlIter.hasNext()) {
            PTBTokenizer<CoreLabel> ptb = PTBTokenizer.newPTBTokenizer(new StringReader((String)xmlIter.next()), false, true);
            List<CoreLabel> pending = new ArrayList<CoreLabel>();
            Set<String> sectionWords = new HashSet<String>();
            while (ptb.hasNext()) {
                CoreLabel token = (CoreLabel)ptb.next();
                sectionWords.add(token.word().toLowerCase());
                if (sgml.matcher(token.word()).matches()) {
                    // Tag token: flush what has been collected so far and skip the tag itself.
                    if (!pending.isEmpty()) {
                        sentences.addAll(wts.process(pending));
                        pending = new ArrayList<CoreLabel>();
                    }
                    continue;
                }
                pending.add(token);
            }
            if (!pending.isEmpty()) {
                sentences.addAll(wts.process(pending));
            }
            for (String word : sectionWords) {
                if (seenMultiple.contains(word)) {
                    continue;
                }
                if (seenOnce.remove(word)) {
                    seenMultiple.add(word);
                } else {
                    seenOnce.add(word);
                }
            }
        }
    }

    /**
     * Classifies the casing of a word. The checks are ordered, so a word that
     * is entirely upper-case is {@code UPPER}, not {@code INIT_UPPER}.
     *
     * @return {@code "LOWER"}, {@code "UPPER"}, {@code "INIT_UPPER"} or {@code "O"}
     */
    private static String casingLabel(String word) {
        if (allLower.matcher(word).matches()) {
            return "LOWER";
        }
        if (allUpper.matcher(word).matches()) {
            return "UPPER";
        }
        if (startUpper.matcher(word).matches()) {
            return "INIT_UPPER";
        }
        return "O";
    }

    /**
     * Writes the tokens of {@code doc} on one line, applying to each word the
     * casing named by its {@code AnswerAnnotation} and surrounding it with its
     * {@code BeforeAnnotation} / {@code AfterAnnotation} text when present.
     *
     * <p>A missing before/after annotation contributes nothing to the output
     * (rather than the text {@code null}). A token without an answer, or with
     * an answer other than {@code UPPER}, {@code LOWER} or {@code INIT_UPPER},
     * is written unchanged.
     *
     * @param doc  the tokens to write
     * @param out2 the destination; a line terminator is written after the last token
     */
    @Override
    public void printAnswers(List<CoreLabel> doc, PrintWriter out2) {
        for (CoreLabel wi : doc) {
            String prev = (String)wi.get(CoreAnnotations.BeforeAnnotation.class);
            if (prev != null) {
                out2.print(prev);
            }
            String w = wi.word();
            String answer = (String)wi.get(CoreAnnotations.AnswerAnnotation.class);
            // Constant-first equals keeps a token with no answer from throwing.
            if ("UPPER".equals(answer)) {
                out2.print(w.toUpperCase());
            } else if ("LOWER".equals(answer)) {
                out2.print(w.toLowerCase());
            } else if ("INIT_UPPER".equals(answer) && w.length() > 0) {
                out2.print(w.substring(0, 1).toUpperCase());
                out2.print(w.substring(1));
            } else {
                // Also reached for an empty INIT_UPPER word, where there is no first letter to raise.
                out2.print(w);
            }
            String after = (String)wi.get(CoreAnnotations.AfterAnnotation.class);
            if (after != null) {
                out2.print(after);
            }
        }
        out2.println();
    }
}

