/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.ie.ner;

import edu.stanford.nlp.classify.Classifier;
import edu.stanford.nlp.classify.Dataset;
import edu.stanford.nlp.classify.GeneralDataset;
import edu.stanford.nlp.classify.LinearClassifier;
import edu.stanford.nlp.classify.LinearClassifierFactory;
import edu.stanford.nlp.classify.LogPrior;
import edu.stanford.nlp.classify.NBLinearClassifierFactory;
import edu.stanford.nlp.classify.ProbabilisticClassifier;
import edu.stanford.nlp.classify.SVMLightClassifierFactory;
import edu.stanford.nlp.ie.AbstractSequenceClassifier;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.BasicDatum;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.Datum;
import edu.stanford.nlp.ling.Document;
import edu.stanford.nlp.ling.HasTag;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.WordTag;
import edu.stanford.nlp.math.ArrayMath;
import edu.stanford.nlp.math.SloppyMath;
import edu.stanford.nlp.objectbank.ObjectBank;
import edu.stanford.nlp.process.DocumentProcessor;
import edu.stanford.nlp.process.ListProcessor;
import edu.stanford.nlp.sequences.BeamBestSequenceFinder;
import edu.stanford.nlp.sequences.BestSequenceFinder;
import edu.stanford.nlp.sequences.Clique;
import edu.stanford.nlp.sequences.DocumentReaderAndWriter;
import edu.stanford.nlp.sequences.ExactBestSequenceFinder;
import edu.stanford.nlp.sequences.FeatureFactory;
import edu.stanford.nlp.sequences.PlainTextDocumentReaderAndWriter;
import edu.stanford.nlp.sequences.SeqClassifierFlags;
import edu.stanford.nlp.sequences.SequenceModel;
import edu.stanford.nlp.sequences.TrueCasingDocumentReaderAndWriter;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.HashIndex;
import edu.stanford.nlp.util.Index;
import edu.stanford.nlp.util.PaddedList;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.StringUtils;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Pattern;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
public class CMMClassifier<IN extends CoreLabel>
extends AbstractSequenceClassifier<IN>
implements DocumentProcessor,
ListProcessor<Object, WordTag> {
    /** Underlying per-token classifier; assigned by the training methods and by {@code loadClassifier}. */
    private ProbabilisticClassifier<String, String> classifier;
    /** Label sequences collected by {@code makeAnswerArraysAndTagIndex}; serialized together with the classifier. */
    Set<List<String>> answerArrays;
    /** Resource name that {@code loadDefaultClassifier} hands to {@code loadJarClassifier}. */
    public static final String DEFAULT_CLASSIFIER = "ner-eng-ie.cmm-3-all2006.ser.gz";
    // NOTE(review): not referenced in the visible part of this file; purpose unconfirmed.
    static int lastPos = -1;

    /**
     * Creates a classifier configured with a freshly constructed {@link SeqClassifierFlags}.
     * Used by the static factory methods before a serialized model is loaded.
     */
    protected CMMClassifier() {
        super(new SeqClassifierFlags());
    }

    /**
     * Creates a classifier whose configuration is handed to the superclass constructor.
     *
     * @param props properties forwarded unchanged to {@code AbstractSequenceClassifier}
     */
    public CMMClassifier(Properties props) {
        super(props);
    }

    /**
     * Returns the labels held in the class index, minus the background symbol.
     *
     * @return a new set containing every entry of {@code classIndex} except
     *         {@code flags.backgroundSymbol}
     */
    public Set<String> getTags() {
        Set<String> labels = new HashSet<String>();
        labels.addAll(this.classIndex.objectsList());
        labels.remove(this.flags.backgroundSymbol);
        return labels;
    }

    /**
     * Labels the given document in place and returns it.
     *
     * @param document tokens to label; their answer annotation is overwritten
     * @return the same list instance that was passed in
     */
    @Override
    public List<IN> classify(List<IN> document) {
        // Sequence decoding is only used when the flag asks for it;
        // otherwise every token is labelled independently.
        if (!this.flags.useSequences) {
            this.classifyNoSeq(document);
        } else {
            this.classifySeq(document);
        }
        return document;
    }

    /**
     * Labels each token independently of its neighbours' predicted labels.
     *
     * <p>When {@code flags.useReverse} is set the document is reversed for the duration
     * of the call and restored before returning. With {@code flags.lowerNewgeneThreshold}
     * a token receives the label {@code "G"} only if its gazette annotation is
     * {@code "NEWGENE"} and the classifier's score for {@code "G"} exceeds
     * {@code flags.newgeneThreshold}; every other token gets the background symbol.
     *
     * @param document tokens to label; modified in place
     */
    private void classifyNoSeq(List<IN> document) {
        if (this.flags.useReverse) {
            Collections.reverse(document);
        }
        final int tokenCount = document.size();
        if (!this.flags.lowerNewgeneThreshold) {
            // Plain mode: take the classifier's top label for every position.
            for (int pos = 0; pos < tokenCount; ++pos) {
                String label = this.classOf(document, pos);
                CoreLabel token = document.get(pos);
                token.set(CoreAnnotations.AnswerAnnotation.class, label);
            }
            if (this.flags.justify && this.classifier instanceof LinearClassifier) {
                LinearClassifier lc = (LinearClassifier)this.classifier;
                for (int pos = 0; pos < tokenCount; ++pos) {
                    CoreLabel token = document.get(pos);
                    System.err.print("@@ Position " + pos + ": ");
                    System.err.println(token.word() + " chose " + token.get(CoreAnnotations.AnswerAnnotation.class));
                    lc.justificationOf(this.makeDatum(document, pos, this.featureFactory));
                }
            }
        } else {
            System.err.println("Using NEWGENE threshold: " + this.flags.newgeneThreshold);
            for (int pos = 0; pos < tokenCount; ++pos) {
                CoreLabel token = document.get(pos);
                // The datum is built and scored for every token, gazette hit or not.
                Datum<String, String> datum = this.makeDatum(document, pos, this.featureFactory);
                Counter<String> scores = this.classifier.scoresOf(datum);
                String chosen = this.flags.backgroundSymbol;
                boolean gazetteHit = "NEWGENE".equals(token.get(CoreAnnotations.GazAnnotation.class));
                if (gazetteHit) {
                    for (String label : scores.keySet()) {
                        if ("G".equals(label)) {
                            System.err.println(token.word() + ':' + scores.getCount(label));
                            if (scores.getCount(label) > this.flags.newgeneThreshold) {
                                chosen = label;
                            }
                        }
                    }
                }
                token.set(CoreAnnotations.AnswerAnnotation.class, chosen);
            }
        }
        if (this.flags.useReverse) {
            // Undo the reversal applied at the top.
            Collections.reverse(document);
        }
    }

    /**
     * Returns the classifier's label for a single position.
     *
     * @param lineInfos the document
     * @param pos index of the token to classify
     * @return the label the underlying classifier assigns to the datum built for {@code pos}
     */
    protected String classOf(List<IN> lineInfos, int pos) {
        return this.classifier.classOf(this.makeDatum(lineInfos, pos, this.featureFactory));
    }

    /**
     * Computes an objective value for the document under the current classifier.
     *
     * <p>For every position the log-probability of the datum's own label, normalised by
     * the log-sum over all returned labels, is subtracted from the running total. If the
     * classifier is a {@link LinearClassifier}, {@code w*w / 2 / sigma^2} is then added
     * for every (feature, class label) weight.
     *
     * @param lineInfos labelled tokens
     * @return the accumulated value described above
     */
    public double loglikelihood(List<IN> lineInfos) {
        double objective = 0.0;
        for (int pos = 0; pos < lineInfos.size(); ++pos) {
            Datum<String, String> datum = this.makeDatum(lineInfos, pos, this.featureFactory);
            Counter<String> logProbs = this.classifier.logProbabilityOf(datum);
            // Log of the normaliser over all labels the classifier returned.
            double logTotal = Double.NEGATIVE_INFINITY;
            for (String label : logProbs.keySet()) {
                logTotal = SloppyMath.logAdd(logTotal, logProbs.getCount(label));
            }
            objective -= logProbs.getCount(datum.label()) - logTotal;
        }
        if (!(this.classifier instanceof LinearClassifier)) {
            return objective;
        }
        LinearClassifier lc = (LinearClassifier)this.classifier;
        double sigmaSq = this.flags.sigma * this.flags.sigma;
        for (String feature : lc.features()) {
            for (String classLabel : this.classIndex) {
                double w = lc.weight(feature, classLabel);
                objective += w * w / 2.0 / sigmaSq;
            }
        }
        return objective;
    }

    /**
     * Builds the sequence model used to decode the given document.
     *
     * <p>Side effect: when {@code flags.useReverse} is set the document is reversed in
     * place and is NOT restored here.
     *
     * @param document tokens to decode
     * @return a {@code Scorer} over the (possibly reversed) document
     */
    @Override
    public SequenceModel getSequenceModel(List<IN> document) {
        if (this.flags.useReverse) {
            Collections.reverse(document);
        }
        // First numeric Scorer argument: maxLeft with taggy sequences, else 1 or 0
        // depending on usePrevSequences.
        int left;
        if (this.flags.useTaggySequences) {
            left = this.flags.maxLeft;
        } else if (this.flags.usePrevSequences) {
            left = 1;
        } else {
            left = 0;
        }
        int right = this.flags.useNextSequences ? 1 : 0;
        return new Scorer(document, this.classIndex, this, left, right, this.answerArrays);
    }

    /**
     * Labels the document by decoding a best tag sequence over the sequence model.
     *
     * <p>An empty document is returned untouched. Otherwise the sequence model is built
     * (which reverses the document in place when {@code flags.useReverse} is set), the
     * best tag sequence is found exactly or with a beam depending on
     * {@code flags.useViterbi}, and each token's answer annotation is set from it. The
     * document is reversed back at the end when {@code flags.useReverse} is set.
     *
     * <p>With {@code flags.lowerNewgeneThreshold} an extra pass scores sub-spans of every
     * run of tokens whose gazette annotation is {@code "NEWGENE"} and prints qualifying
     * spans to {@code System.out}. That pass only reads {@code tags}; it does not change
     * the labels assigned afterwards.
     *
     * @param document tokens to label; modified in place
     */
    private void classifySeq(List<IN> document) {
        int[] tags;
        BestSequenceFinder ti;
        if (document.isEmpty()) {
            return;
        }
        // May reverse the document in place; undone at the bottom of this method.
        SequenceModel ts = this.getSequenceModel(document);
        if (this.flags.useViterbi) {
            ti = new ExactBestSequenceFinder();
            tags = ((ExactBestSequenceFinder)ti).bestSequence(ts);
        } else {
            ti = new BeamBestSequenceFinder(this.flags.beamSize, true, true);
            tags = ((BeamBestSequenceFinder)ti).bestSequence(ts, document.size());
        }
        if (this.flags.lowerNewgeneThreshold) {
            System.err.println("Using NEWGENE threshold: " + this.flags.newgeneThreshold);
            // Scratch copy of the decoded tags; candidate labelings are written into it.
            int[] copy = new int[tags.length];
            System.arraycopy(tags, 0, copy, 0, tags.length);
            int ngTag = this.classIndex.indexOf("G");
            int bgTag = this.classIndex.indexOf(this.flags.backgroundSymbol);
            int dSize = document.size();
            for (int i = 0; i < dSize; ++i) {
                int k;
                int j;
                CoreLabel wordInfo = (CoreLabel)document.get(i);
                if (!"NEWGENE".equals(wordInfo.get(CoreAnnotations.GazAnnotation.class))) continue;
                // [start, end) is the maximal run of NEWGENE tokens beginning at i.
                int start = i;
                for (j = i; j < document.size() && "NEWGENE".equals((wordInfo = (CoreLabel)document.get(j)).get(CoreAnnotations.GazAnnotation.class)); ++j) {
                }
                int end = j;
                // Blank out a window of up to 4 positions on either side of the run.
                int winStart = Math.max(0, start - 4);
                int winEnd = Math.min(tags.length, end + 4);
                for (j = winStart; j < winEnd; ++j) {
                    copy[j] = bgTag;
                }
                // Baseline: summed recentered background score over the run.
                double bgScore = 0.0;
                for (j = start; j < end; ++j) {
                    double[] scores = ts.scoresOf(copy, j);
                    scores = Scorer.recenter(scores);
                    bgScore += scores[bgTag];
                }
                // Score of every sub-span (j, k) relative to the baseline.
                ClassicCounter<Pair<Integer, Integer>> prevScores = new ClassicCounter<Pair<Integer, Integer>>();
                for (j = start; j < end; ++j) {
                    // Reset the run to background before growing a span that starts at j.
                    for (k = start; k < end; ++k) {
                        copy[k] = bgTag;
                    }
                    for (k = j; k < end; ++k) {
                        copy[k] = ngTag;
                        double ngScore = 0.0;
                        for (int m = start; m < end; ++m) {
                            double[] scores = ts.scoresOf(copy, m);
                            scores = Scorer.recenter(scores);
                            // Indexed by the originally decoded tag at m, not by copy[m].
                            ngScore += scores[tags[m]];
                        }
                        prevScores.incrementCount(new Pair<Integer, Integer>(j, k), ngScore - bgScore);
                    }
                }
                for (j = start; j < end; ++j) {
                    for (k = j; k < end; ++k) {
                        double score = prevScores.getCount(new Pair<Integer, Integer>(j, k));
                        // Neighbouring spans: one token longer or shorter on either side.
                        Pair<Integer, Integer> al = new Pair<Integer, Integer>(j - 1, k);
                        Pair<Integer, Integer> ar = new Pair<Integer, Integer>(j, k + 1);
                        Pair<Integer, Integer> sl = new Pair<Integer, Integer>(j + 1, k);
                        Pair<Integer, Integer> sr = new Pair<Integer, Integer>(j, k - 1);
                        // Report only spans at or above the threshold that strictly beat every scored neighbour.
                        if (!(score >= this.flags.newgeneThreshold) || prevScores.containsKey(al) && !(score > prevScores.getCount(al)) || prevScores.containsKey(ar) && !(score > prevScores.getCount(ar)) || prevScores.containsKey(sl) && !(score > prevScores.getCount(sl)) || prevScores.containsKey(sr) && !(score > prevScores.getCount(sr))) continue;
                        StringBuilder sb = new StringBuilder();
                        wordInfo = (CoreLabel)document.get(j);
                        String docId = (String)wordInfo.get(CoreAnnotations.IDAnnotation.class);
                        String startIndex = (String)wordInfo.get(CoreAnnotations.PositionAnnotation.class);
                        wordInfo = (CoreLabel)document.get(k);
                        String endIndex = (String)wordInfo.get(CoreAnnotations.PositionAnnotation.class);
                        for (int m = j; m <= k; ++m) {
                            wordInfo = (CoreLabel)document.get(m);
                            sb.append(wordInfo.word());
                            sb.append(' ');
                        }
                        // Output line: docId|startPosition endPosition|span text
                        System.out.println(docId + '|' + startIndex + ' ' + endIndex + '|' + sb.toString().trim());
                    }
                }
                // Restore the scratch window to the decoded tags before the next run.
                for (j = winStart; j < winEnd; ++j) {
                    copy[j] = tags[j];
                }
                // Resume after the run; the loop's ++i also skips index end, which is not a NEWGENE token.
                i = end;
            }
        }
        // Write the decoded labels onto the tokens.
        int docSize = document.size();
        for (int i = 0; i < docSize; ++i) {
            CoreLabel lineInfo = (CoreLabel)document.get(i);
            String answer = (String)this.classIndex.get(tags[i]);
            lineInfo.set(CoreAnnotations.AnswerAnnotation.class, answer);
        }
        if (this.flags.justify && this.classifier instanceof LinearClassifier) {
            LinearClassifier lc = (LinearClassifier)this.classifier;
            if (this.flags.dump) {
                lc.dump();
            }
            int docSize2 = document.size();
            for (int i = 0; i < docSize2; ++i) {
                CoreLabel lineInfo = (CoreLabel)document.get(i);
                System.err.print("@@ Position is: " + i + ": ");
                System.err.println(lineInfo.word() + ' ' + (String)lineInfo.get(CoreAnnotations.AnswerAnnotation.class));
                lc.justificationOf(this.makeDatum(document, i, this.featureFactory));
            }
        }
        if (this.flags.useReverse) {
            // Undo the reversal performed inside getSequenceModel.
            Collections.reverse(document);
        }
    }

    /**
     * Adapts the current classifier using documents read from a file.
     *
     * <p>Side effect: sets {@code flags.ocrTrain} to {@code false} before reading.
     *
     * @param filename file from which the adaptation documents are read
     * @param trainDataset dataset whose indices are reused; may be {@code null}
     */
    public void adapt(String filename, Dataset<String, String> trainDataset) {
        this.flags.ocrTrain = false;
        ObjectBank adaptationDocs = this.makeObjectBankFromFile(filename);
        this.adapt(adaptationDocs, trainDataset);
    }

    /**
     * Converts the given documents into a dataset and adapts the classifier to it.
     *
     * @param featureLabels adaptation documents
     * @param trainDataset dataset whose feature and label indices are reused; may be {@code null}
     */
    public void adapt(ObjectBank<List<IN>> featureLabels, Dataset<String, String> trainDataset) {
        this.adapt(this.getDataset(featureLabels, trainDataset));
    }

    /**
     * Retrains the current linear classifier on new documents, starting from the
     * existing weights.
     *
     * <p>The existing weight matrix is copied into a flat initial vector laid out
     * according to the new dataset's feature and label indices, then handed to
     * {@code LinearClassifierFactory.trainWeights}; the result replaces the classifier's
     * weights.
     *
     * @param featureLabels documents to retrain on
     * @param featureIndex feature index of the existing classifier
     * @param labelIndex label index of the existing classifier
     */
    public void retrain(ObjectBank<List<IN>> featureLabels, Index<String> featureIndex, Index<String> labelIndex) {
        // Sizes are captured before the dataset is built from the same index objects.
        // NOTE(review): presumably building the dataset can grow those indices - confirm.
        int oldFeatureCount = featureIndex.size();
        int oldLabelCount = labelIndex.size();
        Dataset<String, String> adapt = this.getDataset(featureLabels, featureIndex, labelIndex);
        int prior = LogPrior.LogPriorType.QUADRATIC.ordinal();
        LinearClassifier lc = (LinearClassifier)this.classifier;
        LinearClassifierFactory<String, String> lcf = new LinearClassifierFactory<String, String>(this.flags.tolerance, this.flags.useSum, prior, this.flags.sigma, this.flags.epsilon, this.flags.QNsize);
        double[][] weights = lc.weights();
        Index newFeatures = adapt.featureIndex;
        Index newLabels = adapt.labelIndex;
        int newLabelCount = newLabels.size();
        double[] initial = new double[newFeatures.size() * newLabelCount];
        for (int f = 0; f < oldFeatureCount; ++f) {
            String featureName = featureIndex.get(f);
            // Offset of this feature's row in the flattened (feature-major) vector.
            int rowOffset = newFeatures.indexOf(featureName) * newLabelCount;
            for (int l = 0; l < oldLabelCount; ++l) {
                String labelName = labelIndex.get(l);
                initial[rowOffset + newLabels.indexOf(labelName)] = weights[f][l];
            }
        }
        weights = lcf.trainWeights(adapt, initial);
        lc.setWeights(weights);
    }

    /**
     * Retrains the already-trained classifier on additional documents, reusing its own
     * feature and label indices.
     *
     * <p>If no classifier has been trained yet, a message is printed and the JVM exits
     * with status -1.
     *
     * @param doc documents to retrain on
     */
    public void retrain(ObjectBank<List<IN>> doc) {
        if (this.classifier == null) {
            System.err.println("Cannot retrain before you train!");
            System.exit(-1);
        }
        LinearClassifier current = (LinearClassifier)this.classifier;
        Index<String> findex = current.featureIndex();
        Index<String> lindex = current.labelIndex();
        System.err.println("Starting retrain:\t# of original features" + findex.size() + ", # of original labels" + lindex.size());
        this.retrain(doc, findex, lindex);
    }

    /**
     * Trains the classifier on the given labelled documents.
     *
     * <p>After the initial training run, {@code flags.numTimesPruneFeatures} rounds of
     * pruning follow: features whose weight spread is not above
     * {@code flags.featureDiffThresh} are removed and the classifier is retrained with
     * {@code flags.QNsize2} in place of {@code flags.QNsize}. If adaptation is enabled
     * and an adaptation file is configured, the classifier is then adapted to it.
     * Finally a description of the classifier is printed to {@code System.err}.
     *
     * @param wordInfos labelled training documents
     */
    @Override
    public void train(Collection<List<IN>> wordInfos) {
        Dataset<String, String> train = this.getDataset(wordInfos);
        this.train(train);
        for (int i = 0; i < this.flags.numTimesPruneFeatures; ++i) {
            Index<String> featuresAboveThreshhold = this.getFeaturesAboveThreshhold(train, this.flags.featureDiffThresh);
            System.err.println("Removing features with weight below " + this.flags.featureDiffThresh + " and retraining...");
            train = this.getDataset(train, featuresAboveThreshhold);
            // Swap in the alternate QN size for the retraining run only. The finally block
            // guarantees the shared flag is restored even if training throws.
            int savedQNsize = this.flags.QNsize;
            this.flags.QNsize = this.flags.QNsize2;
            try {
                this.train(train);
            } finally {
                this.flags.QNsize = savedQNsize;
            }
        }
        if (this.flags.doAdaptation && this.flags.adaptFile != null) {
            this.adapt(this.flags.adaptFile, train);
        }
        System.err.print("Built this classifier: ");
        String classString;
        if (this.classifier instanceof LinearClassifier) {
            classString = ((LinearClassifier)this.classifier).toString(this.flags.printClassifier, this.flags.printClassifierParam);
        } else {
            classString = this.classifier.toString();
        }
        System.err.println(classString);
    }

    /**
     * Selects the features whose weights differ across labels by more than a threshold.
     *
     * <p>A feature is kept as soon as the gap between the largest and smallest weight
     * seen so far (iterating the dataset's labels in index order) exceeds {@code thresh}.
     *
     * @param dataset supplies the feature and label indices to scan
     * @param thresh minimum weight spread (exclusive) for a feature to be kept
     * @return a new index holding the kept features, in dataset feature order
     * @throws RuntimeException if the current classifier is not a {@link LinearClassifier}
     */
    public Index<String> getFeaturesAboveThreshhold(Dataset<String, String> dataset, double thresh) {
        if (!(this.classifier instanceof LinearClassifier)) {
            throw new RuntimeException("Attempting to remove features based on weight from a non-linear classifier");
        }
        LinearClassifier lc = (LinearClassifier)this.classifier;
        Index allFeatures = dataset.featureIndex;
        Index allLabels = dataset.labelIndex;
        HashIndex<String> kept = new HashIndex<String>();
        for (Object featureObj : allFeatures) {
            String feature = (String)featureObj;
            double lowest = Double.POSITIVE_INFINITY;
            double highest = Double.NEGATIVE_INFINITY;
            for (Object labelObj : allLabels) {
                String label = (String)labelObj;
                double w = lc.weight(feature, label);
                if (w < lowest) {
                    lowest = w;
                }
                if (w > highest) {
                    highest = w;
                }
                if (highest - lowest > thresh) {
                    // Spread already large enough; no need to look at further labels.
                    kept.add(feature);
                    break;
                }
            }
        }
        return kept;
    }

    /**
     * Builds a dataset from the documents with fresh feature and class indices.
     * Delegates to the three-argument overload with both indices {@code null}.
     *
     * @param data labelled documents
     * @return the dataset built from {@code data}
     */
    public Dataset<String, String> getDataset(Collection<List<IN>> data) {
        return this.getDataset(data, null, null);
    }

    /**
     * Builds a dataset with one datum per token of the given documents.
     *
     * <p>The answer arrays and class index are updated first. When both indices are
     * non-null the dataset is created on top of them; otherwise it starts with its own.
     * Documents are reversed while their datums are created if {@code flags.useReverse}
     * is set, and restored afterwards. Feature count thresholds are applied from
     * {@code flags.featThreshFile} if set, else from {@code flags.featureThreshold} when
     * it is greater than 1.
     *
     * @param data labelled documents
     * @param featureIndex existing feature index to build on, or {@code null}
     * @param classIndex existing class index to build on, or {@code null}
     * @return the populated dataset
     */
    public Dataset<String, String> getDataset(Collection<List<IN>> data, Index<String> featureIndex, Index<String> classIndex) {
        this.makeAnswerArraysAndTagIndex(data);
        int tokenTotal = 0;
        for (List<IN> doc : data) {
            tokenTotal += doc.size();
        }
        System.err.println("Making Dataset...");
        final Dataset<String, String> train;
        if (featureIndex == null || classIndex == null) {
            train = new Dataset<String, String>(tokenTotal);
        } else {
            System.err.println("Using feature/class Index from existing Dataset...");
            System.err.println("(This is used when getting Dataset from adaptation set. We want to make the index consistent.)");
            train = new Dataset<String, String>(tokenTotal, featureIndex, classIndex);
        }
        for (List<IN> doc : data) {
            if (this.flags.useReverse) {
                Collections.reverse(doc);
            }
            final int docLength = doc.size();
            for (int pos = 0; pos < docLength; ++pos) {
                train.add(this.makeDatum(doc, pos, this.featureFactory));
            }
            if (this.flags.useReverse) {
                // Put the document back in its original order.
                Collections.reverse(doc);
            }
        }
        System.err.println("done.");
        if (this.flags.featThreshFile != null) {
            System.err.println("applying thresholds...");
            List<Pair<Pattern, Integer>> thresh = CMMClassifier.getThresholds(this.flags.featThreshFile);
            train.applyFeatureCountThreshold(thresh);
        } else if (this.flags.featureThreshold > 1) {
            System.err.println("Removing Features with counts < " + this.flags.featureThreshold);
            train.applyFeatureCountThreshold(this.flags.featureThreshold);
        }
        train.summaryStatistics();
        return train;
    }

    /**
     * Builds a dataset on top of the supplied feature and class indices, with one datum
     * per token of the given documents.
     *
     * <p>Documents are reversed while their datums are created if
     * {@code flags.useReverse} is set, and restored afterwards. Feature count thresholds
     * are applied from {@code flags.featThreshFile} if set, else from
     * {@code flags.featureThreshold} when it is greater than 1.
     *
     * <p>NOTE(review): earlier code copied the original feature index and collected, for
     * every datum, the features absent from it, but never used the result. That dead
     * work has been removed; the datums added are unchanged. If the intent was to keep
     * only the new features, that still needs to be implemented.
     *
     * @param data labelled documents
     * @param featureIndex feature index the dataset is built on
     * @param classIndex class index the dataset is built on
     * @return the populated dataset
     */
    public Dataset<String, String> getBiasedDataset(ObjectBank<List<IN>> data, Index<String> featureIndex, Index<String> classIndex) {
        this.makeAnswerArraysAndTagIndex(data);
        int size = 0;
        for (List<IN> doc : data) {
            size += doc.size();
        }
        System.err.println("Making Dataset...");
        Dataset<String, String> train = new Dataset<String, String>(size, featureIndex, classIndex);
        for (List<IN> doc : data) {
            if (this.flags.useReverse) {
                Collections.reverse(doc);
            }
            int dsize = doc.size();
            for (int i = 0; i < dsize; ++i) {
                train.add(this.makeDatum(doc, i, this.featureFactory));
            }
            if (this.flags.useReverse) {
                // Put the document back in its original order.
                Collections.reverse(doc);
            }
        }
        System.err.println("done.");
        if (this.flags.featThreshFile != null) {
            System.err.println("applying thresholds...");
            List<Pair<Pattern, Integer>> thresh = CMMClassifier.getThresholds(this.flags.featThreshFile);
            train.applyFeatureCountThreshold(thresh);
        } else if (this.flags.featureThreshold > 1) {
            System.err.println("Removing Features with counts < " + this.flags.featureThreshold);
            train.applyFeatureCountThreshold(this.flags.featureThreshold);
        }
        train.summaryStatistics();
        return train;
    }

    /**
     * Builds a dataset from the documents, reusing the indices of an existing dataset
     * when one is supplied.
     *
     * @param data labelled documents
     * @param origDataset dataset whose feature and label indices are reused, or
     *        {@code null} to start with fresh indices
     * @return the populated dataset
     */
    public Dataset<String, String> getDataset(ObjectBank<List<IN>> data, Dataset<String, String> origDataset) {
        return origDataset == null
                ? this.getDataset(data)
                : this.getDataset(data, origDataset.featureIndex, origDataset.labelIndex);
    }

    /**
     * Builds a reduced copy of a dataset that keeps only the given features.
     *
     * <p>Every feature id in every datum of {@code oldData} is translated to its position
     * in {@code goodFeatures}; ids whose feature is absent from {@code goodFeatures} are
     * dropped. Labels are carried over unchanged. Feature count thresholds are then
     * applied from {@code flags.featThreshFile} if set, else from
     * {@code flags.featureThreshold} when it is greater than 1.
     *
     * <p>Fix: absence is signalled by a negative index from {@code Index.indexOf}.
     * The earlier {@code <= 0} test also discarded the feature mapped to new index 0,
     * silently removing a kept feature from every datum.
     *
     * @param oldData dataset to reduce
     * @param goodFeatures features to retain; also becomes the new feature index
     * @return the reduced dataset
     */
    public Dataset<String, String> getDataset(Dataset<String, String> oldData, Index<String> goodFeatures) {
        int[][] oldDataArray = oldData.getDataArray();
        int[] oldLabelArray = oldData.getLabelsArray();
        Index oldFeatureIndex = oldData.featureIndex;
        System.err.print("Building reduced dataset...");
        // Old feature id -> new feature id, or a negative value when the feature was pruned.
        int oldFeatureCount = oldFeatureIndex.size();
        int[] oldToNewFeatureMap = new int[oldFeatureCount];
        for (int i = 0; i < oldFeatureCount; ++i) {
            oldToNewFeatureMap[i] = goodFeatures.indexOf((String)oldFeatureIndex.get(i));
        }
        int[][] newDataArray = new int[oldDataArray.length][];
        for (int i = 0; i < oldDataArray.length; ++i) {
            int[] data = oldDataArray[i];
            // First pass: count surviving features so the new row can be sized exactly.
            int keptCount = 0;
            for (int j = 0; j < data.length; ++j) {
                if (oldToNewFeatureMap[data[j]] >= 0) {
                    ++keptCount;
                }
            }
            // Second pass: copy the translated ids, preserving their order.
            int[] newData = new int[keptCount];
            int index = 0;
            for (int j = 0; j < data.length; ++j) {
                int f = oldToNewFeatureMap[data[j]];
                if (f >= 0) {
                    newData[index++] = f;
                }
            }
            newDataArray[i] = newData;
        }
        Dataset<String, String> train = new Dataset<String, String>(oldData.labelIndex, oldLabelArray, goodFeatures, newDataArray, newDataArray.length);
        System.err.println("done.");
        if (this.flags.featThreshFile != null) {
            System.err.println("applying thresholds...");
            List<Pair<Pattern, Integer>> thresh = CMMClassifier.getThresholds(this.flags.featThreshFile);
            train.applyFeatureCountThreshold(thresh);
        } else if (this.flags.featureThreshold > 1) {
            System.err.println("Removing Features with counts < " + this.flags.featureThreshold);
            train.applyFeatureCountThreshold(this.flags.featureThreshold);
        }
        train.summaryStatistics();
        return train;
    }

    /**
     * Adapts the classifier to the given dataset.
     *
     * @param adapt adaptation dataset
     * @throws UnsupportedOperationException if {@code flags.classifierType} is "SVM"
     *         (case-insensitive)
     */
    private void adapt(Dataset<String, String> adapt) {
        boolean svmRequested = this.flags.classifierType.equalsIgnoreCase("SVM");
        if (!svmRequested) {
            this.adaptMaxEnt(adapt);
            return;
        }
        throw new UnsupportedOperationException();
    }

    /**
     * Adapts the weights of the current linear classifier to the given dataset using a
     * quadratic prior with {@code flags.adaptSigma}.
     *
     * @param adapt adaptation dataset
     * @throws UnsupportedOperationException if the classifier is not a
     *         {@link LinearClassifier}, or if the Huber or quartic prior is selected
     */
    private void adaptMaxEnt(Dataset<String, String> adapt) {
        if (!(this.classifier instanceof LinearClassifier)) {
            throw new UnsupportedOperationException();
        }
        if (this.flags.useHuber || this.flags.useQuartic) {
            // Only the quadratic prior is supported for adaptation.
            throw new UnsupportedOperationException();
        }
        int prior = LogPrior.LogPriorType.QUADRATIC.ordinal();
        LinearClassifierFactory lcf = new LinearClassifierFactory(this.flags.tolerance, this.flags.useSum, prior, this.flags.adaptSigma, this.flags.epsilon, this.flags.QNsize);
        ((LinearClassifier)this.classifier).adaptWeights(adapt, lcf);
    }

    /**
     * Trains the classifier on a prepared dataset, choosing the trainer from
     * {@code flags.classifierType}: "SVM" (case-insensitive) selects the SVM trainer,
     * anything else the maximum-entropy trainer.
     *
     * @param train training dataset
     */
    private void train(Dataset<String, String> train) {
        boolean svmRequested = this.flags.classifierType.equalsIgnoreCase("SVM");
        if (!svmRequested) {
            this.trainMaxEnt(train);
            return;
        }
        this.trainSVM(train);
    }

    /**
     * Trains the classifier with a default-constructed {@link SVMLightClassifierFactory}.
     *
     * @param train training dataset
     */
    private void trainSVM(Dataset<String, String> train) {
        SVMLightClassifierFactory svmFactory = new SVMLightClassifierFactory();
        GeneralDataset trainingData = (GeneralDataset)train;
        this.classifier = svmFactory.trainClassifier(trainingData);
    }

    /**
     * Trains a linear classifier on the dataset.
     *
     * <p>The prior is Huber or quartic when the matching flag is set (Huber wins if both
     * are), otherwise quadratic. With {@code flags.useNB} a naive-Bayes linear classifier
     * is trained instead. Otherwise the first optimiser flag set, in the order
     * QN, stochastic QN, SMD, SGD, SGD-to-QN, hybrid, selects the minimiser, with
     * conjugate gradient as the fallback.
     *
     * @param train training dataset
     */
    private void trainMaxEnt(Dataset<String, String> train) {
        Classifier trained;
        int prior;
        if (this.flags.useHuber) {
            prior = LogPrior.LogPriorType.HUBER.ordinal();
        } else if (this.flags.useQuartic) {
            prior = LogPrior.LogPriorType.QUARTIC.ordinal();
        } else {
            prior = LogPrior.LogPriorType.QUADRATIC.ordinal();
        }
        if (!this.flags.useNB) {
            LinearClassifierFactory lcf = new LinearClassifierFactory(this.flags.tolerance, this.flags.useSum, prior, this.flags.sigma, this.flags.epsilon, this.flags.QNsize);
            // Exactly one optimiser is configured; earlier flags take precedence.
            if (this.flags.useQN) {
                lcf.useQuasiNewton(this.flags.useRobustQN);
            } else if (this.flags.useStochasticQN) {
                lcf.useStochasticQN(this.flags.initialGain, this.flags.stochasticBatchSize);
            } else if (this.flags.useSMD) {
                lcf.useStochasticMetaDescent(this.flags.initialGain, this.flags.stochasticBatchSize, this.flags.stochasticMethod, this.flags.SGDPasses);
            } else if (this.flags.useSGD) {
                lcf.useStochasticGradientDescent(this.flags.gainSGD, this.flags.stochasticBatchSize);
            } else if (this.flags.useSGDtoQN) {
                lcf.useStochasticGradientDescentToQuasiNewton(this.flags);
            } else if (this.flags.useHybrid) {
                lcf.useHybridMinimizer(this.flags.initialGain, this.flags.stochasticBatchSize, this.flags.stochasticMethod, this.flags.hybridCutoffIteration);
            } else {
                lcf.useConjugateGradientAscent();
            }
            trained = lcf.trainClassifier((GeneralDataset)train);
        } else {
            trained = new NBLinearClassifierFactory(this.flags.sigma).trainClassifier((GeneralDataset)train);
        }
        this.classifier = trained;
    }

    /**
     * Trains a linear classifier through
     * {@code LinearClassifierFactory.trainClassifierSemiSup}.
     *
     * <p>The prior is Huber or quartic when the matching flag is set (Huber wins if both
     * are), otherwise quadratic. Quasi-Newton is used when {@code flags.useQN} is set,
     * conjugate gradient otherwise.
     *
     * @param data first dataset passed to the factory
     * @param biasedData second dataset passed to the factory
     * @param confusionMatrix matrix passed through to the factory unchanged
     */
    private void trainSemiSup(Dataset<String, String> data, Dataset<String, String> biasedData, double[][] confusionMatrix) {
        int prior;
        if (this.flags.useHuber) {
            prior = LogPrior.LogPriorType.HUBER.ordinal();
        } else if (this.flags.useQuartic) {
            prior = LogPrior.LogPriorType.QUARTIC.ordinal();
        } else {
            prior = LogPrior.LogPriorType.QUADRATIC.ordinal();
        }
        LinearClassifierFactory<String, String> lcf = new LinearClassifierFactory<String, String>(this.flags.tolerance, this.flags.useSum, prior, this.flags.sigma, this.flags.epsilon, this.flags.QNsize);
        if (!this.flags.useQN) {
            lcf.useConjugateGradientAscent();
        } else {
            lcf.useQuasiNewton();
        }
        this.classifier = (LinearClassifier)lcf.trainClassifierSemiSup(data, biasedData, confusionMatrix, null);
    }

    /**
     * Writes the classifier and its supporting state to the given path.
     *
     * <p>Objects are written in this order, which {@code loadClassifier} reads back:
     * classifier, flags, feature factory, class index, answer arrays, the true-casing
     * known-word set (only when the reader/writer is a
     * {@link TrueCasingDocumentReaderAndWriter}), and the known lower-case words.
     * Failures are reported on {@code System.err} and not rethrown.
     *
     * <p>Fix: the stream is now closed in a {@code finally} block, so a failed write no
     * longer leaks the open file handle.
     *
     * @param serializePath destination path
     */
    @Override
    public void serializeClassifier(String serializePath) {
        System.err.print("Serializing classifier to " + serializePath + "...");
        ObjectOutputStream oos = null;
        try {
            oos = IOUtils.writeStreamFromString(serializePath);
            oos.writeObject(this.classifier);
            oos.writeObject(this.flags);
            oos.writeObject(this.featureFactory);
            oos.writeObject(this.classIndex);
            oos.writeObject(this.answerArrays);
            if (this.readerAndWriter instanceof TrueCasingDocumentReaderAndWriter) {
                oos.writeObject(TrueCasingDocumentReaderAndWriter.knownWords);
            }
            oos.writeObject(this.knownLCWords);
            // Close on the success path so that flush errors are still reported below.
            oos.close();
            oos = null;
            System.err.println("Done.");
        }
        catch (Exception e) {
            System.err.println("Error serializing to " + serializePath);
            e.printStackTrace();
        }
        finally {
            if (oos != null) {
                try {
                    oos.close();
                }
                catch (IOException ignored) {
                    // The primary failure has already been reported above.
                }
            }
        }
    }

    /**
     * Loads the classifier named by {@link #DEFAULT_CLASSIFIER} through
     * {@code loadJarClassifier}, passing no property overrides.
     */
    public void loadDefaultClassifier() {
        this.loadJarClassifier(DEFAULT_CLASSIFIER, null);
    }

    /**
     * Creates a new instance and loads the default classifier into it.
     *
     * @return the loaded classifier
     */
    public static CMMClassifier getDefaultClassifier() {
        CMMClassifier loaded = new CMMClassifier();
        loaded.loadDefaultClassifier();
        return loaded;
    }

    /**
     * Restores this classifier's state from a serialized stream.
     *
     * <p>Objects are read in the order {@code serializeClassifier} writes them:
     * classifier, flags, feature factory, class index, answer arrays, the true-casing
     * known-word set (only when the reader/writer is a
     * {@link TrueCasingDocumentReaderAndWriter}), and the known lower-case words.
     *
     * <p>NOTE(review): this uses Java native deserialization; only load model files from
     * trusted sources.
     *
     * @param ois stream positioned at the start of a serialized classifier
     * @param props property overrides applied to the loaded flags, or {@code null}
     * @throws ClassCastException if an object in the stream has an unexpected type
     * @throws IOException if reading from the stream fails
     * @throws ClassNotFoundException if a serialized class cannot be resolved
     */
    @Override
    public void loadClassifier(ObjectInputStream ois, Properties props) throws ClassCastException, IOException, ClassNotFoundException {
        this.classifier = (LinearClassifier)ois.readObject();
        this.flags = (SeqClassifierFlags)ois.readObject();
        this.featureFactory = (FeatureFactory)ois.readObject();
        // Overrides are applied to the loaded flags before reinit() runs.
        if (props != null) {
            this.flags.setProperties(props);
        }
        this.reinit();
        this.classIndex = (Index)ois.readObject();
        this.answerArrays = (Set)ois.readObject();
        // The known-word set is only present in the stream for true-casing models.
        if (this.readerAndWriter instanceof TrueCasingDocumentReaderAndWriter) {
            TrueCasingDocumentReaderAndWriter.knownWords = (Set)ois.readObject();
        }
        this.knownLCWords = (Set)ois.readObject();
    }

    /**
     * Creates a new instance and loads it from a file through
     * {@code loadClassifierNoExceptions}.
     *
     * @param file serialized classifier file
     * @return the new classifier instance
     */
    public static CMMClassifier getClassifierNoExceptions(File file) {
        CMMClassifier loaded = new CMMClassifier();
        loaded.loadClassifierNoExceptions(file);
        return loaded;
    }

    /**
     * Creates a new instance and loads it from a file.
     *
     * @param file serialized classifier file
     * @return the loaded classifier
     * @throws IOException if the file cannot be read
     * @throws ClassCastException if the file contains objects of an unexpected type
     * @throws ClassNotFoundException if a serialized class cannot be resolved
     */
    public static CMMClassifier getClassifier(File file) throws IOException, ClassCastException, ClassNotFoundException {
        CMMClassifier loaded = new CMMClassifier();
        loaded.loadClassifier(file);
        return loaded;
    }

    /**
     * Creates a new instance and loads it from a path through
     * {@code loadClassifierNoExceptions}.
     *
     * @param loadPath location of the serialized classifier
     * @return the new classifier instance
     */
    public static CMMClassifier getClassifierNoExceptions(String loadPath) {
        CMMClassifier loaded = new CMMClassifier();
        loaded.loadClassifierNoExceptions(loadPath);
        return loaded;
    }

    /**
     * Creates a new instance and loads it from a path.
     *
     * @param loadPath location of the serialized classifier
     * @return the loaded classifier
     * @throws IOException if the data cannot be read
     * @throws ClassCastException if the data contains objects of an unexpected type
     * @throws ClassNotFoundException if a serialized class cannot be resolved
     */
    public static CMMClassifier getClassifier(String loadPath) throws IOException, ClassCastException, ClassNotFoundException {
        CMMClassifier loaded = new CMMClassifier();
        loaded.loadClassifier(loadPath);
        return loaded;
    }

    /**
     * Creates a new instance and loads it from a stream through
     * {@code loadClassifierNoExceptions}, with no property overrides. The stream is
     * wrapped in a {@link BufferedInputStream}.
     *
     * @param in stream holding a serialized classifier
     * @return the new classifier instance
     */
    public static CMMClassifier getClassifierNoExceptions(InputStream in) {
        CMMClassifier loaded = new CMMClassifier();
        BufferedInputStream buffered = new BufferedInputStream(in);
        loaded.loadClassifierNoExceptions(buffered, null);
        return loaded;
    }

    /**
     * Creates a new instance and loads it from a stream, which is wrapped in a
     * {@link BufferedInputStream}.
     *
     * @param in stream holding a serialized classifier
     * @return the loaded classifier
     * @throws IOException if the stream cannot be read
     * @throws ClassCastException if the stream contains objects of an unexpected type
     * @throws ClassNotFoundException if a serialized class cannot be resolved
     */
    public static CMMClassifier getClassifier(InputStream in) throws IOException, ClassCastException, ClassNotFoundException {
        CMMClassifier loaded = new CMMClassifier();
        loaded.loadClassifier(new BufferedInputStream(in));
        return loaded;
    }

    /**
     * Collects label n-grams and labels from the given documents.
     *
     * <p>For every start position, each run of 1 to {@code flags.maxLeft} consecutive
     * answer labels that fits inside the document is added to {@code answerArrays}, and
     * every token's answer label is added to {@code classIndex}. Both collections are
     * created on first use and accumulate across calls. Documents are reversed while
     * being scanned if {@code flags.useReverse} is set, and restored afterwards.
     *
     * @param docs labelled documents
     */
    private void makeAnswerArraysAndTagIndex(Collection<List<IN>> docs) {
        if (this.answerArrays == null) {
            this.answerArrays = new HashSet<List<String>>();
        }
        if (this.classIndex == null) {
            this.classIndex = new HashIndex();
        }
        for (List<IN> doc : docs) {
            if (this.flags.useReverse) {
                Collections.reverse(doc);
            }
            final int docLength = doc.size();
            for (int start = 0; start < docLength; ++start) {
                for (int len = 1; len <= this.flags.maxLeft && start + len <= docLength; ++len) {
                    // Labels of the len tokens beginning at start.
                    String[] labels = new String[len];
                    for (int offset = 0; offset < len; ++offset) {
                        CoreLabel token = doc.get(start + offset);
                        labels[offset] = (String)token.get(CoreAnnotations.AnswerAnnotation.class);
                    }
                    this.answerArrays.add(Arrays.asList(labels));
                }
            }
            for (int pos = 0; pos < docLength; ++pos) {
                CoreLabel token = doc.get(pos);
                this.classIndex.add(token.get(CoreAnnotations.AnswerAnnotation.class));
            }
            if (this.flags.useReverse) {
                // Put the document back in its original order.
                Collections.reverse(doc);
            }
        }
    }

    /**
     * Builds the datum for the token at {@code loc}.
     *
     * <p>The sequence is wrapped in a {@code PaddedList} using this classifier's
     * {@code pad} element. For each clique reported by the feature factory, the
     * clique's features are computed, passed through {@code addOtherClasses},
     * and appended to one feature list. The features are handed to
     * {@code printFeatures}, and the datum is labelled with the token's
     * {@code AnswerAnnotation}.
     *
     * @param info the token sequence
     * @param loc index of the token the datum is built for
     * @param featureFactory source of cliques and clique features
     * @return a {@code BasicDatum} holding the collected features and the token's answer label
     */
    public <T extends CoreLabel> Datum<String, String> makeDatum(List<IN> info, int loc, FeatureFactory featureFactory) {
        PaddedList<CoreMap> padded = new PaddedList<CoreMap>(info, this.pad);
        ArrayList<String> collected = new ArrayList<String>();
        List<Clique> allCliques = featureFactory.getCliques();
        for (Clique clique : allCliques) {
            Collection<String> raw = featureFactory.getCliqueFeatures(padded, loc, clique);
            Collection<String> decorated = CMMClassifier.addOtherClasses(raw, padded, loc, clique);
            collected.addAll(decorated);
        }
        this.printFeatures(padded.get(loc), collected);
        CoreLabel token = (CoreLabel)info.get(loc);
        String label = (String)token.get(CoreAnnotations.AnswerAnnotation.class);
        return new BasicDatum<String, String>((Collection<String>)collected, label);
    }

    /**
     * Appends the answer labels of neighbouring positions to every feature of a
     * clique that spans more than the current position.
     *
     * <p>The suffix has the form {@code |label} or {@code |label-label-...},
     * built from the answers at {@code loc - 1} ... {@code loc - 5} and
     * {@code loc + 1} according to the clique. For any clique not listed below
     * the incoming collection is returned untouched.
     *
     * @param feats the features computed for clique {@code c}
     * @param info the token sequence; positions {@code loc - 5} through
     *             {@code loc + 1} are all read regardless of the clique
     * @param loc the current position
     * @param c the clique the features belong to (compared by identity)
     * @return {@code feats} itself when no suffix applies, otherwise a new
     *         {@code HashSet} of the suffixed features
     */
    private static Collection<String> addOtherClasses(Collection<String> feats, List<? extends CoreLabel> info, int loc, Clique c) {
        // All neighbouring answers are fetched up front, whichever clique was requested.
        final String prev1 = (String)info.get(loc - 1).get(CoreAnnotations.AnswerAnnotation.class);
        final String prev2 = (String)info.get(loc - 2).get(CoreAnnotations.AnswerAnnotation.class);
        final String prev3 = (String)info.get(loc - 3).get(CoreAnnotations.AnswerAnnotation.class);
        final String prev4 = (String)info.get(loc - 4).get(CoreAnnotations.AnswerAnnotation.class);
        final String prev5 = (String)info.get(loc - 5).get(CoreAnnotations.AnswerAnnotation.class);
        final String next1 = (String)info.get(loc + 1).get(CoreAnnotations.AnswerAnnotation.class);

        final String suffix;
        if (c == FeatureFactory.cliqueCpC) {
            suffix = '|' + prev1;
        } else if (c == FeatureFactory.cliqueCp2C) {
            suffix = '|' + prev2;
        } else if (c == FeatureFactory.cliqueCp3C) {
            suffix = '|' + prev3;
        } else if (c == FeatureFactory.cliqueCp4C) {
            suffix = '|' + prev4;
        } else if (c == FeatureFactory.cliqueCp5C) {
            suffix = '|' + prev5;
        } else if (c == FeatureFactory.cliqueCpCp2C) {
            suffix = '|' + prev1 + '-' + prev2;
        } else if (c == FeatureFactory.cliqueCpCp2Cp3C) {
            suffix = '|' + prev1 + '-' + prev2 + '-' + prev3;
        } else if (c == FeatureFactory.cliqueCpCp2Cp3Cp4C) {
            suffix = '|' + prev1 + '-' + prev2 + '-' + prev3 + '-' + prev4;
        } else if (c == FeatureFactory.cliqueCpCp2Cp3Cp4Cp5C) {
            suffix = '|' + prev1 + '-' + prev2 + '-' + prev3 + '-' + prev4 + '-' + prev5;
        } else if (c == FeatureFactory.cliqueCnC) {
            suffix = '|' + next1;
        } else if (c == FeatureFactory.cliqueCpCnC) {
            suffix = '|' + prev1 + '-' + next1;
        } else {
            // Clique involves no neighbouring label: features pass through as-is.
            return feats;
        }

        HashSet<String> extended = new HashSet<String>();
        for (String feature : feats) {
            extended.add(feature + suffix);
        }
        return extended;
    }

    /**
     * Reads a threshold file into a list of (pattern, threshold) pairs.
     *
     * <p>Each line is split at its last space character: the text before it is
     * compiled as a regular expression and the text after it is parsed as an
     * integer. The reader is closed on every path, including when a line fails
     * to parse.
     *
     * @param filename path of the threshold file
     * @return one pair per line, in file order
     * @throws RuntimeException wrapping any failure (I/O error, line without a
     *         space, invalid regex, non-integer threshold)
     */
    private static List<Pair<Pattern, Integer>> getThresholds(String filename) {
        try {
            BufferedReader in = new BufferedReader(new FileReader(filename));
            try {
                ArrayList<Pair<Pattern, Integer>> thresholds = new ArrayList<Pair<Pattern, Integer>>();
                String line;
                while ((line = in.readLine()) != null) {
                    // The threshold is whatever follows the LAST space, so the
                    // pattern part may itself contain spaces.
                    int split = line.lastIndexOf(' ');
                    Pattern pattern = Pattern.compile(line.substring(0, split));
                    Integer threshold = Integer.valueOf(line.substring(split + 1));
                    thresholds.add(new Pair<Pattern, Integer>(pattern, threshold));
                }
                return thresholds;
            }
            finally {
                // Release the file handle even when parsing a line throws.
                in.close();
            }
        }
        catch (Exception e) {
            throw new RuntimeException("Error reading threshold file", e);
        }
    }

    /**
     * Semi-supervised training entry point driven entirely by {@code flags}.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>Read {@code flags.trainFile} and {@code flags.biasedTrainFile} into
     *       object banks and turn them into two datasets sharing one feature
     *       index and one class index.</li>
     *   <li>Start from an identity matrix sized by the class index and overwrite
     *       entries from {@code flags.confusionMatrix}, a {@code ':'}-separated
     *       list of {@code a|b|value} triples stored at {@code [index(b)][index(a)]}.</li>
     *   <li>Pass each row to {@code ArrayMath.normalize}, then transpose the
     *       matrix in place.</li>
     *   <li>Print every entry to {@code System.err} and delegate to
     *       {@code trainSemiSup(dataset, biasedDataset, confusionMatrix)}.</li>
     * </ol>
     */
    public void trainSemiSup() {
        String trainPath = this.flags.trainFile;
        String biasedPath = this.flags.biasedTrainFile;
        ObjectBank data = this.makeObjectBankFromFile(trainPath);
        ObjectBank biasedData = this.makeObjectBankFromFile(biasedPath);
        HashIndex<String> featureIndex = new HashIndex<String>();
        HashIndex<String> classIndex = new HashIndex<String>();
        Dataset<String, String> dataset = this.getDataset(data, featureIndex, classIndex);
        Dataset<String, String> biasedDataset = this.getBiasedDataset(biasedData, featureIndex, classIndex);

        // Identity matrix as the default: every class maps to itself.
        double[][] confusionMatrix = new double[classIndex.size()][classIndex.size()];
        for (int k = 0; k < confusionMatrix.length; ++k) {
            Arrays.fill(confusionMatrix[k], 0.0);
            confusionMatrix[k][k] = 1.0;
        }

        // Overlay the user-specified entries: "first|second|value" triples separated by ':'.
        String spec = this.flags.confusionMatrix;
        String[] entries = spec.split(":");
        for (String entry : entries) {
            String[] fields = entry.split("\\|");
            int firstIdx = classIndex.indexOf(fields[0]);
            int secondIdx = classIndex.indexOf(fields[1]);
            confusionMatrix[secondIdx][firstIdx] = Double.parseDouble(fields[2]);
        }

        for (double[] row : confusionMatrix) {
            ArrayMath.normalize(row);
        }

        // In-place transpose: swap each below-diagonal cell with its mirror.
        for (int r = 0; r < confusionMatrix.length; ++r) {
            for (int c = 0; c < r; ++c) {
                double below = confusionMatrix[r][c];
                confusionMatrix[r][c] = confusionMatrix[c][r];
                confusionMatrix[c][r] = below;
            }
        }

        for (int given = 0; given < confusionMatrix.length; ++given) {
            for (int outcome = 0; outcome < confusionMatrix.length; ++outcome) {
                String outcomeName = (String)classIndex.get(outcome);
                String givenName = (String)classIndex.get(given);
                System.err.println("P(" + outcomeName + '|' + givenName + ") = " + confusionMatrix[outcome][given]);
            }
        }
        this.trainSemiSup(dataset, biasedDataset, confusionMatrix);
    }

    /**
     * Reports the current value of {@code flags.normalize}.
     *
     * @return {@code true} when the normalize flag is set
     */
    private boolean normalize() {
        final boolean enabled = this.flags.normalize;
        return enabled;
    }

    /**
     * Scores every label for the token at {@code pos}: builds the datum for
     * that position with this classifier's feature factory and returns the
     * underlying classifier's {@code logProbabilityOf} result for it.
     *
     * @param lineInfos the token sequence
     * @param pos index of the token to score
     * @return the counter produced by {@code classifier.logProbabilityOf}
     */
    public Counter<String> scoresOf(List<IN> lineInfos, int pos) {
        return this.classifier.logProbabilityOf(this.makeDatum(lineInfos, pos, this.featureFactory));
    }

    /**
     * Overrides {@code printProbsDocument} with an empty body: this
     * implementation produces no output for the given document.
     *
     * @param document not used by this implementation
     */
    @Override
    public void printProbsDocument(List<IN> document) {
    }

    /**
     * Command-line driver.
     *
     * <p>Arguments are converted to {@code Properties} and used to construct a
     * {@code CMMClassifier}. A model is then obtained from the first of these
     * that applies: {@code loadClassifier} path, {@code loadJarClassifier},
     * training on {@code trainFile} (semi-supervised when
     * {@code biasedTrainFile} is also set), or the default classifier. After
     * that the classifier is optionally serialized ({@code serializeTo}), run
     * over {@code testFile} or else {@code testFiles}, and run over
     * {@code textFile} with a temporary {@code PlainTextDocumentReaderAndWriter}.
     *
     * @param args command-line arguments, parsed by {@code StringUtils.argsToProperties}
     * @throws Exception whatever the load, train, serialize, or classify steps throw
     */
    public static void main(String[] args) throws Exception {
        StringUtils.printErrInvocationString("CMMClassifier", args);
        Properties props = StringUtils.argsToProperties(args);
        CMMClassifier tagger = new CMMClassifier(props);

        // These four settings are captured before any model is loaded or trained.
        final String testPath = tagger.flags.testFile;
        final String plainTextPath = tagger.flags.textFile;
        final String modelPath = tagger.flags.loadClassifier;
        final String outputModelPath = tagger.flags.serializeTo;

        if (modelPath != null) {
            tagger.loadClassifierNoExceptions(modelPath, props);
        } else if (tagger.flags.loadJarClassifier != null) {
            tagger.loadJarClassifier(tagger.flags.loadJarClassifier, props);
        } else if (tagger.flags.trainFile == null) {
            tagger.loadDefaultClassifier();
        } else if (tagger.flags.biasedTrainFile != null) {
            tagger.trainSemiSup();
        } else {
            tagger.train();
        }

        if (outputModelPath != null) {
            tagger.serializeClassifier(outputModelPath);
        }

        if (testPath != null) {
            tagger.classifyAndWriteAnswers(testPath);
        } else if (tagger.flags.testFiles != null) {
            tagger.classifyAndWriteAnswers(tagger.flags.baseTestDir, tagger.flags.testFiles);
        }

        if (plainTextPath != null) {
            // Swap in a plain-text reader/writer for this one call, then restore the previous one.
            DocumentReaderAndWriter previous = tagger.readerAndWriter;
            tagger.readerAndWriter = new PlainTextDocumentReaderAndWriter();
            tagger.classifyAndWriteAnswers(plainTextPath);
            tagger.readerAndWriter = previous;
        }
    }

    /**
     * Tags a whole document: obtains {@code in.blankDocument()}, runs
     * {@link #process(List)} over {@code in} treated as a list, and adds the
     * resulting {@code WordTag}s to the blank document.
     *
     * @param in the document to tag; it is cast to {@code List}
     * @return the blank document filled with the tagged output
     */
    public Document<?, ?, WordTag> processDocument(Document in) {
        Document result = in.blankDocument();
        List<WordTag> tagged = this.process((List)in);
        result.addAll(tagged);
        return result;
    }

    /**
     * Classifies a list of arbitrary objects and returns word/label pairs.
     *
     * <p>Each input element becomes a fresh {@code CoreLabel}: a {@code HasWord}
     * contributes its word (and its tag too when it is also a {@code HasTag});
     * anything else contributes its {@code toString()} as the word. The labels
     * are passed to {@code classify}, and each result is converted to a
     * {@code WordTag} of its word and its {@code AnswerAnnotation}.
     *
     * @param list the input items
     * @return one {@code WordTag} per classified token, in order
     */
    @Override
    public List<WordTag> process(List list) {
        ArrayList<CoreLabel> tokens = new ArrayList<CoreLabel>();
        for (Object item : list) {
            CoreLabel token = new CoreLabel();
            if (!(item instanceof HasWord)) {
                // No word accessor available: fall back to the string form.
                token.setWord(item.toString());
            } else {
                token.setWord(((HasWord)item).word());
                if (item instanceof HasTag) {
                    token.setTag(((HasTag)item).tag());
                }
            }
            tokens.add(token);
        }
        List classified = this.classify(tokens);
        ArrayList<WordTag> result = new ArrayList<WordTag>();
        for (Object element : classified) {
            CoreLabel labelled = (CoreLabel)element;
            String answer = (String)labelled.get(CoreAnnotations.AnswerAnnotation.class);
            result.add(new WordTag(labelled.word(), answer));
        }
        return result;
    }

    /**
     * Looks up a single weight in the underlying classifier, which is cast to
     * {@code LinearClassifier} for the call.
     *
     * @param feature the feature name
     * @param label the class label
     * @return the value of {@code LinearClassifier.weight(feature, label)}
     * @throws ClassCastException if the underlying classifier is not a {@code LinearClassifier}
     */
    public double weight(String feature, String label) {
        LinearClassifier linear = (LinearClassifier)this.classifier;
        return linear.weight(feature, label);
    }

    /**
     * Returns the weight matrix of the underlying classifier, which is cast to
     * {@code LinearClassifier} for the call.
     *
     * @return the value of {@code LinearClassifier.weights()}
     * @throws ClassCastException if the underlying classifier is not a {@code LinearClassifier}
     */
    public double[][] weights() {
        LinearClassifier linear = (LinearClassifier)this.classifier;
        return linear.weights();
    }

    /*
     * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
     */
    /**
     * {@link SequenceModel} over one token sequence, scored by a
     * {@code CMMClassifier}.
     *
     * <p>The model exposes {@code lineInfos.size() - pre - post} positions with
     * a left window of {@code pre} and a right window of {@code post}. Scoring a
     * position writes the candidate tags of the surrounding window into the
     * tokens' {@code AnswerAnnotation} and then asks the classifier for label
     * scores, so scoring mutates {@code lineInfos}.
     *
     * <p>{@link #scoreOf(int[], int)} caches the most recent score vector. The
     * cache key (position plus window tags) is held entirely in this instance,
     * so separate {@code Scorer} objects never observe each other's cache state.
     * A single instance is still not safe for concurrent use.
     */
    static class Scorer
    implements SequenceModel {
        private CMMClassifier classifier = null;
        private int[] tagArray = null;
        private int[] backgroundTags = null;
        private Index<String> tagIndex = null;
        private List<? extends CoreLabel> lineInfos = null;
        private int pre = 0;
        private int post = 0;
        private Set<List<String>> legalTags = null;
        private static final boolean VERBOSE = false;
        /** Score vector computed by the most recent cache miss in {@link #scoreOf(int[], int)}. */
        private double[] scoreCache = null;
        /** Window tags that {@link #scoreCache} was computed for; slots start at -1. */
        private int[] lastWindow = null;
        /**
         * Position that {@link #scoreCache} was computed for; -1 while nothing is cached.
         * Declared here so the cache key lives with the cache: previously the name
         * resolved to a variable outside this class, which let one scorer's position
         * validate another scorer's (possibly still null) cache.
         */
        private int lastPos = -1;
        // Bookkeeping fields; within this class only num and hit are ever updated.
        private int percent = -1;
        private int num = 0;
        private long secs = System.currentTimeMillis();
        private long hit = 0L;
        private long tot = 0L;

        /** Fills {@code tagArray} with the identity mapping 0..tagIndex.size()-1. */
        void buildTagArray() {
            int sz = this.tagIndex.size();
            this.tagArray = new int[sz];
            for (int i = 0; i < sz; ++i) {
                this.tagArray[i] = i;
            }
        }

        /** @return the number of tokens minus the {@code pre} and {@code post} counts */
        @Override
        public int length() {
            return this.lineInfos.size() - this.pre - this.post;
        }

        /** @return the {@code pre} value given at construction */
        @Override
        public int leftWindow() {
            return this.pre;
        }

        /** @return the {@code post} value given at construction */
        @Override
        public int rightWindow() {
            return this.post;
        }

        /**
         * Returns the candidate tag indices for a position: only the background
         * tag for positions below {@code pre}, every tag otherwise. The full tag
         * array is built lazily on first use.
         */
        @Override
        public int[] getPossibleValues(int position) {
            if (this.tagArray == null) {
                this.buildTagArray();
            }
            if (position < this.pre) {
                return this.backgroundTags;
            }
            return this.tagArray;
        }

        /** Whole-sequence scoring is not supported by this model. */
        @Override
        public double scoreOf(int[] sequence) {
            throw new UnsupportedOperationException();
        }

        /**
         * Returns the score of {@code tags[pos]} given the tags in its window.
         *
         * <p>The score vector is recomputed via {@link #scoresOf(int[], int)}
         * unless both the position and every window tag (excluding {@code pos}
         * itself and negative indices) equal those of the previous computation.
         *
         * @param tags the full tag sequence
         * @param pos the position being scored
         * @return the cached or freshly computed score for {@code tags[pos]}
         */
        @Override
        public double scoreOf(int[] tags, int pos) {
            final int left = this.leftWindow();
            final int right = this.rightWindow();
            if (this.lastWindow == null) {
                this.lastWindow = new int[left + right + 1];
                Arrays.fill(this.lastWindow, -1);
            }
            boolean match = pos == this.lastPos;
            for (int i = pos - left; i <= pos + right; ++i) {
                if (i == pos || i < 0) continue;
                match &= tags[i] == this.lastWindow[i - pos + left];
            }
            if (!match) {
                this.scoreCache = this.scoresOf(tags, pos);
                // Remember the window the cache now corresponds to.
                for (int i = pos - left; i <= pos + right; ++i) {
                    if (i < 0) continue;
                    this.lastWindow[i - pos + left] = tags[i];
                }
                this.lastPos = pos;
            }
            return this.scoreCache[tags[pos]];
        }

        /**
         * Computes the score vector for position {@code pos}.
         *
         * <p>The tags of the window around {@code pos} are written into the
         * corresponding tokens' {@code AnswerAnnotation} (negative positions are
         * skipped). If {@code useObservedSequencesOnly} is set and the
         * left-context tag sequence is not in {@code legalTags}, every score is
         * -1000.0. Otherwise the classifier's score for each label is stored at
         * the slot of {@link #getPossibleValues(int)} holding that label's
         * index, and the vector is log-normalized when the classifier's
         * normalize flag is on.
         *
         * @param tags the full tag sequence
         * @param pos the position being scored
         * @return an array of length {@code tagIndex.size()}
         */
        @Override
        public double[] scoresOf(int[] tags, int pos) {
            final int left = this.leftWindow();
            final int windowSize = 1 + left + this.rightWindow();
            String[] answers = new String[windowSize];
            String[] pre = new String[left];
            for (int i = 0; i < windowSize; ++i) {
                int absPos = pos - left + i;
                if (absPos < 0) continue;
                answers[i] = this.tagIndex.get(tags[absPos]);
                CoreLabel li = this.lineInfos.get(absPos);
                li.set(CoreAnnotations.AnswerAnnotation.class, answers[i]);
                if (i >= left) continue;
                pre[i] = answers[i];
            }
            double[] scores = new double[this.tagIndex.size()];
            if (!this.legalTags.contains(Arrays.asList(pre)) && this.classifier.flags.useObservedSequencesOnly) {
                Arrays.fill(scores, -1000.0);
                return scores;
            }
            ++this.num;
            ++this.hit;
            Counter<String> c = this.classifier.scoresOf(this.lineInfos, pos);
            // The candidate set depends only on pos, so look it up once.
            int[] candidates = this.getPossibleValues(pos);
            for (String s : c.keySet()) {
                int t = this.tagIndex.indexOf(s);
                if (t <= -1) continue;
                for (int j = 0; j < candidates.length; ++j) {
                    if (candidates[j] != t) continue;
                    scores[j] = c.getCount(s);
                }
            }
            if (this.classifier.normalize()) {
                ArrayMath.logNormalize(scores);
            }
            return scores;
        }

        /**
         * Returns a copy of {@code x} with {@code ArrayMath.logSum(x)} subtracted
         * from every element; {@code x} itself is not modified.
         */
        static double[] recenter(double[] x) {
            double[] r = new double[x.length];
            double logTotal = ArrayMath.logSum(x);
            for (int i = 0; i < x.length; ++i) {
                r[i] = x[i] - logTotal;
            }
            return r;
        }

        /**
         * @param lineInfos the token sequence to score; its answer annotations are overwritten during scoring
         * @param tagIndex maps between tag strings and tag indices
         * @param classifier supplies label scores and the flags consulted here
         * @param pre left window size
         * @param post right window size
         * @param legalTags the left-context tag sequences accepted when {@code useObservedSequencesOnly} is set
         */
        Scorer(List<? extends CoreLabel> lineInfos, Index<String> tagIndex, CMMClassifier classifier, int pre, int post, Set<List<String>> legalTags) {
            this.pre = pre;
            this.post = post;
            this.lineInfos = lineInfos;
            this.tagIndex = tagIndex;
            this.classifier = classifier;
            this.legalTags = legalTags;
            this.backgroundTags = new int[]{tagIndex.indexOf(classifier.flags.backgroundSymbol)};
        }
    }
}

