/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.international.arabic.pipeline;

import edu.stanford.nlp.international.arabic.Buckwalter;
import edu.stanford.nlp.international.arabic.pipeline.DefaultLexicalMapper;
import edu.stanford.nlp.international.arabic.pipeline.LDCPosMapper;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.process.treebank.AbstractDataset;
import edu.stanford.nlp.process.treebank.Dataset;
import edu.stanford.nlp.trees.BobChrisTreeNormalizer;
import edu.stanford.nlp.trees.DiskTreebank;
import edu.stanford.nlp.trees.LabeledScoredTreeFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeFactory;
import edu.stanford.nlp.trees.TreeVisitor;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.trees.international.arabic.ATBTreeUtils;
import edu.stanford.nlp.trees.international.arabic.ArabicTreeNormalizer;
import edu.stanford.nlp.trees.international.arabic.ArabicTreeReaderFactory;
import edu.stanford.nlp.trees.international.arabic.ArabicTreebankLanguagePack;
import edu.stanford.nlp.util.Filter;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.util.Collections;
import java.util.Properties;

/**
 * Dataset builder that loads Arabic treebank files into a {@link DiskTreebank},
 * normalizes every tree with {@link ArabicRawTreeNormalizer}, and writes the
 * result to {@code outFileName} (one bracketed tree per line) and, when
 * {@code makeFlatFile} is set, to {@code flatFileName} (one flattened tree per
 * line).
 *
 * <p>All configuration fields used here ({@code pathsToData},
 * {@code splitFilter}, {@code lexMapper}, {@code posMapper}, ...) are inherited
 * from {@link AbstractDataset}.
 */
public class ATBArabicDataset
extends AbstractDataset {
    /**
     * Creates the dataset with a UTF-8 {@link DiskTreebank} that reads trees
     * through {@link ArabicTreeReaderFactory.ArabicRawTreeReaderFactory}.
     */
    public ATBArabicDataset() {
        // NOTE(review): the meaning of the boolean flag is defined by
        // ArabicRawTreeReaderFactory and is not visible from this file.
        this.treebank = new DiskTreebank(new ArabicTreeReaderFactory.ArabicRawTreeReaderFactory(true), "UTF-8");
    }

    /**
     * Loads every path in {@code pathsToData} into the treebank, then streams
     * all trees through an {@link ArabicRawTreeNormalizer} that writes them to
     * the output file(s).
     *
     * <p>Failures to open an output file are reported on {@code System.err}
     * and otherwise swallowed (best effort); in that case no file name is added
     * to {@code outputFileList}. Both writers are always closed.
     */
    @Override
    public void build() {
        for (File path : this.pathsToData) {
            if (this.splitFilter == null) {
                this.treebank.loadPath(path, this.treeFileExtension, false);
            } else {
                this.treebank.loadPath(path, this.splitFilter);
            }
        }

        PrintWriter outfile = null;
        PrintWriter flatFile = null;
        try {
            outfile = ATBArabicDataset.openUtf8Writer(this.outFileName);
            if (this.makeFlatFile) {
                flatFile = ATBArabicDataset.openUtf8Writer(this.flatFileName);
            }

            this.treebank.apply(new ArabicRawTreeNormalizer(outfile, flatFile));

            this.outputFileList.add(this.outFileName);
            if (this.makeFlatFile) {
                this.outputFileList.add(this.flatFileName);
                this.toStringBuffer.append(" Made flat files\n");
            }
        }
        catch (UnsupportedEncodingException e) {
            System.err.printf("%s: Filesystem does not support UTF-8 output\n", this.getClass().getName());
            e.printStackTrace();
        }
        catch (FileNotFoundException e) {
            // The writers are opened in order, so a still-null outfile means the
            // main output file failed; otherwise the flat file was the culprit.
            String failedFileName = outfile == null ? this.outFileName : this.flatFileName;
            System.err.printf("%s: Could not open %s for writing\n", this.getClass().getName(), failedFileName);
        }
        finally {
            if (outfile != null) {
                outfile.close();
            }
            if (flatFile != null) {
                flatFile.close();
            }
        }
    }

    /**
     * Opens {@code fileName} for buffered, UTF-8 encoded text output.
     *
     * @param fileName path of the file to create or truncate
     * @return a writer positioned at the start of the file
     * @throws FileNotFoundException if the file cannot be opened for writing
     * @throws UnsupportedEncodingException if UTF-8 is not available
     */
    private static PrintWriter openUtf8Writer(String fileName) throws FileNotFoundException, UnsupportedEncodingException {
        return new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fileName), "UTF-8")));
    }

    /**
     * Applies the superclass options and then makes sure the mappers are set up.
     *
     * <p>If no lexical mapper was configured, a {@link DefaultLexicalMapper} is
     * installed and set up with the comma-separated {@code lexMapOptions}. If
     * any mapping paths were given, a POS mapper ({@link LDCPosMapper} unless
     * one is already present) is set up once per path with the comma-separated
     * {@code posMapOptions}.
     *
     * @param opts the dataset options, forwarded unchanged to the superclass
     * @return whatever {@code super.setOptions(opts)} returned
     */
    @Override
    public boolean setOptions(Properties opts) {
        boolean ret = super.setOptions(opts);

        if (this.lexMapper == null) {
            this.lexMapper = new DefaultLexicalMapper();
            this.lexMapper.setup(null, this.lexMapOptions.split(","));
        }

        if (this.pathsToMappings.size() != 0) {
            if (this.posMapper == null) {
                this.posMapper = new LDCPosMapper(this.addDeterminer);
            }
            String[] mapOpts = this.posMapOptions.split(",");
            for (File path : this.pathsToMappings) {
                this.posMapper.setup(path, mapOpts);
            }
        }
        return ret;
    }

    /**
     * Tree visitor that normalizes one raw treebank tree and prints it to the
     * output writer (and, if present, its flattened form to the flat-file
     * writer). Reads its configuration from the enclosing
     * {@link ATBArabicDataset} instance.
     */
    protected class ArabicRawTreeNormalizer
    implements TreeVisitor {
        /** Transliterator applied to words whose encoding may be changed. */
        protected final Buckwalter encodingMap;
        /** Destination for the bracketed trees. */
        protected final PrintWriter outfile;
        /** Destination for the flattened trees; may be {@code null}. */
        protected final PrintWriter flatFile;
        /** Filter passed to {@link Tree#prune} in {@link #visitTree}. */
        protected final Filter<Tree> nullFilter;
        /** Not used by this class itself; kept for subclasses. */
        protected final Filter<Tree> aOverAFilter;
        /** Factory used when pruning and when adding a ROOT node. */
        protected final TreeFactory tf;
        /** Language pack used to strip category labels to their basic form. */
        protected final TreebankLanguagePack tlp;

        /**
         * @param outFile writer that receives one bracketed tree per line
         * @param flatFile writer that receives one flattened tree per line, or
         *        {@code null} to skip flat output
         */
        public ArabicRawTreeNormalizer(PrintWriter outFile, PrintWriter flatFile) {
            // NOTE(review): presumably Buckwalter(true) selects the opposite
            // transliteration direction from the default - confirm in Buckwalter.
            this.encodingMap = ATBArabicDataset.this.encoding == Dataset.Encoding.UTF8 ? new Buckwalter() : new Buckwalter(true);
            this.outfile = outFile;
            this.flatFile = flatFile;
            this.nullFilter = new ArabicTreeNormalizer.ArabicEmptyFilter();
            this.aOverAFilter = new BobChrisTreeNormalizer.AOverAFilter();
            this.tf = new LabeledScoredTreeFactory();
            this.tlp = new ArabicTreebankLanguagePack();
        }

        /**
         * Rewrites a preterminal in place: the node gets the mapped POS tag and
         * its single child gets the mapped (and possibly re-encoded) word.
         *
         * <p>When {@code morphDelim} is set, the child value becomes
         * {@code word + morphDelim + rawTag} instead of the bare word.
         *
         * @param node a preterminal node; its first child is the word leaf
         */
        protected void processPreterminal(Tree node) {
            Tree leaf = node.firstChild();
            String rawTag = node.value();
            String rawWord = leaf.value();
            // The POS mapper sees the untouched word, before the fix-ups below.
            String posTag = ATBArabicDataset.this.posMapper == null ? rawTag : ATBArabicDataset.this.posMapper.map(rawTag, rawWord);

            // A bare "F" token is rewritten to "f" and retagged as a conjunction.
            // NOTE(review): presumably a workaround for a treebank idiosyncrasy - confirm.
            if (rawWord.equals("F")) {
                posTag = posTag.equals("NOUN.VN") ? "CONJ" : "CC";
                rawWord = "f";
            }

            // Strip the merge marker that prefixes some tokens.
            if (rawWord.startsWith("MERGE_with_previous_token:")) {
                rawWord = rawWord.replace("MERGE_with_previous_token:", "");
            }

            // Drop every "e" character from the word.
            // NOTE(review): presumably removes an annotation artifact - confirm.
            rawWord = rawWord.replace("e", "");

            String finalWord = ATBArabicDataset.this.lexMapper.map(rawTag, rawWord);
            if (ATBArabicDataset.this.lexMapper.canChangeEncoding(rawTag, finalWord)) {
                finalWord = this.encodingMap.apply(finalWord);
            }

            node.setValue(posTag);
            if (ATBArabicDataset.this.morphDelim == null) {
                leaf.setValue(finalWord);
                if (leaf.label() instanceof CoreLabel) {
                    ((CoreLabel)leaf.label()).setWord(finalWord);
                }
            } else {
                leaf.setValue(finalWord + ATBArabicDataset.this.morphDelim + rawTag);
            }
        }

        /**
         * Collapses unary "A over A" configurations in place: when a phrasal
         * node has exactly one child carrying the same value, the child is
         * removed and its children are attached directly to the node. The
         * filter is then applied recursively to the (possibly new) children.
         *
         * @param t the tree to filter; may be {@code null}
         * @return {@code t} itself (modified in place), or {@code null} if
         *         {@code t} was {@code null}
         */
        public Tree arabicAoverAFilter(Tree t) {
            if (t == null || t.isLeaf() || t.isPreTerminal()) {
                return t;
            }
            if (t.numChildren() == 1) {
                Tree onlyChild = t.firstChild();
                boolean bothLabelled = t.label() != null && onlyChild.label() != null;
                if (bothLabelled && t.value().equals(onlyChild.value())) {
                    t.setChildren(onlyChild.children());
                }
            }
            for (Tree kid : t.getChildrenAsList()) {
                this.arabicAoverAFilter(kid);
            }
            return t;
        }

        /**
         * Normalizes a single tree and prints it.
         *
         * <p>Trees are skipped (nothing is printed) when they are {@code null},
         * when the root value is {@code "X"}, when the yield is longer than
         * {@code maxLen} (measured before pruning), or when pruning removes the
         * whole tree.
         *
         * @param t the raw tree; it is not modified, a pruned copy is processed
         */
        @Override
        public void visitTree(Tree t) {
            // Constant-first equals: a root without a value must not throw here.
            if (t == null || "X".equals(t.value())) {
                return;
            }
            if (t.yield().size() > ATBArabicDataset.this.maxLen) {
                return;
            }

            Tree tree = this.arabicAoverAFilter(t.prune(this.nullFilter, this.tf));
            if (tree == null) {
                // prune() yields null when the root itself is filtered out;
                // there is nothing left to normalize or print.
                return;
            }

            if (ATBArabicDataset.this.customTreeVisitor != null) {
                ATBArabicDataset.this.customTreeVisitor.visitTree(tree);
            }

            for (Tree node : tree) {
                if (node.isPreTerminal()) {
                    this.processPreterminal(node);
                }
                if (ATBArabicDataset.this.removeDashTags && !node.isLeaf()) {
                    node.setValue(this.tlp.basicCategory(node.value()));
                }
            }

            if (ATBArabicDataset.this.addRoot && tree.value() != null && !tree.value().equals("ROOT")) {
                tree = this.tf.newTreeNode("ROOT", Collections.singletonList(tree));
            }

            this.outfile.println(tree.toString());
            if (this.flatFile != null) {
                String flatString = ATBTreeUtils.flattenTree(tree);
                if (ATBArabicDataset.this.removeEscapeTokens) {
                    flatString = ATBTreeUtils.unEscape(flatString);
                }
                this.flatFile.println(flatString);
            }
        }
    }
}

