/*
 * Decompiled with CFR 0.152.
 */
package org.snu.ids.ha.index;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import javax.swing.JLabel;
import javax.swing.JProgressBar;
import org.snu.ids.ha.constants.POSTag;
import org.snu.ids.ha.index.Keyword;
import org.snu.ids.ha.index.KeywordList;
import org.snu.ids.ha.index.WordDic;
import org.snu.ids.ha.ma.CharSetType;
import org.snu.ids.ha.ma.MCandidate;
import org.snu.ids.ha.ma.MExpression;
import org.snu.ids.ha.ma.Morpheme;
import org.snu.ids.ha.ma.MorphemeAnalyzer;
import org.snu.ids.ha.ma.Token;
import org.snu.ids.ha.ma.Tokenizer;
import org.snu.ids.ha.util.StringSet;
import org.snu.ids.ha.util.Util;
import org.tartarus.snowball.EnglishStemmer;

public class KeywordExtractor
extends MorphemeAnalyzer {
    static WordDic UOMDic = null;
    static WordDic ChemFormulaDic = null;
    static WordDic CompNounDic = null;
    static WordDic VerbNounDic = null;
    static WordDic JunkWordDic = null;
    static WordDic VerbJunkWordDic = null;
    static final int MAX_UOM_SIZE = 7;
    public static final StringSet MULTIPLYERS;
    public static final StringSet RANGE_INDICATOR;
    public static final String STD_UOM_CONNECTOR = "*";

    static {
        UOMDic = new WordDic("/dic/ecat/UOM.dic");
        ChemFormulaDic = new WordDic("/dic/ecat/ChemFormula.dic");
        CompNounDic = new WordDic("/dic/ecat/CompNoun.dic");
        VerbNounDic = new WordDic("/dic/ecat/VerbNoun.dic");
        JunkWordDic = new WordDic("/dic/ecat/JunkWord.dic");
        VerbJunkWordDic = new WordDic("/dic/ecat/VerbJunkWord.dic");
        MULTIPLYERS = new StringSet(new String[]{STD_UOM_CONNECTOR, "x", "X", "\u00d7", "\u2169"});
        RANGE_INDICATOR = new StringSet(new String[]{"-", "\u00b1", "~", "+"});
    }

    /**
     * Extracts keywords from multi-line text, one line at a time, optionally
     * reporting progress through Swing widgets.
     *
     * <p>The text is split on {@code '\n'}. Each line accepted by
     * {@code Util.valid} is passed to {@link #extractKeyword(String, boolean)},
     * and the index of every resulting keyword is shifted by the number of
     * characters that precede the line (each line break counts as one
     * character), so indices refer to positions in the whole input.
     *
     * @param progressBar progress bar to update after each line, or {@code null}
     *                    to disable all progress reporting
     * @param label       label that receives the number of processed lines; only
     *                    touched when {@code progressBar} is non-null, and may
     *                    itself be {@code null}
     * @param string      text to analyse; {@code null} yields {@code null}
     * @param onlyNoun    forwarded to {@link #extractKeyword(String, boolean)}
     * @return all keywords found, or {@code null} when none were found
     */
    public KeywordList extractKeyword(JProgressBar progressBar, JLabel label, String string, boolean onlyNoun) {
        if (string == null) {
            // Same result as "no keywords found" instead of a NullPointerException.
            return null;
        }
        String[] strArr = string.split("\n");
        // The label is only ever updated together with the progress bar.
        boolean reportToLabel = progressBar != null && label != null;
        if (progressBar != null) {
            progressBar.setIndeterminate(false);
            progressBar.setMaximum(strArr.length);
            progressBar.setStringPainted(true);
        }
        if (reportToLabel) {
            label.setText("0");
        }

        KeywordList ret = null;
        int offset = 0;
        for (int lineNo = 0; lineNo < strArr.length; lineNo++) {
            String line = strArr[lineNo];
            if (Util.valid(line)) {
                KeywordList keywordList = this.extractKeyword(line, onlyNoun);
                if (keywordList != null && keywordList.size() > 0) {
                    if (offset > 0) {
                        // Convert line-relative indices to input-relative indices.
                        for (int i = 0, size = keywordList.size(); i < size; i++) {
                            Keyword keyword = (Keyword)keywordList.get(i);
                            keyword.setIndex(offset + keyword.getIndex());
                        }
                    }
                    if (ret == null) {
                        ret = new KeywordList(keywordList);
                    } else {
                        ret.addAll(keywordList);
                    }
                }
            }
            if (progressBar != null) {
                progressBar.setValue(lineNo + 1);
            }
            if (reportToLabel) {
                label.setText(String.valueOf(lineNo + 1));
            }
            // +1 accounts for the '\n' removed by split().
            offset += line.length() + 1;
        }
        if (progressBar != null) {
            progressBar.setStringPainted(false);
        }
        return ret;
    }

    /**
     * Extracts keywords from a single piece of text.
     *
     * <p>Processing steps:
     * <ol>
     *   <li>Analyse the text and flatten the first candidate of every
     *       expression into one morpheme list.</li>
     *   <li>Merge runs of adjacent morphemes whose concatenation is found in
     *       {@code UOMDic}, {@code ChemFormulaDic} or {@code CompNounDic}.</li>
     *   <li>Lower-case every morpheme and turn the non-junk ones into keywords
     *       (English {@code UN} morphemes are stemmed; verbs ending in
     *       U+D558 / U+B418 whose stem is in {@code VerbNounDic} become
     *       {@code NNG} keywords).</li>
     *   <li>Add a composed keyword for every run of 2 to 4 directly adjacent
     *       {@code NN} morphemes.</li>
     *   <li>Drop keywords tagged {@code XP}, {@code XS} or {@code VX} and
     *       keywords whose string is in {@code JunkWordDic}.</li>
     *   <li>Add the components of composed keywords known to the analyzer
     *       dictionary, then sort by index and, for equal indices, by length.</li>
     * </ol>
     *
     * <p>Any exception raised along the way is printed to {@code System.err}
     * together with the input, and the keywords collected so far are returned.
     *
     * @param string   text to analyse
     * @param onlyNoun when {@code true}, only morphemes matching {@code POSTag.N}
     *                 produce single-morpheme keywords
     * @return the extracted keywords; never {@code null}
     */
    public KeywordList extractKeyword(String string, boolean onlyNoun) {
        List<Keyword> ret = new ArrayList<Keyword>();
        EnglishStemmer engStemmer = new EnglishStemmer();
        try {
            // --- 1. Flatten the best candidate of each expression. ---
            List<MExpression> meList = this.leaveJustBest(this.postProcess(this.analyze(string)));
            List<Morpheme> mpList = new ArrayList<Morpheme>();
            int meCount = meList == null ? 0 : meList.size();
            for (int i = 0; i < meCount; i++) {
                MExpression me = meList.get(i);
                MCandidate mc = (MCandidate)me.get(0);
                int mcSize = mc.size();
                if (mcSize == 1) {
                    // A single-morpheme candidate takes the expression's own text.
                    Morpheme single = (Morpheme)mc.get(0);
                    single.setString(me.getExp());
                    mpList.add(single);
                } else {
                    for (int j = 0; j < mcSize; j++) {
                        mpList.add((Morpheme)mc.get(j));
                    }
                }
            }

            // --- 2. Merge multi-morpheme dictionary terms, scanning right to left. ---
            for (int endIdx = mpList.size() - 1; endIdx > 0; endIdx--) {
                for (int startIdx = Math.max(endIdx - MAX_UOM_SIZE, 0); startIdx < endIdx; startIdx++) {
                    StringBuilder joined = new StringBuilder();
                    for (int k = startIdx; k <= endIdx; k++) {
                        joined.append(mpList.get(k).getString());
                    }
                    String tempName = joined.toString();

                    // Dictionaries are consulted in priority order; later ones only if earlier ones miss.
                    boolean isUom = UOMDic.contains(tempName);
                    boolean isChemFormula = !isUom && ChemFormulaDic.contains(tempName);
                    boolean isCompNoun = !isUom && !isChemFormula && CompNounDic.contains(tempName);
                    if (!isUom && !isChemFormula && !isCompNoun) {
                        continue;
                    }

                    // Collapse [startIdx, endIdx] onto the morpheme at startIdx.
                    mpList.subList(startIdx + 1, endIdx + 1).clear();
                    endIdx = startIdx;
                    Morpheme merged = mpList.get(startIdx);
                    if (isUom) {
                        merged.setString(tempName);
                        merged.setCharSet(CharSetType.COMBINED);
                        merged.setTag(POSTag.NNM);
                    } else if (isChemFormula) {
                        merged.setString(tempName);
                        merged.setCharSet(CharSetType.COMBINED);
                        merged.setTag(POSTag.UN);
                    } else if (!JunkWordDic.contains(tempName)) {
                        merged.setString(tempName);
                        merged.setCharSet(CharSetType.COMBINED);
                        merged.setTag(POSTag.NNG);
                        merged.setComposed(true);
                    }
                    // NOTE(review): for a junk compound noun the trailing morphemes are
                    // still removed while the head keeps its original string; this
                    // mirrors the previous behaviour - confirm it is intended.
                    break;
                }
            }

            // --- 3. One keyword per acceptable morpheme. ---
            for (Morpheme mp : mpList) {
                // Locale.ROOT keeps dictionary lookups independent of the JVM default locale.
                mp.setString(mp.getString().toLowerCase(Locale.ROOT));
                if ((onlyNoun && !mp.isTagOf(POSTag.N)) || JunkWordDic.contains(mp.getString())) {
                    continue;
                }
                Keyword keyword = new Keyword(mp);
                if (mp.isTagOf(POSTag.UN) && mp.getCharSet() == CharSetType.ENGLISH) {
                    engStemmer.setCurrent(keyword.getString().toLowerCase(Locale.ROOT));
                    engStemmer.stem();
                    keyword.setString(engStemmer.getCurrent());
                } else if (mp.isTagOf(POSTag.V)) {
                    String verb = mp.getString();
                    int verbLen = verb.length();
                    // Check the length before reading the last character so short or
                    // empty strings cannot abort the whole extraction.
                    if (verbLen > 2) {
                        char last = verb.charAt(verbLen - 1);
                        if (last == '\ud558' || last == '\ub418') {
                            String stem = verb.substring(0, verbLen - 1);
                            if (VerbNounDic.contains(stem)) {
                                keyword.setString(stem);
                                keyword.setTag(POSTag.NNG);
                            }
                        }
                    }
                }
                ret.add(keyword);
            }

            // --- 4. Composed keywords from runs of directly adjacent NN morphemes. ---
            final int maxFollowers = 3; // a run is the head plus at most three followers
            int mpCount = mpList.size();
            int pos = 0;
            while (pos < mpCount) {
                Morpheme head = mpList.get(pos);
                int followers = 0;
                if (head.isTagOf(POSTag.NN)) {
                    StringBuilder compound = new StringBuilder().append(head.getString());
                    Morpheme prev = head;
                    while (followers < maxFollowers && pos + followers + 1 < mpCount) {
                        Morpheme next = mpList.get(pos + followers + 1);
                        boolean adjacent = prev.getIndex() + prev.getString().length() == next.getIndex();
                        if (!next.isTagOf(POSTag.NN) || !adjacent) {
                            break;
                        }
                        compound.append(next.getString());
                        prev = next;
                        followers++;
                    }
                    if (followers > 0) {
                        Keyword compoundKeyword = new Keyword(head);
                        compoundKeyword.setComposed(true);
                        compoundKeyword.setString(compound.toString());
                        ret.add(compoundKeyword);
                    }
                }
                // Skip the morphemes consumed by the run so runs never overlap.
                pos += followers + 1;
            }

            // --- 5. Remove affix/auxiliary keywords and junk words. ---
            // The junk test uses the keyword's own string (stemmed or composed
            // strings may differ from the morpheme strings checked in step 3).
            for (int i = ret.size() - 1; i >= 0; i--) {
                Keyword keyword = ret.get(i);
                if (keyword.isTagOf(POSTag.XP | POSTag.XS | POSTag.VX) || JunkWordDic.contains(keyword.getString())) {
                    ret.remove(i);
                }
            }

            // --- 6. Add the components of composed keywords. ---
            List<Keyword> cnKeywordList = new ArrayList<Keyword>();
            for (Keyword composed : ret) {
                if (!composed.isComposed()) {
                    continue;
                }
                String[] cnKeywords = this.dic.getCompNoun(composed.getString());
                if (cnKeywords == null) {
                    continue;
                }
                int addIdx = 0;
                for (String part : cnKeywords) {
                    if (!JunkWordDic.contains(part)) {
                        Keyword newKeyword = new Keyword(composed);
                        newKeyword.setVocTag("E");
                        newKeyword.setString(part);
                        newKeyword.setComposed(false);
                        newKeyword.setIndex(composed.getIndex() + addIdx);
                        cnKeywordList.add(newKeyword);
                    }
                    // Advance past every component, including skipped junk ones, so
                    // later components keep their real offset inside the compound.
                    addIdx += part.length();
                }
            }
            ret.addAll(cnKeywordList);

            // --- 7. Order by position, shorter keyword first on ties. ---
            Collections.sort(ret, new Comparator<Keyword>(){

                @Override
                public int compare(Keyword o1, Keyword o2) {
                    if (o1.getIndex() == o2.getIndex()) {
                        return o1.getString().length() - o2.getString().length();
                    }
                    return o1.getIndex() - o2.getIndex();
                }
            });
        }
        catch (Exception e) {
            // Best effort: report the failing input and return what was collected so far.
            System.err.println(string);
            e.printStackTrace();
        }
        return new KeywordList(ret);
    }

    /**
     * Returns the given list unchanged.
     *
     * <p>No filtering is performed here: the previous body only counted up to
     * the list size without touching any element, so that dead loop has been
     * removed.
     * NOTE(review): the name suggests junk-word removal was planned; confirm
     * with callers before implementing it.
     *
     * @param keywordList list to pass through; may be {@code null}
     * @return the very same {@code keywordList} instance
     */
    public KeywordList removeJunkWord(KeywordList keywordList) {
        return keywordList;
    }

    /**
     * Builds a composed keyword from the first run of two or more consecutive
     * {@code NN} morphemes in a candidate.
     *
     * <p>The keyword is created from the first {@code NN} morpheme, flagged as
     * composed, and its string is extended with each directly following
     * {@code NN} morpheme. The result is {@code null} when:
     * <ul>
     *   <li>the candidate is {@code null} or has fewer than two morphemes,</li>
     *   <li>the first noun stands alone (it is followed by a non-noun, or it is
     *       the last morpheme), or</li>
     *   <li>the candidate contains no {@code NN} morpheme at all.</li>
     * </ul>
     *
     * @param mc candidate whose morphemes are scanned in order
     * @return the composed keyword, or {@code null} if there is none
     */
    public Keyword getCompositeNoun(MCandidate mc) {
        if (mc == null || mc.size() < 2) {
            return null;
        }
        Keyword ret = null;
        int nnCnt = 0; // length of the noun run currently being extended
        for (int i = 0; i < mc.size(); i++) {
            Morpheme mp = (Morpheme)mc.get(i);
            if (mp.isTagOf(POSTag.NN)) {
                if (ret == null) {
                    ret = new Keyword(mp);
                    ret.setComposed(true);
                } else if (nnCnt == 0) {
                    // The first noun was isolated; a later noun does not revive it.
                    return null;
                } else {
                    ret.setString(ret.getString() + mp.getString());
                }
                nnCnt++;
            } else {
                if (nnCnt > 1) {
                    // The run has ended and is long enough.
                    return ret;
                }
                nnCnt = 0;
            }
        }
        // Require at least two nouns here as well: previously a single trailing
        // noun was returned as "composed", unlike every other single-noun case.
        return nnCnt > 1 ? ret : null;
    }

    /**
     * Normalises a unit-of-measure expression.
     *
     * <p>The input is tokenized and rebuilt token by token:
     * <ul>
     *   <li>number tokens are copied as they are,</li>
     *   <li>tokens contained in {@link #MULTIPLYERS} are replaced by
     *       {@link #STD_UOM_CONNECTOR},</li>
     *   <li>tokens equal to a single space or a single tab are dropped,</li>
     *   <li>all other tokens are copied as they are.</li>
     * </ul>
     *
     * @param inputString text to normalise
     * @return the rebuilt text; empty when the tokenizer yields no tokens
     */
    public static String getFormatedUOMValues(String inputString) {
        List<Token> tokens = Tokenizer.tokenize(inputString);
        if (tokens == null) {
            return "";
        }
        // StringBuilder avoids re-copying the whole result for every token.
        StringBuilder formatted = new StringBuilder();
        for (Token token : tokens) {
            String text = token.getString();
            if (token.isCharSetOf(CharSetType.NUMBER)) {
                formatted.append(text);
            } else if (KeywordExtractor.isUOMConnector(text)) {
                formatted.append(STD_UOM_CONNECTOR);
            } else if (!text.equals(" ") && !text.equals("\t")) {
                formatted.append(text);
            }
        }
        return formatted.toString();
    }

    /**
     * Tells whether a token is one of the multiplier symbols.
     *
     * @param uomCon token text to test
     * @return {@code true} if {@link #MULTIPLYERS} contains {@code uomCon}
     */
    private static boolean isUOMConnector(String uomCon) {
        final boolean isMultiplier = MULTIPLYERS.contains(uomCon);
        return isMultiplier;
    }

    /**
     * Tells whether a token is a multiplier symbol or a range indicator.
     *
     * @param uomCon token text to test
     * @return {@code true} if {@link #MULTIPLYERS} or {@link #RANGE_INDICATOR}
     *         contains {@code uomCon}
     */
    private static boolean isUOMConnector2(String uomCon) {
        if (MULTIPLYERS.contains(uomCon)) {
            return true;
        }
        return RANGE_INDICATOR.contains(uomCon);
    }

    /**
     * Manual smoke test: runs the extractor over a hard-coded sample text
     * (a Hangul paragraph followed by specification-style lines) and prints
     * every keyword, prefixed by its position in the result and a tab.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String paragraph = "\ubb38\uc11c \uc5d4\ud130\ud2f0\uc758 \uac1c\ub150\uc774 \uba85\ud655\ud558\uc9c0 \ubabb\ud558\ub2e4. \uc989, \ubb38\uc11c \uc5d4\ud130\ud2f0\uc5d0 \uc800\uc7a5\ub418\ub294 \ub2e8\uc704\uac1c\uccb4\uc778 \ubb38\uc11c\uac00 \ub2e4\ub978 \ubd80\uc11c\ub85c \ubc1c\uc2e0\uc744 \ud558\uba74 \ub2e4\ub978 \ubb38\uc11c\uac00 \ub418\ub294 \uac83\uc778\uc9c0 \uc218\uc815\uc744 \ud560 \ub54c\ub294 \ubb38\uc11c\uac00 \uc0c8\ub85c \uc0dd\uc131\ub418\uc9c0 \uc54a\ub294 \uac83\uc778\uc9c0, \ud639\uc740 \uacb0\uc7ac\uc120\uc73c\ub85c \ubc1c\uc2e0\ud558\uba74 \ubb38\uc11c\uac00 \uadf8\ub300\ub85c \uc788\ub294 \uac83\uc778\uc9c0 \ub4f1\uc5d0 \ub300\ud55c \uba85\ud655\ud55c \uc815\uc758\uac00 \uc5c6\ub2e4. \uac1c\ubc1c \ub2f4\ub2f9\uc790 \ub9c8\uc800\ub3c4 \uc774\ub7ec\ud55c \uac1c\ub150\uc744 \uba85\ud655\ud788 \uc124\uba85\ud558\uc9c0 \ubabb\ud558\uace0 \uc788\ub2e4.";
        String specLines = "\n\uc0ac\uc6a9\ub178\uc990 : Variojet 045\n\uc791\ub3d9\uc555\ub825 : 10\u223c135 bar\n\ucd5c\ub300\uc555\ub825 : 150 bar\n\ubb3c\ud1a0\ucd9c\ub7c9 : 1400 rpm 11 L/min\n\ubb3c\ud761\uc785\ud5c8\uc6a9\ucd5c\uace0\uc628\ub3c4 : 70 \u2103\n\ucd5c\ub300\ubb3c\ud761\uc785\ub192\uc774 : 2.5 m\n\uc18c\ube44\uc804\ub825(\uc2dc\uc791) : 3.1 kW\n\uc18c\ube44\uc804\ub825(\uc815\uc0c1\uc791\ub3d9) : 2.3 kW\n\ud06c\uae30 : 350\u00d7330\u00d7900 mm\n\ubb34\uac8c : 32 kg\n\uc138\uc81c\ud761\uc785\uac00\ub2a5 HClO4 ClO4 KClO4 CH3OC6H4OH H2(SO4)2";
        KeywordExtractor extractor = new KeywordExtractor();
        KeywordList extracted = extractor.extractKeyword(paragraph + specLines, false);
        if (extracted == null) {
            return;
        }
        for (int idx = 0; idx < extracted.size(); idx++) {
            Keyword current = (Keyword)extracted.get(idx);
            System.out.println(idx + "\t" + current);
        }
    }
}

