package com.googlecode.clearnlp.component.morph;

import com.googlecode.clearnlp.component.AbstractComponent;
import com.googlecode.clearnlp.constituent.CTLibEn;
import com.googlecode.clearnlp.dependency.DEPNode;
import com.googlecode.clearnlp.dependency.DEPTree;
import com.googlecode.clearnlp.morphology.MPLib;
import com.googlecode.clearnlp.morphology.MPLibEn;
import com.googlecode.clearnlp.util.pair.Pair;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

/* loaded from: input_file:com/googlecode/clearnlp/component/morph/CEnglishMPAnalyzer.class */
/**
 * Dictionary- and rule-based English morphological analyzer that assigns a lemma
 * to every node of a dependency tree.
 *
 * <p>All resources are read from a zip archive whose entries live under {@code en/}:
 * exception maps ({@code *.exc}), base-form sets ({@code *.txt}), suffix rewrite
 * rules ({@code *.rule}) and an abbreviation map ({@code abbr.rule}). An entry that
 * is absent from the archive leaves the corresponding field {@code null}.
 *
 * <p>Lemma lookup order (see {@link #getLemma(String, String)}): number class,
 * exception map, suffix rules validated against the base set, abbreviation map,
 * and finally the normalized lower-cased form itself.
 */
public class CEnglishMPAnalyzer extends AbstractComponent {
    /** Separator between word form and POS tag in the keys of {@link #m_abbr_rule}. */
    public final String FIELD_DELIM = "_";
    /** Directory prefix of every resource entry in the archive. */
    final String LANG_DIR = "en/";
    final String NOUN_EXC = "en/noun.exc";
    final String VERB_EXC = "en/verb.exc";
    final String ADJ_EXC = "en/adj.exc";
    final String ADV_EXC = "en/adv.exc";
    final String NOUN_BASE = "en/noun.txt";
    final String VERB_BASE = "en/verb.txt";
    final String ADJ_BASE = "en/adj.txt";
    final String ADV_BASE = "en/adv.txt";
    final String ORD_BASE = "en/ordinal.txt";
    final String CRD_BASE = "en/cardinal.txt";
    final String NOUN_RULE = "en/noun.rule";
    final String VERB_RULE = "en/verb.rule";
    final String ADJ_RULE = "en/adj.rule";
    final String ABBR_RULE = "en/abbr.rule";
    /** Coarse tags returned by {@link #getPOSTags(String)}. */
    final String POS_NOUN = "N";
    final String POS_VERB = "V";
    final String POS_ADJ = "J";
    final String POS_ADV = "R";
    /** Irregular form -> lemma maps, one per word class. */
    Map<String, String> m_noun_exc;
    Map<String, String> m_verb_exc;
    Map<String, String> m_adj_exc;
    Map<String, String> m_adv_exc;
    /** Known base forms, used to validate the output of the suffix rules. */
    Set<String> s_noun_base;
    Set<String> s_verb_base;
    Set<String> s_adj_base;
    /** Loaded from the archive but not consulted by any method of this class. */
    Set<String> s_adv_base;
    Set<String> s_ord_base;
    Set<String> s_crd_base;
    /** Ordered (suffix, replacement) rules; the first rule yielding a known base wins. */
    List<Pair<String, String>> a_noun_rule;
    List<Pair<String, String>> a_verb_rule;
    List<Pair<String, String>> a_adj_rule;
    /** Maps {@code form + FIELD_DELIM + pos} to the expanded lemma. */
    Map<String, String> m_abbr_rule;

    /**
     * Creates an analyzer and loads all dictionaries from the given archive.
     *
     * @param zipInputStream archive containing the {@code en/*} resources; it is
     *                       closed before this constructor returns
     * @throws IOException if the archive cannot be read
     */
    public CEnglishMPAnalyzer(ZipInputStream zipInputStream) throws IOException {
        // Failures are propagated instead of being printed and swallowed: a partially
        // initialised analyzer would only fail later with a NullPointerException.
        init(zipInputStream);
    }

    /**
     * Reads every entry of the archive and populates the matching dictionary field.
     * Entries with unrecognised names are ignored. The stream is always closed,
     * including when reading fails.
     *
     * @param zipInputStream archive containing the {@code en/*} resources
     * @throws IOException if the archive cannot be read
     */
    public void init(ZipInputStream zipInputStream) throws IOException {
        try (ZipInputStream in = zipInputStream) {
            ZipEntry entry;
            while ((entry = in.getNextEntry()) != null) {
                switch (entry.getName()) {
                    case NOUN_EXC:
                        this.m_noun_exc = getExceptionMap(in);
                        break;
                    case VERB_EXC:
                        this.m_verb_exc = getExceptionMap(in);
                        break;
                    case ADJ_EXC:
                        this.m_adj_exc = getExceptionMap(in);
                        break;
                    case ADV_EXC:
                        this.m_adv_exc = getExceptionMap(in);
                        break;
                    case NOUN_BASE:
                        this.s_noun_base = getBaseSet(in);
                        break;
                    case VERB_BASE:
                        this.s_verb_base = getBaseSet(in);
                        break;
                    case ADJ_BASE:
                        this.s_adj_base = getBaseSet(in);
                        break;
                    case ADV_BASE:
                        this.s_adv_base = getBaseSet(in);
                        break;
                    case ORD_BASE:
                        this.s_ord_base = getBaseSet(in);
                        break;
                    case CRD_BASE:
                        this.s_crd_base = getBaseSet(in);
                        break;
                    case NOUN_RULE:
                        this.a_noun_rule = getRuleList(in);
                        break;
                    case VERB_RULE:
                        this.a_verb_rule = getRuleList(in);
                        break;
                    case ADJ_RULE:
                        this.a_adj_rule = getRuleList(in);
                        break;
                    case ABBR_RULE:
                        this.m_abbr_rule = getAbbreviationMap(in);
                        break;
                    default:
                        // Not one of our resources; skip it.
                        break;
                }
            }
        }
    }

    /**
     * Wraps the current zip entry in a line reader with an explicit charset.
     * The returned reader must NOT be closed by callers: closing it would close
     * the underlying archive and prevent the remaining entries from being read.
     * Buffering is safe because the zip stream reports end-of-stream at the end
     * of the current entry.
     */
    private static BufferedReader newEntryReader(ZipInputStream in) {
        return new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
    }

    /**
     * Parses an exception file. On each line the first token is mapped to the
     * second token; any further tokens on the line are mapped to that same second
     * token as well. Lines with fewer than two tokens are ignored.
     */
    private Map<String, String> getExceptionMap(ZipInputStream zipInputStream) throws IOException {
        Map<String, String> exceptions = new HashMap<>();
        BufferedReader reader = newEntryReader(zipInputStream);
        String line;
        while ((line = reader.readLine()) != null) {
            StringTokenizer tokens = new StringTokenizer(line);
            if (tokens.countTokens() < 2) {
                continue;
            }
            String form = tokens.nextToken();
            String lemma = tokens.nextToken();
            exceptions.put(form, lemma);
            while (tokens.hasMoreTokens()) {
                exceptions.put(tokens.nextToken(), lemma);
            }
        }
        return exceptions;
    }

    /**
     * Parses a base-form file: one trimmed entry per line. Blank lines are skipped
     * so that the empty string never counts as a valid base form.
     */
    private Set<String> getBaseSet(ZipInputStream zipInputStream) throws IOException {
        Set<String> bases = new HashSet<>();
        BufferedReader reader = newEntryReader(zipInputStream);
        String line;
        while ((line = reader.readLine()) != null) {
            String base = line.trim();
            if (!base.isEmpty()) {
                bases.add(base);
            }
        }
        return bases;
    }

    /**
     * Parses a suffix-rule file. Each line holds a suffix and, optionally, its
     * replacement (the empty string when omitted). File order is preserved because
     * rules are applied in order. Blank lines are skipped.
     */
    private List<Pair<String, String>> getRuleList(ZipInputStream zipInputStream) throws IOException {
        List<Pair<String, String>> rules = new ArrayList<>();
        BufferedReader reader = newEntryReader(zipInputStream);
        String line;
        while ((line = reader.readLine()) != null) {
            StringTokenizer tokens = new StringTokenizer(line);
            if (!tokens.hasMoreTokens()) {
                continue;
            }
            String suffix = tokens.nextToken();
            String replacement = tokens.hasMoreTokens() ? tokens.nextToken() : "";
            rules.add(new Pair<>(suffix, replacement));
        }
        return rules;
    }

    /**
     * Parses the abbreviation file. Each line holds three tokens: form, POS tag
     * and lemma; the map key is {@code form + FIELD_DELIM + pos}. Lines with fewer
     * than three tokens are skipped.
     */
    private Map<String, String> getAbbreviationMap(ZipInputStream zipInputStream) throws IOException {
        Map<String, String> abbreviations = new HashMap<>();
        BufferedReader reader = newEntryReader(zipInputStream);
        String line;
        while ((line = reader.readLine()) != null) {
            StringTokenizer tokens = new StringTokenizer(line);
            if (tokens.countTokens() < 3) {
                continue;
            }
            String form = tokens.nextToken();
            String pos = tokens.nextToken();
            String lemma = tokens.nextToken();
            abbreviations.put(form + FIELD_DELIM + pos, lemma);
        }
        return abbreviations;
    }

    /**
     * Sets the {@code lemma} of every node from its {@code form} and {@code pos}.
     * Index 0 is skipped (presumably the artificial root node; confirm against
     * {@code DEPTree}).
     *
     * @param dEPTree tree whose nodes are updated in place
     */
    @Override // com.googlecode.clearnlp.component.AbstractComponent
    public void process(DEPTree dEPTree) {
        int size = dEPTree.size();
        for (int i = 1; i < size; i++) {
            DEPNode node = dEPTree.get(i);
            node.lemma = getLemma(node.form, node.pos);
        }
    }

    /**
     * Returns the lemma of a word form given its POS tag.
     *
     * @param str  surface word form
     * @param str2 POS tag of the form
     * @return the lemma; when no dictionary or rule applies, the normalized,
     *         lower-cased form itself
     */
    public String getLemma(String str, String str2) {
        // Locale.ROOT keeps lower-casing independent of the JVM default locale
        // (e.g. under a Turkish locale "I" would otherwise not become "i").
        return getLemmaAux(MPLib.normalizeBasic(str).toLowerCase(Locale.ROOT), str2);
    }

    /**
     * Returns the coarse word classes the given form can belong to according to
     * the dictionaries: {@code "N"}, {@code "V"}, {@code "J"} and/or {@code "R"}.
     * Nouns, verbs and adjectives match via the exception map or the suffix rules;
     * adverbs match via the exception map only.
     *
     * @param str surface word form
     * @return a possibly empty, mutable set of coarse tags
     */
    public Set<String> getPOSTags(String str) {
        Set<String> tags = new HashSet<>();
        String lower = str.toLowerCase(Locale.ROOT);
        if (this.m_noun_exc.containsKey(lower) || getBaseAux(lower, this.s_noun_base, this.a_noun_rule) != null) {
            tags.add(POS_NOUN);
        }
        if (this.m_verb_exc.containsKey(lower) || getBaseAux(lower, this.s_verb_base, this.a_verb_rule) != null) {
            tags.add(POS_VERB);
        }
        if (this.m_adj_exc.containsKey(lower) || getBaseAux(lower, this.s_adj_base, this.a_adj_rule) != null) {
            tags.add(POS_ADJ);
        }
        if (this.m_adv_exc.containsKey(lower)) {
            tags.add(POS_ADV);
        }
        return tags;
    }

    /**
     * Tries each lemma source in priority order and returns the first hit; falls
     * back to the (already normalized) form when nothing matches.
     */
    private String getLemmaAux(String form, String pos) {
        String lemma = getNumber(form, pos);
        if (lemma != null) {
            return lemma;
        }
        lemma = getException(form, pos);
        if (lemma != null) {
            return lemma;
        }
        lemma = getBase(form, pos);
        if (lemma != null) {
            return lemma;
        }
        lemma = getAbbreviation(form, pos);
        return lemma != null ? lemma : form;
    }

    /**
     * For tokens tagged {@code CTLibEn.POS_CD}, returns {@code "#crd#"} for known
     * cardinals and {@code "#ord#"} for known ordinals or the digit-ordinal shapes
     * "0st"/"0nd"/"0rd"/"0th" (presumably what {@code MPLib.normalizeBasic} yields
     * for "1st", "2nd", ...; confirm). Returns {@code null} otherwise.
     */
    private String getNumber(String form, String pos) {
        if (!pos.equals(CTLibEn.POS_CD)) {
            return null;
        }
        if (this.s_crd_base.contains(form)) {
            return "#crd#";
        }
        boolean digitOrdinal = form.equals("0st") || form.equals("0nd") || form.equals("0rd") || form.equals("0th");
        if (digitOrdinal || this.s_ord_base.contains(form)) {
            return "#ord#";
        }
        return null;
    }

    /** Looks the form up in the exception map of its word class; {@code null} if absent. */
    private String getException(String form, String pos) {
        if (MPLibEn.isNoun(pos)) {
            return this.m_noun_exc.get(form);
        }
        if (MPLibEn.isVerb(pos)) {
            return this.m_verb_exc.get(form);
        }
        if (MPLibEn.isAdjective(pos)) {
            return this.m_adj_exc.get(form);
        }
        if (MPLibEn.isAdverb(pos)) {
            return this.m_adv_exc.get(form);
        }
        return null;
    }

    /**
     * Applies the suffix rules of the form's word class (noun, verb or adjective);
     * returns {@code null} for other classes or when no rule yields a known base.
     */
    private String getBase(String form, String pos) {
        if (MPLibEn.isNoun(pos)) {
            return getBaseAux(form, this.s_noun_base, this.a_noun_rule);
        }
        if (MPLibEn.isVerb(pos)) {
            return getBaseAux(form, this.s_verb_base, this.a_verb_rule);
        }
        if (MPLibEn.isAdjective(pos)) {
            return getBaseAux(form, this.s_adj_base, this.a_adj_rule);
        }
        return null;
    }

    /**
     * Returns the first candidate, obtained by replacing a matching rule suffix
     * ({@code o1}) with its replacement ({@code o2}), that is contained in
     * {@code bases}; {@code null} when no rule produces a known base form.
     */
    private String getBaseAux(String form, Set<String> bases, List<Pair<String, String>> rules) {
        for (Pair<String, String> rule : rules) {
            if (form.endsWith(rule.o1)) {
                String candidate = form.substring(0, form.length() - rule.o1.length()) + rule.o2;
                if (bases.contains(candidate)) {
                    return candidate;
                }
            }
        }
        return null;
    }

    /** Looks up {@code form + FIELD_DELIM + pos} in the abbreviation map; {@code null} if absent. */
    private String getAbbreviation(String form, String pos) {
        return this.m_abbr_rule.get(form + FIELD_DELIM + pos);
    }
}
