package com.googlecode.clearnlp.pos;

import com.carrotsearch.hppc.cursors.ObjectCursor;
import com.googlecode.clearnlp.classification.model.StringModel;
import com.googlecode.clearnlp.classification.prediction.StringPrediction;
import com.googlecode.clearnlp.classification.train.StringTrainSpace;
import com.googlecode.clearnlp.classification.vector.StringFeatureVector;
import com.googlecode.clearnlp.constituent.CTLibEn;
import com.googlecode.clearnlp.engine.AbstractEngine;
import com.googlecode.clearnlp.engine.EngineProcess;
import com.googlecode.clearnlp.feature.xml.FtrToken;
import com.googlecode.clearnlp.feature.xml.POSFtrXml;
import com.googlecode.clearnlp.propbank.PBLib;
import com.googlecode.clearnlp.reader.AbstractReader;
import com.googlecode.clearnlp.util.UTInput;
import com.googlecode.clearnlp.util.UTOutput;
import com.googlecode.clearnlp.util.UTString;
import com.googlecode.clearnlp.util.map.Prob1DMap;
import com.googlecode.clearnlp.util.map.Prob2DMap;
import com.googlecode.clearnlp.util.pair.StringDoublePair;
import java.io.BufferedReader;
import java.io.PrintStream;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;

/* loaded from: input_file:com/googlecode/clearnlp/pos/POSTagger.class */
/**
 * Part-of-speech tagger built on {@link AbstractEngine}.
 *
 * <p>The engine flag passed to the super constructor selects what {@link #tag(POSNode[])} does:
 * <ul>
 *   <li>{@code 0} - collect lexica (form counts and form/POS co-occurrence counts);</li>
 *   <li>{@code 1} - add training instances to a {@link StringTrainSpace};</li>
 *   <li>{@code 2} - predict tags with a {@link StringModel}.</li>
 * </ul>
 * The flag is read back through the inherited {@code i_flag} field (presumably set by the
 * super constructor; confirm against {@code AbstractEngine}).
 *
 * <p>Instances keep per-sentence state ({@code p_nodes}, {@code i_source}) in fields while
 * tagging, so one instance must not tag two sentences concurrently.
 */
public class POSTagger extends AbstractEngine {
    /** Lemmas for which form/lemma features and lexica are collected. */
    private Set<String> s_lemmas;
    /** Form/POS co-occurrence counts; only allocated by the lexicon-collecting constructor. */
    private Prob2DMap p_ambi;
    /** Maps a simplified form to its ambiguity class (POS tags joined by {@code "_"}). */
    private Map<String, String> m_ambi;
    /** Simplified-form counts; only allocated by the lexicon-collecting constructor. */
    private Prob1DMap p_forms;
    /** Simplified forms used by {@link #getCosineSimilarity(POSNode[])}. */
    private Set<String> s_forms;
    /** Feature template handed to the inherited {@code getFeatureVector}. */
    private POSFtrXml f_xml;
    /** Sink for training instances (training mode only). */
    private StringTrainSpace s_space;
    /** Classifier used for prediction (prediction mode only). */
    private StringModel s_model;
    /** Sentence currently being processed. */
    private POSNode[] p_nodes;
    /** Labels of the current sentence as returned by {@code POSLib.getLabels} (training mode only). */
    private String[] g_labels;
    /** Index of the token currently being processed within {@link #p_nodes}. */
    private int i_source;
    /** Cached {@code sqrt(|s_forms|)}; only set by the prediction constructors and {@link #loadModel}. */
    private double d_denominator;

    /**
     * Creates a tagger in lexicon-collecting mode (flag {@code 0}).
     *
     * @param set lemmas whose forms should be counted
     */
    public POSTagger(Set<String> set) {
        super((byte) 0);
        this.s_lemmas = set;
        this.p_ambi = new Prob2DMap();
        this.p_forms = new Prob1DMap();
    }

    /**
     * Creates a tagger in training mode (flag {@code 1}).
     *
     * @param pOSFtrXml        feature template
     * @param set              lemma set
     * @param set2             form set
     * @param map              ambiguity-class map
     * @param stringTrainSpace destination for training instances
     */
    public POSTagger(POSFtrXml pOSFtrXml, Set<String> set, Set<String> set2, Map<String, String> map, StringTrainSpace stringTrainSpace) {
        super((byte) 1);
        this.f_xml = pOSFtrXml;
        this.s_lemmas = set;
        this.s_forms = set2;
        this.m_ambi = map;
        this.s_space = stringTrainSpace;
    }

    /**
     * Creates a tagger in prediction mode (flag {@code 2}) from in-memory components.
     *
     * @param pOSFtrXml   feature template
     * @param set         lemma set
     * @param set2        form set; its size is cached for cosine similarity
     * @param map         ambiguity-class map
     * @param stringModel trained classifier
     */
    public POSTagger(POSFtrXml pOSFtrXml, Set<String> set, Set<String> set2, Map<String, String> map, StringModel stringModel) {
        super((byte) 2);
        this.f_xml = pOSFtrXml;
        this.s_lemmas = set;
        this.s_forms = set2;
        this.m_ambi = map;
        this.s_model = stringModel;
        this.d_denominator = Math.sqrt(set2.size());
    }

    /**
     * Creates a tagger in prediction mode (flag {@code 2}) by reading a saved model.
     *
     * @param pOSFtrXml      feature template
     * @param bufferedReader reader positioned at the start of a saved model
     * @throws IllegalStateException if the lexica cannot be read
     */
    public POSTagger(POSFtrXml pOSFtrXml, BufferedReader bufferedReader) {
        super((byte) 2);
        this.f_xml = pOSFtrXml;
        loadModel(bufferedReader);
    }

    /**
     * Reads, in order, the lemma set, the form set, the ambiguity map (space-delimited) and the
     * classifier from the reader; this mirrors the order written by {@link #saveModel}.
     *
     * @param bufferedReader reader positioned at the start of a saved model
     * @throws IllegalStateException if any of the lexica cannot be read
     */
    public void loadModel(BufferedReader bufferedReader) {
        try {
            this.s_lemmas = UTInput.getStringSet(bufferedReader);
            this.s_forms = UTInput.getStringSet(bufferedReader);
            this.m_ambi = UTInput.getStringMap(bufferedReader, " ");
        } catch (Exception e) {
            // Continuing would read the classifier from a reader in an unknown state and
            // dereference possibly-null lexica below, so fail here with the real cause.
            throw new IllegalStateException("Failed to load POS tagger lexica", e);
        }
        this.s_model = new StringModel(bufferedReader);
        this.d_denominator = Math.sqrt(this.s_forms.size());
    }

    /**
     * Writes the lemma set, the form set, the ambiguity map (space-delimited) and the classifier,
     * in that order.
     *
     * @param printStream destination stream
     */
    @Override // com.googlecode.clearnlp.engine.AbstractEngine
    public void saveModel(PrintStream printStream) {
        UTOutput.printSet(printStream, this.s_lemmas);
        UTOutput.printSet(printStream, this.s_forms);
        UTOutput.printMap(printStream, this.m_ambi, " ");
        this.s_model.save(printStream);
    }

    /**
     * Binds the sentence to this tagger and resets every node's POS tag to
     * {@link AbstractReader#DUMMY_TAG}. In training mode the labels are captured through
     * {@code POSLib.getLabels} before the reset.
     *
     * @param pOSNodeArr sentence to process
     */
    public void init(POSNode[] pOSNodeArr) {
        this.p_nodes = pOSNodeArr;
        if (this.i_flag == 1) {
            // Must run before the tags are overwritten below.
            this.g_labels = POSLib.getLabels(pOSNodeArr);
        }
        for (POSNode node : pOSNodeArr) {
            node.pos = AbstractReader.DUMMY_TAG;
        }
    }

    /**
     * Processes one sentence according to the engine flag: collects lexica (flag {@code 0}),
     * adds training instances (flag {@code 1}) or assigns predicted tags (flag {@code 2}).
     * Forms are normalized first in every mode.
     *
     * @param pOSNodeArr sentence to process; nodes are modified in place
     */
    public void tag(POSNode[] pOSNodeArr) {
        EngineProcess.normalizeForms(pOSNodeArr);
        if (this.i_flag == 0) {
            addLexica(pOSNodeArr);
            return;
        }
        init(pOSNodeArr);
        int size = pOSNodeArr.length;
        // i_source is a field because feature extraction (getField/getFields) reads it.
        for (this.i_source = 0; this.i_source < size; this.i_source++) {
            StringFeatureVector vector = getFeatureVector(this.f_xml);
            if (this.i_flag == 1) {
                train(vector);
            } else if (this.i_flag == 2) {
                predict(vector);
            }
        }
    }

    /**
     * Restores the captured label on the current token and, when the feature vector is
     * non-empty, adds it to the training space.
     */
    private void train(StringFeatureVector stringFeatureVector) {
        String label = this.g_labels[this.i_source];
        this.p_nodes[this.i_source].pos = label;
        if (stringFeatureVector.size() > 0) {
            this.s_space.addInstance(label, stringFeatureVector);
        }
    }

    /** Assigns the classifier's best label to the current token. */
    private void predict(StringFeatureVector stringFeatureVector) {
        StringPrediction best = this.s_model.predictBest(stringFeatureVector);
        this.p_nodes[this.i_source].pos = best.label;
    }

    /** Counts the simplified form and its (form, POS) pair for every node whose lemma is in the lemma set. */
    private void addLexica(POSNode[] pOSNodeArr) {
        for (POSNode node : pOSNodeArr) {
            if (this.s_lemmas.contains(node.lemma)) {
                this.p_forms.add(node.simplifiedForm);
                this.p_ambi.add(node.simplifiedForm, node.pos);
            }
        }
    }

    /**
     * Builds the ambiguity-class map from the collected form/POS counts.
     *
     * <p>For each form, its POS entries are sorted and appended, joined by {@code "_"}, until the
     * first entry whose value is not greater than {@code d}. Forms with no qualifying entry are
     * omitted. NOTE(review): the early break assumes {@code StringDoublePair}'s natural order is
     * descending by value - confirm.
     *
     * @param d exclusive lower bound on an entry's value
     * @return map from form to its ambiguity class
     */
    public Map<String, String> getAmbiguityMap(double d) {
        Map<String, String> ambiguity = new HashMap<String, String>();
        for (String form : this.p_ambi.keySet()) {
            StringDoublePair[] pairs = this.p_ambi.getProb1D(form);
            Arrays.sort(pairs);
            StringBuilder joined = new StringBuilder();
            for (StringDoublePair pair : pairs) {
                if (pair.d <= d) {
                    break;
                }
                joined.append("_");
                joined.append(pair.s);
            }
            if (joined.length() > 0) {
                // Drop the leading separator.
                ambiguity.put(form, joined.substring(1));
            }
        }
        return ambiguity;
    }

    /**
     * Returns the collected forms whose count is strictly greater than {@code i}.
     *
     * @param i exclusive lower bound on the form count
     * @return a new set of qualifying forms
     */
    public Set<String> getFormSet(int i) {
        Set<String> forms = new HashSet<String>();
        Iterator<?> it = this.p_forms.keys().iterator();
        while (it.hasNext()) {
            String form = (String) ((ObjectCursor<?>) it.next()).value;
            if (this.p_forms.get(form) > i) {
                forms.add(form);
            }
        }
        return forms;
    }

    /** Empties the form set in place; the cached denominator is left untouched. */
    public void clearFormSet() {
        this.s_forms.clear();
    }

    /**
     * Computes {@code |S intersect F| / (sqrt(|S|) * d_denominator)}, where {@code S} is the set
     * of distinct simplified forms in the sentence and {@code F} is the form set.
     *
     * <p>Returns {@code 0.0} when the intersection is empty, which includes an empty sentence or
     * an empty form set; the plain formula would yield {@code NaN} (0 / 0.0) in those cases.
     * NOTE(review): the denominator is only initialised in prediction mode; with a zero
     * denominator and a non-empty overlap the result is still {@code Infinity}.
     *
     * @param pOSNodeArr sentence to compare against the form set
     * @return the similarity score
     */
    public double getCosineSimilarity(POSNode[] pOSNodeArr) {
        Set<String> sentenceForms = new HashSet<String>();
        for (POSNode node : pOSNodeArr) {
            sentenceForms.add(node.simplifiedForm);
        }
        // Capture the size before retainAll shrinks the set.
        double denominator = Math.sqrt(sentenceForms.size()) * this.d_denominator;
        sentenceForms.retainAll(this.s_forms);
        int shared = sentenceForms.size();
        if (shared == 0) {
            return 0.0;
        }
        return shared / denominator;
    }

    /**
     * Resolves a single-valued feature for the token addressed by {@code ftrToken}.
     *
     * <p>Supported fields: {@code "f"} (simplified form, only if the lemma is in the lemma set),
     * {@code "m"} (lemma, same condition), {@code "p"} (POS tag unless it is still the dummy tag),
     * {@code "a"} (ambiguity class), and boolean features matched by
     * {@code POSFtrXml.P_BOOLEAN}, which return the field name itself when the test holds.
     *
     * @param ftrToken feature token carrying the field name and offset
     * @return the feature value, or {@code null} if the feature does not apply
     */
    @Override // com.googlecode.clearnlp.engine.AbstractEngine
    protected String getField(FtrToken ftrToken) {
        POSNode node = getNode(ftrToken);
        if (node == null) {
            return null;
        }
        if (ftrToken.isField("f")) {
            return this.s_lemmas.contains(node.lemma) ? node.simplifiedForm : null;
        }
        if (ftrToken.isField("m")) {
            return this.s_lemmas.contains(node.lemma) ? node.lemma : null;
        }
        if (ftrToken.isField("p")) {
            return node.isPos(AbstractReader.DUMMY_TAG) ? null : node.pos;
        }
        if (ftrToken.isField("a")) {
            return this.m_ambi.get(node.simplifiedForm);
        }
        Matcher matcher = POSFtrXml.P_BOOLEAN.matcher(ftrToken.field);
        if (!matcher.find()) {
            return null;
        }
        String form = node.simplifiedForm;
        boolean holds;
        switch (Integer.parseInt(matcher.group(1))) {
            case 0:
                holds = UTString.isAllUpperCase(form);
                break;
            case 1:
                holds = UTString.isAllLowerCase(form);
                break;
            case 2:
                holds = UTString.beginsWithUpperCase(form);
                break;
            case 3:
                holds = UTString.getNumOfCapitalsNotAtBeginning(form) == 1;
                break;
            case 4:
                // Plain feature index; unrelated to any engine flag constant.
                holds = UTString.getNumOfCapitalsNotAtBeginning(form) > 1;
                break;
            case 5:
                holds = form.contains(CTLibEn.POS_PERIOD);
                break;
            case 6:
                holds = UTString.containsDigit(form);
                break;
            case 7:
                holds = form.contains(PBLib.DELIM_LABEL);
                break;
            default:
                holds = false;
                break;
        }
        return holds ? ftrToken.field : null;
    }

    /**
     * Resolves a multi-valued feature for the token addressed by {@code ftrToken}: lemma prefixes
     * when the field matches {@code POSFtrXml.P_PREFIX}, otherwise lemma suffixes when it matches
     * {@code POSFtrXml.P_SUFFIX}. The number captured by the pattern is passed to
     * {@code UTString.getPrefixes} / {@code getSuffixes}.
     *
     * @param ftrToken feature token carrying the field name and offset
     * @return the affixes, or {@code null} if the token is out of range, the field matches
     *         neither pattern, or no affix was produced
     */
    @Override // com.googlecode.clearnlp.engine.AbstractEngine
    protected String[] getFields(FtrToken ftrToken) {
        POSNode node = getNode(ftrToken);
        if (node == null) {
            return null;
        }
        String[] affixes;
        Matcher prefixMatcher = POSFtrXml.P_PREFIX.matcher(ftrToken.field);
        if (prefixMatcher.find()) {
            affixes = UTString.getPrefixes(node.lemma, Integer.parseInt(prefixMatcher.group(1)));
        } else {
            Matcher suffixMatcher = POSFtrXml.P_SUFFIX.matcher(ftrToken.field);
            if (!suffixMatcher.find()) {
                return null;
            }
            affixes = UTString.getSuffixes(node.lemma, Integer.parseInt(suffixMatcher.group(1)));
        }
        return affixes.length == 0 ? null : affixes;
    }

    /**
     * Returns the node at {@code i_source + ftrToken.offset}, or {@code null} when that index
     * falls outside the current sentence.
     */
    private POSNode getNode(FtrToken ftrToken) {
        int index = this.i_source + ftrToken.offset;
        if (index < 0 || index >= this.p_nodes.length) {
            return null;
        }
        return this.p_nodes[index];
    }
}
