package com.googlecode.clearnlp.component.pos;

import com.googlecode.clearnlp.classification.algorithm.AbstractAlgorithm;
import com.googlecode.clearnlp.classification.model.StringModel;
import com.googlecode.clearnlp.classification.prediction.StringPrediction;
import com.googlecode.clearnlp.classification.train.StringTrainSpace;
import com.googlecode.clearnlp.classification.vector.StringFeatureVector;
import com.googlecode.clearnlp.component.AbstractStatisticalComponent;
import com.googlecode.clearnlp.constituent.CTLibEn;
import com.googlecode.clearnlp.dependency.DEPNode;
import com.googlecode.clearnlp.dependency.DEPTree;
import com.googlecode.clearnlp.engine.AbstractEngine;
import com.googlecode.clearnlp.engine.EngineProcess;
import com.googlecode.clearnlp.feature.xml.FtrToken;
import com.googlecode.clearnlp.feature.xml.JointFtrXml;
import com.googlecode.clearnlp.pos.POSState;
import com.googlecode.clearnlp.propbank.PBLib;
import com.googlecode.clearnlp.util.UTInput;
import com.googlecode.clearnlp.util.UTOutput;
import com.googlecode.clearnlp.util.UTString;
import com.googlecode.clearnlp.util.map.Prob2DMap;
import com.googlecode.clearnlp.util.pair.ObjectDoublePair;
import com.googlecode.clearnlp.util.pair.Pair;
import com.googlecode.clearnlp.util.pair.StringDoublePair;
import com.googlecode.clearnlp.util.triple.Triple;
import java.io.BufferedReader;
import java.io.PrintStream;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Deque;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipOutputStream;

/* loaded from: input_file:com/googlecode/clearnlp/component/pos/CPOSBackTagger.class */
/**
 * Statistical part-of-speech tagging component with branch re-tagging.
 *
 * <p>While tagging a sentence left to right, every position whose best and second-best
 * predictions differ by less than {@link #d_margin} is remembered as a {@link POSState}.
 * After the main pass, tagging is restarted from each remembered position using the
 * second-best label, and the highest-scoring sequence is selected (see {@link #tagBranches()}).
 *
 * <p>Behaviour depends on the inherited {@code i_flag}, as used in this class:
 * <ul>
 *   <li>{@code 0} - collect ambiguity-class counts only ({@link #addLexica()});</li>
 *   <li>{@code 1} - assign gold tags and collect training instances;</li>
 *   <li>{@code 2} and {@code 4} - assign predicted tags, no instances are collected;</li>
 *   <li>{@code 3} - assign predicted tags while collecting instances labelled with gold tags.</li>
 * </ul>
 */
public class CPOSBackTagger extends AbstractStatisticalComponent {
    /** Zip entry name of the default configuration. */
    protected final String ENTRY_CONFIGURATION = "pos_CONFIGURATION";
    /** Zip entry name prefix of the feature templates (followed by an integer index). */
    protected final String ENTRY_FEATURE = "pos_FEATURE";
    /** Zip entry name of the lexica. */
    protected final String ENTRY_LEXICA = "pos_LEXICA";
    /** Zip entry name prefix of the statistical models (followed by an integer index). */
    protected final String ENTRY_MODEL = "pos_MODEL";
    /** Index of the lower simplified forms within the lexica array. */
    protected final int LEXICA_LOWER_SIMPLIFIED_FORMS = 0;
    /** Index of the ambiguity classes within the lexica array. */
    protected final int LEXICA_AMBIGUITY_CLASSES = 1;

    /** Lower simplified word forms that may be used as lexical features. */
    protected Set<String> s_lsfs;
    /** Counts of (simplified form, gold tag) pairs gathered while collecting lexica. */
    protected Prob2DMap p_ambi;
    /** Maps a simplified form to its ambiguity class (tags joined by {@code "_"}). */
    protected Map<String, String> m_ambi;
    /** Gold tags saved by {@link #init(DEPTree)} before the tree's tags are cleared. */
    protected String[] g_tags;
    /** Index of the token currently being tagged (the first token has index 1). */
    protected int i_input;
    /** Sum of the prediction scores of the tags assigned so far. */
    protected double d_score;
    /** Score margin under which the second-best prediction is kept as a branch. */
    protected double d_margin;

    /** Constructs a tagger without any feature templates, lexica, or models. */
    public CPOSBackTagger() {
    }

    /**
     * Constructs a tagger for collecting lexica.
     *
     * @param jointFtrXmlArr feature templates, passed to the superclass
     * @param set lower simplified forms usable as lexical features
     */
    public CPOSBackTagger(JointFtrXml[] jointFtrXmlArr, Set<String> set) {
        super(jointFtrXmlArr);
        this.s_lsfs = set;
        this.p_ambi = new Prob2DMap();
    }

    /**
     * Constructs a tagger from feature templates, train spaces, and lexica.
     *
     * @param jointFtrXmlArr feature templates, passed to the superclass
     * @param stringTrainSpaceArr train spaces, passed to the superclass
     * @param objArr lexica, passed to the superclass
     * @param d margin used when deciding whether to branch
     */
    public CPOSBackTagger(JointFtrXml[] jointFtrXmlArr, StringTrainSpace[] stringTrainSpaceArr, Object[] objArr, double d) {
        super(jointFtrXmlArr, stringTrainSpaceArr, objArr);
        this.d_margin = d;
    }

    /**
     * Constructs a tagger from feature templates, models, and lexica.
     *
     * @param jointFtrXmlArr feature templates, passed to the superclass
     * @param stringModelArr statistical models, passed to the superclass
     * @param objArr lexica, passed to the superclass
     * @param d margin used when deciding whether to branch
     */
    public CPOSBackTagger(JointFtrXml[] jointFtrXmlArr, StringModel[] stringModelArr, Object[] objArr, double d) {
        super(jointFtrXmlArr, stringModelArr, objArr);
        this.d_margin = d;
    }

    /**
     * Constructs a tagger from feature templates, train spaces, models, and lexica.
     *
     * @param jointFtrXmlArr feature templates, passed to the superclass
     * @param stringTrainSpaceArr train spaces, passed to the superclass
     * @param stringModelArr statistical models, passed to the superclass
     * @param objArr lexica, passed to the superclass
     * @param d margin used when deciding whether to branch
     */
    public CPOSBackTagger(JointFtrXml[] jointFtrXmlArr, StringTrainSpace[] stringTrainSpaceArr, StringModel[] stringModelArr, Object[] objArr, double d) {
        super(jointFtrXmlArr, stringTrainSpaceArr, stringModelArr, objArr);
        this.d_margin = d;
    }

    /**
     * Constructs a tagger from a zipped model stream.
     *
     * @param zipInputStream stream passed to the superclass constructor
     */
    public CPOSBackTagger(ZipInputStream zipInputStream) {
        super(zipInputStream);
    }

    /**
     * Installs the lexica: element 0 is the set of lower simplified forms and
     * element 1 is the ambiguity-class map.
     *
     * @param objArr lexica array laid out as described above
     */
    @Override // com.googlecode.clearnlp.component.AbstractStatisticalComponent
    @SuppressWarnings("unchecked") // the lexica array is untyped by contract
    protected void initLexia(Object[] objArr) {
        this.s_lsfs = (Set<String>) objArr[LEXICA_LOWER_SIMPLIFIED_FORMS];
        this.m_ambi = (Map<String, String>) objArr[LEXICA_AMBIGUITY_CLASSES];
    }

    /**
     * Reads configuration, feature templates, lexica, and models from the zip stream,
     * dispatching on each entry's name. Any exception is printed and stops loading.
     *
     * @param zipInputStream stream positioned before the first entry to read
     */
    @Override // com.googlecode.clearnlp.component.AbstractStatisticalComponent
    public void loadModels(ZipInputStream zipInputStream) {
        // The ENTRY_* constants are referenced by simple name on purpose: that keeps them
        // compile-time constants, so they are usable even if this method runs before this
        // class's field initializers (e.g. when invoked from a superclass constructor).
        final int featurePrefixLength = ENTRY_FEATURE.length();
        final int modelPrefixLength = ENTRY_MODEL.length();
        this.f_xmls = new JointFtrXml[1];
        this.s_models = null;
        try {
            ZipEntry entry;
            while ((entry = zipInputStream.getNextEntry()) != null) {
                String name = entry.getName();
                if (name.equals(ENTRY_CONFIGURATION)) {
                    loadDefaultConfiguration(zipInputStream);
                } else if (name.startsWith(ENTRY_FEATURE)) {
                    // the text after the prefix is the index of the template
                    loadFeatureTemplates(zipInputStream, Integer.parseInt(name.substring(featurePrefixLength)));
                } else if (name.startsWith(ENTRY_MODEL)) {
                    // the text after the prefix is the index of the model
                    loadStatisticalModels(zipInputStream, Integer.parseInt(name.substring(modelPrefixLength)));
                } else if (name.equals(ENTRY_LEXICA)) {
                    loadLexica(zipInputStream);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads the lower simplified forms followed by the ambiguity-class map
     * (space-delimited) from the current zip entry.
     *
     * @param zipInputStream stream positioned at the lexica entry
     * @throws Exception if reading fails
     */
    protected void loadLexica(ZipInputStream zipInputStream) throws Exception {
        BufferedReader reader = UTInput.createBufferedReader(zipInputStream);
        System.out.println("Loading lexica.");
        this.s_lsfs = UTInput.getStringSet(reader);
        this.m_ambi = UTInput.getStringMap(reader, " ");
    }

    /**
     * Writes configuration, feature templates, lexica, and models to the zip stream and
     * closes it. Any exception is printed; in that case the stream is not closed here.
     *
     * @param zipOutputStream destination stream
     */
    @Override // com.googlecode.clearnlp.component.AbstractStatisticalComponent
    public void saveModels(ZipOutputStream zipOutputStream) {
        try {
            saveDefaultConfiguration(zipOutputStream, ENTRY_CONFIGURATION);
            saveFeatureTemplates(zipOutputStream, ENTRY_FEATURE);
            saveLexica(zipOutputStream);
            saveStatisticalModels(zipOutputStream, ENTRY_MODEL);
            zipOutputStream.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes the lower simplified forms followed by the ambiguity-class map
     * (space-delimited) as a single zip entry.
     *
     * @param zipOutputStream destination stream
     * @throws Exception if writing fails
     */
    protected void saveLexica(ZipOutputStream zipOutputStream) throws Exception {
        zipOutputStream.putNextEntry(new ZipEntry(ENTRY_LEXICA));
        PrintStream out = UTOutput.createPrintBufferedStream(zipOutputStream);
        System.out.println("Saving lexica.");
        UTOutput.printSet(out, this.s_lsfs);
        out.flush();
        UTOutput.printMap(out, this.m_ambi, " ");
        out.flush();
        zipOutputStream.closeEntry();
    }

    /**
     * Returns the lexica as a two-element array: the lower simplified forms and the
     * ambiguity classes. When {@code i_flag} is 0 the ambiguity classes are derived from
     * the counts collected so far; otherwise the current map is returned.
     *
     * @return lexica array
     */
    @Override // com.googlecode.clearnlp.component.AbstractStatisticalComponent
    public Object[] getLexica() {
        Map<String, String> ambiguityClasses = (this.i_flag == 0) ? getAmbiguityClasses() : this.m_ambi;
        Object[] lexica = new Object[2];
        lexica[LEXICA_LOWER_SIMPLIFIED_FORMS] = this.s_lsfs;
        lexica[LEXICA_AMBIGUITY_CLASSES] = ambiguityClasses;
        return lexica;
    }

    /** @return the set of lower simplified forms (the live set, not a copy) */
    public Set<String> getLowerSimplifiedForms() {
        return this.s_lsfs;
    }

    /** Removes every entry from the set of lower simplified forms. */
    public void clearLowerSimplifiedForms() {
        this.s_lsfs.clear();
    }

    /**
     * Builds the ambiguity-class map from the collected counts: for each form, the tags
     * whose probability exceeds the threshold of the first feature template are joined
     * with {@code "_"} in sorted order. Forms with no qualifying tag are omitted.
     */
    private Map<String, String> getAmbiguityClasses() {
        double threshold = this.f_xmls[0].getAmbiguityClassThreshold();
        Map<String, String> classes = new HashMap<String, String>();
        for (String form : this.p_ambi.keySet()) {
            StringDoublePair[] tagProbs = this.p_ambi.getProb1D(form);
            Arrays.sort(tagProbs);
            StringBuilder joined = new StringBuilder();
            for (StringDoublePair tagProb : tagProbs) {
                // presumably the natural order is descending by probability, so the first
                // entry at or below the threshold ends the scan - TODO confirm
                if (tagProb.d <= threshold) {
                    break;
                }
                if (joined.length() > 0) {
                    joined.append("_");
                }
                joined.append(tagProb.s);
            }
            if (joined.length() > 0) {
                classes.put(form, joined.toString());
            }
        }
        return classes;
    }

    /**
     * Adds this sentence's counts to the running totals: {@code iArr[0]} receives the number
     * of tokens and {@code iArr[1]} the number of tokens whose tag equals the gold tag.
     *
     * @param iArr running totals, updated in place
     */
    @Override // com.googlecode.clearnlp.component.AbstractStatisticalComponent
    public void countAccuracy(int[] iArr) {
        int correct = 0;
        for (int i = 1; i < this.t_size; i++) {
            if (this.d_tree.get(i).pos.equals(this.g_tags[i])) {
                correct++;
            }
        }
        iArr[0] += this.t_size - 1;
        iArr[1] += correct;
    }

    /**
     * Processes one tree according to the current {@code i_flag}.
     *
     * @param dEPTree tree to process
     */
    @Override // com.googlecode.clearnlp.component.AbstractComponent
    public void process(DEPTree dEPTree) {
        init(dEPTree);
        processAux();
    }

    /**
     * Resets the per-sentence state. Unless {@code i_flag} is 2, the tree's current tags are
     * saved as gold tags and then cleared. Word forms are normalized in every mode.
     *
     * @param dEPTree tree about to be processed
     */
    protected void init(DEPTree dEPTree) {
        this.d_tree = dEPTree;
        this.t_size = dEPTree.size();
        this.d_score = 0.0d;
        this.i_input = 1;
        if (this.i_flag != 2) {
            this.g_tags = dEPTree.getPOSTags();
            dEPTree.clearPOSTags();
        }
        EngineProcess.normalizeForms(dEPTree);
    }

    /**
     * Collects lexica when {@code i_flag} is 0; otherwise tags the sentence and adds any
     * returned instances to the first train space.
     */
    protected void processAux() {
        if (this.i_flag == 0) {
            addLexica();
            return;
        }
        List<Pair<String, StringFeatureVector>> instances = tag();
        if (instances == null) {
            return;
        }
        for (Pair<String, StringFeatureVector> instance : instances) {
            this.s_spaces[0].addInstance(instance.o1, instance.o2);
        }
    }

    /**
     * Counts the (simplified form, gold tag) pair of every token whose lower simplified
     * form is in the set of lower simplified forms.
     */
    protected void addLexica() {
        for (int i = 1; i < this.t_size; i++) {
            DEPNode node = this.d_tree.get(i);
            if (this.s_lsfs.contains(node.lowerSimplifiedForm)) {
                this.p_ambi.add(node.simplifiedForm, this.g_tags[i]);
            }
        }
    }

    /**
     * Tags the sentence: a single pass when {@code i_flag} is 1, otherwise with branching.
     *
     * @return collected instances, or {@code null} when none are collected
     */
    protected List<Pair<String, StringFeatureVector>> tag() {
        if (this.i_flag == 1) {
            return tagMain().o2;
        }
        return tagBranches();
    }

    /**
     * Tags from the current input position to the end of the sentence.
     *
     * @return the tree's tags after the pass, the instances collected during the pass, and
     *         the branch states recorded during the pass
     */
    protected Triple<String[], List<Pair<String, StringFeatureVector>>, Deque<POSState>> tagMain() {
        List<Pair<String, StringFeatureVector>> instances = new ArrayList<Pair<String, StringFeatureVector>>();
        Deque<POSState> branchStates = new ArrayDeque<POSState>();
        while (this.i_input < this.t_size) {
            tagAux(getLabel(instances, branchStates));
        }
        return new Triple<String[], List<Pair<String, StringFeatureVector>>, Deque<POSState>>(
                this.d_tree.getPOSTags(), instances, branchStates);
    }

    /** Assigns the predicted label to the current token, adds its score, and advances. */
    private void tagAux(StringPrediction stringPrediction) {
        this.d_tree.get(this.i_input).pos = stringPrediction.label;
        this.d_score += stringPrediction.score;
        this.i_input++;
    }

    /**
     * Runs the main pass, then one additional pass per branch state recorded by the main
     * pass, and selects among the resulting sequences.
     *
     * <p>When {@code i_flag} is 2 or 4 the tree receives the tags of the sequence with the
     * highest accumulated score and {@code null} is returned. Otherwise each sequence is
     * re-scored by its number of gold-matching tags, and the instances of the main pass
     * followed by those of the best sequence are returned.
     *
     * @return collected instances, or {@code null} when {@code i_flag} is 2 or 4
     */
    protected List<Pair<String, StringFeatureVector>> tagBranches() {
        List<ObjectDoublePair<Triple<String[], List<Pair<String, StringFeatureVector>>, Deque<POSState>>>> candidates =
                new ArrayList<ObjectDoublePair<Triple<String[], List<Pair<String, StringFeatureVector>>, Deque<POSState>>>>();
        Triple<String[], List<Pair<String, StringFeatureVector>>, Deque<POSState>> mainPass = tagMain();
        candidates.add(new ObjectDoublePair<Triple<String[], List<Pair<String, StringFeatureVector>>, Deque<POSState>>>(
                mainPass, this.d_score));
        // only the states recorded by the main pass are expanded; states found while
        // re-tagging a branch are not explored further
        for (POSState state : mainPass.o3) {
            reset(state);
            candidates.add(new ObjectDoublePair<Triple<String[], List<Pair<String, StringFeatureVector>>, Deque<POSState>>>(
                    tagMain(), this.d_score));
        }
        if (this.i_flag == 2 || this.i_flag == 4) {
            this.d_tree.resetPOSTags(getMax(candidates).o1);
            return null;
        }
        setGoldScores(candidates);
        Triple<String[], List<Pair<String, StringFeatureVector>>, Deque<POSState>> best = getMax(candidates);
        List<Pair<String, StringFeatureVector>> instances =
                new ArrayList<Pair<String, StringFeatureVector>>(mainPass.o2);
        instances.addAll(best.o2);
        return instances;
    }

    /**
     * Rewinds to a recorded branch state, applies its alternative label, and clears the
     * tags of later tokens.
     */
    private void reset(POSState pOSState) {
        this.i_input = pOSState.input;
        this.d_score = pOSState.score;
        tagAux(pOSState.label);
        // NOTE(review): tagAux has already advanced i_input, so clearing starts one token
        // after the next token to be tagged; that next token keeps its previous tag until
        // it is overwritten. Confirm whether "i = this.i_input" was intended.
        for (int i = this.i_input + 1; i < this.t_size; i++) {
            this.d_tree.get(i).pos = null;
        }
    }

    /** Returns the candidate with the highest score; the earliest one wins ties. */
    @SuppressWarnings("unchecked") // the payload was stored with this exact type by tagBranches
    private Triple<String[], List<Pair<String, StringFeatureVector>>, Deque<POSState>> getMax(List<ObjectDoublePair<Triple<String[], List<Pair<String, StringFeatureVector>>, Deque<POSState>>>> list) {
        ObjectDoublePair<Triple<String[], List<Pair<String, StringFeatureVector>>, Deque<POSState>>> best = list.get(0);
        for (int i = 1, size = list.size(); i < size; i++) {
            ObjectDoublePair<Triple<String[], List<Pair<String, StringFeatureVector>>, Deque<POSState>>> candidate = list.get(i);
            if (best.d < candidate.d) {
                best = candidate;
            }
        }
        return (Triple<String[], List<Pair<String, StringFeatureVector>>, Deque<POSState>>) best.o;
    }

    /** Replaces each candidate's score with its number of tags equal to the gold tags. */
    @SuppressWarnings("unchecked") // the payload was stored with this exact type by tagBranches
    private void setGoldScores(List<ObjectDoublePair<Triple<String[], List<Pair<String, StringFeatureVector>>, Deque<POSState>>>> list) {
        for (ObjectDoublePair<Triple<String[], List<Pair<String, StringFeatureVector>>, Deque<POSState>>> candidate : list) {
            String[] tags = ((Triple<String[], List<Pair<String, StringFeatureVector>>, Deque<POSState>>) candidate.o).o1;
            int correct = 0;
            for (int i = 1; i < this.t_size; i++) {
                if (this.g_tags[i].equals(tags[i])) {
                    correct++;
                }
            }
            candidate.d = correct;
        }
    }

    /**
     * Produces the label for the current token according to {@code i_flag} and, in the
     * modes that collect instances (1 and 3), appends a gold-labelled instance when the
     * feature vector is non-empty. Returns {@code null} for any other flag value.
     */
    private StringPrediction getLabel(List<Pair<String, StringFeatureVector>> list, Deque<POSState> deque) {
        StringFeatureVector vector = getFeatureVector(this.f_xmls[0]);
        switch (this.i_flag) {
            case 1: {
                StringPrediction gold = getGoldLabel();
                if (vector.size() > 0) {
                    list.add(new Pair<String, StringFeatureVector>(gold.label, vector));
                }
                return gold;
            }
            case 2:
            case 4:
                return getAutoLabel(vector, deque);
            case 3: {
                StringPrediction predicted = getAutoLabel(vector, deque);
                if (vector.size() > 0) {
                    list.add(new Pair<String, StringFeatureVector>(getGoldLabel().label, vector));
                }
                return predicted;
            }
            default:
                return null;
        }
    }

    /** Wraps the gold tag of the current token as a prediction with score 1. */
    private StringPrediction getGoldLabel() {
        return new StringPrediction(this.g_tags[this.i_input], 1.0d);
    }

    /**
     * Predicts the label of the current token. When a second prediction exists and trails
     * the first by less than {@link #d_margin}, a branch state carrying the second
     * prediction is appended to {@code deque}.
     */
    private StringPrediction getAutoLabel(StringFeatureVector stringFeatureVector, Deque<POSState> deque) {
        List<StringPrediction> predictions = this.s_models[0].predictAll(stringFeatureVector);
        AbstractAlgorithm.normalize(predictions);
        StringPrediction best = predictions.get(0);
        // a model with a single label has no runner-up to branch on
        if (predictions.size() > 1) {
            StringPrediction runnerUp = predictions.get(1);
            if (best.score - runnerUp.score < this.d_margin) {
                deque.add(new POSState(this.i_input, this.d_score, runnerUp));
            }
        }
        return best;
    }

    /**
     * Extracts a single feature value for the token addressed by {@code ftrToken}.
     *
     * @param ftrToken feature token giving the offset and the field to extract
     * @return the feature value, or {@code null} when the addressed token does not exist
     *         or the feature does not apply
     * @throws IllegalArgumentException if a boolean feature index is not in 0..9
     */
    @Override // com.googlecode.clearnlp.component.AbstractStatisticalComponent
    protected String getField(FtrToken ftrToken) {
        DEPNode node = getNodeInput(ftrToken);
        if (node == null) {
            return null;
        }
        if (ftrToken.isField(JointFtrXml.F_SIMPLIFIED_FORM)) {
            return this.s_lsfs.contains(node.lowerSimplifiedForm) ? node.simplifiedForm : null;
        }
        if (ftrToken.isField(JointFtrXml.F_LOWER_SIMPLIFIED_FORM)) {
            return this.s_lsfs.contains(node.lowerSimplifiedForm) ? node.lowerSimplifiedForm : null;
        }
        if (ftrToken.isField("p")) {
            return node.pos;
        }
        if (ftrToken.isField("a")) {
            return this.m_ambi.get(node.simplifiedForm);
        }

        Matcher matcher = JointFtrXml.P_BOOLEAN.matcher(ftrToken.field);
        if (matcher.find()) {
            return getBooleanField(node, ftrToken, Integer.parseInt(matcher.group(1)));
        }

        matcher = JointFtrXml.P_FEAT.matcher(ftrToken.field);
        if (matcher.find()) {
            return node.getFeat(matcher.group(1));
        }

        String form = node.lowerSimplifiedForm;
        matcher = JointFtrXml.P_PREFIX.matcher(ftrToken.field);
        if (matcher.find()) {
            int prefixLength = Integer.parseInt(matcher.group(1));
            return (prefixLength <= form.length()) ? form.substring(0, prefixLength) : null;
        }

        matcher = JointFtrXml.P_SUFFIX.matcher(ftrToken.field);
        if (matcher.find()) {
            int suffixLength = Integer.parseInt(matcher.group(1));
            int formLength = form.length();
            if (suffixLength <= formLength) {
                return form.substring(formLength - suffixLength, formLength);
            }
        }
        return null;
    }

    /**
     * Evaluates the boolean feature with the given index and returns the feature's field
     * name when it holds, {@code null} otherwise.
     */
    private String getBooleanField(DEPNode node, FtrToken ftrToken, int index) {
        boolean holds;
        switch (index) {
            case 0:
                holds = UTString.isAllUpperCase(node.simplifiedForm);
                break;
            case 1:
                holds = UTString.isAllLowerCase(node.simplifiedForm);
                break;
            case 2:
                holds = UTString.beginsWithUpperCase(node.simplifiedForm);
                break;
            case 3:
                holds = UTString.getNumOfCapitalsNotAtBeginning(node.simplifiedForm) == 1;
                break;
            case 4:
                holds = UTString.getNumOfCapitalsNotAtBeginning(node.simplifiedForm) > 1;
                break;
            case 5:
                holds = node.simplifiedForm.contains(CTLibEn.POS_PERIOD);
                break;
            case 6:
                holds = UTString.containsDigit(node.simplifiedForm);
                break;
            case 7:
                holds = node.simplifiedForm.contains(PBLib.DELIM_LABEL);
                break;
            case 8:
                // the token being tagged is the last one of the sentence
                holds = this.i_input == this.t_size - 1;
                break;
            case 9:
                // the token being tagged is the first one of the sentence
                holds = this.i_input == 1;
                break;
            default:
                throw new IllegalArgumentException("Unsupported feature: " + index);
        }
        return holds ? ftrToken.field : null;
    }

    /**
     * Extracts a multi-valued feature (prefixes or suffixes of the lower simplified form)
     * for the token addressed by {@code ftrToken}.
     *
     * @param ftrToken feature token giving the offset and the field to extract
     * @return the feature values, or {@code null} when the addressed token does not exist,
     *         the field is neither a prefix nor a suffix field, or no value is produced
     */
    @Override // com.googlecode.clearnlp.component.AbstractStatisticalComponent
    protected String[] getFields(FtrToken ftrToken) {
        DEPNode node = getNodeInput(ftrToken);
        if (node == null) {
            return null;
        }
        String[] values;
        Matcher prefixMatcher = JointFtrXml.P_PREFIX.matcher(ftrToken.field);
        if (prefixMatcher.find()) {
            values = UTString.getPrefixes(node.lowerSimplifiedForm, Integer.parseInt(prefixMatcher.group(1)));
        } else {
            Matcher suffixMatcher = JointFtrXml.P_SUFFIX.matcher(ftrToken.field);
            if (!suffixMatcher.find()) {
                return null;
            }
            values = UTString.getSuffixes(node.lowerSimplifiedForm, Integer.parseInt(suffixMatcher.group(1)));
        }
        return (values.length == 0) ? null : values;
    }

    /**
     * Returns the token at the feature token's offset from the current input position.
     *
     * @param ftrToken feature token providing the offset
     * @return the addressed token, or {@code null} when the index is below 1 or not less
     *         than the tree size
     */
    protected DEPNode getNodeInput(FtrToken ftrToken) {
        int index = this.i_input + ftrToken.offset;
        if (index <= 0 || index >= this.t_size) {
            return null;
        }
        return this.d_tree.get(index);
    }
}
