package org.languagetool.rules;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.ResourceBundle;
import java.util.Set;
import org.jetbrains.annotations.Nullable;
import org.languagetool.AnalyzedSentence;
import org.languagetool.JLanguageTool;
import org.languagetool.Language;
import org.languagetool.languagemodel.LanguageModel;
import org.languagetool.tokenizers.Tokenizer;
import org.languagetool.tools.StringTools;
import org.languagetool.tools.Tools;

/* loaded from: input_file:org/languagetool/rules/ConfusionProbabilityRule.class */
public abstract class ConfusionProbabilityRule extends Rule {
    /** Identifier returned by {@link #getId()}. */
    public static final String RULE_ID = "CONFUSION_RULE";
    /**
     * Coverage threshold: the n-gram probability methods of this class only trust a
     * result when at least one of the looked-up contexts reaches this coverage
     * (share of count lookups that returned a value greater than zero).
     */
    public static final float MIN_COVERAGE = 0.5f;
    /**
     * Lower bound the alternative's probability must reach to be suggested; also the
     * value returned by the n-gram probability methods when coverage is insufficient.
     */
    private static final double MIN_PROB = 0.0d;
    /** Debug switch; always {@code false} in this build. */
    private static final boolean DEBUG = false;
    /** Maps a word to the confusion sets it belongs to; loaded from {@code confusion_sets.txt}. */
    private final Map<String, List<ConfusionSet>> wordToSets;
    /** Language model queried for n-gram occurrence counts. */
    private final LanguageModel lm;
    /** Value of {@code lm.getTotalTokenCount()} captured at construction time. */
    private final long totalTokenCount;
    /** N-gram size; the constructor accepts 1..5 but only 3 and 4 are handled when matching. */
    private final int grams;
    /** Language whose word tokenizer is used to split sentences. */
    private final Language language;

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:org/languagetool/rules/ConfusionProbabilityRule$GoogleToken.class */
    /**
     * A single token of a tokenized text together with its character offsets
     * in that text.
     */
    public static class GoogleToken {
        /** The token text. */
        String token;
        /** Offset of the first character of the token in the tokenized text. */
        int startPos;
        /** Offset just past the last character of the token (exclusive). */
        int endPos;

        /**
         * @param str the token text
         * @param i   start offset of the token
         * @param i2  end offset of the token (exclusive)
         */
        GoogleToken(String str, int i, int i2) {
            this.token = str;
            this.startPos = i;
            this.endPos = i2;
        }

        /** @return whether {@code StringTools.isWhitespace} accepts the token text */
        boolean isWhitespace() {
            return StringTools.isWhitespace(this.token);
        }

        /** @return the plain token text, which is what error messages print */
        @Override
        public String toString() {
            return this.token;
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:org/languagetool/rules/ConfusionProbabilityRule$Probability.class */
    /**
     * Immutable pair of a (pseudo) probability and the coverage of the count
     * lookups it was derived from.
     */
    public static class Probability {
        /** The computed probability value. */
        final double prob;
        /** Share of count lookups that returned a value greater than zero. */
        final float coverage;

        /**
         * @param probability the probability value to store
         * @param lookupCoverage the coverage value to store
         */
        Probability(double probability, float lookupCoverage) {
            prob = probability;
            coverage = lookupCoverage;
        }
    }

    /**
     * Creates a rule that works on 3-grams; delegates to the four-argument
     * constructor with an n-gram size of 3.
     *
     * @param resourceBundle message bundle passed on to the superclass
     * @param languageModel  language model used for occurrence counts
     * @param language       language whose confusion sets and tokenizer are used
     */
    public ConfusionProbabilityRule(ResourceBundle resourceBundle, LanguageModel languageModel, Language language) {
        this(resourceBundle, languageModel, language, 3);
    }

    /**
     * Creates a rule for the given n-gram size and loads the language's confusion
     * sets from {@code /<shortName>/confusion_sets.txt} in the resource directory.
     *
     * @param resourceBundle message bundle passed on to the superclass and used
     *                       for the category name
     * @param languageModel  language model used for occurrence counts; must not be null
     * @param language       language whose confusion sets and tokenizer are used;
     *                       must not be null
     * @param i              n-gram size, must be between 1 and 5 (inclusive)
     * @throws IllegalArgumentException if {@code i} is outside 1..5
     * @throws NullPointerException     if {@code languageModel} or {@code language} is null
     * @throws RuntimeException         wrapping the {@link IOException} if the confusion
     *                                  sets cannot be read
     */
    public ConfusionProbabilityRule(ResourceBundle resourceBundle, LanguageModel languageModel, Language language, int i) {
        super(resourceBundle);
        setCategory(new Category(resourceBundle.getString("category_typo")));
        setLocQualityIssueType(ITSIssueType.NonConformance);
        String confusionSetPath = "/" + language.getShortName() + "/confusion_sets.txt";
        // try-with-resources closes the stream on success and on failure; a null
        // resource is silently skipped by the language, so no explicit null check is needed.
        try (InputStream confusionSetStream = JLanguageTool.getDataBroker().getFromResourceDirAsStream(confusionSetPath)) {
            this.wordToSets = new ConfusionSetLoader().loadConfusionSet(confusionSetStream);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        this.lm = Objects.requireNonNull(languageModel);
        this.language = Objects.requireNonNull(language);
        if (i < 1 || i > 5) {
            throw new IllegalArgumentException("grams must be between 1 and 5: " + i);
        }
        this.grams = i;
        this.totalTokenCount = languageModel.getTotalTokenCount();
    }

    /** @return the fixed rule identifier {@link #RULE_ID} */
    @Override // org.languagetool.rules.Rule
    public String getId() {
        return RULE_ID;
    }

    /**
     * Checks every token of the sentence that belongs to a confusion set and reports
     * a match when the other member of the set is sufficiently more probable in
     * the surrounding context.
     *
     * <p>If a token is not found as-is and starts with an uppercase letter, the lookup
     * is retried with the first character lowercased; in that case the uppercase
     * variant of the confusion set is used for comparison and suggestion.
     *
     * @param analyzedSentence the sentence to check
     * @return the matches found, possibly empty, never null
     */
    @Override // org.languagetool.rules.Rule
    public RuleMatch[] match(AnalyzedSentence analyzedSentence) {
        String text = analyzedSentence.getText();
        List<GoogleToken> googleTokens = getGoogleTokens(text, true);
        List<RuleMatch> matches = new ArrayList<>();
        for (GoogleToken googleToken : googleTokens) {
            String word = googleToken.token;
            List<ConfusionSet> confusionSets = this.wordToSets.get(word);
            boolean uppercase = false;
            if (confusionSets == null && word.length() > 0 && Character.isUpperCase(word.charAt(0))) {
                confusionSets = this.wordToSets.get(StringTools.lowercaseFirstChar(word));
                uppercase = true;
            }
            if (confusionSets == null) {
                continue;
            }
            for (ConfusionSet confusionSet : confusionSets) {
                if (confusionSet == null) {
                    continue;
                }
                Set<ConfusionString> members = uppercase ? confusionSet.getUppercaseFirstCharSet() : confusionSet.getSet();
                ConfusionString betterAlternative = getBetterAlternativeOrNull(googleToken, googleTokens, members, confusionSet.getFactor());
                // isException() is only consulted once an alternative was actually found.
                if (betterAlternative != null && !isException(text)) {
                    String message = getMessage(getConfusionString(members, googleToken), betterAlternative);
                    RuleMatch ruleMatch = new RuleMatch(this, googleToken.startPos, googleToken.endPos, message);
                    ruleMatch.setSuggestedReplacement(betterAlternative.getString());
                    matches.add(ruleMatch);
                }
            }
        }
        return matches.toArray(new RuleMatch[matches.size()]);
    }

    /**
     * Hook that lets subclasses suppress matches for a sentence; {@code match}
     * calls it with the sentence text once a better alternative has been found.
     * This base implementation never suppresses anything.
     *
     * @param str the full sentence text
     * @return always {@code false} here
     */
    protected boolean isException(String str) {
        return false;
    }

    /**
     * @return the localized text stored under the key
     *         {@code statistics_rule_description} in this rule's message bundle
     */
    @Override // org.languagetool.rules.Rule
    public String getDescription() {
        // The message takes no format arguments, hence the empty argument array.
        return Tools.i18n(this.messages, "statistics_rule_description", new Object[0]);
    }

    /**
     * @return the word tokenizer of this rule's language; subclasses may override
     *         this to supply a different tokenizer
     */
    protected Tokenizer getWordTokenizer() {
        return this.language.getWordTokenizer();
    }

    /**
     * Builds the localized match message, choosing the message key by which
     * descriptions are available.
     *
     * <ul>
     *   <li>both words have a description: {@code statistics_suggest1}</li>
     *   <li>only the suggestion has a description: {@code statistics_suggest2}</li>
     *   <li>the suggestion has no description: {@code statistics_suggest3}</li>
     * </ul>
     *
     * @param confusionString  the word as written in the text
     * @param confusionString2 the suggested alternative
     * @return the formatted message
     */
    private String getMessage(ConfusionString confusionString, ConfusionString confusionString2) {
        boolean bothDescribed = confusionString.getDescription() != null && confusionString2.getDescription() != null;
        if (bothDescribed) {
            return Tools.i18n(this.messages, "statistics_suggest1",
                    confusionString2.getString(), confusionString2.getDescription(),
                    confusionString.getString(), confusionString.getDescription());
        }
        if (confusionString2.getDescription() != null) {
            return Tools.i18n(this.messages, "statistics_suggest2",
                    confusionString2.getString(), confusionString2.getDescription());
        }
        return Tools.i18n(this.messages, "statistics_suggest3", confusionString2.getString());
    }

    /**
     * Tokenizes the text with {@link #getWordTokenizer()} and returns the
     * non-whitespace tokens with their character offsets.
     *
     * @param str the text to tokenize
     * @param z   if {@code true}, a sentence-start marker token
     *            ({@code LanguageModel.GOOGLE_SENTENCE_START}, offsets 0..0) is
     *            put in front of the real tokens
     * @return the tokens in text order; whitespace tokens are skipped but still
     *         advance the offsets
     */
    private List<GoogleToken> getGoogleTokens(String str, boolean z) {
        List<GoogleToken> result = new ArrayList<>();
        if (z) {
            result.add(new GoogleToken(LanguageModel.GOOGLE_SENTENCE_START, 0, 0));
        }
        int offset = 0;
        for (String part : getWordTokenizer().tokenize(str)) {
            int end = offset + part.length();
            if (!StringTools.isWhitespace(part)) {
                result.add(new GoogleToken(part, offset, end));
            }
            offset = end;
        }
        return result;
    }

    /** Intentionally empty. */
    @Override // org.languagetool.rules.Rule
    public void reset() {
    }

    /**
     * Replaces all loaded confusion sets with the given one: the word map is
     * cleared and every member string of the set is mapped to a singleton list
     * holding that set.
     *
     * @param confusionSet the only confusion set to be used from now on
     */
    public void setConfusionSet(ConfusionSet confusionSet) {
        this.wordToSets.clear();
        for (ConfusionString member : confusionSet.getSet()) {
            String word = member.getString();
            this.wordToSets.put(word, Collections.singletonList(confusionSet));
        }
    }

    /** @return the n-gram size this rule was constructed with */
    public int getNGrams() {
        return this.grams;
    }

    /**
     * Picks the other member of a two-element confusion set and checks whether it
     * is the better choice for the given token.
     *
     * @param googleToken the token as written in the text
     * @param list        all tokens of the sentence
     * @param set         the confusion set; must contain exactly two members
     * @param j           factor by which the alternative must be more probable
     * @return the alternative if it is sufficiently more probable, otherwise null
     * @throws RuntimeException if the set does not have exactly two members
     */
    @Nullable
    private ConfusionString getBetterAlternativeOrNull(GoogleToken googleToken, List<GoogleToken> list, Set<ConfusionString> set, long j) {
        if (set.size() == 2) {
            ConfusionString alternative = getAlternativeTerm(set, googleToken);
            return getBetterAlternativeOrNull(googleToken, list, alternative, j);
        }
        throw new RuntimeException("Confusion set must be of size 2: " + set);
    }

    /**
     * Returns the first member of the set whose string differs from the token text.
     *
     * @param set         the confusion set to search
     * @param googleToken the token as written in the text
     * @return the first set member that is not the token itself
     * @throws RuntimeException if every member equals the token text (or the set is empty)
     */
    private ConfusionString getAlternativeTerm(Set<ConfusionString> set, GoogleToken googleToken) {
        for (ConfusionString candidate : set) {
            boolean isTokenItself = candidate.getString().equals(googleToken.token);
            if (isTokenItself) {
                continue;
            }
            return candidate;
        }
        throw new RuntimeException("No alternative found for: " + googleToken);
    }

    /**
     * Returns the member of the set whose string equals the token text.
     *
     * @param set         the confusion set to search
     * @param googleToken the token as written in the text
     * @return the first set member matching the token text
     * @throws RuntimeException if no member equals the token text
     */
    private ConfusionString getConfusionString(Set<ConfusionString> set, GoogleToken googleToken) {
        for (ConfusionString candidate : set) {
            boolean isTokenItself = candidate.getString().equals(googleToken.token);
            if (!isTokenItself) {
                continue;
            }
            return candidate;
        }
        throw new RuntimeException("Not found in set: " + googleToken);
    }

    /**
     * Compares the context probability of the token as written with that of the
     * given alternative.
     *
     * @param googleToken     the token as written in the text
     * @param list            all tokens of the sentence
     * @param confusionString the alternative to evaluate
     * @param j               factor by which the alternative's probability must exceed
     *                        the original's
     * @return {@code confusionString} unless its probability is below
     *         {@code MIN_PROB} or not greater than the original's probability
     *         times {@code j}; {@code null} in those cases
     * @throws RuntimeException if the configured n-gram size is neither 3 nor 4
     */
    private ConfusionString getBetterAlternativeOrNull(GoogleToken googleToken, List<GoogleToken> list, ConfusionString confusionString, long j) {
        final String written = googleToken.token;
        final double writtenProb;
        final double alternativeProb;
        switch (this.grams) {
            case 3:
                writtenProb = get3gramProbabilityFor(googleToken, list, written);
                alternativeProb = get3gramProbabilityFor(googleToken, list, confusionString.getString());
                break;
            case 4:
                writtenProb = get4gramProbabilityFor(googleToken, list, written);
                alternativeProb = get4gramProbabilityFor(googleToken, list, confusionString.getString());
                break;
            default:
                throw new RuntimeException("Only 3grams and 4grams are supported");
        }
        debug("P(" + written + ") = %.90f\n", Double.valueOf(writtenProb));
        debug("P(" + confusionString + ") = %.90f\n", Double.valueOf(alternativeProb));
        // Kept as a disjunction of the rejecting comparisons so that the outcome for
        // non-comparable values stays exactly as before.
        boolean rejected = alternativeProb < MIN_PROB || alternativeProb <= writtenProb * j;
        return rejected ? null : confusionString;
    }

    /**
     * Convenience overload for a single-token replacement: wraps {@code str} in one
     * token (offsets 0..{@code str.length()}) and delegates to the list-based
     * overload.
     *
     * @param googleToken the token in {@code list} whose context is wanted
     * @param list        all tokens of the sentence
     * @param str         the text to put in place of {@code googleToken}
     * @param i           number of tokens to include to the left
     * @param i2          number of tokens to include to the right
     * @return the context as a list of token strings
     */
    List<String> getContext(GoogleToken googleToken, List<GoogleToken> list, String str, int i, int i2) {
        GoogleToken replacement = new GoogleToken(str, 0, str.length());
        return getContext(googleToken, list, Collections.singletonList(replacement), i, i2);
    }

    /**
     * Builds the list of token strings surrounding {@code googleToken}, with the
     * token itself replaced by the strings of {@code list2}.
     *
     * <p>Whitespace tokens are skipped and do not count towards {@code i} or
     * {@code i2}. If the sentence ends before {@code i2} right-hand tokens are
     * found, the remainder is padded with {@code "."}. If fewer than {@code i}
     * tokens precede the token, the result is all preceding tokens followed by the
     * replacement, and <em>no</em> right-hand context is added.
     *
     * @param googleToken the token in {@code list} whose context is wanted
     *                    (located via {@code List.indexOf})
     * @param list        all tokens of the sentence
     * @param list2       the token(s) to put in place of {@code googleToken}
     * @param i           number of tokens to include to the left
     * @param i2          number of tokens to include to the right
     * @return the context as a list of token strings
     * @throws RuntimeException if {@code googleToken} is not contained in {@code list}
     */
    private List<String> getContext(GoogleToken googleToken, List<GoogleToken> list, List<GoogleToken> list2, int i, int i2) {
        int tokenIndex = list.indexOf(googleToken);
        if (tokenIndex == -1) {
            throw new RuntimeException("Token not found: " + googleToken);
        }
        List<String> context = new ArrayList<>();

        // Left context: walk backwards, prepending non-whitespace tokens.
        int distance = 1;
        int collected = 0;
        while (collected < i) {
            if (tokenIndex - distance < 0) {
                // Ran out of tokens on the left: return everything before the token
                // plus the replacement, without any right context.
                context.clear();
                for (GoogleToken replacement : list2) {
                    context.add(replacement.token);
                }
                for (int k = tokenIndex - 1; k >= 0; k--) {
                    context.add(0, list.get(k).token);
                }
                return context;
            }
            GoogleToken left = list.get(tokenIndex - distance);
            if (!left.isWhitespace()) {
                context.add(0, left.token);
                collected++;
            }
            distance++;
        }

        // The replacement for the token itself.
        for (GoogleToken replacement : list2) {
            context.add(replacement.token);
        }

        // Right context: walk forwards, padding with "." past the end of the sentence.
        distance = 1;
        collected = 0;
        while (collected < i2) {
            if (tokenIndex + distance >= list.size()) {
                context.add(".");
                collected++;
            } else {
                GoogleToken right = list.get(tokenIndex + distance);
                if (!right.isWhitespace()) {
                    context.add(right.token);
                    collected++;
                }
            }
            distance++;
        }
        return context;
    }

    /**
     * Computes a 3-gram based probability for {@code str} placed at the position
     * of {@code googleToken}.
     *
     * <p>If {@code str} tokenizes to a single token, the product of the
     * probabilities of the three windows (0 left/2 right, 1/1, 2/0) is used. If it
     * tokenizes to two tokens, the windows (0/1) and (1/0) around the two tokens
     * are used and the middle term is their average with a coverage of 1.
     *
     * @param googleToken the token in {@code list} being evaluated
     * @param list        all tokens of the sentence
     * @param str         the word to evaluate at that position
     * @return the product of the three probabilities, or {@code MIN_PROB} if none
     *         of them reaches {@code MIN_COVERAGE}
     * @throws RuntimeException if {@code str} tokenizes to anything other than one
     *                          or two tokens
     */
    private double get3gramProbabilityFor(GoogleToken googleToken, List<GoogleToken> list, String str) {
        Probability rightWindow;
        Probability middleWindow;
        Probability leftWindow;
        List<GoogleToken> wordTokens = getGoogleTokens(str, false);
        if (wordTokens.size() == 1) {
            rightWindow = getPseudoProbability(getContext(googleToken, list, str, 0, 2));
            middleWindow = getPseudoProbability(getContext(googleToken, list, str, 1, 1));
            leftWindow = getPseudoProbability(getContext(googleToken, list, str, 2, 0));
        } else if (wordTokens.size() == 2) {
            rightWindow = getPseudoProbability(getContext(googleToken, list, wordTokens, 0, 1));
            leftWindow = getPseudoProbability(getContext(googleToken, list, wordTokens, 1, 0));
            middleWindow = new Probability((rightWindow.prob + leftWindow.prob) / 2.0d, 1.0f);
        } else {
            throw new RuntimeException("Words that consists of more than 2 tokens (according to Google tokenization) are not supported yet: " + str + " -> " + wordTokens);
        }
        if (rightWindow.coverage >= MIN_COVERAGE || middleWindow.coverage >= MIN_COVERAGE || leftWindow.coverage >= MIN_COVERAGE) {
            return rightWindow.prob * middleWindow.prob * leftWindow.prob;
        }
        debug("  Min coverage of %.2f not reached: %.2f, %.2f, %.2f, assuming p=0\n", MIN_COVERAGE, rightWindow.coverage, middleWindow.coverage, leftWindow.coverage);
        return MIN_PROB;
    }

    /**
     * Computes a 4-gram based probability for {@code str} placed at the position
     * of {@code googleToken}, as the product of the probabilities of the windows
     * (0 left/3 right), (1/2) and (3/0).
     *
     * @param googleToken the token in {@code list} being evaluated
     * @param list        all tokens of the sentence
     * @param str         the word to evaluate at that position
     * @return the product of the three probabilities, or {@code MIN_PROB} if none
     *         of them reaches {@code MIN_COVERAGE}
     */
    private double get4gramProbabilityFor(GoogleToken googleToken, List<GoogleToken> list, String str) {
        Probability rightWindow = getPseudoProbability(getContext(googleToken, list, str, 0, 3));
        Probability middleWindow = getPseudoProbability(getContext(googleToken, list, str, 1, 2));
        Probability leftWindow = getPseudoProbability(getContext(googleToken, list, str, 3, 0));
        if (rightWindow.coverage >= MIN_COVERAGE || middleWindow.coverage >= MIN_COVERAGE || leftWindow.coverage >= MIN_COVERAGE) {
            return rightWindow.prob * middleWindow.prob * leftWindow.prob;
        }
        debug("  Min coverage of %.2f not reached: %.2f, %.2f, %.2f, assuming p=0\n", MIN_COVERAGE, rightWindow.coverage, middleWindow.coverage, leftWindow.coverage);
        return MIN_PROB;
    }

    /**
     * Estimates a probability for a token sequence from language-model counts.
     *
     * <p>The estimate starts with {@code (count(first) + 1) / (totalTokenCount + 1)}
     * and is multiplied, for every prefix of length 2..n, by
     * {@code (count(prefix) + 1) / (count(first) + 1)}. Coverage is the share of
     * these count lookups that returned a value greater than zero.
     *
     * @param list the token strings of the context; must contain at least one element
     * @return the estimated probability together with its coverage
     */
    Probability getPseudoProbability(List<String> list) {
        int lookups = 1;
        int hits = 0;
        long firstCount = this.lm.getCount(list.get(0));
        if (firstCount > 0) {
            hits++;
        }
        // Cast before dividing: long / long would truncate the ratio to an integer.
        double prob = (double) (firstCount + 1) / (this.totalTokenCount + 1);
        debug("    P for %s: %.20f (%d)\n", list.get(0), prob, firstCount);
        for (int end = 2; end <= list.size(); end++) {
            List<String> prefix = list.subList(0, end);
            long prefixCount = this.lm.getCount(prefix);
            double step = (double) (prefixCount + 1) / (firstCount + 1);
            lookups++;
            debug("    P for " + prefix + ": %.20f (%d)\n", step, prefixCount);
            if (prefixCount > 0) {
                hits++;
            }
            prob *= step;
        }
        debug("  " + StringTools.listToString(list, " ") + " => %.20f\n", prob);
        // Cast before dividing: int / int would collapse the coverage to 0 or 1.
        return new Probability(prob, (float) hits / lookups);
    }

    /**
     * Same estimate as {@link #getPseudoProbability(List)} but without coverage
     * tracking: {@code (count(first) + 1) / (totalTokenCount + 1)} multiplied, for
     * every prefix of length 2..n, by {@code (count(prefix) + 1) / (count(first) + 1)}.
     *
     * @param list the token strings of the context; must contain at least one element
     * @return the estimated probability
     */
    double getPseudoProbability2(List<String> list) {
        long firstCount = this.lm.getCount(list.get(0));
        // Cast before dividing: long / long would truncate the ratio to an integer.
        double prob = (double) (firstCount + 1) / (this.totalTokenCount + 1);
        for (int end = 2; end <= list.size(); end++) {
            long prefixCount = this.lm.getCount(list.subList(0, end));
            prob *= (double) (prefixCount + 1) / (firstCount + 1);
        }
        return prob;
    }

    /**
     * Prints a printf-style diagnostic line to standard output when {@code DEBUG}
     * is enabled; does nothing otherwise.
     *
     * @param str    a {@link java.util.Formatter} format string
     * @param objArr the arguments referenced by the format string
     */
    private void debug(String str, Object... objArr) {
        if (DEBUG) {
            // Fixed locale so that number formatting does not depend on the host.
            System.out.printf(java.util.Locale.ROOT, str, objArr);
        }
    }
}
