package org.languagetool.tagging.disambiguation.uk;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.languagetool.AnalyzedSentence;
import org.languagetool.AnalyzedToken;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.language.Ukrainian;
import org.languagetool.rules.uk.LemmaHelper;
import org.languagetool.tagging.disambiguation.AbstractDisambiguator;
import org.languagetool.tagging.disambiguation.Disambiguator;
import org.languagetool.tagging.disambiguation.rules.XmlRuleDisambiguator;
import org.languagetool.tagging.uk.PosTagHelper;
import org.languagetool.tools.StringTools;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/languagetool/tagging/disambiguation/uk/UkrainianHybridDisambiguator.class */
public class UkrainianHybridDisambiguator extends AbstractDisambiguator {
    /** POS-tag fragment searched for (by substring match) to recognise last-name readings. */
    private static final String LAST_NAME_TAG = ":lname";
    /** Tag regex checked against the following token when deciding whether plural name readings are dropped. */
    private static final String PLURAL_LNAME = "noun:anim:p:.*:[lp]name.*";
    /** Surface form of the abbreviation handled by {@code disambiguateSt}. */
    private static final String ST_ABBR = "ст.";
    /** Multiword chunker; first stage applied after the in-class pre-disambiguation. */
    private final Disambiguator chunker = new UkrainianMultiwordChunker("/uk/multiwords.txt", true);
    /** XML-rule disambiguator; applied to the chunker's output. */
    private final Disambiguator disambiguator = new XmlRuleDisambiguator(new Ukrainian());
    /** Helper whose {@code removeRareForms} is invoked during pre-disambiguation. */
    private final SimpleDisambiguator simpleDisambiguator = new SimpleDisambiguator();
    // The logger is never reassigned, so it is declared final like the other static constants.
    private static final Logger logger = LoggerFactory.getLogger(UkrainianHybridDisambiguator.class);
    /** A single upper-case letter from the listed Cyrillic set followed by a dot, e.g. a name initial. */
    private static final Pattern INITIAL_REGEX = Pattern.compile("[А-ЯІЇЄҐ]\\.");
    /** Tag pattern for the {@code noun:inanim:?:v_kly} readings removed by {@code removeInanimVKly}. */
    private static final Pattern INANIM_VKLY = Pattern.compile("noun:inanim:.:v_kly.*");
    /** Tag pattern for the plural {@code fname} readings removed by {@code removePluralForNames}. */
    private static final Pattern PLURAL_NAME = Pattern.compile("noun:anim:p:.*:fname.*");
    /** Roman-numeral-like token, optionally a dash-separated range (character set presumably mixes Latin and look-alike Cyrillic letters). */
    private static final Pattern LATIN_DIGITS_PATTERN = Pattern.compile("[XIVХІ]+([–—-][XIVХІ]+)?");
    /** Arabic-digit token, optionally a dash-separated range. */
    private static final Pattern DIGITS_PATTERN = Pattern.compile("[0-9]+([–—-][0-9]+)?");
    /** The word "метро" or a word starting with an upper-case Cyrillic letter. */
    private static final Pattern STATION_NAME_PATTERN = Pattern.compile("метро|[А-Я][а-яіїєґ'-]+");

    /**
     * Runs the complete disambiguation pipeline on a sentence: the in-class
     * pre-disambiguation first, then the multiword chunker, then the XML rule
     * disambiguator.
     *
     * @param analyzedSentence the sentence to disambiguate
     * @return the sentence produced by the XML rule disambiguator
     * @throws IOException if one of the delegate disambiguators fails
     */
    public final AnalyzedSentence disambiguate(AnalyzedSentence analyzedSentence) throws IOException {
        // The return value is not used; the chunker is handed the same sentence object.
        preDisambiguate(analyzedSentence);
        AnalyzedSentence chunked = this.chunker.disambiguate(analyzedSentence);
        return this.disambiguator.disambiguate(chunked);
    }

    /**
     * Applies the hand-written disambiguation steps of this class to the sentence,
     * in a fixed order, before the chunker and XML rules run.
     *
     * @param analyzedSentence the sentence whose token readings are adjusted
     * @return the same {@code analyzedSentence} instance that was passed in
     */
    public AnalyzedSentence preDisambiguate(AnalyzedSentence analyzedSentence) {
        // Initials are retagged first, so the later steps see the rewritten readings.
        retagInitials(analyzedSentence);
        removeInanimVKly(analyzedSentence);
        removePluralForNames(analyzedSentence);
        removeLowerCaseHomonymsForAbbreviations(analyzedSentence);
        removeLowerCaseBadForUpperCaseGood(analyzedSentence);
        // Delegated to the SimpleDisambiguator helper.
        this.simpleDisambiguator.removeRareForms(analyzedSentence);
        disambiguateSt(analyzedSentence);
        return analyzedSentence;
    }

    /**
     * For a capitalized token that has more than one reading and at least one
     * reading accepted by {@code LemmaHelper.hasLemma} for the proper-name lemma
     * and {@code :prop} tag patterns, removes every reading that is tagged
     * {@code :bad} and whose lemma equals the lower-cased lemma of the token's
     * first reading.
     *
     * @param analyzedSentence the sentence whose non-whitespace tokens are inspected
     */
    private void removeLowerCaseBadForUpperCaseGood(AnalyzedSentence analyzedSentence) {
        // Compile once per call rather than once per candidate token.
        final Pattern properLemma = Pattern.compile("[А-ЯІЇЄҐ][а-яіїєґ'-].*");
        final Pattern properTag = Pattern.compile(".*?:prop");

        AnalyzedTokenReadings[] tokens = analyzedSentence.getTokensWithoutWhitespace();
        // Index 0 is skipped, as in every other pass of this class.
        for (int i = 1; i < tokens.length; i++) {
            AnalyzedTokenReadings current = tokens[i];
            if (current.getReadings().size() <= 1
                    || !StringTools.isCapitalizedWord(current.getToken())
                    || !LemmaHelper.hasLemma(current, properLemma, properTag)) {
                continue;
            }

            String firstLemma = current.getAnalyzedToken(0).getLemma();
            if (firstLemma == null) {
                // Nothing to compare against; previously this case would have thrown a NullPointerException.
                continue;
            }
            // Locale.ROOT keeps the case mapping independent of the JVM default locale.
            String lowerLemma = firstLemma.toLowerCase(Locale.ROOT);

            List<AnalyzedToken> readings = current.getReadings();
            // Walk backwards over the list obtained before any removal.
            for (int j = readings.size() - 1; j >= 0; j--) {
                AnalyzedToken reading = readings.get(j);
                if (PosTagHelper.hasPosTagPart(reading, ":bad") && lowerLemma.equals(reading.getLemma())) {
                    current.removeReading(reading);
                }
            }
        }
    }

    /**
     * For an all-upper-case token that has at least one {@code :abbr} reading,
     * removes every reading that is not tagged {@code :abbr}, except the
     * sentence-end marker reading.
     *
     * @param analyzedSentence the sentence whose non-whitespace tokens are inspected
     */
    private void removeLowerCaseHomonymsForAbbreviations(AnalyzedSentence analyzedSentence) {
        AnalyzedTokenReadings[] tokens = analyzedSentence.getTokensWithoutWhitespace();
        for (int i = 1; i < tokens.length; i++) {
            AnalyzedTokenReadings current = tokens[i];
            if (!StringUtils.isAllUpperCase(current.getToken()) || !PosTagHelper.hasPosTagPart(current, ":abbr")) {
                continue;
            }

            List<AnalyzedToken> readings = current.getReadings();
            for (int j = readings.size() - 1; j >= 0; j--) {
                AnalyzedToken reading = readings.get(j);
                // Compare the POS tag, not the token object itself: a String never equals an
                // AnalyzedToken, so the previous check could not protect the SENT_END reading.
                if (!PosTagHelper.hasPosTagPart(reading, ":abbr") && !"SENT_END".equals(reading.getPOSTag())) {
                    current.removeReading(reading);
                }
            }
        }
    }

    /**
     * Drops readings whose tag matches {@code INANIM_VKLY} from a token, provided
     * the token also has at least one other tagged reading (the sentence-end
     * marker does not count).
     *
     * <p>A token is left untouched when it is followed by one of a fixed set of
     * punctuation tokens and preceded by a token with an {@code adj.*v_kly.*}
     * reading.
     *
     * @param analyzedSentence the sentence whose non-whitespace tokens are inspected
     */
    private void removeInanimVKly(AnalyzedSentence analyzedSentence) {
        List<String> followingPunctuation = Arrays.asList(",", "!", "»", "“", "”", "...");
        AnalyzedTokenReadings[] tokens = analyzedSentence.getTokensWithoutWhitespace();

        for (int idx = 1; idx < tokens.length; idx++) {
            List<AnalyzedToken> candidates = tokens[idx].getReadings();

            boolean keepAsIs = idx < tokens.length - 1
                    && followingPunctuation.contains(tokens[idx + 1].getToken())
                    && PosTagHelper.hasPosTag(tokens[idx - 1], "adj.*v_kly.*");
            if (keepAsIs) {
                continue;
            }

            List<AnalyzedToken> vklyReadings = new ArrayList<>();
            boolean otherReadingSeen = false;
            for (AnalyzedToken candidate : candidates) {
                String tag = candidate.getPOSTag();
                if (tag == null) {
                    // An untagged reading ends the scan of this token.
                    break;
                }
                if (tag.equals("SENT_END")) {
                    continue;
                }
                if (INANIM_VKLY.matcher(tag).matches()) {
                    vklyReadings.add(candidate);
                } else {
                    otherReadingSeen = true;
                }
            }

            // Only remove when something else remains to describe the token.
            if (!vklyReadings.isEmpty() && otherReadingSeen) {
                for (AnalyzedToken doomed : vklyReadings) {
                    tokens[idx].removeReading(doomed);
                }
            }
        }
    }

    /**
     * Drops readings whose tag matches {@code PLURAL_NAME} from a token, provided
     * the token also has at least one other tagged reading (the sentence-end
     * marker does not count).
     *
     * <p>A token is left untouched when any of these context checks holds:
     * <ul>
     *   <li>it is past index 1 and the previous token has an {@code adj:p:.*} or
     *       {@code .*num.*} reading, or one of the listed quantity lemmas;</li>
     *   <li>the next token has a reading matching {@code PLURAL_LNAME};</li>
     *   <li>both the next token and the token three positions ahead carry the
     *       last-name tag.</li>
     * </ul>
     *
     * @param analyzedSentence the sentence whose non-whitespace tokens are inspected
     */
    private void removePluralForNames(AnalyzedSentence analyzedSentence) {
        List<String> quantityLemmas = Arrays.asList("багато", "мало", "півсотня", "сотня");
        AnalyzedTokenReadings[] tokens = analyzedSentence.getTokensWithoutWhitespace();

        for (int idx = 1; idx < tokens.length; idx++) {
            List<AnalyzedToken> candidates = tokens[idx].getReadings();

            if (idx > 1
                    && (PosTagHelper.hasPosTag(tokens[idx - 1], "adj:p:.*")
                        || PosTagHelper.hasPosTag(tokens[idx - 1], ".*num.*")
                        || LemmaHelper.hasLemma(tokens[idx - 1], quantityLemmas))) {
                continue;
            }
            if (idx < tokens.length - 1 && PosTagHelper.hasPosTag(tokens[idx + 1], PLURAL_LNAME)) {
                continue;
            }
            if (idx < tokens.length - 3
                    && PosTagHelper.hasPosTagPart(tokens[idx + 1], LAST_NAME_TAG)
                    && PosTagHelper.hasPosTagPart(tokens[idx + 3], LAST_NAME_TAG)) {
                continue;
            }

            List<AnalyzedToken> pluralNameReadings = new ArrayList<>();
            boolean otherReadingSeen = false;
            for (AnalyzedToken candidate : candidates) {
                String tag = candidate.getPOSTag();
                if (tag == null) {
                    // An untagged reading ends the scan of this token.
                    break;
                }
                if (tag.equals("SENT_END")) {
                    continue;
                }
                if (PLURAL_NAME.matcher(tag).matches()) {
                    pluralNameReadings.add(candidate);
                } else {
                    otherReadingSeen = true;
                }
            }

            // Only remove when something else remains to describe the token.
            if (!pluralNameReadings.isEmpty() && otherReadingSeen) {
                for (AnalyzedToken doomed : pluralNameReadings) {
                    tokens[idx].removeReading(doomed);
                }
            }
        }
    }

    /**
     * Finds groups of name initials adjacent to a last-name token and retags the
     * initials from the last name's readings (see {@code checkForInitialRetag}).
     *
     * <p>Whitespace tokens are ignored. The scan collects the indexes of tokens
     * recognised by {@code isInitial} and remembers the most recent token carrying
     * the last-name tag. Initials that precede a last name are retagged as soon as
     * the last name is reached. Initials that follow one are retagged when any
     * other token, or the end of the sentence, is reached.
     *
     * @param analyzedSentence the sentence whose token array is scanned and updated in place
     */
    private void retagInitials(AnalyzedSentence analyzedSentence) {
        AnalyzedTokenReadings[] tokens = analyzedSentence.getTokens();

        // Hard-coded debugging aid kept from the original code. The sentence is now rendered
        // once, and only when debug logging is on, instead of for every relevant token.
        final boolean trace = logger.isDebugEnabled() && analyzedSentence.toString().contains("Баку");
        if (trace) {
            logger.debug(Arrays.asList(tokens).toString());
        }

        List<Integer> initialIndexes = new ArrayList<>();
        AnalyzedTokenReadings lastName = null;

        for (int i = 1; i < tokens.length; i++) {
            if (tokens[i].isWhitespace()) {
                continue;
            }
            if (tokens[i].hasPartialPosTag(LAST_NAME_TAG)) {
                lastName = tokens[i];
                if (trace) {
                    logger.debug("lastN: {}", lastName);
                }
                // Initials seen before this last name: retag them now and start over.
                if (!initialIndexes.isEmpty()) {
                    checkForInitialRetag(lastName, initialIndexes, tokens);
                    lastName = null;
                    initialIndexes.clear();
                }
            } else if (isInitial(tokens, i)) {
                if (trace) {
                    logger.debug("init: {}", tokens[i]);
                }
                initialIndexes.add(i);
            } else {
                // Any other token closes the current group (initials after a last name).
                checkForInitialRetag(lastName, initialIndexes, tokens);
                if (lastName != null && trace) {
                    logger.debug("--");
                }
                lastName = null;
                initialIndexes.clear();
            }
        }

        // Flush a group that runs up to the end of the sentence.
        checkForInitialRetag(lastName, initialIndexes, tokens);
        if (lastName != null && trace) {
            logger.debug("--");
        }
    }

    /**
     * Replaces one or two initial tokens in the array with readings derived from
     * the given last-name token. The first index is retagged as {@code fname} and
     * the second, when present, as {@code pname}. Does nothing when there is no
     * last name or when the number of collected initials is not 1 or 2.
     *
     * @param analyzedTokenReadings the last-name token, or {@code null} if none was seen
     * @param list indexes of the initial tokens within {@code analyzedTokenReadingsArr}
     * @param analyzedTokenReadingsArr the token array whose slots are overwritten
     */
    private static void checkForInitialRetag(AnalyzedTokenReadings analyzedTokenReadings, List<Integer> list, AnalyzedTokenReadings[] analyzedTokenReadingsArr) {
        if (analyzedTokenReadings == null) {
            return;
        }
        int initialCount = list.size();
        if (initialCount != 1 && initialCount != 2) {
            return;
        }

        logger.debug("{} / {}", analyzedTokenReadings, list);

        // NOTE(review): only the array slot is replaced; confirm that the caller's sentence
        // exposes this same array to later processing steps.
        int firstIdx = list.get(0).intValue();
        analyzedTokenReadingsArr[firstIdx] = getInitialReadings(analyzedTokenReadingsArr[firstIdx], analyzedTokenReadings, "fname");

        if (initialCount == 2) {
            int secondIdx = list.get(1).intValue();
            analyzedTokenReadingsArr[secondIdx] = getInitialReadings(analyzedTokenReadingsArr[secondIdx], analyzedTokenReadings, "pname");
        }
    }

    /**
     * Narrows the readings of every {@code "ст."} token based on its neighbours.
     * Each call to {@code remove} keeps only the readings matching the given tag
     * pattern (plus the sentence-end marker).
     *
     * <p>If the next token is a number, noun:inanim:f readings are kept (noun:inanim:p
     * for both tokens when the previous token is also {@code "ст."}). Otherwise the
     * next token's lemma or text is consulted first, and then the previous token is
     * checked for Roman-numeral-like or Arabic digits.
     *
     * @param analyzedSentence the sentence whose non-whitespace tokens are inspected
     */
    private void disambiguateSt(AnalyzedSentence analyzedSentence) {
        AnalyzedTokenReadings[] tokensWithoutWhitespace = analyzedSentence.getTokensWithoutWhitespace();
        int i = 1;
        while (i < tokensWithoutWhitespace.length) {
            if (ST_ABBR.equals(tokensWithoutWhitespace[i].getToken())) {
                // Taken when "ст." is the last token or is NOT followed by a number.
                if (i >= tokensWithoutWhitespace.length - 1 || !tokensWithoutWhitespace[i + 1].getToken().matches("[0-9]+([.,–—-][0-9]+)?")) {
                    if (i < tokensWithoutWhitespace.length - 1) {
                        // Decide by the following word; each match also advances i past "ст.".
                        if (LemmaHelper.hasLemma(tokensWithoutWhitespace[i + 1], "ложка") || tokensWithoutWhitespace[i + 1].getToken().equals("л.")) {
                            remove(tokensWithoutWhitespace[i], Pattern.compile("adj:[fp]:.*"));
                            i++;
                        } else if (LemmaHelper.hasLemma(tokensWithoutWhitespace[i + 1], (List<String>) Arrays.asList("лейтенант", "сержант", "солдат", "науковий", "медсестра"))) {
                            remove(tokensWithoutWhitespace[i], Pattern.compile("adj:m:.*"));
                            i++;
                        } else if (STATION_NAME_PATTERN.matcher(tokensWithoutWhitespace[i + 1].getToken()).matches()) {
                            remove(tokensWithoutWhitespace[i], Pattern.compile("noun:inanim:f:.*"));
                            i++;
                        }
                    }
                    // NOTE: if a branch above advanced i, the checks below look at the token after
                    // "ст.", with "ст." itself as tokens[i - 1].
                    if (i > 1) {
                        if (LATIN_DIGITS_PATTERN.matcher(tokensWithoutWhitespace[i - 1].getToken()).matches()) {
                            // Preceded by a Roman-numeral-like token: keep noun:inanim:n readings.
                            Pattern compile = Pattern.compile("noun:inanim:n:.*");
                            // A doubled "ст. ст." keeps noun:inanim:p readings on both tokens instead.
                            if (i < tokensWithoutWhitespace.length - 1 && ST_ABBR.equals(tokensWithoutWhitespace[i + 1].getToken())) {
                                compile = Pattern.compile("noun:inanim:p:.*");
                                remove(tokensWithoutWhitespace[i + 1], compile);
                            }
                            remove(tokensWithoutWhitespace[i], compile);
                            i++;
                        } else if (DIGITS_PATTERN.matcher(tokensWithoutWhitespace[i - 1].getToken()).matches()) {
                            // Preceded by Arabic digits: keep noun:inanim:n or noun:inanim:f readings.
                            Pattern compile2 = Pattern.compile("noun:inanim:[nf]:.*");
                            // A doubled "ст. ст." keeps noun:inanim:p readings on both tokens instead.
                            if (i < tokensWithoutWhitespace.length - 1 && ST_ABBR.equals(tokensWithoutWhitespace[i + 1].getToken())) {
                                compile2 = Pattern.compile("noun:inanim:p:.*");
                                remove(tokensWithoutWhitespace[i + 1], compile2);
                            }
                            remove(tokensWithoutWhitespace[i], compile2);
                            i++;
                        }
                    }
                } else {
                    // Followed by a number: keep noun:inanim:f readings.
                    Pattern compile3 = Pattern.compile("noun:inanim:f:.*");
                    // Preceded by another "ст.": keep noun:inanim:p readings on both tokens instead.
                    if (i > 2 && ST_ABBR.equals(tokensWithoutWhitespace[i - 1].getToken())) {
                        compile3 = Pattern.compile("noun:inanim:p:.*");
                        remove(tokensWithoutWhitespace[i - 1], compile3);
                    }
                    remove(tokensWithoutWhitespace[i], compile3);
                }
            }
            i++;
        }
    }

    /**
     * Keeps only the readings whose POS tag matches {@code pattern}. Every other
     * reading is removed from the token, except the sentence-end marker reading,
     * which is always kept.
     *
     * @param analyzedTokenReadings the token whose readings are filtered
     * @param pattern POS-tag pattern a reading must match to be kept
     */
    private static void remove(AnalyzedTokenReadings analyzedTokenReadings, Pattern pattern) {
        List<AnalyzedToken> readings = analyzedTokenReadings.getReadings();
        // Walk from the last reading to the first, over the list fetched before any removal.
        for (int idx = readings.size() - 1; idx >= 0; idx--) {
            AnalyzedToken reading = readings.get(idx);
            boolean keep = "SENT_END".equals(reading.getPOSTag()) || PosTagHelper.hasPosTag(reading, pattern);
            if (!keep) {
                analyzedTokenReadings.removeReading(reading);
            }
        }
    }

    /**
     * Builds replacement readings for an initial token from the readings of a
     * last-name token. For every last-name reading whose tag contains the
     * last-name marker, one reading is created whose token text and lemma are the
     * initial's text and whose tag is the last-name tag with the marker replaced by
     * {@code ":" + str + ":abbr"}.
     *
     * @param analyzedTokenReadings the initial token (supplies text and start position)
     * @param analyzedTokenReadings2 the last-name token (supplies the POS tags)
     * @param str name-kind tag part to substitute for the last-name marker
     * @return a new token holding the derived readings at the initial's start position
     */
    private static AnalyzedTokenReadings getInitialReadings(AnalyzedTokenReadings analyzedTokenReadings, AnalyzedTokenReadings analyzedTokenReadings2, String str) {
        List<AnalyzedToken> derived = new ArrayList<>();
        for (AnalyzedToken lastNameReading : analyzedTokenReadings2.getReadings()) {
            String tag = lastNameReading.getPOSTag();
            if (tag == null || !tag.contains(LAST_NAME_TAG)) {
                continue;
            }
            String initialText = analyzedTokenReadings.getAnalyzedToken(0).getToken();
            String initialTag = tag.replace(LAST_NAME_TAG, ":" + str + ":abbr");
            // The initial's text doubles as its lemma.
            derived.add(new AnalyzedToken(initialText, initialTag, initialText));
        }
        return new AnalyzedTokenReadings(derived, analyzedTokenReadings.getStartPos());
    }

    /**
     * Tells whether the token at index {@code i} looks like a name initial: its
     * text ends with a dot and matches {@code INITIAL_REGEX} as a whole.
     *
     * @param analyzedTokenReadingsArr the token array
     * @param i index of the token to test
     * @return {@code true} if the token text is an initial
     */
    private static boolean isInitial(AnalyzedTokenReadings[] analyzedTokenReadingsArr, int i) {
        String text = analyzedTokenReadingsArr[i].getToken();
        // Cheap suffix test first; the regex only runs for tokens ending in a dot.
        if (!text.endsWith(".")) {
            return false;
        }
        return INITIAL_REGEX.matcher(text).matches();
    }
}
