package de.tudarmstadt.ukp.jwktl.parser.ru.wikokit.base.wikt.multi.ru;

import de.tudarmstadt.ukp.jwktl.parser.ru.wikokit.base.wikipedia.language.LanguageType;
import de.tudarmstadt.ukp.jwktl.parser.ru.wikokit.base.wikipedia.text.WikiParser;
import de.tudarmstadt.ukp.jwktl.parser.ru.wikokit.base.wikipedia.util.StringUtilRegular;
import de.tudarmstadt.ukp.jwktl.parser.ru.wikokit.base.wikt.constant.POS;
import de.tudarmstadt.ukp.jwktl.parser.ru.wikokit.base.wikt.util.LangText;
import de.tudarmstadt.ukp.jwktl.parser.ru.wikokit.base.wikt.util.POSText;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:de/tudarmstadt/ukp/jwktl/parser/ru/wikokit/base/wikt/multi/ru/WPOSRu.class */
/**
 * Splits the text of one language section of a Russian Wiktionary entry into
 * part-of-speech (POS) sections and guesses the POS of each section.
 *
 * <p>Two layouts are recognised: sections introduced by second-level headers
 * ({@code == ... ==}) and sections introduced by the {@code {{заголовок|..|add=..}}}
 * template. When neither is found the whole text is treated as one section.
 *
 * <p>All methods are static; diagnostics are written to {@code System.out}.
 */
public class WPOSRu {
    /** Shared empty result, also used as the array-type token for {@code toArray}. */
    private static final POSText[] NULL_POS_TEXT_ARRAY = new POSText[0];

    /** A second-level header at the start of a line; group 1 is the header text. */
    private static final Pattern ptrn_2nd_level = Pattern.compile("(?m)^==\\s*([^=]+?)\\s*==\\s*");

    /**
     * The title template with a language code at the start of a line;
     * group 1 is the language code, group 2 is the (1 to 4 character) {@code add=} value.
     */
    private static final Pattern ptrn_title_add_template = Pattern.compile("(?m)^\\Q{{заголовок|\\E([^|]+?)\\Q|add=\\E([^}]{1,4})\\s*");

    /** The title template without a language code; group 1 is the {@code add=} value. */
    private static final Pattern ptrn_title_add_template_without_lang = Pattern.compile("(?m)^\\Q{{заголовок|add=\\E([^}]{1,4})\\s*");

    /**
     * The third-level "morphological and syntactic properties" header followed by a
     * newline; group 1 is the first two characters of the text after the header.
     */
    private static final Pattern ptrn_morpho_then_2letters = Pattern.compile("===\\s*Морфологические и синтаксические свойства\\s*===\\s*\\n\\s*(..)");

    /**
     * Tells whether a second-level header is known not to introduce a POS section.
     *
     * @param str header text, may be {@code null}
     * @return {@code true} only when the header equals "Ссылки" ignoring case;
     *         {@code false} otherwise, including for {@code null}
     */
    public static boolean isSecondLevelHeaderWordNotPOS(String str) {
        // Constant-first comparison so that a null header yields false instead of an NPE.
        return "Ссылки".equalsIgnoreCase(str);
    }

    /**
     * Splits the text of a language section into POS sections.
     *
     * <p>If the first second-level header is missing or is a non-POS header, the text is
     * split by the title template instead; if that finds nothing either, the whole
     * text becomes a single section whose POS is guessed by {@link #guessPOS(StringBuffer)}.
     *
     * @param str      the word (page title); used in diagnostics and for header matching
     * @param langText the language section to split
     * @return the POS sections; an empty array when {@code langText} or its text is
     *         {@code null} or empty
     */
    public static POSText[] splitToPOSSections(String str, LangText langText) {
        if (null == langText || null == langText.text || 0 == langText.text.length()) {
            return NULL_POS_TEXT_ARRAY;
        }

        Matcher headers = ptrn_2nd_level.matcher(langText.text.toString());
        boolean startsWithPosHeader = headers.find() && !isSecondLevelHeaderWordNotPOS(headers.group(1));
        if (!startsWithPosHeader) {
            POSText[] byTitleTemplate = splitToPOSWithTitleAddParameter(str, langText);
            return byTitleTemplate.length > 0 ? byTitleTemplate : new POSText[]{guessPOS(langText.text)};
        }

        List<POSText> sections = new ArrayList<POSText>();
        // Text collected for the section being built. It is kept (not reset) when the
        // section's POS cannot be determined, so that text flows into the next section.
        StringBuffer pending = new StringBuffer();
        String header = headerWithoutAccents(headers);

        int from = 0;
        int to;
        boolean more = headers.find();
        if (more) {
            to = headers.start();
        } else {
            // Only one header: the trailing section below takes the whole text.
            to = 0;
            System.out.println("Warning: there is only one POS block, e.g. ==Verb I== for the word '" + str + "' with language code '" + langText.getLanguage().toString() + "' in WPOSRu.splitToPOSSections()");
        }

        while (more) {
            pending.append(langText.text.substring(from, to));
            POS pos = guessPOSWith2ndLevelHeader(str, header, pending);
            if (null != pos) {
                sections.add(new POSText(pos, pending.toString()));
                pending.setLength(0);
            }
            // The matcher is positioned on the header that opens the next section.
            header = headerWithoutAccents(headers);
            more = headers.find();
            if (more) {
                from = to;
                to = headers.start();
            }
        }

        // Trailing section: from the last header found to the end of the text.
        pending.append(langText.text.substring(to));
        POS lastPos = guessPOSWith2ndLevelHeader(str, header, pending);
        if (null != lastPos) {
            sections.add(new POSText(lastPos, pending.toString()));
        }
        return sections.toArray(NULL_POS_TEXT_ARRAY);
    }

    /** Returns group 1 of the current header match after {@code WikiParser.removeAcuteAccent} for Russian. */
    private static String headerWithoutAccents(Matcher headers) {
        return WikiParser.removeAcuteAccent(new StringBuffer(headers.group(1)), LanguageType.ru).toString();
    }

    /**
     * Checks the language code found in the title template against the language of the section.
     *
     * @param str      the word (page title), used in diagnostics only
     * @param langText the language section being parsed
     * @param str2     the first template parameter; a value starting with {@code add=}
     *                 means the template has no language code and is accepted
     * @return {@code true} when the code is absent ({@code add=...}) or denotes the
     *         section's language; {@code false}, after printing an error, when it is
     *         {@code null}, unknown, or denotes another language
     */
    private static boolean isValidLanguageCode(String str, LangText langText, String str2) {
        // Null must be rejected before any dereference of the code.
        if (null == str2) {
            System.out.println("Error: null language code in {{заголовок|lang_code|add=..}} for the word '" + str + "' in WPOSRu.splitToPOSWithTitleAddParameter()");
            return false;
        }
        if (str2.startsWith("add=")) {
            return true;
        }
        if (str2.length() >= 2 && LanguageType.has(str2)) {
            if (LanguageType.get(str2) == langText.getLanguage()) {
                return true;
            }
            System.out.println("Error: language code '" + str2 + "' != '" + langText.getLanguage().toString() + "' (in {{заголовок|YY|add=..}} and {{-XX-}}) for the word '" + str + "' in WPOSRu.splitToPOSWithTitleAddParameter()");
            return false;
        }
        System.out.println("Error: unknown language code '" + str2 + "' in {{заголовок|lang_code|add=..}} for the word '" + str + "' in WPOSRu.splitToPOSWithTitleAddParameter()");
        return false;
    }

    /**
     * Splits the text of a language section at occurrences of the title template
     * ({@code {{заголовок|lang|add=..}}} or {@code {{заголовок|add=..}}}).
     *
     * @param str      the word (page title), used in diagnostics only
     * @param langText the language section to split; its text must not be {@code null}
     * @return the POS sections, or an empty array when the template is not found or
     *         its language code is rejected by {@code isValidLanguageCode}
     */
    private static POSText[] splitToPOSWithTitleAddParameter(String str, LangText langText) {
        String text = langText.text.toString();
        boolean hasLanguageCode = !text.contains("{{заголовок|add=");
        Matcher titles = hasLanguageCode
                ? ptrn_title_add_template.matcher(text)
                : ptrn_title_add_template_without_lang.matcher(text);

        if (!titles.find()) {
            return NULL_POS_TEXT_ARRAY;
        }
        if (hasLanguageCode && !isValidLanguageCode(str, langText, titles.group(1))) {
            return NULL_POS_TEXT_ARRAY;
        }

        List<POSText> sections = new ArrayList<POSText>();
        // Kept (not reset) when guessPOS yields a null POS type, so the text is carried forward.
        StringBuffer pending = new StringBuffer();

        int from = 0;
        int to;
        boolean more = titles.find();
        if (more) {
            to = titles.start();
        } else {
            // Only one template: the trailing section below takes the whole text.
            to = 0;
            System.out.println("Warning: there is only one POS block, e.g. {{заголовок|...|add=I}} for the word '" + str + "' with language code  '" + langText.getLanguage().toString() + "' in WPOSRu.splitToPOSSections()");
        }

        while (more) {
            pending.append(langText.text.substring(from, to));
            POSText section = guessPOS(pending);
            if (null != section.getPOSType()) {
                sections.add(section);
                pending.setLength(0);
            }
            more = titles.find();
            if (more) {
                from = to;
                to = titles.start();
            }
        }

        // Trailing section: from the last template found to the end of the text.
        pending.append(langText.text.substring(to));
        POSText lastSection = guessPOS(pending);
        if (null != lastSection.getPOSType()) {
            sections.add(lastSection);
        }
        return sections.toArray(NULL_POS_TEXT_ARRAY);
    }

    /**
     * Guesses the POS of a section from its text.
     *
     * <p>If the morphology header is present and is directly followed by a template
     * opening ({@code {{}), the word returned by
     * {@code StringUtilRegular.getLettersTillSpaceHyphenOrPipe} for the text after it is
     * lower-cased and looked up in {@code POSTemplateRu}. If the morphology header is
     * absent and the text contains {@code {{phrase}, the POS is {@code POS.phrase}.
     * In every other case the POS stays {@code POS.unknown}.
     *
     * @param stringBuffer section text, may be {@code null} or empty
     * @return the guessed POS paired with a copy of the text (with an empty string when
     *         the input is {@code null} or empty)
     */
    public static POSText guessPOS(StringBuffer stringBuffer) {
        POS pos = POS.unknown;
        if (null == stringBuffer || 0 == stringBuffer.length()) {
            return new POSText(pos, "");
        }

        Matcher morphology = ptrn_morpho_then_2letters.matcher(stringBuffer.toString());
        if (morphology.find()) {
            if ("{{".equals(morphology.group(1))) {
                // Locale.ROOT keeps the lookup key independent of the JVM default locale.
                String templateName = StringUtilRegular
                        .getLettersTillSpaceHyphenOrPipe(stringBuffer.substring(morphology.end()))
                        .toLowerCase(Locale.ROOT);
                if (POSTemplateRu.has(templateName)) {
                    pos = checkIfSuchPOSExist(templateName);
                }
            }
        } else if (isPhrasePOS(stringBuffer)) {
            pos = POS.phrase;
        }
        return new POSText(pos, new StringBuffer(stringBuffer));
    }

    /** Tells whether the text contains the opening of a {@code phrase} template. */
    private static boolean isPhrasePOS(StringBuffer stringBuffer) {
        return stringBuffer.indexOf("{{phrase") >= 0;
    }

    /**
     * Looks up a POS by template name.
     *
     * <p>The name is first cut at the first space, {@code |}, {@code }} or {@code -}.
     * The cut name is returned via {@code POSTemplateRu.get} when
     * {@code POSTemplateRu.has} accepts it; otherwise its lower-cased form is passed to
     * {@code POSTemplateRu.isPOSIn}.
     *
     * @param str template name, possibly followed by parameters; must not be {@code null}
     * @return the POS reported by {@code POSTemplateRu}
     */
    public static POS checkIfSuchPOSExist(String str) {
        String name = str;
        for (int k = 0; k < str.length(); k++) {
            if (" |}-".indexOf(str.charAt(k)) >= 0) {
                name = str.substring(0, k);
                break;
            }
        }
        return POSTemplateRu.has(name) ? POSTemplateRu.get(name) : POSTemplateRu.isPOSIn(name.toLowerCase(Locale.ROOT));
    }

    /**
     * Guesses the POS of a section, falling back on the text of its second-level header.
     *
     * <p>The result of {@link #guessPOS(StringBuffer)} is returned when it is not
     * {@code POS.unknown} or when the section text is {@code null} or empty. Otherwise
     * the lower-cased header, and then the result of
     * {@code StringUtilRegular.getLettersTillSpace} for it, are looked up in
     * {@code POSTemplateRu}.
     *
     * @param str          the word (page title); must not be {@code null} when the
     *                     header fallback is reached
     * @param str2         the second-level header text; must not be {@code null} when
     *                     the header fallback is reached
     * @param stringBuffer the section text
     * @return the POS; {@code POS.unknown} when the header's leading word equals the
     *         page title ignoring case; {@code null} when the header is recognised
     *         neither as a POS nor as the page title
     */
    public static POS guessPOSWith2ndLevelHeader(String str, String str2, StringBuffer stringBuffer) {
        POSText guessed = guessPOS(stringBuffer);
        boolean noText = null == stringBuffer || 0 == stringBuffer.length();
        if (POS.unknown != guessed.getPOSType() || noText) {
            return guessed.getPOSType();
        }

        String header = str2.toLowerCase(Locale.ROOT);
        if (POSTemplateRu.has(header)) {
            return POSTemplateRu.get(header);
        }
        String firstWord = StringUtilRegular.getLettersTillSpace(header);
        if (POSTemplateRu.has(firstWord)) {
            return POSTemplateRu.get(firstWord);
        }
        // A header that merely repeats the page title still opens a section of unknown POS.
        if (str.equalsIgnoreCase(firstWord)) {
            return POS.unknown;
        }
        return null;
    }
}
