package de.tudarmstadt.ukp.jwktl.parser.ru.wikokit.base.wikipedia.text;

import de.tudarmstadt.ukp.jwktl.parser.ru.wikokit.base.wikipedia.language.LanguageType;
import de.tudarmstadt.ukp.jwktl.parser.ru.wikokit.base.wikipedia.language.WikimediaSisterProject;
import de.tudarmstadt.ukp.jwktl.parser.ru.wikokit.base.wikipedia.util.StringUtil;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:de/tudarmstadt/ukp/jwktl/parser/ru/wikokit/base/wikipedia/text/WikiParser.class */
/**
 * Static helpers that strip or simplify wiki markup (interwiki links, categories,
 * code/pre/source sections, HTML comments, brackets, templates, apostrophe
 * emphasis) so that wiki text can be turned into plain text.
 *
 * <p>All methods work on {@link StringBuffer} because that is the type used by the
 * surrounding parser API. Methods documented as returning an "empty buffer" return
 * a new instance on every call, so callers may freely modify the result.
 */
public class WikiParser {
    private static final Pattern ptrn_remove_interwiki = Pattern.compile("\\[\\[\\w\\w\\w?:.+?\\]\\]");
    private static final Pattern ptrn_remove_brackets_in_interwiki = Pattern.compile("\\[\\[\\w\\w\\w?:(.+?)\\]\\]");
    private static final Pattern ptrn_remove_category_en = Pattern.compile("\\[\\[Category:.+?\\]\\]");
    private static final Pattern ptrn_remove_category_ru = Pattern.compile("\\[\\[Категория:.+?\\]\\]");
    // DOTALL: these sections may span several lines.
    private static final Pattern ptrn_tag_code = Pattern.compile("<code>.+?</code>", Pattern.DOTALL);
    private static final Pattern ptrn_html_comment = Pattern.compile("<!--.+?-->", Pattern.DOTALL);
    private static final Pattern ptrn_pre_code = Pattern.compile("<pre>.+?</pre>", Pattern.DOTALL);
    private static final Pattern ptrn_source_code = Pattern.compile("<source.+?</source>", Pattern.DOTALL);
    private static final Pattern ptrn_remove_brackets_in_wikilinks = Pattern.compile("\\[\\[([^:|]+?)\\]\\]");
    private static final Pattern ptrn_double_brackets = Pattern.compile("\\[\\[(.+?)\\]\\]");
    private static final Pattern ptrn_single_brackets = Pattern.compile("\\[(.+?)\\]");
    // Matches only innermost templates: the body may not contain '{'.
    private static final Pattern ptrn_double_curly_brackets = Pattern.compile("\\{\\{([^\\{]+?)\\}\\}", Pattern.DOTALL);
    // Combining acute accent character.
    private static final Pattern ptrn_accent_sign = Pattern.compile("́");
    private static final Pattern ptrn_triple_apostrophe = Pattern.compile("'''(.+?)'''");
    private static final Pattern ptrn_double_apostrophe = Pattern.compile("''(.+?)''");
    private static final Pattern ptrn_site_name = Pattern.compile("(\\A|\\s)\\S+?[.]\\S+?[^.]([\\s,!?]|\\z)");

    /**
     * Creates the result used for null or empty input.
     *
     * <p>A new buffer is created each time: StringBuffer is mutable, so sharing one
     * static instance would let a caller's append() leak into every later result.
     */
    private static StringBuffer emptyBuffer() {
        return new StringBuffer();
    }

    /** Tells whether the given text is null or has zero length. */
    private static boolean isNullOrEmpty(StringBuffer text) {
        return null == text || 0 == text.length();
    }

    /**
     * Removes interwiki links, i.e. {@code [[xx:...]]} or {@code [[xxx:...]]} where the
     * prefix consists of two or three word characters.
     *
     * @param stringBuffer wiki text, must not be null
     * @return a new buffer without the matched links
     */
    public static StringBuffer removeInterwiki(StringBuffer stringBuffer) {
        return new StringBuffer(ptrn_remove_interwiki.matcher(stringBuffer.toString()).replaceAll(""));
    }

    /**
     * Replaces each interwiki link {@code [[xx:text]]} by its {@code text} part,
     * dropping the brackets and the two/three character prefix.
     *
     * @param stringBuffer wiki text, must not be null
     * @return a new buffer with the links unwrapped
     */
    public static StringBuffer removeBracketsInInterwiki(StringBuffer stringBuffer) {
        return new StringBuffer(
                ptrn_remove_brackets_in_interwiki.matcher(stringBuffer.toString()).replaceAll("$1"));
    }

    /**
     * Removes category links for the given language: {@code [[Category:...]]} for
     * "en" and "simple", {@code [[Категория:...]]} for "ru".
     *
     * <p>For any other language no category pattern is known, so the text is returned
     * unchanged (as a copy) instead of failing with a NullPointerException.
     *
     * @param stringBuffer wiki text, must not be null
     * @param languageType language of the wiki, must not be null
     * @return a new buffer without category links of that language
     */
    public static StringBuffer removeCategory(StringBuffer stringBuffer, LanguageType languageType) {
        // NOTE(review): these comparisons pass a String to LanguageType.equals; this
        // presumably relies on LanguageType providing a String-aware equals - confirm.
        final Pattern categoryPattern;
        if (languageType.equals("en") || languageType.equals("simple")) {
            categoryPattern = ptrn_remove_category_en;
        } else if (languageType.equals("ru")) {
            categoryPattern = ptrn_remove_category_ru;
        } else {
            return new StringBuffer(stringBuffer.toString());
        }
        return new StringBuffer(categoryPattern.matcher(stringBuffer.toString()).replaceAll(""));
    }

    /**
     * Removes every {@code <tag>...</tag>} section (including its content) for the
     * given tag name; sections may span several lines.
     *
     * <p>The tag name is inserted into the regular expression as is, so regex
     * metacharacters in {@code str} are interpreted as regex syntax.
     *
     * @param stringBuffer wiki text
     * @param str          tag name without angle brackets
     * @return the same buffer if {@code str} is null or empty, otherwise a new buffer
     *         without the tagged sections
     */
    public static StringBuffer removeXMLTag(StringBuffer stringBuffer, String str) {
        if (null == str || str.length() == 0) {
            return stringBuffer;
        }
        Pattern tagPattern = Pattern.compile("<" + str + ">.+?</" + str + ">", Pattern.DOTALL);
        return new StringBuffer(tagPattern.matcher(stringBuffer.toString()).replaceAll(""));
    }

    /**
     * Removes {@code <code>...</code>} sections.
     *
     * @param stringBuffer wiki text, must not be null
     * @return a new buffer without the sections
     */
    public static StringBuffer removeXMLTagCode(StringBuffer stringBuffer) {
        return new StringBuffer(ptrn_tag_code.matcher(stringBuffer.toString()).replaceAll(""));
    }

    /**
     * Removes HTML comments {@code <!-- ... -->}.
     *
     * @param stringBuffer wiki text, must not be null
     * @return a new buffer without the comments
     */
    public static StringBuffer removeHTMLComments(StringBuffer stringBuffer) {
        return new StringBuffer(ptrn_html_comment.matcher(stringBuffer.toString()).replaceAll(""));
    }

    /**
     * Removes {@code <pre>...</pre>} sections.
     *
     * @param stringBuffer wiki text, must not be null
     * @return a new buffer without the sections
     */
    public static StringBuffer removePreCode(StringBuffer stringBuffer) {
        return new StringBuffer(ptrn_pre_code.matcher(stringBuffer.toString()).replaceAll(""));
    }

    /**
     * Removes {@code <source ...>...</source>} sections.
     *
     * @param stringBuffer wiki text, must not be null
     * @return a new buffer without the sections
     */
    public static StringBuffer removeSourceCode(StringBuffer stringBuffer) {
        return new StringBuffer(ptrn_source_code.matcher(stringBuffer.toString()).replaceAll(""));
    }

    /**
     * Replaces simple wiki links {@code [[text]]} (no colon, no pipe inside) by
     * {@code text}.
     *
     * @param stringBuffer wiki text, must not be null
     * @return a new buffer with the simple links unwrapped
     */
    @Deprecated
    public static StringBuffer removeBracketsInWikiLink(StringBuffer stringBuffer) {
        return new StringBuffer(
                ptrn_remove_brackets_in_wikilinks.matcher(stringBuffer.toString()).replaceAll("$1"));
    }

    /**
     * Processes single-bracket constructs {@code [...]}.
     *
     * <p>If the bracket content contains a space, the text after the first space is
     * kept: fragments matching the site-name pattern are replaced by a space, the
     * result is trimmed and, if anything is left, it replaces the bracket preceded by
     * one space. In every other case the whole bracket is removed.
     *
     * @param stringBuffer wiki text, may be null
     * @return an empty buffer for null or empty input, otherwise a new buffer with the
     *         brackets processed
     */
    public static StringBuffer parseSingleBrackets(StringBuffer stringBuffer) {
        if (isNullOrEmpty(stringBuffer)) {
            return emptyBuffer();
        }
        Matcher matcher = ptrn_single_brackets.matcher(stringBuffer.toString());
        StringBuffer result = new StringBuffer();
        while (matcher.find()) {
            String content = StringUtil.escapeCharDollarAndBackslash(matcher.group(1)).toString();
            String replacement = "";
            if (content.contains(" ")) {
                String caption = ptrn_site_name
                        .matcher(StringUtil.getTextAfterFirstSpace(content))
                        .replaceAll(" ")
                        .trim();
                if (caption.length() > 0) {
                    replacement = " " + caption;
                }
            }
            // The caption was escaped for use as a replacement string, so it has to
            // pass through appendReplacement (as parseDoubleBrackets and parseBounds
            // do); appending it directly would copy the escape characters into the
            // output. Assumes escapeCharDollarAndBackslash yields replacement-safe text.
            matcher.appendReplacement(result, replacement);
        }
        matcher.appendTail(result);
        return result;
    }

    /**
     * Tells whether {@code prefix} is the category namespace name of the given
     * language ("Category" for en/simple, "Категория" for ru), ignoring case.
     */
    private static boolean isCategoryPrefix(LanguageType languageType, String prefix) {
        if (languageType.equals("en") || languageType.equals("simple")) {
            if (prefix.equalsIgnoreCase("Category")) {
                return true;
            }
        }
        return languageType.equals("ru") && prefix.equalsIgnoreCase("Категория");
    }

    /**
     * Processes double-bracket links {@code [[...]]}.
     *
     * <ul>
     *   <li>Content with a colon: category links of the given language are removed;
     *       links whose prefix is a sister-project code are replaced by
     *       {@code WikimediaSisterProject.getLinkText(prefix, rest)}; links whose
     *       prefix is a known language code are removed when {@code z} is true and
     *       replaced by the text after the colon otherwise. Links with any other
     *       prefix are left in the text untouched.</li>
     *   <li>Content with a pipe (and no colon): replaced by the text after the first
     *       pipe.</li>
     *   <li>Otherwise: replaced by the content itself.</li>
     * </ul>
     *
     * @param stringBuffer wiki text, may be null
     * @param languageType language of the wiki, must not be null
     * @param z            true to delete language-prefixed links, false to keep their
     *                     text after the colon
     * @return an empty buffer for null or empty input, otherwise a new buffer with the
     *         links processed
     */
    public static StringBuffer parseDoubleBrackets(StringBuffer stringBuffer, LanguageType languageType, boolean z) {
        if (isNullOrEmpty(stringBuffer)) {
            return emptyBuffer();
        }
        Matcher matcher = ptrn_double_brackets.matcher(stringBuffer.toString());
        StringBuffer result = new StringBuffer();
        while (matcher.find()) {
            String content = StringUtil.escapeCharDollarAndBackslash(matcher.group(1)).toString();
            if (content.indexOf(':') != -1) {
                String prefix = StringUtil.getTextBeforeFirstColumn(content);
                String rest = StringUtil.getTextAfterFirstColumn(content);
                if (isCategoryPrefix(languageType, prefix)) {
                    matcher.appendReplacement(result, "");
                } else if (WikimediaSisterProject.existsCode(prefix)) {
                    matcher.appendReplacement(result, WikimediaSisterProject.getLinkText(prefix, rest));
                } else if (LanguageType.has(prefix)) {
                    matcher.appendReplacement(result, z ? "" : rest);
                }
                // Any other prefix: nothing is appended here, so the matched text is
                // carried over unchanged by the next appendReplacement/appendTail.
            } else if (content.indexOf('|') != -1) {
                matcher.appendReplacement(result, StringUtil.getTextAfterFirstVerticalPipe(content));
            } else {
                matcher.appendReplacement(result, content);
            }
        }
        matcher.appendTail(result);
        return result;
    }

    /**
     * Removes innermost templates {@code {{...}}}, i.e. those whose body contains no
     * opening curly bracket. Call repeatedly to strip nested templates level by level.
     *
     * @param stringBuffer wiki text, may be null
     * @return an empty buffer for null or empty input, the same buffer if nothing
     *         matched, otherwise a new buffer without the matched templates
     */
    public static StringBuffer parseCurlyBrackets(StringBuffer stringBuffer) {
        if (isNullOrEmpty(stringBuffer)) {
            return emptyBuffer();
        }
        Matcher matcher = ptrn_double_curly_brackets.matcher(stringBuffer.toString());
        if (!matcher.find()) {
            return stringBuffer;
        }
        // replaceAll resets the matcher, so the probe above does not skip a match.
        return new StringBuffer(matcher.replaceAll(""));
    }

    /**
     * Replaces every match of {@code pattern} by its first capturing group, i.e.
     * strips the delimiters around the captured text.
     *
     * @param stringBuffer wiki text, may be null
     * @param pattern      pattern with one capturing group for the text to keep
     * @return an empty buffer for null or empty input, the same buffer if nothing
     *         matched, otherwise a new buffer with the delimiters removed
     */
    private static StringBuffer parseBounds(StringBuffer stringBuffer, Pattern pattern) {
        if (isNullOrEmpty(stringBuffer)) {
            return emptyBuffer();
        }
        Matcher matcher = pattern.matcher(stringBuffer.toString());
        if (!matcher.find()) {
            return stringBuffer;
        }
        StringBuffer result = new StringBuffer();
        do {
            String inner = StringUtil.escapeCharDollarAndBackslash(matcher.group(1)).toString();
            matcher.appendReplacement(result, inner);
        } while (matcher.find());
        matcher.appendTail(result);
        return result;
    }

    /**
     * Strips double-apostrophe markup: {@code ''text''} becomes {@code text}.
     *
     * @param stringBuffer wiki text, may be null
     * @return see {@link #parseBounds(StringBuffer, Pattern)}
     */
    public static StringBuffer parseDoubleApostrophe(StringBuffer stringBuffer) {
        return parseBounds(stringBuffer, ptrn_double_apostrophe);
    }

    /**
     * Strips triple-apostrophe markup: {@code '''text'''} becomes {@code text}.
     *
     * @param stringBuffer wiki text, may be null
     * @return see {@link #parseBounds(StringBuffer, Pattern)}
     */
    public static StringBuffer parseTripleApostrophe(StringBuffer stringBuffer) {
        return parseBounds(stringBuffer, ptrn_triple_apostrophe);
    }

    /**
     * Removes the accent sign character from Russian text.
     *
     * @param stringBuffer wiki text, may be null
     * @param languageType language of the wiki
     * @return the same buffer (possibly null) if the language is not
     *         {@code LanguageType.ru}; otherwise an empty buffer for null or empty
     *         input, or a new buffer without accent signs
     */
    public static StringBuffer removeAcuteAccent(StringBuffer stringBuffer, LanguageType languageType) {
        if (languageType != LanguageType.ru) {
            return stringBuffer;
        }
        if (isNullOrEmpty(stringBuffer)) {
            return emptyBuffer();
        }
        return new StringBuffer(ptrn_accent_sign.matcher(stringBuffer.toString()).replaceAll(""));
    }

    /**
     * Converts wiki text to plain text by applying the individual clean-up steps of
     * this class and of the sibling parsers in a fixed order.
     *
     * @param stringBuffer wiki text, may be null
     * @param languageType language of the wiki
     * @param z            passed to {@link #parseDoubleBrackets}: true to delete
     *                     language-prefixed links
     * @return an empty buffer for null or empty input, otherwise a new buffer with the
     *         converted, trimmed text
     */
    public static StringBuffer convertWikiToText(StringBuffer stringBuffer, LanguageType languageType, boolean z) {
        if (isNullOrEmpty(stringBuffer)) {
            return emptyBuffer();
        }
        // The order of the steps matters; it is the same as in the original nested call.
        StringBuffer text = removeHTMLComments(stringBuffer);
        text = removePreCode(text);
        text = removeSourceCode(text);
        text = removeXMLTagCode(text);
        text = ReferenceParser.expandMoveToEndOfText(text);
        // Two passes: each pass strips only the innermost level of templates.
        text = parseCurlyBrackets(text);
        text = parseCurlyBrackets(text);
        text = TableParser.removeWikiTables(text);
        text = removeAcuteAccent(text, languageType);
        // Triple apostrophes first, otherwise ''...'' would match inside '''...'''.
        text = parseTripleApostrophe(text);
        text = parseDoubleApostrophe(text);
        text = ImageParser.parseImageDescription(text, languageType);
        text = parseDoubleBrackets(text, languageType, z);
        text = parseSingleBrackets(text);
        return new StringBuffer(XMLTagsParser.replaceCharFromXML(text.toString().trim(), ' '));
    }
}
