package de.tudarmstadt.ukp.jwktl.parser.util;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

/* loaded from: input_file:de/tudarmstadt/ukp/jwktl/parser/util/WordListProcessor.class */
/**
 * Splits a wiki-markup word list (e.g. a comma/semicolon/newline separated
 * list of links and templates) into its individual plain-text entries.
 *
 * <p>Internally the text is normalized so that every entry boundary is marked
 * by the single delimiter character U+204B; separators that occur inside
 * {@code [[...]]}, <code>{{...}}</code> or {@code ''...''} are not treated as
 * boundaries.
 */
public class WordListProcessor {
    protected static final Pattern HTML_REMOVER = Pattern.compile("<[^>]+>");
    protected static final Pattern ESCAPE_DELIMITER1 = Pattern.compile("(\\[\\[[^\\]⁋]*)⁋([,;][^\\]⁋]*\\]\\])");
    protected static final Pattern ESCAPE_DELIMITER2 = Pattern.compile("(\\{\\{[^\\}⁋]*)⁋([,;][^\\}⁋]*\\}\\})");
    protected static final Pattern ESCAPE_DELIMITER3 = Pattern.compile("(''[^'⁋]*)⁋([,;][^'⁋]*'')");
    protected static final Pattern REFERENCE_PATTERN = Pattern.compile("<ref>.*?</ref>");
    protected static final Pattern SUPERSCRIPT_PATTERN = Pattern.compile("<sup>\\[\\d+\\]</sup>");

    /** Internal entry separator (U+204B, decimal 8267). */
    private static final char DELIMITER = '\u204B';

    /** {@link #DELIMITER} as a string, for use with {@link String#replace(CharSequence, CharSequence)}. */
    private static final String DELIMITER_STRING = String.valueOf(DELIMITER);

    /**
     * Removes every delimiter character that occurs while the scanner is
     * inside a {@code [[...]]}, <code>{{...}}</code> or {@code ''...''} span,
     * so that separators within such spans do not split an entry.
     *
     * @param str the text in which boundaries are already marked with the delimiter
     * @return the text with delimiters inside markup spans removed
     */
    protected String escapeDelimiters(String str) {
        StringBuilder result = new StringBuilder();
        boolean insideMarkup = false;
        // Opening characters seen so far: "", "[", "[[", "{", "{{", "'" or "''".
        String opener = "";
        // First half of a two-character closing sequence, once it has been seen.
        String pendingCloser = "";
        for (char c : str.toCharArray()) {
            if (insideMarkup) {
                if (c == ']' && "[[".equals(opener)) {
                    if ("]".equals(pendingCloser)) {
                        pendingCloser = "";
                        opener = "";
                        insideMarkup = false;
                    } else {
                        pendingCloser = "]";
                    }
                } else if (c == '}' && "{{".equals(opener)) {
                    if ("}".equals(pendingCloser)) {
                        pendingCloser = "";
                        opener = "";
                        insideMarkup = false;
                    } else {
                        pendingCloser = "}";
                    }
                } else if (c == '\'' && "''".equals(opener)) {
                    if ("'".equals(pendingCloser)) {
                        pendingCloser = "";
                        opener = "";
                        insideMarkup = false;
                    } else {
                        pendingCloser = "'";
                    }
                }
            } else if (c == '[') {
                if ("[".equals(opener)) {
                    opener = "[[";
                    insideMarkup = true;
                } else {
                    opener = "[";
                }
            } else if (c == '{') {
                if ("{".equals(opener)) {
                    opener = "{{";
                    insideMarkup = true;
                } else {
                    opener = "{";
                }
            } else if (c == '\'') {
                if ("'".equals(opener)) {
                    opener = "''";
                    insideMarkup = true;
                } else {
                    opener = "'";
                }
            }
            // The state is updated before this check, so the character that
            // closes a span is always emitted and a delimiter directly after
            // the span is kept.
            if (!insideMarkup || c != DELIMITER) {
                result.append(c);
            }
        }
        return result.toString();
    }

    /**
     * Splits the given word list into its cleaned-up entries.
     *
     * <p>Newlines, commas, semicolons, {@code " - "}, {@code " / "} and
     * {@code "]] or [["} / {@code "]] and [["} act as entry boundaries.
     * {@code <ref>} elements, {@code <sup>[n]</sup>} markers and remaining
     * HTML tags are dropped. Each entry is stripped of wiki links, round
     * brackets, templates and {@code ''...''} comments, of a leading
     * "see also" / "see" / ":" and of one trailing "." and ",". An entry
     * without spaces that contains {@code '/'} is split into its alternatives.
     *
     * @param str the raw word list; may be {@code null}
     * @return the entries in order of appearance; empty (never {@code null})
     *         if {@code str} is {@code null}
     */
    public List<String> splitWordList(String str) {
        List<String> words = new ArrayList<String>();
        if (str == null) {
            return words;
        }

        // Terminate the last entry and turn line breaks into boundaries.
        String text = (str + DELIMITER_STRING).replace('\n', DELIMITER);
        text = REFERENCE_PATTERN.matcher(text).replaceAll("");
        text = SUPERSCRIPT_PATTERN.matcher(text).replaceAll("");
        text = HTML_REMOVER.matcher(text).replaceAll("");
        text = text.replace("&quot;", "\"");

        // Mark each comma/semicolon with a delimiter, drop the delimiters that
        // fall inside markup spans, then consume the remaining marked separators.
        text = text.replace(",", DELIMITER_STRING + ",").replace(";", DELIMITER_STRING + ";");
        text = escapeDelimiters(text);
        text = text.replace(DELIMITER_STRING + ";", DELIMITER_STRING)
                .replace(DELIMITER_STRING + ",", DELIMITER_STRING);

        text = text.replace("]] or [[", "]]" + DELIMITER_STRING + "[[")
                .replace("]] and [[", "]]" + DELIMITER_STRING + "[[")
                .replace(" - ", DELIMITER_STRING);

        // A single occurrence of " / " is enough to trigger the split; this
        // matches the effective behavior of the previous two-step check, whose
        // second search started at the first hit and therefore always succeeded.
        if (text.indexOf(" / ") >= 0) {
            text = text.replace(" / ", DELIMITER_STRING);
        }

        int boundary = text.indexOf(DELIMITER);
        while (boundary >= 0) {
            String segment = text.substring(0, boundary);
            text = text.substring(boundary + 1);
            if (segment.length() > 0) {
                addCleanedEntry(segment, words);
            }
            boundary = text.indexOf(DELIMITER);
        }
        return words;
    }

    /** Cleans a single raw segment and appends the resulting word(s) to {@code words}. */
    private void addCleanedEntry(String segment, List<String> words) {
        String word = stripLeadIn(segment.trim());
        word = deWikify(word).trim();
        word = removeBrackets(word).trim();
        word = removeTemplates(word).trim();
        word = removeComments(word).trim();
        // The lead-in may only become visible after the markup has been removed.
        word = stripLeadIn(word);
        if (word.endsWith(".")) {
            word = word.substring(0, word.length() - 1).trim();
        }
        if (word.endsWith(",")) {
            word = word.substring(0, word.length() - 1).trim();
        }
        word = word.replace(" / ", "/").replace("/ ", "/");
        if (word.length() == 0) {
            return;
        }

        int slash = word.indexOf('/');
        if (slash < 0 || word.indexOf(' ') >= 0) {
            // No alternatives to separate, or a multi-word entry whose slash
            // is kept as part of the text.
            words.add(word);
            return;
        }
        // Single token with slash-separated alternatives: add every part.
        do {
            words.add(word.substring(0, slash));
            word = word.substring(slash + 1);
            slash = word.indexOf('/');
        } while (slash >= 0);
        words.add(word);
    }

    /**
     * Strips a leading "see also", then a leading "see" (both matched
     * case-insensitively), then a leading ":" from the given text.
     * NOTE(review): the "see" check is a plain prefix test, so it also
     * shortens words that merely start with those letters.
     */
    private static String stripLeadIn(String text) {
        String result = text;
        if (result.toLowerCase().startsWith("see also")) {
            result = result.substring(8).trim();
        }
        if (result.toLowerCase().startsWith("see")) {
            result = result.substring(3).trim();
        }
        if (result.startsWith(":")) {
            result = result.substring(1).trim();
        }
        return result;
    }

    /**
     * Replaces each {@code [[target]]} or {@code [[target|label]]} with its
     * visible text: the part after the first {@code '|'} if there is one,
     * otherwise the whole link content. An opening {@code [[} without a
     * matching {@code ]]} is dropped and the text after it is kept.
     *
     * @param str the text containing wiki links
     * @return the text with link markup removed
     */
    protected String deWikify(String str) {
        StringBuilder result = new StringBuilder();
        String rest = str;
        while (true) {
            int open = rest.indexOf("[[");
            if (open < 0) {
                result.append(rest);
                return result.toString();
            }
            result.append(rest.substring(0, open));
            rest = rest.substring(open + 2);
            int close = rest.indexOf("]]");
            if (close >= 0) {
                String link = rest.substring(0, close);
                rest = rest.substring(close + 2);
                int pipe = link.indexOf('|');
                if (pipe >= 0) {
                    link = link.substring(pipe + 1);
                }
                result.append(link);
            }
        }
    }

    /**
     * Removes every {@code (...)} span including the parentheses. An opening
     * {@code (} without a closing {@code )} is dropped and the text after it
     * is kept.
     *
     * @param str the text to clean
     * @return the text without parenthesized spans
     */
    protected String removeBrackets(String str) {
        StringBuilder result = new StringBuilder();
        String rest = str;
        while (true) {
            int open = rest.indexOf("(");
            if (open < 0) {
                result.append(rest);
                return result.toString();
            }
            result.append(rest.substring(0, open));
            rest = rest.substring(open + 1);
            int close = rest.indexOf(")");
            if (close >= 0) {
                rest = rest.substring(close + 1);
            }
        }
    }

    /**
     * Removes every {@code ''...''} span including the quote pairs. An
     * opening {@code ''} without a closing {@code ''} is dropped and the text
     * after it is kept.
     *
     * @param str the text to clean
     * @return the text without {@code ''...''} spans
     */
    protected String removeComments(String str) {
        StringBuilder result = new StringBuilder();
        String rest = str;
        while (true) {
            int open = rest.indexOf("''");
            if (open < 0) {
                result.append(rest);
                return result.toString();
            }
            result.append(rest.substring(0, open));
            rest = rest.substring(open + 2);
            int close = rest.indexOf("''");
            if (close >= 0) {
                rest = rest.substring(close + 2);
            }
        }
    }

    /**
     * Removes <code>{{...}}</code> templates. Templates whose name is
     * {@code l} or starts with {@code l/} are replaced by one of their
     * arguments: the text between the second and third {@code '|'} (or up to
     * the end of the template) when a second {@code '|'} exists, otherwise the
     * text after the first {@code '|'}. All other templates are dropped. An
     * opening <code>{{</code> without a closing <code>}}</code> is dropped and
     * the text after it is kept.
     *
     * @param str the text to clean
     * @return the trimmed text without template markup
     */
    protected String removeTemplates(String str) {
        StringBuilder result = new StringBuilder();
        String rest = str;
        while (true) {
            int open = rest.indexOf("{{");
            if (open < 0) {
                result.append(rest);
                return result.toString().trim();
            }
            result.append(rest.substring(0, open));
            rest = rest.substring(open + 2);
            int close = rest.indexOf("}}");
            if (close < 0) {
                continue;
            }
            String template = rest.substring(0, close);
            rest = rest.substring(close + 2);

            int firstPipe = template.indexOf('|');
            if (firstPipe < 0) {
                continue;
            }
            String name = template.substring(0, firstPipe);
            if (!"l".equals(name) && !name.startsWith("l/")) {
                continue;
            }
            int secondPipe = template.indexOf('|', firstPipe + 1);
            if (secondPipe >= 0) {
                int thirdPipe = template.indexOf('|', secondPipe + 1);
                int end = thirdPipe == -1 ? template.length() : thirdPipe;
                result.append(template.substring(secondPipe + 1, end));
            } else {
                result.append(template.substring(firstPipe + 1));
            }
        }
    }
}
