package edu.stanford.nlp.international.german.process;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.process.CoreLabelProcessor;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.function.Function;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/stanford/nlp/international/german/process/GermanTokenizerPostProcessor.class */
/**
 * Post-processes a list of German tokens: re-joins certain adjacent tokens
 * (ordinal numbers, numeric ranges, a few abbreviations) and replaces
 * "base letter + combining diaeresis" sequences with the single precomposed
 * umlaut character.
 */
public class GermanTokenizerPostProcessor extends CoreLabelProcessor {
    /** Combining diaeresis (U+0308), the mark that turns a/o/u into an umlaut. */
    private static final char COMBINING_DIAERESIS = '\u0308';

    /** A token consisting only of ASCII digits. */
    private static final Pattern DIGITS = Pattern.compile("[0-9]+");

    /** ASCII digits followed by a single hyphen, e.g. the left half of a merged range. */
    private static final Pattern DIGITS_THEN_HYPHEN = Pattern.compile("[0-9]+-");

    /** True when the token carries an {@code AfterAnnotation} whose value is the empty string. */
    public Function<CoreLabel, Boolean> afterIsEmpty =
            label -> label.containsKey(CoreAnnotations.AfterAnnotation.class) && label.after().isEmpty();

    /** Words that, when they follow "digits" + ".", cause the period to be merged into the number. */
    public HashSet<String> ordinalPredictingWords = new HashSet<>(Arrays.asList(
            "Januar", "Februar", "März", "April", "Mai", "Juni", "Juli", "August",
            "September", "Oktober", "November", "Dezember", "Jahrhundert"));

    /** Abbreviations (without the trailing period) that absorb a directly following ".". */
    public HashSet<String> germanAbbreviations = new HashSet<>(Arrays.asList("bzw", "jap", "usw", "ca"));

    /**
     * Merges {@code coreLabel2} into {@code coreLabel}, mutating the first token in place.
     * The word becomes the concatenation of both words, the "after" text and end position
     * are taken from the second token, and the value is set to the merged word followed by
     * "-" and the first token's sentence index.
     *
     * @param coreLabel  the token that is extended
     * @param coreLabel2 the token that is absorbed; it is not modified
     */
    public static void mergeTokens(CoreLabel coreLabel, CoreLabel coreLabel2) {
        coreLabel.setWord(coreLabel.word() + coreLabel2.word());
        coreLabel.setAfter(coreLabel2.after());
        coreLabel.setEndPosition(coreLabel2.endPosition());
        coreLabel.setValue(coreLabel.word() + "-" + coreLabel.sentIndex());
    }

    /**
     * Applies {@link #condenseUmlauts(String)} to the token's value and word, writing a
     * field back only when the string version reports a result (non-null).
     *
     * @param coreLabel the token to update in place
     */
    public static void condenseUmlauts(CoreLabel coreLabel) {
        String condensedValue = condenseUmlauts(coreLabel.value());
        if (condensedValue != null) {
            coreLabel.setValue(condensedValue);
        }
        String condensedWord = condenseUmlauts(coreLabel.word());
        if (condensedWord != null) {
            coreLabel.setWord(condensedWord);
        }
    }

    /**
     * Replaces each a/A/o/O/u/U that is directly followed by a combining diaeresis
     * (U+0308) with the corresponding precomposed umlaut character.
     *
     * <p>A combining diaeresis that follows any other character, or that appears at the
     * very start of the string, is copied through unchanged.
     *
     * @param str the text to scan; may be null
     * @return the rewritten text if {@code str} contains at least one combining diaeresis
     *         (even if none could be condensed); {@code null} if it contains none or if
     *         {@code str} is null
     */
    public static String condenseUmlauts(String str) {
        if (str == null) {
            return null;
        }
        // The builder is created lazily so that strings without the mark allocate nothing.
        StringBuilder sb = null;
        for (int i = 0; i < str.length(); i++) {
            char current = str.charAt(i);
            if (current != COMBINING_DIAERESIS) {
                if (sb != null) {
                    sb.append(current);
                }
                continue;
            }
            if (sb == null) {
                sb = new StringBuilder(str.length());
                sb.append(str, 0, i);
            }
            int lastIndex = sb.length() - 1;
            // lastIndex is -1 when the mark is the first character: there is no base
            // letter to combine with, so the mark is kept as-is instead of indexing at -1.
            char composed = lastIndex < 0 ? 0 : composeUmlaut(sb.charAt(lastIndex));
            if (composed != 0) {
                sb.setCharAt(lastIndex, composed);
            } else {
                sb.append(current);
            }
        }
        return sb == null ? null : sb.toString();
    }

    /** Returns the precomposed umlaut for a/A/o/O/u/U, or the NUL character for any other input. */
    private static char composeUmlaut(char base) {
        switch (base) {
            case 'a':
                return '\u00e4';
            case 'A':
                return '\u00c4';
            case 'o':
                return '\u00f6';
            case 'O':
                return '\u00d6';
            case 'u':
                return '\u00fc';
            case 'U':
                return '\u00dc';
            default:
                return 0;
        }
    }

    /**
     * Builds a new token list in which selected adjacent tokens are merged, then condenses
     * umlauts on every resulting token. Tokens are merged in place via
     * {@link #mergeTokens(CoreLabel, CoreLabel)}, so labels from the input list are mutated.
     *
     * @param list the tokens to process
     * @return a new list containing the (possibly merged) tokens
     */
    @Override
    public List<CoreLabel> process(List<CoreLabel> list) {
        List<CoreLabel> result = new ArrayList<>(list.size());
        for (int i = 0; i < list.size(); i++) {
            CoreLabel current = list.get(i);
            if (result.isEmpty()) {
                // The first token has no predecessor to merge into.
                result.add(current);
                continue;
            }
            CoreLabel previous = result.get(result.size() - 1);
            if (shouldMerge(previous, current, list, i)) {
                mergeTokens(previous, current);
            } else {
                result.add(current);
            }
        }
        for (CoreLabel token : result) {
            condenseUmlauts(token);
        }
        return result;
    }

    /**
     * Decides whether {@code current} (at {@code index} in {@code tokens}) is merged into
     * {@code previous}, the last token emitted so far. A merge requires that nothing
     * separates the two tokens ({@link #afterIsEmpty} holds for {@code previous}) and that
     * one of the following patterns applies:
     * <ul>
     *   <li>digits, ".", then a word from {@link #ordinalPredictingWords};</li>
     *   <li>digits, "-", then digits (the hyphen joins the left number);</li>
     *   <li>"digits-" followed by digits (completes the range);</li>
     *   <li>a word from {@link #germanAbbreviations} followed by ".".</li>
     * </ul>
     */
    private boolean shouldMerge(CoreLabel previous, CoreLabel current, List<CoreLabel> tokens, int index) {
        if (!this.afterIsEmpty.apply(previous)) {
            return false;
        }
        String previousWord = previous.word();
        String currentWord = current.word();
        // Look ahead in the original input, not in the merged output.
        boolean hasNext = index + 1 < tokens.size();
        boolean previousIsNumber = DIGITS.matcher(previousWord).matches();

        boolean ordinal = previousIsNumber
                && currentWord.equals(".")
                && hasNext
                && this.ordinalPredictingWords.contains(tokens.get(index + 1).word());
        if (ordinal) {
            return true;
        }
        boolean rangeStart = previousIsNumber
                && currentWord.equals("-")
                && hasNext
                && DIGITS.matcher(tokens.get(index + 1).word()).matches();
        if (rangeStart) {
            return true;
        }
        boolean rangeEnd = DIGITS_THEN_HYPHEN.matcher(previousWord).matches()
                && DIGITS.matcher(currentWord).matches();
        if (rangeEnd) {
            return true;
        }
        return this.germanAbbreviations.contains(previousWord) && currentWord.equals(".");
    }

    /**
     * Returns the first argument unchanged; the second argument is not used.
     *
     * @param list  returned as-is
     * @param list2 ignored
     * @return {@code list}
     */
    @Override
    public List<CoreLabel> restore(List<CoreLabel> list, List<CoreLabel> list2) {
        return list;
    }
}
