package com.google.refine.clustering.binning;

import com.google.common.collect.ImmutableMap;
import com.google.refine.browsing.facets.ScatterplotFacet;
import java.text.Normalizer;
import java.util.Locale;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/* loaded from: input_file:com/google/refine/clustering/binning/FingerprintKeyer.class */
/**
 * Keyer that reduces a string to a "fingerprint": the string is normalized (trimmed, lower-cased,
 * stripped of diacritics, punctuation and control characters), split on whitespace, and its
 * distinct tokens are sorted and re-joined with single spaces.
 */
public class FingerprintKeyer extends Keyer {
    /**
     * Matches punctuation and C0/C1 control characters. The ranges deliberately skip
     * {@code \x09-\x0D} and {@code \x85}, so those characters are not removed here and remain
     * available as separators for the whitespace split performed in {@link #key}.
     */
    static final Pattern punctctrl = Pattern.compile("\\p{Punct}|[\\x00-\\x08\\x0E-\\x1F\\x7F\\x80-\\x84\\x86-\\x9F]", Pattern.UNICODE_CHARACTER_CLASS);

    /** Matches runs of combining diacritical marks, modifier letters (Lm) and modifier symbols (Sk). */
    public static final Pattern DIACRITICS_AND_FRIENDS = Pattern.compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");

    /** Matches one or more whitespace characters, using Unicode-aware character classes. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+", Pattern.UNICODE_CHARACTER_CLASS);

    /**
     * Single-character replacements applied by {@link #stripNonDiacritics(String)} after
     * diacritics have been stripped.
     */
    // The explicit <String, String> type witness is required: without it the chained builder is
    // inferred as ImmutableMap<Object, Object> and the assignment does not compile.
    // NOTE(review): the value for "ł" references a constant of an unrelated class; this looks like
    // a decompiler substitution for a string literal (presumably "l") - confirm and inline it.
    private static final ImmutableMap<String, String> NONDIACRITICS = ImmutableMap.<String, String>builder()
            .put("ß", "ss")
            .put("æ", "ae")
            .put("ø", "oe")
            .put("å", "aa")
            .put("©", "c")
            .put("ð", "d")
            .put("đ", "d")
            .put("ɖ", "d")
            .put("þ", "th")
            .put("ƿ", "w")
            .put("ħ", "h")
            .put("ı", "i")
            .put("ĸ", "k")
            .put("ł", ScatterplotFacet.SIZE)
            .put("ŋ", "n")
            .put("ſ", "s")
            .put("ŧ", "t")
            .put("œ", "oe")
            .put("ẜ", "s")
            .put("ẝ", "s")
            .build();

    /**
     * Computes the fingerprint key of a string.
     *
     * @param str the string to key; must not be {@code null}
     * @param objArr extra parameters; must be {@code null} or empty, as this keyer takes none
     * @return the sorted, de-duplicated, space-joined tokens of the strongly normalized input
     * @throws IllegalArgumentException if {@code str} is {@code null} or any extra parameter is given
     */
    @Override // com.google.refine.clustering.binning.Keyer
    public String key(String str, Object... objArr) {
        if (str == null || (objArr != null && objArr.length > 0)) {
            throw new IllegalArgumentException("Fingerprint keyer accepts a single string parameter");
        }
        return WHITESPACE.splitAsStream(normalize(str, true))
                .sorted()
                .distinct()
                .collect(Collectors.joining(" "));
    }

    /**
     * Normalizes a string without trimming, lower-casing or punctuation removal.
     *
     * @param str the string to normalize; must not be {@code null}
     * @return {@code str} with diacritics stripped and the mapped special characters replaced
     */
    protected String normalize(String str) {
        return normalize(str, false);
    }

    /**
     * Normalizes a string by stripping diacritics and replacing the mapped special characters.
     *
     * @param str the string to normalize; must not be {@code null}
     * @param z when {@code true}, the input is additionally trimmed and lower-cased before
     *        stripping, and punctuation and control characters are removed afterwards
     * @return the normalized string
     */
    /* JADX INFO: Access modifiers changed from: protected */
    public String normalize(String str, boolean z) {
        if (z) {
            // Locale.ROOT keeps the resulting keys independent of the JVM's default locale.
            str = str.trim().toLowerCase(Locale.ROOT);
        }
        String normalized = stripNonDiacritics(stripDiacritics(str));
        if (z) {
            normalized = punctctrl.matcher(normalized).replaceAll("");
        }
        return normalized;
    }

    /**
     * Legacy alias that delegates to {@link #normalize(String)}.
     *
     * @param str the string to normalize; must not be {@code null}
     * @return the result of {@link #normalize(String)}
     * @deprecated use {@link #normalize(String)} instead
     */
    @Deprecated
    protected String asciify(String str) {
        return normalize(str);
    }

    /**
     * Decomposes the string to Unicode form NFKD and removes everything matched by
     * {@link #DIACRITICS_AND_FRIENDS}.
     *
     * @param str the string to process; must not be {@code null}
     * @return the decomposed string with the matched marks removed
     */
    protected static String stripDiacritics(String str) {
        return DIACRITICS_AND_FRIENDS.matcher(Normalizer.normalize(str, Normalizer.Form.NFKD)).replaceAll("");
    }

    /**
     * Replaces each character that has an entry in {@link #NONDIACRITICS} with its mapped value
     * and copies every other character unchanged.
     *
     * @param str the string to process; must not be {@code null}
     * @return the string with mapped characters replaced
     */
    private static String stripNonDiacritics(String str) {
        // The builder is method-local, so the unsynchronized StringBuilder is sufficient.
        StringBuilder result = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); i++) {
            String current = str.substring(i, i + 1);
            String replacement = NONDIACRITICS.get(current);
            result.append(replacement == null ? current : replacement);
        }
        return result.toString();
    }
}
