package edu.stanford.nlp.trees.international.pennchinese;

import edu.stanford.nlp.io.EncodingPrintWriter;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/stanford/nlp/trees/international/pennchinese/ChineseUtils.class */
public class ChineseUtils {
    // Not referenced anywhere else in the visible code.
    // NOTE(review): presumably selected between normalizeBMP and normalizeUnicode in the
    // original source; the visible normalize(...) only calls normalizeUnicode — confirm.
    private static final boolean ONLY_BMP = false;
    // Regex fragment: exactly one character that is \s whitespace or a Unicode space separator (Zs).
    public static final String ONEWHITE = "[\\s\\p{Zs}]";
    // Regex fragment: zero or more of the ONEWHITE characters.
    public static final String WHITE = "[\\s\\p{Zs}]*";
    // Regex fragment: one or more of the ONEWHITE characters.
    public static final String WHITEPLUS = "[\\s\\p{Zs}]+";
    // Regex character class of the Chinese numerals one through ten; isNumber matches against it.
    public static final String NUMBERS = "[一二三四五六七八九十]";
    // Regex character class of middle-dot-like characters; midDotPattern uses the same class with '+'.
    public static final String MID_DOT_REGEX_STR = "[··•․‧∙⋅・]";
    // Integer option codes for the mode arguments of normalize(...).
    // normalize(String, int, int, int) accepts 0..4 for its first two mode arguments.
    // NOTE(review): what each code actually does is decided inside normalizeUnicode, whose body
    // is not available in this file — confirm the per-code behavior against the original source.
    public static final int LEAVE = 0;
    // ASCII and NORMALIZE deliberately share the value 1 (two names for the same code).
    public static final int ASCII = 1;
    public static final int NORMALIZE = 1;
    public static final int FULLWIDTH = 2;
    public static final int DELETE = 3;
    public static final int DELETE_EXCEPT_BETWEEN_ASCII = 4;
    // Largest option code; equals the upper bound hard-coded in normalize's range check.
    public static final int MAX_LEGAL = 4;
    // Redwood logging channel for this class; main() uses it to print the usage message.
    private static final Redwood.RedwoodChannels log = Redwood.channels(ChineseUtils.class);
    // Patterns consulted by shapeOf(...). Each is "one or more characters from the class" and is
    // applied with matches(), so the WHOLE input must consist of such characters.
    // Whole input is year/month/day characters -> shape "D".
    private static final Pattern dateChars = Pattern.compile("[年月日]+");
    // Same as dateChars plus 号; only consulted when shapeOf's first boolean flag is true -> "D".
    private static final Pattern dateCharsPlus = Pattern.compile("[年月日号]+");
    // ASCII and fullwidth digits plus Chinese numeral characters -> shape "N".
    private static final Pattern numberChars = Pattern.compile("[0-9０-９一二三四五六七八九十零〇百千万亿兩○◯〡-〩〸-〺]+");
    // ASCII and fullwidth Latin letters -> shape "L".
    private static final Pattern letterChars = Pattern.compile("[A-Za-zＡ-Ｚａ-ｚ]+");
    // Period-like characters (including 点) -> shape "P".
    private static final Pattern periodChars = Pattern.compile("[﹒‧．.点]+");
    // Punctuation reported as shape "S" (ASCII, CJK and fullwidth forms).
    private static final Pattern separatingPuncChars = Pattern.compile("[]!\"(),;:<=>?\\[\\\\`{|}~^、-〃〈-】〔-〟〰］！＂（），；：＜＝＞？［＼｀｛｜｝～＾]+");
    // Punctuation reported as shape "A" (ASCII and fullwidth forms).
    private static final Pattern ambiguousPuncChars = Pattern.compile("[-#$%&'*+/@_－＃＄％＆＇＊＋／＠＿]+");
    // Middle-dot characters; only consulted when shapeOf's second boolean flag is true -> "M".
    private static final Pattern midDotPattern = Pattern.compile("[··•․‧∙⋅・]+");

    /** Not instantiable: the constructor is private and every member visible in this class is static. */
    private ChineseUtils() {
    }

    /**
     * Tests whether a character counts as a number character: one of the ten Chinese numerals
     * for one through ten (the characters listed in the {@code NUMBERS} character class), or any
     * character accepted by {@link Character#isDigit(char)}.
     *
     * <p>The numeral check is a plain string lookup, so no temporary String is allocated and no
     * regular expression is evaluated per call.
     *
     * @param c the character to test
     * @return {@code true} if {@code c} is one of the ten numerals or a Unicode digit
     */
    public static boolean isNumber(char c) {
        // The ten characters of the NUMBERS class (U+4E00 "one" ... U+5341 "ten"), without the
        // surrounding regex brackets. Keep in sync with NUMBERS if that constant ever changes.
        final String chineseNumerals =
                "\u4e00\u4e8c\u4e09\u56db\u4e94\u516d\u4e03\u516b\u4e5d\u5341";
        return chineseNumerals.indexOf(c) >= 0 || Character.isDigit(c);
    }

    /**
     * Normalizes {@code str} with the default modes: code 2 ({@code FULLWIDTH}) for the first
     * mode argument and code 1 ({@code ASCII} / {@code NORMALIZE}) for the second.
     *
     * @param str the text to normalize
     * @return the result of {@code normalize(str, 2, 1)}
     */
    public static String normalize(String str) {
        final int defaultFirstMode = 2;  // FULLWIDTH
        final int defaultSecondMode = 1; // ASCII / NORMALIZE
        return normalize(str, defaultFirstMode, defaultSecondMode);
    }

    /**
     * Normalizes {@code str} with the two given mode codes and code 0 ({@code LEAVE}) as the
     * third mode argument.
     *
     * @param str the text to normalize
     * @param i   first mode code, passed through unchanged
     * @param i2  second mode code, passed through unchanged
     * @return the result of {@code normalize(str, i, i2, 0)}
     */
    public static String normalize(String str, int i, int i2) {
        final int defaultThirdMode = 0; // LEAVE
        return normalize(str, i, i2, defaultThirdMode);
    }

    /**
     * Validates the first two mode codes and delegates to {@code normalizeUnicode}.
     *
     * <p>{@code main} passes its {@code ascii}, {@code space} and {@code midDot} flags as
     * {@code i}, {@code i2} and {@code i3} respectively. Only {@code i} and {@code i2} are
     * range-checked here; {@code i3} is handed on unchecked.
     *
     * @param str the text to normalize
     * @param i   first mode code; must be in 0..4
     * @param i2  second mode code; must be in 0..4
     * @param i3  third mode code; not validated by this method
     * @return whatever {@code normalizeUnicode(str, i, i2, i3)} returns
     * @throws IllegalArgumentException if {@code i} or {@code i2} is outside 0..4
     */
    public static String normalize(String str, int i, int i2, int i3) {
        final boolean firstModeLegal = 0 <= i && i <= 4;
        final boolean secondModeLegal = 0 <= i2 && i2 <= 4;
        if (firstModeLegal && secondModeLegal) {
            return normalizeUnicode(str, i, i2, i3);
        }
        throw new IllegalArgumentException("ChineseUtils: Unknown parameter option");
    }

    /* JADX WARN: Can't fix incorrect switch cases order, some code will duplicate */
    /* JADX WARN: Failed to find 'out' block for switch in B:17:0x00dd. Please report as an issue. */
    /* JADX WARN: Failed to find 'out' block for switch in B:53:0x01e0. Please report as an issue. */
    /* JADX WARN: Removed duplicated region for block: B:30:0x016f  */
    /* JADX WARN: Removed duplicated region for block: B:33:0x017e  */
    /* JADX WARN: Removed duplicated region for block: B:36:0x018e  */
    /* JADX WARN: Removed duplicated region for block: B:39:0x019c  */
    /* JADX WARN: Removed duplicated region for block: B:52:0x01df A[PHI: r15 r17
      0x01df: PHI (r15v2 char) = 
      (r15v1 char)
      (r15v1 char)
      (r15v1 char)
      (r15v1 char)
      (r15v1 char)
      (r15v1 char)
      (r15v1 char)
      (r15v6 char)
      (r15v1 char)
      (r15v7 char)
     binds: [B:29:0x0149, B:46:0x01c9, B:51:0x01dc, B:50:0x01d9, B:37:0x0193, B:38:0x0196, B:34:0x0183, B:35:0x0186, B:31:0x0174, B:32:0x0177] A[DONT_GENERATE, DONT_INLINE]
      0x01df: PHI (r17v1 boolean) = 
      (r17v0 boolean)
      (r17v0 boolean)
      (r17v4 boolean)
      (r17v0 boolean)
      (r17v0 boolean)
      (r17v5 boolean)
      (r17v0 boolean)
      (r17v0 boolean)
      (r17v0 boolean)
      (r17v0 boolean)
     binds: [B:29:0x0149, B:46:0x01c9, B:51:0x01dc, B:50:0x01d9, B:37:0x0193, B:38:0x0196, B:34:0x0183, B:35:0x0186, B:31:0x0174, B:32:0x0177] A[DONT_GENERATE, DONT_INLINE]] */
    /* JADX WARN: Removed duplicated region for block: B:54:0x0203  */
    /* JADX WARN: Removed duplicated region for block: B:57:0x0213  */
    /* JADX WARN: Removed duplicated region for block: B:60:0x0223  */
    /* JADX WARN: Removed duplicated region for block: B:63:0x024c A[PHI: r15 r17
      0x024c: PHI (r15v3 char) = (r15v2 char), (r15v2 char), (r15v2 char), (r15v2 char), (r15v4 char), (r15v2 char), (r15v5 char) binds: [B:53:0x01e0, B:61:0x0228, B:62:0x022b, B:58:0x0218, B:59:0x021b, B:55:0x0208, B:56:0x020b] A[DONT_GENERATE, DONT_INLINE]
      0x024c: PHI (r17v2 boolean) = (r17v1 boolean), (r17v1 boolean), (r17v3 boolean), (r17v1 boolean), (r17v1 boolean), (r17v1 boolean), (r17v1 boolean) binds: [B:53:0x01e0, B:61:0x0228, B:62:0x022b, B:58:0x0218, B:59:0x021b, B:55:0x0208, B:56:0x020b] A[DONT_GENERATE, DONT_INLINE]] */
    /* JADX WARN: Removed duplicated region for block: B:65:0x0251  */
    /* JADX WARN: Removed duplicated region for block: B:68:0x0259 A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:69:0x0231 A[SYNTHETIC] */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    /**
     * Decompiler stub: the real body was not recovered, so this method unconditionally throws
     * {@link UnsupportedOperationException}. Nothing in the visible part of this class calls it.
     *
     * @param r8  NOTE(review): presumably the input text — confirm against the original source
     * @param r9  NOTE(review): presumably a mode code — confirm
     * @param r10 NOTE(review): presumably a mode code — confirm
     * @param r11 NOTE(review): presumably a mode code — confirm
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    private static java.lang.String normalizeBMP(java.lang.String r8, int r9, int r10, int r11) {
        /*
            Method dump skipped, instructions count: 613
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: edu.stanford.nlp.trees.international.pennchinese.ChineseUtils.normalizeBMP(java.lang.String, int, int, int):java.lang.String");
    }

    /* JADX WARN: Can't fix incorrect switch cases order, some code will duplicate */
    /* JADX WARN: Failed to find 'out' block for switch in B:11:0x006a. Please report as an issue. */
    /* JADX WARN: Failed to find 'out' block for switch in B:23:0x00d3. Please report as an issue. */
    /* JADX WARN: Failed to find 'out' block for switch in B:44:0x015c. Please report as an issue. */
    /* JADX WARN: Removed duplicated region for block: B:24:0x00f7  */
    /* JADX WARN: Removed duplicated region for block: B:27:0x0106  */
    /* JADX WARN: Removed duplicated region for block: B:30:0x0116  */
    /* JADX WARN: Removed duplicated region for block: B:33:0x0124  */
    /* JADX WARN: Removed duplicated region for block: B:45:0x017f  */
    /* JADX WARN: Removed duplicated region for block: B:48:0x018f  */
    /* JADX WARN: Removed duplicated region for block: B:51:0x019f  */
    /* JADX WARN: Removed duplicated region for block: B:54:0x01c8 A[PHI: r13 r15
      0x01c8: PHI (r13v3 int) = (r13v2 int), (r13v2 int), (r13v2 int), (r13v2 int), (r13v4 int), (r13v2 int), (r13v5 int) binds: [B:44:0x015c, B:52:0x01a4, B:53:0x01a7, B:49:0x0194, B:50:0x0197, B:46:0x0184, B:47:0x0187] A[DONT_GENERATE, DONT_INLINE]
      0x01c8: PHI (r15v2 boolean) = (r15v1 boolean), (r15v1 boolean), (r15v3 boolean), (r15v1 boolean), (r15v1 boolean), (r15v1 boolean), (r15v1 boolean) binds: [B:44:0x015c, B:52:0x01a4, B:53:0x01a7, B:49:0x0194, B:50:0x0197, B:46:0x0184, B:47:0x0187] A[DONT_GENERATE, DONT_INLINE]] */
    /* JADX WARN: Removed duplicated region for block: B:56:0x01cd  */
    /* JADX WARN: Removed duplicated region for block: B:59:0x01d5 A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:60:0x01ad A[SYNTHETIC] */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    /**
     * Decompiler stub: the real body was not recovered, so this method unconditionally throws
     * {@link UnsupportedOperationException}.
     *
     * <p>{@code normalize(String, int, int, int)} delegates here after its range check, and the
     * shorter {@code normalize} overloads delegate to that one. As long as this stub is in
     * place, every {@code normalize} call with legal mode codes therefore throws as well.
     *
     * @param r5 the {@code str} argument forwarded by {@code normalize}
     * @param r6 the first mode code forwarded by {@code normalize}
     * @param r7 the second mode code forwarded by {@code normalize}
     * @param r8 the third mode code forwarded by {@code normalize}
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    private static java.lang.String normalizeUnicode(java.lang.String r5, int r6, int r7, int r8) {
        /*
            Method dump skipped, instructions count: 492
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: edu.stanford.nlp.trees.international.pennchinese.ChineseUtils.normalizeUnicode(java.lang.String, int, int, int):java.lang.String");
    }

    /**
     * Tests whether a code point is one of the eight middle-dot-like characters this class
     * recognizes.
     *
     * @param i the code point to test
     * @return {@code true} for U+00B7, U+0387, U+2022, U+2024, U+2027, U+2219, U+22C5 or U+30FB
     */
    private static boolean isMidDot(int i) {
        switch (i) {
            case 0x00B7:
            case 0x0387:
            case 0x2022:
            case 0x2024:
            case 0x2027:
            case 0x2219:
            case 0x22C5:
            case 0x30FB:
                return true;
            default:
                return false;
        }
    }

    /**
     * Tests whether a code point lies in the printable ASCII range U+0021..U+007E or in the
     * range U+FF01..U+FF5E. Both ranges exclude the space character.
     *
     * @param i the code point to test
     * @return {@code true} if {@code i} falls inside either range (bounds inclusive)
     */
    private static boolean isAsciiLowHigh(int i) {
        if (0x21 <= i && i <= 0x7E) {
            return true;
        }
        return 0xFF01 <= i && i <= 0xFF5E;
    }

    /**
     * Command-line driver for {@code normalize(String, int, int, int)}.
     *
     * <p>Usage: {@code ChineseUtils ascii space midDot word*}. The first three arguments are
     * parsed as integers and passed as the three mode codes of {@code normalize}. Any further
     * arguments are normalized one at a time and printed through {@code EncodingPrintWriter.out}.
     * With exactly three arguments the program acts as a filter: each line read from standard
     * input (decoded as UTF-8) is normalized and printed until end of input.
     *
     * <p>With fewer than three arguments a usage message is logged and nothing else happens.
     *
     * @param strArr command-line arguments as described above
     * @throws IOException if reading from or closing standard input fails
     * @throws NumberFormatException if one of the first three arguments is not an integer
     */
    public static void main(String[] strArr) throws IOException {
        if (strArr.length < 3) {
            log.info("usage: ChineseUtils ascii space midDot word*");
            log.info("  First 3 args are int flags; a filter or maps args as words; assumes UTF-8");
            return;
        }
        final int asciiFlag = Integer.parseInt(strArr[0]);
        final int spaceFlag = Integer.parseInt(strArr[1]);
        final int midDotFlag = Integer.parseInt(strArr[2]);

        // Word mode: everything after the three flags is treated as text to normalize.
        if (strArr.length > 3) {
            for (int idx = 3; idx < strArr.length; idx++) {
                EncodingPrintWriter.out.println(normalize(strArr[idx], asciiFlag, spaceFlag, midDotFlag));
            }
            return;
        }

        // Filter mode: try-with-resources closes the reader on normal end of input and on failure.
        try (BufferedReader stdin = IOUtils.readerFromStdin("UTF-8")) {
            for (String line = stdin.readLine(); line != null; line = stdin.readLine()) {
                EncodingPrintWriter.out.println(normalize(line, asciiFlag, spaceFlag, midDotFlag));
            }
        }
    }

    /**
     * Classifies a token into a one-letter shape code by testing it against this class's
     * character-class patterns. Checks run in the order listed and the first hit wins:
     *
     * <ol>
     *   <li>{@code "D"} if {@code z} is set and the whole input matches {@code dateCharsPlus}</li>
     *   <li>{@code "o"} if the first character is U+7B2C</li>
     *   <li>{@code "D"} if the whole input matches {@code dateChars}</li>
     *   <li>{@code "N"} for {@code numberChars}</li>
     *   <li>{@code "L"} for {@code letterChars}</li>
     *   <li>{@code "P"} for {@code periodChars}</li>
     *   <li>{@code "S"} for {@code separatingPuncChars}</li>
     *   <li>{@code "A"} for {@code ambiguousPuncChars}</li>
     *   <li>{@code "M"} if {@code z2} is set and the whole input matches {@code midDotPattern}</li>
     *   <li>{@code "C"} otherwise</li>
     * </ol>
     *
     * <p>An empty input matches none of the patterns (each requires at least one character) and
     * yields the default {@code "C"} instead of failing on the first-character lookup.
     *
     * @param charSequence the token to classify; must not be {@code null}
     * @param z            whether to try the extended date pattern {@code dateCharsPlus} first
     * @param z2           whether middle-dot tokens get their own shape {@code "M"}
     * @return the shape code, one of {@code "D"}, {@code "o"}, {@code "N"}, {@code "L"},
     *         {@code "P"}, {@code "S"}, {@code "A"}, {@code "M"} or {@code "C"}
     */
    public static String shapeOf(CharSequence charSequence, boolean z, boolean z2) {
        if (z && dateCharsPlus.matcher(charSequence).matches()) {
            return "D";
        }
        // Length guard: charAt(0) on an empty sequence would throw StringIndexOutOfBoundsException.
        // NOTE(review): U+7B2C is presumably the ordinal prefix, hence the "o" code — confirm.
        if (charSequence.length() > 0 && charSequence.charAt(0) == '\u7B2C') {
            return "o";
        }
        if (dateChars.matcher(charSequence).matches()) {
            return "D";
        }
        if (numberChars.matcher(charSequence).matches()) {
            return "N";
        }
        if (letterChars.matcher(charSequence).matches()) {
            return "L";
        }
        if (periodChars.matcher(charSequence).matches()) {
            return "P";
        }
        if (separatingPuncChars.matcher(charSequence).matches()) {
            return "S";
        }
        if (ambiguousPuncChars.matcher(charSequence).matches()) {
            return "A";
        }
        if (z2 && midDotPattern.matcher(charSequence).matches()) {
            return "M";
        }
        return "C";
    }
}
