package com.googlecode.clearnlp.morphology;

import com.googlecode.clearnlp.constituent.CTLibEn;
import com.googlecode.clearnlp.util.pair.Pair;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import jregex.MatchResult;
import jregex.Replacer;
import jregex.Substitution;
import jregex.TextBuffer;

/* loaded from: input_file:com/googlecode/clearnlp/morphology/MPLib.class */
/**
 * Regular-expression constants and static helpers for token-level checks and
 * normalization (punctuation, digits, URLs, bracket tags).
 *
 * <p>All members are static; the class holds no instance state.
 */
public class MPLib {
    /** Matches a single punctuation character ({@code \p{Punct}}). */
    public static final Pattern PUNCT_CHAR = Pattern.compile("\\p{Punct}");
    /** Matches input made up of one or more punctuation characters only. */
    public static final Pattern PUNCT_ONLY = Pattern.compile("^\\p{Punct}+$");
    /** Matches input made up of one or more of {@code .}, {@code ?}, {@code !} only. */
    public static final Pattern PUNCT_PERIOD = Pattern.compile("^(\\.|\\?|\\!)+$");
    /**
     * Matches a run of two or more of the same character, for each of
     * {@code . ! ? - * = ~ ,}.
     */
    public static final jregex.Pattern PUNCT_REPEAT = new jregex.Pattern("\\.{2,}|\\!{2,}|\\?{2,}|\\-{2,}|\\*{2,}|\\={2,}|\\~{2,}|\\,{2,}");
    /**
     * Replacer that rewrites every {@link #PUNCT_REPEAT} match as exactly two
     * copies of the repeated character (e.g. a run of five {@code !} becomes {@code !!}).
     */
    public static final Replacer PUNCT_REPEAT_REPLACE = PUNCT_REPEAT.replacer(new Substitution() {
        public void appendSubstitution(MatchResult matchResult, TextBuffer textBuffer) {
            // Every alternative of PUNCT_REPEAT repeats one character, so the first
            // character of the match identifies the whole run.
            char repeated = matchResult.group(0).charAt(0);
            textBuffer.append(repeated);
            textBuffer.append(repeated);
        }
    });
    /** Matches a run of one or more digits. */
    public static final Pattern DIGIT_SPAN = Pattern.compile("\\d+");
    /** Matches input made up of one or more digits only. */
    public static final Pattern DIGIT_ONLY = Pattern.compile("^\\d+$");
    /**
     * Matches short digit-adjacent sequences: a digit followed by {@code %}, a
     * {@code $} followed by a digit, {@code .digit} at the start of input or after a
     * digit, and two digits separated by one of {@code , : - /}.
     */
    public static final Pattern DIGIT_LIKE = Pattern.compile("\\d%|\\$\\d|(^|\\d)\\.\\d|\\d,\\d|\\d:\\d|\\d-\\d|\\d\\/\\d");
    /** Matches a single alphabetic character ({@code \p{Alpha}}). */
    public static final Pattern ALPHA_CHAR = Pattern.compile("\\p{Alpha}");
    /** Matches a single alphanumeric character ({@code \p{Alnum}}). */
    public static final Pattern ALNUM_CHAR = Pattern.compile("\\p{Alnum}");
    /** Matches a run of one or more whitespace characters. */
    public static final Pattern WHITE_SPAN = Pattern.compile("\\s+");
    /**
     * jregex pattern used by {@link #containsURL(String)} to spot URL-like spans:
     * scheme-prefixed forms, {@code www}-prefixed or {@code user@host} forms, and
     * dotted names ending in one of com/edu/gov/int/mil/net/org/biz.
     *
     * <p>NOTE(review): the dot in {@code www.} is unescaped (matches any character)
     * and the final character class contains an escaped backslash followed by a
     * literal {@code w} rather than {@code \w}. Left unchanged here because callers
     * may depend on the current matching behaviour - confirm before tightening.
     */
    public static final jregex.Pattern URL_SPAN = new jregex.Pattern("((([A-Za-z]{3,9}:(?:\\/\\/)?)(?:[-;:&=\\+\\$,\\w]+@)?[A-Za-z0-9.-]+|(?:www.|[-;:&=\\+\\$,\\w]+@)[A-Za-z0-9.-]+)((?:\\/[\\+~%\\/.\\w-_]*)?\\??(?:[-\\+=&;%@.\\w_]*)#?(?:[.\\!\\/\\\\w]*))?|(\\w+\\.)+(com|edu|gov|int|mil|net|org|biz)$)");
    /** Matches non-whitespace text ending in a dot plus one of a fixed list of file extensions. */
    public static final Pattern FILE_EXTS = Pattern.compile("\\S+\\.(3gp|7z|ace|ai(f){0,2}|amr|asf|asp(x)?|asx|avi|bat|bin|bmp|bup|cab|cbr|cd(a|l|r)|chm|dat|divx|dll|dmg|doc|dss|dvf|dwg|eml|eps|exe|fl(a|v)|gif|gz|hqx|(s)?htm(l)?|ifo|indd|iso|jar|jsp|jp(e)?g|lnk|log|m4(a|b|p|v)|mcd|mdb|mid|mov|mp(2|3|4)|mp(e)?g|ms(i|wmm)|ogg|pdf|php|png|pps|ppt|ps(d|t)?|ptb|pub|qb(b|w)|qxd|ra(m|r)|rm(vb)?|rtf|se(a|s)|sit(x)?|sql|ss|swf|tgz|tif|torrent|ttf|txt|vcd|vob|wav|wm(a|v)|wp(d|s)|xls|xml|xtm|zip)$");
    /**
     * Ordered (tag pattern, bracket) pairs applied by {@link #revertBracket(String)}.
     * The first two patterns come from {@code CTLibEn.POS_LRB} / {@code CTLibEn.POS_RRB}
     * (presumably the round-bracket tags - verify against {@code CTLibEn}).
     */
    protected static final List<Pair<Pattern, String>> BRACKET_LIST = new ArrayList<Pair<Pattern, String>>(6);

    static {
        // Populated here instead of via an anonymous ArrayList subclass; the list is
        // sized for exactly these six entries.
        BRACKET_LIST.add(new Pair<Pattern, String>(Pattern.compile(CTLibEn.POS_LRB), "("));
        BRACKET_LIST.add(new Pair<Pattern, String>(Pattern.compile(CTLibEn.POS_RRB), ")"));
        BRACKET_LIST.add(new Pair<Pattern, String>(Pattern.compile("-LSB-"), "["));
        BRACKET_LIST.add(new Pair<Pattern, String>(Pattern.compile("-RSB-"), "]"));
        BRACKET_LIST.add(new Pair<Pattern, String>(Pattern.compile("-LCB-"), "{"));
        BRACKET_LIST.add(new Pair<Pattern, String>(Pattern.compile("-RCB-"), "}"));
    }

    /**
     * Tells whether {@link #URL_SPAN} matches anywhere in the given string.
     *
     * @param str the text to scan
     * @return {@code true} if a URL-like span is found
     */
    public static boolean containsURL(String str) {
        return URL_SPAN.matcher(str).find();
    }

    /**
     * Splits the string around runs of whitespace ({@link #WHITE_SPAN}).
     *
     * @param str the text to split
     * @return the pieces produced by {@link Pattern#split(CharSequence)}
     */
    public static String[] splitWhiteSpaces(String str) {
        return WHITE_SPAN.split(str);
    }

    /**
     * Applies the basic normalization: a string containing a URL-like span becomes
     * the placeholder {@code "#url#"}; anything else has its digits and then its
     * repeated punctuation normalized.
     *
     * @param str the text to normalize
     * @return {@code "#url#"} or the digit- and punctuation-normalized text
     */
    public static String normalizeBasic(String str) {
        if (containsURL(str)) {
            return "#url#";
        }
        return normalizePunctuation(normalizeDigits(str));
    }

    /**
     * Replaces every {@link #DIGIT_LIKE} match, and then every remaining digit run,
     * with {@code CTLibEn.EC_ZERO}.
     *
     * <p>NOTE(review): the constant is passed to {@code replaceAll} as a replacement
     * string, so it is assumed to contain no {@code $} or backslash - confirm
     * against {@code CTLibEn}.
     *
     * @param str the text to normalize
     * @return the text with digit sequences collapsed
     */
    public static String normalizeDigits(String str) {
        String digitLikeCollapsed = DIGIT_LIKE.matcher(str).replaceAll(CTLibEn.EC_ZERO);
        return DIGIT_SPAN.matcher(digitLikeCollapsed).replaceAll(CTLibEn.EC_ZERO);
    }

    /**
     * Shortens every run matched by {@link #PUNCT_REPEAT} to two characters.
     *
     * @param str the text to normalize
     * @return the text with repeated punctuation collapsed
     */
    public static String normalizePunctuation(String str) {
        return PUNCT_REPEAT_REPLACE.replace(str);
    }

    /**
     * Replaces each bracket tag listed in {@link #BRACKET_LIST} with its bracket
     * character, in list order.
     *
     * @param str the text possibly containing bracket tags
     * @return the text with the tags replaced by {@code ( ) [ ] { }}
     */
    public static String revertBracket(String str) {
        String reverted = str;
        for (Pair<Pattern, String> entry : BRACKET_LIST) {
            reverted = entry.o1.matcher(reverted).replaceAll(entry.o2);
        }
        return reverted;
    }

    /**
     * Tells whether the string contains at least one punctuation character.
     *
     * @param str the text to test
     * @return {@code true} if any {@code \p{Punct}} character occurs
     */
    public static boolean containsAnyPunctuation(String str) {
        return PUNCT_CHAR.matcher(str).find();
    }

    /**
     * Tells whether the string is non-empty and made up of punctuation only.
     *
     * <p>Uses {@code matches()} rather than {@code find()}: with {@code find()} the
     * trailing {@code $} also matches just before a final line terminator, so
     * input such as {@code "!\n"} would be accepted.
     *
     * @param str the text to test
     * @return {@code true} if every character is punctuation
     */
    public static boolean containsOnlyPunctuation(String str) {
        return PUNCT_ONLY.matcher(str).matches();
    }

    /**
     * Tells whether the string contains any of the given characters.
     *
     * @param str  the text to scan
     * @param cArr the characters to look for; with none given the result is {@code false}
     * @return {@code true} as soon as one of the characters is found
     */
    public static boolean containsAnySpecificPunctuation(String str, char... cArr) {
        int length = str.length();
        for (int i = 0; i < length; i++) {
            char current = str.charAt(i);
            for (char candidate : cArr) {
                if (current == candidate) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Tells whether the string is non-empty and made up of digits only.
     *
     * <p>Uses {@code matches()} so that a trailing line terminator (e.g.
     * {@code "123\n"}) is not silently accepted by the {@code $} anchor.
     *
     * @param str the text to test
     * @return {@code true} if every character is a digit
     */
    public static boolean containsOnlyDigits(String str) {
        return DIGIT_ONLY.matcher(str).matches();
    }

    /**
     * Tells whether the string consists solely of {@code .}, {@code ?} and
     * {@code !} characters, optionally preceded by a single leading {@code /}.
     *
     * <p>Uses {@code matches()} so that a trailing line terminator does not slip
     * past the {@code $} anchor of {@link #PUNCT_PERIOD}.
     *
     * @param str the text to test
     * @return {@code true} for period-like input
     */
    public static boolean isPeriodLike(String str) {
        if (PUNCT_PERIOD.matcher(str).matches()) {
            return true;
        }
        // Also accept the same punctuation behind one leading slash, e.g. "/.".
        boolean slashPrefixed = str.length() > 1 && str.charAt(0) == '/';
        return slashPrefixed && PUNCT_PERIOD.matcher(str.substring(1)).matches();
    }

    /**
     * Tells whether the string contains at least one alphabetic character.
     *
     * <p>Note: despite the name this is a "contains any" check
     * ({@link #ALPHA_CHAR} with {@code find()}), not an "all characters" check.
     *
     * @param str the text to test
     * @return {@code true} if any {@code \p{Alpha}} character occurs
     */
    public static boolean isAlpha(String str) {
        return ALPHA_CHAR.matcher(str).find();
    }

    /**
     * Tells whether the string contains at least one alphanumeric character.
     *
     * <p>Note: despite the name this is a "contains any" check
     * ({@link #ALNUM_CHAR} with {@code find()}), not an "all characters" check.
     *
     * @param str the text to test
     * @return {@code true} if any {@code \p{Alnum}} character occurs
     */
    public static boolean isAlnum(String str) {
        return ALNUM_CHAR.matcher(str).find();
    }
}
