package org.apdplat.word.segmentation.impl;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
import org.apdplat.word.segmentation.Segmentation;
import org.apdplat.word.segmentation.SegmentationAlgorithm;
import org.apdplat.word.segmentation.Word;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apdplat/word/segmentation/impl/PureEnglish.class */
/**
 * Segmentation implementation for plain English text.
 *
 * <p>The input is split on every character that is not an ASCII letter or digit. Each
 * resulting token is optionally split further on camel-case boundaries, lower-cased, and
 * passed through {@link #irregularity(String)}, which drops unwanted tokens and rewrites a
 * few fragments (for example the {@code ll} left over from {@code we'll} becomes
 * {@code will}).
 */
public class PureEnglish implements Segmentation {
    private static final Logger LOGGER = LoggerFactory.getLogger(PureEnglish.class);
    private static final Pattern NUMBER = Pattern.compile("\\d+");
    private static final Pattern UNICODE = Pattern.compile("[uU][0-9a-fA-F]{4}");
    /** Token delimiter: any single character that is not an ASCII letter or digit. */
    private static final Pattern NON_ALPHANUMERIC = Pattern.compile("[^a-zA-Z0-9]");
    /** Tokens and camel-case parts shorter than this are discarded. */
    private static final int MIN_WORD_LENGTH = 2;
    /** Tokens shorter than this are lower-cased as a whole and therefore never camel-case split. */
    private static final int MIN_CAMEL_CASE_LENGTH = 6;

    @Override
    public SegmentationAlgorithm getSegmentationAlgorithm() {
        return SegmentationAlgorithm.PureEnglish;
    }

    /**
     * Splits English text into words.
     *
     * @param str the text to segment; {@code null} yields an empty list
     * @return a new mutable list of words in order of appearance, never {@code null}
     */
    @Override
    public List<Word> seg(String str) {
        List<Word> words = new ArrayList<>();
        if (str == null) {
            return words;
        }
        for (String token : NON_ALPHANUMERIC.split(str.trim())) {
            if (token.length() < MIN_WORD_LENGTH) {
                continue;
            }
            // Lower-casing the whole token first removes every camel-case boundary,
            // so such tokens come out of splitCamelCase as a single part.
            String candidate = shouldLowerCaseWhole(token) ? token.toLowerCase(Locale.ROOT) : token;
            for (String part : splitCamelCase(candidate)) {
                String lower = part.toLowerCase(Locale.ROOT);
                if (lower.length() < MIN_WORD_LENGTH) {
                    continue;
                }
                String word = irregularity(lower);
                if (word != null) {
                    words.add(new Word(word));
                }
            }
        }
        return words;
    }

    /**
     * Decides whether a token is kept whole instead of being split on camel-case boundaries.
     *
     * @param token a non-empty token
     * @return {@code true} if the token is shorter than {@link #MIN_CAMEL_CASE_LENGTH}, starts
     *     and ends with an upper-case letter, contains a digit, or has no lower-case letter
     */
    private static boolean shouldLowerCaseWhole(String token) {
        if (token.length() < MIN_CAMEL_CASE_LENGTH) {
            return true;
        }
        boolean upperAtBothEnds = Character.isUpperCase(token.charAt(0))
                && Character.isUpperCase(token.charAt(token.length() - 1));
        return upperAtBothEnds || NUMBER.matcher(token).find() || isAllUpperCase(token);
    }

    /**
     * Splits a token before every upper-case letter that directly follows a lower-case letter.
     *
     * @param token a non-empty token
     * @return the parts in order; a token without such a boundary is returned as a single part
     */
    private static List<String> splitCamelCase(String token) {
        List<String> parts = new ArrayList<>();
        int start = 0;
        for (int i = 1; i < token.length(); i++) {
            if (Character.isUpperCase(token.charAt(i)) && Character.isLowerCase(token.charAt(i - 1))) {
                parts.add(token.substring(start, i));
                start = i;
            }
        }
        if (start < token.length()) {
            parts.add(token.substring(start));
        }
        return parts;
    }

    /**
     * Filters out unwanted tokens and expands a few known fragments.
     *
     * @param str a non-empty, lower-cased token
     * @return the word to keep (possibly rewritten), or {@code null} if the token is ignored
     */
    private static String irregularity(String str) {
        if (Character.isDigit(str.charAt(0))) {
            LOGGER.debug("词以数字开头，忽略：{}", str);
            return null;
        }
        // The hex-literal check below can never fire after the digit check above,
        // because '0' is itself a digit; it is kept to preserve the original rule set.
        if (str.startsWith("0x") || str.startsWith("0X")) {
            LOGGER.debug("词为16进制，忽略：{}", str);
            return null;
        }
        if (str.endsWith("l") && isNumeric(str.substring(0, str.length() - 1))) {
            LOGGER.debug("词为long类型数字，忽略：{}", str);
            return null;
        }
        // NOTE(review): find() matches a 'u' followed by four hex characters anywhere in the
        // token, not only when the whole token is such an escape - confirm this is intended.
        if (UNICODE.matcher(str).find()) {
            LOGGER.debug("词为UNICODE字符编码，忽略：{}", str);
            return null;
        }
        switch (str) {
            case "ll":
                return "will";
            case "re":
                return "are";
            case "ch":
                return "chapter";
            case "ve":
                return "have";
            case "doesn":
                return "does";
            case "isn":
                return "is";
            case "br":
                // Dropped entirely; presumably residue of an HTML <br> tag - verify.
                return null;
            default:
                return str;
        }
    }

    /**
     * Checks that a string contains no lower-case character.
     *
     * @param str the string to inspect
     * @return {@code true} if no character is lower-case; digits do not count as lower-case
     */
    private static boolean isAllUpperCase(String str) {
        for (int i = 0; i < str.length(); i++) {
            if (Character.isLowerCase(str.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Checks that a string consists only of digits.
     *
     * @param str the string to inspect
     * @return {@code true} if every character is a digit; the empty string yields {@code true}
     */
    private static boolean isNumeric(String str) {
        for (int i = 0; i < str.length(); i++) {
            if (!Character.isDigit(str.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /** Small manual demo: segments a sample sentence and prints the result. */
    public static void main(String[] strArr) {
        System.out.println(new PureEnglish().seg("Your function may also be added permanently to Hive, however this requires a small modification to a Hive Java file and then rebuilding Hive."));
    }
}
