package org.apdplat.word.util;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.List;
import java.util.regex.Pattern;
import org.apdplat.word.recognition.StopWord;
import org.apdplat.word.segmentation.Segmentation;
import org.apdplat.word.segmentation.SegmentationAlgorithm;
import org.apdplat.word.segmentation.SegmentationFactory;
import org.apdplat.word.segmentation.Word;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apdplat/word/util/Utils.class */
/**
 * Static helpers: CJK-range string checks, recursive directory deletion,
 * file-to-file word segmentation, and human-readable duration formatting.
 */
public class Utils {
    private static final Logger LOGGER = LoggerFactory.getLogger(Utils.class);
    private static final Pattern PATTERN_ONE = Pattern.compile("^[\\u4e00-\\u9fa5]+$");
    private static final Pattern PATTERN_TWO = Pattern.compile("^[\\u4e00-\\u9fa5]{2,}$");

    private static final long MILLIS_PER_SECOND = 1000L;
    private static final long MILLIS_PER_MINUTE = 60L * MILLIS_PER_SECOND;
    private static final long MILLIS_PER_HOUR = 60L * MILLIS_PER_MINUTE;
    private static final long MILLIS_PER_DAY = 24L * MILLIS_PER_HOUR;

    /** Number of processed characters between two progress log lines in {@code seg}. */
    private static final int PROGRESS_REPORT_INTERVAL = 500000;

    /**
     * Receives every word written to the output file by
     * {@link #seg(File, File, boolean, SegmentationAlgorithm, FileSegmentationCallback)}.
     */
    public interface FileSegmentationCallback {
        /**
         * Invoked once per word, immediately before the word's text is written.
         *
         * @param word the word about to be written
         */
        void callback(Word word);
    }

    /**
     * Tells whether the whole string consists of one or more characters in the
     * range U+4E00..U+9FA5.
     *
     * @param str the text to test; {@code null} yields {@code false}
     * @return {@code true} if every character of {@code str} is in the range and
     *         {@code str} is not empty
     */
    public static boolean isChineseCharAndLengthAtLeastOne(String str) {
        // matches() rather than find(): with find(), '$' also matches just before a
        // trailing line terminator, so "x\n"-style input would be accepted.
        return str != null && PATTERN_ONE.matcher(str).matches();
    }

    /**
     * Tells whether the whole string consists of two or more characters in the
     * range U+4E00..U+9FA5.
     *
     * @param str the text to test; {@code null} yields {@code false}
     * @return {@code true} if every character of {@code str} is in the range and
     *         {@code str} has at least two characters
     */
    public static boolean isChineseCharAndLengthAtLeastTwo(String str) {
        return str != null && PATTERN_TWO.matcher(str).matches();
    }

    /**
     * Deletes a file, or a directory together with everything below it.
     * Stops at the first entry that cannot be deleted.
     *
     * @param file the file or directory to delete
     * @return {@code true} if {@code file} and all of its descendants were deleted
     */
    public static boolean deleteDir(File file) {
        if (file.isDirectory()) {
            // listFiles() returns null on an I/O error; in that case fall through and
            // let delete() report the failure instead of throwing.
            File[] children = file.listFiles();
            if (children != null) {
                for (File child : children) {
                    if (!deleteDir(child)) {
                        return false;
                    }
                }
            }
        }
        return file.delete();
    }

    /**
     * Segments a file without a per-word callback.
     *
     * @param file the UTF-8 input file
     * @param file2 the output file
     * @param z whether to run stop-word filtering on each segmented line
     * @param segmentationAlgorithm the algorithm used to obtain the segmenter
     * @throws Exception if reading, segmenting or writing fails
     */
    public static void seg(File file, File file2, boolean z, SegmentationAlgorithm segmentationAlgorithm) throws Exception {
        seg(file, file2, z, segmentationAlgorithm, null);
    }

    /**
     * Segments {@code file} line by line and writes the result to {@code file2}.
     * Each output line holds the words of the matching input line, each followed by
     * a single space; blank input lines become empty output lines. Input is read and
     * output is written as UTF-8. Missing parent directories of {@code file2} are
     * created. Progress, timing and memory figures are logged at INFO level.
     *
     * @param file the UTF-8 input file
     * @param file2 the output file (overwritten if it exists)
     * @param z whether to run stop-word filtering on each segmented line
     * @param segmentationAlgorithm the algorithm used to obtain the segmenter
     * @param fileSegmentationCallback invoked for every word before it is written; may be {@code null}
     * @throws Exception if reading, segmenting or writing fails
     */
    public static void seg(File file, File file2, boolean z, SegmentationAlgorithm segmentationAlgorithm, FileSegmentationCallback fileSegmentationCallback) throws Exception {
        LOGGER.info("开始对文件进行分词：{}", file);
        Segmentation segmentation = SegmentationFactory.getSegmentation(segmentationAlgorithm);
        // Snapshot taken now, logged next to the "after" figures once the work is done.
        String memoryBefore = describeRemainingMemory("执行之前剩余内存:");

        // A bare relative file name has no parent; nothing to create in that case.
        File outputDir = file2.getParentFile();
        if (outputDir != null && !outputDir.exists()) {
            outputDir.mkdirs();
        }

        try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
            try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file2), StandardCharsets.UTF_8))) {
                long size = Files.size(file.toPath());
                LOGGER.info("size:{}", size);
                LOGGER.info("文件大小：{} MB", ((float) size) / 1024.0f / 1024.0f);

                long charCount = 0;
                int charsSinceReport = 0;
                long start = System.currentTimeMillis();
                String line;
                while ((line = reader.readLine()) != null) {
                    if (line.trim().isEmpty()) {
                        writer.write("\n");
                        continue;
                    }
                    charCount += line.length();
                    List<Word> words = segmentation.seg(line);
                    if (words == null) {
                        // No result for this line: nothing is written, as before.
                        continue;
                    }
                    if (z) {
                        StopWord.filterStopWords(words);
                    }
                    for (Word word : words) {
                        if (fileSegmentationCallback != null) {
                            fileSegmentationCallback.callback(word);
                        }
                        writer.write(word.getText() + " ");
                    }
                    writer.write("\n");

                    charsSinceReport += line.length();
                    if (charsSinceReport > PROGRESS_REPORT_INTERVAL) {
                        charsSinceReport = 0;
                        // Rough estimate: compares characters read (times two) with the byte size.
                        int percent = (int) (((charCount * 2.0f) / ((float) size)) * 100.0f);
                        LOGGER.info("分词进度：{}%", percent);
                    }
                }

                long elapsed = System.currentTimeMillis() - start;
                LOGGER.info("字符数目：{}", charCount);
                LOGGER.info("分词耗时：{} 毫秒", getTimeDes(Long.valueOf(elapsed)));
                // Floating-point division over at least 1 ms: a sub-millisecond run must not
                // divide by zero, and the rate must not be truncated to a whole number.
                float charsPerMilli = ((float) charCount) / Math.max(elapsed, 1L);
                LOGGER.info("分词速度：{} 字符/毫秒", charsPerMilli);
            }
            // The writer is closed (and flushed) before the result is announced.
            LOGGER.info(memoryBefore);
            LOGGER.info(describeRemainingMemory("执行之后剩余内存:"));
            LOGGER.info("将文件 {} 的分词结果保存到文件 {}", file, file2);
        }
    }

    /** Builds the "label + max-total+free=remaining" memory line, figures in units of 10^6 bytes. */
    private static String describeRemainingMemory(String label) {
        Runtime runtime = Runtime.getRuntime();
        float max = ((float) runtime.maxMemory()) / 1000000.0f;
        float total = ((float) runtime.totalMemory()) / 1000000.0f;
        float free = ((float) runtime.freeMemory()) / 1000000.0f;
        return label + max + "-" + total + "+" + free + "=" + ((max - total) + free);
    }

    /**
     * Formats a millisecond duration as comma-separated day / hour / minute /
     * second / millisecond parts, leaving out every part that is not positive.
     *
     * @param l the duration in milliseconds; may be {@code null}
     * @return the formatted duration, or an empty string when {@code l} is
     *         {@code null}, zero or negative
     */
    public static String getTimeDes(Long l) {
        if (l == null) {
            return "";
        }
        long rest = l.longValue();
        long days = rest / MILLIS_PER_DAY;
        rest %= MILLIS_PER_DAY;
        long hours = rest / MILLIS_PER_HOUR;
        rest %= MILLIS_PER_HOUR;
        long minutes = rest / MILLIS_PER_MINUTE;
        rest %= MILLIS_PER_MINUTE;
        long seconds = rest / MILLIS_PER_SECOND;
        long millis = rest % MILLIS_PER_SECOND;

        StringBuilder text = new StringBuilder();
        if (days > 0) {
            text.append(days).append("天,");
        }
        if (hours > 0) {
            text.append(hours).append("小时,");
        }
        if (minutes > 0) {
            text.append(minutes).append("分钟,");
        }
        if (seconds > 0) {
            text.append(seconds).append("秒,");
        }
        if (millis > 0) {
            text.append(millis).append("毫秒,");
        }
        if (text.length() > 0) {
            // Drop the trailing comma left by the last appended part.
            text.setLength(text.length() - 1);
        }
        return text.toString();
    }
}
