package org.apdplat.word;

import java.io.File;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apdplat.word.recognition.StopWord;
import org.apdplat.word.segmentation.Segmentation;
import org.apdplat.word.segmentation.SegmentationAlgorithm;
import org.apdplat.word.segmentation.SegmentationFactory;
import org.apdplat.word.segmentation.Word;
import org.apdplat.word.util.Utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apdplat/word/WordFrequencyStatistics.class */
public class WordFrequencyStatistics {
    private static final Logger LOGGER = LoggerFactory.getLogger(WordSegmenter.class);
    private String resultPath;
    private Segmentation segmentation;
    private Map<String, AtomicInteger> statisticsMap;
    private boolean removeStopWord;

    public WordFrequencyStatistics() {
        this.resultPath = "WordFrequencyStatistics-Result.txt";
        this.segmentation = SegmentationFactory.getSegmentation(SegmentationAlgorithm.MaxNgramScore);
        this.statisticsMap = new ConcurrentHashMap();
        this.removeStopWord = false;
    }

    public WordFrequencyStatistics(String str) {
        this.resultPath = "WordFrequencyStatistics-Result.txt";
        this.segmentation = SegmentationFactory.getSegmentation(SegmentationAlgorithm.MaxNgramScore);
        this.statisticsMap = new ConcurrentHashMap();
        this.removeStopWord = false;
        this.resultPath = str;
    }

    public WordFrequencyStatistics(String str, SegmentationAlgorithm segmentationAlgorithm) {
        this.resultPath = "WordFrequencyStatistics-Result.txt";
        this.segmentation = SegmentationFactory.getSegmentation(SegmentationAlgorithm.MaxNgramScore);
        this.statisticsMap = new ConcurrentHashMap();
        this.removeStopWord = false;
        this.resultPath = str;
        this.segmentation = SegmentationFactory.getSegmentation(segmentationAlgorithm);
    }

    public WordFrequencyStatistics(String str, String str2) {
        this.resultPath = "WordFrequencyStatistics-Result.txt";
        this.segmentation = SegmentationFactory.getSegmentation(SegmentationAlgorithm.MaxNgramScore);
        this.statisticsMap = new ConcurrentHashMap();
        this.removeStopWord = false;
        this.resultPath = str;
        this.segmentation = SegmentationFactory.getSegmentation(SegmentationAlgorithm.valueOf(str2));
    }

    public void setRemoveStopWord(boolean z) {
        this.removeStopWord = z;
    }

    public boolean isRemoveStopWord() {
        return this.removeStopWord;
    }

    public String getResultPath() {
        return this.resultPath;
    }

    public void setResultPath(String str) {
        this.resultPath = str;
    }

    public SegmentationAlgorithm getSegmentationAlgorithm() {
        return this.segmentation.getSegmentationAlgorithm();
    }

    public void setSegmentationAlgorithm(SegmentationAlgorithm segmentationAlgorithm) {
        this.segmentation = SegmentationFactory.getSegmentation(segmentationAlgorithm);
    }

    public void seg(String str) {
        this.segmentation.seg(str).parallelStream().forEach(word -> {
            if (isRemoveStopWord() && StopWord.is(word.getText())) {
                return;
            }
            statistics(word, 1, this.statisticsMap);
        });
    }

    public void seg(File file, File file2) throws Exception {
        Utils.seg(file, file2, isRemoveStopWord(), this.segmentation.getSegmentationAlgorithm(), word -> {
            statistics(word, 1, this.statisticsMap);
        });
    }

    private void statistics(String str, int i, Map<String, AtomicInteger> map) {
        map.putIfAbsent(str, new AtomicInteger());
        map.get(str).addAndGet(i);
    }

    private void statistics(Word word, int i, Map<String, AtomicInteger> map) {
        statistics(word.getText(), i, map);
    }

    public void dump(String str) {
        this.resultPath = str;
        dump();
    }

    public void dump() {
        dump(this.statisticsMap, this.resultPath);
    }

    private void dump(Map<String, AtomicInteger> map, String str) {
        try {
            List list = (List) map.entrySet().parallelStream().sorted((entry, entry2) -> {
                return new Integer(((AtomicInteger) entry2.getValue()).get()).compareTo(Integer.valueOf(((AtomicInteger) entry.getValue()).intValue()));
            }).map(entry3 -> {
                return ((String) entry3.getKey()) + " " + ((AtomicInteger) entry3.getValue()).get();
            }).collect(Collectors.toList());
            Files.write(Paths.get(str, new String[0]), list, new OpenOption[0]);
            if (list.size() < 100) {
                LOGGER.info("词频统计结果：");
                AtomicInteger atomicInteger = new AtomicInteger();
                list.forEach(str2 -> {
                    LOGGER.info("\t" + atomicInteger.incrementAndGet() + "、" + str2);
                });
            }
            LOGGER.info("词频统计结果成功保存到文件：" + str);
        } catch (Exception e) {
            LOGGER.error("dump error!", e);
        }
    }

    public void merge(String str, String... strArr) {
        try {
            ConcurrentHashMap concurrentHashMap = new ConcurrentHashMap();
            for (String str2 : strArr) {
                Files.lines(Paths.get(str2, new String[0])).forEach(str3 -> {
                    String[] split = str3.split("\\s+");
                    if (split == null || split.length != 2) {
                        return;
                    }
                    statistics(split[0], Integer.parseInt(split[1]), (Map<String, AtomicInteger>) concurrentHashMap);
                });
            }
            dump(concurrentHashMap, str);
        } catch (Exception e) {
            LOGGER.error("merge error!", e);
        }
    }

    public void reset() {
        this.statisticsMap.clear();
    }

    public static void main(String[] strArr) throws Exception {
        if (strArr.length <= 0) {
            WordFrequencyStatistics wordFrequencyStatistics = new WordFrequencyStatistics();
            wordFrequencyStatistics.setRemoveStopWord(false);
            wordFrequencyStatistics.setResultPath("word-frequency-statistics.txt");
            wordFrequencyStatistics.setSegmentationAlgorithm(SegmentationAlgorithm.MaxNgramScore);
            wordFrequencyStatistics.seg("明天下雨，结合成分子，明天有关于分子和原子的课程，下雨了也要去听课");
            wordFrequencyStatistics.dump();
            Files.write(Paths.get("text-to-seg.txt", new String[0]), Arrays.asList("word分词是一个Java实现的分布式中文分词组件，提供了多种基于词典的分词算法，并利用ngram模型来消除歧义。"), new OpenOption[0]);
            wordFrequencyStatistics.reset();
            wordFrequencyStatistics.seg(new File("text-to-seg.txt"), new File("text-seg-result.txt"));
            wordFrequencyStatistics.dump("file-seg-statistics-result.txt");
            return;
        }
        WordFrequencyStatistics wordFrequencyStatistics2 = new WordFrequencyStatistics();
        HashSet<String> hashSet = new HashSet();
        for (String str : strArr) {
            if (str.equals("-removeStopWord")) {
                wordFrequencyStatistics2.setRemoveStopWord(true);
            }
            if (str.startsWith("-textFile=")) {
                hashSet.add(str.replace("-textFile=", ""));
            }
            if (str.startsWith("-statisticsResultFile=")) {
                wordFrequencyStatistics2.setResultPath(str.replace("-statisticsResultFile=", ""));
            }
            if (str.startsWith("-segmentationAlgorithm=")) {
                wordFrequencyStatistics2.setSegmentationAlgorithm(SegmentationAlgorithm.valueOf(str.replace("-segmentationAlgorithm=", "")));
            }
        }
        for (String str2 : hashSet) {
            wordFrequencyStatistics2.seg(new File(str2), new File(str2 + ".seg.txt"));
        }
        wordFrequencyStatistics2.dump();
    }
}
