package org.apdplat.word.analysis;

import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import org.apdplat.word.recognition.StopWord;
import org.apdplat.word.segmentation.Segmentation;
import org.apdplat.word.segmentation.SegmentationAlgorithm;
import org.apdplat.word.segmentation.SegmentationFactory;
import org.apdplat.word.segmentation.Word;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apdplat/word/analysis/TextSimilarity.class */
/**
 * Base class for text-similarity algorithms.
 *
 * <p>This class handles the parts shared by every algorithm: segmenting raw strings into
 * {@link Word} lists, short-circuiting null/empty input, debug logging, and rounding the final
 * score to six decimal places. Subclasses supply the actual metric via
 * {@link #scoreImpl(List, List)}.
 *
 * <p>NOTE(review): the segmenter is created lazily without synchronization; confirm whether
 * instances are expected to be shared between threads.
 */
public abstract class TextSimilarity implements Similarity, SimilarityRanker {
    protected static final Logger LOGGER = LoggerFactory.getLogger(TextSimilarity.class);

    /** Scale factor used to round scores to six decimal places. */
    private static final double SCORE_SCALE = 1000000.0d;

    /** Segmenter used by {@link #seg(String)}; created on first use when not set explicitly. */
    private Segmentation segmentation = null;

    /** When {@code true}, {@code StopWord.filterStopWords} is applied to every segmentation result. */
    protected boolean filterStopWord = false;

    /**
     * Selects the segmentation algorithm used when scoring raw strings.
     *
     * @param segmentationAlgorithm the algorithm to obtain a segmenter for
     */
    public void setSegmentationAlgorithm(SegmentationAlgorithm segmentationAlgorithm) {
        this.segmentation = SegmentationFactory.getSegmentation(segmentationAlgorithm);
        LOGGER.info("设置分词算法为：{}", segmentationAlgorithm.getDes());
    }

    /**
     * Scores the similarity of two raw texts by segmenting them and delegating to
     * {@link #similarScore(List, List)}.
     *
     * @param str  first text
     * @param str2 second text
     * @return {@code 0.0} if either text is {@code null}; otherwise the score of the two
     *         segmented word lists
     */
    @Override // org.apdplat.word.analysis.Similarity
    public double similarScore(String str, String str2) {
        LOGGER.debug("文本1：");
        LOGGER.debug("\t{}", str);
        LOGGER.debug("文本2：");
        LOGGER.debug("\t{}", str2);
        if (str == null || str2 == null) {
            return 0.0d;
        }
        return similarScore(seg(str), seg(str2));
    }

    /**
     * Scores the similarity of two word lists.
     *
     * @param list  first word list
     * @param list2 second word list
     * @return {@code 0.0} if either list is {@code null} or exactly one of them is empty,
     *         {@code 1.0} if both are empty, otherwise the result of
     *         {@link #scoreImpl(List, List)} rounded half-up to six decimal places
     */
    @Override // org.apdplat.word.analysis.Similarity
    public double similarScore(List<Word> list, List<Word> list2) {
        if (list == null || list2 == null) {
            return 0.0d;
        }
        if (list.isEmpty() && list2.isEmpty()) {
            return 1.0d;
        }
        if (list.isEmpty() || list2.isEmpty()) {
            return 0.0d;
        }
        LOGGER.debug("词列表1：");
        LOGGER.debug("\t{}", list);
        LOGGER.debug("词列表2：");
        LOGGER.debug("\t{}", list2);

        double rawScore = scoreImpl(list, list2);
        LOGGER.debug("分值：{}", rawScore);

        // Math.round works on a long, so the scaled value cannot overflow the way the former
        // (int) cast did for scores above ~2147, and negative values are rounded correctly.
        double rounded = Math.round(rawScore * SCORE_SCALE) / SCORE_SCALE;
        LOGGER.debug("取六位小数，四舍五入，分值：{}", rounded);
        return rounded;
    }

    /**
     * Computes the raw similarity score. When called from {@link #similarScore(List, List)},
     * both lists are non-null and non-empty.
     *
     * @param list  first word list
     * @param list2 second word list
     * @return the unrounded score
     */
    protected abstract double scoreImpl(List<Word> list, List<Word> list2);

    /**
     * Segments a text into words, optionally filtering stop words.
     *
     * @param str text to segment; {@code null} yields an empty list
     * @return the segmented words
     */
    private List<Word> seg(String str) {
        if (str == null) {
            return Collections.emptyList();
        }
        if (this.segmentation == null) {
            // No algorithm configured: fall back to MaxNgramScore.
            this.segmentation = SegmentationFactory.getSegmentation(SegmentationAlgorithm.MaxNgramScore);
        }
        List<Word> words = this.segmentation.seg(str);
        if (this.filterStopWord) {
            // Any return value is ignored here, so this presumably filters the list in place
            // — TODO confirm against StopWord.
            StopWord.filterStopWords(words);
        }
        return words;
    }

    /**
     * Assigns each word a weight equal to the number of times its text occurs in its own list.
     *
     * <p>If the first word of either list already carries a weight, the lists are treated as
     * pre-weighted and left untouched. {@code null} or empty lists are tolerated and skipped.
     *
     * <p>The decompiler reports that this method was declared {@code protected} in the original
     * source; it is kept {@code public} here so existing callers are unaffected.
     *
     * @param list  first word list
     * @param list2 second word list
     */
    public void taggingWeightWithWordFrequency(List<Word> list, List<Word> list2) {
        if (hasPresetWeight(list) || hasPresetWeight(list2)) {
            LOGGER.debug("词已经被指定权重，不再使用词频进行标注");
            return;
        }
        Map<String, AtomicInteger> frequency = frequency(list);
        Map<String, AtomicInteger> frequency2 = frequency(list2);
        if (LOGGER.isDebugEnabled()) {
            // Guarded because formatting sorts and renders the whole map.
            LOGGER.debug("词频统计1：\n{}", formatWordsFrequency(frequency));
            LOGGER.debug("词频统计2：\n{}", formatWordsFrequency(frequency2));
        }
        applyFrequencyWeights(list, frequency);
        applyFrequencyWeights(list2, frequency2);
    }

    /** Returns whether the list is non-empty and its first word already has a weight. */
    private static boolean hasPresetWeight(List<Word> words) {
        return words != null && !words.isEmpty() && words.get(0).getWeight() != null;
    }

    /** Sets every word's weight to the occurrence count recorded for its text. */
    private static void applyFrequencyWeights(List<Word> words, Map<String, AtomicInteger> counts) {
        if (words == null) {
            return;
        }
        for (Word word : words) {
            word.setWeight(Float.valueOf(counts.get(word.getText()).floatValue()));
        }
    }

    /**
     * Builds a text-to-weight lookup map from a word list.
     *
     * <p>Words without a weight are logged as errors and omitted. When the same text occurs
     * more than once, the weight of the last occurrence in list order is kept.
     *
     * <p>The decompiler reports that this method was declared {@code protected} in the original
     * source; it is kept {@code public} here so existing callers are unaffected.
     *
     * @param list words to index; {@code null} yields an empty map
     * @return a {@link ConcurrentHashMap} from word text to weight
     */
    public Map<String, Float> toFastSearchMap(List<Word> list) {
        Map<String, Float> weights = new ConcurrentHashMap<>();
        if (list == null) {
            return weights;
        }
        for (Word word : list) {
            Float weight = word.getWeight();
            if (weight != null) {
                weights.put(word.getText(), weight);
            } else {
                LOGGER.error("词没有权重信息：{}", word.getText());
            }
        }
        return weights;
    }

    /**
     * Counts how often each word text occurs in the list.
     *
     * @param list words to count; {@code null} yields an empty map
     * @return map from word text to occurrence count
     */
    private Map<String, AtomicInteger> frequency(List<Word> list) {
        Map<String, AtomicInteger> counts = new HashMap<>();
        if (list == null) {
            return counts;
        }
        for (Word word : list) {
            counts.computeIfAbsent(word.getText(), text -> new AtomicInteger()).incrementAndGet();
        }
        return counts;
    }

    /**
     * Renders a frequency map as numbered lines of the form {@code <tab>rank、text=count},
     * ordered by descending count, without a trailing newline.
     *
     * @param map frequency map; may be {@code null} or empty
     * @return the formatted text, or an empty string when there is nothing to render
     */
    private String formatWordsFrequency(Map<String, AtomicInteger> map) {
        StringBuilder sb = new StringBuilder();
        if (map == null || map.isEmpty()) {
            // Nothing was appended, so there is no trailing newline to strip; trimming here
            // would call setLength(-1) and throw.
            return sb.toString();
        }
        AtomicInteger rank = new AtomicInteger();
        map.entrySet().stream()
                .sorted((left, right) -> Integer.compare(right.getValue().get(), left.getValue().get()))
                .forEach(entry -> sb.append("\t")
                        .append(rank.incrementAndGet())
                        .append("、")
                        .append(entry.getKey())
                        .append("=")
                        .append(entry.getValue())
                        .append("\n"));
        // Drop the newline appended after the last entry.
        sb.setLength(sb.length() - 1);
        return sb.toString();
    }
}
