package org.apdplat.word.segmentation;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apdplat.word.util.AutoDetector;
import org.apdplat.word.util.GenericTrie;
import org.apdplat.word.util.ResourceLoader;
import org.apdplat.word.util.WordConfTools;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apdplat/word/segmentation/WordRefiner.class */
/**
 * Post-processes a segmentation result using a table of refinement rules.
 *
 * <p>Rules are read as {@code key=value} lines from the resource configured under
 * {@code word.refine.path} (default {@code classpath:word_refine.txt}) and stored in a
 * trie. The same table drives two operations:
 * <ul>
 *   <li><b>split</b>: if a word's text is a key, the word is replaced by the
 *       whitespace-separated tokens of the mapped value;</li>
 *   <li><b>combine</b>: if the texts of several adjacent words, joined with single
 *       spaces, form a key, those words are replaced by one word built from the mapped
 *       value.</li>
 * </ul>
 *
 * <p>This is a static utility class and cannot be instantiated.
 */
public class WordRefiner {
    private static final Logger LOGGER = LoggerFactory.getLogger(WordRefiner.class);
    /** Refinement table: rule key -> whitespace-normalized rule value. */
    private static final GenericTrie<String> GENERIC_TRIE = new GenericTrie<>();

    private WordRefiner() {
    }

    /**
     * Registers a {@link ResourceLoader} for the refinement resource with
     * {@link AutoDetector#loadAndWatch}; the loader's callbacks fill, extend and prune
     * the refinement table.
     * NOTE(review): presumably AutoDetector also re-invokes the callbacks when the
     * resource changes - confirm against AutoDetector.
     */
    public static void reload() {
        AutoDetector.loadAndWatch(new ResourceLoader() {
            @Override
            public void clear() {
                GENERIC_TRIE.clear();
            }

            @Override
            public void load(List<String> list) {
                LOGGER.info("初始化WordRefiner");
                int loaded = 0;
                for (String line : list) {
                    if (register(line)) {
                        loaded++;
                    }
                }
                LOGGER.info("WordRefiner初始化完毕，数据条数：" + loaded);
            }

            @Override
            public void add(String str) {
                register(str);
            }

            @Override
            public void remove(String str) {
                // Only the part before the first '=' identifies the rule to drop.
                String[] parts = str == null ? new String[0] : str.split("=");
                if (parts.length == 0) {
                    LOGGER.error("错误的WordRefiner数据：" + str);
                    return;
                }
                try {
                    GENERIC_TRIE.remove(parts[0].trim());
                } catch (Exception e) {
                    // Best effort: one bad line must not abort the whole update.
                    LOGGER.error("错误的WordRefiner数据：" + str, e);
                }
            }
        }, WordConfTools.get("word.refine.path", "classpath:word_refine.txt"));
    }

    /**
     * Parses one {@code key=value} rule line and stores it in the refinement table.
     * The key is trimmed; the value is trimmed and every whitespace run in it is
     * collapsed to a single space. Lines that are {@code null}, lack a value, or have
     * a blank key or value are rejected and logged as errors.
     *
     * @param line raw rule line
     * @return {@code true} if the rule was stored, {@code false} if it was rejected
     */
    private static boolean register(String line) {
        if (line != null) {
            String[] parts = line.split("=");
            if (parts.length >= 2) {
                String key = parts[0].trim();
                String value = parts[1].trim().replaceAll("\\s+", " ");
                // A blank value would later be split into a single empty-text word.
                if (!key.isEmpty() && !value.isEmpty()) {
                    try {
                        GENERIC_TRIE.put(key, value);
                        return true;
                    } catch (Exception e) {
                        // Best effort: keep loading the remaining lines.
                        LOGGER.error("错误的WordRefiner数据：" + line, e);
                        return false;
                    }
                }
            }
        }
        LOGGER.error("错误的WordRefiner数据：" + line);
        return false;
    }

    /**
     * Splits a word according to the refinement table.
     *
     * @param word the word to look up; may be {@code null}
     * @return the replacement words, or {@code null} if {@code word} is {@code null},
     *         has no rule, or its rule yields no non-empty token
     */
    public static List<Word> split(Word word) {
        if (word == null) {
            return null;
        }
        String mapped = GENERIC_TRIE.get(word.getText());
        if (mapped == null) {
            return null;
        }
        List<Word> parts = new ArrayList<>();
        for (String token : mapped.split("\\s+")) {
            // String.split can yield an empty token (e.g. for a blank value); skip it.
            if (!token.isEmpty()) {
                parts.add(new Word(token));
            }
        }
        if (parts.isEmpty()) {
            return null;
        }
        return parts;
    }

    /**
     * Combines several words into one according to the refinement table. The lookup
     * key is the words' texts joined by single spaces, then trimmed.
     *
     * @param list the candidate words, in order
     * @return the combined word, or {@code null} if {@code list} is {@code null}, has
     *         fewer than two elements, or no rule matches
     */
    public static Word combine(List<Word> list) {
        if (list == null || list.size() < 2) {
            return null;
        }
        StringBuilder key = new StringBuilder();
        for (Word word : list) {
            key.append(word.getText()).append(' ');
        }
        String mapped = GENERIC_TRIE.get(key.toString().trim());
        if (mapped == null) {
            return null;
        }
        return new Word(mapped);
    }

    /**
     * Refines a segmentation result in two passes: first every word that has a split
     * rule is replaced by its parts, then runs of adjacent words that have a combine
     * rule are merged.
     *
     * <p>For each position, windows are tried from length 2 up to the value of
     * {@code word.refine.combine.max.length} (default 3, raised to 2 if smaller), so
     * the shortest matching run wins. Words consumed by a merge are not considered
     * again.
     *
     * @param list the segmentation result; may be {@code null}
     * @return a new list holding the refined words (empty if {@code list} is
     *         {@code null})
     */
    public static List<Word> refine(List<Word> list) {
        if (list == null) {
            return new ArrayList<>();
        }
        LOGGER.debug("对分词结果进行refine之前：{}", list);

        // Pass 1: split.
        List<Word> afterSplit = new ArrayList<>(list.size());
        for (Word word : list) {
            List<Word> parts = split(word);
            if (parts == null) {
                afterSplit.add(word);
            } else {
                if (LOGGER.isDebugEnabled()) {
                    LOGGER.debug("词： " + word.getText() + " 被拆分为：" + parts);
                }
                afterSplit.addAll(parts);
            }
        }
        LOGGER.debug("对分词结果进行refine阶段的拆词之后：{}", afterSplit);
        if (afterSplit.size() < 2) {
            return afterSplit;
        }

        // Pass 2: combine.
        int maxLength = WordConfTools.getInt("word.refine.combine.max.length", 3);
        if (maxLength < 2) {
            maxLength = 2;
        }
        int total = afterSplit.size();
        List<Word> result = new ArrayList<>(total);
        int start = 0;
        while (start < total) {
            List<Word> window = null;
            Word merged = null;
            // Stop growing the window at the end of the list: a clamped window would
            // only repeat a lookup that has already failed.
            for (int length = 2; length <= maxLength && start + length <= total; length++) {
                window = afterSplit.subList(start, start + length);
                merged = combine(window);
                if (merged != null) {
                    break;
                }
            }
            if (merged == null) {
                result.add(afterSplit.get(start));
                start++;
            } else {
                LOGGER.debug("词： {} 被合并为：{}", window, merged);
                result.add(merged);
                // Skip every word that was folded into the merged one.
                start += window.size();
            }
        }
        LOGGER.debug("对分词结果进行refine阶段的组词之后：{}", result);
        return result;
    }

    /**
     * Demo entry point: segments two sample sentences and prints each result before
     * and after refinement.
     *
     * @param strArr unused
     */
    public static void main(String[] strArr) {
        List<Word> seg = SegmentationFactory.getSegmentation(SegmentationAlgorithm.BidirectionalMaximumMatching).seg("我国工人阶级和广大劳动群众要更加紧密地团结在党中央周围");
        System.out.println(seg);
        System.out.println(refine(seg));
        List<Word> seg2 = SegmentationFactory.getSegmentation(SegmentationAlgorithm.BidirectionalMaximumMatching).seg("在实现“两个一百年”奋斗目标的伟大征程上再创新的业绩");
        System.out.println(seg2);
        System.out.println(refine(seg2));
    }

    // Load the refinement table as soon as the class is initialized.
    static {
        reload();
    }
}
