package org.apdplat.word.lucene;

import java.io.BufferedReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Queue;
import java.util.concurrent.LinkedTransferQueue;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apdplat.word.recognition.StopWord;
import org.apdplat.word.segmentation.Segmentation;
import org.apdplat.word.segmentation.SegmentationAlgorithm;
import org.apdplat.word.segmentation.SegmentationFactory;
import org.apdplat.word.segmentation.Word;
import org.apdplat.word.tagging.AntonymTagging;
import org.apdplat.word.tagging.PinyinTagging;
import org.apdplat.word.tagging.SynonymTagging;
import org.apdplat.word.util.WordConfTools;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apdplat/word/lucene/ChineseWordTokenizer.class */
/**
 * Lucene {@link Tokenizer} that delegates word segmentation to a {@link Segmentation}
 * and, depending on configuration, also emits derived tokens for every word.
 *
 * <p>Derived tokens are controlled by these {@link WordConfTools} boolean keys (all default
 * to {@code false}): {@code tagging.pinyin.full}, {@code tagging.pinyin.acronym},
 * {@code tagging.synonym} and {@code tagging.antonym}.
 *
 * <p>Offsets are computed by accumulating the text length of each segmented word, stop
 * words included. NOTE(review): if the segmenter drops characters (whitespace, punctuation,
 * line breaks) the reported offsets will drift from the original text - confirm against
 * the {@code Segmentation} implementation in use.
 */
public class ChineseWordTokenizer extends Tokenizer {
    private static final Logger LOGGER = LoggerFactory.getLogger(ChineseWordTokenizer.class);
    private static final boolean FULL_PINYIN = WordConfTools.getBoolean("tagging.pinyin.full", false);
    private static final boolean ACRONYM_PINYIN = WordConfTools.getBoolean("tagging.pinyin.acronym", false);
    private static final boolean SYNONYM = WordConfTools.getBoolean("tagging.synonym", false);
    private static final boolean ANTONYM = WordConfTools.getBoolean("tagging.antonym", false);

    private final CharTermAttribute charTermAttribute = addAttribute(CharTermAttribute.class);
    private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
    private final PositionIncrementAttribute positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class);

    /** Segmenter used to split each input line into words. */
    private final Segmentation segmentation;
    /** Line reader over the current {@code input}; created lazily and dropped on {@link #reset()}. */
    private BufferedReader reader = null;
    /** Segmented words of the current input that have not been emitted yet. */
    private final Queue<Word> words = new LinkedTransferQueue<>();
    /** Tokens (word text plus derived variants) waiting to be returned by {@link #incrementToken()}. */
    private final Queue<String> tokens = new LinkedTransferQueue<>();
    /** Running start offset of the next word, accumulated from word text lengths. */
    private int startOffset = 0;

    /** Creates a tokenizer using the {@code BidirectionalMinimumMatching} algorithm. */
    public ChineseWordTokenizer() {
        this(SegmentationAlgorithm.BidirectionalMinimumMatching);
    }

    /**
     * Creates a tokenizer for the algorithm with the given enum constant name.
     *
     * @param str name of a {@link SegmentationAlgorithm} constant; if it cannot be resolved
     *            the tokenizer falls back to {@code BidirectionalMinimumMatching}
     */
    public ChineseWordTokenizer(String str) {
        this(resolveSegmentation(str));
    }

    /**
     * Creates a tokenizer for the given segmentation algorithm.
     *
     * @param segmentationAlgorithm algorithm passed to {@link SegmentationFactory#getSegmentation}
     */
    public ChineseWordTokenizer(SegmentationAlgorithm segmentationAlgorithm) {
        this(SegmentationFactory.getSegmentation(segmentationAlgorithm));
    }

    /**
     * Creates a tokenizer backed by the given segmenter.
     *
     * @param segmentation segmenter used to split each input line
     */
    public ChineseWordTokenizer(Segmentation segmentation) {
        this.segmentation = segmentation;
    }

    /**
     * Resolves a segmenter from an algorithm name, falling back to
     * {@code BidirectionalMinimumMatching} when the name cannot be resolved.
     */
    private static Segmentation resolveSegmentation(String algorithmName) {
        try {
            return SegmentationFactory.getSegmentation(SegmentationAlgorithm.valueOf(algorithmName));
        } catch (Exception e) {
            // Deliberate best-effort: keep the fallback, but do not hide the reason.
            LOGGER.warn("Cannot create segmentation for algorithm '" + algorithmName
                    + "'; falling back to BidirectionalMinimumMatching", e);
            return SegmentationFactory.getSegmentation(SegmentationAlgorithm.BidirectionalMinimumMatching);
        }
    }

    /**
     * Overrides {@code Tokenizer.reset()} so that the instance can be reused for a new input.
     *
     * <p>Besides delegating to the superclass, this discards the cached line reader (it wraps
     * the previous {@code input}) and any words, tokens and offset state left over from a
     * stream that was not consumed to the end.
     *
     * @throws IOException if the superclass reset fails
     */
    public void reset() throws IOException {
        super.reset();
        this.reader = null;
        this.words.clear();
        this.tokens.clear();
        this.startOffset = 0;
    }

    /**
     * Returns the next segmented word, reading and segmenting all remaining input lines
     * when the word queue is empty.
     *
     * @return the next word, or {@code null} when the input is exhausted
     * @throws IOException if reading the input fails
     */
    private Word getWord() throws IOException {
        Word next = this.words.poll();
        if (next != null) {
            return next;
        }
        if (this.reader == null) {
            // Wrapped lazily so the reader always sits on the input current at first use.
            this.reader = new BufferedReader(this.input);
        }
        String line;
        while ((line = this.reader.readLine()) != null) {
            this.words.addAll(this.segmentation.seg(line));
        }
        // The word queue was just refilled from scratch, so offsets start over.
        this.startOffset = 0;
        return this.words.poll();
    }

    /**
     * Returns the next token text. Pending derived tokens are served first; otherwise the
     * next non-stop word is fetched, its offset and position increment are set, and its
     * text plus any configured variants are queued.
     *
     * @return the next token text, or {@code null} when there are no more tokens
     * @throws IOException if reading the input fails
     */
    private String getToken() throws IOException {
        String pending = this.tokens.poll();
        if (pending != null) {
            return pending;
        }
        Word word = getWord();
        if (word == null) {
            return null;
        }
        // Each skipped stop word widens the position gap before the next emitted word.
        int positionIncrement = 1;
        while (StopWord.is(word.getText())) {
            positionIncrement++;
            this.startOffset += word.getText().length();
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("忽略停用词：" + word.getText());
            }
            word = getWord();
            if (word == null) {
                return null;
            }
        }
        String text = word.getText();
        int endOffset = this.startOffset + text.length();
        this.offsetAttribute.setOffset(this.startOffset, endOffset);
        this.positionIncrementAttribute.setPositionIncrement(positionIncrement);
        this.startOffset = endOffset;
        this.tokens.offer(text);
        enqueueVariants(word);
        return this.tokens.poll();
    }

    /** Queues the configured pinyin, synonym and antonym variants of the given word. */
    private void enqueueVariants(Word word) {
        if (FULL_PINYIN || ACRONYM_PINYIN) {
            PinyinTagging.process(Arrays.asList(word));
            if (FULL_PINYIN) {
                offerIfPresent(word.getFullPinYin());
            }
            if (ACRONYM_PINYIN) {
                offerIfPresent(word.getAcronymPinYin());
            }
        }
        if (SYNONYM) {
            SynonymTagging.process(Arrays.asList(word));
            word.getSynonym().forEach(synonym -> offerIfPresent(synonym.getText()));
        }
        if (ANTONYM) {
            AntonymTagging.process(Arrays.asList(word));
            word.getAntonym().forEach(antonym -> offerIfPresent(antonym.getText()));
        }
    }

    /** Queues the text unless it is {@code null} or empty (the queue rejects {@code null}). */
    private void offerIfPresent(String text) {
        if (text != null && !text.isEmpty()) {
            this.tokens.offer(text);
        }
    }

    /**
     * Advances to the next token and stores its text in the term attribute.
     *
     * <p>Only the term text is updated here; offset and position increment keep the values
     * set when the underlying word was fetched, so derived tokens (pinyin, synonyms,
     * antonyms) report the same offsets and position increment as their base word.
     *
     * @return {@code true} if a token was produced, {@code false} at end of input
     * @throws IOException if reading the input fails
     */
    public final boolean incrementToken() throws IOException {
        String token = getToken();
        if (token == null) {
            return false;
        }
        this.charTermAttribute.setEmpty().append(token);
        return true;
    }
}
