package org.apdplat.word.lucene;

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apdplat.word.segmentation.Segmentation;
import org.apdplat.word.segmentation.SegmentationAlgorithm;
import org.apdplat.word.segmentation.SegmentationFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apdplat/word/lucene/ChineseWordAnalyzer.class */
/**
 * Lucene {@link Analyzer} whose token streams are produced by a
 * {@link ChineseWordTokenizer} driven by a configurable {@link Segmentation}.
 *
 * <p>The segmentation is chosen once, at construction time, and is handed to every
 * tokenizer this analyzer creates.
 */
public class ChineseWordAnalyzer extends Analyzer {
    private static final Logger LOGGER = LoggerFactory.getLogger(ChineseWordAnalyzer.class);

    /** Algorithm used by the no-arg constructor and as the fallback for unusable names. */
    private static final SegmentationAlgorithm DEFAULT_ALGORITHM =
            SegmentationAlgorithm.BidirectionalMinimumMatching;

    /** Segmentation passed to each tokenizer created by {@link #createComponents(String)}. */
    private final Segmentation segmentation;

    /**
     * Creates an analyzer that uses the default algorithm
     * ({@code BidirectionalMinimumMatching}).
     */
    public ChineseWordAnalyzer() {
        this.segmentation = SegmentationFactory.getSegmentation(DEFAULT_ALGORITHM);
    }

    /**
     * Creates an analyzer from the name of a {@link SegmentationAlgorithm} constant.
     *
     * <p>If the name cannot be resolved (including {@code null}) or the factory fails for
     * it, the analyzer falls back to the default algorithm and logs a warning instead of
     * throwing.
     *
     * @param segmentationAlgorithm name of a {@link SegmentationAlgorithm} enum constant
     */
    public ChineseWordAnalyzer(String segmentationAlgorithm) {
        this.segmentation = resolveSegmentation(segmentationAlgorithm);
    }

    /**
     * Creates an analyzer that uses the segmentation the factory returns for the given
     * algorithm.
     *
     * @param segmentationAlgorithm algorithm to look up through {@link SegmentationFactory}
     */
    public ChineseWordAnalyzer(SegmentationAlgorithm segmentationAlgorithm) {
        this.segmentation = SegmentationFactory.getSegmentation(segmentationAlgorithm);
    }

    /**
     * Creates an analyzer that uses the supplied segmentation as-is.
     *
     * @param segmentation segmentation handed to every tokenizer this analyzer creates
     */
    public ChineseWordAnalyzer(Segmentation segmentation) {
        this.segmentation = segmentation;
    }

    /**
     * Builds the analysis chain: a single {@link ChineseWordTokenizer} with no filters.
     *
     * @param fieldName name of the field being analyzed; not used, every field gets the
     *                  same tokenizer configuration
     * @return components wrapping a new tokenizer bound to this analyzer's segmentation
     */
    protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
        return new Analyzer.TokenStreamComponents(new ChineseWordTokenizer(this.segmentation));
    }

    /**
     * Demo entry point: analyzes three sample sentences and logs every token together
     * with its offsets and position increment.
     *
     * @param args ignored
     * @throws IOException if consuming or closing a token stream fails
     */
    public static void main(String[] args) throws IOException {
        // Declared as Analyzer so the instance is released even if a sample fails.
        try (Analyzer analyzer = new ChineseWordAnalyzer()) {
            logTokens(analyzer, "杨尚川是APDPlat应用级产品开发平台的作者");
            logTokens(analyzer, "word是一个中文分词项目，作者是杨尚川，杨尚川的英文名叫ysc");
            logTokens(analyzer, "5月初有哪些电影值得观看");
        }
    }

    /**
     * Maps an algorithm name to a segmentation, falling back to the default algorithm
     * when the name is unknown, {@code null}, or the factory rejects it.
     */
    private static Segmentation resolveSegmentation(String algorithmName) {
        try {
            return SegmentationFactory.getSegmentation(SegmentationAlgorithm.valueOf(algorithmName));
        } catch (RuntimeException e) {
            // Best-effort by design: keep the analyzer usable, but make the fallback
            // visible instead of silently switching algorithms.
            LOGGER.warn("Cannot use segmentation algorithm '{}', falling back to {}",
                    algorithmName, DEFAULT_ALGORITHM, e);
            return SegmentationFactory.getSegmentation(DEFAULT_ALGORITHM);
        }
    }

    /**
     * Runs {@code text} through {@code analyzer} and logs one line per token in the form
     * {@code term (startOffset - endOffset) positionIncrement}.
     */
    private static void logTokens(Analyzer analyzer, String text) throws IOException {
        // try-with-resources closes the stream even when incrementToken() throws, which
        // the analyzer needs before it can hand out the next stream for the same field.
        try (TokenStream stream = analyzer.tokenStream("text", text)) {
            // Attribute instances are stable for the life of the stream: fetch them once
            // rather than once per token.
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
            PositionIncrementAttribute position = stream.addAttribute(PositionIncrementAttribute.class);

            stream.reset();
            while (stream.incrementToken()) {
                LOGGER.info("{} ({} - {}) {}",
                        term.toString(), offset.startOffset(), offset.endOffset(),
                        position.getPositionIncrement());
            }
            // Required end-of-stream step of the TokenStream consumer workflow.
            stream.end();
        }
    }
}
