package org.apache.lucene.analysis.pattern;

import java.io.IOException;
import java.io.Reader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.AttributeFactory;

/* loaded from: input_file:org/apache/lucene/analysis/pattern/PatternTokenizer.class */
/**
 * Tokenizer driven by a regular expression.
 *
 * <p>The whole input is read into an in-memory buffer on {@link #reset()} and then scanned with
 * a single {@link Matcher}. Two modes are selected by the {@code group} value given at
 * construction time:
 *
 * <ul>
 *   <li>{@code group < 0}: the pattern acts as a delimiter. The text between consecutive
 *       matches (and after the last match) becomes the tokens; empty pieces are skipped.
 *   <li>{@code group >= 0}: every match contributes the text of that capturing group as a
 *       token; matches where the group is empty are skipped.
 * </ul>
 */
public final class PatternTokenizer extends Tokenizer {
    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);

    /** Holds the complete input text; refilled on every {@link #reset()}. */
    private final StringBuilder str = new StringBuilder();

    /**
     * Scan position inside {@link #str}. Set to {@link Integer#MAX_VALUE} once the stream is
     * exhausted so that further {@link #incrementToken()} calls return immediately.
     */
    private int index;

    /** Capturing group to emit, or a negative value for split mode. */
    private final int group;

    private final Matcher matcher;

    /** Scratch space used while draining the reader into {@link #str}. */
    final char[] buffer = new char[8192];

    /**
     * Creates a tokenizer using {@code DEFAULT_TOKEN_ATTRIBUTE_FACTORY}.
     *
     * @param reader source of the text to tokenize
     * @param pattern regular expression to apply
     * @param i capturing group to emit as tokens, or a negative value to split on the pattern
     * @throws IllegalArgumentException if {@code i} exceeds the pattern's capturing-group count
     */
    public PatternTokenizer(Reader reader, Pattern pattern, int i) {
        this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, reader, pattern, i);
    }

    /**
     * Creates a tokenizer with an explicit attribute factory.
     *
     * @param attributeFactory factory handed to the superclass constructor
     * @param reader source of the text to tokenize
     * @param pattern regular expression to apply
     * @param i capturing group to emit as tokens, or a negative value to split on the pattern
     * @throws IllegalArgumentException if {@code i} is non-negative and exceeds the pattern's
     *     capturing-group count
     */
    public PatternTokenizer(AttributeFactory attributeFactory, Reader reader, Pattern pattern, int i) {
        super(attributeFactory, reader);
        this.group = i;
        // Match against an empty string here: nothing is read from the reader until reset(),
        // so a rejected group number does not consume any input.
        this.matcher = pattern.matcher("");
        final int available = matcher.groupCount();
        if (i >= 0 && i > available) {
            throw new IllegalArgumentException("invalid group specified: pattern only has: " + available + " capturing groups");
        }
    }

    /**
     * Advances to the next token.
     *
     * @return {@code true} if a token was produced, {@code false} once the buffered text has
     *     been fully consumed
     */
    @Override
    public boolean incrementToken() {
        if (index >= str.length()) {
            return false;
        }
        clearAttributes();
        return group >= 0 ? nextGroupToken() : nextSplitToken();
    }

    /** Group mode: emits the configured group of the next match whose group text is non-empty. */
    private boolean nextGroupToken() {
        while (matcher.find()) {
            index = matcher.start(group);
            final int groupEnd = matcher.end(group);
            if (index == groupEnd) {
                // Empty group text: nothing to emit for this match.
                continue;
            }
            emitToken(index, groupEnd);
            return true;
        }
        index = Integer.MAX_VALUE; // mark exhausted
        return false;
    }

    /** Split mode: emits the next non-empty stretch of text lying between delimiter matches. */
    private boolean nextSplitToken() {
        while (matcher.find()) {
            final int delimiterStart = matcher.start();
            if (delimiterStart - index <= 0) {
                // Delimiter directly follows the previous one: skip the empty piece.
                index = matcher.end();
                continue;
            }
            emitToken(index, delimiterStart);
            index = matcher.end();
            return true;
        }

        // No delimiter left; whatever remains after the last one is the final token.
        final int length = str.length();
        final boolean hasTrailingText = length - index != 0;
        if (hasTrailingText) {
            emitToken(index, length);
        }
        index = Integer.MAX_VALUE; // mark exhausted
        return hasTrailingText;
    }

    /** Copies {@code str[from, to)} into the term attribute and records its corrected offsets. */
    private void emitToken(int from, int to) {
        termAtt.setEmpty().append(str, from, to);
        offsetAtt.setOffset(correctOffset(from), correctOffset(to));
    }

    /**
     * Sets both the start and end offset to the corrected length of the buffered text.
     *
     * @throws IOException if the superclass implementation fails
     */
    @Override
    public void end() throws IOException {
        super.end();
        final int finalOffset = correctOffset(str.length());
        offsetAtt.setOffset(finalOffset, finalOffset);
    }

    /**
     * Reads the entire input into memory, points the matcher at it and rewinds the scan
     * position to the beginning.
     *
     * @throws IOException if reading the input fails
     */
    @Override
    public void reset() throws IOException {
        super.reset();
        fillBuffer(str, input);
        matcher.reset(str);
        index = 0;
    }

    /**
     * Replaces the contents of {@code sb} with everything {@code reader} delivers, stopping at
     * the first read that returns zero or fewer characters.
     */
    private void fillBuffer(StringBuilder sb, Reader reader) throws IOException {
        sb.setLength(0);
        for (int count = reader.read(buffer); count > 0; count = reader.read(buffer)) {
            sb.append(buffer, 0, count);
        }
    }
}
