package org.apache.hadoop.streaming;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.configuration.tree.DefaultExpressionEngine;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;
import org.jets3t.service.utils.gatekeeper.GatekeeperMessage;

/* loaded from: input_file:hadoop-tools-dist-2.7.4.0/share/hadoop/tools/lib/hadoop-streaming-2.7.4.0.jar:org/apache/hadoop/streaming/StreamXmlRecordReader.class */
public class StreamXmlRecordReader extends StreamBaseRecordReader {
    int numNext;
    static final int CDATA_IN = 10;
    static final int CDATA_OUT = 11;
    static final int CDATA_UNK = 12;
    static final int RECORD_ACCEPT = 13;
    static final int CDATA_BEGIN = 20;
    static final int CDATA_END = 21;
    static final int RECORD_MAYBE = 22;
    String beginMark_;
    String endMark_;
    Pattern beginPat_;
    Pattern endPat_;
    boolean slowMatch_;
    int lookAhead_;
    int maxRecSize_;
    BufferedInputStream bin_;
    long pos_;
    static final int NA = -1;
    int firstMatchStart_;
    int firstMatchEnd_;
    boolean synched_;

    public StreamXmlRecordReader(FSDataInputStream fSDataInputStream, FileSplit fileSplit, Reporter reporter, JobConf jobConf, FileSystem fileSystem) throws IOException {
        super(fSDataInputStream, fileSplit, reporter, jobConf, fileSystem);
        this.numNext = 0;
        this.firstMatchStart_ = 0;
        this.firstMatchEnd_ = 0;
        this.beginMark_ = checkJobGet("stream.recordreader.begin");
        this.endMark_ = checkJobGet("stream.recordreader.end");
        this.maxRecSize_ = this.job_.getInt("stream.recordreader.maxrec", 50000);
        this.lookAhead_ = this.job_.getInt("stream.recordreader.lookahead", 2 * this.maxRecSize_);
        this.synched_ = false;
        this.slowMatch_ = this.job_.getBoolean("stream.recordreader.slowmatch", false);
        if (this.slowMatch_) {
            this.beginPat_ = makePatternCDataOrMark(this.beginMark_);
            this.endPat_ = makePatternCDataOrMark(this.endMark_);
        }
        init();
    }

    public void init() throws IOException {
        LOG.info("StreamBaseRecordReader.init:  start_=" + this.start_ + " end_=" + this.end_ + " length_=" + this.length_ + " start_ > in_.getPos() =" + (this.start_ > this.in_.getPos()) + " " + this.start_ + " > " + this.in_.getPos());
        if (this.start_ > this.in_.getPos()) {
            this.in_.seek(this.start_);
        }
        this.pos_ = this.start_;
        this.bin_ = new BufferedInputStream(this.in_);
        seekNextRecordBoundary();
    }

    @Override // org.apache.hadoop.streaming.StreamBaseRecordReader
    public synchronized boolean next(Text text, Text text2) throws IOException {
        this.numNext++;
        if (this.pos_ >= this.end_) {
            return false;
        }
        DataOutputBuffer dataOutputBuffer = new DataOutputBuffer();
        if (!readUntilMatchBegin() || this.pos_ >= this.end_ || !readUntilMatchEnd(dataOutputBuffer)) {
            return false;
        }
        byte[] bArr = new byte[dataOutputBuffer.getLength()];
        System.arraycopy(dataOutputBuffer.getData(), 0, bArr, 0, bArr.length);
        numRecStats(bArr, 0, bArr.length);
        text.set(bArr);
        text2.set("");
        return true;
    }

    @Override // org.apache.hadoop.streaming.StreamBaseRecordReader
    public void seekNextRecordBoundary() throws IOException {
        readUntilMatchBegin();
    }

    boolean readUntilMatchBegin() throws IOException {
        return this.slowMatch_ ? slowReadUntilMatch(this.beginPat_, false, null) : fastReadUntilMatch(this.beginMark_, false, null);
    }

    private boolean readUntilMatchEnd(DataOutputBuffer dataOutputBuffer) throws IOException {
        return this.slowMatch_ ? slowReadUntilMatch(this.endPat_, true, dataOutputBuffer) : fastReadUntilMatch(this.endMark_, true, dataOutputBuffer);
    }

    private boolean slowReadUntilMatch(Pattern pattern, boolean z, DataOutputBuffer dataOutputBuffer) throws IOException {
        int i;
        byte[] bArr = new byte[Math.max(this.lookAhead_, this.maxRecSize_)];
        this.bin_.mark(Math.max(this.lookAhead_, this.maxRecSize_) + 2);
        int read = this.bin_.read(bArr);
        if (read == -1) {
            return false;
        }
        String str = new String(bArr, 0, read, "UTF-8");
        Matcher matcher = pattern.matcher(str);
        this.firstMatchStart_ = -1;
        this.firstMatchEnd_ = -1;
        int i2 = 0;
        int i3 = this.synched_ ? 11 : 12;
        int i4 = 0;
        while (matcher.find(i2)) {
            if (matcher.group(1) != null) {
                i = 20;
            } else if (matcher.group(2) != null) {
                i = 21;
                this.firstMatchStart_ = -1;
            } else {
                i = 22;
            }
            if (i == 22 && this.firstMatchStart_ == -1) {
                this.firstMatchStart_ = matcher.start();
                this.firstMatchEnd_ = matcher.end();
            }
            i3 = nextState(i3, i, matcher.start());
            if (i3 == 13) {
                break;
            }
            i2 = matcher.end();
            i4++;
        }
        if (i3 != 12) {
            this.synched_ = true;
        }
        boolean z2 = this.firstMatchStart_ != -1 && (i3 == 13 || i3 == 12);
        if (z2) {
            int i5 = z ? this.firstMatchEnd_ : this.firstMatchStart_;
            this.bin_.reset();
            long j = i5;
            while (true) {
                long j2 = j;
                if (j2 <= 0) {
                    break;
                }
                j = j2 - this.bin_.skip(j2);
            }
            this.pos_ += i5;
            if (dataOutputBuffer != null) {
                dataOutputBuffer.writeBytes(str.substring(0, i5));
            }
        }
        return z2;
    }

    int nextState(int i, int i2, int i3) {
        switch (i) {
            case 10:
                return i2 == 21 ? 11 : 10;
            case 11:
            case 12:
                switch (i2) {
                    case 20:
                        return 10;
                    case 21:
                        if (i == 11) {
                        }
                        return 11;
                    case 22:
                        return i == 12 ? 12 : 13;
                }
        }
        throw new IllegalStateException(i + " " + i2 + " " + i3 + " " + this.splitName_);
    }

    Pattern makePatternCDataOrMark(String str) {
        StringBuffer stringBuffer = new StringBuffer();
        addGroup(stringBuffer, StreamUtil.regexpEscape("CDATA["));
        addGroup(stringBuffer, StreamUtil.regexpEscape("]]>"));
        addGroup(stringBuffer, str);
        return Pattern.compile(stringBuffer.toString());
    }

    void addGroup(StringBuffer stringBuffer, String str) {
        if (stringBuffer.length() > 0) {
            stringBuffer.append(GatekeeperMessage.DELIM);
        }
        stringBuffer.append(DefaultExpressionEngine.DEFAULT_INDEX_START);
        stringBuffer.append(str);
        stringBuffer.append(DefaultExpressionEngine.DEFAULT_INDEX_END);
    }

    boolean fastReadUntilMatch(String str, boolean z, DataOutputBuffer dataOutputBuffer) throws IOException {
        byte[] bytes = str.getBytes("UTF-8");
        int i = 0;
        boolean z2 = false;
        int length = bytes.length;
        this.bin_.mark(1200000);
        while (true) {
            int read = this.bin_.read();
            if (read == -1) {
                break;
            }
            byte b = (byte) read;
            if (b == bytes[i]) {
                i++;
                if (i == length) {
                    z2 = true;
                    break;
                }
            } else {
                this.bin_.mark(1200000);
                if (dataOutputBuffer != null) {
                    dataOutputBuffer.write(bytes, 0, i);
                    dataOutputBuffer.write(b);
                }
                this.pos_ += i + 1;
                i = 0;
            }
        }
        if (!z && z2) {
            this.bin_.reset();
        } else if (dataOutputBuffer != null) {
            dataOutputBuffer.write(bytes);
            this.pos_ += length;
        }
        return z2;
    }

    String checkJobGet(String str) throws IOException {
        String str2 = this.job_.get(str);
        if (str2 == null) {
            throw new IOException("JobConf: missing required property: " + str);
        }
        return str2;
    }
}
