package water.parser;

import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Iterator;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import jsr166y.CountedCompleter;
import water.DKV;
import water.Freezable;
import water.Futures;
import water.H2O;
import water.Iced;
import water.Job;
import water.Key;
import water.Keyed;
import water.Lockable;
import water.MRTask;
import water.MemoryManager;
import water.Value;
import water.fvec.AppendableVec;
import water.fvec.ByteVec;
import water.fvec.C0DChunk;
import water.fvec.C4Chunk;
import water.fvec.CStrChunk;
import water.fvec.Chunk;
import water.fvec.Frame;
import water.fvec.NewChunk;
import water.fvec.Vec;
import water.nbhm.NonBlockingHashMap;
import water.nbhm.NonBlockingSetInt;
import water.parser.Parser;
import water.parser.ZipUtil;
import water.util.ArrayUtils;
import water.util.FrameUtils;
import water.util.Log;
import water.util.PrettyPrint;

/* loaded from: input_file:water/parser/ParseDataset.class */
public final class ParseDataset extends Job<Frame> {
    private MultiFileParseTask _mfpt;
    static final /* synthetic */ boolean $assertionsDisabled;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:water/parser/ParseDataset$EnumFetchTask.class */
    /**
     * Cluster-wide task that gathers the {@link Categorical} arrays registered in
     * {@code MultiFileParseTask._enums} under a given key.
     *
     * <p>On each node {@link #setupLocal()} records the node's own array in
     * {@code _lEnums[nodeIndex]} and keeps a deep copy of it in {@code _gEnums}; {@link #reduce}
     * then merges the {@code _gEnums} copies (for the columns listed in {@code _ecols}) and
     * combines the per-node slots of {@code _lEnums}.
     */
    public static class EnumFetchTask extends MRTask<EnumFetchTask> {
        /** Key under which the categoricals were registered in {@code MultiFileParseTask._enums}. */
        private final Key _k;
        /** Column indices whose categoricals are merged in {@link #reduce}. */
        private final int[] _ecols;
        /** Merged (deep-copied) categoricals, indexed by column; {@code null} for empty columns. */
        private Categorical[] _gEnums;
        /** Per-node categoricals, indexed first by node index, then by column. */
        public Categorical[][] _lEnums;

        private EnumFetchTask(Key key, int[] iArr) {
            this._k = key;
            this._ecols = iArr;
        }

        /**
         * Picks up this node's categoricals (if any), nulls out empty columns, deep-copies the
         * rest into {@code _gEnums} and unregisters the entry from the shared map.
         */
        @Override // water.MRTask
        public void setupLocal() {
            // One slot per cloud member; the inner arrays are filled in lazily per node.
            _lEnums = new Categorical[H2O.CLOUD.size()][];
            // Single lookup instead of containsKey()+get(), so a concurrent removal cannot
            // slip in between the check and the read.
            Categorical[] local = MultiFileParseTask._enums.get(_k);
            if (local == null) {
                return;
            }
            _lEnums[H2O.SELF.index()] = local;
            // Columns that never saw a categorical value are dropped (set to null) in place.
            for (int col = 0; col < local.length; col++) {
                if (local[col].size() == 0) {
                    local[col] = null;
                }
            }
            // Merge into private deep copies so that reduce() does not mutate the per-node
            // objects still referenced from _lEnums.
            Categorical[] merged = local.clone();
            for (int col = 0; col < merged.length; col++) {
                if (merged[col] != null) {
                    merged[col] = merged[col].deepCopy();
                }
            }
            _gEnums = merged;
            MultiFileParseTask._enums.remove(_k);
        }

        /**
         * Folds another task's results into this one.
         *
         * @param enumFetchTask the task whose {@code _gEnums}/{@code _lEnums} are merged in
         */
        @Override // water.MRTask
        public void reduce(EnumFetchTask enumFetchTask) {
            if (_gEnums == null) {
                // Nothing local: adopt the other side wholesale.
                _gEnums = enumFetchTask._gEnums;
                _lEnums = enumFetchTask._lEnums;
                return;
            }
            if (enumFetchTask._gEnums == null) {
                return;
            }
            for (int col : _ecols) {
                if (_gEnums[col] == null) {
                    _gEnums[col] = enumFetchTask._gEnums[col];
                } else if (enumFetchTask._gEnums[col] != null) {
                    _gEnums[col].merge(enumFetchTask._gEnums[col]);
                }
            }
            for (int node = 0; node < _lEnums.length; node++) {
                if (_lEnums[node] == null) {
                    _lEnums[node] = enumFetchTask._lEnums[node];
                } else {
                    // A node's slot must be populated by at most one side of the reduction.
                    assert enumFetchTask._lEnums[node] == null;
                }
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:water/parser/ParseDataset$EnumMapping.class */
    /**
     * Serializable holder for a two-dimensional {@code int} lookup table. In this file it is
     * read by {@code EnumUpdateTask}, which indexes it as {@code map[column][oldValue]}.
     */
    public static class EnumMapping extends Iced {
        /** The wrapped table; stored by reference, not copied. */
        final int[][] map;

        /**
         * @param mapping the table to wrap
         */
        public EnumMapping(int[][] mapping) {
            map = mapping;
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:water/parser/ParseDataset$EnumUpdateTask.class */
    /**
     * Task that rewrites the values stored in each chunk through a per-column lookup table
     * ({@link EnumMapping}), selected by the chunk's index via {@code _chunk2Enum}.
     */
    public static class EnumUpdateTask extends MRTask<EnumUpdateTask> {
        /** Per-column domain; a {@code null} entry makes the column's chunk be replaced by NaNs. */
        private final ValueString[][] _gDomain;
        /** Lookup tables, selected through {@code _chunk2Enum}. */
        private final EnumMapping[] _emaps;
        /** For each chunk index, the index into {@code _emaps} to use. */
        private final int[] _chunk2Enum;

        private EnumUpdateTask(ValueString[][] valueStringArr, EnumMapping[] enumMappingArr, int[] iArr) {
            this._gDomain = valueStringArr;
            this._emaps = enumMappingArr;
            this._chunk2Enum = iArr;
        }

        /** Returns the lookup table stored at position {@code i} of {@code _emaps}. */
        private int[][] emap(int i) {
            return this._emaps[i].map;
        }

        /**
         * Processes one row-aligned set of chunks (one chunk per column).
         *
         * <ul>
         *   <li>column without a domain: the chunk is overwritten with a constant-NaN chunk;</li>
         *   <li>string chunk: left untouched (and not closed here);</li>
         *   <li>anything else: every non-NA value is replaced by its mapped value.</li>
         * </ul>
         */
        @Override // water.MRTask
        public void map(Chunk[] chunkArr) {
            final int chunkIdx = chunkArr[0].cidx();
            final int[][] table = emap(this._chunk2Enum[chunkIdx]);
            for (int col = 0; col < chunkArr.length; col++) {
                final Chunk chk = chunkArr[col];
                if (this._gDomain[col] != null) {
                    if (chk instanceof CStrChunk) {
                        continue;
                    }
                    renumber(chk, col, table);
                } else {
                    DKV.put(chk.vec().chunkKey(chk.cidx()), new C0DChunk(Double.NaN, chk._len));
                }
                chk.close(chunkIdx, this._fs);
            }
        }

        /**
         * Replaces every non-NA value of {@code chk} with {@code table[col][value]}.
         *
         * @throws RuntimeException if a value maps to a negative entry
         */
        private void renumber(Chunk chk, int col, int[][] table) {
            for (int row = 0; row < chk._len; row++) {
                if (chk.isNA(row)) {
                    continue;
                }
                final long old = chk.at8(row);
                if (old < 0 || old >= table[col].length) {
                    chk.reportBrokenEnum(col, row, old, table, this._gDomain[col].length);
                }
                final int mapped = table[col][(int) old];
                if (mapped < 0) {
                    throw new RuntimeException(H2O.SELF.toString() + ": missing enum at col:" + col + ", line: " + (chk.start() + row) + ", val = " + old + ", chunk=" + chk.getClass().getSimpleName() + ", map = " + Arrays.toString(table[col]));
                }
                chk.set(row, mapped);
            }
        }
    }

    /* loaded from: input_file:water/parser/ParseDataset$FVecDataIn.class */
    /**
     * {@link Parser.DataIn} implementation that serves raw bytes out of the chunks of a
     * {@link Vec}, caching the most recently requested chunk.
     */
    private static class FVecDataIn implements Parser.DataIn {
        /** Vec the chunks are read from. */
        final Vec _vec;
        /** Cached chunk for index {@code _idx}; {@code null} if that index is out of range. */
        Chunk _chk;
        /** Chunk index that {@code _chk} corresponds to. */
        int _idx;
        /** Start offset of the chunk this reader was created for. */
        final long _firstLine;

        /**
         * @param chunk the initial chunk; its vec is used for all subsequent lookups
         */
        public FVecDataIn(Chunk chunk) {
            this._chk = chunk;
            this._idx = this._chk.cidx();
            this._firstLine = chunk.start();
            this._vec = chunk.vec();
        }

        /**
         * Returns the bytes of chunk {@code i}.
         *
         * @param i chunk index within the underlying vec
         * @return the chunk's bytes, or {@code null} if {@code i} is at or past the vec's chunk count
         */
        @Override // water.parser.Parser.DataIn
        public byte[] getChunkData(int i) {
            if (i != this._idx) {
                // Always record which index the cache now describes, even for an out-of-range
                // request. Otherwise a null cache entry would be returned for the previously
                // cached (valid) index on the next call.
                this._idx = i;
                this._chk = i < this._vec.nChunks() ? this._vec.chunkForChunkIdx(i) : null;
            }
            return this._chk == null ? null : this._chk.getBytes();
        }

        /** Not tracked by this implementation; always returns {@code -1}. */
        @Override // water.parser.Parser.DataIn
        public int getChunkDataStart(int i) {
            return -1;
        }

        /** Not tracked by this implementation; intentionally a no-op. */
        @Override // water.parser.Parser.DataIn
        public void setChunkDataStart(int i, int i2) {
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:water/parser/ParseDataset$FVecDataOut.class */
    /**
     * Parser output sink that appends parsed values into one {@link NewChunk} per column, each
     * backed by an {@link AppendableVec}.
     *
     * <p>Column types are tracked per column in {@code _ctypes} using the {@code UCOL}..{@code SCOL}
     * codes and are refined as values arrive (see {@link #addStrCol}).
     */
    public static class FVecDataOut extends Iced implements Parser.StreamDataOut {
        /** Open chunks being written, one per column; set to {@code null} once closed. */
        protected transient NewChunk[] _nvs;
        /** Vecs being built, one per column; set to {@code null} by {@link #closeVecs()}. */
        protected AppendableVec[] _vecs;
        /** Per-column categorical dictionaries shared with the caller. */
        protected final Categorical[] _enums;
        /** Per-column type codes ({@code UCOL}..{@code SCOL}). */
        protected transient byte[] _ctypes;
        /** Number of lines that contained at least one written column. */
        long _nLines;
        int _nCols;
        /** Index of the last column written on the current line, or {@code -1} if none yet. */
        int _col;
        final int _cidx;
        final int _vecIdStart;
        boolean _closedVecs;
        private final Vec.VectorGroup _vg;
        /** Column type not determined yet. */
        static final byte UCOL = 0;
        /** Numeric column. */
        static final byte NCOL = 1;
        /** Enum (categorical) column. */
        static final byte ECOL = 2;
        /** Time column. */
        static final byte TCOL = 3;
        /** UUID column. */
        static final byte ICOL = 4;
        /** String column. */
        static final byte SCOL = 5;

        /**
         * Maps a column type code to its display name.
         *
         * @param b one of {@code UCOL}, {@code NCOL}, {@code ECOL}, {@code TCOL}, {@code ICOL}, {@code SCOL}
         * @return the human-readable type name
         * @throws RuntimeException for any other code
         */
        public static String ctypeToDataTypeName(byte b) {
            switch (b) {
                case UCOL:
                    return "unknown";
                case NCOL:
                    return "numeric";
                case ECOL:
                    return "enum";
                case TCOL:
                    return "time";
                case ICOL:
                    return "uuid";
                case SCOL:
                    return "string";
                default:
                    throw new RuntimeException("ctypeToDataTypeName case unhandled");
            }
        }

        /**
         * @param vg         vector group used to derive the vec keys
         * @param cidx       index of the chunk this instance writes
         * @param ncols      number of columns
         * @param vecIdStart id of the first vec key within {@code vg}
         * @param enums      per-column categorical dictionaries
         * @param ctypes     preset column types, or {@code null} to start with a freshly
         *                   allocated array of {@code ncols} bytes
         */
        private FVecDataOut(Vec.VectorGroup vg, int cidx, int ncols, int vecIdStart, Categorical[] enums, byte[] ctypes) {
            this._col = -1;
            this._closedVecs = false;
            this._ctypes = ctypes == null ? MemoryManager.malloc1(ncols) : ctypes;
            this._vecs = new AppendableVec[ncols];
            this._nvs = new NewChunk[ncols];
            this._enums = enums;
            this._nCols = ncols;
            this._cidx = cidx;
            this._vg = vg;
            this._vecIdStart = vecIdStart;
            for (int c = 0; c < ncols; c++) {
                AppendableVec av = new AppendableVec(vg.vecKey(vecIdStart + c));
                this._vecs[c] = av;
                this._nvs[c] = av.chunkForChunkIdx(this._cidx);
            }
        }

        /**
         * Merges another output into this one, column by column. If exactly one side holds a
         * string column, the other side's chunk is converted to strings first.
         *
         * @param streamDataOut the other {@code FVecDataOut}, or {@code null} (no-op)
         * @return {@code this}
         */
        @Override // water.parser.Parser.StreamDataOut
        public FVecDataOut reduce(Parser.StreamDataOut streamDataOut) {
            FVecDataOut other = (FVecDataOut) streamDataOut;
            if (other == null) {
                return this;
            }
            this._nCols = Math.max(this._nCols, other._nCols);
            // Keep the longer vec array on this side so the loop below covers the shorter one.
            if (other._vecs.length > this._vecs.length) {
                AppendableVec[] mine = this._vecs;
                this._vecs = other._vecs;
                other._vecs = mine;
            }
            for (int c = 0; c < other._vecs.length; c++) {
                if (this._vecs[c].isString() && !other._vecs[c].isString()) {
                    other.enumCol2StrCol(c);
                } else if (!this._vecs[c].isString() && other._vecs[c].isString()) {
                    enumCol2StrCol(c);
                    this._ctypes[c] = SCOL;
                }
                this._vecs[c].reduce(other._vecs[c]);
            }
            return this;
        }

        /** Closes all open chunks and blocks until the pending writes have completed. */
        @Override // water.parser.Parser.StreamDataOut
        public FVecDataOut close() {
            Futures futures = new Futures();
            close(futures);
            futures.blockForPending();
            return this;
        }

        /**
         * Closes all open chunks, registering pending work on {@code futures}. Calling it again
         * after the chunks were closed is a no-op.
         */
        @Override // water.parser.Parser.StreamDataOut
        public FVecDataOut close(Futures futures) {
            if (this._nvs == null) {
                return this;
            }
            for (NewChunk nc : this._nvs) {
                nc.close(this._cidx, futures);
            }
            this._nvs = null;
            return this;
        }

        /** Creates the output for the following chunk index, with untyped columns. */
        @Override // water.parser.Parser.StreamDataOut
        public FVecDataOut nextChunk() {
            return new FVecDataOut(this._vg, this._cidx + 1, this._nCols, this._vecIdStart, this._enums, null);
        }

        /**
         * Finalizes all vecs. Before closing, per-chunk row counts ({@code _espc}) that are zero
         * in some vec are filled in from the first vec with a non-zero count for that chunk.
         *
         * @return the closed vecs, in column order
         */
        public Vec[] closeVecs() {
            Futures futures = new Futures();
            this._closedVecs = true;
            Vec[] result = new Vec[this._vecs.length];
            for (int chunk = 0; chunk < this._vecs[0]._espc.length; chunk++) {
                int firstNonEmpty = 0;
                while (firstNonEmpty < this._vecs.length && this._vecs[firstNonEmpty]._espc[chunk] == 0) {
                    firstNonEmpty++;
                }
                if (firstNonEmpty == this._vecs.length) {
                    // Every vec reports zero rows for this chunk: stop scanning.
                    break;
                }
                long rows = this._vecs[firstNonEmpty]._espc[chunk];
                for (AppendableVec av : this._vecs) {
                    if (av._espc[chunk] == 0) {
                        av._espc[chunk] = rows;
                    } else {
                        assert av._espc[chunk] == rows : "incompatible number of lines: " + av._espc[chunk] + " != " + rows;
                    }
                }
            }
            for (int c = 0; c < this._vecs.length; c++) {
                result[c] = this._vecs[c].close(futures);
            }
            this._vecs = null;
            futures.blockForPending();
            return result;
        }

        /**
         * Ends the current line. If anything was written on it, the line is counted and all
         * columns after the last written one are filled with NA.
         */
        @Override // water.parser.Parser.DataOut
        public void newLine() {
            if (this._col >= 0) {
                this._nLines++;
                for (int c = this._col + 1; c < this._nCols; c++) {
                    addInvalidCol(c);
                }
            }
            this._col = -1;
        }

        /**
         * Appends a number given as mantissa {@code j} and exponent {@code i2} to column
         * {@code i}; an untyped column becomes numeric. Out-of-range columns are ignored.
         * (The exponent is presumably base 10, matching {@link #addNumCol(int, double)}.)
         */
        @Override // water.parser.Parser.DataOut
        public void addNumCol(int i, long j, int i2) {
            if (i < this._nCols) {
                this._col = i;
                this._nvs[i].addNum(j, i2);
                if (this._ctypes[i] == UCOL) {
                    this._ctypes[i] = NCOL;
                }
            }
        }

        /** Appends an NA to column {@code i}; out-of-range columns are ignored. */
        @Override // water.parser.Parser.DataOut
        public final void addInvalidCol(int i) {
            if (i < this._nCols) {
                this._col = i;
                this._nvs[i].addNA();
            }
        }

        /** Returns whether column {@code i} is currently typed as enum or string. */
        @Override // water.parser.Parser.DataOut
        public final boolean isString(int i) {
            return i < this._nCols && (this._ctypes[i] == ECOL || this._ctypes[i] == SCOL);
        }

        /**
         * Appends a textual token to column {@code i}, dispatching on the column's type.
         *
         * <p>A numeric column receives NA. An untyped column is first probed as time, then as
         * UUID. Time and UUID columns store the parsed value (NA / a sentinel UUID on failure),
         * string columns store the text, and everything else is stored as a categorical id;
         * when the dictionary reports it is full the column is converted to a string column.
         */
        @Override // water.parser.Parser.DataOut
        public final void addStrCol(int i, ValueString valueString) {
            if (i >= this._nvs.length) {
                return;
            }
            if (this._ctypes[i] == NCOL) {
                // Text in a numeric column is recorded as missing.
                addInvalidCol(i);
                return;
            }
            if (this._ctypes[i] == UCOL && ParseTime.attemptTimeParse(valueString) > 0) {
                this._ctypes[i] = TCOL;
            }
            if (this._ctypes[i] == UCOL) {
                // The UUID probes move the token's offset; -1 signals failure. Restore it after.
                int savedOff = valueString.get_off();
                ParseTime.attemptUUIDParse0(valueString);
                ParseTime.attemptUUIDParse1(valueString);
                if (valueString.get_off() != -1) {
                    this._ctypes[i] = ICOL;
                }
                valueString.setOff(savedOff);
            }
            if (this._ctypes[i] == TCOL) {
                long encoded = ParseTime.attemptTimeParse(valueString);
                if (encoded == Long.MIN_VALUE) {
                    addInvalidCol(i);
                    return;
                }
                int pattern = ParseTime.decodePat(encoded);
                addNumCol(i, ParseTime.decodeTime(encoded), 0);
                // Count how often each time pattern was seen in this chunk.
                this._nvs[this._col]._timCnt[pattern]++;
                return;
            }
            if (this._ctypes[i] == ICOL) {
                long lo = ParseTime.attemptUUIDParse0(valueString);
                long hi = ParseTime.attemptUUIDParse1(valueString);
                if (valueString.get_off() == -1) {
                    // Unparseable: store the sentinel pair used for a bad UUID.
                    lo = Long.MAX_VALUE;
                    hi = 0;
                }
                if (i < this._nCols) {
                    this._col = i;
                    this._nvs[i].addUUID(lo, hi);
                }
                return;
            }
            if (this._ctypes[i] == SCOL) {
                this._col = i;
                this._nvs[i].addStr(valueString);
                return;
            }
            if (this._enums[i].isMapFull()) {
                // Dictionary is full: switch the column to plain strings from here on.
                this._col = i;
                this._ctypes[i] = SCOL;
                enumCol2StrCol(i);
                this._nvs[i].addStr(valueString);
                return;
            }
            this._col = i;
            int id = this._enums[i].addKey(valueString);
            if (this._ctypes[i] == UCOL && id > 1) {
                this._ctypes[i] = ECOL;
            }
            this._nvs[i].addEnum(id);
        }

        /**
         * Converts the open chunk of column {@code i} from categorical ids to strings, using a
         * snapshot of the column's dictionary to build the id-to-string table.
         */
        private void enumCol2StrCol(int i) {
            Categorical snapshot = this._enums[i].deepCopy();
            ValueString[] byId = new ValueString[snapshot.maxId() + 1];
            ValueString[] keys = (ValueString[]) snapshot._map.keySet().toArray(new ValueString[snapshot.size()]);
            for (ValueString key : keys) {
                // Ids are shifted down by one to form the table index.
                byId[snapshot._map.get(key).intValue() - 1] = key;
            }
            this._nvs[i] = this._nvs[i].convertEnum2Str(byId);
        }

        /**
         * Appends a double by decomposing it into an integral mantissa and a base-10 exponent
         * and delegating to {@link #addNumCol(int, long, int)}.
         *
         * <p>NaN and infinite values are stored as NA. Magnitudes that do not fit in a
         * {@code long} are scaled down by powers of ten (with a positive exponent) so that the
         * decomposition always terminates.
         * NOTE(review): assumes {@code NewChunk.addNum} accepts positive exponents - confirm.
         */
        @Override // water.parser.Parser.DataOut
        public void addNumCol(int i, double d) {
            if (Double.isNaN(d) || Double.isInfinite(d)) {
                addInvalidCol(i);
                return;
            }
            final double longLimit = 0x1.0p63; // 2^63, first magnitude that overflows a long
            double mantissa = d;
            int exp = 0;
            // Casting an out-of-range double to long saturates, so the equality test below could
            // never succeed; bring the value into range first. Doubles this large are always
            // integral, so the cast is exact once the value fits.
            while (mantissa >= longLimit || mantissa < -longLimit) {
                mantissa /= 10.0d;
                exp++;
            }
            long asLong = (long) mantissa;
            while (asLong != mantissa) {
                mantissa *= 10.0d;
                exp--;
                asLong = (long) mantissa;
            }
            addNumCol(i, asLong, exp);
        }

        /** Column names are not used by this sink; intentionally a no-op. */
        @Override // water.parser.Parser.DataOut
        public void setColumnNames(String[] strArr) {
        }

        /** Rollback is not supported by this sink; intentionally a no-op. */
        @Override // water.parser.Parser.DataOut
        public final void rollbackLine() {
        }

        /** Treats an invalid line like a regular end of line; the message is ignored. */
        @Override // water.parser.Parser.DataOut
        public void invalidLine(String str) {
            newLine();
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:water/parser/ParseDataset$MultiFileParseTask.class */
    public static class MultiFileParseTask extends MRTask<MultiFileParseTask> {
        private final ParseSetup _setup;
        private final Vec.VectorGroup _vg;
        private final int _vecIdStart;
        private static NonBlockingHashMap<Key, Categorical[]> _enums;
        private final Key _eKey = Key.make();
        private final boolean _delete_on_done;
        private int[] _chunk2Enum;
        private final Key _job_key;
        private final int[] _fileChunkOffsets;
        FVecDataOut _dout;
        String[] _errors;
        static final /* synthetic */ boolean $assertionsDisabled;

        /* loaded from: input_file:water/parser/ParseDataset$MultiFileParseTask$DParse.class */
        private static class DParse extends MRTask<DParse> {
            private final ParseSetup _setup;
            private final int _vecIdStart;
            private final int _startChunkIdx;
            private final Vec.VectorGroup _vg;
            private FVecDataOut _dout;
            private final Key _eKey;
            private final Key _job_key;
            private final transient MultiFileParseTask _outerMFPT;
            private final transient Key _srckey;
            private transient NonBlockingSetInt _visited;

            DParse(Vec.VectorGroup vectorGroup, ParseSetup parseSetup, int i, int i2, MultiFileParseTask multiFileParseTask, Key key) {
                super(multiFileParseTask);
                this._vg = vectorGroup;
                this._setup = parseSetup;
                this._vecIdStart = i;
                this._startChunkIdx = i2;
                this._outerMFPT = multiFileParseTask;
                this._eKey = multiFileParseTask._eKey;
                this._job_key = multiFileParseTask._job_key;
                this._srckey = key;
            }

            @Override // water.MRTask
            public void setupLocal() {
                super.setupLocal();
                this._visited = new NonBlockingSetInt();
            }

            @Override // water.MRTask
            public void map(Chunk chunk) {
                Parser sVMLightParser;
                FVecDataOut sVMLightFVecDataOut;
                Categorical[] enums = MultiFileParseTask.enums(this._eKey, this._setup._ncols);
                Parser.DataIn fVecDataIn = new FVecDataIn(chunk);
                switch (this._setup._pType) {
                    case CSV:
                        sVMLightParser = new CsvParser(this._setup);
                        sVMLightFVecDataOut = new FVecDataOut(this._vg, this._startChunkIdx + chunk.cidx(), this._setup._ncols, this._vecIdStart, enums, null);
                        break;
                    case ARFF:
                        sVMLightParser = new CsvParser(this._setup);
                        sVMLightFVecDataOut = new FVecDataOut(this._vg, this._startChunkIdx + chunk.cidx(), this._setup._ncols, this._vecIdStart, enums, this._setup._ctypes);
                        break;
                    case SVMLight:
                        sVMLightParser = new SVMLightParser(this._setup);
                        sVMLightFVecDataOut = new SVMLightFVecDataOut(this._vg, this._startChunkIdx + chunk.cidx(), enums);
                        break;
                    default:
                        throw H2O.unimpl();
                }
                sVMLightParser.parallelParse(chunk.cidx(), fVecDataIn, sVMLightFVecDataOut);
                FVecDataOut fVecDataOut = sVMLightFVecDataOut;
                this._dout = fVecDataOut;
                fVecDataOut.close(this._fs);
                Job.update(chunk._len, this._job_key);
                freeMem(chunk, 0);
                freeMem(chunk, 1);
            }

            private void freeMem(Chunk chunk, int i) {
                Value value;
                int cidx = chunk.cidx() + i;
                if (this._visited.add(cidx) || (value = H2O.get(chunk.vec().chunkKey(cidx))) == null || !value.isPersisted()) {
                    return;
                }
                value.freePOJO();
                value.freeMem();
            }

            @Override // water.MRTask
            public void reduce(DParse dParse) {
                this._dout.reduce((Parser.StreamDataOut) dParse._dout);
            }

            @Override // water.MRTask
            public void postGlobal() {
                super.postGlobal();
                this._outerMFPT._dout = this._dout;
                this._dout = null;
                Value value = DKV.get(this._srckey);
                if (value == null) {
                    return;
                }
                Iced iced = value.get();
                if (iced instanceof ByteVec) {
                    if (this._outerMFPT._delete_on_done) {
                        ((ByteVec) iced).remove();
                        return;
                    }
                    return;
                }
                Frame frame = (Frame) iced;
                if (this._outerMFPT._delete_on_done) {
                    frame.delete(this._outerMFPT._job_key, new Futures()).blockForPending();
                } else if (frame._key != null) {
                    frame.unlock(this._outerMFPT._job_key);
                }
            }
        }

        MultiFileParseTask(Vec.VectorGroup vectorGroup, ParseSetup parseSetup, Key key, Key[] keyArr, boolean z) {
            this._vg = vectorGroup;
            this._setup = parseSetup;
            this._vecIdStart = this._vg.reserveKeys(this._setup._pType == ParserType.SVMLight ? 100000000 : parseSetup._ncols);
            this._delete_on_done = z;
            this._job_key = key;
            this._fileChunkOffsets = new int[keyArr.length];
            int i = 0;
            for (int i2 = 0; i2 < keyArr.length; i2++) {
                this._fileChunkOffsets[i2] = i;
                i += ParseDataset.getByteVec(keyArr[i2]).nChunks();
            }
            this._chunk2Enum = MemoryManager.malloc4(i);
            Arrays.fill(this._chunk2Enum, -1);
        }

        /* JADX INFO: Access modifiers changed from: private */
        public static Categorical[] enums(Key key, int i) {
            Categorical[] categoricalArr = _enums.get(key);
            if (categoricalArr != null) {
                return categoricalArr;
            }
            Categorical[] categoricalArr2 = new Categorical[i];
            for (int i2 = 0; i2 < categoricalArr2.length; i2++) {
                categoricalArr2[i2] = new Categorical();
            }
            _enums.putIfAbsent(key, categoricalArr2);
            return _enums.get(key);
        }

        private void chunksAreLocal(Vec vec, int i, Key key) {
            for (int i2 = 0; i2 < vec.nChunks(); i2++) {
                this._chunk2Enum[i + i2] = H2O.SELF.index();
            }
            Iced iced = DKV.get(key).get();
            if (iced == vec) {
                if (this._delete_on_done) {
                    vec.remove();
                    return;
                }
                return;
            }
            Frame frame = (Frame) iced;
            if (this._delete_on_done) {
                frame.delete(this._job_key, new Futures()).blockForPending();
            } else if (frame._key != null) {
                frame.unlock(this._job_key);
            }
        }

        @Override // water.MRTask
        public void map(Key key) {
            ByteVec byteVec = ParseDataset.getByteVec(key);
            int i = this._fileChunkOffsets[this._lo];
            byte[] firstBytes = byteVec.getFirstBytes();
            ZipUtil.Compression guessCompressionMethod = ZipUtil.guessCompressionMethod(firstBytes);
            ParseSetup guessSetup = this._setup.guessSetup(ZipUtil.unzipBytes(firstBytes, guessCompressionMethod), 0);
            if (!guessSetup._isValid) {
                this._errors = guessSetup._errors;
                chunksAreLocal(byteVec, i, key);
                return;
            }
            try {
                switch (guessCompressionMethod) {
                    case NONE:
                        if (guessSetup._pType._parallelParseSupported) {
                            DParse dParse = new DParse(this._vg, guessSetup, this._vecIdStart, i, this, key);
                            addToPendingCount(1);
                            dParse.setCompleter(this);
                            dParse.asyncExec(byteVec);
                            for (int i2 = 0; i2 < byteVec.nChunks(); i2++) {
                                this._chunk2Enum[i + i2] = byteVec.chunkKey(i2).home_node().index();
                            }
                            break;
                        } else {
                            InputStream openStream = byteVec.openStream(this._job_key);
                            this._dout = streamParse(openStream, guessSetup, this._vecIdStart, i, openStream);
                            chunksAreLocal(byteVec, i, key);
                            break;
                        }
                    case ZIP:
                        InputStream openStream2 = byteVec.openStream(this._job_key);
                        ZipInputStream zipInputStream = new ZipInputStream(openStream2);
                        ZipEntry nextEntry = zipInputStream.getNextEntry();
                        if (nextEntry == null || nextEntry.isDirectory()) {
                            zipInputStream.close();
                        } else {
                            this._dout = streamParse(zipInputStream, guessSetup, this._vecIdStart, i, openStream2);
                        }
                        chunksAreLocal(byteVec, i, key);
                        break;
                    case GZIP:
                        InputStream openStream3 = byteVec.openStream(this._job_key);
                        this._dout = streamParse(new GZIPInputStream(openStream3), guessSetup, this._vecIdStart, i, openStream3);
                        chunksAreLocal(byteVec, i, key);
                        break;
                }
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }

        @Override // water.MRTask
        public void reduce(MultiFileParseTask multiFileParseTask) {
            if (!$assertionsDisabled && this == multiFileParseTask) {
                throw new AssertionError();
            }
            if (this._dout == null) {
                this._dout = multiFileParseTask._dout;
            } else {
                this._dout.reduce((Parser.StreamDataOut) multiFileParseTask._dout);
            }
            if (this._chunk2Enum == null) {
                this._chunk2Enum = multiFileParseTask._chunk2Enum;
            } else if (this._chunk2Enum != multiFileParseTask._chunk2Enum) {
                for (int i = 0; i < this._chunk2Enum.length; i++) {
                    if (this._chunk2Enum[i] == -1) {
                        this._chunk2Enum[i] = multiFileParseTask._chunk2Enum[i];
                    } else if (!$assertionsDisabled && multiFileParseTask._chunk2Enum[i] != -1) {
                        throw new AssertionError(Arrays.toString(this._chunk2Enum) + " :: " + Arrays.toString(multiFileParseTask._chunk2Enum));
                    }
                }
            }
            this._errors = ArrayUtils.append(this._errors, multiFileParseTask._errors);
        }

        private FVecDataOut streamParse(InputStream inputStream, ParseSetup parseSetup, int i, int i2, InputStream inputStream2) throws IOException {
            FVecDataOut fVecDataOut = new FVecDataOut(this._vg, i2, parseSetup._ncols, i, enums(this._eKey, this._setup._ncols), null);
            Parser parser = parseSetup.parser();
            if (parseSetup._pType._parallelParseSupported) {
                parser.streamParseZip(inputStream, fVecDataOut, inputStream2);
            } else {
                parser.streamParse(inputStream, fVecDataOut);
            }
            fVecDataOut.close(this._fs);
            return fVecDataOut;
        }

        /* JADX INFO: Access modifiers changed from: private */
        public Futures onExceptionCleanup(Futures futures) {
            int length = this._chunk2Enum.length;
            int i = this._setup._ncols;
            for (int i2 = 0; i2 < i; i2++) {
                Key vecKey = this._vg.vecKey(this._vecIdStart + i2);
                Keyed.remove(vecKey, futures);
                for (int i3 = 0; i3 < length; i3++) {
                    DKV.remove(Vec.chunkKey(vecKey, i3), futures);
                }
            }
            cancel(true);
            return futures;
        }

        static {
            // Decompiled form of the compiler-generated assertion flag: true when assertions
            // are disabled for ParseDataset, so guarded checks are skipped.
            $assertionsDisabled = !ParseDataset.class.desiredAssertionStatus();
            // Static map created once at class-initialization time.
            _enums = new NonBlockingHashMap<>();
        }
    }

    /* loaded from: input_file:water/parser/ParseDataset$ParserFJTask.class */
    /**
     * Counted completer that runs {@link ParseDataset#parse_impl} for a parse job and
     * reports the outcome back to that job.
     */
    public static class ParserFJTask extends H2O.H2OCountedCompleter {
        final ParseDataset _job;
        final Key[] _keys;
        final ParseSetup _setup;
        final boolean _delete_on_done;

        /**
         * @param parseDataset job driven by this task
         * @param keyArr       source keys to parse
         * @param parseSetup   parse setup forwarded to {@code parse_impl}
         * @param z            delete-on-done flag forwarded to {@code parse_impl}
         */
        public ParserFJTask(ParseDataset parseDataset, Key[] keyArr, ParseSetup parseSetup, boolean z) {
            _job = parseDataset;
            _keys = keyArr;
            _setup = parseSetup;
            _delete_on_done = z;
        }

        /** Runs the parse synchronously, then signals completion. */
        @Override // water.H2O.H2OCountedCompleter
        public void compute2() {
            ParseDataset.parse_impl(_job, _keys, _setup, _delete_on_done);
            tryComplete();
        }

        /** Forwards the failure to the job (when there is one) and always returns {@code true}. */
        @Override // water.H2O.H2OCountedCompleter, jsr166y.CountedCompleter
        public boolean onExceptionalCompletion(Throwable th, CountedCompleter countedCompleter) {
            if (_job != null) {
                _job.cancel2(th);
            }
            return true;
        }

        /** Marks the job as done on normal completion. */
        @Override // jsr166y.CountedCompleter
        public void onCompletion(CountedCompleter countedCompleter) {
            _job.done();
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:water/parser/ParseDataset$SVFTask.class */
    /**
     * Task that fills in missing chunks of a frame: for every chunk index homed on the
     * executing node, any vec lacking a locally stored chunk gets a constant 0.0 chunk
     * whose length is copied from the first stored chunk found at that index.
     */
    public static class SVFTask extends MRTask<SVFTask> {
        private final Frame _f;

        private SVFTask(Frame frame) {
            _f = frame;
        }

        @Override // water.MRTask
        public void map(Key key) {
            final Vec reference = _f.anyVec();
            for (int cidx = 0; cidx < reference.nChunks(); cidx++) {
                if (!reference.chunkKey(cidx).home()) {
                    continue;
                }
                // Row count for this chunk index: taken from the first vec that has a stored
                // chunk; stays 0 when none has.
                int rowCount = 0;
                for (Vec candidate : _f.vecs()) {
                    final Value stored = H2O.get(candidate.chunkKey(cidx));
                    if (stored != null) {
                        rowCount = ((Chunk) stored.get())._len;
                        break;
                    }
                }
                // Install a constant-zero chunk wherever a locally homed chunk is absent.
                for (Vec column : _f.vecs()) {
                    final Key ckey = column.chunkKey(cidx);
                    if (ckey.home() && H2O.get(ckey) == null) {
                        H2O.putIfMatch(ckey, new Value(ckey, (Freezable) new C0DChunk(0.0d, rowCount)), null);
                    }
                }
            }
        }
    }

    /* loaded from: input_file:water/parser/ParseDataset$SVMLightFVecDataOut.class */
    /**
     * {@link FVecDataOut} variant whose column set grows on demand: writing to a column index
     * beyond the current width appends new columns, back-filled with zeros for the lines
     * already recorded, and skipped columns on a line are written as zero.
     */
    private static class SVMLightFVecDataOut extends FVecDataOut {
        protected final Vec.VectorGroup _vg;
        static final /* synthetic */ boolean $assertionsDisabled;

        static {
            $assertionsDisabled = !ParseDataset.class.desiredAssertionStatus();
        }

        /** Starts with zero columns and reserves a block of 10000000 vec keys from the group. */
        private SVMLightFVecDataOut(Vec.VectorGroup vectorGroup, int i, Categorical[] categoricalArr) {
            super(vectorGroup, i, 0, vectorGroup.reserveKeys(10000000), categoricalArr, null);
            _nvs = new NewChunk[0];
            _vg = vectorGroup;
            _col = 0;
        }

        /** Widens the output to {@code i} columns; does nothing when it is already that wide. */
        private void addColumns(int i) {
            if (i <= _nCols) {
                return;
            }
            _nvs = Arrays.copyOf(_nvs, i);
            _vecs = Arrays.copyOf(_vecs, i);
            _ctypes = Arrays.copyOf(_ctypes, i);
            for (int col = _nCols; col < i; col++) {
                _vecs[col] = new AppendableVec(_vg.vecKey(_vecIdStart + col + 1));
                final NewChunk fresh = new NewChunk(_vecs[col], _cidx);
                _nvs[col] = fresh;
                // Back-fill the new column with zeros for every line recorded so far.
                for (int line = 0; line < _nLines; line++) {
                    fresh.addNum(0L, 0);
                }
            }
            _nCols = i;
        }

        /** Writes a number to column {@code i}, zero-filling the columns skipped since the last write. */
        @Override // water.parser.ParseDataset.FVecDataOut, water.parser.Parser.DataOut
        public void addNumCol(int i, long j, int i2) {
            if (!$assertionsDisabled && i < _col) {
                throw new AssertionError();
            }
            addColumns(i + 1);
            int skipped = _col;
            while (skipped < i) {
                super.addNumCol(skipped, 0L, 0);
                skipped++;
            }
            super.addNumCol(i, j, i2);
            _col = i + 1;
        }

        /** Zero-fills the rest of the current line, ends it and resets the column cursor. */
        @Override // water.parser.ParseDataset.FVecDataOut, water.parser.Parser.DataOut
        public void newLine() {
            if (_col < _nCols) {
                addNumCol(_nCols - 1, 0L, 0);
            }
            super.newLine();
            _col = 0;
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:water/parser/ParseDataset$UnifyStrVecTask.class */
    /**
     * Task that rewrites {@link C4Chunk}s belonging to string vecs: each non-NA value is
     * used as an index into the vec's domain, and the rebuilt chunk replaces the stored one.
     */
    public static class UnifyStrVecTask extends MRTask<UnifyStrVecTask> {
        private UnifyStrVecTask() {
        }

        @Override // water.MRTask
        public void map(Chunk[] chunkArr) {
            for (Chunk source : chunkArr) {
                final Vec owner = source.vec();
                if (!owner.isString() || !(source instanceof C4Chunk)) {
                    continue;
                }
                final Key targetKey = owner.chunkKey(source.cidx());
                final NewChunk rebuilt = new NewChunk(owner, source.cidx());
                for (int row = 0; row < source._len; row++) {
                    if (!source.isNA(row)) {
                        rebuilt.addStr(new ValueString(owner.domain()[(int) source.at8(row)]));
                    } else {
                        rebuilt.addNA();
                    }
                }
                // Swap in the rebuilt chunk, conditional on the currently stored value.
                final Value replacement = new Value(targetKey, (Freezable) rebuilt.new_close());
                H2O.putIfMatch(targetKey, replacement, H2O.get(targetKey));
            }
        }
    }

    /**
     * Convenience entry point: parses the source keys into {@code key} with delete-on-done
     * enabled and default setup-guessing arguments, blocking until the frame is ready.
     *
     * @param key    destination key
     * @param keyArr source keys
     * @return the parsed frame
     */
    public static Frame parse(Key key, Key... keyArr) {
        final boolean deleteOnDone = true;
        // The next two values are forwarded to ParseSetup.guessSetup; their meaning is defined there.
        final boolean guessFlag = false;
        final int guessHint = 0;
        return parse(key, keyArr, deleteOnDone, guessFlag, guessHint);
    }

    /**
     * Guesses the parse setup from the first source key, then parses all sources into
     * {@code key} and blocks until the frame is ready.
     *
     * @param key    destination key
     * @param keyArr source keys; the first one drives the setup guess
     * @param z      delete-on-done flag
     * @param z2     forwarded to {@code ParseSetup.guessSetup}
     * @param i      forwarded to {@code ParseSetup.guessSetup}
     * @return the parsed frame
     */
    public static Frame parse(Key key, Key[] keyArr, boolean z, boolean z2, int i) {
        final ParseSetup guessed = setup(keyArr[0], z2, i);
        return parse(key, keyArr, z, guessed);
    }

    /**
     * Parses the sources into {@code key} with an explicit setup, blocking until done.
     *
     * @param key        destination key
     * @param keyArr     source keys
     * @param z          delete-on-done flag
     * @param parseSetup setup to parse with
     * @return the parsed frame
     */
    public static Frame parse(Key key, Key[] keyArr, boolean z, ParseSetup parseSetup) {
        final ParseDataset finishedJob = parse(key, keyArr, z, parseSetup, true);
        return finishedJob.get();
    }

    /**
     * Forks a parse job and optionally waits for it.
     *
     * <p>When blocking and the job fails, the destination, any partially built output and
     * all source keys are removed (regardless of {@code z}) before the failure is rethrown.
     *
     * @param key        destination key
     * @param keyArr     source keys
     * @param z          delete-on-done flag
     * @param parseSetup setup to parse with
     * @param z2         {@code true} to block until the job finishes
     * @return the parse job
     */
    public static ParseDataset parse(Key key, Key[] keyArr, boolean z, ParseSetup parseSetup, boolean z2) {
        final ParseDataset job = forkParseDataset(key, keyArr, parseSetup, z);
        if (!z2) {
            return job;
        }
        try {
            job.get();
            return job;
        } catch (Throwable failure) {
            final Futures pending = new Futures();
            if (job != null) {
                Keyed.remove(job._dest, pending);
                if (job._mfpt != null) {
                    job._mfpt.onExceptionCleanup(pending);
                }
            }
            for (Key source : keyArr) {
                Keyed.remove(source, pending);
            }
            pending.blockForPending();
            // Assertion (only evaluated when assertions are enabled): the job must be stopped by now.
            if (!$assertionsDisabled && !((Job) DKV.getGet(job._key)).isStopped()) {
                throw new AssertionError();
            }
            throw failure;
        }
    }

    /**
     * Guesses a parse setup from the first unzipped bytes of the data behind {@code key}.
     *
     * @param key source key
     * @param z   forwarded to {@code ParseSetup.guessSetup}
     * @param i   forwarded to {@code ParseSetup.guessSetup}
     * @return the guessed setup
     * @throws UnsupportedOperationException if the guess reports no columns
     */
    private static ParseSetup setup(Key key, boolean z, int i) {
        final ByteVec source = getByteVec(key);
        final ParseSetup guess = ParseSetup.guessSetup(ZipUtil.getFirstUnzippedBytes(source), z, i);
        if (guess._ncols > 0) {
            return guess;
        }
        throw new UnsupportedOperationException(guess.toString());
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Resolves {@code key} to a {@link ByteVec}: the stored object itself when it is one,
     * otherwise the first vec of the stored {@link Frame}.
     */
    public static ByteVec getByteVec(Key key) {
        final Iced stored = DKV.get(key).get();
        if (stored instanceof ByteVec) {
            return (ByteVec) stored;
        }
        return (ByteVec) ((Frame) stored).vecs()[0];
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Builds default column names {@code "C1"}, {@code "C2"}, ... {@code "C<i>"}.
     *
     * @param i number of names to generate
     * @return array of {@code i} one-based generic names
     */
    public static String[] genericColumnNames(int i) {
        final String[] names = new String[i];
        int ordinal = 1;
        while (ordinal <= i) {
            names[ordinal - 1] = "C" + ordinal;
            ordinal++;
        }
        return names;
    }

    /**
     * Validates a parse request, locks its inputs and output, and starts the parse job
     * asynchronously.
     *
     * @param key        destination key; must differ from every source key
     * @param keyArr     source keys
     * @param parseSetup setup to parse with; must not contain duplicate column names
     * @param z          delete-on-done flag; when set, no source key may appear twice
     * @return the started parse job
     * @throws IllegalArgumentException on duplicate column names, a destination that equals
     *         a source, a repeated source with delete-on-done, or a total input size above
     *         four times the cluster memory
     */
    public static ParseDataset forkParseDataset(Key key, Key[] keyArr, ParseSetup parseSetup, boolean z) {
        Iterator<String> duplicateNames = parseSetup.checkDupColumnNames().iterator();
        if (duplicateNames.hasNext()) {
            throw new IllegalArgumentException("Found duplicate column name " + duplicateNames.next());
        }
        long totalBytes = 0;
        for (int i = 0; i < keyArr.length; i++) {
            Key source = keyArr[i];
            if (key.equals(source)) {
                throw new IllegalArgumentException("Destination key " + key + " must be different from all sources");
            }
            if (z) {
                for (int j = i + 1; j < keyArr.length; j++) {
                    Key other = keyArr[j];
                    // Compare by value, as the destination check above does: an equal but
                    // distinct Key instance is still the same source.
                    if (source == other || (source != null && source.equals(other))) {
                        throw new IllegalArgumentException("Source key " + source + " appears twice, delete_on_done must be false");
                    }
                }
            }
            totalBytes += getByteVec(source).length();
        }
        long clusterMemory = H2O.CLOUD.memsz();
        if (totalBytes > clusterMemory * 4) {
            throw new IllegalArgumentException("Total input file size of " + PrettyPrint.bytes(totalBytes) + " is much larger than total cluster memory of " + PrettyPrint.bytes(clusterMemory) + ", please use either a larger cluster or smaller data.");
        }
        ParseDataset parseDataset = new ParseDataset(key);
        // Put an empty, locked placeholder frame at the destination before any work starts.
        new Frame(parseDataset.dest(), new String[0], new Vec[0]).delete_and_lock(parseDataset._key);
        for (Key source : keyArr) {
            Lockable.read_lock(source, parseDataset._key);
        }
        parseDataset.start(new ParserFJTask(parseDataset, keyArr, parseSetup, z), totalBytes);
        return parseDataset;
    }

    /** Creates a parse job for the given key, passing "Parse" as the second superclass constructor argument. */
    private ParseDataset(Key key) {
        super(key, "Parse");
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v102, types: [int[], int[][]] */
    /* JADX WARN: Type inference failed for: r0v60, types: [water.parser.ValueString[], water.parser.ValueString[][]] */
    public static void parse_impl(ParseDataset parseDataset, Key[] keyArr, ParseSetup parseSetup, boolean z) {
        Frame frame;
        if (!$assertionsDisabled && parseSetup._ncols <= 0) {
            throw new AssertionError();
        }
        if (keyArr.length == 0) {
            parseDataset.cancel();
            return;
        }
        MultiFileParseTask multiFileParseTask = new MultiFileParseTask(getByteVec(keyArr[0]).group(), parseSetup, parseDataset._key, keyArr, z);
        parseDataset._mfpt = multiFileParseTask;
        multiFileParseTask.doAll(keyArr);
        int i = 0;
        int[] iArr = new int[multiFileParseTask._dout._nCols];
        for (int i2 = 0; i2 < iArr.length; i2++) {
            if (multiFileParseTask._dout._vecs[i2].shouldBeEnum()) {
                int i3 = i;
                i++;
                iArr[i3] = i2;
            }
        }
        int[] copyOf = Arrays.copyOf(iArr, i);
        if (i > 0) {
            EnumFetchTask doAllNodes = new EnumFetchTask(multiFileParseTask._eKey, copyOf).doAllNodes();
            Categorical[] categoricalArr = doAllNodes._gEnums;
            ?? r0 = new ValueString[copyOf.length];
            EnumMapping[] enumMappingArr = new EnumMapping[H2O.CLOUD.size()];
            int i4 = 0;
            for (int i5 : copyOf) {
                AppendableVec appendableVec = multiFileParseTask._dout._vecs[i5];
                int i6 = i4;
                i4++;
                ValueString[] computeColumnDomain = categoricalArr[i5].computeColumnDomain();
                r0[i6] = computeColumnDomain;
                appendableVec.setDomain(ValueString.toString(computeColumnDomain));
            }
            for (int i7 = 0; i7 < H2O.CLOUD.size(); i7++) {
                if (doAllNodes._lEnums[i7] != null) {
                    ?? r02 = new int[copyOf.length];
                    for (int i8 = 0; i8 < copyOf.length; i8++) {
                        Categorical categorical = doAllNodes._lEnums[i7][copyOf[i8]];
                        if (categorical != null) {
                            r02[i8] = MemoryManager.malloc4(categorical.maxId() + 1);
                            Arrays.fill(r02[i8], -1);
                            for (int i9 = 0; i9 < r0[i8].length; i9++) {
                                ValueString valueString = r0[i8][i9];
                                if (categorical.containsKey(valueString)) {
                                    if (!$assertionsDisabled && categorical.getTokenId(valueString) > categorical.maxId()) {
                                        throw new AssertionError("maxIdx = " + categorical.maxId() + ", got " + categorical.getTokenId(valueString));
                                    }
                                    r02[i8][categorical.getTokenId(valueString)] = i9;
                                }
                            }
                        }
                    }
                    enumMappingArr[i7] = new EnumMapping(r02);
                }
            }
            frame = new Frame(parseDataset.dest(), parseSetup._columnNames != null ? parseSetup._columnNames : genericColumnNames(multiFileParseTask._dout._nCols), multiFileParseTask._dout.closeVecs());
            Vec[] vecs = frame.vecs();
            int i10 = 0;
            for (int i11 = 0; i11 < copyOf.length; i11++) {
                if (vecs[copyOf[i11]].isEnum()) {
                    copyOf[i10] = copyOf[i11];
                    r0[i10] = r0[i11];
                    for (int i12 = 0; i12 < enumMappingArr.length; i12++) {
                        if (enumMappingArr[i12] != null) {
                            enumMappingArr[i12].map[i10] = enumMappingArr[i12].map[i11];
                        }
                    }
                    i10++;
                }
            }
            Vec[] vecArr = new Vec[i10];
            for (int i13 = 0; i13 < vecArr.length; i13++) {
                vecArr[i13] = frame.vecs()[copyOf[i13]];
            }
            new EnumUpdateTask(r0, enumMappingArr, multiFileParseTask._chunk2Enum).doAll(vecArr);
        } else {
            frame = new Frame(parseDataset.dest(), parseSetup._columnNames != null ? parseSetup._columnNames : genericColumnNames(multiFileParseTask._dout._nCols), multiFileParseTask._dout.closeVecs());
        }
        new SVFTask(frame).doAllNodes();
        new UnifyStrVecTask().doAll(frame);
        if (multiFileParseTask._errors != null) {
            for (String str : multiFileParseTask._errors) {
                Log.warn(str);
            }
        }
        logParseResults(parseDataset, frame);
        frame.unlock(parseDataset._key);
        if (z) {
            for (Key key : keyArr) {
                if (!$assertionsDisabled && DKV.get(key) != null) {
                    throw new AssertionError("Input key " + key + " not deleted during parse");
                }
            }
        }
    }

    /**
     * Logs a per-column summary of a parsed frame (type, min, max, NA count, constant flag
     * and level count) followed by the frame's chunk summary.
     *
     * <p>For frames with more than 20 columns only the first ten and the last ten rows go
     * through {@code Log.info}; the others go through {@code Log.info_no_stdout}, and a
     * notice is printed to standard output once.
     *
     * <p>Logging is best-effort: any exception is reported as a warning and never propagates.
     *
     * @param parseDataset job whose destination is named in the header line
     * @param frame        parsed frame to summarize
     */
    private static void logParseResults(ParseDataset parseDataset, Frame frame) {
        try {
            Log.info("Parse result for " + parseDataset.dest() + " (" + Long.toString(frame.anyVec().length()) + " rows):");
            Futures futures = new Futures();
            Vec[] vecs = frame.vecs();
            for (Vec vec : vecs) {
                vec.startRollupStats(futures);
            }
            futures.blockForPending();

            String[] names = frame.names();
            // Width of the name column. Start at 1 because a width of 0 would produce "%0s",
            // which String.format rejects.
            int nameWidth = 1;
            for (String name : names) {
                nameWidth = Math.max(nameWidth, name.length());
            }
            String rowFormat = " %" + nameWidth + "s %11s %12s %12s %11s %8s %6s";
            Log.info(String.format(rowFormat, "ColV2", "type", "min", "max", "NAs", "constant", "numLevels"));

            for (int col = 0; col < vecs.length; col++) {
                Vec vec = vecs[col];
                boolean isEnum = vec.isEnum();
                boolean isConst = vec.isConst();
                boolean isString = vec.isString();
                String nameCell = String.format("%" + nameWidth + "s:", names[col]);
                String typeCell = vec.isUUID() ? "UUID" : isEnum ? "categorical" : isString ? "string" : "numeric";
                String minCell = isString ? "" : String.format("%g", vec.min());
                String maxCell = isString ? "" : String.format("%g", vec.max());
                long naCnt = vec.naCnt();
                String naCell = naCnt > 0 ? String.format("%d", naCnt) : "";
                String constCell = isConst ? "constant" : "";
                String levelsCell = isEnum ? String.format("%d", vec.domain().length) : isString ? String.format("%d", vec.nzCnt()) : "";

                // Decide where this row goes: every row for small frames, otherwise only
                // the first ten and the last ten are sent through Log.info.
                boolean announceTruncation = false;
                boolean viaInfo;
                if (vecs.length <= 20 || col < 10) {
                    viaInfo = true;
                } else if (col == 10) {
                    announceTruncation = true;
                    viaInfo = false;
                } else {
                    viaInfo = col + 10 >= vecs.length;
                }
                if (announceTruncation) {
                    System.out.println("Additional column information only sent to log file...");
                }
                String row = String.format(rowFormat, nameCell, typeCell, minCell, maxCell, naCell, constCell, levelsCell);
                if (viaInfo) {
                    Log.info(row);
                } else {
                    Log.info_no_stdout(row);
                }
            }
            Log.info(FrameUtils.chunkSummary(frame).toString());
        } catch (Exception e) {
            // Summary logging must never fail the parse, but do not hide the problem either.
            Log.warn("Unable to log parse results: " + e);
        }
    }

    static {
        // Decompiled form of the compiler-generated assertion flag: true when assertions
        // are disabled for ParseDataset, so the guarded AssertionError checks are skipped.
        $assertionsDisabled = !ParseDataset.class.desiredAssertionStatus();
    }
}
