package org.apache.sysds.runtime.io;

import java.io.IOException;
import java.io.InputStream;
import java.util.HashSet;
import java.util.Set;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.sysds.common.Types;
import org.apache.sysds.conf.ConfigurationManager;
import org.apache.sysds.parser.DataExpression;
import org.apache.sysds.runtime.DMLRuntimeException;
import org.apache.sysds.runtime.frame.data.FrameBlock;
import org.apache.sysds.runtime.matrix.data.Pair;
import org.apache.sysds.runtime.transform.TfUtils;
import org.apache.sysds.runtime.util.InputStreamInputFormat;

/* loaded from: input_file:org/apache/sysds/runtime/io/FrameReaderTextCSV.class */
/**
 * Sequential reader that loads a delimited text (CSV) file or stream into a {@link FrameBlock}.
 *
 * <p>Parsing is driven by a {@link FileFormatPropertiesCSV} instance (header flag, delimiter,
 * fill flag/value and NA strings). Lines whose first token equals
 * {@link TfUtils#TXMTD_MVPREFIX} or {@link TfUtils#TXMTD_NDPREFIX} are not stored as data rows;
 * they are applied to the column metadata of the target frame instead.
 */
public class FrameReaderTextCSV extends FrameReader {
    /** CSV format properties used for every read; never {@code null}. */
    protected final FileFormatPropertiesCSV _props;

    /**
     * Creates a reader for the given CSV properties.
     *
     * @param fileFormatPropertiesCSV the CSV properties; when {@code null}, a default-constructed
     *        {@link FileFormatPropertiesCSV} is used
     */
    public FrameReaderTextCSV(FileFormatPropertiesCSV fileFormatPropertiesCSV) {
        this._props = fileFormatPropertiesCSV != null ? fileFormatPropertiesCSV : new FileFormatPropertiesCSV();
    }

    /**
     * Reads the CSV file(s) at the given path into a newly allocated frame.
     *
     * @param str path of the input file or directory
     * @param valueTypeArr requested schema, forwarded to {@code createOutputSchema}
     * @param strArr requested column names, forwarded to {@code createOutputNames}
     * @param j number of rows; if this or {@code j2} is not positive, both are computed by
     *        scanning the input once via {@link #computeCSVSize}
     * @param j2 number of columns; see {@code j}
     * @return the populated frame
     * @throws IOException if the input cannot be read
     * @throws DMLRuntimeException if a line cannot be parsed
     */
    @Override
    public final FrameBlock readFrameFromHDFS(String str, Types.ValueType[] valueTypeArr, String[] strArr, long j, long j2) throws IOException, DMLRuntimeException {
        LOG.debug("readFrameFromHDFS csv");
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path(str);
        FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
        FileInputFormat.addInputPath(job, path);
        checkValidInputFile(fs, path);

        // Unknown dimensions: determine them with an extra pass over the input.
        if (j <= 0 || j2 <= 0) {
            Pair<Integer, Integer> size = computeCSVSize(path, job, fs);
            j = size.getKey().intValue();
            j2 = size.getValue().intValue();
        }

        Types.ValueType[] schema = createOutputSchema(valueTypeArr, j2);
        String[] names = createOutputNames(strArr, j2);
        FrameBlock result = createOutputFrameBlock(schema, names, j);
        readCSVFrameFromHDFS(path, job, fs, result, schema, names, j, j2);
        return result;
    }

    /**
     * Reads CSV content from an input stream into a newly allocated frame. The stream is treated
     * as a single split that may start with a header line.
     *
     * @param inputStream source of the CSV text
     * @param valueTypeArr requested schema, forwarded to {@code createOutputSchema}
     * @param strArr requested column names, forwarded to {@code createOutputNames}
     * @param j number of rows used to allocate the frame
     * @param j2 number of columns used to allocate the frame
     * @return the populated frame
     * @throws IOException if the stream cannot be read
     * @throws DMLRuntimeException if a line cannot be parsed
     */
    @Override
    public FrameBlock readFrameFromInputStream(InputStream inputStream, Types.ValueType[] valueTypeArr, String[] strArr, long j, long j2) throws IOException, DMLRuntimeException {
        Types.ValueType[] schema = createOutputSchema(valueTypeArr, j2);
        String[] names = createOutputNames(strArr, j2);
        FrameBlock result = createOutputFrameBlock(schema, names, j);

        InputStreamInputFormat format = new InputStreamInputFormat(inputStream);
        InputSplit onlySplit = format.getSplits(null, 1)[0];
        readCSVFrameFromInputSplit(onlySplit, format, null, result, valueTypeArr, strArr, j, j2, 0, true);
        return result;
    }

    /**
     * Reads all input splits of the configured job one after another into the given frame.
     * Splits are processed in the order returned by {@code IOUtilFunctions.sortInputSplits};
     * each split continues at the row offset where the previous one stopped, and only the first
     * split is flagged as potentially containing the header.
     *
     * @param path input path (not used directly here; the job already carries the input path)
     * @param jobConf job configuration used to compute splits and create record readers
     * @param fileSystem file system of the input (not used directly here)
     * @param frameBlock pre-allocated target frame
     * @param valueTypeArr output schema, forwarded to the split reader
     * @param strArr output column names, forwarded to the split reader
     * @param j number of rows
     * @param j2 number of columns
     * @throws IOException if a split cannot be read
     */
    protected void readCSVFrameFromHDFS(Path path, JobConf jobConf, FileSystem fileSystem, FrameBlock frameBlock, Types.ValueType[] valueTypeArr, String[] strArr, long j, long j2) throws IOException {
        LOG.debug("readCSVFrameFromHDFS csv");
        TextInputFormat format = new TextInputFormat();
        format.configure(jobConf);
        InputSplit[] splits = IOUtilFunctions.sortInputSplits(format.getSplits(jobConf, 1));

        int rowOffset = 0;
        for (int s = 0; s < splits.length; s++) {
            rowOffset = readCSVFrameFromInputSplit(splits[s], format, jobConf, frameBlock, valueTypeArr, strArr, j, j2, rowOffset, s == 0);
        }
    }

    /**
     * Reads one input split into the target frame, starting at row {@code i}.
     *
     * <p>The record reader is kept open for the whole split and closed exactly once when the
     * method exits, whether it returns normally or throws.
     *
     * @param inputSplit the split to read
     * @param inputFormat format used to create the record reader for the split
     * @param jobConf job configuration passed to the input format (may be {@code null} if the
     *        format does not need it, as done for stream input in this class)
     * @param frameBlock pre-allocated target frame
     * @param valueTypeArr schema (not used by this method)
     * @param strArr column names (not used by this method)
     * @param j total number of rows; used to validate {@code i}
     * @param j2 expected number of columns per line
     * @param i row index at which the first data line of this split is stored
     * @param z whether this is the first split, i.e. the one that may start with the header line
     * @return the row index following the last row written
     * @throws IOException if reading from the split fails
     * @throws DMLRuntimeException if {@code i > j} or a line cannot be parsed
     */
    public final int readCSVFrameFromInputSplit(InputSplit inputSplit, InputFormat<LongWritable, Text> inputFormat, JobConf jobConf, FrameBlock frameBlock, Types.ValueType[] valueTypeArr, String[] strArr, long j, long j2, int i, boolean z) throws IOException {
        if (i > j) {
            throw new DMLRuntimeException("Invalid offset");
        }

        final boolean skipHeader = z && this._props.hasHeader();
        final boolean fill = this._props.isFill();
        final double fillValue = this._props.getFillValue();
        final String fillValueStr = String.valueOf(this._props.getFillValue());
        final HashSet<String> naStrings = this._props.getNAStrings();
        final String delim = this._props.getDelim();

        final RecordReader<LongWritable, Text> reader = inputFormat.getRecordReader(inputSplit, jobConf, Reporter.NULL);
        final LongWritable key = new LongWritable();
        final Text value = new Text();
        final int numColumns = frameBlock.getNumColumns();
        int row = i;

        try {
            // The header line of the first split provides the column names.
            if (skipHeader) {
                reader.next(key, value);
                frameBlock.setColumnNames(value.toString().split(delim));
            }

            // Token array handed back to splitCSV on each line; reset to null after metadata lines.
            String[] parts = null;
            while (reader.next(key, value)) {
                try {
                    String line = IOUtilFunctions.trim(value.toString());
                    parts = IOUtilFunctions.splitCSV(line, delim, parts);
                    boolean isMvRow = parts[0].equals(TfUtils.TXMTD_MVPREFIX);
                    boolean isNdRow = parts[0].equals(TfUtils.TXMTD_NDPREFIX);

                    if (!isMvRow && !isNdRow) {
                        // Regular data line.
                        assignColumns(row, numColumns, frameBlock, parts, naStrings, fill, fillValue, fillValueStr);
                        IOUtilFunctions.checkAndRaiseErrorCSVEmptyField(line, fill, false);
                        IOUtilFunctions.checkAndRaiseErrorCSVNumColumns("", line, parts, j2);
                        row++;
                    } else {
                        // Metadata line: one prefix token followed by one value per column.
                        if (parts.length != frameBlock.getNumColumns() + 1) {
                            LOG.warn("Invalid metadata ");
                        } else {
                            applyMetadataRow(frameBlock, parts, isMvRow);
                        }
                        parts = null;
                    }
                } catch (Exception e) {
                    throw new DMLRuntimeException("Failed parsing string: \"" + value + "\"", e);
                }
            }
        } finally {
            // Close once, after the entire split has been consumed (or on failure).
            IOUtilFunctions.closeSilently(reader);
        }
        return row;
    }

    /**
     * Applies a metadata line to the column metadata of the frame.
     *
     * @param frameBlock frame whose column metadata is updated
     * @param parts tokens of the line; {@code parts[0]} is the prefix, {@code parts[c + 1]} the
     *        value for column {@code c}
     * @param isMvRow {@code true} to set the missing-value entry of each column, {@code false}
     *        to parse each token as a long and set the number of distinct values
     */
    private static void applyMetadataRow(FrameBlock frameBlock, String[] parts, boolean isMvRow) {
        final int cols = frameBlock.getNumColumns();
        for (int c = 0; c < cols; c++) {
            if (isMvRow) {
                frameBlock.getColumnMetadata(c).setMvValue(parts[c + 1]);
            } else {
                frameBlock.getColumnMetadata(c).setNumDistinct(Long.parseLong(parts[c + 1]));
            }
        }
    }

    /**
     * Writes the tokens of one line into row {@code i} of the frame, choosing the simple path
     * when neither filling nor NA strings are configured.
     *
     * @return {@code true} if at least one token was empty (or, on the generic path, an NA string)
     */
    private boolean assignColumns(int i, int i2, FrameBlock frameBlock, String[] strArr, Set<String> set, boolean z, double d, String str) {
        if (z || set != null) {
            return assignColumnsGeneric(i, i2, frameBlock, strArr, set, z, d, str);
        }
        return assignColumnsNoFillNoNan(i, i2, frameBlock, strArr);
    }

    /**
     * Writes the first {@code i2} tokens into row {@code i}. Tokens that are empty or contained
     * in {@code set} are not stored; instead, if filling is enabled and the fill value differs
     * from {@link DataExpression#DEFAULT_DELIM_FILL_VALUE}, the fill value string is stored.
     *
     * @return {@code true} if at least one empty or NA token was encountered
     */
    private boolean assignColumnsGeneric(int i, int i2, FrameBlock frameBlock, String[] strArr, Set<String> set, boolean z, double d, String str) {
        boolean emptyFound = false;
        for (int col = 0; col < i2; col++) {
            String token = IOUtilFunctions.trim(strArr[col]);
            boolean missing = token.isEmpty() || (set != null && set.contains(token));
            if (!missing) {
                frameBlock.set(i, col, token);
                continue;
            }
            if (z && d != DataExpression.DEFAULT_DELIM_FILL_VALUE) {
                frameBlock.set(i, col, str);
            }
            emptyFound = true;
        }
        return emptyFound;
    }

    /**
     * Writes the first {@code i2} tokens into row {@code i}, leaving cells of empty tokens
     * untouched.
     *
     * @return {@code true} if at least one empty token was encountered
     */
    private boolean assignColumnsNoFillNoNan(int i, int i2, FrameBlock frameBlock, String[] strArr) {
        boolean emptyFound = false;
        for (int col = 0; col < i2; col++) {
            String token = IOUtilFunctions.trim(strArr[col]);
            if (token.isEmpty()) {
                emptyFound = true;
            } else {
                frameBlock.set(i, col, token);
            }
        }
        return emptyFound;
    }

    /**
     * Determines the dimensions of the CSV input by scanning it once.
     *
     * @param path input path (not used directly here; the job already carries the input path)
     * @param jobConf job configuration used to compute splits and create record readers
     * @param fileSystem file system of the input (not used directly here)
     * @return pair of (number of data rows, number of columns); the header line of the first
     *         split and metadata lines are not counted as rows
     * @throws IOException if the input cannot be read
     */
    protected Pair<Integer, Integer> computeCSVSize(Path path, JobConf jobConf, FileSystem fileSystem) throws IOException {
        TextInputFormat format = new TextInputFormat();
        format.configure(jobConf);
        InputSplit[] splits = IOUtilFunctions.sortInputSplits(format.getSplits(jobConf, 1));
        int numCols = IOUtilFunctions.countNumColumnsCSV(splits, format, jobConf, this._props.getDelim());

        int numRows = 0;
        for (int s = 0; s < splits.length; s++) {
            boolean skipHeader = s == 0 && this._props.hasHeader();
            numRows += countLinesInReader(splits[s], format, jobConf, numCols, skipHeader);
        }
        return new Pair<>(Integer.valueOf(numRows), Integer.valueOf(numCols));
    }

    /**
     * Counts the data lines of a single split.
     *
     * @param inputSplit the split to scan
     * @param textInputFormat format used to create the record reader
     * @param jobConf job configuration passed to the input format
     * @param j number of columns (forwarded, not used for counting)
     * @param z whether the first line of the split is a header and must be skipped
     * @return number of lines that are neither header nor metadata lines
     * @throws IOException if the split cannot be read
     */
    public static int countLinesInReader(InputSplit inputSplit, TextInputFormat textInputFormat, JobConf jobConf, long j, boolean z) throws IOException {
        RecordReader<LongWritable, Text> reader = textInputFormat.getRecordReader(inputSplit, jobConf, Reporter.NULL);
        try {
            return countLinesInReader(reader, j, z);
        } finally {
            IOUtilFunctions.closeSilently(reader);
        }
    }

    /**
     * Counts the data lines remaining in the given reader and closes the reader afterwards.
     *
     * @param recordReader reader to consume; closed when this method exits
     * @param j number of columns (not used for counting)
     * @param z whether the first line is a header and must be skipped
     * @return number of lines that are not metadata lines (see {@code containsMetaTag})
     * @throws IOException if reading fails
     */
    protected static int countLinesInReader(RecordReader<LongWritable, Text> recordReader, long j, boolean z) throws IOException {
        final LongWritable key = new LongWritable();
        final Text value = new Text();
        int rows = 0;
        try {
            // Skip the header line, if any, before counting.
            if (z) {
                recordReader.next(key, value);
            }
            while (recordReader.next(key, value)) {
                if (!containsMetaTag(value)) {
                    rows++;
                }
            }
            return rows;
        } finally {
            // Close only after the whole reader has been consumed.
            IOUtilFunctions.closeSilently(recordReader);
        }
    }

    /**
     * Checks whether a line is a metadata line: it must start with {@code '#'} and contain
     * {@link TfUtils#TXMTD_MVPREFIX} or {@link TfUtils#TXMTD_NDPREFIX}.
     */
    private static final boolean containsMetaTag(Text text) {
        if (text.charAt(0) != '#') {
            return false;
        }
        return text.find(TfUtils.TXMTD_MVPREFIX) > -1 || text.find(TfUtils.TXMTD_NDPREFIX) > -1;
    }
}
