package org.apache.sysds.runtime.io;

import java.io.IOException;
import java.io.InputStream;
import java.util.HashSet;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.sysds.common.Types;
import org.apache.sysds.conf.ConfigurationManager;
import org.apache.sysds.parser.DataExpression;
import org.apache.sysds.runtime.DMLRuntimeException;
import org.apache.sysds.runtime.matrix.data.FrameBlock;
import org.apache.sysds.runtime.matrix.data.Pair;
import org.apache.sysds.runtime.transform.TfUtils;
import org.apache.sysds.runtime.util.InputStreamInputFormat;
import org.apache.sysds.runtime.util.UtilFunctions;

/**
 * Frame reader for delimited text (CSV) input.
 *
 * <p>Input is consumed through Hadoop {@code mapred} input formats: the input splits are sorted
 * and read one after another, each line is split with the configured delimiter, and every cell is
 * converted to the value type of its column before being stored in the target {@link FrameBlock}.
 * Lines whose first field equals {@link TfUtils#TXMTD_MVPREFIX} or {@link TfUtils#TXMTD_NDPREFIX}
 * are not data rows; they populate per-column metadata instead.
 */
public class FrameReaderTextCSV extends FrameReader {
    /** CSV format properties (header flag, delimiter, fill settings, NA strings); never {@code null}. */
    protected final FileFormatPropertiesCSV _props;

    /**
     * Creates a CSV frame reader.
     *
     * @param fileFormatPropertiesCSV CSV properties to use; when {@code null}, a
     *                                default-constructed {@link FileFormatPropertiesCSV} is used
     */
    public FrameReaderTextCSV(FileFormatPropertiesCSV fileFormatPropertiesCSV) {
        this._props = fileFormatPropertiesCSV != null ? fileFormatPropertiesCSV : new FileFormatPropertiesCSV();
    }

    /**
     * Reads a CSV file (or directory of part files) into a newly allocated frame block.
     *
     * @param str          path of the input
     * @param valueTypeArr requested schema, expanded via {@code createOutputSchema}
     * @param strArr       requested column names, expanded via {@code createOutputNames}
     * @param j            number of rows; if this or {@code j2} is {@code <= 0}, both dimensions
     *                     are recomputed by scanning the input
     * @param j2           number of columns; see {@code j}
     * @return the populated frame block
     * @throws IOException         if the input cannot be read
     * @throws DMLRuntimeException declared by the overridden reader contract
     */
    @Override
    public final FrameBlock readFrameFromHDFS(String str, Types.ValueType[] valueTypeArr, String[] strArr, long j, long j2) throws IOException, DMLRuntimeException {
        LOG.debug("readFrameFromHDFS csv");

        // prepare the file access
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path(str);
        FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
        FileInputFormat.addInputPath(job, path);
        checkValidInputFile(fs, path);

        // unknown dimensions: determine both of them with an extra pass over the input
        long numRows = j;
        long numCols = j2;
        if (numRows <= 0 || numCols <= 0) {
            Pair<Integer, Integer> size = computeCSVSize(path, job, fs);
            numRows = size.getKey().intValue();
            numCols = size.getValue().intValue();
        }

        // allocate the output and fill it
        Types.ValueType[] outSchema = createOutputSchema(valueTypeArr, numCols);
        String[] outNames = createOutputNames(strArr, numCols);
        FrameBlock result = createOutputFrameBlock(outSchema, outNames, numRows);
        readCSVFrameFromHDFS(path, job, fs, result, outSchema, outNames, numRows, numCols);
        return result;
    }

    /**
     * Reads CSV data from an input stream into a newly allocated frame block. The stream is
     * wrapped in an {@link InputStreamInputFormat} and read as a single split.
     *
     * @param inputStream  stream providing the CSV text
     * @param valueTypeArr requested schema, expanded via {@code createOutputSchema}
     * @param strArr       requested column names, expanded via {@code createOutputNames}
     * @param j            number of rows used to allocate the output
     * @param j2           number of columns used to allocate and validate the output
     * @return the populated frame block
     * @throws IOException         if the stream cannot be read
     * @throws DMLRuntimeException declared by the overridden reader contract
     */
    @Override
    public FrameBlock readFrameFromInputStream(InputStream inputStream, Types.ValueType[] valueTypeArr, String[] strArr, long j, long j2) throws IOException, DMLRuntimeException {
        LOG.debug("readFrameFromInputStream csv");

        Types.ValueType[] outSchema = createOutputSchema(valueTypeArr, j2);
        String[] outNames = createOutputNames(strArr, j2);
        FrameBlock result = createOutputFrameBlock(outSchema, outNames, j);

        InputStreamInputFormat format = new InputStreamInputFormat(inputStream);
        InputSplit split = format.getSplits(null, 1)[0];
        // Parse with the same schema the frame block was allocated with (as readFrameFromHDFS
        // does), so the per-column type lookup matches the allocated columns.
        readCSVFrameFromInputSplit(split, format, null, result, outSchema, outNames, j, j2, 0, true);
        return result;
    }

    /**
     * Reads all input splits of the configured job sequentially into the given frame block.
     *
     * @param path         input path (not used directly; the path is taken from {@code jobConf})
     * @param jobConf      job configuration holding the input path
     * @param fileSystem   file system of the input (not used directly here)
     * @param frameBlock   pre-allocated target frame block
     * @param valueTypeArr value type per column, used to parse the cells
     * @param strArr       column names (forwarded unchanged)
     * @param j            number of rows (forwarded unchanged)
     * @param j2           expected number of columns per row
     * @throws IOException if a split cannot be read
     */
    protected void readCSVFrameFromHDFS(Path path, JobConf jobConf, FileSystem fileSystem, FrameBlock frameBlock, Types.ValueType[] valueTypeArr, String[] strArr, long j, long j2) throws IOException {
        LOG.debug("readCSVFrameFromHDFS csv");
        TextInputFormat format = new TextInputFormat();
        format.configure(jobConf);
        InputSplit[] splits = IOUtilFunctions.sortInputSplits(format.getSplits(jobConf, 1));

        // each split continues at the row index where the previous one stopped
        int rowOffset = 0;
        for (int s = 0; s < splits.length; s++) {
            rowOffset = readCSVFrameFromInputSplit(splits[s], format, jobConf, frameBlock, valueTypeArr, strArr, j, j2, rowOffset, s == 0);
        }
    }

    /**
     * Reads one input split into the target frame block, starting at row {@code i}.
     *
     * <p>The record reader is closed exactly once, after the whole split has been consumed or an
     * error occurred.
     *
     * @param inputSplit   split to read
     * @param inputFormat  input format that creates the record reader for the split
     * @param jobConf      job configuration passed to the input format (may be {@code null} for
     *                     stream-backed formats)
     * @param frameBlock   pre-allocated target frame block
     * @param valueTypeArr value type per column, indexed by cell position within a row
     * @param strArr       column names (not used by this method)
     * @param j            number of rows (not used by this method)
     * @param j2           expected number of columns, checked for every data row
     * @param i            index of the first target row to write
     * @param z            {@code true} if this is the first split, i.e. the one that carries the
     *                     header line when the format has a header
     * @return index of the row following the last row written
     * @throws IOException if reading fails or a row violates the empty-field/column-count checks
     */
    public final int readCSVFrameFromInputSplit(InputSplit inputSplit, InputFormat<LongWritable, Text> inputFormat, JobConf jobConf, FrameBlock frameBlock, Types.ValueType[] valueTypeArr, String[] strArr, long j, long j2, int i, boolean z) throws IOException {
        final boolean hasHeader = this._props.hasHeader();
        final boolean fill = this._props.isFill();
        final double fillValue = this._props.getFillValue();
        final String fillValueStr = String.valueOf(fillValue);
        final HashSet<String> naStrings = this._props.getNAStrings();
        final String delim = this._props.getDelim();

        RecordReader<LongWritable, Text> reader = inputFormat.getRecordReader(inputSplit, jobConf, Reporter.NULL);
        LongWritable key = new LongWritable();
        Text value = new Text();
        int row = i;
        try {
            // the header line only exists in the first split
            if (z && hasHeader) {
                reader.next(key, value);
                // NOTE(review): String.split interprets delim as a regular expression, whereas
                // data rows go through IOUtilFunctions.splitCSV - confirm for special delimiters.
                frameBlock.setColumnNames(value.toString().split(delim));
            }

            while (reader.next(key, value)) {
                String line = value.toString().trim();
                String[] parts = IOUtilFunctions.splitCSV(line, delim);

                // metadata line: missing-value replacement per column
                if (parts[0].equals(TfUtils.TXMTD_MVPREFIX)) {
                    for (int c = 0; c < frameBlock.getNumColumns(); c++) {
                        frameBlock.getColumnMetadata(c).setMvValue(parts[c + 1]);
                    }
                    continue;
                }
                // metadata line: number of distinct values per column
                if (parts[0].equals(TfUtils.TXMTD_NDPREFIX)) {
                    for (int c = 0; c < frameBlock.getNumColumns(); c++) {
                        frameBlock.getColumnMetadata(c).setNumDistinct(Long.parseLong(parts[c + 1]));
                    }
                    continue;
                }

                // regular data row
                boolean emptyValueFound = false;
                int col = 0;
                for (String part : parts) {
                    String cell = part.trim();
                    boolean missing = cell.isEmpty() || (naStrings != null && naStrings.contains(cell));
                    if (missing) {
                        // only write when filling is enabled and the fill value is not the default
                        if (fill && fillValue != DataExpression.DEFAULT_DELIM_FILL_VALUE) {
                            frameBlock.set(row, col, UtilFunctions.stringToObject(valueTypeArr[col], fillValueStr));
                        }
                        emptyValueFound = true;
                    } else {
                        frameBlock.set(row, col, UtilFunctions.stringToObject(valueTypeArr[col], cell));
                    }
                    col++;
                }

                // sanity checks for empty values and number of columns
                IOUtilFunctions.checkAndRaiseErrorCSVEmptyField(line, fill, emptyValueFound);
                IOUtilFunctions.checkAndRaiseErrorCSVNumColumns("", line, parts, j2);
                row++;
            }
        } finally {
            // close once, after the split is fully consumed (or on failure)
            IOUtilFunctions.closeSilently(reader);
        }
        return row;
    }

    /**
     * Determines the dimensions of the CSV input by scanning it.
     *
     * <p>The column count is delegated to {@code IOUtilFunctions.countNumColumnsCSV}. The row
     * count is the number of lines over all splits, excluding the header line of the first split
     * (if the format has a header) and lines starting with one of the metadata prefixes.
     *
     * @param path       input path (not used directly; the path is taken from {@code jobConf})
     * @param jobConf    job configuration holding the input path
     * @param fileSystem file system of the input (not used directly here)
     * @return pair of (number of rows, number of columns)
     * @throws IOException if the input cannot be read
     */
    protected Pair<Integer, Integer> computeCSVSize(Path path, JobConf jobConf, FileSystem fileSystem) throws IOException {
        TextInputFormat format = new TextInputFormat();
        format.configure(jobConf);
        InputSplit[] splits = IOUtilFunctions.sortInputSplits(format.getSplits(jobConf, 1));

        int numCols = IOUtilFunctions.countNumColumnsCSV(splits, format, jobConf, this._props.getDelim());

        int numRows = 0;
        for (int s = 0; s < splits.length; s++) {
            RecordReader<LongWritable, Text> reader = format.getRecordReader(splits[s], jobConf, Reporter.NULL);
            LongWritable key = new LongWritable();
            Text value = new Text();
            try {
                // skip the header line of the first split
                if (s == 0 && this._props.hasHeader()) {
                    reader.next(key, value);
                }
                // count remaining lines, ignoring metadata lines
                while (reader.next(key, value)) {
                    String line = value.toString();
                    boolean metadata = line.startsWith(TfUtils.TXMTD_MVPREFIX) || line.startsWith(TfUtils.TXMTD_NDPREFIX);
                    if (!metadata) {
                        numRows++;
                    }
                }
            } finally {
                // every split's reader is closed, and only after it has been fully read
                IOUtilFunctions.closeSilently(reader);
            }
        }
        return new Pair<>(Integer.valueOf(numRows), Integer.valueOf(numCols));
    }
}
