package com.zavtech.morpheus.source;

import com.univocity.parsers.common.ParsingContext;
import com.univocity.parsers.common.processor.RowProcessor;
import com.univocity.parsers.csv.CsvParser;
import com.univocity.parsers.csv.CsvParserSettings;
import com.zavtech.morpheus.array.Array;
import com.zavtech.morpheus.frame.DataFrame;
import com.zavtech.morpheus.frame.DataFrameColumns;
import com.zavtech.morpheus.frame.DataFrameContent;
import com.zavtech.morpheus.frame.DataFrameException;
import com.zavtech.morpheus.frame.DataFrameSource;
import com.zavtech.morpheus.index.Index;
import com.zavtech.morpheus.util.Resource;
import com.zavtech.morpheus.util.http.HttpClient;
import com.zavtech.morpheus.util.text.Formats;
import com.zavtech.morpheus.util.text.parser.Parser;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.LinkedTransferQueue;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;

/* loaded from: input_file:com/zavtech/morpheus/source/CsvSource.class */
public class CsvSource<R> extends DataFrameSource<R, String, CsvSourceOptions<R>> {

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:com/zavtech/morpheus/source/CsvSource$CsvRequestHandler.class */
    public class CsvRequestHandler implements RowProcessor, Runnable {
        // Number of rows accepted so far (rows rejected by rowPredicate are not counted).
        // Also supplies the default row key (rowCounter - 1) when no rowKeyParser is configured.
        private int rowCounter;
        // Column names of the frame being built; assigned by initHeader(), null until the first row arrives.
        private String[] headers;
        // For each selected column, the position of its value within the raw parsed row.
        private int[] colIndexes;
        // When > 0, a progress line is printed every logBatchSize accepted rows.
        private int logBatchSize;
        // Scratch array reused for every row to hold the selected column values before they are copied into the batch.
        private String[] rowValues;
        // Set to true by processEnded() in parallel mode; read by isComplete() on the worker thread.
        private volatile boolean done;
        // Batch currently being filled by rowProcessed(); null until the first row arrives.
        private DataBatch<R> batch;
        // One parser per selected column; allocated by initBatch() and populated by initFrame().
        private Parser<?>[] parsers;
        // The request options this handler was created with.
        private CsvSourceOptions<R> options;
        // The frame under construction; created by initFrame() the first time processBatch() runs.
        private DataFrame<R, String> frame;
        // Parallel mode only: awaited by getFrame() and counted down by run().
        private CountDownLatch countDownLatch;
        // Optional row filter; null means every row is accepted.
        private Predicate<String[]> rowPredicate;
        // Optional function deriving the row key from the raw row; null means the row counter is used as key.
        private Function<String[], R> rowKeyParser;
        // Parallel mode only: hands full batches from rowProcessed()/processEnded() to run().
        private LinkedTransferQueue<DataBatch<R>> queue;
        // Monitor guarding the done flag / queue hand-off between the parsing code and the worker thread.
        private final Object lock = new Object();

        /**
         * Creates a handler configured from the supplied options.
         *
         * <p>In parallel mode this also allocates the hand-off queue and completion
         * latch and starts a daemon thread named {@code DataFrameCsvReaderThread}
         * that executes {@link #run()}.
         *
         * @param csvSourceOptions the options describing the CSV request
         */
        CsvRequestHandler(CsvSourceOptions<R> csvSourceOptions) {
            this.options = csvSourceOptions;
            this.logBatchSize = csvSourceOptions.getLogBatchSize();
            this.rowKeyParser = csvSourceOptions.getRowKeyParser().orElse(null);
            this.rowPredicate = csvSourceOptions.getRowPredicate().orElse(null);
            if (!csvSourceOptions.isParallel()) {
                return;
            }
            this.queue = new LinkedTransferQueue<>();
            this.countDownLatch = new CountDownLatch(1);
            // The worker is started last so that every field it reads is already assigned.
            final Thread worker = new Thread(this, "DataFrameCsvReaderThread");
            worker.setDaemon(true);
            worker.start();
        }

        /**
         * Returns the frame built by this handler.
         *
         * <p>In parallel mode this blocks on the completion latch until the worker
         * thread has counted it down; otherwise the frame is returned immediately.
         *
         * @return the frame, or {@code null} if no batch was ever processed
         * @throws DataFrameException if the calling thread is interrupted while waiting
         */
        public DataFrame<R, String> getFrame() {
            if (this.options.isParallel()) {
                try {
                    this.countDownLatch.await();
                } catch (InterruptedException e) {
                    // Restore the flag so callers further up the stack can still observe the interrupt.
                    Thread.currentThread().interrupt();
                    throw new DataFrameException("Failed to resolve frame", e);
                }
            }
            return this.frame;
        }

        /**
         * Tells the worker thread whether it may stop.
         *
         * @return {@code true} once parsing has been flagged as done and the queue holds no more batches
         */
        private boolean isComplete() {
            synchronized (this.lock) {
                return this.done && this.queue.isEmpty();
            }
        }

        /**
         * Worker loop for parallel mode: takes batches off the queue and writes them
         * into the frame until parsing is done and the queue is drained.
         *
         * <p>The completion latch is released exactly once, after the loop has ended
         * (normally or through a failure), so that {@link #getFrame()} never returns
         * while batches are still outstanding. Failures are printed to standard error
         * and terminate the loop.
         */
        @Override // java.lang.Runnable
        public void run() {
            try {
                while (!isComplete()) {
                    try {
                        final DataBatch<R> next = this.queue.take();
                        if (next.rowCount() > 0) {
                            processBatch(next);
                        }
                    } catch (InterruptedException e) {
                        // Preserve the interrupt status before abandoning the loop.
                        Thread.currentThread().interrupt();
                        throw new DataFrameException("Failed to process CSV data batch", e);
                    } catch (Exception e) {
                        throw new DataFrameException("Failed to process CSV data batch", e);
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                // Must sit outside the loop: releasing the latch per batch lets getFrame() see a partial frame.
                this.countDownLatch.countDown();
            }
        }

        /**
         * Parser callback invoked before the first row; intentionally does nothing.
         *
         * @param parsingContext the parser context (unused)
         */
        @Override // com.univocity.parsers.common.processor.RowProcessor
        public void processStarted(ParsingContext parsingContext) {
            // Nothing to prepare here: headers and the first batch are set up lazily by rowProcessed().
        }

        /**
         * Parser callback invoked for every parsed row.
         *
         * <p>On the first row the headers, column selection and first batch are
         * initialised. Rows rejected by the row predicate are skipped. Accepted rows
         * are projected onto the selected columns and appended to the current batch,
         * keyed either by the configured row key parser or by the zero-based count of
         * accepted rows. When the batch reaches the configured read batch size it is
         * queued for the worker thread (parallel mode) or processed inline and reused.
         *
         * @param strArr         the raw values of the parsed row
         * @param parsingContext the parser context, used to resolve headers on the first row
         * @throws DataFrameException if the row cannot be handled
         */
        @Override // com.univocity.parsers.common.processor.RowProcessor
        public void rowProcessed(String[] strArr, ParsingContext parsingContext) {
            try {
                if (this.batch == null) {
                    initBatch(strArr.length, parsingContext);
                }
                if (this.rowPredicate != null && !this.rowPredicate.test(strArr)) {
                    return;
                }
                this.rowCounter++;
                if (this.logBatchSize > 0 && this.rowCounter % this.logBatchSize == 0) {
                    System.out.println("Loaded " + this.rowCounter + " rows...");
                }
                for (int i = 0; i < this.colIndexes.length; i++) {
                    final int sourceIndex = this.colIndexes[i];
                    // Short rows are padded with nulls rather than failing.
                    this.rowValues[i] = sourceIndex < strArr.length ? strArr[sourceIndex] : null;
                }
                if (this.rowKeyParser == null) {
                    this.batch.addRow(this.rowCounter - 1, this.rowValues);
                } else {
                    // The key is passed as R so that the addRow(X, String[]) overload is selected.
                    final R rowKey = this.rowKeyParser.apply(strArr);
                    this.batch.addRow(rowKey, this.rowValues);
                }
                if (this.batch.rowCount() == this.options.getReadBatchSize()) {
                    if (this.options.isParallel()) {
                        synchronized (this.lock) {
                            // Hand the full batch to the worker and continue with a fresh one.
                            this.queue.add(this.batch);
                            this.batch = new DataBatch<>(this.options, this.colIndexes.length);
                        }
                    } else {
                        processBatch(this.batch);
                        this.batch.clear();
                    }
                }
            } catch (DataFrameException e) {
                throw e;
            } catch (Exception e) {
                throw new DataFrameException("Failed to parse row: " + Arrays.toString(strArr), e);
            }
        }

        /**
         * Parser callback invoked after the last row.
         *
         * <p>Flushes the batch that is still being filled (an empty batch is created
         * if no row was ever seen). In parallel mode the handler is flagged as done
         * and the final batch is queued for the worker thread; otherwise the batch is
         * processed inline.
         *
         * @param parsingContext the parser context (unused)
         * @throws DataFrameException if the final batch cannot be handled
         */
        @Override // com.univocity.parsers.common.processor.RowProcessor
        public void processEnded(ParsingContext parsingContext) {
            try {
                final boolean parallel = this.options.isParallel();
                if (parallel) {
                    synchronized (this.lock) {
                        this.done = true;
                        if (this.batch == null) {
                            this.batch = new DataBatch<>(this.options, 0);
                        }
                        this.queue.add(this.batch);
                    }
                } else {
                    if (this.batch == null) {
                        this.batch = new DataBatch<>(this.options, 0);
                    }
                    processBatch(this.batch);
                }
            } catch (DataFrameException e) {
                throw e;
            } catch (Exception e) {
                throw new DataFrameException("Failed to process CSV parse end", e);
            }
        }

        /**
         * Resolves the headers and allocates the per-column working state: the parser
         * array, the first batch and the reusable row-value buffer.
         *
         * @param fieldCount     number of values in the first parsed row
         * @param parsingContext the parser context, passed on to header resolution
         */
        private void initBatch(int fieldCount, ParsingContext parsingContext) {
            final int selectedColumns = initHeader(fieldCount, parsingContext);
            this.parsers = new Parser[selectedColumns];
            this.batch = new DataBatch<>(this.options, selectedColumns);
            this.rowValues = new String[selectedColumns];
        }

        /**
         * Resolves the column names and the raw-row positions of the columns to load.
         *
         * <p>Names come from the parser context when the options declare a header row,
         * otherwise they are generated as {@code Column-<position>}; null names are
         * replaced the same way. The optional column-index predicate and column-name
         * predicate are then applied in that order to narrow the selection, and
         * finally the optional column name mapping renames the surviving columns.
         *
         * <p>Note: the predicates are applied through a name-to-index map, so
         * duplicate header names cause {@code Collectors.toMap} to throw an
         * {@code IllegalStateException} when either predicate is configured.
         *
         * @param fieldCount     number of values in the first parsed row
         * @param parsingContext the parser context supplying the header row
         * @return the number of selected columns
         */
        private int initHeader(int fieldCount, ParsingContext parsingContext) {
            final String[] rawHeaders = this.options.isHeader()
                    ? parsingContext.headers()
                    : IntStream.range(0, fieldCount).mapToObj(i -> "Column-" + i).toArray(String[]::new);
            this.headers = IntStream.range(0, rawHeaders.length)
                    .mapToObj(i -> rawHeaders[i] != null ? rawHeaders[i] : "Column-" + i)
                    .toArray(String[]::new);
            this.colIndexes = IntStream.range(0, this.headers.length).toArray();
            this.options.getColIndexPredicate().ifPresent(predicate -> {
                final Map<String, Integer> indexByName = headerIndexMap();
                this.headers = Arrays.stream(this.headers)
                        .filter(name -> predicate.test(indexByName.get(name)))
                        .toArray(String[]::new);
                this.colIndexes = Arrays.stream(this.headers).mapToInt(indexByName::get).toArray();
            });
            this.options.getColNamePredicate().ifPresent(predicate -> {
                final Map<String, Integer> indexByName = headerIndexMap();
                this.headers = Arrays.stream(this.headers).filter(predicate).toArray(String[]::new);
                this.colIndexes = Arrays.stream(this.headers).mapToInt(indexByName::get).toArray();
            });
            this.options.getColumnNameMapping().ifPresent(mapping -> {
                // Renaming happens last, so the predicates above always see the original names.
                this.headers = IntStream.range(0, this.headers.length)
                        .mapToObj(i -> mapping.apply(this.headers[i], i))
                        .toArray(String[]::new);
            });
            return this.colIndexes.length;
        }

        /**
         * Builds a lookup from each currently selected header name to its raw-row position.
         */
        private Map<String, Integer> headerIndexMap() {
            return IntStream.range(0, this.headers.length)
                    .boxed()
                    .collect(Collectors.toMap(i -> this.headers[i], i -> this.colIndexes[i]));
        }

        /**
         * Creates the target frame and chooses a parser and column type for every
         * selected column, using the supplied batch as seed data.
         *
         * <p>If no headers were resolved (no row was ever parsed) a frame without
         * columns is created. Otherwise, for each column, a parser registered for the
         * column name (exact or by regex, see {@code CsvSource.getParser}) takes
         * precedence. When the options declare a type for the column that type is
         * used; if not, a parser is inferred from the seed values and the column type
         * is the single class of the non-null parsed values, or {@code Object} when
         * they disagree or none are present.
         *
         * @param dataBatch the first batch, whose column data seeds type inference
         * @throws DataFrameException if a column's seed values cannot be inspected
         */
        private void initFrame(DataBatch<R> dataBatch) {
            final Class<R> rowType = this.options.getRowAxisType();
            if (this.headers == null) {
                final Index<R> rowKeys = Index.of(rowType, 1);
                final Index<String> colKeys = Index.of(String.class, 1);
                this.frame = DataFrame.of(rowKeys, colKeys, Object.class);
                return;
            }
            final int colCount = this.headers.length;
            final Formats formats = this.options.getFormats();
            final Index<R> rowKeys = Index.of(rowType, this.options.getRowCapacity().orElse(10000));
            final Index<String> colKeys = Index.of(String.class, colCount);
            this.frame = DataFrame.of(rowKeys, colKeys, Object.class);
            for (int i = 0; i < colCount; i++) {
                final String colName = this.headers[i] != null ? this.headers[i] : "Column-" + i;
                try {
                    final String[] colData = dataBatch.colData(i);
                    final Optional<Parser<?>> userParser = CsvSource.getParser(formats, colName);
                    final Optional<Class<?>> declaredType = getColumnType(colName);
                    if (declaredType.isPresent()) {
                        final Class<?> type = declaredType.get();
                        this.parsers[i] = userParser.orElse(formats.getParserOrFail(type, Object.class));
                        // The column key is the plain name; no cast is needed or valid here.
                        this.frame.cols().add(colName, type);
                    } else {
                        final Parser<?> parser = userParser.orElse(
                                formats.findParser(colData).orElse(formats.getParserOrFail(String.class)));
                        final Set<Class<?>> observedTypes = Arrays.stream(colData)
                                .map(parser)
                                .filter(Objects::nonNull)
                                .<Class<?>>map(value -> value.getClass())
                                .collect(Collectors.toSet());
                        final Class<?> type = observedTypes.size() == 1 ? observedTypes.iterator().next() : Object.class;
                        this.parsers[i] = parser;
                        this.frame.cols().add(colName, type);
                    }
                } catch (Exception e) {
                    throw new DataFrameException("Failed to inspect seed values in column: " + colName, e);
                }
            }
        }

        /**
         * Looks up the declared type for a column.
         *
         * <p>An exact match from the options wins; otherwise the keys of the column
         * type map are treated as regular expressions and the first one that matches
         * the column name supplies the type.
         *
         * @param colName the column name to resolve
         * @return the declared type, or empty if none applies
         */
        private Optional<Class<?>> getColumnType(String colName) {
            final Optional<Class<?>> exact = this.options.getColumnType(colName);
            if (exact.isPresent()) {
                return exact;
            }
            return this.options.getColTypeMap().entrySet().stream()
                    .filter(entry -> colName.matches(entry.getKey()))
                    .<Class<?>>map(Map.Entry::getValue)
                    .findFirst();
        }

        /**
         * Appends the rows of a batch to the frame, creating the frame first if this
         * is the first batch.
         *
         * <p>Row keys are added in bulk, then every cell is parsed with the column's
         * parser and stored through the setter that matches the parser's style
         * (boolean, int, long, double, or generic value for everything else).
         *
         * @param dataBatch the batch to append; an empty batch only triggers frame creation
         * @throws DataFrameException if any value fails to parse or store; the message
         *         carries a line number derived from the frame row ordinal
         */
        private void processBatch(DataBatch<R> dataBatch) {
            int rowOrdinal = -1;
            try {
                if (this.frame == null) {
                    initFrame(dataBatch);
                }
                final int batchRows = dataBatch.rowCount();
                if (batchRows <= 0) {
                    return;
                }
                final Array<R> keys = dataBatch.keys();
                final int firstOrdinal = this.frame.rowCount();
                // A partially filled batch must not contribute its unused trailing key slots.
                this.frame.rows().addAll(batchRows < this.options.getReadBatchSize() ? keys.copy(0, batchRows) : keys);
                final DataFrameContent<R, String> data = this.frame.data();
                for (int col = 0; col < this.colIndexes.length; col++) {
                    final String[] colData = dataBatch.colData(col);
                    final Parser<?> parser = this.parsers[col];
                    for (int row = 0; row < batchRows; row++) {
                        rowOrdinal = firstOrdinal + row;
                        final String raw = colData[row];
                        switch (parser.getStyle()) {
                            case BOOLEAN:
                                data.setBoolean(rowOrdinal, col, parser.applyAsBoolean(raw));
                                break;
                            case INTEGER:
                                data.setInt(rowOrdinal, col, parser.applyAsInt(raw));
                                break;
                            case LONG:
                                data.setLong(rowOrdinal, col, parser.applyAsLong(raw));
                                break;
                            case DOUBLE:
                                data.setDouble(rowOrdinal, col, parser.applyAsDouble(raw));
                                break;
                            default:
                                // Store the parsed object as-is; forcing it through an int cast would
                                // fail for every non-Integer value.
                                data.setValue(rowOrdinal, col, parser.apply(raw));
                                break;
                        }
                    }
                }
                if (this.frame.rowCount() % 100000 == 0) {
                    System.out.println("Processed " + this.frame.rowCount() + " rows...");
                }
            } catch (Exception e) {
                throw new DataFrameException("Failed to process CSV batch, line no " + (this.options.isHeader() ? rowOrdinal + 2 : rowOrdinal + 1), e);
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:com/zavtech/morpheus/source/CsvSource$DataBatch.class */
    public static class DataBatch<X> {
        /** Row keys of the rows held in this batch, indexed by row position. */
        private Array<X> keys;
        /** Number of rows currently stored. */
        private int rowCount;
        /** Raw cell text laid out column-major: {@code data[column][row]}. */
        private String[][] data;

        /**
         * Creates a batch sized from the request options.
         *
         * @param csvSourceOptions supplies the row key type and the read batch size (row capacity)
         * @param columnCount      number of columns to hold
         */
        private DataBatch(CsvSourceOptions<X> csvSourceOptions, int columnCount) {
            this(csvSourceOptions.getRowAxisType(), csvSourceOptions.getReadBatchSize(), columnCount);
        }

        /**
         * Creates an empty batch.
         *
         * @param cls the row key type
         * @param i   the row capacity
         * @param i2  the number of columns
         */
        public DataBatch(Class<X> cls, int i, int i2) {
            this.keys = Array.of(cls, i);
            this.data = new String[i2][i];
        }

        /**
         * @return the number of rows added since creation or the last {@link #clear()}
         */
        public int rowCount() {
            return this.rowCount;
        }

        /**
         * @return the backing key array (not a copy); only the first {@link #rowCount()} entries are meaningful
         */
        public Array<X> keys() {
            return this.keys;
        }

        /**
         * @param i the column position
         * @return the backing value array for that column (not a copy)
         */
        public String[] colData(int i) {
            return this.data[i];
        }

        /**
         * Resets the batch for reuse: the row count returns to zero and all keys and
         * cell values are nulled out.
         */
        public void clear() {
            this.rowCount = 0;
            this.keys.fill(null);
            for (String[] column : this.data) {
                Arrays.fill(column, null);
            }
        }

        /**
         * Appends a row identified by an object key.
         *
         * @param x      the row key
         * @param strArr the cell values, one per column
         * @return the position the row was stored at
         */
        public int addRow(X x, String[] strArr) {
            this.keys.setValue(this.rowCount, x);
            return storeValues(strArr);
        }

        /**
         * Appends a row identified by a primitive int key.
         *
         * @param i      the row key
         * @param strArr the cell values, one per column
         * @return the position the row was stored at
         */
        public int addRow(int i, String[] strArr) {
            this.keys.setInt(this.rowCount, i);
            return storeValues(strArr);
        }

        /**
         * Copies the values into the next free row and advances the row count.
         *
         * @param values the cell values, one per column
         * @return the position the row was stored at
         */
        private int storeValues(String[] values) {
            final int row = this.rowCount;
            for (int col = 0; col < values.length; col++) {
                this.data[col][row] = values[col];
            }
            this.rowCount = row + 1;
            return row;
        }
    }

    /**
     * Reads a CSV resource into a DataFrame.
     *
     * <p>The options are initialised through the supplied configurator, and the
     * resource they describe (file, URL or input stream) is dispatched to the
     * matching {@code parse} overload.
     *
     * @param consumer configures the CSV options for this request
     * @return the frame produced by the parser
     * @throws DataFrameException if the resource type is unsupported or loading fails
     */
    @Override // com.zavtech.morpheus.frame.DataFrameSource
    public DataFrame<R, String> read(Consumer<CsvSourceOptions<R>> consumer) throws DataFrameException {
        try {
            final CsvSourceOptions<R> options = (CsvSourceOptions) initOptions(new CsvSourceOptions(), consumer);
            final Resource source = options.getResource();
            final DataFrame<R, String> result;
            switch (source.getType()) {
                case FILE:
                    result = parse(options, new FileInputStream(source.asFile()));
                    break;
                case URL:
                    result = parse(options, source.asURL());
                    break;
                case INPUT_STREAM:
                    result = parse(options, source.asInputStream());
                    break;
                default:
                    throw new DataFrameException("Unsupported resource specified in CSVRequest: " + source);
            }
            return result;
        } catch (DataFrameException e) {
            // Already in the right shape; do not wrap a second time.
            throw e;
        } catch (Exception e) {
            throw new DataFrameException("Failed to create DataFrame from CSV source", e);
        }
    }

    /**
     * Loads a frame from a URL.
     *
     * <p>URLs whose protocol does not start with {@code http} are opened directly
     * and handed to the stream-based overload. HTTP(S) URLs are fetched through
     * the default {@code HttpClient}; the response stream is always closed, also
     * when parsing fails.
     *
     * @param csvSourceOptions the options for this request
     * @param url              the location of the CSV content, never null
     * @return the parsed frame, or {@code null} if the HTTP call yields no result
     * @throws IOException          if a non-HTTP URL cannot be opened or read
     * @throws NullPointerException if {@code url} is null
     */
    private DataFrame<R, String> parse(CsvSourceOptions<R> csvSourceOptions, URL url) throws IOException {
        Objects.requireNonNull(url, "The URL cannot be null");
        if (!url.getProtocol().startsWith("http")) {
            return parse(csvSourceOptions, url.openStream());
        }
        return HttpClient.getDefault().<DataFrame<R, String>>doGet(httpRequest -> {
            httpRequest.setUrl(url);
            httpRequest.setResponseHandler(httpResponse -> {
                try (InputStream stream = httpResponse.getStream()) {
                    return Optional.ofNullable(parse(csvSourceOptions, stream));
                } catch (IOException e) {
                    // The handler cannot throw checked exceptions, so the failure is rethrown unchecked.
                    throw new RuntimeException("Failed to load DataFrame from csv: " + url, e);
                }
            });
        }).orElse(null);
    }

    /**
     * Parses CSV content from a stream into a frame.
     *
     * <p>The stream is decoded with the charset from the options (UTF-8 when none
     * is set) and fed to a univocity {@code CsvParser} whose row processor is a
     * fresh {@code CsvRequestHandler}. The reader, and with it the supplied
     * stream, is closed when this method returns or throws; a failure while
     * closing is attached as a suppressed exception instead of replacing the
     * original error.
     *
     * @param csvSourceOptions the options for this request
     * @param inputStream      the CSV content; closed by this method
     * @return the frame assembled by the handler
     * @throws IOException if closing the reader fails
     */
    private DataFrame<R, String> parse(CsvSourceOptions<R> csvSourceOptions, InputStream inputStream) throws IOException {
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(inputStream, csvSourceOptions.getCharset().orElse(StandardCharsets.UTF_8)))) {
            final CsvRequestHandler handler = new CsvRequestHandler(csvSourceOptions);
            final CsvParserSettings settings = new CsvParserSettings();
            settings.getFormat().setDelimiter(csvSourceOptions.getDelimiter());
            settings.setHeaderExtractionEnabled(csvSourceOptions.isHeader());
            settings.setLineSeparatorDetectionEnabled(true);
            settings.setRowProcessor(handler);
            settings.setIgnoreTrailingWhitespaces(true);
            settings.setIgnoreLeadingWhitespaces(true);
            settings.setMaxColumns(10000);
            settings.setReadInputOnSeparateThread(false);
            new CsvParser(settings).parse(reader);
            // In parallel mode this blocks until the handler's worker thread has finished.
            return handler.getFrame();
        }
    }

    /**
     * Finds the parser registered in the formats for a column name.
     *
     * <p>A parser registered under the exact name wins. Otherwise every
     * {@code String} parser key is treated as a regular expression and the first
     * key that matches the whole name supplies the parser.
     *
     * <p>Decompiler note: the access modifier of this method was reported as
     * changed from {@code protected}.
     *
     * @param formats the formats holding the registered parsers
     * @param str     the column name to resolve
     * @return the matching parser, or empty if none is registered
     */
    public static Optional<Parser<?>> getParser(Formats formats, String str) {
        final Parser<?> exact = formats.getParser(str);
        if (exact != null) {
            return Optional.of(exact);
        }
        for (Object key : formats.getParserKeys()) {
            if (!(key instanceof String)) {
                continue;
            }
            final String pattern = key.toString();
            if (str.matches(pattern)) {
                return Optional.ofNullable(formats.getParserOrFail(pattern));
            }
        }
        return Optional.empty();
    }

    /**
     * Ad-hoc timing harness: loads a header-less CSV file in parallel mode, prints
     * the frame and reports how long the load took.
     *
     * @param strArr optional; when a first argument is given it is used as the
     *               resource to load, otherwise the original hard-coded sample path is used
     */
    public static void main(String[] strArr) {
        final String resource = strArr != null && strArr.length > 0
                ? strArr[0]
                : "/Users/witdxav/Dropbox/data/uk-house-prices/uk-house-prices-2006.csv";
        final long start = System.currentTimeMillis();
        // The class type parameter R is not available in a static method, so the row key type is left open.
        final DataFrame<?, String> frame = DataFrame.read().csv(options -> {
            options.setResource(resource);
            options.setHeader(false);
            options.setParallel(true);
        });
        final long end = System.currentTimeMillis();
        frame.out().print();
        System.out.printf("\n\nLoaded DataFrame with %s row in %s millis", frame.rowCount(), end - start);
    }
}
