package org.wso2.carbon.ml.core.impl;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.math3.random.EmpiricalDistribution;
import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
import org.wso2.carbon.metrics.manager.Level;
import org.wso2.carbon.metrics.manager.MetricManager;
import org.wso2.carbon.metrics.manager.Timer;
import org.wso2.carbon.ml.commons.constants.MLConstants;
import org.wso2.carbon.ml.commons.domain.SamplePoints;
import org.wso2.carbon.ml.commons.domain.SummaryStats;
import org.wso2.carbon.ml.commons.domain.config.SummaryStatisticsSettings;
import org.wso2.carbon.ml.core.exceptions.MLDataProcessingException;
import org.wso2.carbon.ml.core.interfaces.DatasetProcessor;
import org.wso2.carbon.ml.core.utils.MLCoreServiceValueHolder;
import org.wso2.carbon.ml.database.DatabaseService;
import org.wso2.carbon.ml.database.exceptions.DatabaseHandlerException;

/* loaded from: input_file:org/wso2/carbon/ml/core/impl/SummaryStatsGenerator.class */
/**
 * Background task that takes a sample of a dataset version through a {@link DatasetProcessor},
 * derives per-column summary statistics from that sample and stores both the sample points and
 * the statistics through the {@link DatabaseService}.
 *
 * <p>All working state is kept in instance fields that are filled in by {@link #run()}; the
 * protected calculation methods read and update those fields.
 */
public class SummaryStatsGenerator implements Runnable {
    private static final Log logger = LogFactory.getLog(SummaryStatsGenerator.class);

    /** Value written to {@link #type} for columns treated as categorical. */
    private static final String CATEGORICAL = "CATEGORICAL";
    /** Value written to {@link #type} for columns treated as numerical. */
    private static final String NUMERICAL = "NUMERICAL";

    private final SummaryStatisticsSettings summarySettings;
    private final DatasetProcessor datasetProcessor;
    private final long datasetSchemaId;
    private final long datasetVersionId;

    // Per-column working arrays, all indexed by column position.
    private EmpiricalDistribution[] histogram;
    private int[] unique;
    private int[] missing;
    private int[] stringCellCount;
    private int[] decimalCellCount;
    private String[] type;
    private Map<String, Integer> headerMap;

    private final List<Integer> numericDataColumnPositions = new ArrayList<Integer>();
    private final List<Integer> stringDataColumnPositions = new ArrayList<Integer>();
    private List<List<String>> columnData = new ArrayList<List<String>>();
    private final List<DescriptiveStatistics> descriptiveStats = new ArrayList<DescriptiveStatistics>();
    private final List<SortedMap<?, Integer>> graphFrequencies = new ArrayList<SortedMap<?, Integer>>();
    private SamplePoints samplePoints = new SamplePoints();

    /**
     * Creates a generator for one dataset version.
     *
     * @param j                         id of the dataset schema the statistics are stored against
     * @param j2                        id of the dataset version being summarised
     * @param summaryStatisticsSettings settings supplying the categorical threshold and the
     *                                  number of histogram bins
     * @param datasetProcessor          source of the sample points
     */
    public SummaryStatsGenerator(long j, long j2, SummaryStatisticsSettings summaryStatisticsSettings, DatasetProcessor datasetProcessor) {
        this.datasetSchemaId = j;
        this.datasetVersionId = j2;
        this.summarySettings = summaryStatisticsSettings;
        this.datasetProcessor = datasetProcessor;
    }

    /**
     * Takes the sample, computes the statistics and persists both.
     *
     * <p>If sampling, calculation or persistence fails with a {@link DatabaseHandlerException} or
     * {@link MLDataProcessingException}, the current sample points (including their "generated"
     * flag) are still written to the database and the failure is logged rather than rethrown.
     * The timer started at the beginning of the method is stopped exactly once on every path.
     */
    @Override // java.lang.Runnable
    public void run() {
        Timer.Context timerContext = MetricManager.timer(Level.INFO, "org.wso2.carbon.ml.dataset-summary-generation-time").start();
        this.samplePoints.setGenerated(false);
        try {
            this.samplePoints = this.datasetProcessor.takeSample();
            this.samplePoints.setGenerated(true);
            this.headerMap = this.samplePoints.getHeader();
            this.columnData = this.samplePoints.getSamplePoints();
            this.missing = this.samplePoints.getMissing();
            this.stringCellCount = this.samplePoints.getStringCellCount();
            this.decimalCellCount = this.samplePoints.getDecimalCellCount();

            int columnCount = this.headerMap.size();
            this.unique = new int[columnCount];
            this.type = new String[columnCount];
            this.histogram = new EmpiricalDistribution[columnCount];
            for (int column = 0; column < columnCount; column++) {
                this.descriptiveStats.add(new DescriptiveStatistics());
                this.graphFrequencies.add(new TreeMap<String, Integer>());
            }

            identifyColumnDataType();
            calculateDescriptiveStats();
            calculateStringColumnFrequencies();
            calculateNumericColumnFrequencies();

            SummaryStats summaryStats = new SummaryStats(this.headerMap, this.type, this.graphFrequencies, this.missing, this.unique, this.descriptiveStats);
            DatabaseService databaseService = MLCoreServiceValueHolder.getInstance().getDatabaseService();
            databaseService.updateSamplePoints(this.datasetVersionId, this.samplePoints);
            databaseService.updateSummaryStatistics(this.datasetSchemaId, this.datasetVersionId, summaryStats);
            if (logger.isDebugEnabled()) {
                logger.debug("Summary statistics successfully generated for dataset version: " + this.datasetVersionId);
            }
        } catch (DatabaseHandlerException | MLDataProcessingException e) {
            try {
                // Persist the sample points even on failure so their generation status is recorded.
                MLCoreServiceValueHolder.getInstance().getDatabaseService().updateSamplePoints(this.datasetVersionId, this.samplePoints);
            } catch (DatabaseHandlerException statusUpdateError) {
                logger.error("Error occurred while updating sample point generation status for dataset version " + this.datasetVersionId + ": " + statusUpdateError.getMessage(), statusUpdateError);
            } finally {
                // The original failure is always reported, whatever happened to the status update.
                logger.error("Error occurred while calculating summary statistics for dataset version " + this.datasetVersionId + ": " + e.getMessage(), e);
            }
        } finally {
            timerContext.stop();
        }
    }

    /**
     * Classifies every column as {@code "CATEGORICAL"} or {@code "NUMERICAL"}.
     *
     * <p>A column with at least one string cell is categorical and recorded as a string column;
     * every other column is recorded as a numeric column. A numeric column is then re-labelled
     * categorical when it has no decimal cells and the percentage of its distinct values that
     * occur more than once reaches the configured categorical threshold. Empty numeric columns
     * are logged and left as numerical.
     *
     * @return the per-column type array (the same array held by this instance)
     */
    protected String[] identifyColumnDataType() {
        int columnCount = this.headerMap.size();
        for (int column = 0; column < columnCount; column++) {
            if (this.stringCellCount[column] > 0) {
                this.stringDataColumnPositions.add(Integer.valueOf(column));
                this.type[column] = CATEGORICAL;
            } else {
                this.numericDataColumnPositions.add(Integer.valueOf(column));
                this.type[column] = NUMERICAL;
            }
        }

        double categoricalThreshold = this.summarySettings.getCategoricalThreshold();
        for (int column : this.numericDataColumnPositions) {
            List<String> values = this.columnData.get(column);
            if (values.isEmpty()) {
                logger.warn(String.format("Column %s is empty in the selected sample rows in dataset version %s", Integer.valueOf(column), Long.valueOf(this.datasetVersionId)));
                continue;
            }

            // Single pass: a value that fails to enter distinctValues has been seen before.
            HashSet<String> distinctValues = new HashSet<String>();
            HashSet<String> repeatedValues = new HashSet<String>();
            for (String value : values) {
                if (!distinctValues.add(value)) {
                    repeatedValues.add(value);
                }
            }

            // Floating-point division: integer division would truncate the ratio to 0 or 1 and
            // make the threshold comparison meaningless.
            double repeatedPercentage = 100.0 * repeatedValues.size() / distinctValues.size();
            if (this.decimalCellCount[column] == 0 && repeatedPercentage >= categoricalThreshold) {
                this.type[column] = CATEGORICAL;
            }
        }
        return this.type;
    }

    /**
     * Feeds every non-missing value of each numeric column into that column's
     * {@link DescriptiveStatistics}.
     *
     * @return the per-column descriptive statistics list held by this instance
     * @throws NumberFormatException if a non-missing value of a numeric column cannot be parsed
     *                               as a double
     */
    protected List<DescriptiveStatistics> calculateDescriptiveStats() {
        for (int column : this.numericDataColumnPositions) {
            DescriptiveStatistics columnStats = this.descriptiveStats.get(column);
            for (String value : this.columnData.get(column)) {
                if (isPresent(value)) {
                    columnStats.addValue(Double.parseDouble(value));
                }
            }
        }
        return this.descriptiveStats;
    }

    /**
     * Records, for each string column, the number of distinct values and how often each
     * non-null value occurs.
     *
     * @return the per-column frequency list held by this instance
     */
    protected List<SortedMap<?, Integer>> calculateStringColumnFrequencies() {
        for (int column : this.stringDataColumnPositions) {
            List<String> values = this.columnData.get(column);
            this.unique[column] = new HashSet<String>(values).size();
            this.graphFrequencies.set(column, countOccurrences(values, false));
        }
        return this.graphFrequencies;
    }

    /**
     * Records, for each numeric column, the number of distinct values and its frequencies:
     * per-value counts (ignoring null and empty values) when the column was classified as
     * categorical, otherwise histogram bin counts.
     *
     * @return the per-column frequency list held by this instance
     */
    protected List<SortedMap<?, Integer>> calculateNumericColumnFrequencies() {
        int histogramBins = this.summarySettings.getHistogramBins();
        for (int column : this.numericDataColumnPositions) {
            List<String> values = this.columnData.get(column);
            this.unique[column] = new HashSet<String>(values).size();
            if (CATEGORICAL.equals(this.type[column])) {
                this.graphFrequencies.set(column, countOccurrences(values, true));
            } else {
                calculateIntervalFreqs(column, histogramBins);
            }
        }
        return this.graphFrequencies;
    }

    /**
     * Builds the histogram of one numeric column and stores its bin counts, keyed by bin index.
     *
     * <p>Only non-missing values are binned, matching {@link #calculateDescriptiveStats()};
     * missing cells are not treated as zeros. When the column has no usable values the
     * distribution is not loaded and every bin is reported with a count of zero.
     *
     * @param i  position of the column
     * @param i2 number of histogram bins
     * @return the per-column frequency list held by this instance
     * @throws NumberFormatException if a non-missing value cannot be parsed as a double
     */
    protected List<SortedMap<?, Integer>> calculateIntervalFreqs(int i, int i2) {
        List<String> values = this.columnData.get(i);

        // Pack the parsed values at the front of the buffer, then trim it to the used length.
        double[] buffer = new double[values.size()];
        int parsedCount = 0;
        for (String value : values) {
            if (isPresent(value)) {
                buffer[parsedCount++] = Double.parseDouble(value);
            }
        }
        double[] data = new double[parsedCount];
        System.arraycopy(buffer, 0, data, 0, parsedCount);

        EmpiricalDistribution distribution = new EmpiricalDistribution(i2);
        this.histogram[i] = distribution;

        SortedMap<Integer, Integer> binFrequencies = new TreeMap<Integer, Integer>();
        if (parsedCount == 0) {
            for (int bin = 0; bin < i2; bin++) {
                binFrequencies.put(Integer.valueOf(bin), Integer.valueOf(0));
            }
        } else {
            distribution.load(data);
            int bin = 0;
            for (SummaryStatistics binStats : distribution.getBinStats()) {
                binFrequencies.put(Integer.valueOf(bin), Integer.valueOf((int) binStats.getN()));
                bin++;
            }
        }
        this.graphFrequencies.set(i, binFrequencies);
        return this.graphFrequencies;
    }

    /** Returns true when the cell is non-null and not listed in {@code MLConstants.MISSING_VALUES}. */
    private static boolean isPresent(String value) {
        return value != null && !MLConstants.MISSING_VALUES.contains(value);
    }

    /** Counts occurrences of each non-null value in one pass; empty strings are skipped when {@code skipEmpty} is set. */
    private static SortedMap<String, Integer> countOccurrences(List<String> values, boolean skipEmpty) {
        SortedMap<String, Integer> counts = new TreeMap<String, Integer>();
        for (String value : values) {
            if (value == null || (skipEmpty && value.isEmpty())) {
                continue;
            }
            Integer seenSoFar = counts.get(value);
            counts.put(value, Integer.valueOf(seenSoFar == null ? 1 : seenSoFar.intValue() + 1));
        }
        return counts;
    }
}
