package org.wso2.carbon.ml.core.utils;

import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Properties;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.regex.Pattern;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.lang.math.NumberUtils;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SQLContext;
import org.wso2.carbon.analytics.api.AnalyticsDataAPI;
import org.wso2.carbon.analytics.datasource.commons.ColumnDefinition;
import org.wso2.carbon.analytics.datasource.commons.exception.AnalyticsException;
import org.wso2.carbon.analytics.datasource.commons.exception.AnalyticsTableNotAvailableException;
import org.wso2.carbon.context.PrivilegedCarbonContext;
import org.wso2.carbon.ml.commons.constants.MLConstants;
import org.wso2.carbon.ml.commons.domain.Feature;
import org.wso2.carbon.ml.commons.domain.MLDatasetVersion;
import org.wso2.carbon.ml.commons.domain.SamplePoints;
import org.wso2.carbon.ml.commons.domain.Workflow;
import org.wso2.carbon.ml.commons.domain.config.MLProperty;
import org.wso2.carbon.ml.core.exceptions.MLMalformedDatasetException;
import org.wso2.carbon.ml.core.spark.transformations.DiscardedRowsFilter;
import org.wso2.carbon.ml.core.spark.transformations.HeaderFilter;
import org.wso2.carbon.ml.core.spark.transformations.LineToTokens;
import org.wso2.carbon.ml.core.spark.transformations.RowsToLines;

/* loaded from: input_file:org/wso2/carbon/ml/core/utils/MLUtils.class */
/**
 * Static helpers for the ML core: sampling datasets through Spark, reading table metadata through the
 * analytics data API, resolving feature indices of a {@link Workflow} and a few string/collection utilities.
 */
public class MLUtils {

    /**
     * Resolves the column separator that belongs to a data format name.
     */
    public static class ColumnSeparatorFactory {

        /**
         * Returns the delimiter of the {@link CSVFormat} selected by {@link DataTypeFactory#getCSVFormat(String)}.
         *
         * @param str data format name, e.g. {@code "TSV"}; any other value (including {@code null}) selects
         *            {@link CSVFormat#RFC4180}
         * @return the delimiter character as a one-character string
         */
        public static String getColumnSeparator(String str) {
            return String.valueOf(DataTypeFactory.getCSVFormat(str).getDelimiter());
        }
    }

    /**
     * Maps a data format name to a {@link CSVFormat}.
     */
    public static class DataTypeFactory {

        /**
         * Returns {@link CSVFormat#TDF} for {@code "TSV"} (case-insensitive) and {@link CSVFormat#RFC4180}
         * for every other value, including {@code null}.
         *
         * @param str data format name
         * @return the matching CSV format, never {@code null}
         */
        public static CSVFormat getCSVFormat(String str) {
            if ("TSV".equalsIgnoreCase(str)) {
                return CSVFormat.TDF;
            }
            return CSVFormat.RFC4180;
        }
    }

    /**
     * Reads the text file at the given path through the shared Spark context and extracts sample points.
     * <p>
     * Every failure, including the "does not match the data format" check performed here, is re-thrown
     * as an {@link MLMalformedDatasetException} whose message starts with
     * "Failed to extract the sample points from path".
     *
     * @param str  path handed to {@code JavaSparkContext.textFile}
     * @param str2 data format name, see {@link DataTypeFactory#getCSVFormat(String)}
     * @param i    requested sample size (divided by the number of features before sampling)
     * @param z    {@code true} if the first line holds the column names; otherwise names {@code V1..Vn}
     *             are generated
     * @param str3 not used by this method
     * @param i2   not used by this method
     * @return the collected sample points
     * @throws MLMalformedDatasetException if the first line does not contain the delimiter or sampling fails
     */
    public static SamplePoints getSample(String str, String str2, int i, boolean z, String str3, int i2)
            throws MLMalformedDatasetException {
        try {
            CSVFormat dataFormat = DataTypeFactory.getCSVFormat(str2);
            JavaRDD<String> lines = MLCoreServiceValueHolder.getInstance().getSparkContext().textFile(str);
            String firstLine = lines.first();
            if (!firstLine.contains(String.valueOf(dataFormat.getDelimiter()))) {
                throw new MLMalformedDatasetException(String.format(
                        "File content does not match the data format. [First Line] %s [Data Format] %s",
                        firstLine, str2));
            }
            return getSamplePoints(i, z, null, new ArrayList<List<String>>(), dataFormat, lines);
        } catch (Exception e) {
            throw new MLMalformedDatasetException(
                    "Failed to extract the sample points from path: " + str + ". Cause: " + e, e);
        }
    }

    /**
     * Returns the first line of the text file at the given path, read through the shared Spark context.
     *
     * @param str path handed to {@code JavaSparkContext.textFile}
     * @return the first line of the file
     */
    public static String getFirstLine(String str) {
        return MLCoreServiceValueHolder.getInstance().getSparkContext().textFile(str).first();
    }

    /**
     * Extracts sample points from an analytics table. The header map is built from the table's column
     * names and rows are rendered as {@link CSVFormat#RFC4180} lines.
     *
     * @param str  table name
     * @param i    requested sample size (divided by the number of features before sampling)
     * @param str2 not used by this method
     * @param i2   tenant id used for the schema lookup and the table query
     * @return the collected sample points
     * @throws MLMalformedDatasetException wrapping any failure raised while reading or sampling the table
     */
    public static SamplePoints getSampleFromDAS(String str, int i, String str2, int i2)
            throws MLMalformedDatasetException {
        try {
            Map<String, Integer> headerMap = generateHeaderMap(extractHeaderLine(str, i2), CSVFormat.RFC4180);
            JavaRDD<String> lines = getLinesFromDASTable(str, i2,
                    MLCoreServiceValueHolder.getInstance().getSparkContext());
            return getSamplePoints(i, true, headerMap, new ArrayList<List<String>>(), CSVFormat.RFC4180, lines);
        } catch (Exception e) {
            throw new MLMalformedDatasetException(
                    "Failed to extract the sample points from path: " + str + ". Cause: " + e, e);
        }
    }

    /**
     * Registers the analytics table as the temporary Spark SQL table {@code ML_REF}, selects all of its
     * rows, drops the {@code _timestamp} column and renders each row as a delimited line.
     * <p>
     * NOTE(review): the table name and schema are concatenated into the SQL text unescaped, and the
     * temporary table name is fixed; confirm callers only pass trusted table names.
     *
     * @param str              table name
     * @param i                tenant id
     * @param javaSparkContext Spark context used to create the {@link SQLContext}
     * @return one line per table row, separated by the {@link CSVFormat#RFC4180} delimiter
     * @throws AnalyticsTableNotAvailableException propagated from the schema lookup
     * @throws AnalyticsException                  propagated from the schema lookup
     */
    public static JavaRDD<String> getLinesFromDASTable(String str, int i, JavaSparkContext javaSparkContext)
            throws AnalyticsTableNotAvailableException, AnalyticsException {
        String tableSchema = extractTableSchema(str, i);
        SQLContext sqlContext = new SQLContext(javaSparkContext);
        sqlContext.sql("CREATE TEMPORARY TABLE ML_REF USING org.wso2.carbon.analytics.spark.core.sources.AnalyticsRelationProvider OPTIONS (tenantId \"" + i + "\", tableName \"" + str + "\", schema \"" + tableSchema + "\")");
        String separator = String.valueOf(CSVFormat.RFC4180.getDelimiter());
        return sqlContext.sql("select * from ML_REF").drop("_timestamp").javaRDD()
                .map(new RowsToLines.Builder().separator(separator).build());
    }

    /**
     * Tokenizes the given lines, takes a random sample of rows without replacement and collects, per
     * column, the sampled values together with counts of missing, non-numeric and decimal cells.
     *
     * @param sampleSize     requested sample size
     * @param containsHeader whether the first line holds column names (only consulted when
     *                       {@code headerMap} is {@code null})
     * @param headerMap      column name to index map, or {@code null} to derive it here
     * @param columnData     list that receives one value list per column; it is appended to and stored
     *                       in the result
     * @param dataFormat     format supplying the delimiter
     * @param lines          raw lines of the dataset
     * @return the populated sample points
     */
    private static SamplePoints getSamplePoints(int sampleSize, boolean containsHeader,
            Map<String, Integer> headerMap, List<List<String>> columnData, CSVFormat dataFormat,
            JavaRDD<String> lines) {
        // first() runs a Spark job, so read the first line once and reuse it below.
        String firstLine = lines.first();
        int featureSize = getFeatureSize(firstLine, dataFormat);

        // NOTE(review): the HeaderFilter is built from the first line regardless of containsHeader;
        // presumably it drops that line, which would also discard a data row for header-less input.
        // Confirm this is intended.
        JavaRDD<String> dataLines = lines.filter(new HeaderFilter.Builder().header(firstLine).build()).cache();

        // Quote the delimiter so it is matched literally, consistent with getFeatureSize().
        Pattern separator = getPatternFromDelimiter(Pattern.quote(String.valueOf(dataFormat.getDelimiter())));
        JavaRDD<String[]> tokens = dataLines.map(new LineToTokens.Builder().separator(separator).build());
        dataLines.unpersist();
        tokens.cache();

        int[] missing = new int[featureSize];
        int[] stringCellCount = new int[featureSize];
        int[] decimalCellCount = new int[featureSize];

        // The requested size is divided by the number of columns before sampling rows
        // (presumably it is meant as a budget of cells rather than rows; confirm against callers).
        int rowsToSample = sampleSize;
        if (rowsToSample >= 0 && featureSize > 0) {
            rowsToSample /= featureSize;
        }

        for (int column = 0; column < featureSize; column++) {
            columnData.add(new ArrayList<String>());
        }

        Map<String, Integer> header = headerMap;
        if (header == null) {
            header = containsHeader ? generateHeaderMap(firstLine, dataFormat) : generateHeaderMap(featureSize);
        }

        List<String[]> sampledRows = tokens.takeSample(false, rowsToSample);
        tokens.unpersist();

        for (String[] row : sampledRows) {
            for (int column = 0; column < featureSize; column++) {
                if (column >= row.length) {
                    // Row is shorter than the header: record the cell as missing.
                    columnData.get(column).add(null);
                    missing[column]++;
                    continue;
                }
                String cell = row[column];
                columnData.get(column).add(cell);
                if (MLConstants.MISSING_VALUES.contains(cell)) {
                    missing[column]++;
                } else if (!NumberUtils.isNumber(cell)) {
                    stringCellCount[column]++;
                } else if (cell.indexOf('.') != -1) {
                    decimalCellCount[column]++;
                }
            }
        }

        SamplePoints samplePoints = new SamplePoints();
        samplePoints.setHeader(header);
        samplePoints.setSamplePoints(columnData);
        samplePoints.setMissing(missing);
        samplePoints.setStringCellCount(stringCellCount);
        samplePoints.setDecimalCellCount(decimalCellCount);
        return samplePoints;
    }

    /**
     * Builds a {@code "name TYPE,name TYPE,..."} schema string from the columns of an analytics table.
     *
     * @param str table name; {@code null} yields {@code null}
     * @param i   tenant id
     * @return the schema string, an empty string when the table has no columns, or {@code null} when
     *         the table name is {@code null}
     * @throws AnalyticsTableNotAvailableException propagated from the schema lookup
     * @throws AnalyticsException                  propagated from the schema lookup
     */
    public static String extractTableSchema(String str, int i)
            throws AnalyticsTableNotAvailableException, AnalyticsException {
        if (str == null) {
            return null;
        }
        Map<String, ColumnDefinition> columns = getTableColumns(str, i);
        StringBuilder schema = new StringBuilder();
        if (columns != null) {
            for (Map.Entry<String, ColumnDefinition> column : columns.entrySet()) {
                // Separator goes between entries, so an empty schema no longer needs a trailing trim.
                if (schema.length() > 0) {
                    schema.append(',');
                }
                schema.append(column.getKey()).append(' ').append(column.getValue().getType().name());
            }
        }
        return schema.toString();
    }

    /**
     * Builds a comma-separated line of the column names of an analytics table.
     *
     * @param str table name; {@code null} yields {@code null}
     * @param i   tenant id
     * @return the header line, an empty string when the table has no columns, or {@code null} when the
     *         table name is {@code null}
     * @throws AnalyticsTableNotAvailableException propagated from the schema lookup
     * @throws AnalyticsException                  propagated from the schema lookup
     */
    public static String extractHeaderLine(String str, int i)
            throws AnalyticsTableNotAvailableException, AnalyticsException {
        if (str == null) {
            return null;
        }
        Map<String, ColumnDefinition> columns = getTableColumns(str, i);
        StringBuilder header = new StringBuilder();
        if (columns != null) {
            for (String columnName : columns.keySet()) {
                if (header.length() > 0) {
                    header.append(',');
                }
                header.append(columnName);
            }
        }
        return header.toString();
    }

    /**
     * Looks up the {@link AnalyticsDataAPI} OSGi service and returns the column map of the table schema.
     */
    private static Map<String, ColumnDefinition> getTableColumns(String tableName, int tenantId)
            throws AnalyticsTableNotAvailableException, AnalyticsException {
        AnalyticsDataAPI analyticsDataApi = (AnalyticsDataAPI) PrivilegedCarbonContext
                .getThreadLocalCarbonContext().getOSGiService(AnalyticsDataAPI.class, (Hashtable) null);
        return analyticsDataApi.getTableSchema(tenantId, tableName).getColumns();
    }

    /**
     * Collects the indices of all included features of the workflow whose impute option equals
     * {@code str}. Each feature index is translated to its position in {@code list}; an index that does
     * not occur in {@code list} is used unchanged.
     *
     * @param workflow workflow supplying the features
     * @param list     feature indices in their new order
     * @param str      impute option to match
     * @return the (possibly translated) indices, in feature iteration order
     */
    public static List<Integer> getImputeFeatureIndices(Workflow workflow, List<Integer> list, String str) {
        List<Integer> imputeFeatureIndices = new ArrayList<Integer>();
        for (Feature feature : workflow.getFeatures()) {
            if (feature.getImputeOption().equals(str) && feature.isInclude()) {
                int currentIndex = feature.getIndex();
                int newIndex = list.indexOf(Integer.valueOf(currentIndex));
                imputeFeatureIndices.add(Integer.valueOf(newIndex != -1 ? newIndex : currentIndex));
            }
        }
        return imputeFeatureIndices;
    }

    /**
     * Finds the position of a feature name in a header row. Header items are compared after removing
     * double quotes and trimming whitespace.
     * <p>
     * Note that {@code 0} is returned both when the feature is the first column and when it is not
     * found at all.
     *
     * @param str  feature name to look for
     * @param str2 header row
     * @param str3 column separator, interpreted as a regular expression by {@link String#split(String)}
     * @return index of the first matching header item, or {@code 0} if none matches
     */
    public static int getFeatureIndex(String str, String str2, String str3) {
        String[] headerItems = str2.split(str3);
        for (int index = 0; index < headerItems.length; index++) {
            String item = headerItems[index];
            if (item != null && str.equals(item.replace("\"", "").trim())) {
                return index;
            }
        }
        return 0;
    }

    /**
     * Returns the index of the first feature whose name equals {@code str}.
     *
     * @param str  feature name
     * @param list features to search
     * @return the feature's index, or {@code -1} if either argument is {@code null} or no feature matches
     */
    public static int getFeatureIndex(String str, List<Feature> list) {
        if (str == null || list == null) {
            return -1;
        }
        for (Feature feature : list) {
            if (str.equals(feature.getName())) {
                return feature.getIndex();
            }
        }
        return -1;
    }

    /**
     * Returns the names of all included features except the one with index {@code i}, keyed by the
     * position of the feature's index within {@code list}.
     * <p>
     * A feature whose index is absent from {@code list} is stored under key {@code -1}, as reported by
     * {@link List#indexOf(Object)}.
     *
     * @param workflow workflow supplying the features
     * @param list     feature indices in their new order
     * @param i        feature index to leave out
     * @return feature names sorted by their new position
     */
    public static SortedMap<Integer, String> getIncludedFeaturesAfterReordering(Workflow workflow,
            List<Integer> list, int i) {
        SortedMap<Integer, String> includedFeatures = new TreeMap<Integer, String>();
        for (Feature feature : workflow.getFeatures()) {
            if (feature.isInclude() && feature.getIndex() != i) {
                int newIndex = list.indexOf(Integer.valueOf(feature.getIndex()));
                includedFeatures.put(Integer.valueOf(newIndex), feature.getName());
            }
        }
        return includedFeatures;
    }

    /**
     * Returns the names of all included features except the one with index {@code i}, keyed by feature
     * index.
     *
     * @param workflow workflow supplying the features
     * @param i        feature index to leave out
     * @return feature names sorted by feature index
     */
    public static SortedMap<Integer, String> getIncludedFeatures(Workflow workflow, int i) {
        SortedMap<Integer, String> includedFeatures = new TreeMap<Integer, String>();
        for (Feature feature : workflow.getFeatures()) {
            if (feature.isInclude() && feature.getIndex() != i) {
                includedFeatures.put(Integer.valueOf(feature.getIndex()), feature.getName());
            }
        }
        return includedFeatures;
    }

    /**
     * Creates an {@link MLDatasetVersion} populated with the given values.
     *
     * @param i    tenant id
     * @param j    dataset id
     * @param str  user name
     * @param str2 dataset version name
     * @param str3 version
     * @param str4 target path
     * @return the populated dataset version
     */
    public static MLDatasetVersion getMLDatsetVersion(int i, long j, String str, String str2, String str3,
            String str4) {
        MLDatasetVersion datasetVersion = new MLDatasetVersion();
        datasetVersion.setTenantId(i);
        datasetVersion.setDatasetId(j);
        datasetVersion.setName(str2);
        datasetVersion.setVersion(str3);
        datasetVersion.setTargetPath(str4);
        datasetVersion.setUserName(str);
        return datasetVersion;
    }

    /**
     * Returns the current date and time formatted as {@code yyyy-MM-dd_HH-mm-ss}.
     *
     * @return the formatted timestamp
     */
    public static String getDate() {
        // A new formatter per call: SimpleDateFormat instances must not be shared between threads.
        SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd_HH-mm-ss");
        return formatter.format(new Date());
    }

    /**
     * Copies name/value pairs into a {@link Properties} object.
     * <p>
     * {@code null} entries are skipped, as are entries with a {@code null} name or value, because
     * {@link Properties} cannot store them. A {@code null} list yields empty properties.
     *
     * @param list properties to copy, may be {@code null}
     * @return the populated properties, never {@code null}
     */
    public static Properties getProperties(List<MLProperty> list) {
        Properties properties = new Properties();
        if (list == null) {
            return properties;
        }
        for (MLProperty property : list) {
            if (property == null) {
                continue;
            }
            String name = property.getName();
            String value = property.getValue();
            if (name != null && value != null) {
                properties.put(name, value);
            }
        }
        return properties;
    }

    /**
     * Parses every element of the array with {@link Double#parseDouble(String)}.
     *
     * @param strArr values to parse
     * @return the parsed values, in the same order
     * @throws NumberFormatException if an element is not a parsable double
     */
    public static double[] toDoubleArray(String[] strArr) {
        double[] values = new double[strArr.length];
        for (int index = 0; index < strArr.length; index++) {
            values[index] = Double.parseDouble(strArr[index]);
        }
        return values;
    }

    /**
     * Generates a header map with the names {@code V1..Vi} mapped to the indices {@code 0..i-1}.
     *
     * @param i number of columns
     * @return generated column name to index map
     */
    public static Map<String, Integer> generateHeaderMap(int i) {
        Map<String, Integer> headerMap = new HashMap<String, Integer>();
        for (int index = 0; index < i; index++) {
            headerMap.put("V" + (index + 1), Integer.valueOf(index));
        }
        return headerMap;
    }

    /**
     * Splits a header line on the format's delimiter and maps each item to its position. When a name
     * occurs more than once, the last position wins.
     *
     * @param str       header line
     * @param cSVFormat format supplying the delimiter
     * @return column name to index map
     */
    public static Map<String, Integer> generateHeaderMap(String str, CSVFormat cSVFormat) {
        Map<String, Integer> headerMap = new HashMap<String, Integer>();
        String[] columnNames = getFeatures(str, cSVFormat);
        for (int index = 0; index < columnNames.length; index++) {
            headerMap.put(columnNames[index], Integer.valueOf(index));
        }
        return headerMap;
    }

    /**
     * Counts the items of a line when split on the format's delimiter.
     *
     * @param str       line to split
     * @param cSVFormat format supplying the delimiter
     * @return number of items, see {@link #getFeatures(String, CSVFormat)}
     */
    public static int getFeatureSize(String str, CSVFormat cSVFormat) {
        return getFeatures(str, cSVFormat).length;
    }

    /**
     * Splits a line on the format's delimiter. The delimiter is matched literally; quoting inside the
     * line is not interpreted, and trailing empty items are dropped as by {@link String#split(String)}.
     *
     * @param str       line to split
     * @param cSVFormat format supplying the delimiter
     * @return the items of the line
     */
    public static String[] getFeatures(String str, CSVFormat cSVFormat) {
        // Pattern.quote keeps regex metacharacters such as '|' or '.' from being treated as syntax.
        return str.split(Pattern.quote(String.valueOf(cSVFormat.getDelimiter())));
    }

    /**
     * Applies the header filter, tokenizes every remaining line and applies the discarded-rows filter
     * built from the given indices. Each intermediate RDD and the result are marked for caching.
     *
     * @param str     column separator, passed to {@link #getPatternFromDelimiter(String)}
     * @param str2    header row handed to the {@link HeaderFilter}
     * @param javaRDD lines of the dataset
     * @param list    feature indices handed to the {@link DiscardedRowsFilter}
     * @return the tokenized rows that pass both filters
     */
    public static JavaRDD<String[]> filterRows(String str, String str2, JavaRDD<String> javaRDD,
            List<Integer> list) {
        JavaRDD<String> dataLines = javaRDD.filter(new HeaderFilter.Builder().header(str2).build()).cache();
        JavaRDD<String[]> tokens = dataLines
                .map(new LineToTokens.Builder().separator(getPatternFromDelimiter(str)).build()).cache();
        return tokens.filter(new DiscardedRowsFilter.Builder().indices(list).build()).cache();
    }

    /**
     * Appends the exception's class name and message to a custom message.
     *
     * @param str custom message
     * @param exc exception to describe, may be {@code null}
     * @return {@code str + " Cause: <class> - <message>"}, or {@code str} unchanged when {@code exc} is
     *         {@code null}
     */
    public static String getErrorMsg(String str, Exception exc) {
        if (exc == null) {
            return str;
        }
        return str + " Cause: " + exc.getClass().getName() + " - " + exc.getMessage();
    }

    /**
     * Returns the first key, in the map's iteration order, whose value equals {@code e}.
     *
     * @param map map to search
     * @param e   value to look for, may be {@code null}
     * @param <T> key type
     * @param <E> value type
     * @return the matching key, or {@code null} if no value matches
     */
    public static <T, E> T getKeyByValue(Map<T, E> map, E e) {
        for (Map.Entry<T, E> entry : map.entrySet()) {
            if (Objects.equals(e, entry.getValue())) {
                return entry.getKey();
            }
        }
        return null;
    }

    /**
     * Concatenates the elements, appending the separator after every element. The result therefore
     * ends with the separator, e.g. {@code "a,b,"}; an empty array yields an empty string.
     *
     * @param strArr elements to join
     * @param c      separator appended after each element
     * @return the joined string including the trailing separator
     */
    public static String arrayToCsvString(String[] strArr, char c) {
        StringBuilder joined = new StringBuilder();
        for (String element : strArr) {
            joined.append(element).append(c);
        }
        return joined.toString();
    }

    /**
     * Compiles a pattern that matches the delimiter only where the lookahead finds an even number of
     * double quotes in the remaining input, i.e. outside double-quoted sections.
     * <p>
     * The delimiter is inserted into the expression as-is, so it is interpreted as a regular
     * expression; pass {@code Pattern.quote(delimiter)} to match it literally.
     *
     * @param str delimiter expression
     * @return the compiled pattern
     */
    public static Pattern getPatternFromDelimiter(String str) {
        String regex = str + "(?=([^\"]*\"[^\"]*\")*(?![^\"]*\"))";
        return Pattern.compile(regex);
    }
}
