package com.datumbox.framework.core.machinelearning.common.abstracts.featureselectors;

import com.datumbox.framework.common.Configuration;
import com.datumbox.framework.common.dataobjects.Dataframe;
import com.datumbox.framework.common.dataobjects.Record;
import com.datumbox.framework.common.dataobjects.TypeInference;
import com.datumbox.framework.common.persistentstorage.interfaces.BigMap;
import com.datumbox.framework.common.persistentstorage.interfaces.DatabaseConnector;
import com.datumbox.framework.core.machinelearning.common.abstracts.AbstractTrainer;
import com.datumbox.framework.core.machinelearning.common.abstracts.featureselectors.AbstractCategoricalFeatureSelector.AbstractModelParameters;
import com.datumbox.framework.core.machinelearning.common.abstracts.featureselectors.AbstractCategoricalFeatureSelector.AbstractTrainingParameters;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/datumbox/framework/core/machinelearning/common/abstracts/featureselectors/AbstractCategoricalFeatureSelector.class */
public abstract class AbstractCategoricalFeatureSelector<MP extends AbstractModelParameters, TP extends AbstractTrainingParameters> extends AbstractFeatureSelector<MP, TP> {

    /* loaded from: input_file:com/datumbox/framework/core/machinelearning/common/abstracts/featureselectors/AbstractCategoricalFeatureSelector$AbstractModelParameters.class */
    /**
     * Model parameters shared by all categorical feature selectors: the
     * per-feature scores learned during training.
     */
    public abstract static class AbstractModelParameters extends AbstractTrainer.AbstractModelParameters {

        /**
         * Score of every selected feature, keyed by the feature itself.
         * When filtering a dataframe, the enclosing selector drops the columns
         * whose key is absent from this map.
         */
        @BigMap(mapType = DatabaseConnector.MapType.HASHMAP, storageHint = DatabaseConnector.StorageHint.IN_MEMORY, concurrent = true)
        private Map<Object, Double> featureScores;

        /**
         * Forwards the database connector to the parent model parameters.
         *
         * @param databaseConnector connector handed to the superclass constructor
         */
        public AbstractModelParameters(DatabaseConnector databaseConnector) {
            super(databaseConnector);
        }

        /**
         * Getter for the feature scores.
         *
         * @return the feature-to-score map (the stored reference, not a copy)
         */
        public Map<Object, Double> getFeatureScores() {
            return featureScores;
        }

        /**
         * Setter for the feature scores.
         *
         * @param map the feature-to-score map to store (kept by reference)
         */
        protected void setFeatureScores(Map<Object, Double> map) {
            featureScores = map;
        }
    }

    /* loaded from: input_file:com/datumbox/framework/core/machinelearning/common/abstracts/featureselectors/AbstractCategoricalFeatureSelector$AbstractTrainingParameters.class */
    /**
     * Training parameters shared by all categorical feature selectors.
     */
    public abstract static class AbstractTrainingParameters extends AbstractTrainer.AbstractTrainingParameters {

        /**
         * Occurrence threshold for rare features. Features whose occurrence
         * count is less than or equal to this value are removed before the
         * statistics are built; {@code null} or a non-positive value disables
         * the removal. Defaults to {@code null}.
         */
        private Integer rareFeatureThreshold;

        /**
         * Upper bound on the number of features. Defaults to {@code null}.
         * NOTE(review): not read anywhere in this class; presumably consumed by
         * the concrete selectors when they estimate the scores - confirm.
         */
        private Integer maxFeatures;

        /**
         * When {@code true} (the default), columns typed as NUMERICAL are
         * skipped while counting and are never dropped by the filtering step.
         */
        private boolean ignoringNumericalFeatures = true;

        /**
         * Getter for the rare feature threshold.
         *
         * @return the threshold, or {@code null} when not set
         */
        public Integer getRareFeatureThreshold() {
            return rareFeatureThreshold;
        }

        /**
         * Setter for the rare feature threshold.
         *
         * @param num the new threshold; {@code null} or a non-positive value disables rare-feature removal
         */
        public void setRareFeatureThreshold(Integer num) {
            rareFeatureThreshold = num;
        }

        /**
         * Getter for the maximum number of features.
         *
         * @return the limit, or {@code null} when not set
         */
        public Integer getMaxFeatures() {
            return maxFeatures;
        }

        /**
         * Setter for the maximum number of features.
         *
         * @param num the new limit, may be {@code null}
         */
        public void setMaxFeatures(Integer num) {
            maxFeatures = num;
        }

        /**
         * Getter for the flag that controls whether numerical columns are skipped.
         *
         * @return {@code true} if numerical columns are ignored
         */
        public boolean isIgnoringNumericalFeatures() {
            return ignoringNumericalFeatures;
        }

        /**
         * Setter for the flag that controls whether numerical columns are skipped.
         *
         * @param z {@code true} to ignore numerical columns
         */
        public void setIgnoringNumericalFeatures(boolean z) {
            ignoringNumericalFeatures = z;
        }
    }

    /**
     * Delegates construction to the parent feature selector; no additional
     * state is initialised here.
     *
     * @param str           name forwarded to the superclass (presumably the
     *                      database/model name - confirm against the parent)
     * @param configuration framework configuration forwarded to the superclass
     * @param cls           class object of the model parameters type {@code MP}
     * @param cls2          class object of the training parameters type {@code TP}
     */
    /* JADX INFO: Access modifiers changed from: protected */
    public AbstractCategoricalFeatureSelector(String str, Configuration configuration, Class<MP> cls, Class<TP> cls2) {
        super(str, configuration, cls, cls2);
    }

    /**
     * Trains the selector: gathers the class, feature-class and feature
     * counts of the dataframe and hands them to
     * {@link #estimateFeatureScores(Map, Map, Map)}.
     *
     * <p>The feature-class and feature counts live in temporary BigMaps. They
     * are dropped in {@code finally} blocks so that a failure while building
     * the statistics or estimating the scores does not leave the temporary
     * maps behind in the storage engine.</p>
     *
     * @param dataframe the training data; rare features may be removed from it
     *                  as a side effect of building the statistics
     */
    @Override // com.datumbox.framework.core.machinelearning.common.abstracts.AbstractTrainer
    protected void _fit(Dataframe dataframe) {
        DatabaseConnector dbc = kb().getDbc();

        // Class counts are kept in a plain in-heap map.
        Map<Object, Integer> classCounts = new HashMap<>();

        Map<List<Object>, Integer> featureClassCounts = dbc.getBigMap("tmp_featureClassCounts", DatabaseConnector.MapType.HASHMAP, DatabaseConnector.StorageHint.IN_MEMORY, false, true);
        try {
            Map<Object, Double> featureCounts = dbc.getBigMap("tmp_featureCounts", DatabaseConnector.MapType.HASHMAP, DatabaseConnector.StorageHint.IN_MEMORY, false, true);
            try {
                buildFeatureStatistics(dataframe, classCounts, featureClassCounts, featureCounts);
                estimateFeatureScores(classCounts, featureClassCounts, featureCounts);
            } finally {
                // Always release the temporary map, even when training fails.
                dbc.dropBigMap("tmp_featureCounts", featureCounts);
            }
        } finally {
            // Always release the temporary map, even when training fails.
            dbc.dropBigMap("tmp_featureClassCounts", featureClassCounts);
        }
    }

    /**
     * Drops from the dataframe every column that has no entry in the stored
     * feature scores, honouring the "ignore numerical features" setting.
     *
     * @param dataframe the data to filter in place
     */
    @Override // com.datumbox.framework.core.machinelearning.common.abstracts.featureselectors.AbstractFeatureSelector
    protected void filterFeatures(Dataframe dataframe) {
        DatabaseConnector dbc = kb().getDbc();
        Map<Object, Double> featureScores = ((AbstractModelParameters) kb().getModelParameters()).getFeatureScores();
        boolean skipNumerical = ((AbstractTrainingParameters) kb().getTrainingParameters()).isIgnoringNumericalFeatures();
        filterData(dataframe, dbc, featureScores, skipNumerical);
    }

    /**
     * Removes from the dataframe all X columns that are not keys of the given
     * feature map.
     *
     * <p>The columns to remove are collected in a temporary BigMap. The map is
     * dropped in a {@code finally} block so that it is released even when
     * scanning or dropping the columns throws.</p>
     *
     * @param dataframe         the data to filter in place
     * @param databaseConnector connector used to create and drop the temporary map
     * @param map               features to keep; columns absent from its key set are dropped
     * @param z                 when {@code true}, columns typed as NUMERICAL are never dropped
     */
    private static void filterData(Dataframe dataframe, DatabaseConnector databaseConnector, Map<Object, Double> map, boolean z) {
        Logger logger = LoggerFactory.getLogger(AbstractCategoricalFeatureSelector.class);
        logger.debug("filterData()");
        Map<Object, Boolean> removedColumns = databaseConnector.getBigMap("tmp_removedColumns", DatabaseConnector.MapType.HASHMAP, DatabaseConnector.StorageHint.IN_MEMORY, false, true);
        try {
            for (Map.Entry<Object, TypeInference.DataType> column : dataframe.getXDataTypes().entrySet()) {
                if (z && column.getValue() == TypeInference.DataType.NUMERICAL) {
                    // Numerical columns are left untouched when they are being ignored.
                    continue;
                }
                Object feature = column.getKey();
                if (!map.containsKey(feature)) {
                    removedColumns.put(feature, Boolean.TRUE);
                }
            }
            logger.debug("Removing Columns");
            dataframe.dropXColumns(removedColumns.keySet());
        } finally {
            // Always release the temporary map, even when filtering fails.
            databaseConnector.dropBigMap("tmp_removedColumns", removedColumns);
        }
    }

    /**
     * Counts in how many records each feature carries a non-null, non-zero
     * numeric value and, when a positive rare-feature threshold is configured,
     * removes the features whose count does not exceed it, both from the
     * counts map and from the dataframe.
     *
     * @param dataframe the data to scan; filtered in place when the threshold is active
     * @param map       receives the per-feature occurrence counts (existing entries are incremented)
     */
    private void removeRareFeatures(Dataframe dataframe, Map<Object, Double> map) {
        logger.debug("removeRareFeatures()");
        DatabaseConnector dbc = kb().getDbc();
        AbstractTrainingParameters trainingParameters = (AbstractTrainingParameters) kb().getTrainingParameters();
        Integer threshold = trainingParameters.getRareFeatureThreshold();
        boolean skipNumerical = trainingParameters.isIgnoringNumericalFeatures();
        Map<Object, TypeInference.DataType> columnTypes = dataframe.getXDataTypes();

        logger.debug("Estimating featureCounts");
        for (Record record : dataframe) {
            for (Map.Entry<Object, Object> cell : record.getX().entrySet()) {
                Object feature = cell.getKey();
                if (skipNumerical && columnTypes.get(feature) == TypeInference.DataType.NUMERICAL) {
                    continue;
                }
                Double value = TypeInference.toDouble(cell.getValue());
                if (value == null || value.doubleValue() == 0.0d) {
                    // Missing or zero values do not count as an occurrence.
                    continue;
                }
                Double seenSoFar = map.get(feature);
                double previous = (seenSoFar == null) ? 0.0d : seenSoFar.doubleValue();
                map.put(feature, Double.valueOf(previous + 1.0d));
            }
        }

        boolean pruningEnabled = threshold != null && threshold.intValue() > 0;
        if (pruningEnabled) {
            logger.debug("Removing rare features");
            for (Iterator<Map.Entry<Object, Double>> counts = map.entrySet().iterator(); counts.hasNext();) {
                Map.Entry<Object, Double> count = counts.next();
                if (count.getValue().doubleValue() <= threshold.intValue()) {
                    counts.remove();
                }
            }
            // Drop from the data every column that is no longer present in the counts.
            filterData(dataframe, dbc, map, skipNumerical);
        }
    }

    /**
     * Fills the three statistics maps used for scoring. Feature counts are
     * produced (and rare features removed) first; then one pass over the
     * records tallies the class counts and the feature-class co-occurrence
     * counts.
     *
     * @param dataframe the training data; rare features may be removed from it
     * @param map       receives, per response value, the number of records carrying it
     * @param map2      receives, per {@code [feature, class]} pair, the number of records
     *                  of that class in which the feature has a non-null, non-zero value
     * @param map3      receives the per-feature occurrence counts
     */
    private void buildFeatureStatistics(Dataframe dataframe, Map<Object, Integer> map, Map<List<Object>, Integer> map2, Map<Object, Double> map3) {
        logger.debug("buildFeatureStatistics()");
        AbstractTrainingParameters trainingParameters = (AbstractTrainingParameters) kb().getTrainingParameters();
        boolean skipNumerical = trainingParameters.isIgnoringNumericalFeatures();

        removeRareFeatures(dataframe, map3);

        // Read the column types only after the rare features were removed.
        Map<Object, TypeInference.DataType> columnTypes = dataframe.getXDataTypes();
        logger.debug("Estimating classCounts and featureClassCounts");
        for (Record record : dataframe) {
            Object theClass = record.getY();

            Integer classCount = map.get(theClass);
            int previousClassCount = (classCount == null) ? 0 : classCount.intValue();
            map.put(theClass, Integer.valueOf(previousClassCount + 1));

            for (Map.Entry<Object, Object> cell : record.getX().entrySet()) {
                Object feature = cell.getKey();
                if (skipNumerical && columnTypes.get(feature) == TypeInference.DataType.NUMERICAL) {
                    continue;
                }
                Double value = TypeInference.toDouble(cell.getValue());
                if (value == null || value.doubleValue() == 0.0d) {
                    // Missing or zero values do not count as an occurrence.
                    continue;
                }
                List<Object> featureClassPair = Arrays.asList(feature, theClass);
                Integer pairCount = map2.get(featureClassPair);
                int previousPairCount = (pairCount == null) ? 0 : pairCount.intValue();
                map2.put(featureClassPair, Integer.valueOf(previousPairCount + 1));
            }
        }
    }

    /**
     * Hook implemented by the concrete selectors; called once by the training
     * step after the statistics have been collected.
     *
     * <p>NOTE(review): implementations are presumably expected to store their
     * result in the model parameters' feature scores, since the filtering
     * step keeps only the columns found there - confirm against the
     * subclasses.</p>
     *
     * @param map  class counts: response value to number of records carrying it
     * @param map2 feature-class counts: {@code [feature, class]} pair to number of
     *             records of that class in which the feature has a non-null, non-zero value
     * @param map3 feature counts: feature to number of records in which it has a
     *             non-null, non-zero value (rare features already removed when a
     *             positive threshold is configured)
     */
    protected abstract void estimateFeatureScores(Map<Object, Integer> map, Map<List<Object>, Integer> map2, Map<Object, Double> map3);
}
