package com.google.refine.clustering.binning;

import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonValue;
import com.google.refine.browsing.Engine;
import com.google.refine.browsing.RowVisitor;
import com.google.refine.clustering.ClusteredEntry;
import com.google.refine.clustering.Clusterer;
import com.google.refine.clustering.ClustererConfig;
import com.google.refine.model.Cell;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
import java.util.stream.Collectors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/google/refine/clustering/binning/BinningClusterer.class */
public class BinningClusterer extends Clusterer {
    /** Keying function applied to each cell value; copied from the config by initializeFromConfig. */
    protected Keyer _keyer;
    /** Optional keyer parameters copied from the config by initializeFromConfig; the config's default is null. */
    protected BinningParameters _parameters;
    /** Logger registered under the name "binning_clusterer". */
    static final Logger logger = LoggerFactory.getLogger("binning_clusterer");
    /**
     * Clusters built by computeClusters. Each map goes from a cell's string value to the number of
     * visited rows holding that value; it stays unset until computeClusters has run.
     */
    List<Map<String, Integer>> _clusters;

    /* loaded from: input_file:com/google/refine/clustering/binning/BinningClusterer$BinningClustererConfig.class */
    /**
     * JSON-backed configuration for a {@link BinningClusterer}.
     *
     * <p>The "function" property carries the keyer name and the optional "params" property carries
     * the {@link BinningParameters}. The resolved {@link Keyer} itself is never serialized.
     */
    public static class BinningClustererConfig extends ClustererConfig {

        /** Keyer name exactly as it was passed to {@link #setKeyer(String)}. */
        @JsonIgnore
        private String _keyerName;

        /** Keyer returned by {@code KeyerFactory.get} for the lower-cased keyer name. */
        @JsonIgnore
        private Keyer _keyer;

        /** Optional keyer parameters; stays {@code null} unless {@link #setParameters} is called. */
        @JsonIgnore
        private BinningParameters _parameters = null;

        /**
         * Returns the keyer resolved by the last call to {@link #setKeyer(String)}.
         *
         * @return the resolved keyer, or {@code null} if {@code setKeyer} has not been called
         */
        @JsonIgnore
        public Keyer getKeyer() {
            return this._keyer;
        }

        /**
         * Stores the keyer name and resolves the matching keyer through {@code KeyerFactory}.
         *
         * @param str the keyer name; it is stored unchanged and looked up in lower case
         * @throws NullPointerException if {@code str} is {@code null}
         */
        @JsonProperty("function")
        public void setKeyer(String str) {
            this._keyerName = str;
            // Lower-case with Locale.ROOT so the lookup key does not depend on the JVM default
            // locale (under a Turkish locale "I" would otherwise lower-case to a dotless "ı").
            // NOTE(review): what KeyerFactory.get returns for an unknown name is not visible here.
            this._keyer = KeyerFactory.get(this._keyerName.toLowerCase(Locale.ROOT));
        }

        /**
         * Returns the keyer name as originally supplied (not lower-cased).
         *
         * @return the stored keyer name, possibly {@code null} if it was never set
         */
        @JsonProperty("function")
        public String getKeyerName() {
            return this._keyerName;
        }

        /**
         * Returns the optional keyer parameters; the property is left out of the JSON when null.
         *
         * @return the parameters, or {@code null} if none were supplied
         */
        @JsonProperty("params")
        @JsonInclude(JsonInclude.Include.NON_NULL)
        public BinningParameters getParameters() {
            return this._parameters;
        }

        /**
         * Sets the optional keyer parameters.
         *
         * @param binningParameters the parameters to store; may be {@code null}
         */
        @JsonProperty("params")
        public void setParameters(BinningParameters binningParameters) {
            this._parameters = binningParameters;
        }

        /**
         * Creates a new {@link BinningClusterer} and initializes it from this configuration.
         *
         * @param project the project handed to {@code initializeFromConfig}
         * @return the freshly initialized clusterer
         */
        @Override // com.google.refine.clustering.ClustererConfig
        public BinningClusterer apply(Project project) {
            BinningClusterer binningClusterer = new BinningClusterer();
            binningClusterer.initializeFromConfig(project, this);
            return binningClusterer;
        }

        /**
         * Returns the type identifier of this configuration.
         *
         * @return the constant string {@code "binning"}
         */
        @Override // com.google.refine.clustering.ClustererConfig
        public String getType() {
            return "binning";
        }
    }

    /* loaded from: input_file:com/google/refine/clustering/binning/BinningClusterer$BinningParameters.class */
    /**
     * Optional parameters for a binning keyer. In this file the value is only forwarded to the
     * keyer when the keyer is an NGramFingerprintKeyer (see BinningRowVisitor's constructor).
     */
    public static class BinningParameters {

        // Exposed in JSON as "ngram-size". With NON_DEFAULT inclusion Jackson is expected to omit
        // the property while it still holds its default of 0 — confirm against the Jackson docs.
        @JsonProperty("ngram-size")
        @JsonInclude(JsonInclude.Include.NON_DEFAULT)
        public int ngramSize = 0;
    }

    /* loaded from: input_file:com/google/refine/clustering/binning/BinningClusterer$BinningRowVisitor.class */
    /**
     * Row visitor that bins the values of the clustered column by their keyer output.
     *
     * <p>The result is a map from keyer key to a sorted map of original string value to the number
     * of visited rows that held that value.
     */
    class BinningRowVisitor implements RowVisitor {
        Keyer _keyer;
        Object[] _params;
        BinningParameters _parameters;
        Map<String, Map<String, Integer>> _map = new HashMap<>();

        /**
         * Stores the keyer and parameters. The n-gram size is packed into the keyer argument array
         * only for an NGramFingerprintKeyer with non-null parameters; otherwise the array stays null.
         *
         * @param keyer keyer used to compute the bin key of each value
         * @param binningParameters optional parameters, may be null
         */
        public BinningRowVisitor(Keyer keyer, BinningParameters binningParameters) {
            this._keyer = keyer;
            this._parameters = binningParameters;
            if (keyer instanceof NGramFingerprintKeyer && binningParameters != null) {
                this._params = new Object[] { Integer.valueOf(binningParameters.ngramSize) };
            }
        }

        /** No set-up is needed before visiting. */
        @Override // com.google.refine.browsing.RowVisitor, com.google.refine.browsing.RecordVisitor
        public void start(Project project) {
        }

        /** No clean-up is needed after visiting. */
        @Override // com.google.refine.browsing.RowVisitor, com.google.refine.browsing.RecordVisitor
        public void end(Project project) {
        }

        /**
         * Counts the row's value in the bin selected by its keyer key. Rows whose cell or cell
         * value is null are ignored.
         *
         * @return always {@code false}
         */
        @Override // com.google.refine.browsing.RowVisitor
        public boolean visit(Project project, int i, Row row) {
            Cell cell = row.getCell(BinningClusterer.this._colindex);
            if (cell != null && cell.value != null) {
                // String.toString() returns the string itself, so one call covers both cases.
                String text = cell.value.toString();
                String binKey = this._keyer.key(text, this._params);
                // Create the bin on first sight of the key, then bump the count for this value.
                this._map.computeIfAbsent(binKey, unused -> new TreeMap<>())
                        .merge(text, 1, Integer::sum);
            }
            return false;
        }

        /**
         * Returns the live bin map accumulated so far (not a copy).
         *
         * @return map from keyer key to (value to occurrence count)
         */
        public Map<String, Map<String, Integer>> getMap() {
            return this._map;
        }
    }

    /* loaded from: input_file:com/google/refine/clustering/binning/BinningClusterer$EntriesComparator.class */
    /**
     * Orders map entries by their integer value, largest value first. Entries with equal values
     * compare as equal; the key is not consulted.
     */
    public static class EntriesComparator implements Comparator<Map.Entry<String, Integer>>, Serializable {
        private static final long serialVersionUID = 2763378036791777964L;

        /**
         * Compares two entries by value in descending order.
         *
         * @param entry the first entry
         * @param entry2 the second entry
         * @return a negative number if {@code entry} has the larger value, a positive number if
         *         {@code entry2} has the larger value, and zero if the values are equal
         * @throws NullPointerException if either entry or either value is {@code null}
         */
        @Override // java.util.Comparator
        public int compare(Map.Entry<String, Integer> entry, Map.Entry<String, Integer> entry2) {
            // Integer.compare cannot overflow, unlike subtracting the two values.
            return Integer.compare(entry2.getValue().intValue(), entry.getValue().intValue());
        }
    }

    /* loaded from: input_file:com/google/refine/clustering/binning/BinningClusterer$SizeComparator.class */
    /**
     * Orders clusters so that the one with more distinct values comes first. Clusters with the
     * same number of distinct values are ordered by the sum of their counts, largest sum first.
     */
    public static class SizeComparator implements Comparator<Map<String, Integer>>, Serializable {
        private static final long serialVersionUID = -1390696157208674054L;

        /**
         * Compares two clusters by number of entries, then by total count, both descending.
         *
         * @param map the first cluster (value to occurrence count)
         * @param map2 the second cluster (value to occurrence count)
         * @return a negative number if {@code map} sorts first, a positive number if {@code map2}
         *         sorts first, and zero if both size and total count are equal
         * @throws NullPointerException if either map, or any count inside one, is {@code null}
         */
        @Override // java.util.Comparator
        public int compare(Map<String, Integer> map, Map<String, Integer> map2) {
            // Compare the sizes, not the map references: the tiebreak below must run whenever
            // two different clusters happen to hold the same number of distinct values.
            int bySize = Integer.compare(map2.size(), map.size());
            if (bySize != 0) {
                return bySize;
            }
            return Long.compare(totalCount(map2), totalCount(map));
        }

        /** Sums all counts of a cluster; accumulates in a long so the sum cannot overflow. */
        private static long totalCount(Map<String, Integer> cluster) {
            long total = 0L;
            for (Integer count : cluster.values()) {
                total += count.intValue();
            }
            return total;
        }
    }

    /**
     * Initializes this clusterer from a binning configuration: runs the superclass initialization,
     * then copies the keyer and the optional parameters out of the configuration.
     *
     * @param project the project passed on to the superclass initialization
     * @param binningClustererConfig the configuration supplying the keyer and parameters
     */
    public void initializeFromConfig(Project project, BinningClustererConfig binningClustererConfig) {
        // Hand the config to the superclass typed as ClustererConfig, as the explicit cast did.
        ClustererConfig baseConfig = binningClustererConfig;
        super.initializeFromConfig(project, baseConfig);

        _parameters = binningClustererConfig.getParameters();
        _keyer = binningClustererConfig.getKeyer();
    }

    @Override // com.google.refine.clustering.Clusterer
    public void computeClusters(Engine engine) {
        BinningRowVisitor binningRowVisitor = new BinningRowVisitor(this._keyer, this._parameters);
        engine.getAllFilteredRows().accept(this._project, binningRowVisitor);
        this._clusters = new ArrayList(binningRowVisitor.getMap().values());
        Collections.sort(this._clusters, new SizeComparator());
    }

    /**
     * Converts a (value, count) entry into a two-key map: "v" holds the entry's key and "c" holds
     * the entry's value.
     *
     * @param entry the entry to convert
     * @return a new mutable map with the keys "v" and "c"
     */
    protected static Map<String, Object> entryToMap(Map.Entry<String, Integer> entry) {
        Map<String, Object> converted = new HashMap<>();
        String value = entry.getKey();
        Integer count = entry.getValue();
        converted.put("v", value);
        converted.put("c", count);
        return converted;
    }

    /**
     * Builds the JSON view of the computed clusters. Clusters with a single distinct value are
     * left out; within each remaining cluster the entries are ordered with
     * {@link EntriesComparator}, and the clusters keep the order they have in {@code _clusters}.
     *
     * @return one list of {@link ClusteredEntry} per cluster that has more than one distinct value
     */
    @JsonValue
    public List<List<ClusteredEntry>> getJsonRepresentation() {
        Comparator<Map.Entry<String, Integer>> entryOrder = new EntriesComparator();
        List<List<ClusteredEntry>> representation = new ArrayList<>();
        for (Map<String, Integer> cluster : this._clusters) {
            if (cluster.size() <= 1) {
                continue;
            }
            // List.sort is stable, like Stream.sorted, so ties keep the map's iteration order.
            List<Map.Entry<String, Integer>> ordered = new ArrayList<>(cluster.entrySet());
            ordered.sort(entryOrder);

            List<ClusteredEntry> entries = new ArrayList<>();
            for (Map.Entry<String, Integer> item : ordered) {
                Serializable value = item.getKey();
                entries.add(new ClusteredEntry(value, item.getValue().intValue()));
            }
            representation.add(entries);
        }
        return representation;
    }
}
