package com.google.refine.clustering.knn;

import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonValue;
import com.google.refine.browsing.Engine;
import com.google.refine.browsing.RowVisitor;
import com.google.refine.clustering.ClusteredEntry;
import com.google.refine.clustering.Clusterer;
import com.google.refine.clustering.ClustererConfig;
import com.google.refine.model.Cell;
import com.google.refine.model.Project;
import com.google.refine.model.Row;
import edu.mit.simile.vicino.clustering.NGramClusterer;
import edu.mit.simile.vicino.clustering.VPTreeClusterer;
import edu.mit.simile.vicino.distances.Distance;
import java.io.Serializable;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/google/refine/clustering/knn/kNNClusterer.class */
public class kNNClusterer extends Clusterer {
    /** Distance function used to compare values; assigned in {@code initializeFromConfig}. */
    private SimilarityDistance _distance;

    /** Clustering parameters; assigned in {@code initializeFromConfig}. */
    private kNNClustererConfigParameters _params;

    /** Clusters produced by {@code computeClusters}; not assigned anywhere else in this class. */
    List<Set<Serializable>> _clusters;

    /** Number of times each value has been passed to {@code count}. */
    Map<Serializable, Integer> _counts = new HashMap<>();

    static final Logger logger = LoggerFactory.getLogger("kNN_clusterer");

    /* loaded from: input_file:com/google/refine/clustering/knn/kNNClusterer$BlockingClusteringRowVisitor.class */
    /**
     * Row visitor that passes each non-null value of the clustered column to an
     * {@link NGramClusterer} and records its occurrence in the enclosing clusterer.
     */
    class BlockingClusteringRowVisitor implements RowVisitor {
        SimilarityDistance _distance;
        /** Radius passed to the clusterer; stays at the default when no parameters are supplied. */
        double _radius = kNNClustererConfigParameters.defaultRadius;
        /** N-gram size passed to the clusterer; stays at the default when no parameters are supplied. */
        int _blockingNgramSize = kNNClustererConfigParameters.defaultBlockingNgramSize;
        // NOTE(review): not read or written anywhere in this file; kept because the field is package-visible.
        HashSet<String> _data = new HashSet<>();
        NGramClusterer _clusterer;

        /** Adapts a {@link SimilarityDistance} to the {@link Distance} type expected by the clusterer. */
        private class DistanceWrapper extends Distance {
            private final SimilarityDistance _d;

            protected DistanceWrapper(SimilarityDistance similarityDistance) {
                this._d = similarityDistance;
            }

            /** Delegates to {@code SimilarityDistance.compute}. */
            public double d(String str, String str2) {
                return this._d.compute(str, str2);
            }
        }

        /**
         * Creates the visitor and its underlying clusterer.
         *
         * @param similarityDistance distance used to compare values
         * @param knnclustererconfigparameters radius and blocking n-gram size; may be {@code null},
         *        in which case the default radius and n-gram size are used
         */
        public BlockingClusteringRowVisitor(SimilarityDistance similarityDistance, kNNClustererConfigParameters knnclustererconfigparameters) {
            this._distance = similarityDistance;
            // The configuration's parameters default to null when the JSON has no "params"
            // entry, so only override the defaults when parameters were actually provided.
            if (knnclustererconfigparameters != null) {
                this._blockingNgramSize = knnclustererconfigparameters.blockingNgramSize;
                this._radius = knnclustererconfigparameters.radius;
            }
            this._clusterer = new NGramClusterer(new DistanceWrapper(this._distance), this._blockingNgramSize);
        }

        @Override // com.google.refine.browsing.RowVisitor, com.google.refine.browsing.RecordVisitor
        public void start(Project project) {
        }

        @Override // com.google.refine.browsing.RowVisitor, com.google.refine.browsing.RecordVisitor
        public void end(Project project) {
        }

        /**
         * Feeds the row's value in the clustered column to the clusterer and counts it.
         * Rows whose cell or cell value is {@code null} are skipped.
         *
         * @return always {@code false} (presumably "keep visiting" - confirm against RowVisitor)
         */
        @Override // com.google.refine.browsing.RowVisitor
        public boolean visit(Project project, int i, Row row) {
            Cell cell = row.getCell(kNNClusterer.this._colindex);
            if (cell == null || cell.value == null) {
                return false;
            }
            Serializable serializable = cell.value;
            // Strings are used as-is; any other value is converted with toString() and interned.
            String intern = serializable instanceof String ? (String) serializable : serializable.toString().intern();
            this._clusterer.populate(intern);
            kNNClusterer.this.count(intern);
            return false;
        }

        /** Returns the clusters computed by the underlying clusterer for the configured radius. */
        public List<Set<Serializable>> getClusters() {
            return this._clusterer.getClusters(this._radius);
        }
    }

    /* loaded from: input_file:com/google/refine/clustering/knn/kNNClusterer$VPTreeClusteringRowVisitor.class */
    /**
     * Row visitor that passes each non-null value of the clustered column to a
     * {@link VPTreeClusterer} and records its occurrence in the enclosing clusterer.
     */
    class VPTreeClusteringRowVisitor implements RowVisitor {
        Distance _distance;
        kNNClustererConfigParameters _params;
        VPTreeClusterer _clusterer;

        /**
         * @param distance distance handed to the underlying clusterer
         * @param knnclustererconfigparameters parameters whose radius is used by {@link #getClusters()}
         */
        public VPTreeClusteringRowVisitor(Distance distance, kNNClustererConfigParameters knnclustererconfigparameters) {
            _params = knnclustererconfigparameters;
            _distance = distance;
            _clusterer = new VPTreeClusterer(distance);
        }

        @Override // com.google.refine.browsing.RowVisitor, com.google.refine.browsing.RecordVisitor
        public void start(Project project) {
            // nothing to prepare
        }

        @Override // com.google.refine.browsing.RowVisitor, com.google.refine.browsing.RecordVisitor
        public void end(Project project) {
            // nothing to finish
        }

        /**
         * Adds the row's value in the clustered column to the clusterer and counts it;
         * rows with a {@code null} cell or cell value are ignored.
         *
         * @return always {@code false}
         */
        @Override // com.google.refine.browsing.RowVisitor
        public boolean visit(Project project, int i, Row row) {
            Cell cell = row.getCell(kNNClusterer.this._colindex);
            if (cell != null && cell.value != null) {
                Serializable raw = cell.value;
                String text;
                if (raw instanceof String) {
                    text = (String) raw;
                } else {
                    text = raw.toString();
                }
                _clusterer.populate(text);
                kNNClusterer.this.count(text);
            }
            return false;
        }

        /** Returns the clusters computed by the underlying clusterer for the configured radius. */
        public List<Set<Serializable>> getClusters() {
            double radius = _params.radius;
            return _clusterer.getClusters(radius);
        }
    }

    /* loaded from: input_file:com/google/refine/clustering/knn/kNNClusterer$ValuesComparator.class */
    /**
     * Orders map entries by their integer value, largest first.
     */
    public static class ValuesComparator implements Comparator<Map.Entry<Serializable, Integer>>, Serializable {
        private static final long serialVersionUID = 204469656070583155L;

        /**
         * Compares two entries by value in descending order.
         *
         * @return a negative number if {@code entry} has the larger value, a positive number if
         *         {@code entry2} has the larger value, and zero if the values are equal
         * @throws NullPointerException if either entry or its value is {@code null}
         */
        @Override // java.util.Comparator
        public int compare(Map.Entry<Serializable, Integer> entry, Map.Entry<Serializable, Integer> entry2) {
            // Integer.compare avoids the overflow that subtracting the two values can produce.
            return Integer.compare(entry2.getValue(), entry.getValue());
        }
    }

    /* loaded from: input_file:com/google/refine/clustering/knn/kNNClusterer$kNNClustererConfig.class */
    /**
     * JSON-mapped configuration for {@link kNNClusterer}: the distance function name
     * (property {@code "function"}) and the clustering parameters (property {@code "params"}).
     */
    public static class kNNClustererConfig extends ClustererConfig {

        /** Distance name exactly as it was supplied to {@link #setDistance(String)}. */
        @JsonIgnore
        private String _distanceStr;

        /** Distance resolved from the lower-cased name via {@code DistanceFactory}. */
        @JsonIgnore
        private SimilarityDistance _distance;

        /** Clustering parameters; {@code null} until {@link #setParameters} is called. */
        @JsonIgnore
        private kNNClustererConfigParameters _parameters = null;

        /** Returns the distance resolved by the last call to {@link #setDistance(String)}. */
        @JsonIgnore
        public SimilarityDistance getDistance() {
            return this._distance;
        }

        /**
         * Stores the distance name and resolves the matching distance implementation.
         *
         * @param str name of the distance function; looked up in lower case
         * @throws NullPointerException if {@code str} is {@code null}
         */
        @JsonProperty("function")
        public void setDistance(String str) {
            this._distanceStr = str;
            // Lower-case with a fixed locale so the lookup key does not depend on the JVM's
            // default locale (e.g. Turkish maps "I" to a dotless "ı").
            this._distance = DistanceFactory.get(this._distanceStr.toLowerCase(Locale.ROOT));
        }

        /** Returns the distance name as originally supplied. */
        @JsonProperty("function")
        public String getDistanceStr() {
            return this._distanceStr;
        }

        /** Returns the clustering parameters, or {@code null} if none were set. */
        @JsonProperty("params")
        public kNNClustererConfigParameters getParameters() {
            return this._parameters;
        }

        /** Sets the clustering parameters. */
        @JsonProperty("params")
        public void setParameters(kNNClustererConfigParameters knnclustererconfigparameters) {
            this._parameters = knnclustererconfigparameters;
        }

        /** Creates a new {@link kNNClusterer} and initializes it from this configuration. */
        @Override // com.google.refine.clustering.ClustererConfig
        public kNNClusterer apply(Project project) {
            kNNClusterer knnclusterer = new kNNClusterer();
            knnclusterer.initializeFromConfig(project, this);
            return knnclusterer;
        }

        /** Returns the type identifier of this configuration, {@code "knn"}. */
        @Override // com.google.refine.clustering.ClustererConfig
        public String getType() {
            return "knn";
        }
    }

    /* loaded from: input_file:com/google/refine/clustering/knn/kNNClusterer$kNNClustererConfigParameters.class */
    /**
     * Tunable parameters of the kNN clusterer, mapped to the JSON properties
     * {@code "radius"} and {@code "blocking-ngram-size"}.
     */
    public static class kNNClustererConfigParameters {
        /** Radius used when none is supplied. */
        public static final double defaultRadius = 1.0d;
        /** Blocking n-gram size used when none is supplied. */
        public static final int defaultBlockingNgramSize = 6;

        /** Radius handed to the clusterer when clusters are retrieved. */
        @JsonProperty("radius")
        public double radius = defaultRadius;

        /** N-gram size handed to the blocking clusterer. */
        @JsonProperty("blocking-ngram-size")
        public int blockingNgramSize = defaultBlockingNgramSize;
    }

    /**
     * Initializes this clusterer from a kNN configuration: runs the superclass
     * initialization with the configuration viewed as a {@link ClustererConfig}, then
     * copies the configuration's parameters and distance into this instance.
     *
     * @param project the project passed on to the superclass initialization
     * @param knnclustererconfig the configuration to read the distance and parameters from
     */
    public void initializeFromConfig(Project project, kNNClustererConfig knnclustererconfig) {
        ClustererConfig baseConfig = knnclustererconfig;
        super.initializeFromConfig(project, baseConfig);
        _params = knnclustererconfig.getParameters();
        _distance = knnclustererconfig.getDistance();
    }

    /**
     * Visits all filtered rows of the engine with a {@link BlockingClusteringRowVisitor}
     * and stores the clusters it produces in {@code _clusters}.
     *
     * @param engine source of the filtered rows to cluster
     */
    @Override // com.google.refine.clustering.Clusterer
    public void computeClusters(Engine engine) {
        BlockingClusteringRowVisitor visitor = new BlockingClusteringRowVisitor(_distance, _params);
        engine.getAllFilteredRows().accept(_project, visitor);
        _clusters = visitor.getClusters();
    }

    /**
     * Converts one cluster into a list of {@link ClusteredEntry} objects, each pairing a
     * value with its recorded count, sorted with {@code ClusteredEntry.comparator}.
     *
     * @param set the values belonging to one cluster
     * @return the sorted entries of that cluster
     */
    protected List<ClusteredEntry> getClusteredEntries(Set<Serializable> set) {
        return set.stream()
                .map(value -> new ClusteredEntry(value, _counts.get(value).intValue()))
                .sorted(ClusteredEntry.comparator)
                .collect(Collectors.toList());
    }

    /**
     * Builds the JSON value of this clusterer: every cluster with more than one member,
     * converted by {@link #getClusteredEntries(Set)}.
     *
     * @return one list of entries per cluster that has at least two values
     */
    @JsonValue
    public List<List<ClusteredEntry>> getJsonRepresentation() {
        return _clusters.stream()
                .filter(cluster -> cluster.size() > 1)
                .map(this::getClusteredEntries)
                .collect(Collectors.toList());
    }

    /**
     * Records one more occurrence of the given value in {@code _counts}.
     *
     * @param serializable the value that was seen
     */
    private void count(Serializable serializable) {
        // merge() inserts 1 for a new key and otherwise adds 1 to the stored count.
        _counts.merge(serializable, 1, Integer::sum);
    }
}
