package no.priv.garshol.duke;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import no.priv.garshol.duke.matchers.AbstractMatchListener;
import no.priv.garshol.duke.matchers.MatchListener;
import no.priv.garshol.duke.matchers.PrintMatchListener;
import no.priv.garshol.duke.utils.Utils;
import org.apache.lucene.index.CorruptIndexException;

/* loaded from: input_file:no/priv/garshol/duke/Processor.class */
public class Processor {
    /** Configuration supplying thresholds, properties and data sources to this processor. */
    private Configuration config;
    /** Database used to index records and to look up candidate matches. */
    protected Database database;
    /** Listeners that receive record, match and batch events from this processor. */
    private Collection<MatchListener> listeners;
    /** Logger handed to data sources and used for progress/debug output. */
    private Logger logger;
    /** Non-id properties, sorted by ascending low probability (built in the constructor). */
    private List<Property> proporder;
    /**
     * accprob[i] is the Bayes combination of 0.5 with the high probabilities of
     * proporder[i] through the last property (built in the constructor).
     */
    private double[] accprob;
    /** Batch size used by the no-argument deduplicate() and link() entry points. */
    private static final int DEFAULT_BATCH_SIZE = 40000;
    /** Filter that forwards every match event to the listeners. */
    private MatchListener passthrough;
    /** Filter that forwards only the best-scoring candidate per record to the listeners. */
    private MatchListener choosebest;

    /* loaded from: input_file:no/priv/garshol/duke/Processor$ChooseBestFilter.class */
    /**
     * Match listener that remembers only the highest-scoring candidate seen for
     * the record currently being processed and, when the record ends, reports a
     * single outcome (match, possible match or no match) to the processor's
     * listeners.
     */
    class ChooseBestFilter extends AbstractMatchListener {
        /** Record currently being matched. */
        private Record subject;
        /** Best candidate seen so far for the subject, or null if none. */
        private Record topCandidate;
        /** Score of the best candidate; 0.0 when no candidate has been seen. */
        private double topScore;

        ChooseBestFilter() {
        }

        /** Announces the record to the listeners, then resets the per-record state. */
        @Override
        public void startRecord(Record record) {
            Processor.this.registerStartRecord(record);
            topScore = 0.0d;
            topCandidate = null;
            subject = record;
        }

        /** Keeps the candidate only if it scores strictly higher than the best so far. */
        @Override
        public void matches(Record record, Record record2, double d) {
            // Written as a negated '>' so a NaN score is ignored exactly as before.
            if (!(d > topScore)) {
                return;
            }
            topScore = d;
            topCandidate = record2;
        }

        /** Possible matches compete for "best candidate" on the same terms as matches. */
        @Override
        public void matchesPerhaps(Record record, Record record2, double d) {
            matches(record, record2, d);
        }

        /**
         * Classifies the best score against the configured thresholds, reports the
         * resulting single event, and then signals the end of the record.
         */
        @Override
        public void endRecord() {
            if (topScore > Processor.this.config.getThreshold()) {
                Processor.this.registerMatch(subject, topCandidate, topScore);
            } else {
                double maybeThreshold = Processor.this.config.getMaybeThreshold();
                // A maybe-threshold of 0.0 means "possible match" reporting is switched off.
                boolean nothingFound = maybeThreshold == 0.0d || topScore <= maybeThreshold;
                if (nothingFound) {
                    Processor.this.registerNoMatchFor(subject);
                } else {
                    Processor.this.registerMatchPerhaps(subject, topCandidate, topScore);
                }
            }
            Processor.this.registerEndRecord();
        }
    }

    /* loaded from: input_file:no/priv/garshol/duke/Processor$PassThroughFilter.class */
    /**
     * Match listener that forwards every match and possible match straight to the
     * processor's listeners, and reports "no match" at the end of a record for
     * which nothing was forwarded.
     */
    class PassThroughFilter extends AbstractMatchListener {
        /** True once any match or possible match has been forwarded for the subject. */
        private boolean matchFound;
        /** Record currently being matched. */
        private Record subject;

        PassThroughFilter() {
        }

        /** Resets the per-record state and announces the record to the listeners. */
        @Override
        public void startRecord(Record record) {
            matchFound = false;
            subject = record;
            Processor.this.registerStartRecord(record);
        }

        /** Forwards a match unchanged. */
        @Override
        public void matches(Record record, Record record2, double d) {
            matchFound = true;
            Processor.this.registerMatch(record, record2, d);
        }

        /** Forwards a possible match unchanged. */
        @Override
        public void matchesPerhaps(Record record, Record record2, double d) {
            matchFound = true;
            Processor.this.registerMatchPerhaps(record, record2, d);
        }

        /** Reports "no match" if nothing was forwarded, then signals the end of the record. */
        @Override
        public void endRecord() {
            if (matchFound) {
                Processor.this.registerEndRecord();
                return;
            }
            Processor.this.registerNoMatchFor(subject);
            Processor.this.registerEndRecord();
        }
    }

    /* loaded from: input_file:no/priv/garshol/duke/Processor$PropertyComparator.class */
    /**
     * Orders properties by ascending low probability.
     *
     * <p>Uses {@link Double#compare(double, double)} rather than the sign of a
     * subtraction: with subtraction a NaN probability compared as "equal" to every
     * other value, which violates the comparator contract and can make sorting
     * fail or produce an arbitrary order. With {@code Double.compare} NaN sorts
     * after all other values and the ordering is total.
     */
    static class PropertyComparator implements java.util.Comparator<Property> {
        PropertyComparator() {
        }

        /**
         * @param property  left-hand property
         * @param property2 right-hand property
         * @return a negative, zero or positive value as the first property's low
         *         probability is less than, equal to or greater than the second's
         */
        @Override // java.util.Comparator
        public int compare(Property property, Property property2) {
            return Double.compare(property.getLowProbability(), property2.getLowProbability());
        }
    }

    /**
     * Creates a processor for the given configuration by delegating to
     * {@code Processor(Configuration, boolean)} with the flag set to {@code true}.
     *
     * @param configuration the configuration to process with
     * @throws IOException declared by the delegated constructor chain
     */
    public Processor(Configuration configuration) throws IOException {
        this(configuration, true);
    }

    /**
     * Creates a processor whose database is obtained from
     * {@code configuration.createDatabase(z)}.
     *
     * @param configuration the configuration to process with
     * @param z flag passed through to {@code createDatabase}; presumably controls
     *          whether an existing database is overwritten — confirm against
     *          Configuration
     * @throws IOException declared by the delegated constructor chain
     */
    public Processor(Configuration configuration, boolean z) throws IOException {
        this(configuration, configuration.createDatabase(z));
    }

    /**
     * Creates a processor that works against the given database.
     *
     * <p>Besides storing its arguments, the constructor installs a dummy logger,
     * creates the two internal match filters, and precomputes the property order
     * (non-id properties sorted by ascending low probability) together with the
     * accumulated Bayes probabilities over that order.
     *
     * @param configuration the configuration supplying the properties
     * @param database the database to index into and search
     * @throws IOException declared for callers; nothing in this body throws it directly
     */
    public Processor(Configuration configuration, Database database) throws IOException {
        this.config = configuration;
        this.database = database;
        // Parameterized instead of raw types so the assignments are checked by the compiler.
        this.listeners = new ArrayList<MatchListener>();
        this.logger = new DummyLogger();
        this.passthrough = new PassThroughFilter();
        this.choosebest = new ChooseBestFilter();
        this.proporder = new ArrayList<Property>();
        for (Property property : configuration.getProperties()) {
            if (!property.isIdProperty()) {
                this.proporder.add(property);
            }
        }
        Collections.sort(this.proporder, new PropertyComparator());
        // Walk the sorted properties from last to first so that accprob[i] combines
        // the high probabilities of property i and every property after it.
        double accumulated = 0.5d;
        this.accprob = new double[this.proporder.size()];
        for (int index = this.proporder.size() - 1; index >= 0; index--) {
            accumulated = Utils.computeBayes(accumulated, this.proporder.get(index).getHighProbability());
            this.accprob[index] = accumulated;
        }
    }

    /**
     * Replaces the logger used by this processor; it is also handed to each data
     * source before its records are read.
     *
     * @param logger the logger to use from now on
     */
    public void setLogger(Logger logger) {
        this.logger = logger;
    }

    /**
     * Registers a listener to receive the events this processor emits.
     *
     * @param matchListener the listener to add
     */
    public void addMatchListener(MatchListener matchListener) {
        this.listeners.add(matchListener);
    }

    /**
     * Returns the registered listeners.
     *
     * @return the processor's own listener collection (not a copy), so changes
     *         made through it affect this processor
     */
    public Collection<MatchListener> getListeners() {
        return this.listeners;
    }

    /**
     * Returns the database this processor indexes into and searches.
     *
     * @return the database passed to, or created by, the constructor
     */
    public Database getDatabase() {
        return this.database;
    }

    /**
     * Deduplicates the records of all data sources returned by the configuration,
     * using the default batch size.
     *
     * @throws IOException if the batched deduplication fails with an I/O error
     */
    public void deduplicate() throws IOException {
        deduplicate(this.config.getDataSources(), DEFAULT_BATCH_SIZE);
    }

    /**
     * Deduplicates the records of all data sources returned by the configuration,
     * using the given batch size.
     *
     * @param i number of records per batch
     * @throws IOException if the batched deduplication fails with an I/O error
     */
    public void deduplicate(int i) throws IOException {
        deduplicate(this.config.getDataSources(), i);
    }

    /**
     * Reads every record from the given data sources and deduplicates them in
     * batches of {@code i} records. Batches may span data sources; a final,
     * smaller batch is processed for any remaining records. Listeners are told
     * when each batch is ready and, at the very end, that processing has finished.
     *
     * <p>Each source's record iterator is closed exactly once, after the source is
     * exhausted or when an exception interrupts reading. (Previously the close
     * sat inside the read loop and therefore ran after every single record.)
     *
     * @param collection the data sources to read records from
     * @param i number of records per batch; must not be zero, since it is used as
     *          a modulus
     * @throws IOException if reading from a data source fails with an I/O error
     */
    public void deduplicate(Collection<DataSource> collection, int i) throws IOException {
        List<Record> batch = new ArrayList<Record>();
        int count = 0;
        for (DataSource dataSource : collection) {
            dataSource.setLogger(this.logger);
            RecordIterator records = dataSource.getRecords();
            try {
                while (records.hasNext()) {
                    batch.add(records.next());
                    count++;
                    if (count % i == 0) {
                        for (MatchListener listener : this.listeners) {
                            listener.batchReady(batch.size());
                        }
                        deduplicate(batch);
                        records.batchProcessed();
                        // Start a fresh list rather than clearing, so the processed
                        // batch is never mutated after being handed off.
                        batch = new ArrayList<Record>();
                    }
                }
            } finally {
                records.close();
            }
        }
        // Whatever is left over forms the last, partial batch.
        if (!batch.isEmpty()) {
            for (MatchListener listener : this.listeners) {
                listener.batchReady(batch.size());
            }
            deduplicate(batch);
        }
        for (MatchListener listener : this.listeners) {
            listener.endProcessing();
        }
    }

    /**
     * Deduplicates one batch: indexes every record, commits the database, matches
     * each record against the database through the pass-through filter, and
     * finally notifies the listeners that the batch is done.
     *
     * @param collection the records making up the batch
     * @throws DukeException wrapping any I/O failure raised while indexing,
     *         committing or matching
     */
    public void deduplicate(Collection<Record> collection) {
        this.logger.info("Deduplicating batch of " + collection.size() + " records");
        try {
            // Index everything first so records in the same batch can match each other.
            for (Record record : collection) {
                this.database.index(record);
            }
            this.database.commit();
            for (Record record : collection) {
                match(record, this.passthrough);
            }
            for (MatchListener listener : this.listeners) {
                listener.batchDone();
            }
        } catch (IOException e) {
            // CorruptIndexException is a subtype of IOException, so this one handler
            // covers it; a separate catch placed after this one would be unreachable
            // and is rejected by the compiler.
            throw new DukeException(e);
        }
    }

    /**
     * Links the data sources the configuration returns for arguments 1 and 2
     * (presumably the two record-linkage groups — confirm against Configuration),
     * using the default batch size.
     *
     * @throws IOException if indexing or linking fails with an I/O error
     */
    public void link() throws IOException {
        link(this.config.getDataSources(1), this.config.getDataSources(2), DEFAULT_BATCH_SIZE);
    }

    /**
     * Indexes all records from the first set of data sources, then matches the
     * records of the second set against them using the choose-best filter.
     *
     * @param collection data sources whose records are indexed
     * @param collection2 data sources whose records are matched against the index
     * @param i batch size passed to {@code index}
     * @throws IOException if indexing or linking fails with an I/O error
     */
    public void link(Collection<DataSource> collection, Collection<DataSource> collection2, int i) throws IOException {
        index(collection, i);
        linkRecords(collection2, this.choosebest);
    }

    /**
     * Matches the records of the given data sources against the database using
     * the pass-through filter.
     *
     * @param collection data sources whose records are matched
     * @throws IOException if matching fails with an I/O error
     */
    public void linkRecords(Collection<DataSource> collection) throws IOException {
        linkRecords(collection, this.passthrough);
    }

    /**
     * Matches the records of the given data sources against the database.
     *
     * @param collection data sources whose records are matched
     * @param z {@code true} to use the pass-through filter, {@code false} to use
     *          the choose-best filter
     * @throws IOException if matching fails with an I/O error
     */
    public void linkRecords(Collection<DataSource> collection, boolean z) throws IOException {
        linkRecords(collection, z ? this.passthrough : this.choosebest);
    }

    /**
     * Matches every record of the given data sources against the database,
     * routing results through the supplied filter, and then tells all listeners
     * that processing has ended.
     *
     * <p>Each source's record iterator is closed in a {@code finally} block so it
     * is released even when matching a record throws.
     *
     * @param collection data sources whose records are matched
     * @param matchListener filter that receives the raw comparison results
     * @throws IOException if looking up candidates fails with an I/O error
     */
    private void linkRecords(Collection<DataSource> collection, MatchListener matchListener) throws IOException {
        for (DataSource dataSource : collection) {
            dataSource.setLogger(this.logger);
            RecordIterator records = dataSource.getRecords();
            try {
                while (records.hasNext()) {
                    match(records.next(), matchListener);
                }
            } finally {
                records.close();
            }
        }
        for (MatchListener listener : this.listeners) {
            listener.endProcessing();
        }
    }

    /**
     * Indexes every record of the given data sources into the database without
     * matching them, then commits. Listeners receive {@code batchReady(i)} each
     * time another {@code i} records have been indexed, and a final
     * {@code batchReady(remainder)} for a trailing partial batch.
     *
     * <p>The trailing notification fires only when there is a non-empty
     * remainder. (Previously the condition was inverted, so it fired with a size
     * of zero on exact multiples and never reported a partial batch.) Each
     * source's iterator is closed in a {@code finally} block so it is released
     * even when indexing throws.
     *
     * @param collection data sources whose records are indexed
     * @param i number of records per reported batch; must not be zero, since it
     *          is used as a modulus
     * @throws IOException if indexing or committing fails with an I/O error
     */
    public void index(Collection<DataSource> collection, int i) throws IOException {
        int count = 0;
        for (DataSource dataSource : collection) {
            dataSource.setLogger(this.logger);
            RecordIterator records = dataSource.getRecords();
            try {
                while (records.hasNext()) {
                    this.database.index(records.next());
                    count++;
                    if (count % i == 0) {
                        for (MatchListener listener : this.listeners) {
                            listener.batchReady(i);
                        }
                    }
                }
            } finally {
                records.close();
            }
        }
        // Records indexed since the last full batch, if any, form the final batch.
        int remainder = count % i;
        if (remainder != 0) {
            for (MatchListener listener : this.listeners) {
                listener.batchReady(remainder);
            }
        }
        this.database.commit();
    }

    /**
     * Looks up the candidate matches for a record in the database and compares
     * the record against each of them, reporting results to the given listener.
     *
     * @param record the record to find matches for
     * @param matchListener receiver of the comparison results
     * @throws IOException if the candidate lookup fails with an I/O error
     */
    private void match(Record record, MatchListener matchListener) throws IOException {
        final Collection<Record> candidates = this.database.findCandidateMatches(record);
        // Only build the (potentially large) record description when it will be logged.
        if (this.logger.isDebugEnabled()) {
            final String description = PrintMatchListener.toString(record);
            this.logger.debug("Matching record " + description + " found " + candidates.size() + " candidates");
        }
        compareCandidates(record, candidates, matchListener);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Compares a record against each candidate and reports the outcome to the
     * listener, bracketed by {@code startRecord} and {@code endRecord}.
     * Candidates that share an identity value with the record are skipped. A
     * score above the threshold is reported as a match; otherwise, if a non-zero
     * maybe-threshold is configured and the score exceeds it, as a possible match.
     *
     * @param record the record being matched
     * @param collection the candidate records to compare against
     * @param matchListener receiver of the comparison results
     */
    public void compareCandidates(Record record, Collection<Record> collection, MatchListener matchListener) {
        matchListener.startRecord(record);
        for (Record candidate : collection) {
            if (isSameAs(record, candidate)) {
                continue; // the record itself (or one sharing an id), not a duplicate
            }
            final double score = compare(record, candidate);
            if (score > this.config.getThreshold()) {
                matchListener.matches(record, candidate, score);
                continue;
            }
            final double maybeThreshold = this.config.getMaybeThreshold();
            if (maybeThreshold != 0.0d && score > maybeThreshold) {
                matchListener.matchesPerhaps(record, candidate, score);
            }
        }
        matchListener.endRecord();
    }

    /**
     * Computes the probability that two records represent the same entity.
     *
     * <p>Starting from 0.5, each property of the first record that is neither an
     * id nor an ignored property, and that has values in both records, contributes
     * the highest comparison score over all pairs of non-empty values; the
     * contributions are combined with {@code Utils.computeBayes}.
     *
     * <p>A property name for which the configuration lookup yields {@code null}
     * is skipped instead of failing with a {@code NullPointerException}.
     *
     * @param record the first record; its property names drive the comparison
     * @param record2 the second record
     * @return the combined probability
     * @throws RuntimeException wrapping any exception thrown while comparing a
     *         pair of values, with both values included in the message
     */
    public double compare(Record record, Record record2) {
        double probability = 0.5d;
        for (String name : record.getProperties()) {
            Property property = this.config.getPropertyByName(name);
            if (property == null) {
                continue; // property not known to the configuration: nothing to compare with
            }
            if (property.isIdProperty() || property.isIgnoreProperty()) {
                continue;
            }
            Collection<String> values1 = record.getValues(name);
            Collection<String> values2 = record2.getValues(name);
            if (values1 == null || values1.isEmpty() || values2 == null || values2.isEmpty()) {
                continue; // a property missing on either side contributes no evidence
            }
            // Best score over every pairing of non-empty values.
            double high = 0.0d;
            for (String value1 : values1) {
                if (value1.equals("")) {
                    continue;
                }
                for (String value2 : values2) {
                    if (value2.equals("")) {
                        continue;
                    }
                    try {
                        high = Math.max(high, property.compare(value1, value2));
                    } catch (Exception e) {
                        throw new RuntimeException("Comparison of values '" + value1 + "' and '" + value2 + "' failed", e);
                    }
                }
            }
            probability = Utils.computeBayes(probability, high);
        }
        return probability;
    }

    /**
     * Closes the underlying database.
     *
     * @throws IOException if closing the database fails
     */
    public void close() throws IOException {
        this.database.close();
    }

    /**
     * Tells whether two records share at least one value for any identity
     * property, in which case they are treated as the same record.
     *
     * <p>An identity property for which either record has no value collection
     * ({@code null}) is skipped; previously only the first record's collection
     * was checked, so a {@code null} on the candidate side caused a
     * {@code NullPointerException}.
     *
     * @param record the record being matched
     * @param record2 the candidate record
     * @return {@code true} if the records have an identity value in common
     */
    private boolean isSameAs(Record record, Record record2) {
        for (Property property : this.config.getIdentityProperties()) {
            String name = property.getName();
            Collection<String> candidateIds = record2.getValues(name);
            Collection<String> recordIds = record.getValues(name);
            if (candidateIds == null || recordIds == null) {
                continue;
            }
            for (String id : recordIds) {
                if (candidateIds.contains(id)) {
                    return true;
                }
            }
        }
        return false;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Tells every registered listener that processing of a record is starting.
     *
     * @param record the record about to be matched
     */
    public void registerStartRecord(Record record) {
        for (MatchListener listener : this.listeners) {
            listener.startRecord(record);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Reports a match to every registered listener.
     *
     * @param record the record being matched
     * @param record2 the record it matched
     * @param d the match score
     */
    public void registerMatch(Record record, Record record2, double d) {
        for (MatchListener listener : this.listeners) {
            listener.matches(record, record2, d);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Reports a possible match to every registered listener.
     *
     * @param record the record being matched
     * @param record2 the record it possibly matched
     * @param d the match score
     */
    public void registerMatchPerhaps(Record record, Record record2, double d) {
        for (MatchListener listener : this.listeners) {
            listener.matchesPerhaps(record, record2, d);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Tells every registered listener that no match was found for a record.
     *
     * @param record the record that matched nothing
     */
    public void registerNoMatchFor(Record record) {
        for (MatchListener listener : this.listeners) {
            listener.noMatchFor(record);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Tells every registered listener that processing of the current record has
     * finished.
     */
    public void registerEndRecord() {
        for (MatchListener listener : this.listeners) {
            listener.endRecord();
        }
    }
}
