package no.priv.garshol.duke;

import java.io.PrintWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;
import no.priv.garshol.duke.matchers.AbstractMatchListener;
import no.priv.garshol.duke.matchers.MatchListener;
import no.priv.garshol.duke.matchers.PrintMatchListener;
import no.priv.garshol.duke.utils.Utils;

/* loaded from: input_file:no/priv/garshol/duke/Processor.class */
public class Processor {
    /** Configuration supplying properties, thresholds and data sources. */
    private Configuration config;
    /** Record store used both for indexing records and for finding candidate matches. */
    protected Database database;
    /** Listeners notified of processing events and of match results. */
    private Collection<MatchListener> listeners;
    /** Logger used by this processor and handed to every data source it reads. */
    private Logger logger;
    /** The non-id properties, sorted by ascending low probability (built in the constructor). */
    private List<Property> proporder;
    /**
     * accprob[i] is the result of folding Utils.computeBayes over the high
     * probabilities of proporder[i..end], starting from 0.5.
     */
    private double[] accprob;
    /** Number of matching threads; when 1, matching runs in the calling thread. */
    private int threads;
    /** Batch size used by the overloads that take no explicit batch size. */
    private static final int DEFAULT_BATCH_SIZE = 40000;
    /** Number of calls made to compare(Record, Record). */
    private long comparisons;
    // The five counters below accumulate wall-clock milliseconds per phase and
    // are reported by Profiler.endProcessing().
    /** Milliseconds spent pulling records from data sources during deduplication. */
    private long srcread;
    /** Milliseconds spent indexing and committing batches during deduplication. */
    private long indexing;
    /** Milliseconds spent in Database.findCandidateMatches. */
    private long searching;
    /** Milliseconds spent comparing a record with its candidates. */
    private long comparing;
    /** Milliseconds spent inside listener callbacks. */
    private long callbacks;
    /** The registered profiler, or null while performance profiling is switched off. */
    private Profiler profiler;

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:no/priv/garshol/duke/Processor$MatchThread.class */
    public class MatchThread extends Thread {
        private Collection<Record> records;
        private boolean matchall;

        public MatchThread(int i, int i2, boolean z) {
            super("MatchThread " + i);
            this.records = new ArrayList(i2);
            this.matchall = z;
        }

        @Override // java.lang.Thread, java.lang.Runnable
        public void run() {
            Iterator<Record> it = this.records.iterator();
            while (it.hasNext()) {
                Processor.this.match(it.next(), this.matchall);
            }
        }

        public void addRecord(Record record) {
            this.records.add(record);
        }
    }

    /* loaded from: input_file:no/priv/garshol/duke/Processor$Profiler.class */
    /**
     * Match listener that prints throughput and timing statistics for a run.
     * Output goes to the writer set with {@link #setOutput(Writer)}, which
     * defaults to {@code System.out}.
     */
    public class Profiler extends AbstractMatchListener {
        private long processing_start;
        private long batch_start;
        private int batch_size;
        private int records;
        private PrintWriter out = new PrintWriter(System.out);

        public Profiler() {
        }

        /**
         * Sets the writer that receives the statistics.
         *
         * @param writer destination for all subsequent reports
         */
        public void setOutput(Writer writer) {
            this.out = new PrintWriter(writer);
        }

        /** Records the start time and prints version, database and thread count. */
        @Override
        public void startProcessing() {
            this.processing_start = System.currentTimeMillis();
            out.println("Duke version " + Duke.getVersionString());
            out.println(Processor.this.getDatabase());
            out.println("Threads: " + Processor.this.getThreads());
            // the PrintWriter is not auto-flushing, so push each report out explicitly
            out.flush();
        }

        /** Remembers when the batch started and how many records it holds. */
        @Override
        public void batchReady(int i) {
            this.batch_start = System.currentTimeMillis();
            this.batch_size = i;
        }

        /** Prints the running record total, the batch throughput and the comparison count. */
        @Override
        public void batchDone() {
            this.records += this.batch_size;
            long elapsed = System.currentTimeMillis() - this.batch_start;
            int rate = (int) ((1000.0d * this.batch_size) / elapsed);
            out.println("" + this.records + " processed, " + rate + " records/second; comparisons: " + Processor.this.getComparisonCount());
            out.flush();
        }

        /** Prints overall throughput, the per-phase time breakdown and memory usage. */
        @Override
        public void endProcessing() {
            long elapsed = System.currentTimeMillis() - this.processing_start;
            out.println("Run completed, " + ((int) ((1000.0d * this.records) / elapsed)) + " records/second");
            out.println("" + this.records + " records total in " + (elapsed / 1000) + " seconds");
            long total = Processor.this.srcread + Processor.this.indexing + Processor.this.searching + Processor.this.comparing + Processor.this.callbacks;
            out.println("Reading from source: " + seconds(Processor.this.srcread) + " (" + percent(Processor.this.srcread, total) + "%)");
            out.println("Indexing: " + seconds(Processor.this.indexing) + " (" + percent(Processor.this.indexing, total) + "%)");
            out.println("Searching: " + seconds(Processor.this.searching) + " (" + percent(Processor.this.searching, total) + "%)");
            out.println("Comparing: " + seconds(Processor.this.comparing) + " (" + percent(Processor.this.comparing, total) + "%)");
            out.println("Callbacks: " + seconds(Processor.this.callbacks) + " (" + percent(Processor.this.callbacks, total) + "%)");
            out.println();
            Runtime runtime = Runtime.getRuntime();
            out.println("Total memory: " + runtime.totalMemory() + ", free memory: " + runtime.freeMemory() + ", used memory: " + (runtime.totalMemory() - runtime.freeMemory()));
            out.flush();
        }

        /** Formats a millisecond count as whole seconds. */
        private String seconds(long j) {
            return "" + ((int) (j / 1000));
        }

        /** Formats j as a whole percentage of j2; yields "0" when j2 is zero. */
        private String percent(long j, long j2) {
            if (j2 == 0) {
                // a run too short to register any time would otherwise divide by zero
                return "0";
            }
            return "" + ((int) ((j * 100) / j2));
        }
    }

    /* loaded from: input_file:no/priv/garshol/duke/Processor$PropertyComparator.class */
    /** Orders properties by ascending low probability. */
    static class PropertyComparator implements java.util.Comparator<Property> {
        PropertyComparator() {
        }

        /**
         * @return 1, -1 or 0 according to the sign of the difference between
         *         the two low probabilities (0 also when the difference is NaN)
         */
        @Override
        public int compare(Property property, Property property2) {
            double difference = property.getLowProbability() - property2.getLowProbability();
            if (difference > 0.0d) {
                return 1;
            }
            if (difference < 0.0d) {
                return -1;
            }
            return 0;
        }
    }

    /**
     * Creates a processor for the given configuration by delegating to
     * {@link #Processor(Configuration, boolean)} with {@code true} as the flag.
     *
     * @param configuration the configuration to process with
     */
    public Processor(Configuration configuration) {
        this(configuration, true);
    }

    /**
     * Creates a processor that uses the database returned by
     * {@code configuration.getDatabase(z)}.
     *
     * @param configuration the configuration to process with
     * @param z             passed unchanged to {@code Configuration.getDatabase};
     *                      presumably controls whether an existing index is
     *                      overwritten -- TODO confirm against Configuration
     */
    public Processor(Configuration configuration, boolean z) {
        this(configuration, configuration.getDatabase(z));
    }

    /**
     * Creates a single-threaded processor with a dummy logger and no listeners,
     * working against the given database.
     *
     * @param configuration the configuration to process with
     * @param database      the database used for indexing and candidate lookup
     */
    public Processor(Configuration configuration, Database database) {
        this.config = configuration;
        this.database = database;
        this.listeners = new CopyOnWriteArrayList<MatchListener>();
        this.logger = new DummyLogger();
        this.threads = 1;

        // gather every non-id property and order them by ascending low probability
        List<Property> ordered = new ArrayList<Property>();
        for (Property candidate : configuration.getProperties()) {
            if (candidate.isIdProperty()) {
                continue;
            }
            ordered.add(candidate);
        }
        Collections.sort(ordered, new PropertyComparator());
        this.proporder = ordered;

        // walk from the last property to the first, accumulating the Bayes
        // combination of the high probabilities seen so far
        double[] accumulated = new double[ordered.size()];
        double probability = 0.5d;
        for (int pos = ordered.size() - 1; pos >= 0; pos--) {
            probability = Utils.computeBayes(probability, ordered.get(pos).getHighProbability());
            accumulated[pos] = probability;
        }
        this.accprob = accumulated;
    }

    /**
     * Replaces the logger used by this processor. The same logger is also
     * handed to each data source before its records are read.
     */
    public void setLogger(Logger logger) {
        this.logger = logger;
    }

    /**
     * Sets the number of threads used for matching. With exactly one thread,
     * matching runs in the calling thread; any other value spawns that many
     * worker threads per batch.
     *
     * @param i the number of threads, at least 1
     * @throws IllegalArgumentException if {@code i} is less than 1
     */
    public void setThreads(int i) {
        if (i < 1) {
            // zero or negative counts would only fail later, deep inside the
            // threaded matching code, with an unhelpful arithmetic/array error
            throw new IllegalArgumentException("Number of threads must be at least 1, was " + i);
        }
        this.threads = i;
    }

    /** Returns the number of threads used for matching. */
    public int getThreads() {
        return this.threads;
    }

    /** Registers a listener to be notified of processing events and match results. */
    public void addMatchListener(MatchListener matchListener) {
        this.listeners.add(matchListener);
    }

    /**
     * Unregisters a listener.
     *
     * @param matchListener the listener to remove; may be null
     * @return {@code true} if the argument was null, otherwise whatever the
     *         listener collection's {@code remove} reports
     */
    public boolean removeMatchListener(MatchListener matchListener) {
        return matchListener == null || listeners.remove(matchListener);
    }

    /**
     * Returns the registered listeners. This is the processor's own collection,
     * not a copy, so changes made through it affect the processor.
     */
    public Collection<MatchListener> getListeners() {
        return this.listeners;
    }

    /** Returns the database this processor indexes into and searches. */
    public Database getDatabase() {
        return this.database;
    }

    /**
     * Switches performance profiling on or off. Switching on registers a new
     * {@link Profiler} as a match listener; switching off unregisters and
     * discards it. Asking for the state already in effect does nothing.
     *
     * @param z {@code true} to enable profiling, {@code false} to disable it
     */
    public void setPerformanceProfiling(boolean z) {
        boolean currentlyOn = profiler != null;
        if (z == currentlyOn) {
            return;
        }
        if (z) {
            profiler = new Profiler();
            addMatchListener(profiler);
        } else {
            removeMatchListener(profiler);
            profiler = null;
        }
    }

    /** Returns the active profiler, or null when performance profiling is off. */
    public Profiler getProfiler() {
        return this.profiler;
    }

    /**
     * Deduplicates all data sources of the configuration using the default
     * batch size.
     */
    public void deduplicate() {
        deduplicate(this.config.getDataSources(), DEFAULT_BATCH_SIZE);
    }

    /**
     * Deduplicates all data sources of the configuration.
     *
     * @param i the batch size: how many records are read before a batch is processed
     */
    public void deduplicate(int i) {
        deduplicate(this.config.getDataSources(), i);
    }

    /**
     * Reads the given data sources in batches and deduplicates each batch.
     * Listeners receive startProcessing before the first record is read and
     * endProcessing after the last source has been closed. The record count
     * that triggers a batch runs across all sources, while any records left
     * over at the end of a source are processed as a smaller final batch.
     *
     * @param collection the data sources to read
     * @param i          the batch size; must not be zero
     */
    public void deduplicate(Collection<DataSource> collection, int i) {
        int count = 0;
        startProcessing();
        for (DataSource dataSource : collection) {
            dataSource.setLogger(this.logger);
            RecordIterator records = dataSource.getRecords();
            try {
                Collection<Record> batch = new ArrayList<Record>();
                long readStart = System.currentTimeMillis();
                while (records.hasNext()) {
                    batch.add(records.next());
                    count++;
                    if (count % i == 0) {
                        this.srcread += System.currentTimeMillis() - readStart;
                        deduplicate(batch);
                        records.batchProcessed();
                        batch = new ArrayList<Record>();
                        readStart = System.currentTimeMillis();
                    }
                }
                // The time spent reading since the last full batch must be
                // counted too, otherwise sources smaller than one batch
                // report no read time at all.
                this.srcread += System.currentTimeMillis() - readStart;
                if (!batch.isEmpty()) {
                    deduplicate(batch);
                    records.batchProcessed();
                }
            } finally {
                records.close();
            }
        }
        endProcessing();
    }

    /**
     * Deduplicates one batch: indexes and commits every record, then matches
     * each record against the database. Listeners get batchReady before the
     * indexing and batchDone after the matching.
     *
     * @param collection the records making up the batch
     */
    public void deduplicate(Collection<Record> collection) {
        final int batchSize = collection.size();
        logger.info("Deduplicating batch of " + batchSize + " records");
        batchReady(batchSize);

        // index the whole batch first so records can match each other
        final long indexStart = System.currentTimeMillis();
        for (Record record : collection) {
            database.index(record);
        }
        database.commit();
        indexing += System.currentTimeMillis() - indexStart;

        match(collection, true);
        batchDone();
    }

    /**
     * Matches every record of a batch, in the calling thread when exactly one
     * thread is configured and via worker threads otherwise.
     *
     * @param collection the records to match
     * @param z          passed on to match(Record, boolean)
     */
    private void match(Collection<Record> collection, boolean z) {
        if (threads == 1) {
            for (Record record : collection) {
                match(record, z);
            }
        } else {
            threadedmatch(collection, z);
        }
    }

    /**
     * Matches a batch using the configured number of worker threads. Records
     * are dealt out round-robin, all workers are started, and the call blocks
     * until every worker has finished. If the calling thread is interrupted
     * while waiting, the method stops waiting and returns with the thread's
     * interrupt flag set again so that callers can see the interruption.
     *
     * @param collection the records to match
     * @param z          passed on to match(Record, boolean) by each worker
     */
    private void threadedmatch(Collection<Record> collection, boolean z) {
        MatchThread[] workers = new MatchThread[this.threads];
        // round up so the buffers need not grow when the batch does not divide evenly
        int share = (collection.size() + workers.length - 1) / workers.length;
        for (int ix = 0; ix < workers.length; ix++) {
            workers[ix] = new MatchThread(ix, share, z);
        }

        int next = 0;
        for (Record record : collection) {
            workers[next % workers.length].addRecord(record);
            next++;
        }

        for (MatchThread worker : workers) {
            worker.start();
        }
        for (MatchThread worker : workers) {
            try {
                worker.join();
            } catch (InterruptedException e) {
                // catching InterruptedException clears the flag; restore it
                // rather than silently swallowing the interruption
                Thread.currentThread().interrupt();
                return;
            }
        }
    }

    /**
     * Links the configuration's two data source groups using the default batch
     * size: group 1 is indexed and the records of group 2 are matched against it.
     */
    public void link() {
        link(this.config.getDataSources(1), this.config.getDataSources(2), DEFAULT_BATCH_SIZE);
    }

    /**
     * Indexes the first group of sources and matches the second group against
     * it, delegating to the four-argument overload with {@code true} as the flag.
     *
     * @param collection  the sources to index
     * @param collection2 the sources whose records are matched
     * @param i           the batch size
     */
    public void link(Collection<DataSource> collection, Collection<DataSource> collection2, int i) {
        link(collection, collection2, true, i);
    }

    /**
     * Indexes the first group of sources and then matches the records of the
     * second group against the index.
     *
     * @param collection  the sources to index
     * @param collection2 the sources whose records are matched
     * @param z           true to report every candidate above the thresholds,
     *                    false to report only the best candidate per record
     * @param i           the batch size
     */
    public void link(Collection<DataSource> collection, Collection<DataSource> collection2, boolean z, int i) {
        startProcessing();
        index(collection, i);
        // linkRecords sends the matching endProcessing notification when it is done
        linkRecords(collection2, z, i);
    }

    /**
     * Matches the records of the given sources against the already indexed
     * data, with {@code true} as the flag and the default batch size.
     */
    public void linkRecords(Collection<DataSource> collection) {
        linkRecords(collection, true);
    }

    /**
     * Matches the records of the given sources against the already indexed
     * data, using the default batch size.
     *
     * @param z true to report every candidate above the thresholds, false to
     *          report only the best candidate per record
     */
    public void linkRecords(Collection<DataSource> collection, boolean z) {
        linkRecords(collection, z, DEFAULT_BATCH_SIZE);
    }

    /**
     * Matches the records of the given sources, batch by batch, against the
     * already indexed data. The records themselves are not indexed. Listeners
     * receive endProcessing once all sources have been handled.
     *
     * @param collection the sources whose records are matched
     * @param z          true to report every candidate above the thresholds,
     *                   false to report only the best candidate per record
     * @param i          the batch size
     */
    public void linkRecords(Collection<DataSource> collection, boolean z, int i) {
        for (DataSource dataSource : collection) {
            dataSource.setLogger(this.logger);
            Collection<Record> batch = new ArrayList<>(i);
            RecordIterator records = dataSource.getRecords();
            try {
                while (records.hasNext()) {
                    batch.add(records.next());
                    if (batch.size() == i) {
                        linkBatch(batch, z);
                        batch.clear();
                    }
                }
            } finally {
                // release the source even when reading or matching fails
                records.close();
            }
            // NOTE(review): unlike deduplicate, no batchProcessed() call is made
            // on the iterator here -- confirm that this is intended
            if (!batch.isEmpty()) {
                linkBatch(batch, z);
            }
        }
        endProcessing();
    }

    /**
     * Matches one batch of records, bracketing the work with the batchReady
     * and batchDone listener notifications.
     */
    private void linkBatch(Collection<Record> collection, boolean z) {
        batchReady(collection.size());
        match(collection, z);
        batchDone();
    }

    /**
     * Indexes every record of the given sources without matching them, and
     * commits the database at the end. Listeners receive batchReady for every
     * full batch of records indexed, and once more for a trailing partial
     * batch if there is one.
     *
     * @param collection the sources to index
     * @param i          the batch size; must not be zero
     */
    public void index(Collection<DataSource> collection, int i) {
        int count = 0;
        for (DataSource dataSource : collection) {
            dataSource.setLogger(this.logger);
            RecordIterator records = dataSource.getRecords();
            try {
                while (records.hasNext()) {
                    this.database.index(records.next());
                    count++;
                    if (count % i == 0) {
                        batchReady(i);
                    }
                }
            } finally {
                // release the source even when reading or indexing fails
                records.close();
            }
        }
        // Full batches were reported inside the loop; only a non-empty
        // remainder is still unreported.
        int remainder = count % i;
        if (remainder != 0) {
            batchReady(remainder);
        }
        this.database.commit();
    }

    /** Returns the number of record comparisons performed so far. */
    public long getComparisonCount() {
        return this.comparisons;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Looks up the candidate matches for one record and compares the record
     * with them, adding the time spent to the search and compare counters.
     *
     * @param record the record to find matches for
     * @param z      true to report every candidate above the thresholds, false
     *               to report only the best candidate
     */
    public void match(Record record, boolean z) {
        final long searchStart = System.currentTimeMillis();
        Collection<Record> candidates = database.findCandidateMatches(record);
        searching += System.currentTimeMillis() - searchStart;

        if (logger.isDebugEnabled()) {
            logger.debug("Matching record "
                    + PrintMatchListener.toString(record, config.getProperties())
                    + " found " + candidates.size() + " candidates");
        }

        final long compareStart = System.currentTimeMillis();
        if (!z) {
            compareCandidatesBest(record, candidates);
        } else {
            compareCandidatesSimple(record, candidates);
        }
        comparing += System.currentTimeMillis() - compareStart;
    }

    /**
     * Compares the record with every candidate and reports each one that
     * scores above the threshold as a match, or above the maybe-threshold
     * (when that is non-zero) as a possible match. Candidates sharing an
     * identity value with the record are skipped. If nothing was reported,
     * listeners are told that the record has no match.
     *
     * @param record     the record being matched
     * @param collection the candidate records
     */
    protected void compareCandidatesSimple(Record record, Collection<Record> collection) {
        boolean reported = false;
        for (Record candidate : collection) {
            if (isSameAs(record, candidate)) {
                continue;
            }
            double probability = compare(record, candidate);
            if (probability > config.getThreshold()) {
                reported = true;
                registerMatch(record, candidate, probability);
                continue;
            }
            if (config.getMaybeThreshold() != 0.0d && probability > config.getMaybeThreshold()) {
                reported = true;
                registerMatchPerhaps(record, candidate, probability);
            }
        }
        if (!reported) {
            registerNoMatchFor(record);
        }
    }

    /**
     * Compares the record with every candidate but reports only the highest
     * scoring one: as a match if it is above the threshold, as a possible
     * match if it is above a non-zero maybe-threshold, and otherwise as no
     * match. Candidates sharing an identity value with the record are skipped.
     *
     * @param record     the record being matched
     * @param collection the candidate records
     */
    protected void compareCandidatesBest(Record record, Collection<Record> collection) {
        double best = 0.0d;
        Record bestCandidate = null;
        for (Record candidate : collection) {
            if (isSameAs(record, candidate)) {
                continue;
            }
            double probability = compare(record, candidate);
            if (probability > best) {
                best = probability;
                bestCandidate = candidate;
            }
        }

        if (best > config.getThreshold()) {
            registerMatch(record, bestCandidate, best);
            return;
        }
        if (config.getMaybeThreshold() == 0.0d || best <= config.getMaybeThreshold()) {
            registerNoMatchFor(record);
            return;
        }
        registerMatchPerhaps(record, bestCandidate, best);
    }

    /**
     * Computes the probability that two records represent the same thing.
     * Starting from 0.5, every property of the first record that is configured,
     * is neither an id nor an ignored property, and has values in both records
     * contributes the best score found among all pairs of non-empty values;
     * the contributions are combined with Utils.computeBayes.
     *
     * @param record  the record whose property names drive the comparison
     * @param record2 the record compared against
     * @return the combined probability
     * @throws DukeException if a property comparison fails; the original
     *                       exception is attached as the cause
     */
    public double compare(Record record, Record record2) {
        comparisons++;
        double probability = 0.5d;
        for (String name : record.getProperties()) {
            Property property = config.getPropertyByName(name);
            if (property == null || property.isIdProperty() || property.isIgnoreProperty()) {
                continue;
            }

            Collection<String> left = record.getValues(name);
            Collection<String> right = record2.getValues(name);
            if (left == null || left.isEmpty() || right == null || right.isEmpty()) {
                continue; // nothing to compare for this property
            }

            // keep the highest score over all pairs of non-empty values
            double highest = 0.0d;
            for (String leftValue : left) {
                if (leftValue.isEmpty()) {
                    continue;
                }
                for (String rightValue : right) {
                    if (rightValue.isEmpty()) {
                        continue;
                    }
                    try {
                        highest = Math.max(highest, property.compare(leftValue, rightValue));
                    } catch (Exception e) {
                        throw new DukeException("Comparison of values '" + leftValue + "' and '" + rightValue + "' with " + property.getComparator() + " failed", e);
                    }
                }
            }
            probability = Utils.computeBayes(probability, highest);
        }
        return probability;
    }

    /** Closes the underlying database. */
    public void close() {
        this.database.close();
    }

    /**
     * Tells whether two records share at least one value for any of the
     * configured identity properties. An identity property for which either
     * record has no values is skipped.
     *
     * @param record  the record being matched
     * @param record2 the candidate record
     * @return true if some identity property has a value common to both records
     */
    private boolean isSameAs(Record record, Record record2) {
        for (Property property : this.config.getIdentityProperties()) {
            String name = property.getName();
            Collection<String> candidateValues = record2.getValues(name);
            Collection<String> recordValues = record.getValues(name);
            // either side may lack the property altogether; without values on
            // both sides there is nothing that could be shared
            if (recordValues == null || candidateValues == null) {
                continue;
            }
            for (String value : recordValues) {
                if (candidateValues.contains(value)) {
                    return true;
                }
            }
        }
        return false;
    }

    /** Tells every listener that processing starts; the time taken counts as callback time. */
    private void startProcessing() {
        final long began = System.currentTimeMillis();
        for (MatchListener listener : listeners) {
            listener.startProcessing();
        }
        callbacks += System.currentTimeMillis() - began;
    }

    /** Tells every listener that processing is over; the time taken counts as callback time. */
    private void endProcessing() {
        final long began = System.currentTimeMillis();
        for (MatchListener listener : listeners) {
            listener.endProcessing();
        }
        callbacks += System.currentTimeMillis() - began;
    }

    /**
     * Tells every listener that a batch is ready; the time taken counts as
     * callback time.
     *
     * @param i the number of records in the batch
     */
    private void batchReady(int i) {
        final long began = System.currentTimeMillis();
        for (MatchListener listener : listeners) {
            listener.batchReady(i);
        }
        callbacks += System.currentTimeMillis() - began;
    }

    /** Tells every listener that the current batch is done; the time taken counts as callback time. */
    private void batchDone() {
        final long began = System.currentTimeMillis();
        for (MatchListener listener : listeners) {
            listener.batchDone();
        }
        callbacks += System.currentTimeMillis() - began;
    }

    /**
     * Reports a match to every listener; the time taken counts as callback time.
     *
     * @param record  the record being matched
     * @param record2 the matching candidate
     * @param d       the computed probability
     */
    private void registerMatch(Record record, Record record2, double d) {
        final long began = System.currentTimeMillis();
        for (MatchListener listener : listeners) {
            listener.matches(record, record2, d);
        }
        callbacks += System.currentTimeMillis() - began;
    }

    /**
     * Reports a possible match to every listener; the time taken counts as
     * callback time.
     *
     * @param record  the record being matched
     * @param record2 the possibly matching candidate
     * @param d       the computed probability
     */
    private void registerMatchPerhaps(Record record, Record record2, double d) {
        final long began = System.currentTimeMillis();
        for (MatchListener listener : listeners) {
            listener.matchesPerhaps(record, record2, d);
        }
        callbacks += System.currentTimeMillis() - began;
    }

    /**
     * Tells every listener that no match was found for the record; the time
     * taken counts as callback time.
     *
     * @param record the record without a match
     */
    private void registerNoMatchFor(Record record) {
        final long began = System.currentTimeMillis();
        for (MatchListener listener : listeners) {
            listener.noMatchFor(record);
        }
        callbacks += System.currentTimeMillis() - began;
    }
}
