package smile.nlp.collocation;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import smile.nlp.Corpus;
import smile.sort.HeapSelect;
import smile.stat.distribution.ChiSquareDistribution;

/* loaded from: input_file:smile/nlp/collocation/Bigram.class */
/**
 * A bigram annotated with its corpus frequency and a collocation score.
 *
 * <p>The static {@code of} factories scan the bigrams of a {@link Corpus},
 * score each one with a likelihood-ratio statistic ({@code -2 log(lambda)})
 * and return the highest-scoring ones as collocation candidates.
 *
 * <p>Note: the natural ordering defined by {@link #compareTo(Bigram)} looks
 * only at {@link #score}.
 */
public class Bigram extends smile.nlp.Bigram implements Comparable<Bigram> {
    /** Frequency of this bigram, as supplied to the constructor. */
    public final int count;

    /** Score of this bigram, as supplied to the constructor. */
    public final double score;

    /**
     * Distribution used to turn a p-value into a score cutoff.
     * Presumably chi-square with one degree of freedom (constructor argument 1).
     */
    private static final ChiSquareDistribution chisq = new ChiSquareDistribution(1);

    /**
     * Creates a scored bigram.
     *
     * @param str  the first word
     * @param str2 the second word
     * @param i    the frequency of the bigram
     * @param d    the score of the bigram
     */
    public Bigram(String str, String str2, int i, double d) {
        super(str, str2);
        this.count = i;
        this.score = d;
    }

    /**
     * Formats the bigram as {@code (w1 w2, count, score)} with the score
     * printed to two decimal places.
     */
    @Override // smile.nlp.Bigram
    public String toString() {
        return String.format("(%s %s, %d, %.2f)", this.w1, this.w2, this.count, this.score);
    }

    /**
     * Orders bigrams by ascending {@link #score}.
     *
     * @param bigram the bigram to compare against
     * @return the result of {@link Double#compare(double, double)} on the two scores
     */
    @Override // java.lang.Comparable
    public int compareTo(Bigram bigram) {
        return Double.compare(this.score, bigram.score);
    }

    /**
     * Finds up to {@code i} bigrams with the best likelihood-ratio scores.
     *
     * <p>Only bigrams whose frequency is strictly greater than {@code i2} are
     * considered. If fewer than {@code i} bigrams qualify, the returned array
     * is shorter than {@code i} rather than containing {@code null} entries.
     *
     * @param corpus the corpus supplying bigrams and frequencies
     * @param i      the maximum number of bigrams to return
     * @param i2     the frequency a bigram must exceed to be considered
     * @return the selected bigrams, each carrying its (non-negated) score;
     *         read back from the selection buffer in reverse, which presumably
     *         yields descending score order (depends on {@code HeapSelect.sort()})
     */
    public static Bigram[] of(Corpus corpus, int i, int i2) {
        Bigram[] selected = new Bigram[i];
        HeapSelect heapSelect = new HeapSelect(selected);
        Iterator<smile.nlp.Bigram> bigrams = corpus.getBigrams();
        while (bigrams.hasNext()) {
            smile.nlp.Bigram next = bigrams.next();
            int frequency = corpus.getBigramFrequency(next);
            if (frequency > i2) {
                double ratio = likelihoodRatio(
                        corpus.getTermFrequency(next.w1),
                        corpus.getTermFrequency(next.w2),
                        frequency,
                        corpus.size());
                // The score is negated on the way in, presumably because
                // HeapSelect retains the smallest elements; it is negated
                // back when the result array is built.
                heapSelect.add(new Bigram(next.w1, next.w2, frequency, -ratio));
            }
        }
        heapSelect.sort();

        // Slots of the buffer that were never filled stay null when fewer than
        // i bigrams passed the filter; count the real entries so that we never
        // dereference an empty slot.
        int found = 0;
        for (Bigram candidate : selected) {
            if (candidate != null) {
                found++;
            }
        }

        Bigram[] result = new Bigram[found];
        int out = 0;
        for (int idx = i - 1; idx >= 0; idx--) {
            Bigram candidate = selected[idx];
            if (candidate != null) {
                result[out++] = new Bigram(candidate.w1, candidate.w2, candidate.count, -candidate.score);
            }
        }
        return result;
    }

    /**
     * Finds all bigrams whose likelihood-ratio score exceeds the quantile of
     * the reference distribution at {@code d}.
     *
     * <p>Only bigrams whose frequency is strictly greater than {@code i} are
     * considered.
     *
     * @param corpus the corpus supplying bigrams and frequencies
     * @param d      the probability passed to the quantile function; must lie
     *               strictly between 0 and 1
     * @param i      the frequency a bigram must exceed to be considered
     * @return the qualifying bigrams sorted by descending score
     * @throws IllegalArgumentException if {@code d} is not in the open interval (0, 1)
     */
    public static Bigram[] of(Corpus corpus, double d, int i) {
        if (d <= 0.0d || d >= 1.0d) {
            throw new IllegalArgumentException("Invalid p = " + d);
        }
        double cutoff = chisq.quantile(d);
        ArrayList<Bigram> significant = new ArrayList<>();
        Iterator<smile.nlp.Bigram> bigrams = corpus.getBigrams();
        while (bigrams.hasNext()) {
            smile.nlp.Bigram next = bigrams.next();
            int frequency = corpus.getBigramFrequency(next);
            if (frequency > i) {
                double ratio = likelihoodRatio(
                        corpus.getTermFrequency(next.w1),
                        corpus.getTermFrequency(next.w2),
                        frequency,
                        corpus.size());
                if (ratio > cutoff) {
                    significant.add(new Bigram(next.w1, next.w2, frequency, ratio));
                }
            }
        }
        Bigram[] result = significant.toArray(new Bigram[0]);
        Arrays.sort(result, Collections.reverseOrder());
        return result;
    }

    /**
     * Computes the likelihood-ratio statistic {@code -2 log(lambda)} for a bigram.
     *
     * @param c1  the frequency of the first word
     * @param c2  the frequency of the second word
     * @param c12 the frequency of the bigram
     * @param n   the corpus size
     * @return {@code -2} times the log likelihood ratio
     */
    private static double likelihoodRatio(int c1, int c2, int c12, long n) {
        // The casts are essential: without them these are integer divisions
        // that truncate every probability to 0 or 1.
        double p = (double) c2 / n;
        double p1 = (double) c12 / c1;
        double p2 = (double) (c2 - c12) / (n - c1);

        double logLambda = logL(c12, c1, p)
                + logL(c2 - c12, n - c1, p)
                - logL(c12, c1, p1)
                - logL(c2 - c12, n - c1, p2);
        return -2.0d * logLambda;
    }

    /**
     * Computes {@code k * log(x) + (n - k) * log(1 - x)}.
     *
     * @param k the first count
     * @param n the total count
     * @param x the probability; exactly 0 is replaced by 0.01 and exactly 1 by
     *          0.99 so that neither logarithm receives zero
     * @return the value of the expression above
     */
    private static double logL(int k, long n, double x) {
        if (x == 0.0d) {
            x = 0.01d;
        }
        if (x == 1.0d) {
            x = 0.99d;
        }
        return (k * Math.log(x)) + ((n - k) * Math.log(1.0d - x));
    }
}
