package dragon.ir.classification.featureselection;

import dragon.ir.classification.DocClass;
import dragon.ir.classification.DocClassSet;
import dragon.ir.index.IndexReader;
import dragon.matrix.SparseMatrix;
import dragon.matrix.vector.DoubleVector;
import dragon.nlp.Token;
import dragon.nlp.compare.IndexComparator;
import dragon.nlp.compare.WeightComparator;
import dragon.util.MathUtil;
import dragon.util.SortedArray;
import java.io.Serializable;

/* loaded from: input_file:dragon/ir/classification/featureselection/InfoGainFeatureSelector.class */
/**
 * Feature selector that scores every term of an indexed collection by its information gain
 * with respect to the class labels of a training set and keeps the highest-scoring terms.
 *
 * <p>For a term {@code t} the score is
 * {@code H(C) - P(t) * H(C | t) - P(not t) * H(C | not t)}, where all probabilities are
 * estimated from document counts over the labelled training documents. Only terms with a
 * strictly positive score are candidates for selection.
 */
public class InfoGainFeatureSelector extends AbstractFeatureSelector implements Serializable {
    private static final long serialVersionUID = 1;

    /** Multiplied by the collection's term count to obtain the maximum number of features kept. */
    private final double topPercentage;

    /**
     * Creates a selector.
     *
     * @param d fraction of the collection's term count to keep; values that yield a non-positive
     *          count result in an empty selection
     */
    public InfoGainFeatureSelector(double d) {
        this.topPercentage = d;
    }

    /**
     * Unsupported input type: prints a notice to standard output and selects nothing.
     *
     * @param sparseMatrix ignored
     * @param docClassSet  ignored
     * @return always {@code null}
     */
    @Override
    protected int[] getSelectedFeatures(SparseMatrix sparseMatrix, DocClassSet docClassSet) {
        System.out.println("InfoGainSelector does not accept SparseMatrix as input. Please use IndexReader as input instead.");
        return null;
    }

    /**
     * Selects the best-scoring terms and returns their indexes.
     *
     * @param indexReader index over the document collection
     * @param docClassSet labelled training documents
     * @return term indexes of the selected features, collected through a {@link SortedArray}
     *         ordered by {@link IndexComparator}
     */
    @Override
    protected int[] getSelectedFeatures(IndexReader indexReader, DocClassSet docClassSet) {
        SortedArray rankedTerms = computeTermIG(indexReader, docClassSet);
        int requested = (int) (this.topPercentage * indexReader.getCollection().getTermNum());
        // Clamp at zero so a negative percentage yields an empty selection instead of a
        // negative capacity being handed to SortedArray.
        int keep = Math.max(0, Math.min(rankedTerms.size(), requested));

        // Re-insert the winners into an index-ordered array so the result is not in rank order.
        SortedArray byIndex = new SortedArray(keep, new IndexComparator());
        for (int rank = 0; rank < keep; rank++) {
            byIndex.add(rankedTerms.get(rank));
        }

        int[] selected = new int[byIndex.size()];
        for (int pos = 0; pos < selected.length; pos++) {
            selected[pos] = ((Token) byIndex.get(pos)).getIndex();
        }
        return selected;
    }

    /**
     * Computes the information gain of every term and returns the terms whose gain is strictly
     * positive, each as a {@link Token} carrying the term index and the gain as its weight.
     *
     * <p>The returned array has its comparator switched to {@code new WeightComparator(true)}
     * after all tokens were inserted; callers read it from position 0 upwards as a ranking
     * (NOTE(review): relies on {@code SortedArray.setComparator} re-ordering the content and on
     * {@code WeightComparator(true)} meaning "largest weight first" - confirm).
     *
     * @param indexReader index over the document collection
     * @param docClassSet labelled training documents
     * @return tokens of all terms with positive information gain
     */
    private SortedArray computeTermIG(IndexReader indexReader, DocClassSet docClassSet) {
        int classNum = docClassSet.getClassNum();

        // Exact per-class document counts. Rebuilding them as prior * total can round to
        // slightly below the integer, which later produces tiny negative "absent" counts and a
        // NaN entropy for terms that cover a whole class.
        int[] classDocCounts = new int[classNum];
        int labeledDocNum = 0;
        for (int c = 0; c < classNum; c++) {
            classDocCounts[c] = docClassSet.getDocClass(c).getDocNum();
            labeledDocNum += classDocCounts[c];
        }

        double classEntropy = calEntropy(getClassPrior(docClassSet));

        // Class label per document index; -1 marks documents outside the training set.
        int[] docLabels = new int[indexReader.getCollection().getDocNum()];
        MathUtil.initArray(docLabels, -1);
        for (int c = 0; c < classNum; c++) {
            DocClass docClass = docClassSet.getDocClass(c);
            for (int d = 0; d < docClass.getDocNum(); d++) {
                docLabels[docClass.getDoc(d).getIndex()] = c;
            }
        }

        int termNum = indexReader.getCollection().getTermNum();
        DoubleVector gains = new DoubleVector(termNum);
        DoubleVector presentDist = new DoubleVector(classNum);
        DoubleVector absentDist = new DoubleVector(classNum);
        int[] presentCounts = new int[classNum];

        for (int term = 0; term < termNum; term++) {
            int[] termDocs = indexReader.getTermDocIndexList(term);
            if (termDocs == null || termDocs.length == 0) {
                continue;
            }

            for (int c = 0; c < classNum; c++) {
                presentCounts[c] = 0;
            }
            int presentTotal = 0;
            for (int docIndex : termDocs) {
                int label = docLabels[docIndex];
                if (label >= 0) {
                    presentCounts[label]++;
                    presentTotal++;
                }
            }
            if (presentTotal == 0) {
                // The term never occurs in a labelled document; its gain stays at zero.
                continue;
            }

            // P(t) * H(C | t)
            for (int c = 0; c < classNum; c++) {
                presentDist.set(c, presentCounts[c] / (double) presentTotal);
            }
            double conditionalEntropy =
                    (presentTotal / (double) labeledDocNum) * calEntropy(presentDist);

            // P(not t) * H(C | not t); skipped when the term occurs in every labelled document,
            // where the weight is zero and the distribution would be 0/0.
            int absentTotal = labeledDocNum - presentTotal;
            if (absentTotal > 0) {
                for (int c = 0; c < classNum; c++) {
                    absentDist.set(c, (classDocCounts[c] - presentCounts[c]) / (double) absentTotal);
                }
                conditionalEntropy +=
                        (absentTotal / (double) labeledDocNum) * calEntropy(absentDist);
            }

            gains.set(term, classEntropy - conditionalEntropy);
        }

        // Tokens are inserted under index order first, then the comparator is switched to weight.
        SortedArray ranked = new SortedArray(termNum, new IndexComparator());
        for (int term = 0; term < gains.size(); term++) {
            double gain = gains.get(term);
            if (gain > 0.0d) {
                Token token = new Token(term, 0);
                token.setWeight(gain);
                ranked.add(token);
            }
        }
        ranked.setComparator(new WeightComparator(true));
        return ranked;
    }

    /**
     * Computes the entropy {@code -sum(p * ln p)} of the given vector using natural logarithms.
     * Entries equal to zero are replaced by {@link Double#MIN_VALUE} so that the logarithm stays
     * finite; their contribution is negligibly small.
     *
     * @param doubleVector vector of probabilities
     * @return the entropy in nats
     */
    private double calEntropy(DoubleVector doubleVector) {
        double entropy = 0.0d;
        for (int i = 0; i < doubleVector.size(); i++) {
            double p = doubleVector.get(i);
            if (p == 0.0d) {
                p = Double.MIN_VALUE;
            }
            entropy -= p * Math.log(p);
        }
        return entropy;
    }
}
