package dragon.ir.classification;

import dragon.ir.index.IRDoc;
import dragon.ir.index.IRTerm;
import dragon.ir.index.IndexReader;
import dragon.matrix.DoubleFlatDenseMatrix;
import dragon.matrix.IntRow;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Random;

/* loaded from: input_file:dragon/ir/classification/NigamActiveLearning.class */
/**
 * Naive Bayes classifier that refines its model with unlabeled documents.
 *
 * <p>Training first estimates the model from the labeled documents alone, then repeatedly
 * classifies the unlabeled documents with the current model and re-estimates the model from
 * the labeled documents plus the freshly classified ones. Iteration stops when the tracked
 * score changes by no more than {@code convergeThreshold} or after {@code runNum} rounds.
 *
 * <p>Unlabeled documents may live in the classifier's own index
 * ({@link #setUnlabeledData(DocClass)}) or in a separate index
 * ({@link #setUnlabeledData(IndexReader, DocClass)}). Documents from a separate index are
 * copied into {@code externalUnlabeled}, re-indexed starting at {@code externalDocOffset}
 * and tagged with a key prefix so they can be recognised later.
 *
 * <p>NOTE(review): the class name suggests the EM scheme of Nigam et al.; that is an
 * assumption, nothing in this file states it.
 */
public class NigamActiveLearning extends NBClassifier {
    /** Key prefix marking a document whose term vector is stored in {@code externalUnlabeled}. */
    private static final String EXTERNAL_KEY_PREFIX = "external_unlabeled";
    /** Maximum number of refinement rounds used by the index-reader constructor. */
    private static final int DEFAULT_RUN_NUM = 15;
    /** Convergence threshold used by the index-reader constructor. */
    private static final double DEFAULT_CONVERGE_THRESHOLD = 1.0E-4d;
    /** Fixed seed so the sample of testing documents is the same on every run. */
    private static final long SHUFFLE_SEED = 10L;
    /** Shared empty array standing in for a missing term list. */
    private static final int[] NO_TERMS = new int[0];

    private IntRow[] externalUnlabeled;
    private DocClass unlabeledSet;
    private DocClass unlabeledSetBackup;
    private int externalDocOffset;
    private double convergeThreshold;
    private double unlabeledRate;
    private int runNum;

    /**
     * Delegates to the superclass constructor taking a string.
     *
     * <p>None of this class's own settings are initialised here, so {@code runNum} stays 0
     * and {@link #train(DocClassSet)} performs no refinement rounds on such an instance.
     *
     * @param str argument forwarded unchanged to the superclass
     */
    public NigamActiveLearning(String str) {
        super(str);
    }

    /**
     * Creates a classifier over the given index.
     *
     * @param indexReader index of the labeled collection; its document count becomes the
     *                    first index assigned to external unlabeled documents
     * @param d           fraction of the testing documents that
     *                    {@link #classify(DocClassSet, DocClass)} adds to the unlabeled pool
     */
    public NigamActiveLearning(IndexReader indexReader, double d) {
        super(indexReader);
        this.externalDocOffset = indexReader.getCollection().getDocNum();
        this.runNum = DEFAULT_RUN_NUM;
        this.convergeThreshold = DEFAULT_CONVERGE_THRESHOLD;
        this.unlabeledRate = d;
    }

    /**
     * Registers unlabeled documents that are indexed by a different index reader.
     *
     * <p>Each document's terms are translated into this classifier's term indexes; terms
     * unknown to this classifier are dropped. Documents without a term list, or with no
     * term in common, are skipped. Every kept document is MODIFIED in place: its index is
     * set to {@code externalDocOffset + position} and its key is prefixed with
     * {@code "external_unlabeled"}.
     *
     * @param indexReader index the unlabeled documents belong to
     * @param docClass    the unlabeled documents
     */
    public void setUnlabeledData(IndexReader indexReader, DocClass docClass) {
        int[] termMap = getTermMap(indexReader, this.indexReader);
        this.externalUnlabeled = new IntRow[docClass.getDocNum()];
        this.unlabeledSet = new DocClass(0);
        int kept = 0;
        for (int d = 0; d < this.externalUnlabeled.length; d++) {
            IRDoc doc = docClass.getDoc(d);
            int[] termIndexList = indexReader.getTermIndexList(doc.getIndex());
            int[] termFrequencyList = indexReader.getTermFrequencyList(doc.getIndex());
            if (termIndexList == null) {
                continue;
            }
            // First pass: count shared terms so the arrays below are sized exactly.
            int shared = 0;
            for (int term : termIndexList) {
                if (termMap[term] >= 0) {
                    shared++;
                }
            }
            if (shared == 0) {
                continue;
            }
            int[] columns = new int[shared];
            int[] frequencies = new int[shared];
            int next = 0;
            for (int t = 0; t < termIndexList.length; t++) {
                int mapped = termMap[termIndexList[t]];
                if (mapped >= 0) {
                    columns[next] = mapped;
                    frequencies[next] = termFrequencyList[t];
                    next++;
                }
            }
            this.externalUnlabeled[kept] = new IntRow(kept, next, columns, frequencies);
            doc.setIndex(this.externalDocOffset + kept);
            doc.setKey(EXTERNAL_KEY_PREFIX + doc.getKey());
            this.unlabeledSet.addDoc(doc);
            kept++;
        }
    }

    /**
     * Registers unlabeled documents that are indexed by this classifier's own index reader
     * and discards any previously stored external term vectors.
     *
     * @param docClass the unlabeled documents
     */
    public void setUnlabeledData(DocClass docClass) {
        this.unlabeledSet = docClass;
        this.externalUnlabeled = null;
    }

    /**
     * Trains on the labeled set and then classifies the testing documents.
     *
     * <p>When {@code unlabeledRate} is positive, a fixed-seed random sample of the testing
     * documents is added to a temporary copy of the unlabeled pool for the duration of
     * training. The configured pool is restored afterwards, even if training throws.
     *
     * @param docClassSet labeled training documents grouped by class
     * @param docClass    documents to classify
     * @return the classification of {@code docClass}, or {@code null} when neither an index
     *         reader nor a document-term matrix is available
     */
    @Override
    public DocClassSet classify(DocClassSet docClassSet, DocClass docClass) {
        if (this.indexReader == null && this.doctermMatrix == null) {
            return null;
        }
        if (this.unlabeledRate > 0.0d) {
            this.unlabeledSetBackup = this.unlabeledSet;
            DocClass augmented = new DocClass(0);
            this.unlabeledSet = augmented;
            try {
                if (this.unlabeledSetBackup != null) {
                    for (int i = 0; i < this.unlabeledSetBackup.getDocNum(); i++) {
                        augmented.addDoc(this.unlabeledSetBackup.getDoc(i));
                    }
                }
                ArrayList<IRDoc> candidates = new ArrayList<IRDoc>(docClass.getDocNum());
                for (int i = 0; i < docClass.getDocNum(); i++) {
                    candidates.add(docClass.getDoc(i));
                }
                Collections.shuffle(candidates, new Random(SHUFFLE_SEED));
                // Clamp so a rate above 1.0 takes every document instead of running past the list.
                int sampleSize = Math.min(candidates.size(), (int) (this.unlabeledRate * candidates.size()));
                for (int i = 0; i < sampleSize; i++) {
                    augmented.addDoc(candidates.get(i));
                }
                train(docClassSet);
            } finally {
                // Restore first so the configured pool survives even if the cleanup fails.
                this.unlabeledSet = this.unlabeledSetBackup;
                augmented.removeAll();
            }
        } else {
            train(docClassSet);
        }
        return classify(docClass);
    }

    /**
     * Estimates the model from the labeled documents, then refines it with the unlabeled pool.
     *
     * <p>Does nothing when neither an index reader nor a document-term matrix is available.
     * When no unlabeled pool is configured, only the initial supervised estimate is made.
     *
     * @param docClassSet labeled training documents grouped by class
     */
    @Override
    public void train(DocClassSet docClassSet) {
        if (this.indexReader == null && this.doctermMatrix == null) {
            return;
        }
        this.classNum = docClassSet.getClassNum();
        this.arrLabel = new String[this.classNum];
        for (int c = 0; c < this.classNum; c++) {
            this.arrLabel[c] = docClassSet.getDocClass(c).getClassName();
        }
        eStep(docClassSet);
        if (this.unlabeledSet == null) {
            // Nothing to refine with; the supervised estimate above is the final model.
            return;
        }
        double previousScore = 0.0d;
        double currentScore = -Double.MAX_VALUE;
        for (int run = 0; Math.abs(currentScore - previousScore) > this.convergeThreshold && run < this.runNum; run++) {
            previousScore = currentScore;
            // Must run before scoring: it labels the pool with the model the score refers to.
            DocClassSet predicted = classify(this.unlabeledSet);
            currentScore = scoreLabeledSet(docClassSet);
            eStep(mergeSets(docClassSet, predicted));
        }
    }

    /**
     * Classifies one document and stores the winning class's entry of {@code lastClassProb}
     * as the document's weight.
     *
     * <p>A document without a term list is classified from an empty term vector instead of
     * failing with a {@code NullPointerException}.
     *
     * @param iRDoc document to classify; its weight is overwritten
     * @return index of the predicted class
     */
    @Override
    public int classify(IRDoc iRDoc) {
        int[] termIndexList = termIndexesOf(iRDoc);
        int[] termFrequencyList = termFrequenciesOf(iRDoc);
        if (termIndexList == null || termFrequencyList == null) {
            termIndexList = NO_TERMS;
            termFrequencyList = NO_TERMS;
        }
        int label = classify(new IntRow(0, termIndexList.length, termIndexList, termFrequencyList));
        // presumably lastClassProb is filled by classify(IntRow) in the superclass; confirm there
        iRDoc.setWeight(this.lastClassProb.get(label));
        return label;
    }

    /**
     * Computes the score used for the convergence test: the sum of the labeled documents'
     * weights, plus for every labeled document its class prior entry and the
     * frequency-weighted model entries of its selected terms.
     *
     * <p>A single accumulator is used in the original order so floating-point results match.
     *
     * <p>NOTE(review): the weights read here come from the LABELED documents, whereas
     * {@link #classify(IRDoc)} writes weights on the unlabeled ones; confirm this is intended.
     */
    private double scoreLabeledSet(DocClassSet labeled) {
        double score = 0.0d;
        for (int c = 0; c < labeled.getClassNum(); c++) {
            DocClass docs = labeled.getDocClass(c);
            for (int k = 0; k < docs.getDocNum(); k++) {
                score += docs.getDoc(k).getWeight();
            }
        }
        for (int c = 0; c < labeled.getClassNum(); c++) {
            DocClass docs = labeled.getDocClass(c);
            for (int k = 0; k < docs.getDocNum(); k++) {
                IRDoc doc = docs.getDoc(k);
                double docScore = this.classPrior.get(c);
                int[] termIndexList = this.indexReader.getTermIndexList(doc.getIndex());
                int[] termFrequencyList = this.indexReader.getTermFrequencyList(doc.getIndex());
                if (termIndexList != null) {
                    for (int t = 0; t < termIndexList.length; t++) {
                        int feature = this.featureSelector.map(termIndexList[t]);
                        if (feature >= 0) {
                            docScore += termFrequencyList[t] * this.model.getDouble(c, feature);
                        }
                    }
                }
                score += docScore;
            }
        }
        return score;
    }

    /** Builds a new set holding, class by class, the labeled documents followed by the predicted ones. */
    private DocClassSet mergeSets(DocClassSet labeled, DocClassSet predicted) {
        DocClassSet merged = new DocClassSet(labeled.getClassNum());
        appendAll(merged, labeled);
        appendAll(merged, predicted);
        return merged;
    }

    /** Adds every document of {@code source} to the class with the same index in {@code target}. */
    private void appendAll(DocClassSet target, DocClassSet source) {
        for (int c = 0; c < source.getClassNum(); c++) {
            DocClass docs = source.getDocClass(c);
            for (int k = 0; k < docs.getDocNum(); k++) {
                target.addDoc(c, docs.getDoc(k));
            }
        }
    }

    /**
     * Re-estimates the whole model from the given documents: class priors, the feature
     * selection, and a class-by-feature matrix of log probabilities with add-one smoothing
     * (every cell starts at 1 and each class total starts at the selected feature count).
     */
    private void eStep(DocClassSet docClassSet) {
        this.classPrior = getClassPrior(docClassSet);
        this.featureSelector.train(this.indexReader, docClassSet);
        this.model = new DoubleFlatDenseMatrix(docClassSet.getClassNum(), this.featureSelector.getSelectedFeatureNum());
        this.model.assign(1.0d);
        for (int c = 0; c < docClassSet.getClassNum(); c++) {
            int classTotal = this.featureSelector.getSelectedFeatureNum();
            DocClass docs = docClassSet.getDocClass(c);
            for (int k = 0; k < docs.getDocNum(); k++) {
                IRDoc doc = docs.getDoc(k);
                int[] termIndexList = termIndexesOf(doc);
                int[] termFrequencyList = termFrequenciesOf(doc);
                if (termIndexList == null || termFrequencyList == null) {
                    // A document without terms contributes no counts.
                    continue;
                }
                for (int t = 0; t < termIndexList.length; t++) {
                    int feature = this.featureSelector.map(termIndexList[t]);
                    if (feature >= 0) {
                        classTotal += termFrequencyList[t];
                        this.model.add(c, feature, termFrequencyList[t]);
                    }
                }
            }
            double normalizer = 1.0d / classTotal;
            for (int f = 0; f < this.model.columns(); f++) {
                this.model.setDouble(c, f, Math.log(this.model.getDouble(c, f) * normalizer));
            }
        }
    }

    /** Tells whether the document's key carries the external-unlabeled prefix. */
    private boolean isExternal(IRDoc doc) {
        return doc.getKey().startsWith(EXTERNAL_KEY_PREFIX);
    }

    /** Returns the document's term indexes, from the external store or from the index reader. */
    private int[] termIndexesOf(IRDoc doc) {
        if (isExternal(doc)) {
            return this.externalUnlabeled[doc.getIndex() - this.externalDocOffset].getNonZeroColumns();
        }
        return this.indexReader.getTermIndexList(doc.getIndex());
    }

    /** Returns the document's term frequencies, from the external store or from the index reader. */
    private int[] termFrequenciesOf(IRDoc doc) {
        if (isExternal(doc)) {
            return this.externalUnlabeled[doc.getIndex() - this.externalDocOffset].getNonZeroIntScores();
        }
        return this.indexReader.getTermFrequencyList(doc.getIndex());
    }

    /**
     * Maps every term index of {@code source} to the index of the same term key in
     * {@code target}, or to -1 when {@code target} does not know the term.
     */
    private int[] getTermMap(IndexReader source, IndexReader target) {
        int[] map = new int[source.getCollection().getTermNum()];
        for (int t = 0; t < map.length; t++) {
            IRTerm term = target.getIRTerm(source.getTermKey(t));
            map[t] = (term != null) ? term.getIndex() : -1;
        }
        return map;
    }
}
