package dragon.ir.kngbase;

import dragon.ir.index.IRSignatureIndexList;
import dragon.matrix.DoubleSuperSparseMatrix;
import dragon.matrix.IntSparseMatrix;
import dragon.nlp.Counter;
import dragon.nlp.Token;
import dragon.util.MathUtil;
import java.io.File;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Random;

/* loaded from: input_file:dragon/ir/kngbase/TopicSignatureModel.class */
/**
 * Builds, for each source signature, a weighted list of destination signatures
 * (a "translation" distribution whose weights sum to one) and can persist those
 * lists as a sparse matrix together with its transpose.
 *
 * <p>Two data sources are supported, selected by the constructor used:
 * <ul>
 *   <li><b>Document based</b>: the non-zero columns of a row of the source
 *       signature/document matrix are used as row indices into the destination
 *       document/signature matrix, and the destination columns found there are
 *       counted.</li>
 *   <li><b>Co-occurrence based</b>: the non-zero cells of the source row of a
 *       co-occurrence matrix are used directly as counts.</li>
 * </ul>
 *
 * <p>Counts below a trimming threshold are dropped, the rest are normalised, and
 * optionally refined by an EM loop that discounts a background distribution taken
 * from the destination index list.
 *
 * <p>Instances keep a scratch buffer and are therefore not safe for concurrent use.
 */
public class TopicSignatureModel {
    /** Source rows of the co-occurrence matrix with fewer non-zero cells than this are skipped. */
    private static final int MIN_COOCCUR_CELLS = 5;
    /** A progress line is printed every this many source rows. */
    private static final int PROGRESS_INTERVAL = 1000;
    /** Number of added cells after which both output matrices are flushed explicitly. */
    private static final int FLUSH_CELL_LIMIT = 5000000;
    /** Maximum number of randomly sampled rows used to estimate the average row size. */
    private static final int AVG_SAMPLE_SIZE = 50;
    /** Divisor applied when deriving the array-vs-hash switch point. */
    private static final double DOC_THRESHOLD_DIVISOR = 8.0d;
    /** Trimming threshold used when mean trimming is disabled. */
    private static final double DEFAULT_TRIM_THRESHOLD = 0.5d;
    private static final double DEFAULT_PROB_THRESHOLD = 0.001d;
    private static final double DEFAULT_BKG_COEFFICIENT = 0.5d;
    private static final int DEFAULT_EM_ITERATIONS = 15;

    private final IRSignatureIndexList srcIndexList;
    private final IRSignatureIndexList destIndexList;
    private final IntSparseMatrix srcSignatureDocMatrix;
    private final IntSparseMatrix destDocSignatureMatrix;
    private final IntSparseMatrix cooccurMatrix;
    private final int totalDestSignatureNum;
    private boolean useDocFrequency;
    private boolean useMeanTrim;
    private boolean useEM;
    private double probThreshold;
    private double bkgCoeffi;
    private int iterationNum;
    /** Scratch counters, one slot per destination signature; reused across rows. */
    private int[] buf;
    /**
     * Source rows with more non-zero columns than this are counted with the dense
     * buffer, the others with a hash map. Only set by {@link #genTransMatrix}.
     */
    private int docThreshold;

    /**
     * Creates a document-based model without EM refinement.
     *
     * @param srcIndexList index list of the source signatures
     * @param srcSignatureDocMatrix matrix whose rows are source signatures
     * @param destDocSignatureMatrix matrix whose columns are destination signatures
     */
    public TopicSignatureModel(IRSignatureIndexList srcIndexList, IntSparseMatrix srcSignatureDocMatrix,
            IntSparseMatrix destDocSignatureMatrix) {
        this(srcIndexList, null, srcSignatureDocMatrix, destDocSignatureMatrix, null, true, false);
    }

    /**
     * Creates a co-occurrence-based model without EM refinement.
     *
     * @param srcIndexList index list of the source signatures
     * @param cooccurMatrix matrix with source signatures as rows and destination signatures as columns
     */
    public TopicSignatureModel(IRSignatureIndexList srcIndexList, IntSparseMatrix cooccurMatrix) {
        this(srcIndexList, null, null, null, cooccurMatrix, false, false);
    }

    /**
     * Creates a co-occurrence-based model with EM refinement enabled.
     *
     * @param srcIndexList index list of the source signatures
     * @param destIndexList index list of the destination signatures, used as EM background
     * @param cooccurMatrix matrix with source signatures as rows and destination signatures as columns
     */
    public TopicSignatureModel(IRSignatureIndexList srcIndexList, IRSignatureIndexList destIndexList,
            IntSparseMatrix cooccurMatrix) {
        this(srcIndexList, destIndexList, null, null, cooccurMatrix, false, true);
    }

    /**
     * Creates a document-based model with EM refinement enabled.
     *
     * @param srcIndexList index list of the source signatures
     * @param srcSignatureDocMatrix matrix whose rows are source signatures
     * @param destIndexList index list of the destination signatures, used as EM background
     * @param destDocSignatureMatrix matrix whose columns are destination signatures
     */
    public TopicSignatureModel(IRSignatureIndexList srcIndexList, IntSparseMatrix srcSignatureDocMatrix,
            IRSignatureIndexList destIndexList, IntSparseMatrix destDocSignatureMatrix) {
        this(srcIndexList, destIndexList, srcSignatureDocMatrix, destDocSignatureMatrix, null, true, true);
    }

    /**
     * Shared initialisation for all public constructors.
     *
     * @param docBased {@code true} when the document matrices are the data source; this is
     *        also the initial value of the "use document frequency" option
     */
    private TopicSignatureModel(IRSignatureIndexList srcIndexList, IRSignatureIndexList destIndexList,
            IntSparseMatrix srcSignatureDocMatrix, IntSparseMatrix destDocSignatureMatrix,
            IntSparseMatrix cooccurMatrix, boolean docBased, boolean useEM) {
        this.srcIndexList = srcIndexList;
        this.destIndexList = destIndexList;
        this.srcSignatureDocMatrix = srcSignatureDocMatrix;
        this.destDocSignatureMatrix = destDocSignatureMatrix;
        this.cooccurMatrix = cooccurMatrix;
        this.useDocFrequency = docBased;
        this.useMeanTrim = true;
        this.probThreshold = DEFAULT_PROB_THRESHOLD;
        this.useEM = useEM;
        this.iterationNum = DEFAULT_EM_ITERATIONS;
        this.bkgCoeffi = DEFAULT_BKG_COEFFICIENT;
        this.totalDestSignatureNum = docBased ? destDocSignatureMatrix.columns() : cooccurMatrix.columns();
    }

    /**
     * Enables or disables EM refinement. EM reads the destination index list, so enabling
     * it on a model built without one leads to a {@link NullPointerException} when a
     * translation is generated.
     */
    public void setUseEM(boolean option) {
        this.useEM = option;
    }

    /** Returns whether EM refinement is applied to each generated distribution. */
    public boolean getUseEM() {
        return this.useEM;
    }

    /** Sets the weight given to the background distribution inside the EM loop. */
    public void setEMBackgroundCoefficient(double coefficient) {
        this.bkgCoeffi = coefficient;
    }

    /** Returns the weight given to the background distribution inside the EM loop. */
    public double getEMBackgroundCoefficient() {
        return this.bkgCoeffi;
    }

    /** Sets the number of EM iterations. */
    public void setEMIterationNum(int iterationNum) {
        this.iterationNum = iterationNum;
    }

    /** Returns the number of EM iterations. */
    public int getEMIterationNum() {
        return this.iterationNum;
    }

    /**
     * Chooses between counting one per matrix cell ({@code true}) and adding the cell
     * scores ({@code false}) in document-based mode; also selects document frequency
     * versus frequency for the EM background.
     */
    public void setUseDocFrequency(boolean option) {
        this.useDocFrequency = option;
    }

    /** Returns the current "use document frequency" option. */
    public boolean getUseDocFrequency() {
        return this.useDocFrequency;
    }

    /** Enables or disables using the mean count as the trimming threshold. */
    public void setUseMeanTrim(boolean option) {
        this.useMeanTrim = option;
    }

    /** Returns whether the mean count is used as the trimming threshold. */
    public boolean getUseMeanTrim() {
        return this.useMeanTrim;
    }

    /** Sets the minimum share of the total count a destination must reach to be kept. */
    public void setProbThreshold(double threshold) {
        this.probThreshold = threshold;
    }

    /** Returns the minimum share of the total count a destination must reach to be kept. */
    public double getProbThreshold() {
        return this.probThreshold;
    }

    /**
     * Generates the translation distribution of every eligible source signature and writes
     * it to {@code <matrixFolder>/<matrixKey>.index|.matrix}; the transposed matrix goes to
     * {@code <matrixFolder>/<matrixKey>t.index|t.matrix}. Existing files with those names
     * are deleted first.
     *
     * <p>A source signature is eligible when its document frequency is at least
     * {@code minDocFrequency} and, in co-occurrence mode, its row has at least
     * {@value #MIN_COOCCUR_CELLS} non-zero cells.
     *
     * <p>NOTE(review): the matrices are not closed if an exception escapes the loop; the
     * behaviour of closing an unfinalised matrix is not visible from this class.
     *
     * @param minDocFrequency minimum document frequency of a source signature
     * @param matrixFolder directory part of the output file names
     * @param matrixKey base name of the output files
     * @return always {@code true}
     */
    public boolean genTransMatrix(int minDocFrequency, String matrixFolder, String matrixKey) {
        String basePath = matrixFolder + "/" + matrixKey;
        String indexFile = basePath + ".index";
        String matrixFile = basePath + ".matrix";
        String transIndexFile = basePath + "t.index";
        String transMatrixFile = basePath + "t.matrix";
        deleteStaleFile(matrixFile);
        deleteStaleFile(indexFile);
        deleteStaleFile(transMatrixFile);
        deleteStaleFile(transIndexFile);

        // Presumably the huge interval turns automatic flushing off so that flushing
        // happens only at the explicit calls below -- confirm against the matrix class.
        DoubleSuperSparseMatrix transMatrix = new DoubleSuperSparseMatrix(indexFile, matrixFile, false, false);
        transMatrix.setFlushInterval(Integer.MAX_VALUE);
        DoubleSuperSparseMatrix transposedMatrix =
                new DoubleSuperSparseMatrix(transIndexFile, transMatrixFile, false, false);
        transposedMatrix.setFlushInterval(Integer.MAX_VALUE);

        int pendingCells = 0;
        int sourceCount = this.srcIndexList.size();
        this.buf = new int[this.totalDestSignatureNum];
        if (this.destDocSignatureMatrix != null) {
            this.docThreshold = computeDocThreshold(this.destDocSignatureMatrix);
        }

        for (int row = 0; row < sourceCount; row++) {
            if (row % PROGRESS_INTERVAL == 0) {
                System.out.println(new Date().toString() + " Processing Row#" + row);
            }
            if (!isEligibleSource(row, minDocFrequency)) {
                continue;
            }
            ArrayList<Token> translation = genSignatureTranslation(row);
            for (Token token : translation) {
                transMatrix.add(row, token.getIndex(), token.getWeight());
                transposedMatrix.add(token.getIndex(), row, token.getWeight());
            }
            pendingCells += translation.size();
            translation.clear();
            if (pendingCells >= FLUSH_CELL_LIMIT) {
                transposedMatrix.flush();
                transMatrix.flush();
                pendingCells = 0;
            }
        }

        transposedMatrix.finalizeData();
        transposedMatrix.close();
        transMatrix.finalizeData();
        transMatrix.close();
        return true;
    }

    /**
     * Computes the translation distribution of a single source signature.
     *
     * <p>When called without a preceding {@link #genTransMatrix} the array/hash switch
     * point is still zero, so every non-empty row is counted with the dense buffer.
     *
     * @param srcIndex row index of the source signature
     * @return tokens whose index is a destination signature and whose weight is its probability
     */
    public ArrayList<Token> genSignatureTranslation(int srcIndex) {
        ArrayList<Token> distribution;
        if (this.srcSignatureDocMatrix == null) {
            distribution = computeDistributionByCooccurMatrix(srcIndex);
        } else {
            int[] docs = this.srcSignatureDocMatrix.getNonZeroColumnsInRow(srcIndex);
            // Many rows to merge: a dense counter array is cheaper; few rows: a hash map is.
            distribution = docs.length > this.docThreshold
                    ? computeDistributionByArray(docs)
                    : computeDistributionByHash(docs);
        }
        if (this.useEM) {
            distribution = emTopicSignatureModel(distribution);
        }
        return distribution;
    }

    /** Removes a leftover output file, warning (but not failing) when it cannot be deleted. */
    private static void deleteStaleFile(String path) {
        File stale = new File(path);
        if (stale.exists() && !stale.delete()) {
            System.err.println("Warning: could not delete existing file " + path);
        }
    }

    /** Tells whether the source row passes the document-frequency and co-occurrence filters. */
    private boolean isEligibleSource(int row, int minDocFrequency) {
        if (this.srcIndexList.getIRSignature(row).getDocFrequency() < minDocFrequency) {
            return false;
        }
        return this.cooccurMatrix == null || this.cooccurMatrix.getNonZeroNumInRow(row) >= MIN_COOCCUR_CELLS;
    }

    /** Derives the array/hash switch point from the column count and the sampled average row size. */
    private int computeDocThreshold(IntSparseMatrix matrix) {
        return (int) ((matrix.columns() / computeAvgTermNum(matrix)) / DOC_THRESHOLD_DIVISOR);
    }

    /**
     * Estimates the average number of non-zero cells per row from up to
     * {@value #AVG_SAMPLE_SIZE} randomly chosen rows (sampling with replacement,
     * unseeded, so the estimate can differ between runs).
     */
    private double computeAvgTermNum(IntSparseMatrix matrix) {
        Random random = new Random();
        int sampleSize = Math.min(AVG_SAMPLE_SIZE, matrix.rows());
        double sum = 0.0d;
        for (int k = 0; k < sampleSize; k++) {
            sum += matrix.getNonZeroNumInRow(random.nextInt(matrix.rows()));
        }
        return sum / sampleSize;
    }

    /**
     * Returns the minimum count a destination needs to survive trimming: the mean count
     * (or {@value #DEFAULT_TRIM_THRESHOLD} when mean trimming is off), raised to
     * {@code total * minInitProb} when that is larger.
     */
    private double trimThreshold(double total, int distinctCount) {
        double threshold = this.useMeanTrim ? total / distinctCount : DEFAULT_TRIM_THRESHOLD;
        double floor = total * getMinInitProb();
        if (threshold < floor) {
            threshold = floor;
        }
        return threshold;
    }

    /** Sets every token's weight to its frequency divided by {@code total}. */
    private static void normalizeWeights(ArrayList<Token> tokens, double total) {
        for (Token token : tokens) {
            token.setWeight(token.getFrequency() / total);
        }
    }

    /** Builds the trimmed, normalised distribution from one row of the co-occurrence matrix. */
    private ArrayList<Token> computeDistributionByCooccurMatrix(int srcIndex) {
        int[] columns = this.cooccurMatrix.getNonZeroColumnsInRow(srcIndex);
        int[] scores = this.cooccurMatrix.getNonZeroIntScoresInRow(srcIndex);
        double total = 0.0d;
        for (int score : scores) {
            total += score;
        }
        double threshold = trimThreshold(total, scores.length);

        double keptTotal = 0.0d;
        ArrayList<Token> kept = new ArrayList<Token>();
        for (int k = 0; k < scores.length; k++) {
            if (scores[k] >= threshold) {
                kept.add(new Token(columns[k], scores[k]));
                keptTotal += scores[k];
            }
        }
        normalizeWeights(kept, keptTotal);
        return kept;
    }

    /**
     * Counts destination signatures over the given rows with the dense scratch buffer,
     * then trims and normalises. The result is ordered by destination index.
     *
     * @param docs row indices into the destination document/signature matrix
     */
    private ArrayList<Token> computeDistributionByArray(int[] docs) {
        if (this.buf == null) {
            this.buf = new int[this.totalDestSignatureNum];
        }
        MathUtil.initArray(this.buf, 0);
        for (int k = 0; k < docs.length; k++) {
            int[] columns = this.destDocSignatureMatrix.getNonZeroColumnsInRow(docs[k]);
            if (this.useDocFrequency) {
                for (int c = 0; c < columns.length; c++) {
                    this.buf[columns[c]]++;
                }
            } else {
                int[] scores = this.destDocSignatureMatrix.getNonZeroIntScoresInRow(docs[k]);
                for (int c = 0; c < columns.length; c++) {
                    this.buf[columns[c]] += scores[c];
                }
            }
        }

        double total = 0.0d;
        int distinct = 0;
        for (int k = 0; k < this.buf.length; k++) {
            if (this.buf[k] > 0) {
                distinct++;
                total += this.buf[k];
            }
        }
        double threshold = trimThreshold(total, distinct);

        double keptTotal = 0.0d;
        ArrayList<Token> kept = new ArrayList<Token>();
        for (int k = 0; k < this.buf.length; k++) {
            if (this.buf[k] >= threshold) {
                kept.add(new Token(k, this.buf[k]));
                keptTotal += this.buf[k];
            }
        }
        normalizeWeights(kept, keptTotal);
        return kept;
    }

    /**
     * Counts destination signatures over the given rows with a hash map, then trims and
     * normalises. Trimming is skipped when it could not remove anything (mean trimming
     * off and {@code total * minInitProb <= 1}).
     *
     * @param docs row indices into the destination document/signature matrix
     */
    private ArrayList<Token> computeDistributionByHash(int[] docs) {
        ArrayList<Token> counted = countTokensByHashMap(docs);
        double total = 0.0d;
        for (Token token : counted) {
            total += token.getFrequency();
        }

        ArrayList<Token> kept;
        double keptTotal;
        if (this.useMeanTrim || total * getMinInitProb() > 1.0d) {
            double threshold = trimThreshold(total, counted.size());
            kept = new ArrayList<Token>();
            keptTotal = 0.0d;
            for (Token token : counted) {
                if (token.getFrequency() >= threshold) {
                    kept.add(token);
                    keptTotal += token.getFrequency();
                }
            }
            counted.clear();
        } else {
            kept = counted;
            keptTotal = total;
        }
        normalizeWeights(kept, keptTotal);
        return kept;
    }

    /**
     * Aggregates, per destination signature, either the number of given rows containing it
     * (document-frequency mode) or the sum of its scores in those rows.
     *
     * @param docs row indices into the destination document/signature matrix
     * @return one token per distinct destination signature, its frequency set to the aggregate
     */
    private ArrayList<Token> countTokensByHashMap(int[] docs) {
        HashMap<Token, Counter> counters = new HashMap<Token, Counter>();
        for (int k = 0; k < docs.length; k++) {
            int cellCount = this.destDocSignatureMatrix.getNonZeroNumInRow(docs[k]);
            if (cellCount == 0) {
                continue;
            }
            int[] columns = this.destDocSignatureMatrix.getNonZeroColumnsInRow(docs[k]);
            int[] scores = this.useDocFrequency ? null : this.destDocSignatureMatrix.getNonZeroIntScoresInRow(docs[k]);
            for (int c = 0; c < cellCount; c++) {
                int increment = this.useDocFrequency ? 1 : scores[c];
                // The token doubles as the map key (assumes Token equality is index based).
                Token key = new Token(columns[c], increment);
                Counter counter = counters.get(key);
                if (counter == null) {
                    counters.put(key, new Counter(key.getFrequency()));
                } else {
                    counter.addCount(key.getFrequency());
                }
            }
        }

        ArrayList<Token> result = new ArrayList<Token>(counters.size());
        for (Token key : counters.keySet()) {
            key.setFrequency(counters.get(key).getCount());
            result.add(key);
        }
        counters.clear();
        return result;
    }

    /** Minimum share of the total count a destination must reach to be kept. */
    private double getMinInitProb() {
        return this.probThreshold;
    }

    /**
     * Refines the weights of the given tokens in place with a fixed number of EM
     * iterations of a two-component mixture: the token distribution itself and a
     * background built from the destination signatures' (document) frequencies,
     * normalised over the tokens in the list.
     *
     * @param tokens distribution to refine; its tokens are modified
     * @return the same list instance
     */
    private ArrayList<Token> emTopicSignatureModel(ArrayList<Token> tokens) {
        int count = tokens.size();
        double[] expected = new double[count];
        double[] background = new double[count];

        double backgroundTotal = 0.0d;
        for (int k = 0; k < count; k++) {
            Token token = tokens.get(k);
            if (this.useDocFrequency) {
                background[k] = this.destIndexList.getIRSignature(token.getIndex()).getDocFrequency();
            } else {
                background[k] = this.destIndexList.getIRSignature(token.getIndex()).getFrequency();
            }
            backgroundTotal += background[k];
        }
        for (int k = 0; k < count; k++) {
            background[k] = background[k] / backgroundTotal;
        }

        for (int iteration = 0; iteration < this.iterationNum; iteration++) {
            // E-step: expected count of each token attributed to the topic component.
            double expectedTotal = 0.0d;
            for (int k = 0; k < count; k++) {
                Token token = tokens.get(k);
                double topicPart = (1.0d - this.bkgCoeffi) * token.getWeight();
                expected[k] = (topicPart / (topicPart + (this.bkgCoeffi * background[k]))) * token.getFrequency();
                expectedTotal += expected[k];
            }
            // M-step: renormalise the expected counts into the new weights.
            for (int k = 0; k < count; k++) {
                tokens.get(k).setWeight(expected[k] / expectedTotal);
            }
        }
        return tokens;
    }
}
