package org.apache.lucene.classification;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.BytesRef;

/* loaded from: input_file:org/apache/lucene/classification/SimpleNaiveBayesClassifier.class */
/**
 * A naive Bayes {@link Classifier} that scores every term of a "class" field against the
 * tokens of an input text, using document counts taken from a Lucene index.
 *
 * <p>For each class the score is {@code log prior + log likelihood}; the per-class scores are
 * then normalized so that they sum to one. The classifier must be initialized through one of
 * the {@code train} methods before any classification method is called.
 */
public class SimpleNaiveBayesClassifier implements Classifier<BytesRef> {
    /** Reader over the index used for all statistics; {@code null} until {@code train} is called. */
    protected LeafReader leafReader;
    /** Names of the fields holding the text the classifier compares tokens against. */
    protected String[] textFieldNames;
    /** Name of the field whose terms are the candidate classes. */
    protected String classFieldName;
    /** Analyzer used to tokenize the text to classify. */
    protected Analyzer analyzer;
    /** Searcher created over {@link #leafReader} during training. */
    protected IndexSearcher indexSearcher;
    /** Optional extra filter added as a MUST clause to the count queries; may be {@code null}. */
    protected Query query;

    /**
     * Trains on a single text field without any additional filter query.
     *
     * @param leafReader reader over the index to gather statistics from
     * @param str name of the text field
     * @param str2 name of the class field
     * @param analyzer analyzer used to tokenize the text to classify
     * @throws IOException declared by the delegate overload
     */
    @Override
    public void train(LeafReader leafReader, String str, String str2, Analyzer analyzer) throws IOException {
        train(leafReader, str, str2, analyzer, (Query) null);
    }

    /**
     * Trains on a single text field, optionally restricted by a filter query.
     *
     * @param leafReader reader over the index to gather statistics from
     * @param str name of the text field
     * @param str2 name of the class field
     * @param analyzer analyzer used to tokenize the text to classify
     * @param query optional filter query, may be {@code null}
     * @throws IOException declared by the delegate overload
     */
    @Override
    public void train(LeafReader leafReader, String str, String str2, Analyzer analyzer, Query query) throws IOException {
        train(leafReader, new String[]{str}, str2, analyzer, query);
    }

    /**
     * Stores the training configuration and creates the searcher over the given reader.
     * No statistics are precomputed here; they are read from the index at classification time.
     *
     * @param leafReader reader over the index to gather statistics from
     * @param strArr names of the text fields
     * @param str name of the class field
     * @param analyzer analyzer used to tokenize the text to classify
     * @param query optional filter query, may be {@code null}
     * @throws IOException declared by the interface
     */
    @Override
    public void train(LeafReader leafReader, String[] strArr, String str, Analyzer analyzer, Query query) throws IOException {
        this.leafReader = leafReader;
        this.indexSearcher = new IndexSearcher(this.leafReader);
        this.textFieldNames = strArr;
        this.classFieldName = str;
        this.analyzer = analyzer;
        this.query = query;
    }

    /**
     * Returns the class with the highest normalized score for the given text.
     *
     * @param str the text to classify
     * @return the best scoring result, or {@code null} when no class could be scored
     * @throws IOException if the classifier was not trained or the index cannot be read
     */
    @Override
    public ClassificationResult<BytesRef> assignClass(String str) throws IOException {
        ClassificationResult<BytesRef> best = null;
        double bestScore = -Double.MAX_VALUE;
        for (ClassificationResult<BytesRef> candidate : assignClassNormalizedList(str)) {
            if (candidate.getScore() > bestScore) {
                best = candidate;
                bestScore = candidate.getScore();
            }
        }
        return best;
    }

    /**
     * Returns all scored classes for the given text, sorted by the natural ordering of
     * {@link ClassificationResult}.
     *
     * @param str the text to classify
     * @return the sorted list of results, possibly empty
     * @throws IOException if the classifier was not trained or the index cannot be read
     */
    @Override
    public List<ClassificationResult<BytesRef>> getClasses(String str) throws IOException {
        List<ClassificationResult<BytesRef>> ranked = assignClassNormalizedList(str);
        Collections.sort(ranked);
        return ranked;
    }

    /**
     * Returns at most {@code i} scored classes for the given text, taken from the head of the
     * list sorted by the natural ordering of {@link ClassificationResult}.
     *
     * @param str the text to classify
     * @param i maximum number of results to return; must not be negative
     * @return the first {@code i} sorted results, or all of them when fewer are available
     * @throws IOException if the classifier was not trained or the index cannot be read
     */
    @Override
    public List<ClassificationResult<BytesRef>> getClasses(String str, int i) throws IOException {
        List<ClassificationResult<BytesRef>> ranked = assignClassNormalizedList(str);
        Collections.sort(ranked);
        // Clamp the upper bound: asking for more results than there are classes must not throw.
        return ranked.subList(0, Math.min(i, ranked.size()));
    }

    /**
     * Scores every term of the class field against the given text and normalizes the scores
     * so that they sum to one.
     *
     * @param text the text to classify
     * @return a mutable, unsorted-by-contract list of normalized results; empty when the class
     *         field has no terms
     * @throws IOException if {@code train} was not called or the index cannot be read
     */
    private List<ClassificationResult<BytesRef>> assignClassNormalizedList(String text) throws IOException {
        if (this.leafReader == null) {
            throw new IOException("You must first call Classifier#train");
        }
        List<ClassificationResult<BytesRef>> normalized = new ArrayList<>();
        Terms classTerms = MultiFields.getTerms(this.leafReader, this.classFieldName);
        if (classTerms == null) {
            // No terms for the class field: there is nothing to score.
            return normalized;
        }
        TermsEnum classes = classTerms.iterator();
        String[] tokens = tokenizeDoc(text);
        int docsWithClass = countDocsWithClass();

        List<ClassificationResult<BytesRef>> logScores = new ArrayList<>();
        BytesRef classTerm;
        while ((classTerm = classes.next()) != null) {
            double logScore = calculateLogPrior(classTerm, docsWithClass)
                    + calculateLogLikelihood(tokens, classTerm, docsWithClass);
            // Store a deep copy (presumably because the enum may reuse the returned BytesRef).
            logScores.add(new ClassificationResult<>(BytesRef.deepCopyOf(classTerm), logScore));
        }
        if (logScores.isEmpty()) {
            return normalized;
        }

        // log-sum-exp: subtract a reference score before exponentiating, then add it back.
        // The first element after sorting is used as reference (presumably the highest score,
        // which keeps the exponentials from overflowing).
        Collections.sort(logScores);
        double referenceScore = logScores.get(0).getScore();
        double expSum = 0.0d;
        for (ClassificationResult<BytesRef> result : logScores) {
            expSum += Math.exp(result.getScore() - referenceScore);
        }
        double logNormalizer = referenceScore + Math.log(expSum);
        for (ClassificationResult<BytesRef> result : logScores) {
            normalized.add(new ClassificationResult<>(result.getAssignedClass(),
                    Math.exp(result.getScore() - logNormalizer)));
        }
        return normalized;
    }

    /**
     * Counts the documents that have a value in the class field.
     *
     * <p>The count reported by the field's terms is used when available. When it is reported as
     * {@code -1}, or the field has no terms, the count is obtained by running a wildcard query on
     * the class field; only in that fallback is the optional filter {@link #query} applied.
     *
     * @return the number of documents carrying the class field
     * @throws IOException if the index cannot be read
     */
    public int countDocsWithClass() throws IOException {
        Terms classTerms = MultiFields.getTerms(this.leafReader, this.classFieldName);
        int docCount = classTerms == null ? -1 : classTerms.getDocCount();
        if (docCount == -1) {
            BooleanQuery countQuery = new BooleanQuery();
            countQuery.add(new BooleanClause(new WildcardQuery(new Term(this.classFieldName, "*")),
                    BooleanClause.Occur.MUST));
            if (this.query != null) {
                countQuery.add(this.query, BooleanClause.Occur.MUST);
            }
            TotalHitCountCollector hitCounter = new TotalHitCountCollector();
            this.indexSearcher.search(countQuery, hitCounter);
            docCount = hitCounter.getTotalHits();
        }
        return docCount;
    }

    /**
     * Tokenizes the given text once per configured text field and concatenates the tokens.
     *
     * @param str the text to tokenize
     * @return the tokens produced for every text field, in field order
     * @throws IOException if the analyzer fails while producing tokens
     */
    public String[] tokenizeDoc(String str) throws IOException {
        List<String> tokens = new ArrayList<>();
        for (String fieldName : this.textFieldNames) {
            // try-with-resources closes the stream and keeps the primary exception if close() fails too.
            try (TokenStream tokenStream = this.analyzer.tokenStream(fieldName, str)) {
                CharTermAttribute termAttribute = tokenStream.addAttribute(CharTermAttribute.class);
                tokenStream.reset();
                while (tokenStream.incrementToken()) {
                    tokens.add(termAttribute.toString());
                }
                tokenStream.end();
            }
        }
        return tokens.toArray(new String[tokens.size()]);
    }

    /**
     * Sums, over all tokens, the log of {@code (wordFreq + 1) / (textTermFreq + docsWithClass)}
     * for the given class.
     *
     * @param tokens tokens of the text to classify
     * @param classTerm the class being scored
     * @param docsWithClass number of documents carrying the class field
     * @return the summed log likelihood; {@code 0} when there are no tokens
     * @throws IOException if the index cannot be read
     */
    private double calculateLogLikelihood(String[] tokens, BytesRef classTerm, int docsWithClass) throws IOException {
        if (tokens.length == 0) {
            return 0.0d;
        }
        // The denominator does not depend on the token, so compute it once per class.
        double denominator = getTextTermFreqForClass(classTerm) + docsWithClass;
        double logLikelihood = 0.0d;
        for (String token : tokens) {
            // +1 in the numerator keeps the ratio above zero for tokens never seen with this class.
            double numerator = getWordFreqForClass(token, classTerm) + 1;
            logLikelihood += Math.log(numerator / denominator);
        }
        return logLikelihood;
    }

    /**
     * Computes, summed over the text fields, the ratio of the field's summed document frequency
     * to its document count, multiplied by the number of documents containing the class term.
     *
     * @param classTerm the class being scored
     * @return the computed value
     * @throws IOException if the index cannot be read
     */
    private double getTextTermFreqForClass(BytesRef classTerm) throws IOException {
        double ratioSum = 0.0d;
        for (String fieldName : this.textFieldNames) {
            Terms terms = MultiFields.getTerms(this.leafReader, fieldName);
            if (terms == null) {
                // Field has no terms in this index: it contributes nothing.
                continue;
            }
            // Divide in double arithmetic; integer division would truncate the ratio.
            ratioSum += terms.getSumDocFreq() / (double) terms.getDocCount();
        }
        return ratioSum * this.leafReader.docFreq(new Term(this.classFieldName, classTerm));
    }

    /**
     * Counts the documents that contain the given word in at least one text field and also
     * carry the given class term, restricted by the optional filter {@link #query}.
     *
     * @param word the token to look for
     * @param classTerm the class being scored
     * @return the number of matching documents
     * @throws IOException if the search fails
     */
    private int getWordFreqForClass(String word, BytesRef classTerm) throws IOException {
        BooleanQuery anyTextField = new BooleanQuery();
        for (String fieldName : this.textFieldNames) {
            anyTextField.add(new BooleanClause(new TermQuery(new Term(fieldName, word)), BooleanClause.Occur.SHOULD));
        }
        BooleanQuery wordInClass = new BooleanQuery();
        wordInClass.add(new BooleanClause(anyTextField, BooleanClause.Occur.MUST));
        wordInClass.add(new BooleanClause(new TermQuery(new Term(this.classFieldName, classTerm)), BooleanClause.Occur.MUST));
        if (this.query != null) {
            wordInClass.add(this.query, BooleanClause.Occur.MUST);
        }
        TotalHitCountCollector hitCounter = new TotalHitCountCollector();
        this.indexSearcher.search(wordInClass, hitCounter);
        return hitCounter.getTotalHits();
    }

    /**
     * Computes {@code log(docs containing the class term) - log(docs carrying the class field)}.
     *
     * @param classTerm the class being scored
     * @param docsWithClass number of documents carrying the class field
     * @return the log prior of the class
     * @throws IOException if the index cannot be read
     */
    private double calculateLogPrior(BytesRef classTerm, int docsWithClass) throws IOException {
        return Math.log(docCount(classTerm)) - Math.log(docsWithClass);
    }

    /**
     * Returns the document frequency of the given term in the class field.
     *
     * @param classTerm the class term to look up
     * @return number of documents containing the term
     * @throws IOException if the index cannot be read
     */
    private int docCount(BytesRef classTerm) throws IOException {
        return this.leafReader.docFreq(new Term(this.classFieldName, classTerm));
    }
}
