/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.geneexpbase.scoring;

import com.wcohen.ss.BasicStringWrapper;
import com.wcohen.ss.BasicStringWrapperIterator;
import com.wcohen.ss.SerializableTFIDF;
import com.wcohen.ss.TFIDF;
import com.wcohen.ss.api.StringWrapper;
import com.wcohen.ss.tokens.SerializableSimpleTokenizer;
import de.julielab.geneexpbase.GeneExpRuntimeException;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.stream.IntStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Wraps a SecondString {@link TFIDF} distance whose statistics are learned from the stored
 * field values of a Lucene index, and offers scoring of string pairs against that model.
 *
 * <p>The scoring methods synchronize on the trained model object. Training itself is not
 * synchronized.
 */
public class TFIDFUtils implements Serializable {
    private static final Logger log = LoggerFactory.getLogger(TFIDFUtils.class);

    /**
     * When {@code true}, training only looks at (at most) the first 1000 documents of the index.
     */
    public static boolean TEST_MODE = false;

    /** The trained model; {@code null} until one of the {@code learnFromLuceneIndex} methods completed. */
    private TFIDF tfidf;

    /**
     * Runs the small self-contained demonstration in {@link #test()}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        TFIDFUtils tfidfUtils = new TFIDFUtils();
        tfidfUtils.test();
    }

    /**
     * Learns TF/IDF statistics from a single stored field of the given index.
     *
     * @param ir           reader on the index to learn from
     * @param contentField name of the stored field whose values serve as training strings
     * @throws IllegalArgumentException if a visited document does not have the field
     * @throws GeneExpRuntimeException  if reading a document from the index fails
     */
    public void learnFromLuceneIndex(IndexReader ir, String contentField) {
        this.learnFromLuceneIndex(ir, new String[]{contentField});
    }

    /**
     * Learns TF/IDF statistics from the given stored fields of the given index. For every
     * visited document, the value of each requested field is passed to the model as one
     * training string.
     *
     * <p>The model is only made available to the {@code score} methods after training has
     * completed, so a failed training run does not replace a previously learned model.
     *
     * @param ir            reader on the index to learn from
     * @param contentFields names of the stored fields whose values serve as training strings;
     *                      must contain at least one name
     * @throws IllegalArgumentException if {@code contentFields} is {@code null} or empty, or if a
     *                                  visited document lacks one of the fields
     * @throws GeneExpRuntimeException  if reading a document from the index fails
     */
    public void learnFromLuceneIndex(IndexReader ir, String[] contentFields) {
        if (contentFields == null || contentFields.length == 0) {
            throw new IllegalArgumentException("At least one content field is required to learn TF/IDF statistics.");
        }
        long start = System.currentTimeMillis();
        String fieldNames = Arrays.toString(contentFields);
        log.info("Learning TF/IDF statistic from Lucene index, field names: {}", fieldNames);
        if (TEST_MODE) {
            log.warn("Test mode is active. TFIDF is only estimated on a very small subset of documents.");
        }
        // NOTE(review): document IDs are enumerated as 0..numDocs()-1. If the index contains
        // deleted documents this range presumably does not match the live document IDs
        // (maxDoc() plus a live-docs check would be needed) - confirm against the Lucene
        // version in use before changing.
        int numDocs = ir.numDocs();
        int docLimit = TEST_MODE ? Math.min(1000, numDocs) : numDocs;
        // Every requested field of every document contributes one training string. Documents
        // are loaded lazily while the model consumes the iterator.
        Iterator<StringWrapper> trainIt = IntStream.range(0, docLimit)
                .mapToObj(docId -> loadDocument(ir, docId))
                .flatMap(doc -> Arrays.stream(contentFields).map(fieldName -> wrapFieldValue(doc, fieldName)))
                .iterator();
        log.info("Computing TF/IDF statistics...");
        TFIDF model = new SerializableTFIDF(SerializableSimpleTokenizer.DEFAULT_TOKENIZER);
        model.train(new BasicStringWrapperIterator(trainIt));
        this.tfidf = model;
        long seconds = (System.currentTimeMillis() - start) / 1000L;
        log.info("Done learning TF/IDF for fields {} in {} seconds.", fieldNames, seconds);
    }

    /**
     * Scores the similarity of two strings with the learned model.
     *
     * @param s1 first string
     * @param s2 second string
     * @return the score computed by the underlying {@link TFIDF} model
     * @throws IllegalStateException if no model has been learned yet
     */
    public double score(String s1, String s2) {
        TFIDF model = this.requireTrainedModel();
        synchronized (model) {
            return model.score(s1, s2);
        }
    }

    /**
     * Scores the similarity of two wrapped strings with the learned model.
     *
     * @param s1 first string wrapper
     * @param s2 second string wrapper
     * @return the score computed by the underlying {@link TFIDF} model
     * @throws IllegalStateException if no model has been learned yet
     */
    public double score(StringWrapper s1, StringWrapper s2) {
        TFIDF model = this.requireTrainedModel();
        synchronized (model) {
            return model.score(s1, s2);
        }
    }

    /**
     * Trains a throw-away {@link TFIDF} instance on four tiny strings and prints the score
     * explanations for two string pairs to standard output. Does not touch the model held by
     * this object.
     */
    public void test() {
        TFIDF tfidf = new TFIDF();
        ArrayList<StringWrapper> strings = new ArrayList<StringWrapper>();
        strings.add(this.getbsw("1 2 3 4 "));
        strings.add(this.getbsw("1 4"));
        strings.add(this.getbsw("5 5 5 "));
        strings.add(this.getbsw("5 6 2 1"));
        BasicStringWrapperIterator it = new BasicStringWrapperIterator(strings.iterator());
        tfidf.train(it);
        System.out.println(tfidf.explainScore("2 4 6", "7, 4, 9"));
        System.out.println(tfidf.explainScore("1 2 3", "1 7 19"));
    }

    /** Returns the learned model or fails with a descriptive error when none exists yet. */
    private TFIDF requireTrainedModel() {
        TFIDF model = this.tfidf;
        if (model == null) {
            throw new IllegalStateException("No TF/IDF model available; call learnFromLuceneIndex first.");
        }
        return model;
    }

    /** Loads one document from the index, converting the checked I/O failure into a runtime exception. */
    private static Document loadDocument(IndexReader ir, int docId) {
        try {
            return ir.document(docId);
        }
        catch (IOException e) {
            log.error("Exception while iterating over index", e);
            throw new GeneExpRuntimeException(e);
        }
    }

    /** Wraps the string value of the named field of a document; the field must exist. */
    private static StringWrapper wrapFieldValue(Document doc, String fieldName) {
        IndexableField field = doc.getField(fieldName);
        if (field == null) {
            throw new IllegalArgumentException("Field " + fieldName + " was not found in the given index.");
        }
        return new BasicStringWrapper(field.stringValue());
    }

    /** Wraps a plain string for use with the SecondString API. */
    private StringWrapper getbsw(String string) {
        return new BasicStringWrapper(string);
    }
}

