/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.speciesassignment.candidateretrieval;

import com.google.common.collect.Sets;
import de.julielab.geneexpbase.GeneExpRuntimeException;
import de.julielab.geneexpbase.candidateretrieval.CandidateCacheKey;
import de.julielab.geneexpbase.candidateretrieval.CandidateRetrieval;
import de.julielab.geneexpbase.candidateretrieval.QueryGenerator;
import de.julielab.geneexpbase.candidateretrieval.SynHit;
import de.julielab.geneexpbase.configuration.Configuration;
import de.julielab.geneexpbase.configuration.Parameters;
import de.julielab.geneexpbase.genemodel.GeneMention;
import de.julielab.geneexpbase.scoring.JaroWinklerScorer;
import de.julielab.geneexpbase.scoring.Scorer;
import de.julielab.geneexpbase.services.CacheService;
import de.julielab.speciesassignment.candidateretrieval.SimpleGeneNameQueryGenerator;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import javax.cache.Cache;
import javax.inject.Inject;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * {@link CandidateRetrieval} implementation that looks up gene candidates in the Lucene index
 * configured via the {@code simple_gene_name_index} property and caches the resulting hit lists
 * in the {@code candidates-cache} cache.
 */
public class SimpleCandidateRetrieval implements CandidateRetrieval {
    private static final Logger log = LoggerFactory.getLogger(SimpleCandidateRetrieval.class);

    /**
     * When non-null, every gene ID touched by an index lookup of this class is added to this set.
     * Judging by its name it is meant for unit tests only; it is {@code null} unless assigned
     * from outside.
     */
    public static Set<String> UNIT_TEST_GENE_ID_ACCUMULATION_SET;

    private static final AtomicInteger instanceCounter = new AtomicInteger(0);

    private final IndexSearcher indexSearcher;
    private final Cache<CandidateCacheKey, List<SynHit>> cache;

    /**
     * Opens the gene name index and obtains the candidate cache.
     *
     * @param configuration must define the property {@code simple_gene_name_index} (path to the index directory)
     * @param cacheService  provides the cache manager from which {@code candidates-cache} is fetched
     * @throws IllegalArgumentException if {@code simple_gene_name_index} is not configured
     * @throws IOException              if the index directory cannot be opened
     */
    @Inject
    public SimpleCandidateRetrieval(Configuration configuration, CacheService cacheService) throws IOException {
        String indexPath = configuration.getProperty("simple_gene_name_index");
        if (indexPath == null) {
            throw new IllegalArgumentException("The property 'simple_gene_name_index' is not specified in the configuration file.");
        }
        this.indexSearcher = new IndexSearcher(DirectoryReader.open(FSDirectory.open(Paths.get(indexPath))));
        this.cache = cacheService.getCacheManager().getCache("candidates-cache");
    }

    /**
     * Retrieves the candidates for a gene mention, optionally restricted to a set of organisms.
     * <p>
     * Without organisms a single lookup is done with a key that carries no tax ID. With organisms,
     * one lookup per organism is done (each with its own cache key) and the results are merged
     * into a new list. In both cases the returned list is ordered by lexical score, then Lucene
     * score, best first.
     *
     * @param geneMention    the mention whose gene name is searched
     * @param geneIdsFilter  set on the cache key via {@code setGeneIdsFilter}; may be {@code null}
     * @param organisms      tax IDs to query individually; {@code null} or empty means no organism restriction
     * @param queryGenerator set on the cache key and used by it to build the Lucene query
     * @return the sorted candidate hits
     */
    @Override
    public List<SynHit> getCandidates(GeneMention geneMention, Collection<String> geneIdsFilter, Collection<String> organisms, QueryGenerator queryGenerator) {
        if (organisms == null || organisms.isEmpty()) {
            return this.getCachedOrQuery(this.createKey(geneMention, geneIdsFilter, queryGenerator, null));
        }
        // Every organism gets its own key so that the cache entry is stored under exactly the key
        // it is later looked up with.
        // NOTE(review): assumes CandidateCacheKey equality takes the tax ID into account - confirm.
        List<SynHit> merged = new ArrayList<>();
        for (String organism : organisms) {
            merged.addAll(this.getCachedOrQuery(this.createKey(geneMention, geneIdsFilter, queryGenerator, organism)));
        }
        sortBestFirst(merged);
        return merged;
    }

    /**
     * Retrieves candidates without gene ID filter and without organism restriction.
     */
    @Override
    public List<SynHit> getCandidates(GeneMention geneMention, QueryGenerator queryGenerator) {
        return this.getCandidates(geneMention, null, null, queryGenerator);
    }

    /**
     * Retrieves candidates with a gene ID filter but without organism restriction.
     */
    @Override
    public List<SynHit> getCandidates(GeneMention geneMention, Collection<String> geneIdsFilter, QueryGenerator queryGenerator) {
        return this.getCandidates(geneMention, geneIdsFilter, Collections.emptySet(), queryGenerator);
    }

    /**
     * Builds a fresh cache key for the given mention. The tax ID is only set when it is non-null.
     */
    private CandidateCacheKey createKey(GeneMention geneMention, Collection<String> geneIdsFilter, QueryGenerator queryGenerator, String taxId) {
        CandidateCacheKey key = new CandidateCacheKey(geneMention.getGeneName());
        key.setGeneIdsFilter(geneIdsFilter);
        key.setQueryGenerator(queryGenerator);
        if (taxId != null) {
            key.setTaxId(taxId);
        }
        return key;
    }

    /**
     * Returns the cached hits for the key or, on a cache miss, queries the index, sorts the hits
     * best first and stores them in the cache under the same key.
     */
    private List<SynHit> getCachedOrQuery(CandidateCacheKey key) {
        List<SynHit> hits = this.cache.get(key);
        if (hits == null) {
            hits = this.getCandidatesFromIndex(key);
            sortBestFirst(hits);
            this.cache.put(key, hits);
        }
        return hits;
    }

    /**
     * Sorts in place by lexical score, ties broken by Lucene score, both descending.
     */
    private static void sortBestFirst(List<SynHit> hits) {
        hits.sort(Comparator.comparingDouble(SynHit::getLexicalScore).thenComparingDouble(SynHit::getLuceneScore).reversed());
    }

    /**
     * Runs the query generated by the key against the index and converts each hit document into
     * a {@link SynHit}. For every document the {@code names_exact} value with the highest
     * Jaro-Winkler score against the normalized mention text becomes the hit's synonym.
     *
     * @param key supplies the query, the maximum number of hits and the gene name
     * @return a new, unsorted list of hits
     * @throws IllegalArgumentException if Lucene reports too many boolean clauses for the query
     * @throws GeneExpRuntimeException  if the index cannot be read
     */
    private List<SynHit> getCandidatesFromIndex(CandidateCacheKey key) {
        try {
            Query query = key.generateQuery();
            TopScoreDocCollector resultsCollector = TopScoreDocCollector.create(key.getMaxHits(), key.getMaxHits());
            this.indexSearcher.search(query, resultsCollector);
            TopDocs topDocs = resultsCollector.topDocs();
            List<SynHit> ret = new ArrayList<>();
            JaroWinklerScorer scorer = new JaroWinklerScorer();
            for (ScoreDoc doc : topDocs.scoreDocs) {
                Document document = this.indexSearcher.doc(doc.doc);
                String geneId = document.getField("gene_id").stringValue();
                String taxId = document.getField("tax_id").stringValue();
                String[] geneNames = Arrays.stream(document.getFields("names_exact")).map(IndexableField::stringValue).toArray(String[]::new);
                double bestScore = 0.0;
                String bestName = null;
                for (String name : geneNames) {
                    double score = scorer.getScore(key.getGeneName().getNormalizedText(), name);
                    // The first name is always taken; later names must score strictly higher.
                    if (bestName == null || score > bestScore) {
                        bestScore = score;
                        bestName = name;
                    }
                }
                SynHit hit = new SynHit(bestName, bestScore, geneId, null);
                hit.setTaxId(taxId);
                hit.setLuceneScore(doc.score);
                ret.add(hit);
            }
            if (UNIT_TEST_GENE_ID_ACCUMULATION_SET != null) {
                ret.forEach(sh -> UNIT_TEST_GENE_ID_ACCUMULATION_SET.add(sh.getId()));
            }
            return ret;
        }
        catch (BooleanQuery.TooManyClauses e) {
            // NOTE(review): the message says no candidates are returned, but the exception is
            // rethrown to the caller. Kept as is because callers may rely on the exception.
            log.warn("Got too many clauses exception from gene name \"{}\". Assuming that this is a tagging error and not returning any candidates.", key.getGeneName().getText());
            throw new IllegalArgumentException(e);
        }
        catch (IOException e) {
            log.error("Could not read simple gene name index.");
            throw new GeneExpRuntimeException(e);
        }
    }

    /**
     * Determines which of the offered tax IDs have at least one candidate whose synonym is more
     * similar to the mention's normalized text than the threshold configured under
     * {@code species_assignment.singular.synonym_similarity_threshold}.
     *
     * @param gm            the gene mention to look up
     * @param offeredTaxIds the tax IDs to check
     * @param scorer        similarity measure applied to (synonym, normalized mention text)
     * @param parameterMap  provides the similarity threshold
     * @return a new set holding the offered tax IDs for which a sufficiently similar candidate was found
     */
    public Set<String> checkForCompatibleTaxonomyCandidates(GeneMention gm, Set<String> offeredTaxIds, Scorer scorer, Parameters parameterMap) {
        // NOTE(review): this resolves to the overload whose second parameter is the gene ID
        // filter, so the tax IDs are passed as gene IDs. Possibly the organism-restricted
        // overload was intended - confirm before changing.
        List<SynHit> candidates = this.getCandidates(gm, offeredTaxIds, new SimpleGeneNameQueryGenerator());
        double similarityThreshold = parameterMap.getDouble("species_assignment.singular.synonym_similarity_threshold");
        Set<String> foundTaxIds = new HashSet<>();
        for (SynHit sh : candidates) {
            if (scorer.getScore(sh.getSynonym(), gm.getNormalizedText()) > similarityThreshold) {
                foundTaxIds.add(sh.getTaxId());
            }
        }
        return Sets.intersection(offeredTaxIds, foundTaxIds).stream().collect(Collectors.toSet());
    }

    /**
     * Looks up the tax ID stored in the index for a gene ID.
     *
     * @param geneId the value to match against the {@code gene_id} field
     * @return the {@code tax_id} of the first matching document, or {@code null} if no document matches
     * @throws GeneExpRuntimeException if the index cannot be read
     */
    @Override
    public String mapGeneIdToTaxId(String geneId) {
        try {
            BooleanQuery q = new BooleanQuery.Builder()
                    .add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST)
                    .add(new TermQuery(new Term("gene_id", geneId)), BooleanClause.Occur.FILTER)
                    .build();
            TopDocs topDocs = this.indexSearcher.search(q, 1);
            if (topDocs.scoreDocs.length > 0) {
                if (UNIT_TEST_GENE_ID_ACCUMULATION_SET != null) {
                    UNIT_TEST_GENE_ID_ACCUMULATION_SET.add(geneId);
                }
                return this.indexSearcher.doc(topDocs.scoreDocs[0].doc).getField("tax_id").stringValue();
            }
        }
        catch (IOException e) {
            log.error("Could not read simple gene name index.");
            throw new GeneExpRuntimeException(e);
        }
        return null;
    }
}

