/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.gene.candidateretrieval;

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.LoadingCache;
import de.julielab.gene.candidateretrieval.BooleanQueryGenerator;
import de.julielab.gene.candidateretrieval.CandidateRetrieval;
import de.julielab.gene.candidateretrieval.Configuration;
import de.julielab.gene.candidateretrieval.GeneRecordHit;
import de.julielab.gene.candidateretrieval.GeneRecordQueryGenerator;
import de.julielab.gene.candidateretrieval.GeneRecordSynonymsQueryGenerator;
import de.julielab.gene.candidateretrieval.NGramQueryGenerator;
import de.julielab.gene.candidateretrieval.scoring.LuceneScorer;
import de.julielab.gene.candidateretrieval.scoring.MaxEntScorer;
import de.julielab.geneexpbase.GeneExpRuntimeException;
import de.julielab.geneexpbase.TermNormalizer;
import de.julielab.geneexpbase.candidateretrieval.CandidateCacheKey;
import de.julielab.geneexpbase.candidateretrieval.GeneCandidateRetrievalException;
import de.julielab.geneexpbase.candidateretrieval.QueryGenerator;
import de.julielab.geneexpbase.candidateretrieval.SynHit;
import de.julielab.geneexpbase.configuration.Parameters;
import de.julielab.geneexpbase.genemodel.GeneMention;
import de.julielab.geneexpbase.genemodel.GeneName;
import de.julielab.geneexpbase.scoring.JaroWinklerScorer;
import de.julielab.geneexpbase.scoring.LevenshteinScorer;
import de.julielab.geneexpbase.scoring.Scorer;
import de.julielab.geneexpbase.scoring.SimpleScorer;
import de.julielab.geneexpbase.scoring.TFIDFScorer;
import de.julielab.geneexpbase.scoring.TFIDFUtils;
import de.julielab.geneexpbase.scoring.TokenJaroSimilarityScorer;
import de.julielab.geneexpbase.services.CacheService;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.OptionalDouble;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executor;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.cache.Cache;
import javax.inject.Inject;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.lucene.analysis.custom.CustomAnalyzer;
import org.apache.lucene.analysis.ngram.NGramFilterFactory;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.StandardDirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.store.FSDirectory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class LuceneCandidateRetrieval
implements CandidateRetrieval {
    public static final boolean TEST_MODE = false;

    // ---- Predefined, shareable query generator instances ----
    // NOTE(review): the integer argument of BooleanQueryGenerator differs per constant (0, -1, 1, 2);
    // its exact meaning is defined by BooleanQueryGenerator and is not visible here. Judging by the
    // constant names it presumably controls how many clauses may remain unmatched - confirm.
    public static final QueryGenerator CONJUNCTION = new BooleanQueryGenerator(BooleanClause.Occur.MUST, 0);
    public static final QueryGenerator DISJUNCTION = new BooleanQueryGenerator(BooleanClause.Occur.SHOULD, -1);
    public static final QueryGenerator DISJUNCTION_MINUS_1 = new BooleanQueryGenerator(BooleanClause.Occur.SHOULD, 1);
    public static final QueryGenerator DISJUNCTION_MINUS_2 = new BooleanQueryGenerator(BooleanClause.Occur.SHOULD, 2);
    public static final QueryGenerator NGRAM_2_3 = new NGramQueryGenerator(2, 3);
    public static final QueryGenerator GENE_RECORDS_CNF = new GeneRecordQueryGenerator();
    public static final QueryGenerator GENE_RECORDS_CNF_WITH_SYNONYMS = new GeneRecordQueryGenerator(false, false, true, true);
    public static final QueryGenerator GENE_RECORDS_FLAT_DISJUNCTION = new GeneRecordQueryGenerator(false, true, false, false);
    public static final QueryGenerator GENE_RECORDS_DISMAX = new GeneRecordQueryGenerator(true);
    public static final QueryGenerator GENE_RECORDS_SYNONYMS_APPROX = new GeneRecordSynonymsQueryGenerator(false);
    public static final QueryGenerator GENE_RECORDS_SYNONYMS_EXACT = new GeneRecordSynonymsQueryGenerator(true);
    public static final String NAME_PRIO_DELIMITER = "__";
    public static final String LOGGER_NAME_CANDIDATES = "de.julielab.jules.ae.genemapper.candidates";

    // ---- Scorer type codes understood by setScorerType(int) ----
    // 0 = SimpleScorer, 1 = TokenJaroSimilarityScorer, 2 = MaxEntScorer, 3 = JaroWinklerScorer,
    // 4 = LevenshteinScorer, 5 = TF-IDF scorer learned from the gene record index, 10 = LuceneScorer.
    public static final int SIMPLE_SCORER = 0;
    public static final int TOKEN_JAROWINKLER_SCORER = 1;
    public static final int MAXENT_SCORER = 2;
    public static final int JAROWINKLER_SCORER = 3;
    public static final int LEVENSHTEIN_SCORER = 4;
    public static final int TFIDF = 5;
    public static final int LUCENE_SCORER = 10;
    // Classpath resource of the bundled MaxEnt model; used by setScorerType(2) as long as no
    // other model path has been stored in the maxEntModel field.
    public static final String MAXENT_SCORER_MODEL = "/genemapper_jules_mallet.mod";
    // Dedicated logger for candidate output (same name as LOGGER_NAME_CANDIDATES).
    public static final Logger candidateLog = LoggerFactory.getLogger("de.julielab.jules.ae.genemapper.candidates");
    public static final int LUCENE_MAX_HITS = 20;
    private static final Logger log = LoggerFactory.getLogger(LuceneCandidateRetrieval.class);
    private static final ConcurrentHashMap<String, LoadingCache<CandidateCacheKey, List<SynHit>>> caches = new ConcurrentHashMap();

    // ---- Class-wide statistics counters, exposed through the static getters below ----
    // totalLuceneQueryTime is increased (in nanoseconds) by getCandidatesFromIndexWithoutCache;
    // the update sites of the other counters are not in the visible part of the file.
    private static final AtomicLong totalGeneRecordFieldLoadingTime = new AtomicLong();
    private static final AtomicLong totalCacheGettime = new AtomicLong();
    private static final AtomicLong totalCachePuttime = new AtomicLong();
    private static final AtomicLong totalLuceneQueryTime = new AtomicLong();
    private static final AtomicLong cacheHits = new AtomicLong();
    private static final AtomicLong cacheMisses = new AtomicLong();

    // ---- Searchers keyed by thread, held with weak keys and weak values ----
    // mentionIndexSearchers is filled by the deprecated two-argument constructor.
    private static final com.google.common.cache.Cache<Thread, IndexSearcher> mentionIndexSearchers = CacheBuilder.newBuilder().weakKeys().weakValues().build();
    private static final com.google.common.cache.Cache<Thread, IndexSearcher> geneRecordIndexSearchers = CacheBuilder.newBuilder().weakKeys().weakValues().build();
    private static final com.google.common.cache.Cache<Thread, IndexSearcher> geneRecordOriginalNamesIndexSearchers = CacheBuilder.newBuilder().weakKeys().weakValues().build();
    // Index readers keyed by String (presumably the index path - TODO confirm against the users of these maps).
    private static final Map<String, IndexReader> geneRecordIndexReaders = new ConcurrentHashMap<String, IndexReader>();
    private static final Map<String, IndexReader> geneRecordOriginalNamesIndexReaders = new ConcurrentHashMap<String, IndexReader>();
    private static final Map<String, IndexReader> nameCentricIndexReaders = new ConcurrentHashMap<String, IndexReader>();
    // Maps a full text field name to the GeneRecordHit setter that receives the values of that field.
    private static final Map<String, BiConsumer<GeneRecordHit, String[]>> fullTextFieldSetter = Map.of("generif", GeneRecordHit::setGeneRifs, "interaction", GeneRecordHit::setInteractions, "godesc", GeneRecordHit::setGoDescriptors, "summary", GeneRecordHit::setSummaries);
    private static final AtomicInteger instanceCounter = new AtomicInteger(0);
    // Test hook: when non-null, the IDs of hits returned by getCandidates/getIndexRecords are added to this set.
    public static Set<String> UNIT_TEST_GENE_ID_ACCUMULATION_SET;
    // Static, but overwritten by every call of the injected constructor.
    private static ExecutorService executorService;

    // ---- Per-instance state ----
    // Scorer returned by getScorer(); chosen from "exact_scorer_type" in the injected constructor.
    private final Scorer exactScorer;
    // Field weights read from the configuration ("candidate_retrieval.*"); empty for the deprecated constructor.
    private final Map<String, Float> globalFieldWeights;
    private final Boolean useLuceneCandidateCache;
    private IndexSearcher geneRecordIndexSearcher;
    private IndexSearcher geneRecordOriginalNamesIndexSearcher;
    private IndexReader geneRecordIndexReader;
    private IndexReader geneRecordOriginalNamesIndexReader;
    private IndexSearcher nameCentricIndexSearcher;
    // Whitespace tokenizer followed by a 2..3 n-gram filter; built in the injected constructor.
    private CustomAnalyzer ngramAnalyzer;
    // Path of the MaxEnt model; defaults to the bundled resource MAXENT_SCORER_MODEL.
    private String maxEntModel = "/genemapper_jules_mallet.mod";
    private TermNormalizer normalizer;
    private Scorer approxScorer;
    // May stay null when no existing spelling index is configured.
    private SpellChecker spellingChecker;
    private Configuration configuration;
    // JCache instances obtained from the CacheService in the injected constructor.
    private Cache<CandidateCacheKey, List> candidateCache;
    private Cache<Pair<String, String>, String[]> geneRecordFieldCache;
    private Cache<String, TFIDFUtils> tfidfCache;

    @Deprecated
    public LuceneCandidateRetrieval(IndexSearcher mentionIndexSearcher, Scorer scorer) {
        mentionIndexSearchers.put(Thread.currentThread(), mentionIndexSearcher);
        this.exactScorer = scorer;
        this.normalizer = new TermNormalizer();
        this.globalFieldWeights = Collections.emptyMap();
        this.useLuceneCandidateCache = false;
    }

    /**
     * Creates a candidate retrieval instance from the given configuration.
     * <p>
     * Reads the index locations from the configuration, opens the gene record index
     * (mandatory), the original names index and the name centric index (each only when the
     * configured directory exists) and an optional spelling index. Afterwards the caches are
     * obtained from the cache service, the exact and approximate scorers are created and the
     * global field weights are read from the {@code candidate_retrieval.*} properties.
     * <p>
     * The caches and the {@code maxent_model} setting are deliberately resolved <em>before</em>
     * the scorers are created: scorer type 5 (TF-IDF) reads {@code tfidfCache} and scorer type 2
     * (MaxEnt) reads {@code maxEntModel} while being constructed.
     *
     * @param config          configuration holding index paths, scorer types and field weights
     * @param executorService executor handed to the Lucene searchers; also stored in the static field
     * @param cacheService    provider of the candidate, gene record field and TF-IDF caches
     * @throws GeneCandidateRetrievalException if a mandatory index property is missing, an index
     *                                         cannot be opened or a scorer type is unknown
     */
    @Inject
    public LuceneCandidateRetrieval(Configuration config, ExecutorService executorService, CacheService cacheService) throws GeneCandidateRetrievalException {
        this.configuration = config;
        LuceneCandidateRetrieval.executorService = executorService;
        this.useLuceneCandidateCache = config.getBoolean("use_lucene_candidates_cache").orElse(false);

        String geneRecordIndex = config.getProperty("gene_record_index");
        if (geneRecordIndex == null) {
            throw new GeneCandidateRetrievalException("geneRecordIndex index not specified in configuration file (critical).");
        }
        String geneRecordOriginalNamesIndex = config.getProperty("gene_record_original_names_index");
        if (geneRecordOriginalNamesIndex == null) {
            log.warn("No value for property {} specified in the configuration. No original names will be available.", "gene_record_original_names_index");
        }
        String mentionIndex = config.getProperty("name_centric_index");
        if (mentionIndex == null) {
            throw new GeneCandidateRetrievalException("name centric mention index not specified in configuration file (critical).");
        }

        int luceneConcurrencyLevel = Integer.parseInt((String) config.getOrDefault("concurrency_level", "1"));
        log.info("Using Lucene concurrency level of {}. Note that concurrency is limited by the number of segments of the index.", luceneConcurrencyLevel);

        String spellingIndexPath = config.getProperty("spelling_index");
        try {
            this.geneRecordIndexSearcher = new IndexSearcher(DirectoryReader.open(FSDirectory.open(Paths.get(geneRecordIndex))), executorService);
            log.info("Gene record index has {} segments", ((StandardDirectoryReader) this.geneRecordIndexSearcher.getIndexReader()).getSegmentInfos().size());

            if (geneRecordOriginalNamesIndex != null) {
                // Check the original names index itself, not the gene record index that was just opened.
                if (new File(geneRecordOriginalNamesIndex).exists()) {
                    this.geneRecordOriginalNamesIndexSearcher = new IndexSearcher(DirectoryReader.open(FSDirectory.open(Paths.get(geneRecordOriginalNamesIndex))), executorService);
                    log.info("Original gene names record names index has {} segments", ((StandardDirectoryReader) this.geneRecordOriginalNamesIndexSearcher.getIndexReader()).getSegmentInfos().size());
                } else {
                    log.warn("Original gene names record index {} does not exist. This index will not be available.", geneRecordOriginalNamesIndex);
                }
            }

            if (new File(mentionIndex).exists()) {
                this.nameCentricIndexSearcher = new IndexSearcher(DirectoryReader.open(FSDirectory.open(Paths.get(mentionIndex))), executorService);
                log.info("Name centric index has {} segments", ((StandardDirectoryReader) this.nameCentricIndexSearcher.getIndexReader()).getSegmentInfos().size());
            } else {
                log.warn("Name centric index {} does not exist. This index will not be available.", mentionIndex);
            }

            if (spellingIndexPath != null) {
                File spellingIndex = new File(spellingIndexPath);
                if (spellingIndex.exists()) {
                    this.spellingChecker = new SpellChecker(FSDirectory.open(spellingIndex.toPath()));
                }
            }
        } catch (IOException e) {
            throw new GeneCandidateRetrievalException(e);
        }
        if (this.spellingChecker == null) {
            log.warn("Spelling index was not given or file does not exist. No spelling correction can be done. Specified spelling index: {}", spellingIndexPath);
        }

        // Everything a scorer may need during its construction has to be in place first.
        this.candidateCache = cacheService.getCacheManager().getCache("candidates-cache");
        this.geneRecordFieldCache = cacheService.getCacheManager().getCache("generecord-field-cache");
        this.tfidfCache = cacheService.getCacheManager().getCache("tfidf-cache");
        String configuredMaxEntModel = config.getProperty("maxent_model");
        if (configuredMaxEntModel != null) {
            this.maxEntModel = configuredMaxEntModel;
        }
        this.normalizer = new TermNormalizer();

        String exactScorerType = config.getProperty("exact_scorer_type");
        if (exactScorerType == null) {
            log.debug("No configuration value given for exact_scorer_type");
            this.exactScorer = this.setScorerType(LUCENE_SCORER);
        } else {
            this.exactScorer = this.setScorerType(Integer.parseInt(exactScorerType));
        }
        String approxScorerType = config.getProperty("approx_scorer_type");
        if (approxScorerType == null) {
            log.debug("No configuration value given for approx_scorer_type");
            this.approxScorer = this.setScorerType(LUCENE_SCORER);
        } else {
            this.approxScorer = this.setScorerType(Integer.parseInt(approxScorerType));
        }
        log.info("Exact scorer: {}", this.exactScorer);
        log.info("Approx scorer: {}", this.approxScorer);

        try {
            Map<String, String> ngramFilterSettings = new HashMap<String, String>();
            ngramFilterSettings.put("minGramSize", "2");
            ngramFilterSettings.put("maxGramSize", "3");
            this.ngramAnalyzer = CustomAnalyzer.builder().withTokenizer("whitespace").addTokenFilter(NGramFilterFactory.class, ngramFilterSettings).build();
        } catch (IOException e) {
            // Best effort as before: the instance stays usable, the analyzer remains null.
            log.error("Could not create the n-gram analyzer; ngramAnalyzer stays unset.", e);
        }

        this.globalFieldWeights = new HashMap<String, Float>();
        config.getDouble(Configuration.dot("candidate_retrieval", "dismax_tie_breaker")).ifPresent(d -> this.globalFieldWeights.put("dismax_tie_breaker", Float.valueOf((float) d)));
        for (String field : GeneRecordQueryGenerator.ALL_FIELDS) {
            OptionalDouble fieldWeight = config.getDouble(Configuration.dot("candidate_retrieval", field));
            fieldWeight.ifPresent(d -> this.globalFieldWeights.put(field, Float.valueOf((float) d)));
        }
        for (String field : GeneRecordQueryGenerator.SYNONYM_FIELDS) {
            // NOTE(review): the "<field>_exact" weight is stored under the plain field name and may thus
            // replace the weight read in the loop above - kept as is, confirm that this is intended.
            OptionalDouble exactFieldWeight = config.getDouble(Configuration.dot("candidate_retrieval", field + "_exact"));
            exactFieldWeight.ifPresent(d -> this.globalFieldWeights.put(field, Float.valueOf((float) d)));
        }
    }

    /**
     * Returns the class-wide {@code totalCacheGettime} counter (the live object, not a copy).
     * NOTE(review): presumably the accumulated time spent reading from the candidate cache;
     * the code updating it is not in the visible part of the file.
     */
    public static AtomicLong getTotalCacheGettime() {
        return totalCacheGettime;
    }

    /**
     * Returns the class-wide {@code totalGeneRecordFieldLoadingTime} counter (the live object, not a copy).
     * NOTE(review): presumably the accumulated time spent loading gene record fields;
     * the code updating it is not in the visible part of the file.
     */
    public static AtomicLong getTotalGeneRecordFieldLoadingTime() {
        return totalGeneRecordFieldLoadingTime;
    }

    /**
     * Returns the class-wide {@code totalCachePuttime} counter (the live object, not a copy).
     * NOTE(review): presumably the accumulated time spent writing to the candidate cache;
     * the code updating it is not in the visible part of the file.
     */
    public static AtomicLong getTotalCachePuttime() {
        return totalCachePuttime;
    }

    /**
     * Returns the class-wide counter of nanoseconds spent in uncached record index lookups
     * (increased by {@code getCandidatesFromIndexWithoutCache}). The live object is returned, not a copy.
     */
    public static AtomicLong getTotalLuceneQueryTime() {
        return totalLuceneQueryTime;
    }

    /**
     * Returns the class-wide {@code cacheMisses} counter (the live object, not a copy).
     * The code updating it is not in the visible part of the file.
     */
    public static AtomicLong getCacheMisses() {
        return cacheMisses;
    }

    /**
     * Returns the class-wide {@code cacheHits} counter (the live object, not a copy).
     * The code updating it is not in the visible part of the file.
     */
    public static AtomicLong getCacheHits() {
        return cacheHits;
    }

    /**
     * Provides a TF-IDF scorer whose statistics stem from the synonym fields of the gene
     * record index.
     * <p>
     * The statistics are looked up in the TF-IDF cache under a key built from the configured
     * {@code gene_record_index} value and the field names; on a miss they are learned from the
     * index reader and stored. Lookup and learning happen while holding the cache's monitor.
     *
     * @return a new scorer wrapping the (possibly cached) TF-IDF statistics
     */
    @Override
    public TFIDFScorer getTFIDFOnGeneRecordNames() {
        final String[] fields = GeneRecordQueryGenerator.SYNONYM_FIELDS;
        final String statisticsKey = this.configuration.getProperty("gene_record_index") + " " + Arrays.toString(fields);
        final Cache<String, TFIDFUtils> statisticsCache = this.tfidfCache;
        TFIDFUtils statistics;
        synchronized (statisticsCache) {
            statistics = statisticsCache.get(statisticsKey);
            if (statistics == null) {
                statistics = new TFIDFUtils();
                statistics.learnFromLuceneIndex(this.geneRecordIndexSearcher.getIndexReader(), fields);
                statisticsCache.put(statisticsKey, statistics);
            }
        }
        return new TFIDFScorer(statistics);
    }

    /**
     * Provides a TF-IDF scorer whose statistics stem from the {@code indexed_syn} field of the
     * name centric index.
     * <p>
     * The statistics are looked up in the TF-IDF cache under a key built from the configured
     * {@code name_centric_index} value and the field name; on a miss they are learned from the
     * index reader and stored. Lookup and learning happen while holding the cache's monitor.
     *
     * @return a new scorer wrapping the (possibly cached) TF-IDF statistics
     */
    public TFIDFScorer getTFIDFOnGeneSynonyms() {
        final String[] fields = new String[]{"indexed_syn"};
        final String statisticsKey = this.configuration.getProperty("name_centric_index") + " " + Arrays.toString(fields);
        final Cache<String, TFIDFUtils> statisticsCache = this.tfidfCache;
        TFIDFUtils statistics;
        synchronized (statisticsCache) {
            statistics = statisticsCache.get(statisticsKey);
            if (statistics == null) {
                statistics = new TFIDFUtils();
                statistics.learnFromLuceneIndex(this.nameCentricIndexSearcher.getIndexReader(), fields);
                statisticsCache.put(statisticsKey, statistics);
            }
        }
        return new TFIDFScorer(statistics);
    }

    /** Returns this instance's gene record index searcher (null when the deprecated constructor was used). */
    private IndexSearcher getGeneRecordIndexSearcher() {
        return this.geneRecordIndexSearcher;
    }

    /** Returns the configuration passed to the injected constructor (null when the deprecated constructor was used). */
    public Configuration getConfiguration() {
        return this.configuration;
    }

    /** Returns the term normalizer that is handed to the GeneMention and GeneName objects created by this class. */
    public TermNormalizer getNormalizer() {
        return this.normalizer;
    }

    /**
     * Replaces the term normalizer used for subsequently created GeneMention and GeneName objects.
     *
     * @param normalizer the normalizer to use from now on
     */
    public void setNormalizer(TermNormalizer normalizer) {
        this.normalizer = normalizer;
    }

    /** Returns the exact scorer (the approximate scorer is not exposed by this method). */
    public Scorer getScorer() {
        return this.exactScorer;
    }

    /** Returns the spell checker, or null when no existing spelling index was configured. */
    @Override
    public SpellChecker getSpellingChecker() {
        return this.spellingChecker;
    }

    /**
     * Creates the scorer that belongs to the given type code.
     * <p>
     * Despite its name this method does not store anything; the caller decides what to do
     * with the returned scorer.
     *
     * @param type one of the scorer type codes (0, 1, 2, 3, 4, 5 or 10)
     * @return a newly created scorer of the requested type
     * @throws GeneCandidateRetrievalException if the type code is not known
     */
    public Scorer setScorerType(int type) throws GeneCandidateRetrievalException {
        switch (type) {
            case 0: // SIMPLE_SCORER
                return new SimpleScorer();
            case 1: // TOKEN_JAROWINKLER_SCORER
                return new TokenJaroSimilarityScorer();
            case 2: { // MAXENT_SCORER
                if (this.maxEntModel.equals(MAXENT_SCORER_MODEL)) {
                    // Default model: load it from the classpath.
                    InputStream modelStream = this.getClass().getResourceAsStream(MAXENT_SCORER_MODEL);
                    return new MaxEntScorer(modelStream);
                }
                // Any other value is taken as a file path.
                return new MaxEntScorer(new File(this.maxEntModel));
            }
            case 3: // JAROWINKLER_SCORER
                return new JaroWinklerScorer();
            case 4: // LEVENSHTEIN_SCORER
                return new LevenshteinScorer();
            case 5: // TFIDF
                return this.getTFIDFOnGeneRecordNames();
            case 10: // LUCENE_SCORER
                return new LuceneScorer();
            default:
                throw new GeneCandidateRetrievalException("Unknown mention scorer type: " + type);
        }
    }

    /**
     * Describes the exact scorer in use.
     *
     * @return the scorer's own info text, or a fixed fallback text when no exact scorer is set
     */
    public String getScorerInfo() {
        return this.exactScorer != null ? this.exactScorer.info() : "Lucene Score (unnormalized)";
    }

    /**
     * Returns the type code reported by the exact scorer.
     * Unlike getScorerInfo() there is no null check, so this fails when no exact scorer is set.
     */
    public int getScorerType() {
        return this.exactScorer.getScorerType();
    }

    /**
     * Retrieves candidates for a plain search term by wrapping it into a gene mention that
     * uses this instance's normalizer.
     *
     * @param originalSearchTerm the text to look up
     * @param queryGenerator     strategy that builds the index query
     * @return the candidates found for the term
     */
    @Override
    public List<SynHit> getCandidates(String originalSearchTerm, QueryGenerator queryGenerator) {
        return this.getCandidates(new GeneMention(originalSearchTerm, this.normalizer), queryGenerator);
    }

    /**
     * Retrieves candidates for a gene mention, restricted to the taxonomy IDs the mention itself carries.
     *
     * @param geneMention    the mention to look up
     * @param queryGenerator strategy that builds the index query
     * @return the candidates found for the mention
     */
    @Override
    public List<SynHit> getCandidates(GeneMention geneMention, QueryGenerator queryGenerator) {
        return this.getCandidates(geneMention, geneMention.getTaxonomyIds(), queryGenerator);
    }

    /**
     * Retrieves candidates for a gene mention for the given organisms without a gene ID filter.
     *
     * @param geneMention    the mention to look up
     * @param organisms      taxonomy IDs to search for; {@code null} is treated like an empty collection
     * @param queryGenerator strategy that builds the index query
     * @return the candidates found for the mention
     */
    @Override
    public List<SynHit> getCandidates(GeneMention geneMention, Collection<String> organisms, QueryGenerator queryGenerator) {
        Collection<String> taxonomyIds = organisms;
        if (taxonomyIds == null) {
            taxonomyIds = Collections.emptyList();
        }
        return this.getCandidates(geneMention, null, taxonomyIds, queryGenerator);
    }

    /**
     * Retrieves candidates with an optional gene ID filter; hit fields are loaded (loadFields = true)
     * and no query parameters are passed.
     *
     * @param geneMention    the mention to look up
     * @param geneIdsFilter  gene IDs to restrict the search to, or null for no restriction
     * @param organisms      taxonomy IDs to search for
     * @param queryGenerator strategy that builds the index query
     * @return the candidates found for the mention
     */
    @Override
    public List<SynHit> getCandidates(GeneMention geneMention, Collection<String> geneIdsFilter, Collection<String> organisms, QueryGenerator queryGenerator) {
        return this.getCandidates(geneMention, geneIdsFilter, organisms, true, null, queryGenerator);
    }

    /**
     * Central candidate lookup all other {@code getCandidates} overloads end up in.
     * <p>
     * A {@link CandidateCacheKey} is built from the mention's gene name and the given options
     * and handed to {@code getCandidatesFromIndex}. Without organisms a single lookup is done
     * and its result is returned as is; otherwise one lookup per taxonomy ID is done and the
     * results are concatenated in iteration order.
     * <p>
     * When {@code UNIT_TEST_GENE_ID_ACCUMULATION_SET} is set, the IDs of all returned hits are
     * added to it.
     *
     * @param geneMention    the mention to look up
     * @param geneIdsFilter  gene IDs to restrict the search to; {@code null} sets no filter on the key
     * @param organisms      taxonomy IDs to search for; {@code null} or empty means no taxonomy restriction
     * @param loadFields     whether the hit fields should be loaded
     * @param parameters     optional query parameters; field weights are taken from them only when
     *                       {@code candidate_retrieval.use_query_field_weights} is true
     * @param queryGenerator strategy that builds the index query
     * @return the candidates found for the mention
     */
    @Override
    public List<SynHit> getCandidates(GeneMention geneMention, Collection<String> geneIdsFilter, Collection<String> organisms, boolean loadFields, Parameters parameters, QueryGenerator queryGenerator) {
        CandidateCacheKey key = new CandidateCacheKey(geneMention.getGeneName());
        key.setLoadSynHitFields(loadFields);
        key.setQueryGenerator(queryGenerator);
        if (parameters != null && parameters.getBoolean(Configuration.dot("candidate_retrieval", "use_query_field_weights"))) {
            key.setFieldWeights(this.getFieldWeightsFromParameters(parameters));
        }
        if (queryGenerator instanceof GeneRecordQueryGenerator && ((GeneRecordQueryGenerator) queryGenerator).isUseContextGenesAsRelevanceSignal()) {
            key.setContextNames(geneMention.getContextGeneNames().collect(Collectors.toSet()));
        }
        if (geneIdsFilter != null) {
            key.setGeneIdsFilter(geneIdsFilter);
        }

        List<SynHit> hits;
        if (organisms == null || organisms.isEmpty()) {
            hits = this.getCandidatesFromIndex(key);
            if (log.isTraceEnabled()) {
                int geneBegin = geneMention.getOffsets() != null ? geneMention.getBegin() : -1;
                int geneEnd = geneMention.getOffsets() != null ? geneMention.getEnd() : -1;
                log.trace("Returning {} candidates for gene mention {}[{}-{}]", hits.size(), key.getGeneName().getText(), geneBegin, geneEnd);
            }
        } else {
            hits = new ArrayList<>();
            for (String taxonomyId : organisms) {
                // NOTE(review): the same key object is re-used with a changing taxonomy ID; this is only
                // safe if getCandidatesFromIndex does not retain the key instance - confirm.
                key.setTaxId(taxonomyId);
                List<SynHit> hitsForTaxon = this.getCandidatesFromIndex(key);
                hits.addAll(hitsForTaxon);
                if (log.isTraceEnabled()) {
                    int geneBegin = geneMention.getOffsets() != null ? geneMention.getBegin() : -1;
                    int geneEnd = geneMention.getOffsets() != null ? geneMention.getEnd() : -1;
                    // Report the count and the ID of the taxon just queried, not the running total and the whole collection.
                    log.trace("Returning {} candidates for gene mention {}[{}-{}] for taxonomy ID {}", hitsForTaxon.size(), key.getGeneName().getText(), geneBegin, geneEnd, taxonomyId);
                }
            }
        }

        Set<String> accumulationSet = UNIT_TEST_GENE_ID_ACCUMULATION_SET;
        if (accumulationSet != null) {
            synchronized (accumulationSet) {
                for (SynHit hit : hits) {
                    accumulationSet.add(hit.getId());
                }
            }
        }
        return hits;
    }

    /**
     * Retrieves candidates for a mention text with an optional gene ID filter; the text is
     * wrapped into a gene mention that uses this instance's normalizer.
     *
     * @param geneMentionText the text to look up
     * @param geneIdsFilter   gene IDs to restrict the search to, or {@code null}
     * @param organism        taxonomy IDs to search for
     * @param queryGenerator  strategy that builds the index query
     * @return the candidates found for the text
     */
    @Override
    public List<SynHit> getCandidates(String geneMentionText, Collection<String> geneIdsFilter, Collection<String> organism, QueryGenerator queryGenerator) {
        return this.getCandidates(new GeneMention(geneMentionText, this.normalizer), geneIdsFilter, organism, queryGenerator);
    }

    /**
     * Retrieves candidates for a mention text, letting the caller decide whether the hit
     * fields are loaded.
     * <p>
     * The {@code loadFields} flag is forwarded to the central lookup; previously it was
     * dropped and fields were always loaded.
     *
     * @param geneMentionText the text to look up; wrapped into a gene mention using this instance's normalizer
     * @param geneIdsFilter   gene IDs to restrict the search to, or {@code null}
     * @param organism        taxonomy IDs to search for; {@code null} or empty means no taxonomy restriction
     * @param loadFields      whether the hit fields should be loaded
     * @param queryGenerator  strategy that builds the index query
     * @return the candidates found for the text
     */
    public List<SynHit> getCandidates(String geneMentionText, Collection<String> geneIdsFilter, Collection<String> organism, boolean loadFields, QueryGenerator queryGenerator) {
        GeneMention geneMention = new GeneMention(geneMentionText, this.normalizer);
        return this.getCandidates(geneMention, geneIdsFilter, organism, loadFields, null, queryGenerator);
    }

    /**
     * Queries the record index directly and adds the elapsed nanoseconds to the class-wide
     * Lucene query time counter.
     *
     * @param key describes the lookup to perform
     * @return the hits delivered by the record index
     * @throws IOException                  if the index lookup fails
     * @throws BooleanQuery.TooManyClauses if the generated query exceeds Lucene's clause limit
     */
    private List<SynHit> getCandidatesFromIndexWithoutCache(CandidateCacheKey key) throws IOException, BooleanQuery.TooManyClauses {
        final long startedAt = System.nanoTime();
        final List<SynHit> result = this.getCandidatesFromRecordIndex(key);
        final long elapsed = System.nanoTime() - startedAt;
        totalLuceneQueryTime.addAndGet(elapsed);
        return result;
    }

    /**
     * Retrieves candidates for a gene mention and at most one organism.
     *
     * @param geneMention    the mention to look up
     * @param organism       a single taxonomy ID, or {@code null} for no taxonomy restriction
     * @param queryGenerator strategy that builds the index query
     * @return the candidates found for the mention
     */
    @Override
    public List<SynHit> getCandidates(GeneMention geneMention, String organism, QueryGenerator queryGenerator) {
        List<String> taxonomyIds;
        if (organism == null) {
            taxonomyIds = Collections.emptyList();
        } else {
            taxonomyIds = Arrays.asList(organism);
        }
        return this.getCandidates(geneMention, taxonomyIds, queryGenerator);
    }

    /**
     * Retrieves candidates for a mention text and at most one organism.
     *
     * @param geneMentionText the text to look up; wrapped into a gene mention using this instance's normalizer
     * @param organism        a single taxonomy ID, or {@code null} for no taxonomy restriction
     * @param queryGenerator  strategy that builds the index query
     * @return the candidates found for the text
     */
    @Override
    public List<SynHit> getCandidates(String geneMentionText, String organism, QueryGenerator queryGenerator) {
        GeneMention mention = new GeneMention(geneMentionText, this.normalizer);
        List<String> taxonomyIds;
        if (organism == null) {
            taxonomyIds = Collections.emptyList();
        } else {
            taxonomyIds = Arrays.asList(organism);
        }
        return this.getCandidates(mention, taxonomyIds, queryGenerator);
    }

    /**
     * Retrieves candidates for a mention text and a collection of organisms; the text is wrapped
     * into a gene mention that uses this instance's normalizer.
     *
     * @param geneMentionText the text to look up
     * @param organism        taxonomy IDs to search for
     * @param queryGenerator  strategy that builds the index query
     * @return the candidates found for the text
     */
    @Override
    public List<SynHit> getCandidates(String geneMentionText, Collection<String> organism, QueryGenerator queryGenerator) {
        return this.getCandidates(new GeneMention(geneMentionText, this.normalizer), organism, queryGenerator);
    }

    /**
     * Looks up the taxonomy ID of a gene.
     *
     * @param geneId the gene ID to resolve
     * @return the taxonomy ID of any gene record found for the ID, or the empty string
     *         (after logging a warning) when no record was found
     */
    @Override
    public String mapGeneIdToTaxId(String geneId) {
        Set<GeneRecordHit> records = this.getGeneRecords(List.of(geneId));
        if (records != null) {
            Optional<GeneRecordHit> anyRecord = records.stream().findAny();
            if (anyRecord.isPresent()) {
                return anyRecord.get().getTaxId();
            }
        }
        log.warn("GeneID: " + geneId + " was not found in the index.");
        return "";
    }

    /**
     * Fetches the records with the given IDs from the gene record index.
     *
     * @param ids the entry IDs to fetch
     * @return one hit per ID that exists in the index
     */
    public List<SynHit> getIndexRecords(Collection<String> ids) {
        return this.getIndexRecords(ids, this.getGeneRecordIndexSearcher());
    }

    /**
     * Fetches the records with the given IDs from the original names index without a gene name
     * to relate the hits to (so the result is not sorted by name match).
     *
     * @param ids the entry IDs to fetch
     * @return one hit per ID that exists in the index
     */
    @Override
    public List<SynHit> getOriginalNamesIndexRecords(Collection<String> ids) {
        return this.getOriginalNamesIndexRecords(ids, null);
    }

    /**
     * Fetches the records with the given IDs from the original names index; the un-normalized
     * text of the gene name (GeneName::getText) is used for matching and sorting.
     *
     * @param ids      the entry IDs to fetch
     * @param geneName the name the hits are related to, or null
     * @return one hit per ID that exists in the index
     */
    @Override
    public List<SynHit> getOriginalNamesIndexRecords(Collection<String> ids, GeneName geneName) {
        // NOTE(review): the searcher field stays null when the original names index is not configured
        // or missing; whether the getter guards against that is not visible here.
        IndexSearcher indexSearcher = this.getGeneRecordOriginalNamesIndexSearcher();
        return this.getIndexRecords(ids, geneName, GeneName::getText, indexSearcher);
    }

    /**
     * Fetches the records with the given IDs from the index behind the given searcher, without a
     * gene name to relate the hits to; the normalized name text is the configured name function.
     *
     * @param ids           the entry IDs to fetch
     * @param indexSearcher the searcher to query
     * @return one hit per ID that exists in the index
     */
    public List<SynHit> getIndexRecords(Collection<String> ids, IndexSearcher indexSearcher) {
        return this.getIndexRecords(ids, null, GeneName::getNormalizedText, indexSearcher);
    }

    /**
     * Fetches index records by their {@code entry_id}.
     * <p>
     * For every ID a filter query is run and, if it matches, the first document is turned into a
     * {@link GeneRecordHit} whose Lucene score is set to 1. IDs without a match are skipped.
     * When a gene name is given, the result is sorted so that exact name matches come first.
     * <p>
     * When {@code UNIT_TEST_GENE_ID_ACCUMULATION_SET} is set, the IDs of the returned hits are
     * added to it while holding the set's monitor, as the {@code getCandidates} lookup does.
     *
     * @param ids           the entry IDs to fetch
     * @param geneName      the name the hits are related to; {@code null} uses a placeholder name
     *                      and leaves the result in ID iteration order
     * @param geneNameFunc  extracts the text variant of the gene name used for matching and sorting
     * @param indexSearcher the searcher to query
     * @return one hit per ID that exists in the index
     * @throws GeneExpRuntimeException if the index cannot be read
     */
    public List<SynHit> getIndexRecords(Collection<String> ids, GeneName geneName, Function<GeneName, String> geneNameFunc, IndexSearcher indexSearcher) {
        try {
            List<SynHit> entries = new ArrayList<>(ids.size());
            for (String id : ids) {
                BooleanClause clause = new BooleanClause(new TermQuery(new Term("entry_id", id)), BooleanClause.Occur.FILTER);
                BooleanQuery query = new BooleanQuery.Builder().add(clause).build();
                TopDocs result = indexSearcher.search((Query) query, 1);
                if (result.totalHits.value <= 0L) {
                    continue;
                }
                ScoreDoc firstHit = result.scoreDocs[0];
                Document document = indexSearcher.doc(firstHit.doc);
                GeneName nameForHit = geneName != null ? geneName : new GeneName("<retrieved by id>", this.normalizer);
                GeneRecordHit recordHit = this.getRecordHit(true, nameForHit, geneNameFunc, firstHit, document);
                // Records fetched by ID have no meaningful relevance score.
                recordHit.setLuceneScore(1.0f);
                entries.add(recordHit);
            }
            if (geneName != null) {
                entries.sort(GeneRecordHit.getNormalizedExactMatchThenLuceneScoreComparator(geneNameFunc.apply(geneName), false));
            }
            // Read the static reference once so the null check and the use see the same set.
            Set<String> accumulationSet = UNIT_TEST_GENE_ID_ACCUMULATION_SET;
            if (accumulationSet != null) {
                synchronized (accumulationSet) {
                    for (SynHit entry : entries) {
                        accumulationSet.add(entry.getId());
                    }
                }
            }
            return entries;
        } catch (IOException e) {
            throw new GeneExpRuntimeException(e);
        }
    }

    /**
     * Scores the given gene IDs against all synonyms at once: the synonyms are joined with
     * single spaces into one gene name, which is then looked up restricted to the given IDs.
     * Up to 1000 hits are requested and hit fields are not loaded.
     *
     * @param allSynonyms the synonyms forming the bag-of-words query
     * @param ids         the gene IDs the lookup is restricted to
     * @param qg          strategy that builds the index query
     * @return the hits delivered for the joined synonyms
     */
    @Override
    public List<SynHit> scoreIdsByBoWSynonyms(Collection<String> allSynonyms, Set<String> ids, QueryGenerator qg) {
        String joinedSynonyms = String.join(" ", allSynonyms);
        GeneName bagOfSynonyms = new GeneName(joinedSynonyms, this.normalizer);
        CandidateCacheKey key = new CandidateCacheKey(bagOfSynonyms, null);
        key.setGeneIdsFilter(new HashSet<String>(ids));
        key.setQueryGenerator(qg);
        key.setMaxHits(1000);
        key.setLoadSynHitFields(false);
        return this.getCandidatesFromIndex(key);
    }

    /**
     * Scores gene IDs by looking up every given gene name in the index, restricted to the given IDs.
     * <p>
     * For each name a lookup with up to 1000 hits is done; the Lucene scores of the hits are summed
     * per hit ID. When {@code synhit2namesFunc} is given, the names it extracts from each hit are
     * additionally collected per hit ID, filtered according to {@code queryType}:
     * <ul>
     * <li>{@code "exact"}: a name is kept if it equals the normalized text of the queried name;</li>
     * <li>{@code "apprx"}: a name is kept if it is not one of the normalized input names of that ID
     * but shares at least one whitespace-separated token with them. IDs that end up with an empty
     * name set are removed from both result maps.</li>
     * </ul>
     *
     * @param queryType        "exact" or "apprx"; any other value collects no names
     * @param ids2entities     gene IDs mapped to the gene names to look up for them
     * @param synhit2namesFunc extracts the names of interest from a hit; null disables name collection
     *                         (and the loading of hit fields)
     * @param qg               strategy that builds the index query
     * @return pair of (hit ID to summed Lucene score, hit ID to collected names)
     */
    @Override
    public Pair<Map<String, Double>, Map<String, Set<String>>> scoreSynonymsRecordIndex(String queryType, Map<String, Collection<GeneName>> ids2entities, Function<GeneRecordHit, String[]> synhit2namesFunc, QueryGenerator qg) {
        HashMap<String, Double> scores = new HashMap<String, Double>();
        HashMap<String, Set> ids2synonyms = new HashMap<String, Set>();
        // Per ID: the set of whitespace-separated tokens of all normalized input names.
        Map ids2geneNameTokens = ids2entities.keySet().stream().collect(Collectors.toMap(Function.identity(), id -> ((Collection)ids2entities.get(id)).stream().flatMap(gn -> Stream.of(gn.getNormalizedText().split("\\s+"))).collect(Collectors.toSet())));
        // Per ID: the set of complete normalized input names.
        Map ids2geneNames = ids2entities.keySet().stream().collect(Collectors.toMap(Function.identity(), id -> ((Collection)ids2entities.get(id)).stream().flatMap(gn -> Stream.of(gn.getNormalizedText())).collect(Collectors.toSet())));
        // Iterate over all input names of all IDs; each one is queried against all IDs.
        Iterator namesIt = ids2entities.values().stream().flatMap(Collection::stream).iterator();
        while (namesIt.hasNext()) {
            GeneName synonym = (GeneName)namesIt.next();
            CandidateCacheKey cacheKey = new CandidateCacheKey(synonym, null);
            cacheKey.setGeneIdsFilter(new HashSet<String>(ids2entities.keySet()));
            cacheKey.setQueryGenerator(qg);
            cacheKey.setMaxHits(1000);
            // Hit fields are only needed when names are to be extracted from the hits.
            cacheKey.setLoadSynHitFields(synhit2namesFunc != null);
            String geneName = cacheKey.getGeneName().getNormalizedText();
            // True if the name shares at least one whitespace-separated token with the given token set.
            BiFunction<String, Set, Boolean> namesHaveTokenOverlap = (name, tokens) -> Stream.of(name.split("\\s+")).anyMatch(tokens::contains);
            for (SynHit sh : this.getCandidatesFromIndex(cacheKey)) {
                String hitId = sh.getId();
                // Scores of the same ID accumulate over all queried names.
                scores.merge(hitId, Double.valueOf(sh.getLuceneScore()), (s1, s2) -> s1 + s2);
                if (synhit2namesFunc == null) continue;
                // Make sure there is a (possibly empty) name set for every hit ID.
                Set names = ids2synonyms.compute(hitId, (k, v) -> v != null ? v : new HashSet());
                String[] newnames = synhit2namesFunc.apply((GeneRecordHit)sh);
                if (newnames == null) continue;
                for (String name2 : newnames) {
                    boolean equalsName = geneName.equals(name2);
                    // Keep name2 only if it is non-null and either (exact mode and equal to the queried name)
                    // or (apprx mode, not an input name of this ID, and overlapping in tokens with the input names).
                    // NOTE(review): in apprx mode this assumes hitId is a key of ids2entities (expected because
                    // of the gene ID filter); otherwise the map lookups return null.
                    if (name2 == null || (!queryType.equals("exact") || !equalsName) && (!queryType.equals("apprx") || ids2geneNames.get(hitId).contains(name2) || !namesHaveTokenOverlap.apply(name2, ids2geneNameTokens.get(hitId)).booleanValue())) continue;
                    names.add(name2);
                }
            }
        }
        if (queryType.equals("apprx")) {
            // In apprx mode, drop IDs for which no name survived the filter above.
            for (String id2 : ids2entities.keySet()) {
                Set synonyms4id = (Set)ids2synonyms.get(id2);
                if (synonyms4id == null || !synonyms4id.isEmpty()) continue;
                ids2synonyms.remove(id2);
                scores.remove(id2);
            }
        }
        return new ImmutablePair<Map<String, Double>, Map<String, Set<String>>>(scores, ids2synonyms);
    }

    /**
     * Convenience overload that forwards to the six-argument {@code getCandidates} variant, passing an
     * empty list as the second argument and {@code true} as the fourth argument.
     *
     * @param gm             the gene mention, passed through unchanged
     * @param taxId          taxonomy ids, passed through unchanged
     * @param parameters     parameters, passed through unchanged
     * @param queryGenerator the query generator, passed through unchanged
     * @return whatever the six-argument overload returns
     */
    @Override
    public List<SynHit> getCandidates(GeneMention gm, Collection<String> taxId, Parameters parameters, QueryGenerator queryGenerator) {
        // NOTE(review): the empty list is presumably "no gene-id restriction"; confirm against the delegate.
        return this.getCandidates(gm, Collections.emptyList(), taxId, true, parameters, queryGenerator);
    }

    /**
     * Assembles the weight map for one request.
     * <p>
     * The result contains an entry for {@code "dismax_tie_breaker"}, one for every field in
     * {@link GeneRecordQueryGenerator#ALL_FIELDS} and one {@code <field>_exact} entry for every field in
     * {@link GeneRecordQueryGenerator#SYNONYM_FIELDS}. Each value is taken from {@code parameterMap} under
     * the key {@code Configuration.dot("candidate_retrieval", name)} if present, otherwise from
     * {@code globalFieldWeights}, otherwise from a built-in default (0.3 for the tie breaker, 1.0 for
     * field weights).
     *
     * @param parameterMap per-request overrides; {@code null} is treated as an empty map
     * @return a new, mutable map from weight name to weight
     * @throws NumberFormatException if an override cannot be parsed as a float
     */
    private Map<String, Float> getFieldWeightsFromParameters(Map<String, Object> parameterMap) {
        Map<String, Object> overrides = parameterMap != null ? parameterMap : Collections.<String, Object>emptyMap();
        Map<String, Float> fieldWeights = new HashMap<String, Float>();
        fieldWeights.put("dismax_tie_breaker", this.resolveFieldWeight(overrides, "dismax_tie_breaker", 0.3f));
        for (String field : GeneRecordQueryGenerator.ALL_FIELDS) {
            fieldWeights.put(field, this.resolveFieldWeight(overrides, field, 1.0f));
        }
        for (String field : GeneRecordQueryGenerator.SYNONYM_FIELDS) {
            String exactFieldName = field + "_exact";
            fieldWeights.put(exactFieldName, this.resolveFieldWeight(overrides, exactFieldName, 1.0f));
        }
        return fieldWeights;
    }

    /**
     * Resolves a single weight: request override first, then the global weights, then {@code fallback}.
     * Overrides may be given as a {@link Number} or as anything whose {@code toString()} parses as a float.
     */
    private Float resolveFieldWeight(Map<String, Object> overrides, String weightName, float fallback) {
        Object configured = overrides.get(Configuration.dot("candidate_retrieval", weightName));
        if (configured == null) {
            return this.globalFieldWeights.getOrDefault(weightName, Float.valueOf(fallback));
        }
        if (configured instanceof Number) {
            // The map is typed Map<String, Object>, so numeric values are legal and must not fail a String cast.
            return Float.valueOf(((Number)configured).floatValue());
        }
        return Float.valueOf(Float.parseFloat(configured.toString()));
    }

    /**
     * Closes the index readers behind both searchers and drops the reference to the original-names
     * searcher.
     * <p>
     * Both readers are always attempted: a failure while closing the first one no longer prevents the
     * second one from being closed. If both fail, the second exception is attached to the first as a
     * suppressed exception.
     *
     * @throws GeneExpRuntimeException wrapping the {@link IOException} raised while closing a reader
     */
    @Override
    public void close() {
        IOException failure = null;
        try {
            if (this.geneRecordIndexSearcher != null) {
                this.geneRecordIndexSearcher.getIndexReader().close();
            }
        }
        catch (IOException e) {
            failure = e;
        }
        try {
            if (this.geneRecordOriginalNamesIndexSearcher != null && this.geneRecordOriginalNamesIndexSearcher.getIndexReader() != null) {
                this.geneRecordOriginalNamesIndexSearcher.getIndexReader().close();
            }
        }
        catch (IOException e) {
            if (failure == null) {
                failure = e;
            } else {
                failure.addSuppressed(e);
            }
        }
        finally {
            this.geneRecordOriginalNamesIndexSearcher = null;
        }
        if (failure != null) {
            throw new GeneExpRuntimeException(failure);
        }
    }

    /**
     * Retrieves hits for the mention's gene name with a term filter on the {@code entity_type} field set
     * to the name of {@link GeneMention.SpecificType#FAMILYNAME}.
     * <p>
     * The cache key is created from {@code gm.getGeneName()}, requests loading of the hit fields, and is
     * resolved through {@code getCandidatesFromIndex}.
     *
     * @param gm             the mention whose gene name is searched
     * @param queryGenerator the query generator set on the cache key
     * @return the hits returned by {@code getCandidatesFromIndex} for that key
     */
    @Override
    public List<SynHit> getFamilyNames(GeneMention gm, QueryGenerator queryGenerator) {
        final CandidateCacheKey familyKey = new CandidateCacheKey(gm.getGeneName());
        familyKey.setLoadSynHitFields(true);
        final String familyType = GeneMention.SpecificType.FAMILYNAME.name();
        familyKey.setTermFilter("entity_type", familyType);
        familyKey.setQueryGenerator(queryGenerator);
        final List<SynHit> familyHits = this.getCandidatesFromIndex(familyKey);
        return familyHits;
    }

    /**
     * Returns the hits for {@code key}, consulting {@code candidateCache} first when
     * {@code useLuceneCandidateCache} is set.
     * <p>
     * On a miss (or with the cache switched off) the hits come from
     * {@code getCandidatesFromIndexWithoutCache} and, if the cache is enabled, are stored under
     * {@code key}. The hit/miss counters are updated accordingly. The returned list always contains
     * clones of the hits, never the instances that were stored in the cache.
     * <p>
     * NOTE(review): the interval added to {@code totalCacheGettime} starts before the cache lookup and
     * ends after the index retrieval, and the interval added to {@code totalCachePuttime} starts before
     * the index retrieval. Both therefore include the retrieval time and are only recorded on a miss;
     * confirm that this is the intended metric.
     *
     * @param key the fully configured cache key
     * @return a new list with a clone of every hit
     * @throws GeneExpRuntimeException if the index access fails with an {@link IOException}
     */
    private List<SynHit> getCandidatesFromIndex(CandidateCacheKey key) {
        final long lookupStart = System.nanoTime();
        final boolean cacheEnabled = this.useLuceneCandidateCache;
        List<SynHit> hits = null;
        if (cacheEnabled) {
            hits = this.candidateCache.get(key);
        }
        if (hits != null) {
            cacheHits.addAndGet(1L);
        } else {
            cacheMisses.addAndGet(1L);
            final long retrievalStart = System.nanoTime();
            try {
                hits = this.getCandidatesFromIndexWithoutCache(key);
            }
            catch (IOException e) {
                throw new GeneExpRuntimeException(e);
            }
            totalCacheGettime.addAndGet(System.nanoTime() - lookupStart);
            if (cacheEnabled) {
                this.candidateCache.put(key, hits);
            }
            totalCachePuttime.addAndGet(System.nanoTime() - retrievalStart);
        }
        // Hand out copies so callers cannot modify what is held by the cache.
        return hits.stream().map(SynHit::clone).collect(Collectors.toList());
    }

    /**
     * Returns the current value of the {@code geneRecordOriginalNamesIndexSearcher} field.
     *
     * @return the searcher, or {@code null} if the field is unset (for example, {@code close()} sets it
     *         to {@code null})
     */
    private IndexSearcher getGeneRecordOriginalNamesIndexSearcher() {
        return this.geneRecordOriginalNamesIndexSearcher;
    }

    /**
     * Runs the query described by {@code key} against the gene record index and returns the hits sorted
     * with {@code GeneRecordHit.getNormalizedExactMatchThenLuceneScoreComparator}.
     * <p>
     * The gene-id filter of the key is processed in batches of at most
     * {@link BooleanQuery#getMaxClauseCount()} ids. Exactly one query is issued when the filter is empty
     * or fits into a single batch; otherwise one query per batch is issued and the hits of all batches
     * are concatenated (each batch may contribute up to {@code key.getMaxHits()} hits).
     * <p>
     * While a batch is queried, the key's gene-id filter is temporarily replaced by that batch. The
     * original filter is restored before the method returns or throws, so the key can still be used
     * unchanged by the caller afterwards.
     *
     * @param key the query description; its gene-id filter must not be {@code null}
     * @return the sorted hits of all batches
     * @throws IOException              if the index cannot be read
     * @throws IllegalArgumentException if query generation or search raised
     *                                  {@code BooleanQuery.TooManyClauses}
     */
    private List<SynHit> getCandidatesFromRecordIndex(CandidateCacheKey key) throws IOException {
        Collection<String> originalFilter = key.getGeneIdsFilter();
        try {
            ArrayList<SynHit> ret = new ArrayList<SynHit>();
            List<String> allGeneFilterIds = originalFilter instanceof List ? (List<String>)originalFilter : new ArrayList<String>(originalFilter);
            int maxClauseCount = BooleanQuery.getMaxClauseCount();
            int total = allGeneFilterIds.size();
            int from = 0;
            // do/while: the query must run once even when there is no id filter at all.
            do {
                // Written as a subtraction so that a very large clause limit cannot overflow the index.
                int to = total - from <= maxClauseCount ? total : from + maxClauseCount;
                List<String> currentBatch = from == 0 && to == total ? allGeneFilterIds : allGeneFilterIds.subList(from, to);
                key.setGeneIdsFilter(currentBatch);
                Query query = key.generateQuery();
                TopScoreDocCollector resultsCollector = TopScoreDocCollector.create(key.getMaxHits(), key.getMaxHits());
                IndexSearcher indexSearcher = this.getGeneRecordIndexSearcher();
                indexSearcher.search(query, resultsCollector);
                TopDocs topDocs = resultsCollector.topDocs();
                boolean loadSynHitFields = key.isLoadSynHitFields();
                for (ScoreDoc doc : topDocs.scoreDocs) {
                    Document document = indexSearcher.doc(doc.doc);
                    GeneRecordHit sh = this.getRecordHit(loadSynHitFields, key.getGeneName(), GeneName::getNormalizedText, doc, document);
                    ret.add(sh);
                }
                // Always advance, also in the single-batch case; otherwise the loop would never terminate.
                from = to;
            } while (from < total);
            Collections.sort(ret, GeneRecordHit.getNormalizedExactMatchThenLuceneScoreComparator(key.getGeneName().getNormalizedText(), true));
            return ret;
        }
        catch (BooleanQuery.TooManyClauses e) {
            log.warn("Got too many clauses exception from gene name \"{}\". Assuming that this is a tagging error and not returning any candidates.", (Object)key.getGeneName().getText());
            throw new IllegalArgumentException(e);
        }
        finally {
            // Undo the per-batch mutation so the caller's key keeps the filter it was created with.
            key.setGeneIdsFilter(originalFilter);
        }
    }

    /**
     * Converts an index document into a {@link GeneRecordHit}.
     * <p>
     * The hit always carries the {@code entry_id}, the {@code symbol} (empty string if the document has
     * none), the Lucene score of {@code doc}, and the mapped gene name/mention ({@code "none"} as
     * mention text when {@code geneName} is {@code null}). When {@code loadSynHitFields} is set, the
     * taxonomy id and the remaining stored record fields are copied as well, and, if a gene name is
     * given, the best matching synonym of the record is determined (see
     * {@code assignBestSynonymMatch}). If no synonym was set on the hit, the symbol is used as synonym.
     *
     * @param loadSynHitFields whether the full record (tax id, names, synonym match) is loaded
     * @param geneName         the queried gene name; may be {@code null}
     * @param geneNameFunc     yields the string form of a gene name used for matching
     * @param doc              the score document the score is read from
     * @param document         the stored document the field values are read from
     * @return the populated hit
     */
    private GeneRecordHit getRecordHit(boolean loadSynHitFields, GeneName geneName, Function<GeneName, String> geneNameFunc, ScoreDoc doc, Document document) {
        String id = document.getField("entry_id").stringValue();
        // The taxonomy id is only read when the complete record is requested.
        String taxId = loadSynHitFields ? document.getField("tax_id").stringValue() : null;
        IndexableField symbolField = document.getField("symbol");
        String symbol = symbolField != null ? symbolField.stringValue() : "";
        GeneRecordHit sh = new GeneRecordHit(symbol, doc.score, id, "<no source specified>");
        sh.setMappedMention(geneName != null ? geneName.getText() : "none");
        sh.setMappedGeneName(geneName);
        sh.setLuceneScore(doc.score);
        if (taxId != null) {
            sh.setTaxIds(Collections.singletonList(taxId));
            sh.setTaxId(taxId);
        }
        if (loadSynHitFields) {
            long time = System.nanoTime();
            sh.setSymbol(symbol);
            IndexableField nomenclatureField = document.getField("symbol_from_nomenclature");
            if (nomenclatureField != null) {
                sh.setNomenclature(nomenclatureField.stringValue());
            }
            IndexableField chromosomeField = document.getField("chromosome");
            if (chromosomeField != null) {
                sh.setChromosome(chromosomeField.stringValue());
            }
            IndexableField mapLocationField = document.getField("maplocation");
            if (mapLocationField != null) {
                sh.setMapLocation(mapLocationField.stringValue());
            }
            sh.setSynonyms(storedFieldValues(document, "synonyms"));
            sh.setFullNames(storedFieldValues(document, "full_names"));
            sh.setOtherDesignations(storedFieldValues(document, "other_designations"));
            sh.setXrefs(storedFieldValues(document, "xrefs"));
            sh.setUniprotNames(storedFieldValues(document, "uniprot_names"));
            sh.setBioThesaurusNames(storedFieldValues(document, "bio_thesaurus"));
            IndexableField ecNumberField = document.getField("ecnumber");
            if (ecNumberField != null) {
                sh.setEcNumber(ecNumberField.stringValue());
            }
            if (geneName != null) {
                this.assignBestSynonymMatch(sh, geneName, geneNameFunc, document);
            }
            totalGeneRecordFieldLoadingTime.addAndGet(System.nanoTime() - time);
        }
        if (sh.getSynonym() == null) {
            sh.setSynonym(symbol);
        }
        return sh;
    }

    /** Returns the string values of all fields named {@code fieldName} in {@code document}, in document order. */
    private static String[] storedFieldValues(Document document, String fieldName) {
        return Arrays.stream(document.getFields(fieldName)).map(IndexableField::stringValue).toArray(String[]::new);
    }

    /**
     * Sets synonym, synonym field and similarity score of {@code sh} from the record's name fields
     * ({@link GeneRecordQueryGenerator#SYNONYM_FIELDS} followed by {@code "ecnumber"}).
     * <p>
     * If any field value equals the gene name or one of its alternatives (as rendered by
     * {@code geneNameFunc}), the first such value is an exact match with similarity 1.0. Otherwise the
     * value with the highest Jaro-Winkler score against the primary name is chosen; if no value scores
     * above 0, synonym and field are set to {@code null} and the score to 0.
     */
    private void assignBestSynonymMatch(GeneRecordHit sh, GeneName geneName, Function<GeneName, String> geneNameFunc, Document document) {
        String name = geneNameFunc.apply(geneName);
        Set<String> names = new HashSet<String>();
        names.add(name);
        geneName.getAlternatives().stream().map(geneNameFunc).forEach(names::add);
        List<String> matchFields = new ArrayList<String>(Arrays.asList(GeneRecordQueryGenerator.SYNONYM_FIELDS));
        matchFields.add("ecnumber");

        // First pass: look for a value that is literally one of the queried names.
        for (String matchField : matchFields) {
            for (IndexableField field : document.getFields(matchField)) {
                String value = field.stringValue();
                if (names.contains(value)) {
                    sh.setExactMatch(true);
                    sh.setSynonym(value);
                    sh.setSynonymField(field.name());
                    sh.setSynonymSimilarityScore(1.0);
                    return;
                }
            }
        }

        // Second pass: score every value; a value occurring in several fields keeps the last field seen.
        Scorer jaroWinklerScorer = new JaroWinklerScorer();
        Map<String, ImmutablePair<String, Double>> jaroWinklerScores = new HashMap<String, ImmutablePair<String, Double>>();
        for (String matchField : matchFields) {
            for (IndexableField field : document.getFields(matchField)) {
                String value = field.stringValue();
                double score = jaroWinklerScorer.getScore(value, name);
                jaroWinklerScores.put(value, new ImmutablePair<String, Double>(matchField, score));
            }
        }
        // Only fill in the approximate match when the hit is not flagged as an exact match.
        if (sh.isExactMatch()) {
            return;
        }
        double bestApproxScore = 0.0;
        String bestApproxMatch = null;
        String bestMatchField = null;
        for (Map.Entry<String, ImmutablePair<String, Double>> scored : jaroWinklerScores.entrySet()) {
            double score = scored.getValue().getRight();
            if (score > bestApproxScore) {
                bestApproxScore = score;
                bestApproxMatch = scored.getKey();
                bestMatchField = scored.getValue().getLeft();
            }
        }
        sh.setSynonym(bestApproxMatch);
        sh.setSynonymField(bestMatchField);
        sh.setSynonymSimilarityScore(bestApproxScore);
    }

    /**
     * Sets the values of the given fields on every hit, using {@code geneRecordFieldCache} where possible
     * and loading the remaining values from the gene record index with a single query.
     * <p>
     * Values are applied through the setter registered for the field name in {@code fullTextFieldSetter}.
     * Values loaded from the index are put into the cache under the key (entry id, field name). Several
     * hits may refer to the same entry id; all of them receive the loaded values. The id filter of the
     * index query contains each entry id only once, regardless of how many fields are requested.
     *
     * @param recordHits   the hits to complete; every element must be a {@link GeneRecordHit}
     * @param fieldsToLoad names of the fields to set; each must have a registered setter
     * @throws GeneExpRuntimeException if the index access fails with an {@link IOException}
     */
    @Override
    public void setFulltextFieldsToRecordHits(Collection<? extends SynHit> recordHits, Collection<String> fieldsToLoad) {
        // (entry id, field name) -> all hits still waiting for that value
        Map<ImmutablePair<String, String>, List<GeneRecordHit>> pending = new HashMap<ImmutablePair<String, String>, List<GeneRecordHit>>();
        for (SynHit synHit : recordHits) {
            GeneRecordHit grh = (GeneRecordHit)synHit;
            for (String fieldName : fieldsToLoad) {
                ImmutablePair<String, String> cacheKey = new ImmutablePair<String, String>(synHit.getId(), fieldName);
                String[] fieldValues = this.geneRecordFieldCache.get(cacheKey);
                if (fieldValues != null) {
                    fullTextFieldSetter.get(fieldName).accept(grh, fieldValues);
                    continue;
                }
                pending.computeIfAbsent(cacheKey, k -> new ArrayList<GeneRecordHit>()).add(grh);
            }
        }
        if (pending.isEmpty()) {
            return;
        }
        // One filter clause per id, not per (id, field) pair, to keep the clause count minimal.
        List<String> idsToLoad = pending.keySet().stream().map(key -> key.getLeft()).distinct().collect(Collectors.toList());
        try {
            IndexSearcher indexSearcher = this.getGeneRecordIndexSearcher();
            BooleanQuery.Builder filterBuilder = new BooleanQuery.Builder();
            for (String id : idsToLoad) {
                filterBuilder.add(new TermQuery(new Term("entry_id", id)), BooleanClause.Occur.SHOULD);
            }
            BooleanQuery.Builder builder = new BooleanQuery.Builder();
            builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
            builder.add(filterBuilder.build(), BooleanClause.Occur.FILTER);
            TopDocs topdocs = indexSearcher.search((Query)builder.build(), recordHits.size());
            for (ScoreDoc sd : topdocs.scoreDocs) {
                Document document = indexSearcher.doc(sd.doc);
                String id = document.getField("entry_id").stringValue();
                for (String fieldName : fieldsToLoad) {
                    ImmutablePair<String, String> cacheKey = new ImmutablePair<String, String>(id, fieldName);
                    List<GeneRecordHit> waitingHits = pending.get(cacheKey);
                    if (waitingHits == null) {
                        continue;
                    }
                    String[] fieldValues = Arrays.stream(document.getFields(fieldName)).map(IndexableField::stringValue).toArray(String[]::new);
                    for (GeneRecordHit waitingHit : waitingHits) {
                        fullTextFieldSetter.get(fieldName).accept(waitingHit, fieldValues);
                    }
                    this.geneRecordFieldCache.put(cacheKey, fieldValues);
                }
            }
        }
        catch (IOException e) {
            throw new GeneExpRuntimeException(e);
        }
    }

    /**
     * Loads the gene records with the given entry ids from the gene record index.
     * <p>
     * Each matching document is converted with {@code getRecordHit}, with all record fields loaded and
     * without a gene name to match against. If {@code UNIT_TEST_GENE_ID_ACCUMULATION_SET} is set, the ids
     * of the returned hits are added to it.
     *
     * @param ids the entry ids to load; an empty collection yields an empty result without touching the
     *            index
     * @return a new, mutable set with one hit per matching document
     * @throws GeneExpRuntimeException if the index access fails with an {@link IOException}
     */
    public Set<GeneRecordHit> getGeneRecords(Collection<String> ids) {
        Set<GeneRecordHit> hits = new HashSet<GeneRecordHit>();
        if (ids.isEmpty()) {
            // A search for zero hits is rejected by Lucene, and there is nothing to look up anyway.
            return hits;
        }
        try {
            IndexSearcher indexSearcher = this.getGeneRecordIndexSearcher();
            BooleanQuery.Builder filterBuilder = new BooleanQuery.Builder();
            for (String id : ids) {
                filterBuilder.add(new TermQuery(new Term("entry_id", id)), BooleanClause.Occur.SHOULD);
            }
            BooleanQuery.Builder mainBuilder = new BooleanQuery.Builder();
            mainBuilder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
            mainBuilder.add(filterBuilder.build(), BooleanClause.Occur.FILTER);
            TopDocs topdocs = indexSearcher.search((Query)mainBuilder.build(), ids.size());
            for (ScoreDoc sd : topdocs.scoreDocs) {
                Document doc = indexSearcher.doc(sd.doc);
                hits.add(this.getRecordHit(true, null, x -> "<none>", sd, doc));
            }
        }
        catch (IOException e) {
            throw new GeneExpRuntimeException(e);
        }
        Set<String> accumulator = UNIT_TEST_GENE_ID_ACCUMULATION_SET;
        if (accumulator != null) {
            synchronized (accumulator) {
                for (GeneRecordHit hit : hits) {
                    accumulator.add(hit.getId());
                }
            }
        }
        return hits;
    }
}

