/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.genemapper.mappingcores;

import com.google.common.collect.Sets;
import de.julielab.gene.candidateretrieval.BooleanQueryGenerator;
import de.julielab.gene.candidateretrieval.CandidateRetrieval;
import de.julielab.gene.candidateretrieval.GeneRecordHit;
import de.julielab.gene.candidateretrieval.GeneRecordQueryGenerator;
import de.julielab.gene.candidateretrieval.LuceneCandidateRetrieval;
import de.julielab.gene.candidateretrieval.NameCentricRetrieval;
import de.julielab.geneexpbase.CandidateFilter;
import de.julielab.geneexpbase.TermNormalizer;
import de.julielab.geneexpbase.candidateretrieval.QueryGenerator;
import de.julielab.geneexpbase.candidateretrieval.SynHit;
import de.julielab.geneexpbase.configuration.Parameters;
import de.julielab.geneexpbase.genemodel.GeneDocument;
import de.julielab.geneexpbase.genemodel.GeneMention;
import de.julielab.geneexpbase.genemodel.GeneName;
import de.julielab.geneexpbase.genemodel.GeneOrthologs;
import de.julielab.geneexpbase.genemodel.GeneSet;
import de.julielab.geneexpbase.genemodel.MentionMappingResult;
import de.julielab.genemapper.Configuration;
import de.julielab.genemapper.disambig.RRFSynHitListFusion;
import de.julielab.genemapper.disambig.SynHitListFusion;
import de.julielab.genemapper.evaluation.tools.Stats;
import de.julielab.genemapper.mappingcores.DypsisCandidateRanker;
import de.julielab.genemapper.mappingcores.DypsisMappingCore;
import de.julielab.genemapper.utils.GeneMapperException;
import de.julielab.genemapper.utils.GeneMapperInitializationException;
import de.julielab.java.utilities.FileUtilities;
import de.julielab.speciesassignment.services.SpeciesHintSetter;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.function.UnaryOperator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import javax.inject.Inject;
import org.apache.commons.lang3.Range;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.lucene.search.BooleanClause;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class DypsisCandidateSetter {
    /** Compile-time switch; set to {@code false} in this build. */
    public static final boolean recordStats = false;
    private static final Logger log = LoggerFactory.getLogger(DypsisCandidateSetter.class);

    /** True if any candidate carries at least one of the given gene IDs. */
    private static final BiFunction<List<SynHit>, Collection<String>, Boolean> hasCorrectEntryFunc = (hits, ids) -> hits.stream().anyMatch(hit -> hit.getIds().stream().anyMatch(ids::contains));
    /** Same check as {@link #hasCorrectEntryFunc}, applied across all per-taxonomy candidate lists of a map. */
    private static final BiFunction<Map<String, List<SynHit>>, Collection<String>, Boolean> hasCorrectEntryInTax2CandidatesFunc = (byTax, ids) -> byTax.values().stream().flatMap(Collection::stream).anyMatch(hit -> hit.getIds().stream().anyMatch(ids::contains));
    // Gene ID checks restricted to the first 1, 3, 5, 10 and 15 entries of the candidate list.
    private static final BiFunction<List<SynHit>, Collection<String>, Boolean> hasCorrectEntryFuncTop1 = DypsisCandidateSetter.correctEntryWithinTop(1);
    private static final BiFunction<List<SynHit>, Collection<String>, Boolean> hasCorrectEntryFuncTop3 = DypsisCandidateSetter.correctEntryWithinTop(3);
    private static final BiFunction<List<SynHit>, Collection<String>, Boolean> hasCorrectEntryFuncTop5 = DypsisCandidateSetter.correctEntryWithinTop(5);
    private static final BiFunction<List<SynHit>, Collection<String>, Boolean> hasCorrectEntryFuncTop10 = DypsisCandidateSetter.correctEntryWithinTop(10);
    private static final BiFunction<List<SynHit>, Collection<String>, Boolean> hasCorrectEntryFuncTop15 = DypsisCandidateSetter.correctEntryWithinTop(15);

    /**
     * Builds a check that looks only at the first {@code n} candidates (or fewer, if the list is shorter)
     * and reports whether one of them carries any of the given gene IDs.
     */
    private static BiFunction<List<SynHit>, Collection<String>, Boolean> correctEntryWithinTop(int n) {
        return (hits, ids) -> {
            List<SynHit> head = hits.subList(0, Math.min(n, hits.size()));
            return head.stream().anyMatch(hit -> hit.getIds().stream().anyMatch(ids::contains));
        };
    }

    /** True if the list is non-empty and its first element is an exact match. */
    private static final Function<List<SynHit>, Boolean> hasExactMatchFunc = hits -> !hits.isEmpty() && hits.get(0).isExactMatch();
    /** True if some candidate is an exact match whose taxonomy ID equals the given one. */
    private static final BiFunction<List<SynHit>, String, Boolean> hasExactCandidate4taxid = (hits, taxId) -> hits.stream().anyMatch(hit -> hit.isExactMatch() && hit.getTaxId().equals(taxId));
    /** True if the list has at least two elements and the second one is an exact match. */
    private static final Function<List<SynHit>, Boolean> hasMultipleExactMatchFunc = hits -> hits.size() > 1 && hits.get(1).isExactMatch();
    /** Candidates of the given taxonomy ID whose lexical score equals that of the first list element (0.0 for an empty list). */
    private static final BiFunction<List<SynHit>, String, List<SynHit>> getBestSynHitsForTax = (hits, taxId) -> {
        final double topScore = hits.isEmpty() ? 0.0 : hits.get(0).getLexicalScore();
        return hits.stream().filter(hit -> taxId.equals(hit.getTaxId())).filter(hit -> hit.getLexicalScore() == topScore).collect(Collectors.toList());
    };
    /** A candidate list counts as "full" once it is non-null and holds at least 20 entries. */
    private static final Predicate<List<SynHit>> candidatesFull = hits -> hits != null && hits.size() >= 20;
    /** True if any candidate in any of the per-taxonomy lists is flagged as a family name. */
    private static final Function<Map<String, List<SynHit>>, Boolean> hasFamilyName = byTax -> byTax.values().stream().flatMap(Collection::stream).anyMatch(SynHit::isFamilyName);

    // Diagnostic output switches; all disabled in this build.
    private static final boolean printOrthologyResolution = false;
    private static final boolean printMentionsWithoutCorrectCandidates = false;
    private static final boolean printCertainButWrong = false;

    // Per-instance matchers created on an empty input; callers are expected to reset(...) them before use.
    private final Matcher nonDescM = Pattern.compile(CandidateFilter.NON_DESCRIPTIVE).matcher("");
    private final Matcher aminoAcidsM = Pattern.compile(CandidateFilter.AMINO_ACIDS).matcher("");
    private final Matcher modifierM = Pattern.compile(CandidateFilter.MODIFIER).matcher("");
    private final Matcher wsNormM = Pattern.compile("\\s+").matcher("");

    // Collaborators, assigned once in the constructor.
    private final CandidateRetrieval candidateRetrieval;
    private final DypsisCandidateRanker candidateRanker;
    private final GeneOrthologs geneOrthologs;
    private final QueryGenerator queryGenerator;
    private final NameCentricRetrieval nameCentricRetrieval;
    private final CandidateFilter candidateFilter;

    // NOTE(review): hard-coded key strings of unknown meaning (possibly document-ID/offset concatenations) - confirm purpose with the owner.
    Set<String> known = Set.of("8774743480544", "2115402814857", "11210186161165", "17889706388400", "17277899117123", "10942389611", "10942389357381", "10942389687692", "10942389745750", "1094238910901095", "1094238911651170", "1094238912271232", "8990205646650", "8990205655659", "8428657321327", "8428657899905", "842865712031209", "842865712931299", "8643607538559", "6431896411432", "1072271813491376", "15834508417432", "77903635559", "7790363310313", "77903639751034", "84866311641", "792062846105", "7920628145157", "7920628159177", "7920628348366", "7920628713731", "375867810771082", "10753886116123", "10753886124157", "10753886433440", "10753886531538", "10753886718730", "10753886960967", "1075388611391151", "1075388612301242", "1075388614051416", "1075388614221429", "1075388615121519", "9479038342370", "8643544570594", "9480843669673", "948084312801284", "11001923824839", "10903743128137", "9092545715733", "3202857302326", "856317110401044", "947366910781085", "2739739525", "2365818665676", "1648233374380", "1648233515521");

    /**
     * Wires up the collaborators, creates a default {@link GeneRecordQueryGenerator} and a
     * {@link CandidateFilter}, and loads the lexical candidate ranking model named in the configuration.
     *
     * @param config               configuration passed on to {@link #loadLexicalCandidateRankingModel(Configuration)}
     * @param candidateRetrieval   retrieval service used for the candidate searches
     * @param nameCentricRetrieval name-centric retrieval service (only stored here)
     * @param candidateRanker      ranker into which the model is loaded
     * @param geneOrthologs        ortholog lookup used during ortholog resolution
     * @throws GeneMapperInitializationException if loading the ranking model fails fatally
     */
    @Inject
    public DypsisCandidateSetter(Configuration config, CandidateRetrieval candidateRetrieval, NameCentricRetrieval nameCentricRetrieval, DypsisCandidateRanker candidateRanker, GeneOrthologs geneOrthologs) throws GeneMapperInitializationException {
        this.geneOrthologs = geneOrthologs;
        this.candidateRanker = candidateRanker;
        this.nameCentricRetrieval = nameCentricRetrieval;
        this.candidateRetrieval = candidateRetrieval;
        this.queryGenerator = new GeneRecordQueryGenerator();
        // The ranker field must already be assigned here: the model is loaded into it.
        loadLexicalCandidateRankingModel(config);
        this.candidateFilter = new CandidateFilter();
    }

    /**
     * Runs up to three successively issued searches for the mention and returns the de-duplicated
     * union of their hits in retrieval order.
     * <p>
     * The first search always runs. The second and third only run while the accumulated list is not
     * yet "full" according to {@code candidatesFull}.
     *
     * @param gm                       the gene mention to search for
     * @param taxId                    taxonomy IDs handed to the retrieval; callers may pass {@code null}
     * @param luceneCandidateRetrieval retrieval service that executes the queries
     * @param parameters               retrieval parameters, passed through unchanged
     * @param queryGeneratorType       {@code "name_centric"} or {@code "gene_records"}
     * @param goldIds                  not used by this method
     * @return a new list with the distinct hits of all executed searches
     * @throws IllegalArgumentException for any other {@code queryGeneratorType}
     */
    @NotNull
    private static List<SynHit> findRelaxedCandidates(GeneMention gm, Collection<String> taxId, CandidateRetrieval luceneCandidateRetrieval, Parameters parameters, String queryGeneratorType, boolean useDisMax, boolean synonymRelevanceSignal, boolean npRelevanceSignal, boolean alternativeNamesRelevanceSignal, boolean appositionRelevanceSignal, Stats stats, Set<String> goldIds) {
        // Query generators in the order in which they are tried.
        final QueryGenerator[] stageGenerators;
        switch (queryGeneratorType) {
            case "name_centric":
                stageGenerators = new QueryGenerator[]{
                        LuceneCandidateRetrieval.DISJUNCTION_MINUS_1,
                        LuceneCandidateRetrieval.DISJUNCTION_MINUS_2,
                        LuceneCandidateRetrieval.DISJUNCTION
                };
                break;
            case "gene_records":
                stageGenerators = new QueryGenerator[]{
                        new GeneRecordQueryGenerator(useDisMax, false, true, synonymRelevanceSignal, npRelevanceSignal, alternativeNamesRelevanceSignal, appositionRelevanceSignal, 1),
                        new GeneRecordQueryGenerator(useDisMax, false, true, synonymRelevanceSignal, npRelevanceSignal, alternativeNamesRelevanceSignal, appositionRelevanceSignal, 2),
                        new GeneRecordQueryGenerator(useDisMax, false, true, synonymRelevanceSignal, npRelevanceSignal, alternativeNamesRelevanceSignal, appositionRelevanceSignal, Integer.MAX_VALUE)
                };
                break;
            default:
                throw new IllegalArgumentException("Unsupported mention index query generator " + queryGeneratorType);
        }
        final String[] stageLabels = {"Relaxed search 1", "Relaxed search 2", "Relaxed search 3"};
        final List<SynHit> collected = new ArrayList<>();
        for (int stage = 0; stage < stageGenerators.length; stage++) {
            // Every stage after the first is skipped as soon as enough candidates have been gathered.
            if (stage > 0 && candidatesFull.test(collected)) {
                break;
            }
            DypsisMappingCore.timerLog.debug(stageLabels[stage]);
            List<SynHit> stageHits = luceneCandidateRetrieval.getCandidates(gm, taxId, parameters, stageGenerators[stage]);
            stats.incNumCandidateSearches();
            collected.addAll(stageHits);
        }
        return collected.stream().distinct().collect(Collectors.toList());
    }

    /**
     * Loads the lexical candidate ranking model named by the {@code candidate_retrieval.model}
     * configuration property into the candidate ranker.
     * <p>
     * Loading is best effort: if the resource cannot be found, the name is blank, or the ranker fails
     * to read the model, a warning is logged and the method returns normally. Only an
     * {@link IOException} raised while locating (or closing) the resource is fatal.
     * <p>
     * The resource stream is closed when this method returns, so the ranker must have consumed
     * it completely inside {@code loadModel}.
     *
     * @param config configuration to read the model location from
     * @throws GeneMapperInitializationException if locating or closing the model resource raises an {@link IOException}
     */
    public void loadLexicalCandidateRankingModel(Configuration config) throws GeneMapperInitializationException {
        try {
            String lexicalRankingModel = config.getProperty(Configuration.dot("candidate_retrieval", "model"), "<none specified>");
            log.info("Loading lexical candidate ranking model from {}", (Object)lexicalRankingModel);
            // try-with-resources tolerates a null resource and closes a non-null one on every path.
            try (InputStream resource = FileUtilities.findResource(lexicalRankingModel)) {
                if (resource != null && !lexicalRankingModel.isBlank()) {
                    try {
                        this.candidateRanker.loadModel(resource);
                    }
                    catch (IOException e) {
                        log.warn("Lexical candidate ranking model could not be loaded from {} due to an IOException. Candidate retrieval will happen with default Lucene settings.", (Object)lexicalRankingModel, (Object)e);
                    }
                    catch (ClassNotFoundException e) {
                        log.warn("Lexical candidate ranking model could not be loaded from {} because it contains an unexpected class. The model is incompatible with this version of {}. Candidate retrieval will happen with default Lucene settings.", lexicalRankingModel, "GeNo", e);
                    }
                } else {
                    log.warn("Lexical candidate ranking model {} was not found. Candidate retrieval will happen with default Lucene settings.", (Object)lexicalRankingModel);
                }
            }
        }
        catch (IOException e) {
            throw new GeneMapperInitializationException(e);
        }
    }

    /**
     * Writes the candidate ranker's model to a file by delegating to the ranker.
     *
     * @param destination file to write the model to
     * @param parameters  parameters handed to the ranker together with the destination
     */
    public void save(File destination, Parameters parameters) {
        candidateRanker.saveModel(destination, parameters);
    }

    /**
     * Reads a candidate ranking model from a file by delegating to the ranker.
     *
     * @param source file to read the model from
     * @throws IOException            propagated from the ranker's model loading
     * @throws ClassNotFoundException propagated from the ranker's model loading
     */
    public void load(File source) throws IOException, ClassNotFoundException {
        candidateRanker.loadModel(source);
    }

    /**
     * Collects the candidate list of a gene mention for one taxonomy ID.
     * <p>
     * An initial search is run first. Unless {@code stop_candidate_search_on_exact_match} is set and
     * that search already produced an exact match for {@code focusTaxId}, further searches are issued
     * (hyphen-free text, text without non-descriptives, relaxed queries, orthologs or taxonomy-focused
     * queries, synonym/chunk/alternative-name searches depending on the configured modes). Their
     * result lists are fused and restricted to the taxonomy ID.
     * <p>
     * If nothing matches {@code focusTaxId} but the first overall candidate is an exact match, the
     * mention's scored taxonomy candidates are tried in descending score order; the first one the
     * exact candidate belongs to becomes the <em>effective</em> taxonomy ID. All later searches and
     * the final merge then use this effective ID.
     *
     * @param gm                       the gene mention; its gene name may receive an additional alternative
     * @param focusTaxId               taxonomy ID the candidates are sought for
     * @param luceneCandidateRetrieval retrieval service used for most of the searches
     * @param parameters               retrieval parameters
     * @param stats                    statistics collector that is updated along the way
     * @return the candidates (left) and the taxonomy ID they actually belong to (right)
     * @throws IllegalStateException if the resulting list contains two hits with the same ID
     */
    private Pair<List<SynHit>, String> getCandidateList(GeneMention gm, String focusTaxId, CandidateRetrieval luceneCandidateRetrieval, Parameters parameters, Stats stats) {
        List<SynHit> synHitsMatchingTax;
        String speciesPrefix;
        QueryGenerator disjunction;
        QueryGenerator conjunction;
        long time = System.nanoTime();
        DypsisMappingCore.timerLog.debug("Entering 'getCandidateList' for gene with text {} and taxonomy ID {}", (Object)gm.getText(), (Object)focusTaxId);
        // The query generator type is fixed here, so only the "gene_records" branch below is ever taken.
        String queryGeneratorType = "gene_records";
        boolean useDisMax = Optional.ofNullable(parameters.get(Configuration.dot("candidate_retrieval", "gene_records_field_query_type"))).orElse("disjunction").equals("dismax");
        String synonymSearchMode = parameters.getString(Configuration.dot("candidate_retrieval", "synonym_search_mode"));
        String npSearchMode = parameters.getString(Configuration.dot("candidate_retrieval", "search_gm_np_mode"));
        String alternativesSearchMode = parameters.getString(Configuration.dot("candidate_retrieval", "alternative_names_search_mode"));
        boolean synonymRelevanceSignal = synonymSearchMode.equals("cnf_relevance_signal") || synonymSearchMode.equals("all");
        boolean npRelevanceSignal = npSearchMode.equals("cnf_relevance_signal") || npSearchMode.equals("all");
        boolean alternativeNamesRelevanceSignal = alternativesSearchMode.equals("cnf_relevance_signal") || alternativesSearchMode.equals("all");
        boolean appositionRelevanceSignal = parameters.getBoolean(Configuration.dot("candidate_retrieval", "use_appositions_as_lucene_relevance_signal"));
        boolean contextGenesRelevanceSignal = parameters.getBoolean(Configuration.dot("candidate_retrieval", "add_context_genes_as_lucene_relevance_signal"));
        if (queryGeneratorType.equals("name_centric")) {
            conjunction = LuceneCandidateRetrieval.CONJUNCTION;
            disjunction = LuceneCandidateRetrieval.DISJUNCTION;
        } else if (queryGeneratorType.equals("gene_records")) {
            // For gene records the same generator instance serves as "conjunction" and "disjunction".
            conjunction = disjunction = new GeneRecordQueryGenerator(useDisMax, false, true, synonymRelevanceSignal, npRelevanceSignal, alternativeNamesRelevanceSignal, appositionRelevanceSignal, contextGenesRelevanceSignal);
        } else {
            throw new IllegalArgumentException("Unsupported mention index query generator " + queryGeneratorType);
        }
        // Optionally register the mention text without its species prefix as an alternative name,
        // provided the prefix maps to one of the mention's taxonomy IDs.
        if (parameters.getBoolean(Configuration.dot("candidate_retrieval", "strip_species_prefix_for_alternative"), false) && (speciesPrefix = SpeciesHintSetter.getSpeciesPrefix(gm)) != null) {
            Collection<String> taxIdsForPrefix = SpeciesHintSetter.getTaxIdsForPrefix(speciesPrefix);
            if (taxIdsForPrefix.stream().anyMatch(gm.getTaxonomyIdsSet()::contains)) {
                gm.getGeneName().addAlternative(new GeneName(gm.getText().substring(speciesPrefix.length()), gm.getNormalizer()));
            }
        }
        DypsisMappingCore.timerLog.debug("Initial candidate retrieval");
        List<SynHit> candidates = parameters.getBoolean(Configuration.dot("candidate_retrieval", "search_tax_specific_candidates_first")) ? luceneCandidateRetrieval.getCandidates(gm, Collections.emptyList(), List.of(focusTaxId), true, parameters, conjunction) : luceneCandidateRetrieval.getCandidates(gm, Collections.emptyList(), Collections.emptyList(), true, parameters, conjunction);
        stats.incNumCandidateSearches();
        String effectiveTaxId = focusTaxId;
        if (!parameters.getBoolean(Configuration.dot("candidate_retrieval", "stop_candidate_search_on_exact_match")) || !hasExactCandidate4taxid.apply(candidates, focusTaxId).booleanValue()) {
            // The Object-typed local and the casts around it further down stem from decompilation.
            Object chunkNP;
            boolean performOrthologsResolution;
            String normalizedText;
            String nameWoNonDesc;
            RRFSynHitListFusion fusion = new RRFSynHitListFusion();
            ArrayList<List<SynHit>> candidateLists = new ArrayList<List<SynHit>>();
            HashSet<String> goldIds = gm.getGeneDocument().isGoldHasOffsets() ? new HashSet<String>(gm.getAllGoldIdsAsList()) : new HashSet<String>(gm.getGeneDocument().getGoldIds());
            // NOTE(review): goldTaxIds is never read in this method; the call is kept because its side effects cannot be verified from here.
            HashSet<String> goldTaxIds = new HashSet<String>(gm.getAllGoldTaxonomyIdsAsList());
            candidateLists.add(candidates);
            Boolean hasExactMatchBeforeSpeciesRestriction = hasExactMatchFunc.apply(candidates);
            // Variant 1: the mention text with all hyphens removed.
            if (!hasExactMatchBeforeSpeciesRestriction.booleanValue() && gm.getText().contains("-")) {
                GeneMention g2 = new GeneMention(gm);
                g2.setText(g2.getText().replaceAll("-", ""));
                DypsisMappingCore.timerLog.debug("Candidate retrieval with removed '-'");
                candidateLists.add(luceneCandidateRetrieval.getCandidates(g2, Collections.emptyList(), Collections.emptyList(), true, parameters, conjunction));
                stats.incNumCandidateSearches();
            }
            // Variant 2: the normalized text without non-descriptive tokens, if that changes anything.
            if (!hasExactMatchFunc.apply(candidates).booleanValue() && !(nameWoNonDesc = TermNormalizer.removeNondescriptives(normalizedText = gm.getNormalizedText())).equals(normalizedText)) {
                GeneMention g3 = new GeneMention(gm);
                g3.setText(nameWoNonDesc);
                DypsisMappingCore.timerLog.debug("Candidate retrieval without non-descriptives");
                List<SynHit> candidatesWoNonDesc = luceneCandidateRetrieval.getCandidates(g3, Collections.emptyList(), Collections.emptyList(), true, parameters, conjunction);
                candidateLists.add(candidatesWoNonDesc);
                stats.incNumCandidateSearches();
            }
            // From here on 'candidates' refers to the relaxed result if the initial list was not full.
            if (!candidatesFull.test(candidates)) {
                DypsisMappingCore.timerLog.debug("Relaxed candidate retrieval because we didn't find anything yet");
                candidates = DypsisCandidateSetter.findRelaxedCandidates(gm, null, luceneCandidateRetrieval, parameters, queryGeneratorType, useDisMax, synonymRelevanceSignal, npRelevanceSignal, alternativeNamesRelevanceSignal, appositionRelevanceSignal, stats, goldIds);
                candidateLists.add(candidates);
            }
            synHitsMatchingTax = fusion.fuse(candidateLists).stream().filter(s2 -> s2.getTaxIds().contains(focusTaxId)).collect(Collectors.toList());
            synHitsMatchingTax.sort(Comparator.comparing(SynHit::isExactMatch).thenComparing(SynHit::getLexicalScore).reversed());
            Boolean hasExactMatchAfterSpeciesRestriction = hasExactMatchFunc.apply(synHitsMatchingTax);
            stats.incNumExactMatchAfterSpeciesRestrictionIfTrue(hasExactMatchAfterSpeciesRestriction);
            stats.incNumCorrectGenesInCandidateListAfterSpeciesRestrictionIfTrue(hasCorrectEntryFunc.apply(synHitsMatchingTax, goldIds));
            // Orthologs are only consulted when an exact match existed but was lost by the species restriction.
            performOrthologsResolution = parameters.getBoolean(Configuration.dot("candidate_retrieval", "try_orthologs")) && hasExactMatchBeforeSpeciesRestriction.booleanValue() && !hasExactMatchAfterSpeciesRestriction.booleanValue() && focusTaxId != null;
            if (performOrthologsResolution) {
                List<SynHit> exactMatchesAllCandidates = candidates.stream().filter(h2 -> h2.isExactMatch()).collect(Collectors.toList());
                Set<String> orthologsInSoughtTaxId = exactMatchesAllCandidates.stream().map(SynHit::getIds).flatMap(Collection::stream).flatMap(geneId -> this.geneOrthologs.getOrthologsInSpecies((String)geneId, focusTaxId).stream()).map(r -> r.getGeneIdForTaxId(focusTaxId)).collect(Collectors.toSet());
                if (!orthologsInSoughtTaxId.isEmpty()) {
                    DypsisMappingCore.timerLog.debug("Retrieving candidate orthologs");
                    List<SynHit> candidates1 = luceneCandidateRetrieval.getCandidates(gm, orthologsInSoughtTaxId, Collections.singleton(focusTaxId), true, parameters, disjunction);
                    stats.incNumCandidateSearches();
                    if (!candidates1.isEmpty()) {
                        candidateLists.add(candidates1);
                    }
                }
            } else if (focusTaxId != null) {
                DypsisMappingCore.timerLog.debug("Retrieving taxonomy-focused candidates");
                List<SynHit> candidatesForFocusTax = luceneCandidateRetrieval.getCandidates(gm, Collections.emptyList(), Collections.singleton(focusTaxId), true, parameters, disjunction);
                stats.incNumCandidateSearches();
                if (candidatesForFocusTax.isEmpty()) {
                    DypsisMappingCore.timerLog.debug("Retrieving taxonomy-focused candidates with flat disjunction because we didn't get hits before");
                    candidatesForFocusTax = luceneCandidateRetrieval.getCandidates(gm, Collections.emptyList(), Collections.singleton(focusTaxId), true, parameters, LuceneCandidateRetrieval.GENE_RECORDS_FLAT_DISJUNCTION);
                    stats.incNumCandidateSearches();
                    // NOTE(review): this list is added here and, after being extended in place below, its distinct copy is added again - confirm the double entry is intended.
                    candidateLists.add(candidatesForFocusTax);
                }
                DypsisMappingCore.timerLog.debug("Adding relaxed hits for focused taxId");
                candidatesForFocusTax.addAll(DypsisCandidateSetter.findRelaxedCandidates(gm, Set.of(focusTaxId), luceneCandidateRetrieval, parameters, queryGeneratorType, useDisMax, synonymRelevanceSignal, npRelevanceSignal, alternativeNamesRelevanceSignal, appositionRelevanceSignal, stats, goldIds));
                candidatesForFocusTax = candidatesForFocusTax.stream().distinct().collect(Collectors.toList());
                candidateLists.add(candidatesForFocusTax);
            }
            synHitsMatchingTax = this.mergeCandidateLists(gm, candidateLists, focusTaxId, fusion, parameters);
            // Species fallback: nothing for the focus taxonomy, but the top overall candidate is exact.
            // Walk the mention's taxonomy candidates by descending score and switch to the first one
            // that the exact candidate belongs to.
            if (synHitsMatchingTax.isEmpty() && !candidates.isEmpty() && candidates.get(0).isExactMatch()) {
                SynHit exactCandidate = candidates.get(0);
                if (gm.getTaxonomyScores() != null) {
                    Iterator speciesCandidatesIt = gm.getTaxonomyScores().entrySet().stream().sorted(Comparator.comparingDouble(Map.Entry::getValue).reversed()).map(Map.Entry::getKey).iterator();
                    while (speciesCandidatesIt.hasNext()) {
                        String taxCandidate = (String)speciesCandidatesIt.next();
                        if (!exactCandidate.getTaxIds().contains(taxCandidate)) continue;
                        effectiveTaxId = taxCandidate;
                        synHitsMatchingTax = candidates.stream().filter(s2 -> s2.getTaxIds().contains(taxCandidate)).collect(Collectors.toList());
                        break;
                    }
                }
            }
            // Additional searches by the mention's synonyms: exact first, approximate only if exact found nothing.
            if (synonymSearchMode.equals("additional_search") || synonymSearchMode.equals("all")) {
                DypsisMappingCore.timerLog.debug("Add candidates searched by exact synonyms");
                List<SynHit> candidatesBySynonymExact = this.candidateRetrieval.getCandidates(gm, Set.of(effectiveTaxId), LuceneCandidateRetrieval.GENE_RECORDS_SYNONYMS_EXACT);
                candidateLists.add(candidatesBySynonymExact);
                if (candidatesBySynonymExact.isEmpty()) {
                    DypsisMappingCore.timerLog.debug("Add candidates searched by approx synonyms");
                    List<SynHit> candidatesBySynonymApprx = this.candidateRetrieval.getCandidates(gm, Set.of(effectiveTaxId), LuceneCandidateRetrieval.GENE_RECORDS_SYNONYMS_APPROX);
                    candidateLists.add(candidatesBySynonymApprx);
                }
            }
            // Additional search by an overlapping "ChunkNP" chunk, if its offsets differ from the mention's own.
            if ((npSearchMode.equals("additional_search") || npSearchMode.equals("all")) && ((Optional)(chunkNP = gm.getGeneDocument().getOverlappingChunks(gm.getOffsets(), "ChunkNP").stream().findAny())).isPresent() && !((Range)((Map.Entry)((Optional)chunkNP).get()).getKey()).equals(gm.getOffsets())) {
                Range offsets = (Range)((Map.Entry)((Optional)chunkNP).get()).getKey();
                String chunkText = gm.getGeneDocument().getCoveredText(offsets);
                GeneMention chunkGm = new GeneMention(chunkText);
                chunkGm.setOffsets(offsets);
                chunkGm.setNormalizer(gm.getNormalizer());
                DypsisMappingCore.timerLog.debug("Add candidates searched by their chunk");
                List<SynHit> candidatesBySynonymApprx = this.candidateRetrieval.getCandidates(chunkGm, Set.of(effectiveTaxId), LuceneCandidateRetrieval.GENE_RECORDS_FLAT_DISJUNCTION);
                candidateLists.add(candidatesBySynonymApprx);
            }
            // Additional searches by each alternative name: exact first, approximate only if exact found nothing.
            if (alternativesSearchMode.equals("additional_search") || alternativesSearchMode.equals("all")) {
                for (GeneName alternative : gm.getGeneName().getAlternatives()) {
                    DypsisMappingCore.timerLog.debug("Add candidates searched by alternative names exact");
                    List<SynHit> candidatesBySynonymExact = this.candidateRetrieval.getCandidates(alternative.getNormalizedText(), Set.of(effectiveTaxId), LuceneCandidateRetrieval.GENE_RECORDS_SYNONYMS_EXACT);
                    candidateLists.add(candidatesBySynonymExact);
                    if (!candidatesBySynonymExact.isEmpty()) continue;
                    DypsisMappingCore.timerLog.debug("Add candidates searched by alternative names approx");
                    List<SynHit> candidatesBySynonymApprx = this.candidateRetrieval.getCandidates(alternative.getNormalizedText(), Set.of(effectiveTaxId), LuceneCandidateRetrieval.GENE_RECORDS_SYNONYMS_APPROX);
                    candidateLists.add(candidatesBySynonymApprx);
                }
            }
            if (synHitsMatchingTax.isEmpty()) {
                DypsisMappingCore.timerLog.debug("Find relaxed candidates for focus taxId because we didn't find anything yet");
                List<SynHit> relaxedCandidates4FocusTax = DypsisCandidateSetter.findRelaxedCandidates(gm, Collections.singleton(effectiveTaxId), luceneCandidateRetrieval, parameters, queryGeneratorType, useDisMax, synonymRelevanceSignal, npRelevanceSignal, alternativeNamesRelevanceSignal, appositionRelevanceSignal, stats, goldIds);
                synHitsMatchingTax = relaxedCandidates4FocusTax;
            } else {
                // Merge on the effective taxonomy ID. It equals focusTaxId unless the species fallback
                // above switched it; filtering by focusTaxId in that case would discard exactly the
                // candidates the fallback (and the searches after it) found.
                synHitsMatchingTax = this.mergeCandidateLists(gm, candidateLists, effectiveTaxId, fusion, parameters);
                synHitsMatchingTax.sort(Comparator.comparing(SynHit::isExactMatch).reversed());
            }
            // With several exact matches at the head of the list, re-order only that exact-match prefix;
            // the approximate matches keep their order behind it.
            if (hasMultipleExactMatchFunc.apply(synHitsMatchingTax).booleanValue()) {
                int i;
                for (i = 0; i < synHitsMatchingTax.size() && synHitsMatchingTax.get(i).isExactMatch(); ++i) {
                }
                Stream<SynHit> exatMatches = IntStream.range(0, i).mapToObj(synHitsMatchingTax::get);
                Stream<SynHit> approxMatches = IntStream.range(i, synHitsMatchingTax.size()).mapToObj(synHitsMatchingTax::get);
                synHitsMatchingTax = Stream.concat(exatMatches.sorted(Comparator.comparing(SynHit::isExactMatch).reversed().thenComparingInt(sh -> IntStream.range(0, GeneRecordQueryGenerator.ALL_FIELDS.length).filter(index -> GeneRecordQueryGenerator.ALL_FIELDS[index].equals(((GeneRecordHit)sh).getSynonymField())).findFirst().getAsInt()).reversed().thenComparingDouble(SynHit::getLexicalScore).reversed()), approxMatches).collect(Collectors.toList());
            }
        } else {
            log.trace("Found a candidate with matching tax ID with the first search. Not looking further.");
            synHitsMatchingTax = parameters.getBoolean(Configuration.dot("candidate_retrieval", "search_tax_specific_candidates_first")) ? candidates : candidates.stream().filter(sh -> sh.getTaxId().equals(focusTaxId)).collect(Collectors.toList());
        }
        // Sanity check: every hit ID may occur only once in the result.
        if (synHitsMatchingTax.stream().map(SynHit::getId).collect(Collectors.toSet()).size() < synHitsMatchingTax.size()) {
            throw new IllegalStateException("There were duplicate SynHits");
        }
        time = System.nanoTime() - time;
        DypsisMappingCore.timerLog.debug("Leaving 'getCandidateList' after {}s", (Object)((double)time / Math.pow(10.0, 9.0)));
        return new ImmutablePair<List<SynHit>, String>(synHitsMatchingTax, effectiveTaxId);
    }

    /**
     * Fuses the given candidate lists and keeps only the hits whose taxonomy ID equals
     * {@code focusTaxId}, in the order produced by the fusion.
     *
     * @param gm             not used by this method
     * @param candidateLists the result lists to fuse
     * @param focusTaxId     taxonomy ID the fused hits are restricted to
     * @param fusion         fusion strategy applied to the lists
     * @param parameters     not used by this method
     * @return a new, modifiable list with the matching hits
     */
    @NotNull
    private List<SynHit> mergeCandidateLists(GeneMention gm, List<List<SynHit>> candidateLists, String focusTaxId, SynHitListFusion fusion, Parameters parameters) {
        final List<SynHit> hitsForTax = new ArrayList<>();
        for (SynHit fusedHit : fusion.fuse(candidateLists)) {
            if (fusedHit.getTaxId().equals(focusTaxId)) {
                hitsForTax.add(fusedHit);
            }
        }
        return hitsForTax;
    }

    /**
     * Closes the underlying candidate retrieval service.
     *
     * @throws GeneMapperException propagated from closing the retrieval service
     */
    public void shutdown() throws GeneMapperException {
        candidateRetrieval.close();
    }

    /**
     * @return the candidate retrieval service this setter was constructed with
     */
    public CandidateRetrieval getCandidateRetrieval() {
        return candidateRetrieval;
    }

    /**
     * Assigns synonym candidates to the gene mentions of a document and marks the document as
     * {@code SYNONYM_CANDIDATES_ASSIGNED}.
     * <p>
     * In mapping mode {@code "genesets"} a mention is only processed if at least one of its gene sets
     * has not been seen before. In that case the names (and apposition contexts) of all mentions in
     * its gene sets are first added to the mention's own gene name. In any other mode every mention
     * is processed individually.
     *
     * @param document     the document; must be in state {@code GENES_SELECTED} and {@code SPECIES_ASSIGNED_TO_GENES}
     * @param parameterMap parameters that are merged with the candidate ranker's parameters
     * @param stats        statistics collector passed on to the per-mention processing
     * @throws GeneMapperException propagated from the per-mention candidate assignment
     */
    public void setCandidates(GeneDocument document, Parameters parameterMap, Stats stats) throws GeneMapperException {
        long time = System.nanoTime();
        DypsisMappingCore.timerLog.debug("Entering 'setCandidates(GeneDocument)' for document with ID {}", (Object)document.getId());
        Parameters parameters = this.candidateRanker.mergeParameters(parameterMap);
        document.expectState(EnumSet.of(GeneDocument.State.GENES_SELECTED, GeneDocument.State.SPECIES_ASSIGNED_TO_GENES));
        boolean mapGenesets = parameters.getString(Configuration.dot("candidate_retrieval", "mapping_mode")).equals("genesets");
        // Only needed (and only dereferenced) in gene set mode.
        Set<GeneSet> mappedGenesets = mapGenesets ? new HashSet<GeneSet>() : null;
        AtomicInteger gmIdCounter = new AtomicInteger();
        int numGenes = 0;
        // The total is only used in the progress message below, so it is not counted unless debug logging is on.
        if (log.isDebugEnabled()) {
            numGenes = (int)document.getGenes().count();
        }
        // Explicitly typed so that the lambda has a target type; a checked exception is thrown in the
        // loop body, which rules out Stream.forEach.
        Iterable<GeneMention> genes = () -> document.getGenes().iterator();
        for (GeneMention gm : genes) {
            if (mapGenesets) {
                // addAll reports a change only if at least one of the mention's gene sets is new.
                if (mappedGenesets.addAll(gm.getGeneSets())) {
                    Set<String> alreadySeenNames = new HashSet<String>();
                    alreadySeenNames.add(gm.getNormalizedText());
                    gm.getGeneName().getAlternatives().forEach(n -> alreadySeenNames.add(n.getNormalizedText()));
                    List<GeneName> geneSetNames = gm.getGeneSets().stream().flatMap(Collection::stream).map(GeneMention::getGeneName).collect(Collectors.toList());
                    for (GeneName name : geneSetNames) {
                        // Identity comparison: skip the mention's own name object.
                        if (name == gm.getGeneName()) continue;
                        if (alreadySeenNames.add(name.getNormalizedText())) {
                            gm.getGeneName().addAlternative(name);
                        }
                        for (GeneName apposition : name.getAppositionContexts()) {
                            gm.getGeneName().addAppositionContext(apposition);
                        }
                    }
                    this.setCandidates(gm, gmIdCounter, parameters, stats);
                }
            } else {
                this.setCandidates(gm, gmIdCounter, parameters, stats);
            }
            log.debug("Candidates set for {}/{} genes.", (Object)gmIdCounter.get(), (Object)numGenes);
        }
        document.addState(GeneDocument.State.SYNONYM_CANDIDATES_ASSIGNED);
        time = System.nanoTime() - time;
        DypsisMappingCore.timerLog.debug("Leaving 'setCandidates(GeneDocument)' for document with ID {} with {} genes ({} unique names) after {}s", document.getId(), document.getGenes().count(), document.getGenes().map(GeneMention::getText).distinct().count(), (double)time / Math.pow(10.0, 9.0));
    }

    /**
     * Retrieves, filters and ranks synonym candidates for a single gene mention.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>For each taxonomy ID of {@code gm}, candidates are retrieved via
     *       {@code getCandidateList}. If retrieval reports a different taxonomy ID than the one
     *       requested, the mention's taxonomy ID list is rewritten accordingly and de-duplicated.</li>
     *   <li>The mentions to rank are {@code gm} itself or, when
     *       {@code candidate_retrieval.mapping_mode} equals {@code "genesets"}, all non-rejected
     *       mentions of the gene sets of {@code gm}.</li>
     *   <li>For each such mention, clones of the candidates for taxonomy IDs not yet present in
     *       its mention mapping result are stored, approximate candidates are filtered and
     *       extended, the mention is ranked (unless rejected), candidate lists are optionally
     *       fused via RRF, and the best candidates per new taxonomy ID are selected.</li>
     * </ol>
     *
     * @param gm           the gene mention to retrieve candidates for
     * @param gmidCounter  incremented once for each ranked mention
     * @param parameterMap parameters that are merged with the candidate ranker's parameters
     * @param stats        passed through to candidate retrieval
     * @throws GeneMapperException declared for failures of the called retrieval/ranking steps
     */
    public void setCandidates(GeneMention gm, AtomicInteger gmidCounter, Parameters parameterMap, Stats stats) throws GeneMapperException {
        long time = System.nanoTime();
        DypsisMappingCore.timerLog.debug("Entering 'setCandidates' for gene '{}'", (Object)gm.getText());
        Parameters parameters = this.candidateRanker.mergeParameters(parameterMap);
        boolean mapGenesets = parameters.getString(Configuration.dot("candidate_retrieval", "mapping_mode")).equals("genesets");
        Map<String, List<SynHit>> tax2candidates = new HashMap<>();
        // One replacement operation per taxonomy ID that candidate retrieval changed.
        List<UnaryOperator<String>> taxChanges = new ArrayList<>();
        for (String requestedTax : gm.getTaxonomyIds()) {
            Pair<List<SynHit>, String> candidatesForTax = this.getCandidateList(gm, requestedTax, this.candidateRetrieval, parameters, stats);
            String effectiveTax = candidatesForTax.getRight();
            if (!requestedTax.equals(effectiveTax)) {
                taxChanges.add(taxInList -> taxInList.equals(requestedTax) ? effectiveTax : taxInList);
            }
            tax2candidates.put(effectiveTax, candidatesForTax.getLeft());
        }
        if (!taxChanges.isEmpty()) {
            // Work on a fresh mutable copy before replacing entries in place.
            gm.setTaxonomyIds(new ArrayList<>(gm.getTaxonomyIds()));
            taxChanges.forEach(gm.getTaxonomyIds()::replaceAll);
            log.warn("There are {} taxonomy changes", (Object)taxChanges.size());
        }
        gm.setTaxonomyIds(gm.getTaxonomyIds().stream().distinct().collect(Collectors.toList()));
        Iterator<GeneMention> gmsToRank = mapGenesets ? gm.getGeneSets().stream().flatMap(Collection::stream).filter(Predicate.not(GeneMention::isRejected)).iterator() : List.of(gm).iterator();
        while (gmsToRank.hasNext()) {
            GeneMention gmToRank = gmsToRank.next();
            MentionMappingResult previousResult = gmToRank.getMentionMappingResult();
            // Only taxonomy IDs the mention has no original candidates for yet are processed below.
            Set<String> knownTaxIds = previousResult != null ? previousResult.tax2originalCandidates.keySet() : Collections.emptySet();
            List<String> newTaxIds = tax2candidates.keySet().stream().filter(taxId -> !knownTaxIds.contains(taxId)).collect(Collectors.toList());
            MentionMappingResult mmr = previousResult != null ? previousResult : new MentionMappingResult(gmToRank);
            Map<String, List<SynHit>> effectiveTax2candidates = previousResult != null ? previousResult.tax2originalCandidates : new HashMap<>();
            for (String taxId : newTaxIds) {
                // Each mention gets its own clones so later score/synonym changes stay per mention.
                effectiveTax2candidates.put(taxId, tax2candidates.get(taxId).stream().map(SynHit::clone).collect(Collectors.toList()));
            }
            mmr.tax2originalCandidates = effectiveTax2candidates;
            mmr.tax2bestCandidates = new HashMap<>(gmToRank.getTaxonomyIds().size());
            gmToRank.setMentionMappingResult(mmr);
            this.filterApproximateCandidates(gmToRank, parameters);
            // NOTE(review): the unmerged parameterMap is passed here (and read for 'synonym_search_mode' below) - confirm this is intended.
            this.handleApproximateMatches(gmToRank, parameterMap);
            DypsisCandidateRanker.setGoldRelevanceScores(gmToRank, mmr);
            if (!gmToRank.isRejected()) {
                this.candidateRanker.rankCandidates(gmToRank, gmidCounter, parameters);
            }
            String matchType = "approxmatch";
            if (parameters.getBoolean(Configuration.dot("candidate_retrieval", "do_rrf_with_candidate_lists")) && (!parameters.getString(Configuration.dot("candidate_retrieval", "ml", matchType, "algorithm")).equals("lucene") || parameterMap.getString(Configuration.dot("candidate_retrieval", "synonym_search_mode")).equals("additional_search"))) {
                if (mmr.tax2lexicallyRerankedCandidates == null) {
                    mmr.tax2lexicallyRerankedCandidates = new HashMap<String, List<SynHit>>();
                }
                for (String taxId : newTaxIds) {
                    List<SynHit> originalCandidates = mmr.tax2originalCandidates.get(taxId);
                    if (originalCandidates == null) {
                        continue;
                    }
                    List<SynHit> rerankedHits = mmr.tax2lexicallyRerankedCandidates.getOrDefault(taxId, Collections.emptyList());
                    List<SynHit> fusedLists = new RRFSynHitListFusion().fuse(List.of(originalCandidates, rerankedHits));
                    mmr.tax2lexicallyRerankedCandidates.put(taxId, fusedLists);
                }
            }
            // The reranked map may never have been created (it is null-checked above), so treat null as "no entries".
            Map<String, List<SynHit>> rerankedByTax = mmr.tax2lexicallyRerankedCandidates != null ? mmr.tax2lexicallyRerankedCandidates : Collections.emptyMap();
            for (String taxId : newTaxIds) {
                List<SynHit> rerankedCandidates = rerankedByTax.get(taxId);
                if (rerankedCandidates == null) {
                    continue;
                }
                List<SynHit> bestCandidatesForTax = getBestSynHitsForTax.apply(rerankedCandidates, taxId);
                if (!bestCandidatesForTax.isEmpty()) {
                    mmr.tax2bestCandidates.put(taxId, bestCandidatesForTax);
                    continue;
                }
                // NOTE(review): this replaces the whole original-candidates map (all taxonomy IDs) with an immutable single-entry map - confirm this is intended.
                mmr.tax2originalCandidates = Map.of(taxId, List.of(MentionMappingResult.REJECTION));
                mmr.setFinalRankedCandidates(taxId, List.of(MentionMappingResult.REJECTION));
                mmr.setRejectReason(taxId, MentionMappingResult.RejectReason.NO_CANDIDATES);
            }
            gmToRank.setMentionMappingResult(mmr);
            if (mapGenesets) {
                // Gene sets without a result yet receive the result of this mention.
                for (GeneSet gs : gmToRank.getGeneSets()) {
                    if (gs.getMentionMappingResult() != null) {
                        continue;
                    }
                    gs.setMentionMappingResult(mmr);
                }
            }
            gmidCounter.incrementAndGet();
        }
        time = System.nanoTime() - time;
        DypsisMappingCore.timerLog.debug("Leaving 'setCandidates' after {}s", (Object)((double)time / 1e9));
    }

    /**
     * Removes approximate candidates of a mention that fail the enabled rejection checks.
     *
     * <p>Does nothing when the mention has an exact candidate match. Otherwise every
     * non-rejection candidate in {@code tax2originalCandidates} is tested against the checks
     * enabled through the boolean {@code rejection.*} parameters (each defaulting to
     * {@code false}) and removed when at least one enabled check fires. If no taxonomy ID
     * retains a non-empty candidate list afterwards, the mention is rejected with
     * {@code TAX_FILTERED_CANDIDATES_EMPTY}.
     *
     * @param gm         the mention whose original candidates are filtered in place
     * @param parameters source of the {@code rejection.*} switches
     */
    private void filterApproximateCandidates(GeneMention gm, Parameters parameters) {
        if (gm.hasExactCandidateMatch()) {
            return;
        }
        final String mentionName = gm.getNormalizedText();
        final MentionMappingResult mappingResult = gm.getMentionMappingResult();
        boolean anyCandidatesLeft = false;
        assert (mappingResult != null) : "The mention mapping result is null.";
        assert (mappingResult.tax2originalCandidates != null) : "The original candidate map is null.";
        for (List<SynHit> hitsForTax : mappingResult.tax2originalCandidates.values()) {
            // Explicit iterator: elements are removed while traversing the list.
            for (Iterator<SynHit> hitIt = hitsForTax.iterator(); hitIt.hasNext(); ) {
                final SynHit hit = hitIt.next();
                if (hit.isRejectionCandidate()) {
                    continue;
                }
                // '|=' keeps every enabled check being evaluated, even after one has already fired.
                boolean discard = false;
                if (parameters.getBoolean(Configuration.dot("rejection", "approx_single_token"), false)) {
                    discard |= mentionName.split("\\s+").length == 1;
                }
                if (parameters.getBoolean(Configuration.dot("rejection", "no_token_overlap"), false)) {
                    discard |= CandidateFilter.getCommonWords(mentionName.split("\\s+"), hit.getSynonym().split("\\s+")).isEmpty();
                }
                if (parameters.getBoolean(Configuration.dot("rejection", "contradicting_number"), false)) {
                    discard |= CandidateFilter.hasContradictingNumber(mentionName, hit.getSynonym());
                }
                if (parameters.getBoolean(Configuration.dot("rejection", "contradicting_greek"), false)) {
                    discard |= this.candidateFilter.hasContradictingGreek(mentionName, hit.getSynonym());
                }
                if (parameters.getBoolean(Configuration.dot("rejection", "filter_out_method"), false)) {
                    discard |= this.candidateFilter.filterOut(mentionName, hit.getSynonym());
                }
                if (discard) {
                    hitIt.remove();
                }
            }
            if (!hitsForTax.isEmpty()) {
                anyCandidatesLeft = true;
            }
        }
        if (!anyCandidatesLeft) {
            gm.reject(MentionMappingResult.RejectReason.TAX_FILTERED_CANDIDATES_EMPTY);
        }
    }

    /**
     * Extends and re-labels the candidates of a mention that only has approximate matches.
     *
     * <p>Does nothing unless {@code gm.hasOnlyApproximateCandidateMatches()}. Otherwise, for each
     * taxonomy ID of the mention:
     * <ol>
     *   <li>name centric candidates are retrieved; if there are none, the taxonomy ID is skipped;</li>
     *   <li>gene record candidates for the gene IDs of those hits are retrieved, sorted by the
     *       name centric scores, and those with IDs not yet present are appended to the mention's
     *       original candidates of that taxonomy ID (skipped if the mention has no entry for it);</li>
     *   <li>(action 2a) each original candidate referenced by a name centric hit receives that
     *       hit's synonym and lexical score; its previous lexical score becomes its contextual
     *       score;</li>
     *   <li>(action 2b) for candidates still without a name centric synonym, name centric
     *       retrieval restricted to their IDs is repeated until nothing is missing, nothing is
     *       returned, or a round makes no progress.</li>
     * </ol>
     *
     * @param gm           the mention whose original candidates are extended in place
     * @param parameterMap parameters handed to the gene record retrieval
     */
    private void handleApproximateMatches(GeneMention gm, Parameters parameterMap) {
        if (!gm.hasOnlyApproximateCandidateMatches()) {
            return;
        }
        for (String taxId : gm.getTaxonomyIds()) {
            BooleanQueryGenerator queryGenerator = new BooleanQueryGenerator(BooleanClause.Occur.SHOULD, -1);
            List<SynHit> synonymCentricCandidates = this.nameCentricRetrieval.getCandidates(gm, Set.of(taxId), (QueryGenerator)queryGenerator);
            if (synonymCentricCandidates.isEmpty()) {
                continue;
            }
            List<SynHit> geneRecordCandidates = this.candidateRetrieval.getCandidates(gm, synonymCentricCandidates.stream().flatMap(c -> c.getGeneIdsOfTaxId(taxId)).collect(Collectors.toList()), Set.of(taxId), true, parameterMap, LuceneCandidateRetrieval.GENE_RECORDS_FLAT_DISJUNCTION);
            this.sortGeneRecordCandidatesAccordingToNameCentricCandidateList(geneRecordCandidates, synonymCentricCandidates, taxId);
            assert (gm.getMentionMappingResult() != null) : "The mention mapping result is null.";
            assert (gm.getMentionMappingResult().tax2originalCandidates != null) : "The original candidates map is null.";
            if (!gm.getMentionMappingResult().tax2originalCandidates.containsKey(taxId)) {
                continue;
            }
            List<SynHit> originalCandidates = gm.getMentionMappingResult().tax2originalCandidates.get(taxId);
            Set<String> alreadyReceivedIds = originalCandidates.stream().map(SynHit::getId).collect(Collectors.toSet());
            geneRecordCandidates.stream().filter(c -> !alreadyReceivedIds.contains(c.getId())).forEach(originalCandidates::add);
            // The candidate list is not modified below, so one lookup map serves both actions.
            Map<String, SynHit> id2sh = originalCandidates.stream().collect(Collectors.toMap(SynHit::getId, Function.identity()));
            Set<String> idsWithSetSynonyms = new HashSet<>();
            // Action 2a: take over synonyms from the name centric hits retrieved for the mention.
            // NOTE(review): an ID referenced by several hits keeps the values of the LAST hit - confirm this is intended.
            for (SynHit nameCentricHit : synonymCentricCandidates) {
                List<String> geneIds = nameCentricHit.getGeneIdsOfTaxId(taxId).filter(id2sh::containsKey).collect(Collectors.toList());
                for (String geneId : geneIds) {
                    DypsisMappingCore.applyNameCentricSynonym(id2sh.get(geneId), nameCentricHit, "Synonym set from name centric index, action 2a");
                    idsWithSetSynonyms.add(geneId);
                }
            }
            // Action 2b: query the name centric index specifically for candidates still lacking a synonym.
            while (true) {
                Set<String> idsWithoutSetSynonyms = originalCandidates.stream().map(SynHit::getId).filter(id -> !idsWithSetSynonyms.contains(id)).collect(Collectors.toSet());
                if (idsWithoutSetSynonyms.isEmpty()) {
                    // Nothing left to look up; avoids a retrieval with an empty ID filter and a hit limit of 0.
                    break;
                }
                List<SynHit> nameCentricCandidatesForMissingIds = this.nameCentricRetrieval.getCandidates(gm, idsWithoutSetSynonyms, Set.of(), true, null, (int)(1.5 * (double)idsWithoutSetSynonyms.size()), queryGenerator);
                if (nameCentricCandidatesForMissingIds.isEmpty()) {
                    break;
                }
                int numIdsSetBefore = idsWithSetSynonyms.size();
                for (SynHit nameCentricHit : nameCentricCandidatesForMissingIds) {
                    Iterator<String> missingIdIt = nameCentricHit.getGeneIdsOfTaxId(taxId).filter(idsWithoutSetSynonyms::contains).iterator();
                    while (missingIdIt.hasNext()) {
                        String geneId = missingIdIt.next();
                        DypsisMappingCore.applyNameCentricSynonym(id2sh.get(geneId), nameCentricHit, "Synonym set from name centric index, action 2b");
                        idsWithSetSynonyms.add(geneId);
                    }
                }
                if (idsWithSetSynonyms.size() == numIdsSetBefore) {
                    // No progress: the same query would be repeated with the same outcome forever.
                    break;
                }
            }
        }
    }

    /**
     * Copies synonym and lexical score of a name centric hit onto a gene record hit; the gene
     * record hit's previous lexical score is kept as its contextual score. The comment of the
     * gene record hit records whether the synonym actually changed.
     */
    private static void applyNameCentricSynonym(SynHit geneRecordHit, SynHit nameCentricHit, String synonymChangedComment) {
        String previousSynonym = geneRecordHit.getSynonym();
        String newSynonym = nameCentricHit.getSynonym();
        // Compare BEFORE overwriting; comparing afterwards would always yield "unchanged".
        boolean synonymChanged = previousSynonym == null ? newSynonym != null : !previousSynonym.equals(newSynonym);
        geneRecordHit.setSynonym(newSynonym);
        geneRecordHit.setContextualScore(geneRecordHit.getLexicalScore());
        geneRecordHit.setLexicalScore(nameCentricHit.getLexicalScore());
        geneRecordHit.setComment(synonymChanged ? synonymChangedComment : "Best name centric synonym was already set");
    }

    /**
     * Sorts gene record candidates in place, descending by the best lexical score any name
     * centric candidate assigns to their gene ID for the given taxonomy ID.
     *
     * <p>Gene record candidates whose ID is not referenced by any name centric candidate are
     * sorted to the end instead of causing a {@code NullPointerException}.
     *
     * @param geneRecordCandidates     the list to sort in place
     * @param synonymCentricCandidates name centric hits providing the scores
     * @param taxId                    taxonomy ID used to select the gene IDs of each name centric hit
     */
    private void sortGeneRecordCandidatesAccordingToNameCentricCandidateList(List<SynHit> geneRecordCandidates, List<SynHit> synonymCentricCandidates, String taxId) {
        // Highest name centric lexical score per gene ID.
        Map<String, Double> id2score = new HashMap<>();
        for (SynHit nameCentricHit : synonymCentricCandidates) {
            double score = nameCentricHit.getLexicalScore();
            nameCentricHit.getGeneIdsOfTaxId(taxId).forEach(id -> id2score.merge(id, score, Math::max));
        }
        Comparator<SynHit> byNameCentricScore = Comparator.comparingDouble(hit -> id2score.getOrDefault(hit.getId(), Double.NEGATIVE_INFINITY));
        geneRecordCandidates.sort(byNameCentricScore.reversed());
    }

    /**
     * Clears this mapping core by delegating to {@code clear()} of its candidate ranker.
     */
    public void clear() {
        candidateRanker.clear();
    }

    /**
     * Returns the model parameters as reported by the candidate ranker.
     *
     * @return the result of {@code candidateRanker.getModelParameters()}
     */
    public Parameters getModelParameters() {
        final Parameters modelParameters = candidateRanker.getModelParameters();
        return modelParameters;
    }

    /**
     * Compiler-generated lambda body: tells whether any ID of the given hit is contained in the
     * gold ID list of the gene mention.
     */
    private static /* synthetic */ boolean lambda$setCandidates$36(GeneMention gm, SynHit s2) {
        final Iterable<?> hitIds = s2.getIds();
        final Collection<?> goldIds = gm.getAllGoldIdsAsList();
        for (final Object hitId : hitIds) {
            if (goldIds.contains(hitId)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Compiler-generated lambda body: tells whether the given hit shares at least one ID with
     * the gold ID set.
     */
    private static /* synthetic */ boolean lambda$getCandidateList$30(Set goldIds, SynHit h2) {
        final HashSet<String> hitIds = new HashSet<String>(h2.getIds());
        final Set<?> sharedIds = Sets.intersection(goldIds, hitIds);
        if (sharedIds.isEmpty()) {
            return false;
        }
        return true;
    }
}

