/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.genemapper.disambig;

import cc.mallet.classify.Classification;
import cc.mallet.classify.Classifier;
import cc.mallet.pipe.Pipe;
import cc.mallet.pipe.SerialPipes;
import cc.mallet.pipe.Token2FeatureVector;
import cc.mallet.types.Alphabet;
import cc.mallet.types.AlphabetCarrying;
import cc.mallet.types.FeatureVector;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;
import cc.mallet.types.Label;
import cc.mallet.types.LabelAlphabet;
import cc.mallet.types.Token;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;
import de.julielab.gene.candidateretrieval.CandidateRetrieval;
import de.julielab.gene.candidateretrieval.GeneRecordHit;
import de.julielab.gene.candidateretrieval.GeneRecordSynonymsQueryGenerator;
import de.julielab.gene.candidateretrieval.LuceneCandidateRetrieval;
import de.julielab.gene.candidateretrieval.scoring.MaxEntScorerFeaturePipe;
import de.julielab.geneexpbase.candidateretrieval.SynHit;
import de.julielab.geneexpbase.classification.FeatureUtils;
import de.julielab.geneexpbase.configuration.Parameters;
import de.julielab.geneexpbase.genemodel.GeneDocument;
import de.julielab.geneexpbase.genemodel.GeneLocation;
import de.julielab.geneexpbase.genemodel.GeneMention;
import de.julielab.geneexpbase.genemodel.GeneName;
import de.julielab.geneexpbase.genemodel.GeneSet;
import de.julielab.geneexpbase.genemodel.MentionMappingResult;
import de.julielab.geneexpbase.scoring.JaccardScorer;
import de.julielab.geneexpbase.scoring.JaroWinklerScorer;
import de.julielab.geneexpbase.scoring.LevenshteinScorer;
import de.julielab.geneexpbase.scoring.Scorer;
import de.julielab.geneexpbase.scoring.SmithWatermanScorer;
import de.julielab.geneexpbase.scoring.TokenJaroSimilarityScorer;
import de.julielab.geneexpbase.services.CacheService;
import de.julielab.genemapper.Configuration;
import de.julielab.genemapper.disambig.ContextItemsIndex;
import de.julielab.genemapper.disambig.ContextRanker;
import de.julielab.genemapper.disambig.ContextScoreCacheKey;
import de.julielab.genemapper.disambig.DocumentDisambiguationData;
import de.julielab.genemapper.disambig.DypsisDocumentDisambiguationData;
import de.julielab.genemapper.disambig.MentionDisambiguationData;
import de.julielab.genemapper.disambig.SemanticContextIndex;
import de.julielab.genemapper.disambig.SemanticIndex;
import de.julielab.genemapper.evaluation.tools.Stats;
import de.julielab.genemapper.filtering.families.ChemicalSuffix;
import de.julielab.genemapper.filtering.families.FamilyGenegroupProbabilityPipe;
import de.julielab.genemapper.filtering.families.FamilyMatchPipe;
import de.julielab.genemapper.filtering.families.LastWordPipe;
import de.julielab.genemapper.filtering.families.MentionTypeHint;
import de.julielab.genemapper.filtering.families.ProteinSymbols;
import de.julielab.genemapper.filtering.families.SpecifierDetectionPipe;
import de.julielab.genemapper.utils.ContextUtils;
import de.julielab.genemapper.utils.GeneMapperException;
import de.julielab.genemapper.utils.GeneMapperRuntimeException;
import de.julielab.ipc.javabridge.StdioBridge;
import de.julielab.ml.RankLibRanker;
import de.julielab.speciesassignment.mlcandidateranker.FeatureNormalization;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.DoubleStream;
import java.util.stream.Stream;
import javax.cache.Cache;
import javax.inject.Inject;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang.NotImplementedException;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class DypsisContextRanker
implements ContextRanker {
    // Class-wide SLF4J logger.
    private static final Logger log = LoggerFactory.getLogger(DypsisContextRanker.class);
    // Cache named "context-items-cache", obtained from the CacheService in the constructor.
    private final Cache<ContextScoreCacheKey, Map<String, Pair<Double, String>>> persistentCache;
    // Handed in through the constructor.
    private final ContextItemsIndex contextItemsIndex;
    // Handed in through the constructor.
    private final CandidateRetrieval candidateRetrieval;
    // Used by doContextRanking to score family names against the normalized mention text.
    private final Scorer jaroWinkler = new JaroWinklerScorer();
    // Created in the constructor and emptied at the start of every doContextRanking call.
    Map<String, Double> contextScoreCache;
    // NOTE(review): not read in the visible part of this class - presumably caps the number of
    // candidates considered during agglomeration; confirm against the remainder of the file.
    private int maxAgglomerationCandidates = 20;
    // Only created when the "semantic_index" configuration property is set; stays null otherwise.
    private SemanticContextIndex semanticContextIndex;
    // Optional model set via setClassifier; preferred over the ranker when both are present.
    private Classifier classifier;
    // Optional model set via setRanker; used when no classifier is set.
    private RankLibRanker ranker;
    // Returned by getDisambiguationModel when neither classifier nor ranker is set.
    // NOTE(review): never assigned in the visible code - confirm where it is initialized.
    private StdioBridge<String> transformerConnection;

    @Inject
    public DypsisContextRanker(Configuration config, CandidateRetrieval candidateRetrieval, ContextItemsIndex contextItemsIndex, CacheService cacheService) throws GeneMapperException {
        this.contextItemsIndex = contextItemsIndex;
        try {
            String semIndexFile = config.getProperty("semantic_index");
            if (semIndexFile != null) {
                this.semanticContextIndex = new SemanticContextIndex(new File(semIndexFile));
            }
        }
        catch (IOException e) {
            throw new GeneMapperException(e);
        }
        this.candidateRetrieval = candidateRetrieval;
        this.contextScoreCache = new HashMap<String, Double>();
        this.persistentCache = cacheService.getCacheManager().getCache("context-items-cache");
    }

    /**
     * Sets the classifier used for semantic re-ranking. When a classifier is set it takes
     * precedence over the ranker (see {@code getDisambiguationModel}).
     *
     * @param classifier the classifier to use; may be {@code null} to unset it
     */
    public void setClassifier(Classifier classifier) {
        this.classifier = classifier;
    }

    /**
     * Sets the RankLib ranker used for semantic re-ranking. The ranker is only consulted when
     * no classifier is set.
     *
     * @param ranker the ranker to use; may be {@code null} to unset it
     */
    public void setRanker(RankLibRanker ranker) {
        this.ranker = ranker;
    }

    /**
     * Mention-level context scoring is not supported by this ranker.
     *
     * @param disambiguationData ignored
     * @return never returns normally
     * @throws NotImplementedException always
     */
    @Override
    public MentionMappingResult assignContextScore(MentionDisambiguationData disambiguationData) {
        throw new NotImplementedException();
    }

    /**
     * Runs the document-level context ranking.
     * <p>
     * Steps: clear the score cache, agglomerate the document's genes by acronyms and names,
     * optionally re-score family names with Jaro-Winkler, collect the candidate disambiguation
     * scores, optionally create training instances, rank the candidates (semantically or by a
     * single feature value) and - outside of train mode - compute the balanced scores.
     *
     * @param disambiguationData must be a {@link DypsisDocumentDisambiguationData}
     * @param parameters         the run parameters
     * @return the training instances when in train mode with an algorithm other than
     *         {@code linear_combination}; {@code null} otherwise
     */
    @Override
    public InstanceList doContextRanking(DocumentDisambiguationData disambiguationData, Parameters parameters) {
        final DypsisDocumentDisambiguationData dypsisData = (DypsisDocumentDisambiguationData) disambiguationData;
        this.contextScoreCache.clear();
        final GeneDocument document = dypsisData.getDocument();
        // Retrieved as before; the value itself is not used in this method.
        final Stats stats = dypsisData.getStats();
        document.agglomerateByAcronyms();
        document.agglomerateByNames(true);

        final boolean rescoreFamilyNames = parameters.getBoolean(Configuration.dot("disambiguation", "score_family_names_with_jaro_winkler"));
        if (rescoreFamilyNames) {
            document.getGenes().filter(GeneMention::matchesFamilyName).forEach(mention -> {
                mention.getFamilyNames().forEach(familyHit -> familyHit.setLexicalScore(this.jaroWinkler.getScore(mention.getNormalizedText(), familyHit.getSynonym())));
                mention.getFamilyNames().sort(Comparator.comparingDouble(SynHit::getLexicalScore).reversed());
            });
        }

        final Map<String, Map<String, Double>> ids2scores = this.collectCandidateDisambiguationScores(document, parameters);

        final boolean linearCombination = parameters.getString(Configuration.dot("disambiguation", "algorithm")).equals("linear_combination");
        // Linear combination: train mode is configured explicitly.
        // Model-based: train mode is implied by the absence of a model.
        final boolean trainMode;
        if (linearCombination) {
            trainMode = parameters.getBoolean(Configuration.dot("disambiguation", "train_mode"));
        } else {
            trainMode = this.getDisambiguationModel() == null;
        }

        InstanceList trainingInstances = null;
        if (trainMode && !linearCombination) {
            trainingInstances = this.createClassificationInstances(document, ids2scores, parameters);
        }

        final String singleFeatureRanking = parameters.getString(Configuration.dot("disambiguation", "score_by_single_feature_value"), "none");
        if (!singleFeatureRanking.equals("none")) {
            this.rankBySingleFeature(document, singleFeatureRanking, ids2scores, parameters);
        } else {
            this.rerankSemantically(document, ids2scores, parameters);
        }

        if (!trainMode) {
            this.setBalancedScores(document, parameters);
        }
        return trainingInstances;
    }

    /**
     * Orders the lexically re-ranked candidates of every non-rejected gene mention by the value
     * of one named feature and stores the result in {@code tax2semanticallyOrderedCandidates}.
     * <p>
     * Candidates without an entry in {@code ids2scores} are dropped; a candidate whose score map
     * lacks the feature gets the value 0. Outside of train mode the value is min-max scaled to
     * [0, 1] when {@code scale_result_score} is enabled.
     *
     * @param document             the document whose mentions are ranked
     * @param singleFeatureRanking name of the feature to rank by
     * @param ids2scores           gene ID to feature name to feature value
     * @param parameters           the run parameters
     */
    private void rankBySingleFeature(GeneDocument document, String singleFeatureRanking, Map<String, Map<String, Double>> ids2scores, Parameters parameters) {
        final boolean scaleResultScore = parameters.getBoolean(Configuration.dot("disambiguation", "scale_result_score"));
        final double minResultScore = parameters.getDouble(Configuration.dot("disambiguation", "min_result_score"), -1.0);
        final double maxResultScore = parameters.getDouble(Configuration.dot("disambiguation", "max_result_score"), -1.0);
        final boolean trainMode = parameters.getBoolean(Configuration.dot("disambiguation", "train_mode"));
        final boolean applyScaling = scaleResultScore && !trainMode;
        final Comparator<SynHit> byContextualScoreDescending = Comparator.comparingDouble(SynHit::getContextualScore).reversed();

        for (GeneMention mention : document.getNonRejectedGenesIterable()) {
            final MentionMappingResult mappingResult = mention.getMentionMappingResult();
            for (String taxId : mention.getTaxonomyIds()) {
                final List<SynHit> lexicalCandidates = mappingResult.tax2lexicallyRerankedCandidates.get(taxId);
                final List<SynHit> ranked = new ArrayList<>(lexicalCandidates.size());
                for (SynHit hit : lexicalCandidates) {
                    final Map<String, Double> featureValues = ids2scores.get(hit.getId());
                    if (featureValues != null) {
                        Double value = featureValues.getOrDefault(singleFeatureRanking, 0.0);
                        if (applyScaling) {
                            value = FeatureUtils.doMinMaxScaling(value, minResultScore, maxResultScore, 0.0, 1.0);
                        }
                        hit.setContextualScore(value);
                        ranked.add(hit);
                    }
                }
                ranked.sort(byContextualScoreDescending);
                // The result map is created lazily, i.e. only once a taxonomy ID is processed.
                if (mappingResult.tax2semanticallyOrderedCandidates == null) {
                    mappingResult.tax2semanticallyOrderedCandidates = new HashMap<>();
                }
                mappingResult.tax2semanticallyOrderedCandidates.put(taxId, ranked);
            }
        }
    }

    /**
     * Computes the overall (balanced) score of every semantically ordered candidate and stores
     * the candidates, sorted by that score in descending order, in
     * {@code tax2balancedScoreOrderedCandidates}.
     * <p>
     * The balanced score is the weighted sum of lexical and contextual score, with separate
     * weights for exact and approximate matches. For approximate matches the score is divided by
     * {@code ln(d + e) * penaltyFactor}, where {@code d} is the number of whitespace-separated
     * tokens that occur in only one of synonym and normalized mention text.
     * <p>
     * Taxonomy IDs without semantically ordered candidates are skipped. The semantic re-ranking
     * methods only register a taxonomy ID when they produced at least one instance, so a missing
     * entry is a regular state and must not end in a {@link NullPointerException}.
     *
     * @param document   the document whose non-rejected gene mentions are scored
     * @param parameters the run parameters providing the balancing and penalty factors
     */
    private void setBalancedScores(GeneDocument document, Parameters parameters) {
        double balancingFactorExact = parameters.getDouble(Configuration.dot("disambiguation", "lexical_semantic_balancing_factor_exact"));
        double balancingFactorApprox = parameters.getDouble(Configuration.dot("disambiguation", "lexical_semantic_balancing_factor_approx"));
        double penaltyFactor = parameters.getDouble(Configuration.dot("disambiguation", "threshold_approx_length_penalty_factor"));
        for (GeneMention gm : document.getNonRejectedGenesIterable()) {
            MentionMappingResult mmr = gm.getMentionMappingResult();
            mmr.tax2balancedScoreOrderedCandidates = new HashMap<String, List<SynHit>>();
            if (mmr.tax2semanticallyOrderedCandidates == null) {
                // No semantic ranking happened for this mention at all.
                continue;
            }
            Set<String> gmTokens = Stream.of(gm.getNormalizedText().split("\\s+")).collect(Collectors.toSet());
            for (String taxId : gm.getTaxonomyIds()) {
                List<SynHit> synHits = mmr.tax2semanticallyOrderedCandidates.get(taxId);
                if (synHits == null) {
                    // This taxonomy ID did not receive a semantic ranking; nothing to balance.
                    continue;
                }
                List<SynHit> balancedRerankedHits = new ArrayList<SynHit>(synHits.size());
                for (SynHit sh : synHits) {
                    double balancingFactor = sh.isExactMatch() ? balancingFactorExact : balancingFactorApprox;
                    double entryScore = this.getLexicalSemanticBalancedSynHitScore(sh, balancingFactor);
                    if (!sh.isExactMatch()) {
                        // Penalize approximate matches by the number of tokens not shared with the mention.
                        Set<String> synhitTokens = Stream.of(sh.getSynonym().split("\\s+")).collect(Collectors.toSet());
                        double penalty = Math.log((double)Sets.symmetricDifference(synhitTokens, gmTokens).size() + Math.E) * penaltyFactor;
                        entryScore /= penalty;
                    }
                    sh.setOverallScore(entryScore);
                    balancedRerankedHits.add(sh);
                }
                balancedRerankedHits.sort(Comparator.comparingDouble(SynHit::getOverallScore).reversed());
                mmr.tax2balancedScoreOrderedCandidates.put(taxId, balancedRerankedHits);
            }
        }
    }

    /**
     * Linearly interpolates between the lexical and the contextual score of a hit.
     *
     * @param sh              the hit providing both scores
     * @param balancingFactor weight of the lexical score; the contextual score is weighted with
     *                        {@code 1 - balancingFactor}
     * @return the weighted sum of both scores
     */
    private double getLexicalSemanticBalancedSynHitScore(SynHit sh, double balancingFactor) {
        final double lexicalPart = balancingFactor * sh.getLexicalScore();
        final double contextualPart = (1.0 - balancingFactor) * sh.getContextualScore();
        return lexicalPart + contextualPart;
    }

    /**
     * Dispatches the semantic re-ranking of the document's candidates.
     * <p>
     * Without a disambiguation model the linear score combination is run, provided the
     * configured algorithm is {@code linear_combination}; otherwise nothing happens. With a
     * model, every taxonomy ID of every non-rejected gene mention is re-ranked by the classifier
     * or ranker.
     *
     * @param document   the document to process
     * @param ids2scores gene ID to feature name to feature value
     * @param parameters the run parameters
     * @throws NotImplementedException if the model does not carry an alphabet
     */
    private void rerankSemantically(GeneDocument document, Map<String, Map<String, Double>> ids2scores, Parameters parameters) {
        final Object model = this.getDisambiguationModel();
        if (model == null) {
            final String algorithm = parameters.getString(Configuration.dot("disambiguation", "algorithm"));
            if (algorithm.equals("linear_combination")) {
                this.rerankSemanticallyLinearScoreCombination(document, ids2scores, parameters);
            }
            return;
        }

        final FeatureNormalization featureNormalization = new FeatureNormalization("disambiguation");
        if (!(model instanceof AlphabetCarrying)) {
            throw new NotImplementedException("Transformer-based disambiguation not yet implemented.");
        }
        final Alphabet modelAlphabet = ((AlphabetCarrying) model).getAlphabet();
        final Pipe disambiguationFeaturePipes = this.getDisambiguationFeaturePipes(modelAlphabet, parameters);

        // Running number that is handed to the re-ranking call; advances once per mention/taxonomy pair.
        int mentionCounter = 0;
        for (GeneMention mention : document.getNonRejectedGenesIterable()) {
            for (String taxId : mention.getTaxonomyIds()) {
                if (this.classifier != null || this.ranker != null) {
                    this.rerankSemanticallyClassicalMl(document, mention, mentionCounter, taxId, ids2scores, featureNormalization, disambiguationFeaturePipes, parameters);
                    ++mentionCounter;
                }
            }
        }
    }

    /**
     * Scores every gene ID as the weighted sum of its feature values and orders the original
     * candidates of each non-rejected gene mention by that score.
     * <p>
     * The weight of a feature is read from the parameter named after the feature. Feature values
     * pass through {@code scaleLinearCombinationFeatureValue}, which in train mode records the
     * per-feature minima and maxima; the maps holding them are created and stored in the
     * parameters when training starts without them. Besides the features in {@code ids2scores},
     * the context scores of all gene sets of the document contribute to the sum. Candidates
     * without a computed score get the contextual score 0.
     *
     * @param document   the document to process
     * @param ids2scores gene ID to feature name to feature value
     * @param parameters the run parameters; may be extended by the min/max value maps
     */
    private void rerankSemanticallyLinearScoreCombination(GeneDocument document, Map<String, Map<String, Double>> ids2scores, Parameters parameters) {
        final boolean scaleResultValues = parameters.getBoolean(Configuration.dot("disambiguation", "scale_result_score"));
        final double minScalingValue = parameters.getDouble(Configuration.dot("disambiguation", "min_result_score"), -1.0);
        final double maxScalingValue = parameters.getDouble(Configuration.dot("disambiguation", "max_result_score"), -1.0);
        HashMap<String, Double> maxFeatureValues = (HashMap<String, Double>)parameters.getOrDefault((Object)Configuration.PARAM_DISAMBIGUATION_LINCOMB_MAX_VALUES, (Object)null);
        HashMap<String, Double> minFeatureValues = (HashMap<String, Double>)parameters.getOrDefault((Object)Configuration.PARAM_DISAMBIGUATION_LINCOMB_MIN_VALUES, (Object)null);
        assert (maxFeatureValues != null && minFeatureValues != null || maxFeatureValues == null && minFeatureValues == null) : "Nonconsistent state for linear combination min-max values. Max values are null: " + (maxFeatureValues == null) + ", min values are null: " + (minFeatureValues == null);
        final boolean training = parameters.getBoolean(Configuration.dot("disambiguation", "train_mode"));
        if (training && maxFeatureValues == null) {
            maxFeatureValues = new HashMap<>();
            minFeatureValues = new HashMap<>();
            parameters.put(Configuration.PARAM_DISAMBIGUATION_LINCOMB_MAX_VALUES, maxFeatureValues);
            parameters.put(Configuration.PARAM_DISAMBIGUATION_LINCOMB_MIN_VALUES, minFeatureValues);
        }
        final boolean rescaleSum = scaleResultValues && minScalingValue + maxScalingValue != 0.0;

        // Phase 1: one combined score per gene ID.
        final Map<String, Double> id2finalscore = new HashMap<>();
        for (Map.Entry<String, Map<String, Double>> idEntry : ids2scores.entrySet()) {
            final String geneId = idEntry.getKey();
            final Map<String, Double> features = idEntry.getValue();
            // The stream builder is kept so that the summation is the one DoubleStream.sum() performs.
            final DoubleStream.Builder weightedValues = DoubleStream.builder();
            for (String featureName : features.keySet()) {
                final double weight = parameters.getDouble(Configuration.dot("disambiguation", featureName));
                final double value = this.scaleLinearCombinationFeatureValue(maxFeatureValues, minFeatureValues, training, features, featureName);
                weightedValues.accept(weight * value);
            }
            for (GeneSet geneSet : document.getGeneSets()) {
                final Map<String, Double> contextScores = geneSet.getId2contextScores().get(geneId);
                if (contextScores != null) {
                    for (String featureName : contextScores.keySet()) {
                        final double weight = parameters.getDouble(Configuration.dot("disambiguation", featureName));
                        final double value = this.scaleLinearCombinationFeatureValue(maxFeatureValues, minFeatureValues, training, contextScores, featureName);
                        weightedValues.accept(weight * value);
                    }
                }
            }
            double combinedScore = weightedValues.build().sum();
            if (rescaleSum) {
                combinedScore = FeatureUtils.doMinMaxScaling(combinedScore, minScalingValue, maxScalingValue, 0.0, 1.0);
            }
            id2finalscore.put(geneId, combinedScore);
        }

        // Phase 2: write the scores to the candidates and store them in descending score order.
        final Comparator<SynHit> byContextualScoreDescending = Comparator.comparingDouble(SynHit::getContextualScore).reversed();
        for (GeneMention mention : document.getNonRejectedGenesIterable()) {
            final MentionMappingResult mappingResult = mention.getMentionMappingResult();
            if (mappingResult.tax2semanticallyOrderedCandidates == null) {
                mappingResult.tax2semanticallyOrderedCandidates = new HashMap<>();
            }
            for (Map.Entry<String, List<SynHit>> taxEntry : mappingResult.tax2originalCandidates.entrySet()) {
                final List<SynHit> candidates4tax = taxEntry.getValue();
                candidates4tax.forEach(hit -> hit.setContextualScore(id2finalscore.getOrDefault(hit.getId(), 0.0)));
                final List<SynHit> reranked = candidates4tax.stream().sorted(byContextualScoreDescending).collect(Collectors.toList());
                mappingResult.tax2semanticallyOrderedCandidates.put(taxEntry.getKey(), reranked);
            }
        }
    }

    /**
     * Returns the value of a feature for use in the linear score combination.
     * <p>
     * In training mode the raw value is returned and the per-feature extrema in
     * {@code maxFeatureValues} / {@code minFeatureValues} are updated with it. Otherwise the
     * value is min-max scaled with the recorded extrema, provided at least one of them is
     * positive (as before, features whose extrema are both non-positive stay unscaled).
     * <p>
     * Two situations that previously broke the score are handled explicitly:
     * <ul>
     * <li>identical extrema: the range is treated as 1 instead of dividing by zero, which
     * produced {@code NaN} or an infinite value that then poisoned the whole score sum;</li>
     * <li>no recorded extrema for the feature (or no extrema maps at all): the raw value is
     * returned instead of failing with a {@link NullPointerException} on unboxing.</li>
     * </ul>
     *
     * @param maxFeatureValues feature name to largest value seen in training; updated in training mode
     * @param minFeatureValues feature name to smallest value seen in training; updated in training mode
     * @param training         whether extrema are recorded (true) or applied (false)
     * @param features         feature name to value; must contain {@code featureName}
     * @param featureName      the feature to look up
     * @return the raw value in training mode, the scaled or raw value otherwise
     */
    private double scaleLinearCombinationFeatureValue(Map<String, Double> maxFeatureValues, Map<String, Double> minFeatureValues, boolean training, Map<String, Double> features, String featureName) {
        final double featureValue = features.get(featureName);
        if (training) {
            maxFeatureValues.compute(featureName, (k, v) -> v == null || featureValue > v ? featureValue : v);
            minFeatureValues.compute(featureName, (k, v) -> v == null || featureValue < v ? featureValue : v);
            return featureValue;
        }
        final Double maxValue = maxFeatureValues == null ? null : maxFeatureValues.get(featureName);
        final Double minValue = minFeatureValues == null ? null : minFeatureValues.get(featureName);
        if (maxValue == null || minValue == null) {
            // The feature was not observed during training, so there is nothing to scale with.
            return featureValue;
        }
        if (maxValue > 0.0 || minValue > 0.0) {
            final double range = maxValue - minValue;
            // A constant feature has a zero range; divide by 1 to keep the result finite.
            return (featureValue - minValue) / (range == 0.0 ? 1.0 : range);
        }
        return featureValue;
    }

    /**
     * Re-ranks the candidates of one mention and taxonomy ID with whichever classical model is
     * available. The classifier is preferred; the ranker is used only when no classifier is
     * set. Does nothing when neither is set.
     */
    private void rerankSemanticallyClassicalMl(GeneDocument document, GeneMention gm, int gmid, String taxId, Map<String, Map<String, Double>> ids2scores, FeatureNormalization featureNormalization, Pipe disambiguationFeaturePipes, Parameters parameters) {
        if (this.classifier != null) {
            this.rerankSemanticallyClassifier(document, gm, gmid, taxId, ids2scores, featureNormalization, disambiguationFeaturePipes, parameters);
            return;
        }
        if (this.ranker == null) {
            return;
        }
        this.rerankSemanticallyRanker(document, gm, gmid, taxId, ids2scores, featureNormalization, disambiguationFeaturePipes, parameters);
    }

    /**
     * Re-ranks the original candidates of one gene mention and taxonomy ID with the RankLib
     * ranker and stores the result in {@code tax2semanticallyOrderedCandidates}.
     * <p>
     * Only candidates with an entry in {@code ids2scores} are ranked. When none remains, the
     * mention mapping result is left untouched, i.e. no entry is created for the taxonomy ID.
     * The ranker's score becomes the contextual score of each hit and is min-max scaled to
     * [0, 1] afterwards when {@code scale_result_score} is enabled.
     *
     * @param document                   the document the mention belongs to
     * @param gm                         the gene mention whose candidates are ranked
     * @param gmid                       running number used in the instance name
     * @param taxId                      the taxonomy ID whose candidates are ranked
     * @param ids2scores                 gene ID to feature name to feature value
     * @param featureNormalization       normalization applied to the instances before ranking
     * @param disambiguationFeaturePipes pipes that create the feature vectors
     * @param parameters                 the run parameters
     */
    private void rerankSemanticallyRanker(GeneDocument document, GeneMention gm, int gmid, String taxId, Map<String, Map<String, Double>> ids2scores, FeatureNormalization featureNormalization, Pipe disambiguationFeaturePipes, Parameters parameters) {
        boolean scaleResultValues = parameters.getBoolean(Configuration.dot("disambiguation", "scale_result_score"));
        double minScalingValue = parameters.getDouble(Configuration.dot("disambiguation", "min_result_score"), -1.0);
        double maxScalingValue = parameters.getDouble(Configuration.dot("disambiguation", "max_result_score"), -1.0);
        MentionMappingResult mmr = gm.getMentionMappingResult();
        List<SynHit> candidates4tax = mmr.tax2originalCandidates.get(taxId);
        // The merge function keeps the first vector per gene ID; without it a gene ID occurring
        // twice among the ranking instances aborts the collection with an IllegalStateException.
        Map<String, FeatureVector> candidateRerankingFeatureVectors = mmr.tax2originalCandidateRankingInstances.get(taxId).stream().collect(Collectors.toMap(instance -> ((SynHit)instance.getProperty("sh")).getId(), instance -> (FeatureVector)instance.getData(), (first, second) -> first));
        InstanceList instances = new InstanceList(this.ranker.getAlphabet(), this.ranker.getTargetAlphabet());
        // Plain loop with a filter: a lambda cannot be the expression of an enhanced for statement
        // because that position provides no target type.
        for (SynHit candidate : candidates4tax) {
            if (!ids2scores.containsKey(candidate.getId())) {
                continue;
            }
            instances.add(this.getSemanticRerankingFeatureInstance(document, ids2scores, (LabelAlphabet)this.ranker.getTargetAlphabet(), gmid, gm, candidate, candidateRerankingFeatureVectors, disambiguationFeaturePipes, parameters));
        }
        if (instances.isEmpty()) {
            return;
        }
        featureNormalization.applyFeatureNormalization(instances, parameters);
        InstanceList rerankedInstances = this.ranker.rank(instances);
        rerankedInstances.forEach(i -> ((SynHit)i.getProperty("sh")).setContextualScore((float)((Double)i.getProperty("score")).doubleValue()));
        List<SynHit> rerankedCandidates = rerankedInstances.stream().map(i -> i.getProperty("sh")).map(SynHit.class::cast).sorted(Comparator.comparingDouble(SynHit::getContextualScore).reversed()).collect(Collectors.toList());
        if (scaleResultValues) {
            rerankedCandidates.forEach(sh -> sh.setContextualScore(FeatureUtils.doMinMaxScaling(sh.getContextualScore(), minScalingValue, maxScalingValue, 0.0, 1.0)));
        }
        if (mmr.tax2semanticallyOrderedCandidates == null) {
            mmr.tax2semanticallyOrderedCandidates = new HashMap<String, List<SynHit>>();
        }
        mmr.tax2semanticallyOrderedCandidates.put(taxId, rerankedCandidates);
    }

    /**
     * Assembles the feature pipes for disambiguation according to the enabled
     * {@code disambiguation} switches. The pipes are added in a fixed order and the list always
     * ends with a {@link Token2FeatureVector} pipe on the given data alphabet.
     * <p>
     * The token feature pipe is only added when token features are enabled for disambiguation
     * but not for candidate retrieval.
     *
     * @param dataAlphabet the alphabet the feature vectors are built on
     * @param parameters   the run parameters holding the feature switches
     * @return the serial pipe made of all active feature pipes
     */
    private Pipe getDisambiguationFeaturePipes(Alphabet dataAlphabet, Parameters parameters) {
        // Shorthand for reading a boolean switch from the "disambiguation" section.
        final Function<String, Boolean> enabled = name -> parameters.getBoolean(Configuration.dot("disambiguation", name));

        final ArrayList<Pipe> pipes = new ArrayList<>();
        if (enabled.apply("use_token_features") && !parameters.getBoolean(Configuration.dot("candidate_retrieval", "use_token_features"))) {
            pipes.add(new MaxEntScorerFeaturePipe(dataAlphabet));
        }
        if (enabled.apply("use_gnormplus_chemical_suffix_features")) {
            pipes.add(new ChemicalSuffix());
        }
        if (enabled.apply("use_gnormplus_mention_type_hint_features")) {
            pipes.add(new MentionTypeHint());
        }
        if (enabled.apply("use_gnormplus_protein_symbols_features")) {
            pipes.add(new ProteinSymbols());
        }
        if (enabled.apply("use_gene_family_score_feature")) {
            pipes.add(new FamilyGenegroupProbabilityPipe());
        }
        if (enabled.apply("use_lastword_feature")) {
            pipes.add(new LastWordPipe("LASTWORD"));
        }
        if (enabled.apply("use_other_has_specifier")) {
            pipes.add(new SpecifierDetectionPipe());
        }
        // One family match pipe serves all four family match features.
        final boolean anyFamilyMatchFeature = enabled.apply("use_family_match_exact_feature")
                || enabled.apply("use_family_match_score_feature")
                || enabled.apply("use_family_match_rank_feature")
                || enabled.apply("use_family_match_fraction_feature");
        if (anyFamilyMatchFeature) {
            pipes.add(new FamilyMatchPipe(parameters));
        }
        pipes.add(new Token2FeatureVector(dataAlphabet, false, true));
        return new SerialPipes(pipes);
    }

    /**
     * Re-ranks the original candidates of one gene mention and taxonomy ID with the classifier
     * and stores the result in {@code tax2semanticallyOrderedCandidates}.
     * <p>
     * At most {@code top_n_candidates_for_reranking} candidates from the head of the list are
     * classified. The classifier's confidence for the positive label (1.0) becomes the
     * contextual score of the hit, min-max scaled to [0, 1] when {@code scale_result_score} is
     * enabled. Nothing is stored when the candidate list is empty or no instance was created.
     *
     * @param document                   the document the mention belongs to
     * @param gm                         the gene mention whose candidates are ranked
     * @param gmid                       running number used in the instance name
     * @param taxId                      the taxonomy ID whose candidates are ranked
     * @param ids2scores                 gene ID to feature name to feature value
     * @param featureNormalization       normalization applied to the instances before classification
     * @param disambiguationFeaturePipes pipes that create the feature vectors
     * @param parameters                 the run parameters
     */
    private void rerankSemanticallyClassifier(GeneDocument document, GeneMention gm, int gmid, String taxId, Map<String, Map<String, Double>> ids2scores, FeatureNormalization featureNormalization, Pipe disambiguationFeaturePipes, Parameters parameters) {
        boolean scaleResultValues = parameters.getBoolean(Configuration.dot("disambiguation", "scale_result_score"));
        double minScalingValue = parameters.getDouble(Configuration.dot("disambiguation", "min_result_score"), -1.0);
        double maxScalingValue = parameters.getDouble(Configuration.dot("disambiguation", "max_result_score"), -1.0);
        MentionMappingResult mmr = gm.getMentionMappingResult();
        List<SynHit> candidates4tax = mmr.tax2originalCandidates.get(taxId);
        if (candidates4tax.isEmpty()) {
            return;
        }
        if (mmr.tax2semanticallyOrderedCandidates == null) {
            mmr.tax2semanticallyOrderedCandidates = new HashMap<String, List<SynHit>>();
        }
        int disambiguationHead = parameters.getInt(Configuration.dot("disambiguation", "top_n_candidates_for_reranking"));
        Map<String, FeatureVector> candidateRerankingFeatureVectors = Collections.emptyMap();
        InstanceList instances = new InstanceList(this.classifier.getAlphabet(), this.classifier.getLabelAlphabet());
        for (SynHit candidate : candidates4tax.subList(0, Math.min(disambiguationHead, candidates4tax.size()))) {
            instances.add(this.getSemanticRerankingFeatureInstance(document, ids2scores, this.classifier.getLabelAlphabet(), gmid, gm, candidate, candidateRerankingFeatureVectors, disambiguationFeaturePipes, parameters));
        }
        if (instances.isEmpty()) {
            return;
        }
        featureNormalization.applyFeatureNormalization(instances, parameters);
        ArrayList<Classification> classifications = this.classifier.classify(instances);
        Label positiveLabel = this.classifier.getLabelAlphabet().lookupLabel(Float.valueOf(1.0f));
        ArrayList<Instance> rerankedInstances = new ArrayList<Instance>(instances.size());
        for (Classification classification : classifications) {
            Instance classified = classification.getInstance();
            double confidenceForPositive = classification.getLabelVector().value(positiveLabel.getIndex());
            if (scaleResultValues) {
                confidenceForPositive = FeatureUtils.doMinMaxScaling(confidenceForPositive, minScalingValue, maxScalingValue, 0.0, 1.0);
            }
            classified.setProperty("score", confidenceForPositive);
            rerankedInstances.add(classified);
        }
        // The lambda parameter needs its explicit type: with the chained reversed() call the
        // compiler would otherwise infer Object and reject the getProperty call.
        rerankedInstances.sort(Comparator.comparingDouble((Instance instance) -> (Double)instance.getProperty("score")).reversed());
        rerankedInstances.forEach(i -> ((SynHit)i.getProperty("sh")).setContextualScore((Double)i.getProperty("score")));
        List<SynHit> rerankedCandidates = rerankedInstances.stream().map(i -> i.getProperty("sh")).map(SynHit.class::cast).sorted(Comparator.comparingDouble(SynHit::getContextualScore).reversed()).collect(Collectors.toList());
        mmr.tax2semanticallyOrderedCandidates.put(taxId, rerankedCandidates);
    }

    /**
     * Returns the model used for disambiguation, by precedence: the classifier, then the
     * ranker, then the transformer connection.
     *
     * @return the first of these that is set, or {@code null} when none is
     */
    private Object getDisambiguationModel() {
        if (this.classifier != null) {
            return this.classifier;
        }
        if (this.ranker != null) {
            return this.ranker;
        }
        return this.transformerConnection;
    }

    /**
     * Creates one labelled instance per candidate for the gene mentions of all gene sets of the
     * document, limited to the first {@code top_n_candidates_for_reranking} original candidates
     * per mention and taxonomy ID.
     * <p>
     * The data and target alphabets are taken from the parameters. When the parameters hold no
     * data alphabet yet, the newly created alphabets are stored there under the
     * {@code data_alphabet} and {@code target_alphabet} keys.
     *
     * @param document   the document to create instances for
     * @param ids2scores gene ID to feature name to feature value
     * @param parameters the run parameters; may be extended by the alphabets
     * @return the created instances
     */
    public InstanceList createClassificationInstances(GeneDocument document, Map<String, Map<String, Double>> ids2scores, Parameters parameters) {
        final int numCandidatesToDisambiguate = parameters.getInt(Configuration.dot("disambiguation", "top_n_candidates_for_reranking"));
        final String dataAlphabetKey = Configuration.dot("disambiguation", "data_alphabet");
        final String targetAlphabetKey = Configuration.dot("disambiguation", "target_alphabet");
        final Alphabet dataAlphabet = (Alphabet)parameters.getOrDefault((Object)dataAlphabetKey, new Alphabet());
        final LabelAlphabet targetAlphabet = (LabelAlphabet)parameters.getOrDefault((Object)targetAlphabetKey, new LabelAlphabet());
        final Pipe featurePipes = this.getDisambiguationFeaturePipes(dataAlphabet, parameters);
        final boolean alphabetsAlreadyStored = parameters.containsKey(dataAlphabetKey);
        if (!alphabetsAlreadyStored) {
            parameters.put(dataAlphabetKey, dataAlphabet);
            parameters.put(targetAlphabetKey, targetAlphabet);
        }

        final InstanceList instances = new InstanceList(dataAlphabet, targetAlphabet);
        final Map<String, FeatureVector> noRerankingVectors = Collections.emptyMap();
        // NOTE(review): this number advances once per candidate, whereas rerankSemantically
        // advances it once per mention/taxonomy pair - confirm the difference is intended.
        int instanceNumber = 0;
        for (GeneSet geneSet : document.getGeneSets()) {
            for (GeneMention mention : geneSet) {
                for (String taxId : mention.getTaxonomyIds()) {
                    final List<SynHit> candidates4tax = mention.getMentionMappingResult().tax2originalCandidates.get(taxId);
                    final int limit = Math.min(candidates4tax.size(), numCandidatesToDisambiguate);
                    for (int position = 0; position < limit; ++position) {
                        final SynHit candidate = candidates4tax.get(position);
                        instances.add(this.getSemanticRerankingFeatureInstance(document, ids2scores, targetAlphabet, instanceNumber, mention, candidate, noRerankingVectors, featurePipes, parameters));
                        ++instanceNumber;
                    }
                }
            }
        }
        return instances;
    }

    /**
     * Builds the feature instance for one candidate of a gene mention.
     * <p>
     * The features are the candidate's scores from {@code ids2scores} plus the context scores
     * the mention's gene set holds for the candidate's gene ID. A candidate without an entry in
     * {@code ids2scores} simply contributes no features from that source; callers in this class
     * do not all filter such candidates beforehand, and the lookup used to fail with a
     * {@link NullPointerException} for them.
     * <p>
     * The label is 1.0 when the gene ID is a gold ID and 0.0 otherwise. Gold IDs are taken from
     * the mention when the document's gold data has offsets and from the document otherwise.
     * The instance is named {@code <document id>:<gmid>}, carries the candidate, the mention and
     * its gene set as properties {@code sh}, {@code gm} and {@code gs}, and is run through the
     * given feature pipes.
     *
     * @param document                         the document the mention belongs to
     * @param ids2scores                       gene ID to feature name to feature value
     * @param targetAlphabet                   alphabet the label is looked up in
     * @param gmid                             running number used in the instance name
     * @param gm                               the gene mention
     * @param candidate                        the candidate to create the instance for
     * @param candidateRerankingFeatureVectors not used by this method
     * @param featurePipes                     pipes that turn the token into a feature vector
     * @param parameters                       not used by this method
     * @return the piped instance
     */
    public Instance getSemanticRerankingFeatureInstance(GeneDocument document, Map<String, Map<String, Double>> ids2scores, LabelAlphabet targetAlphabet, int gmid, GeneMention gm, SynHit candidate, Map<String, FeatureVector> candidateRerankingFeatureVectors, Pipe featurePipes, Parameters parameters) {
        String geneId = candidate.getId();
        Token token = new Token(gm.getNormalizedText());
        Map<String, Double> scores = ids2scores.get(geneId);
        if (scores != null) {
            for (Map.Entry<String, Double> score : scores.entrySet()) {
                token.setFeatureValue(score.getKey(), score.getValue());
            }
        }
        Map<String, Double> contextScores4Id = gm.getSingleGeneSet().getId2contextScores().get(geneId);
        if (contextScores4Id != null) {
            for (Map.Entry<String, Double> contextScore : contextScores4Id.entrySet()) {
                token.setFeatureValue(contextScore.getKey(), contextScore.getValue());
            }
        }
        // Mention-level gold IDs are only consulted when the gold data has offsets.
        boolean isGold = document.isGoldHasOffsets() ? gm.getAllGoldIdsAsList().contains(geneId) : document.getGoldIds().contains(geneId);
        Label label = targetAlphabet.lookupLabel(Float.valueOf(isGold ? 1.0f : 0.0f));
        Instance instance = new Instance(token, label, document.getId() + ":" + gmid, geneId);
        instance.setProperty("sh", candidate);
        instance.setProperty("gm", gm);
        instance.setProperty("gs", gm.getSingleGeneSet());
        return featurePipes.instanceFrom(instance);
    }

    /**
     * Copies every feature of the candidate's lexical re-ranking feature vector onto the token,
     * keeping feature names and values.
     *
     * @param candidate                        the candidate whose vector is copied
     * @param candidateRerankingFeatureVectors gene ID to lexical re-ranking feature vector
     * @param token                            the token receiving the features
     */
    private void addLexicalRerankingFeatures(SynHit candidate, Map<String, FeatureVector> candidateRerankingFeatureVectors, Token token) {
        final FeatureVector rerankingVector = candidateRerankingFeatureVectors.get(candidate.getId());
        assert (rerankingVector != null) : "We try to disambiguate with a gene ID for which there is no candidate re-ranking instance. This is either a programming error.";
        final Alphabet rerankingAlphabet = rerankingVector.getAlphabet();
        final int[] featureIndices = rerankingVector.getIndices();
        final int locationCount = rerankingVector.numLocations();
        for (int location = 0; location < locationCount; ++location) {
            // Without an index array the location itself serves as the feature index.
            final int featureIndex;
            if (featureIndices == null) {
                featureIndex = location;
            } else {
                featureIndex = featureIndices[location];
            }
            final String featureName = (String)rerankingAlphabet.lookupObject(featureIndex);
            token.setFeatureValue(featureName, rerankingVector.value(featureIndex));
        }
    }

    /**
     * Collects the document-level disambiguation scores for the top candidate gene IDs of all gene mentions.
     * <p>
     * Steps, in order:
     * <ol>
     * <li>The tagged gene names per candidate ID are gathered and merged across taxonomy IDs.</li>
     * <li>Depending on the {@code disambiguation.*} parameters, Lucene and Jaro-Winkler scores between the
     * tagged names and the candidates' synonyms are added, optionally after applying the intersection filter.</li>
     * <li>For every remaining ID, the sum of the Jaro-Winkler similarities between each tagged entity text
     * (non-rejected gene mentions and chromosome locations) and the candidate's symbol, map location, chromosome
     * and descriptions is stored as {@code genecontext_semanticcontext_JAROWINKLER}.</li>
     * <li>Gene set context scores and synonym-to-context-item scores are computed.</li>
     * <li>The Lucene score of every original candidate is stored as {@code mention_score_lucene}.</li>
     * </ol>
     *
     * @param document   the document whose gene mentions are disambiguated
     * @param parameters the configuration that switches the individual scores on and off
     * @return gene ID to (feature name to score); an immutable empty map if no mention has any candidate
     * @throws GeneMapperRuntimeException if the context item scoring fails with an {@link IOException}
     */
    public Map<String, Map<String, Double>> collectCandidateDisambiguationScores(GeneDocument document, Parameters parameters) {
        Map<String, Map<String, Collection<GeneName>>> tax2eg2entities = this.findTaggedSynonymsForIds(document, parameters);
        // Merge the per-taxonomy maps into a single gene ID -> tagged names map.
        Optional<Map<String, Collection<GeneName>>> ids2entitiesOpt = tax2eg2entities.values().stream().reduce((merged, next) -> {
            merged.putAll(next);
            return merged;
        });
        if (!ids2entitiesOpt.isPresent()) {
            return Collections.emptyMap();
        }
        Map<String, Collection<GeneName>> ids2entities = ids2entitiesOpt.get();
        Map<String, Map<String, Double>> ids2scores = new HashMap<>();
        Map<String, Multimap<String, String>> ids2synonymsExact = Collections.emptyMap();
        Map<String, Multimap<String, String>> ids2synonymsApprx = Collections.emptyMap();
        if (parameters.getBoolean(Configuration.dot("disambiguation", "use_lucene_exact_scores")) || parameters.getBoolean(Configuration.dot("disambiguation", "use_jaro_winkler_exact_scores"))) {
            ids2synonymsExact = this.addEntity2SynonymsLuceneScores(ids2entities, "exact", ids2scores, parameters);
        }
        if (parameters.getBoolean(Configuration.dot("disambiguation", "use_lucene_approx_scores")) || parameters.getBoolean(Configuration.dot("disambiguation", "use_jaro_winkler_approx_scores"))) {
            ids2synonymsApprx = this.addEntity2SynonymsLuceneScores(ids2entities, "apprx", ids2scores, parameters);
        }
        if (parameters.getBoolean(Configuration.dot("disambiguation", "intersection_filter"))) {
            this.intersectionFilter(ids2entities, ids2synonymsExact, ids2synonymsApprx, ids2scores);
        }
        if (parameters.getBoolean(Configuration.dot("disambiguation", "use_jaro_winkler_exact_scores"))) {
            this.addEntity2SynonymsJaroWinklerScores(ids2entities, ids2synonymsExact, true, ids2scores, parameters);
        }
        if (parameters.getBoolean(Configuration.dot("disambiguation", "use_jaro_winkler_approx_scores"))) {
            this.addEntity2SynonymsJaroWinklerScores(ids2entities, ids2synonymsApprx, false, ids2scores, parameters);
        }

        // Only the Jaro-Winkler similarity contributes to the gene context feature. The other similarity
        // measures that used to be evaluated here were computed and then thrown away.
        Scorer jaroWinklerScorer = new JaroWinklerScorer();
        Set<String> entities = Stream.concat(document.getNonRejectedGenes().map(GeneMention::getNormalizedText), document.findChromosomeLocations().stream().map(GeneLocation::toString)).collect(Collectors.toSet());
        Map<String, GeneRecordHit> ids2candidates = document.getNonRejectedGenes().map(GeneMention::getMentionMappingResult).flatMap(mmr -> mmr.tax2originalCandidates.values().stream()).flatMap(Collection::stream).map(GeneRecordHit.class::cast).collect(Collectors.toMap(SynHit::getId, Function.identity(), (sh1, sh2) -> sh1));
        for (String id : ids2entities.keySet()) {
            try {
                // The candidate lookup only covers non-rejected mentions while the IDs stem from all mentions,
                // so an ID may have no record hit. Its record fields are then treated as absent.
                GeneRecordHit candidate = ids2candidates.get(id);
                String symbol = candidate == null ? null : candidate.getSymbol();
                String mapLocation = candidate == null ? null : candidate.getMapLocation();
                String chromosome = candidate == null ? null : candidate.getChromosome();
                Collection<String> descriptions = this.contextItemsIndex.getContextItems(id, "description").stream().collect(Collectors.toList());
                // A DoubleStream is kept for the summation so that the numeric result stays unchanged.
                DoubleStream.Builder scoreBuilder = DoubleStream.builder();
                for (String taggedGene : entities) {
                    if (symbol != null) {
                        scoreBuilder.accept(jaroWinklerScorer.getScore(symbol, taggedGene));
                    }
                    if (mapLocation != null) {
                        scoreBuilder.accept(jaroWinklerScorer.getScore(mapLocation, taggedGene));
                    }
                    if (chromosome != null) {
                        scoreBuilder.accept(jaroWinklerScorer.getScore(chromosome, taggedGene));
                    }
                    for (String description : descriptions) {
                        scoreBuilder.accept(jaroWinklerScorer.getScore(description, taggedGene));
                    }
                }
                double sum = scoreBuilder.build().sum();
                ids2scores.computeIfAbsent(id, k -> new HashMap<>()).put("genecontext_semanticcontext_JAROWINKLER", sum);
            }
            catch (ExecutionException e) {
                log.error("Could not retrieve the description of gene with ID {}", id, e);
            }
        }

        try {
            this.setGeneSetContext2ContextItemsScores(document, ids2entities.keySet(), parameters);
            this.setGeneSetContext2SynonymScores(document, ids2entities.keySet(), parameters);
            this.addSynonym2ContextItemsScores(ids2entities, ids2scores, parameters);
        }
        catch (IOException e) {
            throw new GeneMapperRuntimeException(e);
        }

        for (GeneMention gm : document.getGenesIterable()) {
            for (List<SynHit> synHitList : gm.getMentionMappingResult().tax2originalCandidates.values()) {
                for (SynHit sh : synHitList) {
                    Map<String, Double> scores4id = ids2scores.computeIfAbsent(sh.getId(), k -> new HashMap<>());
                    scores4id.put("mention_score_lucene", Double.valueOf(sh.getLuceneScore()));
                }
            }
        }
        return ids2scores;
    }

    /**
     * Removes gene IDs whose matched terms are strictly contained in the matched terms of another gene ID.
     * <p>
     * For every gene ID a term set is built as the union of
     * <ul>
     * <li>the whitespace-separated tokens of the tagged gene names that also occur as a token in one of the
     * ID's exact or approximate synonyms, and</li>
     * <li>the complete exact synonyms of the ID.</li>
     * </ul>
     * For every pair of distinct IDs whose term sets differ in size and where one set contains all elements of
     * the other, the ID with the contained set is removed from all four maps passed to this method.
     *
     * @param ids2entities      gene ID to tagged gene names; filtered in place
     * @param ids2synonymsExact gene ID to (field to exact synonyms); filtered in place
     * @param ids2synonymsApprx gene ID to (field to approximate synonyms); filtered in place
     * @param ids2scores        gene ID to (feature name to score); filtered in place
     */
    private void intersectionFilter(Map<String, Collection<GeneName>> ids2entities, Map<String, Multimap<String, String>> ids2synonymsExact, Map<String, Multimap<String, String>> ids2synonymsApprx, Map<String, Map<String, Double>> ids2scores) {
        Map<String, Set<String>> termsByGeneId = new HashMap<>();
        for (Map.Entry<String, Collection<GeneName>> entitiesOfId : ids2entities.entrySet()) {
            String geneId = entitiesOfId.getKey();
            Set<String> taggedTokens = entitiesOfId.getValue().stream()
                    .map(GeneName::getNormalizedText)
                    .flatMap(text -> Arrays.stream(text.split("\\s+")))
                    .collect(Collectors.toSet());

            Set<String> exactSynonymTokens;
            Set<String> exactSynonyms;
            if (ids2synonymsExact.containsKey(geneId)) {
                Collection<String> exactNames = ids2synonymsExact.get(geneId).values();
                exactSynonymTokens = exactNames.stream().flatMap(syn -> Arrays.stream(syn.split("\\s+"))).collect(Collectors.toSet());
                exactSynonyms = exactNames.stream().collect(Collectors.toSet());
            } else {
                exactSynonymTokens = Collections.emptySet();
                exactSynonyms = Collections.emptySet();
            }

            Set<String> approxSynonymTokens;
            if (ids2synonymsApprx.containsKey(geneId)) {
                approxSynonymTokens = ids2synonymsApprx.get(geneId).values().stream().flatMap(syn -> Arrays.stream(syn.split("\\s+"))).collect(Collectors.toSet());
            } else {
                approxSynonymTokens = Collections.emptySet();
            }

            // The Guava views are lazy; the sets they are backed by are not modified after this point.
            Set<String> synonymTokens = Sets.union(exactSynonymTokens, approxSynonymTokens);
            Set<String> matchedTokens = Sets.intersection(taggedTokens, synonymTokens);
            termsByGeneId.put(geneId, Sets.union(matchedTokens, exactSynonyms));
        }

        // Iterate over a snapshot of the IDs because the maps are modified inside the loops.
        Set<String> geneIds = new HashSet<String>(ids2entities.keySet());
        for (String first : geneIds) {
            for (String second : geneIds) {
                // Both variables iterate the same set, so reference identity detects the same element.
                if (first == second) {
                    continue;
                }
                Set<String> firstTerms = termsByGeneId.get(first);
                Set<String> secondTerms = termsByGeneId.get(second);
                // A missing term set means that the ID has already been removed.
                if (firstTerms == null || secondTerms == null) {
                    continue;
                }
                // Sets of equal size are never filtered against each other.
                if (firstTerms.size() == secondTerms.size()) {
                    continue;
                }
                String subsumedId;
                if (firstTerms.containsAll(secondTerms)) {
                    subsumedId = second;
                } else if (secondTerms.containsAll(firstTerms)) {
                    subsumedId = first;
                } else {
                    continue;
                }
                termsByGeneId.remove(subsumedId);
                ids2entities.remove(subsumedId);
                ids2scores.remove(subsumedId);
                ids2synonymsExact.remove(subsumedId);
                ids2synonymsApprx.remove(subsumedId);
            }
        }
    }

    /**
     * Scores the textual context of each gene set against the synonyms of its candidate gene IDs and stores the
     * results as context scores on the gene set.
     * <p>
     * Nothing is done unless {@code score_gscontext_on_synonyms_lucene} or
     * {@code score_gscontext_on_synonyms_jaro_winkler} is enabled. For each gene set, the stopword-filtered
     * document context is used to score those IDs from {@code geneIdsToScore} that are candidates of the gene
     * set. The Lucene score is stored as {@code gscontext_CANDIDATES_LUCENE}. The Jaro-Winkler similarity
     * between the retrieved synonym(s) and the space-joined context is stored as
     * {@code gscontext_CANDIDATES_JAROWINKLER}, as an average or a sum depending on
     * {@code normalize_jaro_winkler_synonym_scores}.
     *
     * @param document       the document whose gene sets are scored
     * @param geneIdsToScore the gene IDs eligible for scoring
     * @param parameters     the configuration holding the {@code disambiguation.*} switches
     */
    private void setGeneSetContext2SynonymScores(GeneDocument document, Set<String> geneIdsToScore, Parameters parameters) {
        boolean useLucene = parameters.getBoolean(Configuration.dot("disambiguation", "score_gscontext_on_synonyms_lucene"));
        boolean useJaroWinkler = parameters.getBoolean(Configuration.dot("disambiguation", "score_gscontext_on_synonyms_jaro_winkler"));
        if (!useLucene && !useJaroWinkler) {
            return;
        }
        int tokenWindowSize = parameters.getInt(Configuration.dot("disambiguation", "token_window_gene_sets_size"));
        boolean distinctTokenWindow = parameters.getBoolean(Configuration.dot("disambiguation", "make_token_window_distinct"));
        boolean normalizeLucene = parameters.getBoolean(Configuration.dot("disambiguation", "normalize_lucene_synonym_scores"));
        boolean normalizeJaroWinkler = parameters.getBoolean(Configuration.dot("disambiguation", "normalize_jaro_winkler_synonym_scores"));

        for (GeneSet gs : document.getGeneSets()) {
            List<String> gsContext = ContextUtils.filterStopwords(gs.getDocumentContext(tokenWindowSize, Collections.emptySet(), true, distinctTokenWindow)).collect(Collectors.toList());
            Set<String> candidateIdsOfGs = gs.getCandidateGeneIds().collect(Collectors.toSet());
            Set<String> idsToScoreInGs = geneIdsToScore.stream().filter(candidateIdsOfGs::contains).collect(Collectors.toSet());
            Map<String, SynHit> hitsById = this.candidateRetrieval.scoreIdsByBoWSynonyms(gsContext, idsToScoreInGs, LuceneCandidateRetrieval.GENE_RECORDS_FLAT_DISJUNCTION).stream().collect(Collectors.toMap(SynHit::getId, Function.identity()));

            Map<String, Double> luceneScoresById = new HashMap<String, Double>();
            HashMultiset<String> hitCountsById = HashMultiset.create();
            HashMultimap<String, String> synonymsById = HashMultimap.create();
            for (String id : idsToScoreInGs) {
                SynHit hit = hitsById.get(id);
                if (hit != null) {
                    if (useLucene) {
                        luceneScoresById.merge(id, Double.valueOf(hit.getLuceneScore()), Double::sum);
                    }
                    hitCountsById.add(id);
                    synonymsById.put(id, hit.getSynonym());
                }
            }
            if (useLucene && normalizeLucene) {
                luceneScoresById.replaceAll((id, score) -> score / (double) hitCountsById.count(id));
            }

            // The joined context is the same for every ID of the gene set, so it is built only once.
            String contextString = useJaroWinkler ? String.join(" ", gsContext) : null;
            for (String id : idsToScoreInGs) {
                Double luceneScore = luceneScoresById.get(id);
                if (useLucene && luceneScore != null) {
                    gs.addContextScore(id, "gscontext_CANDIDATES_LUCENE", luceneScore);
                }
                if (useJaroWinkler) {
                    DoubleStream synonymScores = synonymsById.get(id).stream().mapToDouble(synonym -> this.jaroWinkler.getScore(synonym, contextString));
                    double jwScore;
                    if (normalizeJaroWinkler) {
                        jwScore = synonymScores.average().orElse(0.0);
                    } else {
                        jwScore = synonymScores.sum();
                    }
                    gs.addContextScore(id, "gscontext_CANDIDATES_JAROWINKLER", jwScore);
                }
            }
        }
    }

    /**
     * Scores the tagged gene names of each candidate gene ID against the ID's context items (GeneRIFs,
     * interactions, summaries and descriptions) and adds the scores to {@code ids2scores}.
     * <p>
     * A context field is scored if its Lucene or its Jaro-Winkler switch in the {@code disambiguation.*}
     * parameters is enabled. For each enabled field, {@code <field>_SYNONYMS_LUCENE} is written when the Lucene
     * switch of the field is on, and {@code <field>_SYNONYMS_JAROWINKLER} is written whenever the result carries
     * a context item text.
     * <p>
     * Scores are written for cached results as well as for freshly computed ones.
     *
     * @param ids2entities gene ID to the tagged gene names for which the ID is a candidate
     * @param ids2scores   gene ID to (feature name to score); receives the computed scores
     * @param parameters   the configuration holding the {@code disambiguation.*} switches
     * @throws IOException declared for errors of the context item scoring
     */
    public void addSynonym2ContextItemsScores(Map<String, Collection<GeneName>> ids2entities, Map<String, Map<String, Double>> ids2scores, Parameters parameters) throws IOException {
        boolean generifsLucene = parameters.getBoolean(Configuration.dot("disambiguation", "score_synonyms_on_generifs_lucene"));
        boolean generifsJaroWinkler = parameters.getBoolean(Configuration.dot("disambiguation", "score_synonyms_on_generifs_jaro_winkler"));
        boolean summariesLucene = parameters.getBoolean(Configuration.dot("disambiguation", "score_synonyms_on_summaries_lucene"));
        boolean summariesJaroWinkler = parameters.getBoolean(Configuration.dot("disambiguation", "score_synonyms_on_summaries_jaro_winkler"));
        boolean interactionsJaroWinkler = parameters.getBoolean(Configuration.dot("disambiguation", "score_synonyms_on_interactions_jaro_winkler"));
        boolean interactionsLucene = parameters.getBoolean(Configuration.dot("disambiguation", "score_synonyms_on_interactions_lucene"));
        boolean descriptionsLucene = parameters.getBoolean(Configuration.dot("disambiguation", "score_synonyms_on_descriptions_lucene"));
        boolean descriptionsJaroWinkler = parameters.getBoolean(Configuration.dot("disambiguation", "score_synonyms_on_descriptions_jaro_winkler"));

        for (Map.Entry<String, Collection<GeneName>> entitiesOfId : ids2entities.entrySet()) {
            String id = entitiesOfId.getKey();
            String synonymsString = entitiesOfId.getValue().stream().map(GeneName::getNormalizedText).collect(Collectors.joining(" "));
            byte[] contextHash = DigestUtils.sha1(synonymsString.getBytes());
            if (generifsLucene || generifsJaroWinkler) {
                this.scoreSynonymsOnContextField(id, synonymsString, contextHash, "generif", generifsLucene, generifsJaroWinkler, ids2scores);
            }
            if (interactionsLucene || interactionsJaroWinkler) {
                this.scoreSynonymsOnContextField(id, synonymsString, contextHash, "interaction", interactionsLucene, interactionsJaroWinkler, ids2scores);
            }
            if (summariesLucene || summariesJaroWinkler) {
                this.scoreSynonymsOnContextField(id, synonymsString, contextHash, "summary", summariesLucene, summariesJaroWinkler, ids2scores);
            }
            if (descriptionsLucene || descriptionsJaroWinkler) {
                this.scoreSynonymsOnContextField(id, synonymsString, contextHash, "description", descriptionsLucene, descriptionsJaroWinkler, ids2scores);
            }
        }
    }

    /**
     * Obtains the context item score of one gene ID for one context field, from the persistent cache or by
     * querying the context item index, and writes the resulting features into {@code ids2scores}.
     *
     * @param id             the gene ID to score
     * @param synonymsString the space-joined tagged gene names used as query and as Jaro-Winkler counterpart
     * @param contextHash    the hash of {@code synonymsString} used in the cache key
     * @param contextField   the context field, e.g. {@code generif}
     * @param useLucene      whether the Lucene score of the field is written
     * @param useJaroWinkler the Jaro-Winkler switch of the field; passed to the index and part of the cache key
     * @param ids2scores     gene ID to (feature name to score); receives the computed scores
     * @throws IOException declared for errors of the context item scoring
     */
    private void scoreSynonymsOnContextField(String id, String synonymsString, byte[] contextHash, String contextField, boolean useLucene, boolean useJaroWinkler, Map<String, Map<String, Double>> ids2scores) throws IOException {
        // The key carries the same Jaro-Winkler switch that is passed to the index query, so that results
        // retrieved with and without that switch are cached separately.
        ContextScoreCacheKey cacheKey = new ContextScoreCacheKey(Collections.singleton(id), contextHash, contextField, useJaroWinkler);
        Map<String, Pair<Double, String>> result = this.persistentCache.get(cacheKey);
        if (result == null) {
            BooleanQuery query = ContextUtils.makeContextQuery(synonymsString, contextField);
            result = this.contextItemsIndex.getContextItemScores(Collections.singleton(id), query, contextField, useJaroWinkler);
            this.persistentCache.put(cacheKey, result);
        }
        Map<String, Double> scores4id = ids2scores.computeIfAbsent(id, k -> new HashMap<>());
        // The result may not contain an entry for the ID.
        Pair<Double, String> scoreAndContextItem = result != null ? result.get(id) : null;
        if (scoreAndContextItem == null) {
            return;
        }
        if (useLucene) {
            scores4id.put(contextField + "_SYNONYMS_LUCENE", scoreAndContextItem.getLeft());
        }
        if (scoreAndContextItem.getRight() != null) {
            double jwScore = this.jaroWinkler.getScore(scoreAndContextItem.getRight(), synonymsString);
            scores4id.put(contextField + "_SYNONYMS_JAROWINKLER", jwScore);
        }
    }

    /**
     * Scores the textual context of each gene set against the context items (GeneRIFs, interactions, summaries
     * and descriptions) of its candidate gene IDs and stores the results as context scores on the gene set.
     * <p>
     * A context field is scored if its Lucene or its Jaro-Winkler switch in the {@code disambiguation.*}
     * parameters is enabled. Only IDs from {@code geneIdsToScore} that are candidates of the respective gene
     * set are scored. The Lucene score is stored as {@code <field>_GSTEXT_LUCENE}; the Jaro-Winkler similarity
     * between the returned context item text and the gene set context is stored as
     * {@code <field>_GSTEXT_JAROWINKLER}. IDs for which the scoring returns no entry receive no score.
     *
     * @param document       the document whose gene sets are scored
     * @param geneIdsToScore the gene IDs eligible for scoring
     * @param parameters     the configuration holding the {@code disambiguation.*} switches
     * @throws IOException declared for errors of the context item scoring
     */
    public void setGeneSetContext2ContextItemsScores(GeneDocument document, Set<String> geneIdsToScore, Parameters parameters) throws IOException {
        boolean generifsLucene = parameters.getBoolean(Configuration.dot("disambiguation", "score_gscontext_on_generifs_lucene"));
        boolean interactionsLucene = parameters.getBoolean(Configuration.dot("disambiguation", "score_gscontext_on_interactions_lucene"));
        boolean summariesLucene = parameters.getBoolean(Configuration.dot("disambiguation", "score_gscontext_on_summaries_lucene"));
        boolean descriptionsLucene = parameters.getBoolean(Configuration.dot("disambiguation", "score_gscontext_on_descriptions_lucene"));
        boolean generifsJaroWinkler = parameters.getBoolean(Configuration.dot("disambiguation", "score_gscontext_on_generifs_jaro_winkler"));
        boolean interactionsJaroWinkler = parameters.getBoolean(Configuration.dot("disambiguation", "score_gscontext_on_interactions_jaro_winkler"));
        boolean summariesJaroWinkler = parameters.getBoolean(Configuration.dot("disambiguation", "score_gscontext_on_summaries_jaro_winkler"));
        boolean descriptionsJaroWinkler = parameters.getBoolean(Configuration.dot("disambiguation", "score_gscontext_on_descriptions_jaro_winkler"));
        boolean scoreGenerifs = generifsLucene || generifsJaroWinkler;
        boolean scoreInteractions = interactionsLucene || interactionsJaroWinkler;
        boolean scoreSummaries = summariesLucene || summariesJaroWinkler;
        boolean scoreDescriptions = descriptionsLucene || descriptionsJaroWinkler;
        if (!scoreGenerifs && !scoreInteractions && !scoreSummaries && !scoreDescriptions) {
            return;
        }
        boolean excludeGenesForContextItemScoring = parameters.getBoolean(Configuration.dot("disambiguation", "exclude_genes_for_context_item_scoring"));
        boolean makeTokenWindowDistinct = parameters.getBoolean(Configuration.dot("disambiguation", "make_token_window_distinct"));
        int tokenWindowGenesetsSize = parameters.getInt(Configuration.dot("disambiguation", "token_window_gene_sets_size"));
        for (GeneSet gs : document.getGeneSets()) {
            Stream<String> gsContext = ContextUtils.filterStopwords(gs.getDocumentContext(tokenWindowGenesetsSize, Collections.emptySet(), excludeGenesForContextItemScoring, makeTokenWindowDistinct));
            String gsContextString = gsContext.collect(Collectors.joining(" "));
            byte[] contextHash = DigestUtils.sha1(gsContextString.getBytes());
            Set<String> idsInGs = gs.getCandidateGeneIds().collect(Collectors.toSet());
            Set<String> geneIdsToScoreInGs = geneIdsToScore.stream().filter(idsInGs::contains).collect(Collectors.toSet());
            if (scoreGenerifs) {
                this.scoreGeneSetContextOnField(gs, geneIdsToScoreInGs, gsContextString, contextHash, "generif", generifsLucene, generifsJaroWinkler);
            }
            if (scoreInteractions) {
                this.scoreGeneSetContextOnField(gs, geneIdsToScoreInGs, gsContextString, contextHash, "interaction", interactionsLucene, interactionsJaroWinkler);
            }
            if (scoreSummaries) {
                this.scoreGeneSetContextOnField(gs, geneIdsToScoreInGs, gsContextString, contextHash, "summary", summariesLucene, summariesJaroWinkler);
            }
            if (scoreDescriptions) {
                this.scoreGeneSetContextOnField(gs, geneIdsToScoreInGs, gsContextString, contextHash, "description", descriptionsLucene, descriptionsJaroWinkler);
            }
        }
    }

    /**
     * Obtains the context item scores of the given gene IDs for one context field, from the persistent cache or
     * by querying the context item index, and stores them as context scores on the gene set.
     *
     * @param gs              the gene set that receives the context scores
     * @param geneIds         the gene IDs to score
     * @param gsContextString the space-joined gene set context used as query and as Jaro-Winkler counterpart
     * @param contextHash     the hash of {@code gsContextString} used in the cache key
     * @param contextField    the context field, e.g. {@code generif}
     * @param useLucene       whether the Lucene score of the field is stored
     * @param useJaroWinkler  whether the Jaro-Winkler score of the field is stored; also passed to the index
     *                        and part of the cache key
     * @throws IOException declared for errors of the context item scoring
     */
    private void scoreGeneSetContextOnField(GeneSet gs, Set<String> geneIds, String gsContextString, byte[] contextHash, String contextField, boolean useLucene, boolean useJaroWinkler) throws IOException {
        // The key carries the same Jaro-Winkler switch that is passed to the index query. Keying on the combined
        // "lucene or jaro-winkler" switch would let a result retrieved without the switch be served to a
        // request made with it.
        ContextScoreCacheKey cacheKey = new ContextScoreCacheKey(geneIds, contextHash, contextField, useJaroWinkler);
        Map<String, Pair<Double, String>> result = this.persistentCache.get(cacheKey);
        if (result == null) {
            BooleanQuery query = ContextUtils.makeContextQuery(gsContextString, contextField);
            result = this.contextItemsIndex.getContextItemScores(geneIds, query, contextField, useJaroWinkler);
            this.persistentCache.put(cacheKey, result);
        }
        if (result == null) {
            return;
        }
        for (String id : geneIds) {
            // Not every gene ID necessarily has an entry in the result; such IDs get no score.
            Pair<Double, String> scoreAndContextItem = result.get(id);
            if (scoreAndContextItem == null) {
                continue;
            }
            if (useLucene && scoreAndContextItem.getLeft() != null) {
                gs.addContextScore(id, contextField + "_GSTEXT_LUCENE", scoreAndContextItem.getLeft());
            }
            if (useJaroWinkler && scoreAndContextItem.getRight() != null) {
                gs.addContextScore(id, contextField + "_GSTEXT_JAROWINKLER", this.jaroWinkler.getScore(scoreAndContextItem.getRight(), gsContextString));
            }
        }
    }

    /**
     * Collects, per taxonomy ID and candidate gene ID, the gene names of all mentions that have the gene ID
     * among their top candidates.
     * <p>
     * For every gene mention and every taxonomy ID of its original candidates, only the first
     * {@code disambiguation.top_n_candidates_for_reranking} candidates are considered.
     *
     * @param document   the document whose gene mentions are inspected
     * @param parameters the configuration providing the number of candidates to consider
     * @return taxonomy ID to (gene ID to the gene names of the mentions having the ID as a top candidate)
     */
    private Map<String, Map<String, Collection<GeneName>>> findTaggedSynonymsForIds(GeneDocument document, Parameters parameters) {
        int maxCandidates = parameters.getInt(Configuration.dot("disambiguation", "top_n_candidates_for_reranking"));
        Map<String, Map<String, Collection<GeneName>>> taggedNamesByTaxAndId = new HashMap<>();
        for (GeneMention gm : document.getGenesIterable()) {
            for (Map.Entry<String, List<SynHit>> candidatesOfTax : gm.getMentionMappingResult().tax2originalCandidates.entrySet()) {
                String tax = candidatesOfTax.getKey();
                List<SynHit> candidates = candidatesOfTax.getValue();
                int limit = Math.min(candidates.size(), maxCandidates);
                for (int rank = 0; rank < limit; ++rank) {
                    String candidateId = candidates.get(rank).getId();
                    taggedNamesByTaxAndId
                            .computeIfAbsent(tax, k -> new HashMap<>())
                            .computeIfAbsent(candidateId, k -> new HashSet<>())
                            .add(gm.getGeneName());
                }
            }
        }
        return taggedNamesByTaxAndId;
    }

    /**
     * Writes Jaro-Winkler similarity features between record names and tagged gene names into
     * {@code ids2scores}.
     * <p>
     * For every ID in {@code id2synonyms} and every field of its multimap, each name of the field
     * is scored against the normalized text of every gene name registered for that ID in
     * {@code ids2entities}, and the scores are summed. The sum is narrowed to {@code float}
     * precision and, if the parameter
     * {@code Configuration.dot("disambiguation", "normalize_jaro_winkler_synonym_scores")} is
     * true, divided by the number of names visited so far. The result is stored under
     * {@code "mention_EXACT_<field>_JAROWINKLER"} or {@code "mention_APPRX_<field>_JAROWINKLER"}.
     * <p>
     * NOTE(review): the sum and the name counter are not reset between fields of the same ID, so
     * a field's value also contains the contributions of the fields iterated before it. Confirm
     * whether this accumulation is intended.
     *
     * @param ids2entities gene names per ID; an ID without an entry contributes a sum of zero
     * @param id2synonyms  per ID, the names to compare against, grouped by field
     * @param exactMatches selects the {@code EXACT} ({@code true}) or {@code APPRX} feature name
     * @param ids2scores   receives the computed feature values per ID
     * @param parameters   source of the normalization switch
     */
    public void addEntity2SynonymsJaroWinklerScores(Map<String, Collection<GeneName>> ids2entities, Map<String, Multimap<String, String>> id2synonyms, boolean exactMatches, Map<String, Map<String, Double>> ids2scores, Parameters parameters) {
        boolean normalize = parameters.getBoolean(Configuration.dot("disambiguation", "normalize_jaro_winkler_synonym_scores"));
        String featurePrefix = exactMatches ? "mention_EXACT_" : "mention_APPRX_";
        for (Map.Entry<String, Multimap<String, String>> synonymEntry : id2synonyms.entrySet()) {
            String id = synonymEntry.getKey();
            Collection<GeneName> mentionNames = ids2entities.get(id);
            Multimap<String, String> field2names = synonymEntry.getValue();
            // Both accumulators live across all fields of this ID.
            double runningSum = 0.0;
            int namesSeen = 0;
            for (String field : field2names.keySet()) {
                for (String recordName : field2names.get(field)) {
                    ++namesSeen;
                    if (mentionNames != null) {
                        for (GeneName mentionName : mentionNames) {
                            runningSum += this.jaroWinkler.getScore(recordName, mentionName.getNormalizedText());
                        }
                    }
                }
                // The float narrowing is part of the produced feature value.
                double featureValue = (float)runningSum;
                if (normalize) {
                    featureValue /= (double)namesSeen;
                }
                Map<String, Double> scores4id = ids2scores.computeIfAbsent(id, k -> new HashMap<>());
                scores4id.put(featurePrefix + field + "_JAROWINKLER", featureValue);
            }
        }
    }

    /**
     * Queries the candidate retrieval once per field of
     * {@code GeneRecordSynonymsQueryGenerator.ALL_FIELDS}, collects the returned names per ID and
     * field, and optionally records the returned scores as features.
     * <p>
     * Scores are recorded only if {@code use_lucene_exact_scores} is enabled and
     * {@code queryType} is {@code "exact"}, or {@code use_lucene_approx_scores} is enabled and
     * {@code queryType} is {@code "apprx"} (both keys under the {@code disambiguation} prefix).
     * Recorded scores are first gathered for all fields and then copied into {@code ids2scores}
     * under the feature name {@code "mention_<field>_<queryType>"}; only scores greater than
     * zero are copied.
     * <p>
     * A name-extraction function is handed to the retrieval only if
     * {@code use_jaro_winkler_exact_scores} or {@code use_jaro_winkler_approx_scores} is enabled;
     * otherwise {@code null} is passed.
     *
     * @param ids2entities gene names per ID, passed through to the candidate retrieval
     * @param queryType    {@code "exact"} or {@code "apprx"}; also becomes part of the feature name
     * @param ids2scores   receives the positive scores per ID and feature name
     * @param parameters   source of the switches described above
     * @return per ID, the names returned by the candidate retrieval, grouped by field
     */
    public Map<String, Multimap<String, String>> addEntity2SynonymsLuceneScores(Map<String, Collection<GeneName>> ids2entities, String queryType, Map<String, Map<String, Double>> ids2scores, Parameters parameters) {
        boolean recordLuceneScores = parameters.getBoolean(Configuration.dot("disambiguation", "use_lucene_exact_scores")) && queryType.equals("exact") || parameters.getBoolean(Configuration.dot("disambiguation", "use_lucene_approx_scores")) && queryType.equals("apprx");
        // NOTE(review): this value is read but not used below. The read is kept because the
        // behaviour of Parameters.getBoolean for a missing key is not visible here.
        boolean useLuceneSynonymSet = parameters.getBoolean(Configuration.dot("disambiguation", "use_lucene_synonym_set"));
        boolean useJaroWinklerExactScores = parameters.getBoolean(Configuration.dot("disambiguation", "use_jaro_winkler_exact_scores"));
        boolean useJaroWinklerForKeywords = parameters.getBoolean(Configuration.dot("disambiguation", "use_jaro_winkler_approx_scores"));
        Map<String, Multimap<String, String>> id2synonyms = new HashMap<>();
        Map<String, Map<String, Double>> synonymScores = new HashMap<>();
        for (int i = 0; i < GeneRecordSynonymsQueryGenerator.ALL_FIELDS.length; ++i) {
            String field = GeneRecordSynonymsQueryGenerator.ALL_FIELDS[i];
            Function<GeneRecordHit, String[]> nameFunc = null;
            if (useJaroWinklerExactScores || useJaroWinklerForKeywords) {
                nameFunc = GeneRecordSynonymsQueryGenerator.ALL_FIELD_FUNCTIONS.get(i);
            }
            Pair<Map<String, Double>, Map<String, Set<String>>> scoresAndNames = this.candidateRetrieval.scoreSynonymsRecordIndex(queryType, ids2entities, nameFunc, new GeneRecordSynonymsQueryGenerator(queryType.equals("exact"), new String[]{field}, BooleanClause.Occur.SHOULD));
            if (recordLuceneScores) {
                String featureName = "mention_" + field + "_" + queryType;
                for (Map.Entry<String, Double> scoreEntry : scoresAndNames.getLeft().entrySet()) {
                    synonymScores.computeIfAbsent(scoreEntry.getKey(), k -> new HashMap<>()).put(featureName, scoreEntry.getValue());
                }
            }
            for (Map.Entry<String, Set<String>> nameEntry : scoresAndNames.getRight().entrySet()) {
                id2synonyms.computeIfAbsent(nameEntry.getKey(), k -> HashMultimap.create()).putAll(field, nameEntry.getValue());
            }
        }
        // Copy only positive scores; ids2scores is not touched until all fields were queried.
        for (Map.Entry<String, Map<String, Double>> idEntry : synonymScores.entrySet()) {
            for (Map.Entry<String, Double> scoreEntry : idEntry.getValue().entrySet()) {
                Double score = scoreEntry.getValue();
                if (score > 0.0) {
                    ids2scores.computeIfAbsent(idEntry.getKey(), k -> new HashMap<>()).put(scoreEntry.getKey(), score);
                }
            }
        }
        return id2synonyms;
    }

    /**
     * Reads the double parameter stored under the key that {@code Configuration.dot} builds from
     * {@code prefix} and {@code "threshold_exact_matches"}.
     *
     * @param prefix     first component of the parameter key
     * @param parameters the parameters to read from
     * @return the configured value
     */
    private double getExactMatchThreshold(String prefix, Parameters parameters) {
        String thresholdKey = Configuration.dot(prefix, "threshold_exact_matches");
        return parameters.getDouble(thresholdKey);
    }

    /**
     * Reads the double parameter stored under the key that {@code Configuration.dot} builds from
     * {@code prefix} and {@code "threshold_approx_matches"}.
     *
     * @param prefix     first component of the parameter key
     * @param parameters the parameters to read from
     * @return the configured value
     */
    private double getApproxMatchThreshold(String prefix, Parameters parameters) {
        String thresholdKey = Configuration.dot(prefix, "threshold_approx_matches");
        return parameters.getDouble(thresholdKey);
    }

    /**
     * @return the semantic context index held by this instance
     */
    @Override
    public SemanticIndex getSemanticIndex() {
        return semanticContextIndex;
    }

    /**
     * Drops the references to the classifier and the ranker and, if a transformer connection
     * exists and reports that it is running, stops it and drops that reference as well.
     * <p>
     * If stopping is interrupted, the interrupt status of the current thread is restored
     * before the failure is reported, so that callers further up can still observe it.
     *
     * @throws GeneMapperRuntimeException if stopping the transformer connection fails; the
     *                                    connection reference is kept in that case
     */
    @Override
    public void clear() {
        this.classifier = null;
        this.ranker = null;
        if (this.transformerConnection != null && this.transformerConnection.isRunning()) {
            try {
                this.transformerConnection.stop();
            }
            catch (IOException | InterruptedException e) {
                if (e instanceof InterruptedException) {
                    // Catching InterruptedException clears the flag; set it again.
                    Thread.currentThread().interrupt();
                }
                log.error("Could not stop the pipe to the disambiguation transformer process.");
                throw new GeneMapperRuntimeException(e);
            }
            this.transformerConnection = null;
        }
    }

    /**
     * @return the currently set maximum number of agglomeration candidates
     */
    public int getMaxAgglomerationCandidates() {
        return maxAgglomerationCandidates;
    }

    /**
     * Sets the maximum number of agglomeration candidates.
     *
     * @param maxCandidates the new value; stored as given, without validation
     */
    public void setMaxAgglomerationCandidates(int maxCandidates) {
        maxAgglomerationCandidates = maxCandidates;
    }

    /**
     * Closes the candidate retrieval used by this instance.
     */
    public void shutdown() {
        candidateRetrieval.close();
    }

    /**
     * @return the context items index held by this instance
     */
    public ContextItemsIndex getContextItemsIndex() {
        return contextItemsIndex;
    }

    /**
     * Always returns {@code null} in this implementation.
     * <p>
     * NOTE(review): callers must be prepared for a {@code null} result. Whether this is a
     * deliberate "no parameters available" answer or an unfinished stub cannot be told from
     * this method alone - confirm.
     *
     * @return {@code null}
     */
    public Parameters getParameters() {
        return null;
    }
}

