/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.genemapper.mappingcores;

import cc.mallet.classify.Classification;
import cc.mallet.classify.Classifier;
import cc.mallet.types.Alphabet;
import cc.mallet.types.FeatureVector;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;
import cc.mallet.types.Label;
import cc.mallet.types.LabelAlphabet;
import cc.mallet.types.LabelVector;
import de.julielab.gene.candidateretrieval.CandidateRetrieval;
import de.julielab.gene.candidateretrieval.GeneRecordHit;
import de.julielab.gene.candidateretrieval.scoring.MaxEntScorerFeaturePipe;
import de.julielab.gene.candidateretrieval.scoring.MaxEntScorerPairExtractor;
import de.julielab.geneexpbase.candidateretrieval.SynHit;
import de.julielab.geneexpbase.classification.FeatureUtils;
import de.julielab.geneexpbase.configuration.Parameters;
import de.julielab.geneexpbase.genemodel.GeneMention;
import de.julielab.geneexpbase.genemodel.MentionMappingResult;
import de.julielab.geneexpbase.scoring.JaccardScorer;
import de.julielab.geneexpbase.scoring.JaroWinklerScorer;
import de.julielab.geneexpbase.scoring.LevenshteinScorer;
import de.julielab.geneexpbase.scoring.NeedlemanWunschScorer;
import de.julielab.geneexpbase.scoring.Scorer;
import de.julielab.geneexpbase.scoring.ScorerAvg;
import de.julielab.geneexpbase.scoring.SmithWatermanScorer;
import de.julielab.geneexpbase.scoring.TokenJaroSimilarityScorer;
import de.julielab.geneexpbase.services.CacheService;
import de.julielab.genemapper.Configuration;
import de.julielab.genemapper.classification.TransformerClassifier;
import de.julielab.genemapper.classification.TransformerDisambiguationDataUtils;
import de.julielab.genemapper.utils.GeneMapperRuntimeException;
import de.julielab.java.utilities.FileUtilities;
import de.julielab.ml.RankLibRanker;
import de.julielab.speciesassignment.mlcandidateranker.FeatureNormalization;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.OptionalDouble;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.stream.Collectors;
import javax.inject.Inject;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class DypsisCandidateRanker {
    // Class-wide SLF4J logger.
    private static final Logger log = LoggerFactory.getLogger(DypsisCandidateRanker.class);
    // Maps a full-text field name ("generif", "interaction", "godesc", "summary") to the
    // GeneRecordHit accessor that returns the values of that field.
    private static final Map<String, Function<GeneRecordHit, String[]>> geneRecordFulltextFieldGetters = Map.of("generif", GeneRecordHit::getGeneRifs, "interaction", GeneRecordHit::getInteractions, "godesc", GeneRecordHit::getGoDescriptors, "summary", GeneRecordHit::getSummaries);
    // Publicly writable, class-wide statistics. They are not touched in the visible part of this
    // class; presumably evaluation code elsewhere maintains them -- TODO confirm.
    // NOTE(review): 'correct' is an int while 'wrong' is a double.
    public static double avgDiffCorrect = 0.0;
    public static double avgRatioCorrect = 0.0;
    public static int correct = 0;
    public static double avgDiffWrong = 0.0;
    public static double avgRatioWrong = 0.0;
    public static double wrong = 0.0;
    // Injected candidate retrieval component; used for original-name lookups, candidate query
    // strings, TF-IDF scorers and full-text fields of record hits.
    private final CandidateRetrieval candidateRetrieval;
    // NOTE(review): not referenced in the visible part of this class.
    private final MaxEntScorerPairExtractor maxEntScorerPairExtractor = new MaxEntScorerPairExtractor();
    // Injected base configuration; handed to newly created TransformerClassifier instances.
    private final de.julielab.geneexpbase.configuration.Configuration configuration;
    // Injected cache service; handed to newly created TransformerClassifier instances.
    private final CacheService cacheService;
    // Class-wide cache of transformer classifiers, keyed by the machine learning configuration root.
    private static final Map<String, TransformerClassifier> transformer = new ConcurrentHashMap<String, TransformerClassifier>();
    // Parameters owned by this instance; mergeParameters() combines them with externally passed
    // ones. Presumably populated by loadModel(InputStream), which is outside this view -- TODO confirm.
    private Parameters parameters;

    /**
     * Creates a ranker from its injected collaborators.
     *
     * @param configuration      the base configuration, later passed on to transformer classifiers
     * @param candidateRetrieval the candidate retrieval component used for index lookups and scorers
     * @param cacheService       the cache service, later passed on to transformer classifiers
     */
    @Inject
    public DypsisCandidateRanker(de.julielab.geneexpbase.configuration.Configuration configuration, CandidateRetrieval candidateRetrieval, CacheService cacheService) {
        this.cacheService = cacheService;
        this.candidateRetrieval = candidateRetrieval;
        this.configuration = configuration;
    }

    /**
     * Marks every original candidate of the mapping result as relevant (1.0) or irrelevant (0.0)
     * depending on whether its ID is a gold ID.
     * <p>
     * When the gene document reports that its gold annotation has offsets, the gold IDs of the
     * mention itself are consulted; otherwise the gold IDs of the whole document are used.
     *
     * @param gm  the gene mention providing the gold IDs
     * @param mmr the mapping result whose {@code tax2originalCandidates} are scored in place
     */
    public static void setGoldRelevanceScores(GeneMention gm, MentionMappingResult mmr) {
        for (List<SynHit> candidatesOfTax : mmr.tax2originalCandidates.values()) {
            for (SynHit candidate : candidatesOfTax) {
                boolean isGold;
                if (gm.getGeneDocument().isGoldHasOffsets()) {
                    isGold = gm.getAllGoldIdsAsList().contains(candidate.getId());
                } else {
                    isGold = gm.getGeneDocument().getGoldIds().contains(candidate.getId());
                }
                candidate.setRelevanceScore(isGold ? 1.0f : 0.0f);
            }
        }
    }

    /**
     * Serializes a copy of the given parameters to a file.
     * <p>
     * All entries whose key starts with {@code species_assignment} are dropped from the copy
     * before writing; the passed {@code parameters} object itself is not modified.
     *
     * @param modelDestination the file to write the serialized parameters to
     * @param parameters       the parameters to store
     * @throws GeneMapperRuntimeException if writing fails with an {@link IOException}
     */
    public void saveModel(File modelDestination, Parameters parameters) {
        Parameters parametersToSave = new Parameters(parameters);
        // Iterate over a typed snapshot of the keys so that entries can be removed from the
        // copy while looping.
        for (String key : new HashSet<String>(parametersToSave.keySet())) {
            if (key.startsWith("species_assignment")) {
                parametersToSave.remove(key);
            }
        }
        try (BufferedOutputStream bos = FileUtilities.getOutputStreamToFile(modelDestination);
             ObjectOutputStream oos = new ObjectOutputStream(bos)) {
            oos.writeObject(parametersToSave);
        } catch (IOException e) {
            throw new GeneMapperRuntimeException(e);
        }
    }

    /**
     * Loads a model from a file by delegating to the stream-based {@code loadModel} overload.
     * <p>
     * The stream opened here is always closed when loading finishes or fails. Should the
     * delegate close the stream as well, the second close is harmless.
     *
     * @param modelSource the file to read the model from
     * @throws IOException            if the file cannot be opened or read
     * @throws ClassNotFoundException propagated from the stream-based overload
     */
    public void loadModel(File modelSource) throws IOException, ClassNotFoundException {
        try (InputStream modelStream = FileUtilities.getInputStreamFromFile(modelSource)) {
            this.loadModel(modelStream);
        }
    }

    /**
     * Combines the parameters held by this instance with externally passed parameters.
     * <p>
     * If only one of the two parameter sets exists, that set is returned as is (not copied).
     * If both exist, a copy of the instance parameters is returned in which an external entry
     * is written when its key is not yet present or when its key starts with
     * {@code disambiguation}; all other external entries are ignored.
     *
     * @param parameterMap external parameters, may be {@code null}
     * @return the effective parameters, never {@code null}
     * @throws IllegalArgumentException if neither external nor instance parameters exist
     */
    @NotNull
    public Parameters mergeParameters(Parameters parameterMap) {
        if (parameterMap == null) {
            if (this.parameters == null) {
                throw new IllegalArgumentException("No parameters were passed to an untrained instance.");
            }
            return this.parameters;
        }
        if (this.parameters == null) {
            return parameterMap;
        }
        Parameters merged = new Parameters(this.parameters);
        for (String key : parameterMap.keySet()) {
            boolean mayWrite = !merged.containsKey(key) || key.startsWith("disambiguation");
            if (mayWrite) {
                merged.put(key, parameterMap.get(key));
            }
        }
        return merged;
    }

    /**
     * Re-ranks the candidates of a gene mention and stores the result, per taxonomy ID, in
     * {@code tax2lexicallyRerankedCandidates} of the mention's mapping result (the map is created
     * if it does not exist yet).
     * <p>
     * The algorithm is read from the merged parameters under
     * {@code candidate_retrieval.ml.(exactmatch|approxmatch).algorithm}:
     * <ul>
     * <li>{@code lucene}: candidates keep their order; scores are optionally min-max scaled and the
     * list may be replaced by the result of {@link #getOriginalExactMatches}.</li>
     * <li>{@code ltr}: ranking instances are scored by the configured {@code RankLibRanker}.</li>
     * <li>{@code maxent} / {@code svm}: ranking instances are scored by the configured MALLET
     * classifier using the value of the label {@code 1.0}.</li>
     * <li>{@code transformer}: candidates are scored by a transformer sentence pair classifier.</li>
     * <li>Any other value, or {@code ltr}/{@code maxent}/{@code svm} without ranking instances: the
     * original candidates are copied unchanged.</li>
     * </ul>
     * If the required ranker or classifier is missing from the parameters, nothing is ranked. If no
     * algorithm is configured at all, a warning is logged and nothing is ranked.
     *
     * @param gm           the gene mention whose candidates are ranked
     * @param gmidCounter  counter passed on to the creation of ranking instances
     * @param parameterMap external parameters, merged with the instance parameters via
     *                     {@link #mergeParameters(Parameters)}
     * @throws IllegalStateException      if the Lucene score filter removes all {@code ltr} instances
     *                                    or a ranked instance carries no {@code SynHit}
     * @throws GeneMapperRuntimeException if the transformer branch fails with a checked exception
     */
    public void rankCandidates(GeneMention gm, AtomicInteger gmidCounter, Parameters parameterMap) {
        final Parameters parameters = this.mergeParameters(parameterMap);
        final MentionMappingResult mmr = gm.getMentionMappingResult();
        final String matchType = mmr.hasExactCandidateMatch() ? "exactmatch" : "approxmatch";
        final String mlConfigRoot = Configuration.dot("candidate_retrieval", "ml", matchType);
        final boolean scaleResultValues = parameters.getBoolean(Configuration.dot(mlConfigRoot, "scale_result_score"));
        final int numRerank = 100;
        final boolean rankByExactMatchFirst = parameters.getBoolean(Configuration.dot("candidate_retrieval", "mlranking.sort_by_exact_match_first"), false);
        final FeatureNormalization featureNormalization = new FeatureNormalization(Configuration.dot(mlConfigRoot));
        final Map<String, List<SynHit>> tax2candidates = mmr.tax2originalCandidates;
        final String rankingAlgorithm = (String)parameters.get(Configuration.dot(mlConfigRoot, "algorithm"));
        if (mmr.tax2lexicallyRerankedCandidates == null) {
            mmr.tax2lexicallyRerankedCandidates = new HashMap<String, List<SynHit>>();
        }
        final double minScalingValue = parameters.getDouble(Configuration.dot(mlConfigRoot, "min_result_score"), -1.0);
        final double maxScalingValue = parameters.getDouble(Configuration.dot(mlConfigRoot, "max_result_score"), -1.0);
        final RankLibRanker ranker = (RankLibRanker)parameters.getOrDefault((Object)Configuration.dot(mlConfigRoot, "ranker"), (Object)null);

        if (rankingAlgorithm == null) {
            // Previously this case ran into a NullPointerException on rankingAlgorithm.equals(...).
            log.warn("No ranking algorithm is configured under {}; the candidates of gene mention {} are not ranked.", mlConfigRoot, gm);
            return;
        }

        if ("lucene".equals(rankingAlgorithm)) {
            Map<String, List<SynHit>> originalCandidates = mmr.tax2originalCandidates;
            for (String tax : originalCandidates.keySet()) {
                List<SynHit> candidates4tax = originalCandidates.get(tax);
                if (scaleResultValues) {
                    for (SynHit sh : candidates4tax) {
                        double scaledLuceneScore = FeatureUtils.doMinMaxScaling(sh.getLexicalScore(), minScalingValue, maxScalingValue, 0.0, 1.0);
                        sh.setLexicalScore(scaledLuceneScore);
                    }
                }
                candidates4tax = this.getOriginalExactMatches(gm, parameters, candidates4tax);
                if (candidates4tax.isEmpty()) {
                    // NOTE(review): this may add the rejection marker to the original candidate
                    // list itself, because getOriginalExactMatches can return its input list.
                    candidates4tax.add(MentionMappingResult.REJECTION);
                }
                mmr.tax2lexicallyRerankedCandidates.put(tax, new ArrayList<SynHit>(candidates4tax));
            }
            return;
        }

        // The transformer works on text pairs and does not need feature instances.
        if (!"transformer".equals(rankingAlgorithm)) {
            this.setCandidateRankingInstances(gm, mmr, gmidCounter, parameters, parameterMap);
        }
        final double luceneScoreThresholdRatioForReranking = parameters.getDouble(Configuration.dot(mlConfigRoot, "lucene_score_threshold_for_reranking"), 0.0);
        final boolean hasRankingInstances = mmr.tax2candidateRankingInstances != null;

        if (hasRankingInstances && rankingAlgorithm.equals("ltr")) {
            if (ranker == null) {
                return;
            }
            for (String taxId : mmr.tax2candidateRankingInstances.keySet()) {
                List<SynHit> rerankedCandidates4Tax = mmr.tax2lexicallyRerankedCandidates.computeIfAbsent(taxId, k -> new ArrayList<SynHit>());
                InstanceList allInstances4tax = mmr.tax2candidateRankingInstances.get(taxId);
                if (allInstances4tax.isEmpty()) {
                    rerankedCandidates4Tax.add(MentionMappingResult.REJECTION);
                    continue;
                }
                final double bestLuceneScore = ((SynHit)((Instance)allInstances4tax.get(0)).getProperty("sh")).getLuceneScore();
                int numInstances = allInstances4tax.size();
                // Keep the top candidates whose Lucene score, relative to the best one, exceeds the threshold.
                InstanceList instances4tax = allInstances4tax.subList(0, Math.min(allInstances4tax.size(), numRerank)).stream().filter(inst -> (double)((SynHit)inst.getProperty("sh")).getLuceneScore() / bestLuceneScore > luceneScoreThresholdRatioForReranking).collect(Collectors.toCollection(() -> new InstanceList(ranker.getAlphabet(), ranker.getTargetAlphabet())));
                if (instances4tax.isEmpty()) {
                    throw new IllegalStateException("There were " + numInstances + " re-ranking instances but after Lucene threshold filtering, none remained. This should not happen. The gene mention was: " + gm);
                }
                if (log.isTraceEnabled()) {
                    log.trace("Re-ranking {} of {} instances (numRerank: {})", instances4tax.size(), mmr.tax2candidateRankingInstances.get(taxId).size(), numRerank);
                }
                featureNormalization.applyFeatureNormalization(instances4tax, parameters);
                InstanceList rankedInstances = ranker.rank(instances4tax);
                if (rankByExactMatchFirst) {
                    // Exact matches first, then descending ranking score.
                    rankedInstances.sort((i1, i2) -> {
                        boolean exact1 = ((SynHit)i1.getProperty("sh")).isExactMatch();
                        boolean exact2 = ((SynHit)i2.getProperty("sh")).isExactMatch();
                        if (exact1 != exact2) {
                            return exact1 ? -1 : 1;
                        }
                        return Double.compare((Double)i2.getProperty("score"), (Double)i1.getProperty("score"));
                    });
                }
                for (Instance rankedInstance : rankedInstances) {
                    SynHit sh = (SynHit)rankedInstance.getProperty("sh");
                    // Check before the first dereference so the informative exception is actually reachable.
                    if (sh == null) {
                        throw new IllegalStateException("The SynHit was not set to the instance.");
                    }
                    double rankingScore = (Double)rankedInstance.getProperty("score");
                    if (scaleResultValues) {
                        rankingScore = FeatureUtils.doMinMaxScaling(rankingScore, minScalingValue, maxScalingValue, 0.0, 1.0);
                    }
                    sh.setLexicalScore(rankingScore);
                    rerankedCandidates4Tax.add(sh);
                }
            }
            return;
        }

        if (hasRankingInstances && (rankingAlgorithm.equals("maxent") || rankingAlgorithm.equals("svm"))) {
            Classifier classifier = (Classifier)parameters.getOrDefault((Object)Configuration.dot(mlConfigRoot, "classifier"), (Object)null);
            if (classifier == null) {
                return;
            }
            Label correctLabel = classifier.getLabelAlphabet().lookupLabel(Float.valueOf(1.0f), false);
            for (String taxId : mmr.tax2candidateRankingInstances.keySet()) {
                List<SynHit> rerankedCandidates4Tax = mmr.tax2lexicallyRerankedCandidates.computeIfAbsent(taxId, k -> new ArrayList<SynHit>());
                List<SynHit> candidates4tax = tax2candidates.get(taxId);
                if (candidates4tax.isEmpty()) {
                    rerankedCandidates4Tax.add(MentionMappingResult.REJECTION);
                    continue;
                }
                // With an exact match present, only the leading run of exact matches is re-ranked.
                int topN = numRerank;
                if (mmr.hasExactCandidateMatch()) {
                    topN = 0;
                    while (topN < candidates4tax.size() && candidates4tax.get(topN).isExactMatch()) {
                        ++topN;
                    }
                }
                InstanceList instances4tax = mmr.tax2candidateRankingInstances.get(taxId);
                instances4tax = instances4tax.subList(0, Math.min(instances4tax.size(), topN));
                if (log.isTraceEnabled()) {
                    log.trace("Re-ranking {} of {} instances (numRerank: {}, Lucene score ratio threshold: {})", instances4tax.size(), mmr.tax2candidateRankingInstances.get(taxId).size(), topN, luceneScoreThresholdRatioForReranking);
                }
                featureNormalization.applyFeatureNormalization(instances4tax, parameters);
                List<Classification> classifications = classifier.classify(instances4tax);
                for (Classification classification : classifications) {
                    LabelVector labelVector = classification.getLabelVector();
                    int correctLabelLocation = labelVector.location(correctLabel.getEntry());
                    int correctLabelIndex = labelVector.indexAtLocation(correctLabelLocation);
                    double rankingScore = labelVector.value(correctLabelIndex);
                    if (scaleResultValues) {
                        rankingScore = FeatureUtils.doMinMaxScaling(rankingScore, minScalingValue, maxScalingValue, 0.0, 1.0);
                    }
                    SynHit sh = (SynHit)classification.getInstance().getProperty("sh");
                    sh.setLexicalScore(rankingScore);
                    rerankedCandidates4Tax.add(sh);
                }
                if (rankByExactMatchFirst) {
                    rerankedCandidates4Tax.sort(Comparator.comparing(SynHit::isExactMatch).thenComparingDouble(SynHit::getLexicalScore).reversed());
                } else {
                    rerankedCandidates4Tax.sort(Comparator.comparingDouble(SynHit::getLexicalScore).reversed());
                }
            }
            return;
        }

        if (rankingAlgorithm.equals("transformer")) {
            TransformerClassifier transformerClassifier = this.getTransformerClassifier(mlConfigRoot, parameters);
            String gmMarkedDocumentText = TransformerDisambiguationDataUtils.getGmMarkedDocumentText(gm, 256, true, false);
            for (String taxId : mmr.tax2originalCandidates.keySet()) {
                List<SynHit> rerankedCandidates4Tax = mmr.tax2lexicallyRerankedCandidates.computeIfAbsent(taxId, k -> new ArrayList<SynHit>());
                List<SynHit> candidates4tax = tax2candidates.get(taxId);
                // Two parallel lists: one query string per candidate, each paired with the same document text.
                ArrayList<String> candidateQueryStrings = new ArrayList<String>(candidates4tax.size());
                ArrayList<String> gmMarkedDocumentTexts = new ArrayList<String>(candidates4tax.size());
                for (SynHit candidate : candidates4tax) {
                    try {
                        String candidateQueryString = TransformerDisambiguationDataUtils.getCandidateQueryString(candidate, this.candidateRetrieval);
                        candidateQueryStrings.add(candidateQueryString);
                        gmMarkedDocumentTexts.add(gmMarkedDocumentText);
                    } catch (ExecutionException e) {
                        throw new GeneMapperRuntimeException(e);
                    }
                }
                try {
                    List<Float> candidateScores = transformerClassifier.classifySentencePairs(candidateQueryStrings, gmMarkedDocumentTexts);
                    for (int i = 0; i < candidates4tax.size(); ++i) {
                        SynHit sh = candidates4tax.get(i);
                        // Preserve the previous lexical score before it is overwritten by the transformer score.
                        sh.setContextualScore(sh.getLexicalScore());
                        sh.setLexicalScore(candidateScores.get(i).floatValue());
                        rerankedCandidates4Tax.add(sh);
                    }
                    if (rankByExactMatchFirst) {
                        rerankedCandidates4Tax.sort(Comparator.comparing(SynHit::isExactMatch).thenComparingDouble(SynHit::getLexicalScore).reversed());
                    } else {
                        rerankedCandidates4Tax.sort(Comparator.comparingDouble(SynHit::getLexicalScore).reversed());
                    }
                } catch (IOException e) {
                    throw new GeneMapperRuntimeException(e);
                }
            }
            return;
        }

        // No applicable re-ranking: without ranking instances, pass the original candidates through.
        if (hasRankingInstances) {
            return;
        }
        Map<String, List<SynHit>> originalCandidates = mmr.tax2originalCandidates;
        for (String tax : originalCandidates.keySet()) {
            List<SynHit> candidates4tax = originalCandidates.get(tax);
            mmr.tax2lexicallyRerankedCandidates.put(tax, new ArrayList<SynHit>(candidates4tax));
        }
    }

    /**
     * Returns the transformer classifier cached for the given configuration root, creating it
     * when necessary.
     * <p>
     * The model path is read from {@code <mlConfigRoot>.transformer_ranking_model}. A cached
     * classifier with a different model path is shut down and replaced by a new one.
     *
     * @param mlConfigRoot the machine learning configuration root, also used as cache key
     * @param parameters   the parameters holding the model path
     * @return the cached or newly created classifier
     * @throws GeneMapperRuntimeException if shutting down or creating a classifier fails with an
     *                                    {@link IOException}
     */
    private TransformerClassifier getTransformerClassifier(String mlConfigRoot, Parameters parameters) {
        String modelPath = parameters.getString(Configuration.dot(mlConfigRoot, "transformer_ranking_model"));
        return DypsisCandidateRanker.transformer.compute(mlConfigRoot, (configRoot, cached) -> {
            try {
                if (cached != null) {
                    if (cached.getModelPath().equals(modelPath)) {
                        return cached;
                    }
                    // The configured model changed; release the outdated classifier first.
                    cached.shutdown();
                }
                return new TransformerClassifier(this.cacheService, modelPath, this.configuration);
            }
            catch (IOException e) {
                throw new GeneMapperRuntimeException(e);
            }
        });
    }

    /**
     * Tries to narrow the candidates down to a single exact match on the original names index.
     * <p>
     * The narrowing only happens when
     * {@code candidate_retrieval.sort_exact_matches_by_original_similarity} is enabled, the
     * candidates contain more than two exact matches, and looking up those exact matches in the
     * original names index for the mention's gene name yields exactly one exact match. In every
     * other case the passed list itself is returned.
     *
     * @param gm             the gene mention whose gene name is looked up
     * @param parameters     the parameters holding the feature switch
     * @param candidates4tax the candidates of one taxonomy ID
     * @return a one-element list with the single exact hit, or {@code candidates4tax} unchanged
     */
    public List<SynHit> getOriginalExactMatches(GeneMention gm, Parameters parameters, List<SynHit> candidates4tax) {
        if (!parameters.getBoolean(Configuration.dot("candidate_retrieval", "sort_exact_matches_by_original_similarity"))) {
            return candidates4tax;
        }
        List<SynHit> exactMatches = candidates4tax.stream().filter(SynHit::isExactMatch).collect(Collectors.toList());
        // NOTE(review): the threshold is "more than two" exact matches, as in the original code.
        if (exactMatches.size() <= 2) {
            return candidates4tax;
        }
        List<SynHit> originalNamesExactHits = this.candidateRetrieval.getOriginalNamesIndexRecords(exactMatches.stream().map(SynHit::getId).collect(Collectors.toList()), gm.getGeneName()).stream().filter(SynHit::isExactMatch).collect(Collectors.toList());
        return originalNamesExactHits.size() == 1 ? originalNamesExactHits : candidates4tax;
    }

    private void setCandidateRankingInstances(GeneMention gm, MentionMappingResult mmr, AtomicInteger gmidCounter, Parameters parameterMap, Parameters externalParameters) {
        if (mmr.hasExactCandidateMatch() && parameterMap.getString(Configuration.dot("candidate_retrieval", "ml", "exactmatch", "algorithm")).equals("lucene")) {
            return;
        }
        if (!mmr.hasExactCandidateMatch() && parameterMap.getString(Configuration.dot("candidate_retrieval", "ml", "approxmatch", "algorithm")).equals("lucene")) {
            return;
        }
        if (!gm.hasGoldMentions()) {
            return;
        }
        String matchType = mmr.hasExactCandidateMatch() ? "exactmatch" : "approxmatch";
        String mlConfigRoot = Configuration.dot("candidate_retrieval", "ml", matchType);
        RankLibRanker ranker = (RankLibRanker)parameterMap.getOrDefault((Object)Configuration.dot(mlConfigRoot, "ranker"), (Object)null);
        Classifier classifier = (Classifier)parameterMap.getOrDefault((Object)Configuration.dot(mlConfigRoot, "classifier"), (Object)null);
        boolean useJaroWinklerFeatures = parameterMap.getBoolean(Configuration.dot(mlConfigRoot, "use_jarowinkler_features"));
        boolean useTokenJaroFeatures = parameterMap.getBoolean(Configuration.dot(mlConfigRoot, "use_tokenjaro_features"));
        boolean useLevenshteinFeatures = parameterMap.getBoolean(Configuration.dot(mlConfigRoot, "use_levenshtein_features"));
        boolean useSmithWatermanFeatures = parameterMap.getBoolean(Configuration.dot(mlConfigRoot, "use_smithwaterman_features"));
        boolean useNeedlemanWunschFeatures = parameterMap.getBoolean(Configuration.dot(mlConfigRoot, "use_needlemanwunsch_features"));
        boolean useJaccardFeatures = parameterMap.getBoolean(Configuration.dot(mlConfigRoot, "use_jaccard_features"));
        boolean useTfidfFeatures = parameterMap.getBoolean(Configuration.dot(mlConfigRoot, "use_tfidf_features"));
        boolean useScorerAverage = parameterMap.getBoolean(Configuration.dot(mlConfigRoot, "use_scorer_average"), false);
        boolean useTokenFeaturesForLexicalReranking = parameterMap.getBoolean(Configuration.dot(mlConfigRoot, "use_token_features"));
        boolean useRecordContextJaroWinklerFeatures = parameterMap.getBoolean(Configuration.dot(mlConfigRoot, "recordcontext", "use_jarowinkler_features"));
        boolean useRecordContextSmithWatermanFeatures = parameterMap.getBoolean(Configuration.dot(mlConfigRoot, "recordcontext", "use_smithwaterman_features"));
        boolean useRecordContextJaccardFeatures = parameterMap.getBoolean(Configuration.dot(mlConfigRoot, "recordcontext", "use_jaccard_features"));
        boolean useRecordContextTfidfFeatures = parameterMap.getBoolean(Configuration.dot(mlConfigRoot, "recordcontext", "use_tfidf_features"));
        boolean useRecordContextScorerAverage = parameterMap.getBoolean(Configuration.dot(mlConfigRoot, "recordcontext", "use_scorer_average"), false);
        boolean useGenerif4RecordContext = parameterMap.getBoolean(Configuration.dot(mlConfigRoot, "recordcontext", "use_generif"));
        boolean useInteraction4RecordContext = parameterMap.getBoolean(Configuration.dot(mlConfigRoot, "recordcontext", "use_interaction"));
        boolean useGodesc4RecordContext = parameterMap.getBoolean(Configuration.dot(mlConfigRoot, "recordcontext", "use_godesc"));
        boolean useSummary4RecordContext = parameterMap.getBoolean(Configuration.dot(mlConfigRoot, "recordcontext", "use_summary"));
        String contextGeneFeatureGeneration = parameterMap.getString(Configuration.dot(mlConfigRoot, "context_gene_feature_generation"));
        boolean useAppositionsOnGeneContextFeatures = parameterMap.getBoolean(Configuration.dot(mlConfigRoot, "recordcontext", "use_appositions_on_gene_context_features"));
        int numRerank = parameterMap.getInt(Configuration.dot(mlConfigRoot, "top_n_candidates_for_reranking"));
        int numRerankingDisambiguation = parameterMap.getInt(Configuration.dot("disambiguation", "top_n_candidates_for_reranking"), 0);
        boolean isTrainMode = parameterMap.getBoolean(Configuration.dot("candidate_retrieval", "train_mode"), false);
        JaccardScorer jaccard = new JaccardScorer();
        int effectiveNumRerank = ranker == null && classifier == null ? Integer.MAX_VALUE : numRerank;
        Alphabet dataAlphabet = (Alphabet)parameterMap.getOrDefault((Object)Configuration.dot(mlConfigRoot, "data_alphabet"), (Object)null);
        if (dataAlphabet == null) {
            assert (!parameterMap.getString(Configuration.dot(mlConfigRoot, "algorithm")).equals("lucene")) : "There is no alphabet for lexical ranking features but re-ranking is not disabled for machine learning configuration root " + mlConfigRoot;
            dataAlphabet = (Alphabet)parameterMap.getOrDefault((Object)Configuration.dot("disambiguation", "ml", matchType, "data_alphabet"), (Object)null);
            if (dataAlphabet == null) {
                dataAlphabet = new Alphabet();
                externalParameters.put(Configuration.dot("disambiguation", "ml", matchType, "data_alphabet"), dataAlphabet);
            }
        }
        LabelAlphabet targetAlphabet = (LabelAlphabet)parameterMap.getOrDefault((Object)Configuration.dot(mlConfigRoot, "target_alphabet"), (Object)null);
        MaxEntScorerFeaturePipe tokenSimilarityPipe = useTokenFeaturesForLexicalReranking ? new MaxEntScorerFeaturePipe(dataAlphabet) : null;
        List<Scorer> scorers = new ArrayList<Scorer>();
        List<Scorer> recordContextScorers = new ArrayList<Scorer>();
        if (useJaroWinklerFeatures) {
            scorers.add(new JaroWinklerScorer());
        }
        if (useTokenJaroFeatures) {
            scorers.add(new TokenJaroSimilarityScorer());
        }
        if (useLevenshteinFeatures) {
            scorers.add(new LevenshteinScorer());
        }
        if (useSmithWatermanFeatures) {
            scorers.add(new SmithWatermanScorer());
        }
        if (useNeedlemanWunschFeatures) {
            scorers.add(new NeedlemanWunschScorer());
        }
        if (useJaccardFeatures) {
            scorers.add(new JaccardScorer());
        }
        if (useTfidfFeatures) {
            scorers.add(this.candidateRetrieval.getTFIDFOnGeneRecordNames());
        }
        if (useScorerAverage) {
            scorers = List.of(new ScorerAvg(scorers));
        }
        if (useRecordContextJaccardFeatures) {
            recordContextScorers.add(new JaccardScorer());
        }
        if (useRecordContextJaroWinklerFeatures) {
            recordContextScorers.add(new JaroWinklerScorer());
        }
        if (useRecordContextSmithWatermanFeatures) {
            recordContextScorers.add(new SmithWatermanScorer());
        }
        if (useRecordContextTfidfFeatures) {
            recordContextScorers.add(this.candidateRetrieval.getTFIDFOnGeneRecordNames());
        }
        if (useRecordContextScorerAverage) {
            recordContextScorers = List.of(new ScorerAvg(recordContextScorers));
        }
        String normalizedGeneName = gm.getNormalizedText();
        List<String> geneNameContext = !contextGeneFeatureGeneration.equals("none") ? (Collection)gm.getGeneDocument().getNonRejectedGenes().map(GeneMention::getNormalizedText).filter(name -> !name.equals(normalizedGeneName)).collect(Collectors.toSet()) : Collections.emptyList();
        String[] multiValueFields = new String[]{"Fullname", "Synonym", "Otherdesignation", "Xref", "Uniprotname", "Biothesaurus"};
        ArrayList<String> recordContextMultiValueFields = new ArrayList<String>();
        if (useGenerif4RecordContext) {
            recordContextMultiValueFields.add("generif");
        }
        if (useInteraction4RecordContext) {
            recordContextMultiValueFields.add("interaction");
        }
        if (useGodesc4RecordContext) {
            recordContextMultiValueFields.add("godesc");
        }
        if (useSummary4RecordContext) {
            recordContextMultiValueFields.add("summary");
        }
        Map<String, List<SynHit>> tax2originalCandidates = mmr.tax2originalCandidates;
        HashSet dataTrainingStrings = new HashSet();
        for (String tax : tax2originalCandidates.keySet()) {
            InstanceList instances = new InstanceList(dataAlphabet, targetAlphabet);
            List<SynHit> candidates4tax = tax2originalCandidates.get(tax);
            if (!recordContextScorers.isEmpty() || useAppositionsOnGeneContextFeatures) {
                this.candidateRetrieval.setFulltextFieldsToRecordHits(candidates4tax, recordContextMultiValueFields);
            }
            int candidateCounter = 0;
            int topN = Math.min(candidates4tax.size(), effectiveNumRerank);
            log.trace("Creating instances for {} of {} candidates for tax ID {} in {} mode (rumRerank: {}) for gene {}.", topN, candidates4tax.size(), tax, ranker == null && classifier == null ? "train" : "test", numRerank, gm.getText());
            boolean gotPositive = false;
            boolean gotNegative = false;
            for (int i = 0; !(i >= topN || isTrainMode && gotPositive && gotNegative); ++i) {
                boolean correctId;
                SynHit candidate = candidates4tax.get(i);
                GeneRecordHit sh = (GeneRecordHit)candidate;
                boolean bl = correctId = gm.getGeneDocument().isGoldHasOffsets() ? gm.getAllGoldIdsAsList().contains(sh.getId()) : gm.getGeneDocument().getGoldIds().contains(sh.getId());
                if (isTrainMode && !correctId && gotNegative) continue;
                gotPositive = gotPositive || correctId;
                gotNegative = gotNegative || !correctId;
                float labelValue = correctId ? 1.0f : 0.0f;
                Label l = targetAlphabet != null ? targetAlphabet.lookupLabel(Float.valueOf(labelValue)) : null;
                HashMap<String, Double> featureMap = new HashMap<String, Double>();
                Object[] tokenSimilarityData = new Object[]{featureMap, candidate, Float.valueOf(labelValue), "TaggedGene"};
                Object data = useTokenFeaturesForLexicalReranking ? tokenSimilarityData : featureMap;
                Instance instance = new Instance(data, l, "gm" + gmidCounter + "_doc" + gm.getDocId() + "_" + tax, candidateCounter++);
                if (!useTokenFeaturesForLexicalReranking) {
                    instance.setProperty("sh", candidate);
                }
                for (Scorer scorer : scorers) {
                    this.createScorerBasedFeatures(featureMap, normalizedGeneName, "TaggedGene", scorer, multiValueFields, sh);
                }
                for (Scorer scorer : recordContextScorers) {
                    this.createRecordContextBasedFeatures(featureMap, normalizedGeneName, "TaggedGene@RecordContext", scorer, contextGeneFeatureGeneration, recordContextMultiValueFields, sh);
                }
                if (useAppositionsOnGeneContextFeatures && gm.getGeneName().getAppositionContexts() != null && !gm.getGeneName().getAppositionContexts().isEmpty()) {
                    for (Scorer scorer : recordContextScorers) {
                        this.createRecordContextBasedFeatures(featureMap, gm.getGeneName().getAppositionContexts().get(0).getNormalizedText(), "Apposition@RecordContext", scorer, contextGeneFeatureGeneration, recordContextMultiValueFields, sh);
                    }
                }
                if (useTokenFeaturesForLexicalReranking) {
                    tokenSimilarityPipe.pipe(instance);
                }
                tokenSimilarityData[3] = "ContextGene";
                for (String name2 : geneNameContext) {
                    for (Scorer scorer : scorers) {
                        this.createScorerBasedFeatures(featureMap, name2, "ContextGene", scorer, multiValueFields, sh);
                    }
                    if (!useTokenFeaturesForLexicalReranking) continue;
                    instance.setData(tokenSimilarityData);
                    tokenSimilarityPipe.pipe(instance);
                }
                FeatureVector vector = this.convertToken2Vector(featureMap, dataAlphabet);
                instance.setData(vector);
                instance.setProperty("sh", sh);
                instances.add(instance);
            }
            log.trace("Done with feature creation for {} and tax ID {}.", (Object)gm.getText(), (Object)tax);
            if (mmr.tax2candidateRankingInstances == null) {
                mmr.tax2candidateRankingInstances = new HashMap<String, InstanceList>();
            }
            mmr.tax2candidateRankingInstances.put(tax, instances);
            if (!isTrainMode) continue;
            if (!parameterMap.containsKey(Configuration.dot(mlConfigRoot, "training_instances"))) {
                parameterMap.put(Configuration.dot(mlConfigRoot, "training_instances"), new HashMap());
            }
            parameterMap.getMap(Configuration.dot(mlConfigRoot, "training_instances")).merge(tax, instances, (l1, l2) -> {
                l1.addAll((Collection<? extends Instance>)l2);
                return l1;
            });
        }
    }

    /**
     * Adds binary features that are derived from the gene mention alone.
     * <p>
     * A feature is only written (with value {@code 1.0}) when its condition holds; otherwise the map is left
     * untouched for that feature.
     *
     * @param gm         the gene mention to inspect
     * @param tax        the taxonomy ID handed to {@code gm.hasExactMatchInTax}
     * @param featureMap receives the features; modified in place
     */
    private void createMentionBasedFeatures(GeneMention gm, String tax, Map<String, Double> featureMap) {
        final boolean exactMatchInTax = gm.hasExactMatchInTax(tax);
        if (exactMatchInTax) {
            featureMap.put("IS_EXACT_MATCH", 1.0);
        }

        final boolean overlapsAcronymLongform = !gm.getGeneDocument().getOverlappingAcronymLongforms(gm.getOffsets()).isEmpty();
        if (overlapsAcronymLongform) {
            featureMap.put("IS_ACRONYM_LONGFORM", 1.0);
        }
    }

    /**
     * Converts a feature-name-to-value map into a sparse {@link FeatureVector} over {@code dataAlphabet}.
     * <p>
     * All feature names are first passed to {@code dataAlphabet.lookupIndex}. The alphabet is then scanned in
     * index order so that the resulting index array is ascending. Features for which the alphabet has no entry
     * (presumably because its growth was stopped, as done for loaded models) or whose value is {@code null}
     * are left out of the vector.
     *
     * @param featureVals  feature names mapped to their values
     * @param dataAlphabet the alphabet that assigns indices to feature names
     * @return a feature vector containing exactly the features that have both an alphabet index and a value
     */
    @NotNull
    private FeatureVector convertToken2Vector(Map<String, Double> featureVals, Alphabet dataAlphabet) {
        featureVals.keySet().forEach(dataAlphabet::lookupIndex);
        int[] fi = new int[featureVals.size()];
        double[] fv = new double[featureVals.size()];
        int pos = 0;
        for (int j = 0; j < dataAlphabet.size(); ++j) {
            Double value = featureVals.get(dataAlphabet.lookupObject(j));
            if (value == null) {
                continue;
            }
            fi[pos] = j;
            fv[pos] = value;
            ++pos;
        }
        // Not every feature necessarily received a slot. Trim the arrays so that the unused tail is not
        // handed to the vector as spurious (index 0, value 0.0) entries.
        if (pos < fi.length) {
            fi = Arrays.copyOf(fi, pos);
            fv = Arrays.copyOf(fv, pos);
        }
        return new FeatureVector(dataAlphabet, fi, fv);
    }

    /**
     * Scores a name against the name fields of a candidate gene record and stores the scores as features.
     * <p>
     * Two single-value features (symbol and nomenclature symbol) are created plus max, mean and median of the
     * scores on each multi-value field. The statistics are written in the fixed order full names, synonyms,
     * other designations, xrefs, UniProt names, BioThesaurus names; {@code multiValueFields} only supplies the
     * names used in the feature keys and must therefore list the fields in that same order.
     * <p>
     * Features with value {@code 0.0} are not stored. If a key already exists in {@code featureMap}, the larger
     * of the old and the new value is kept.
     *
     * @param featureMap         receives the features; modified in place
     * @param normalizedGeneName the name to score against the record fields
     * @param featureSuffix      inserted into every feature key to tell apart the origin of the name
     * @param scorer             the string similarity scorer; its {@code info()} becomes part of the keys
     * @param multiValueFields   field names for the keys of the multi-value statistics (expected length 6)
     * @param sh                 the candidate gene record
     */
    private void createScorerBasedFeatures(Map<String, Double> featureMap, String normalizedGeneName, String featureSuffix, Scorer scorer, String[] multiValueFields, GeneRecordHit sh) {
        // 2 single-value scores + 6 multi-value fields * 3 statistics each.
        String[] keys = new String[20];
        double[] featureValues = new double[20];
        String t = normalizedGeneName;
        double symbolScore = scorer.getScore(t, sh.getSymbol());
        double nomenclatureScore = sh.getSymbolFromNomenclature() != null ? scorer.getScore(t, sh.getSymbolFromNomenclature()) : 0.0;
        double[] fullnameScores = Arrays.stream(sh.getFullNames()).mapToDouble(s2 -> scorer.getScore(t, (String)s2)).toArray();
        double[] synonymScores = Arrays.stream(sh.getSynonyms()).mapToDouble(s2 -> scorer.getScore(t, (String)s2)).toArray();
        double[] otherdesignationScores = Arrays.stream(sh.getOtherDesignations()).mapToDouble(s2 -> scorer.getScore(t, (String)s2)).toArray();
        double[] xrefScores = Arrays.stream(sh.getXrefs()).mapToDouble(s2 -> scorer.getScore(t, (String)s2)).toArray();
        double[] uniprotnameScores = Arrays.stream(sh.getUniprotNames()).mapToDouble(s2 -> scorer.getScore(t, (String)s2)).toArray();
        double[] biothesaurusScores = Arrays.stream(sh.getBioThesaurusNames()).mapToDouble(s2 -> scorer.getScore(t, (String)s2)).toArray();
        // The median is read from the middle of the array, so every score array must be sorted first.
        Arrays.sort(fullnameScores);
        Arrays.sort(synonymScores);
        Arrays.sort(otherdesignationScores);
        // Previously missing: without this the xref median depended on the order of the xrefs in the record.
        Arrays.sort(xrefScores);
        Arrays.sort(uniprotnameScores);
        Arrays.sort(biothesaurusScores);
        double[] fullnameStats = this.getMultiNameFieldScoreStatistics(fullnameScores);
        double[] synonymStats = this.getMultiNameFieldScoreStatistics(synonymScores);
        double[] otherdesignationStats = this.getMultiNameFieldScoreStatistics(otherdesignationScores);
        double[] xrefStats = this.getMultiNameFieldScoreStatistics(xrefScores);
        double[] uniprotnameStats = this.getMultiNameFieldScoreStatistics(uniprotnameScores);
        double[] biothesaurusStats = this.getMultiNameFieldScoreStatistics(biothesaurusScores);

        // Every key ends with the same suffix; build it once.
        String keySuffix = featureSuffix + scorer.info();
        int j = 0;
        keys[j++] = "symbolScore" + keySuffix;
        keys[j++] = "nomenclatureScore" + keySuffix;
        for (String multiValueField : multiValueFields) {
            keys[j++] = "max" + multiValueField + "Score" + keySuffix;
            keys[j++] = "mean" + multiValueField + "Score" + keySuffix;
            keys[j++] = "median" + multiValueField + "Score" + keySuffix;
        }

        // Values are laid out in the same order as the keys above: each stats array is {max, mean, median}.
        j = 0;
        featureValues[j++] = symbolScore;
        featureValues[j++] = nomenclatureScore;
        System.arraycopy(fullnameStats, 0, featureValues, j, 3);
        System.arraycopy(synonymStats, 0, featureValues, j += 3, 3);
        System.arraycopy(otherdesignationStats, 0, featureValues, j += 3, 3);
        System.arraycopy(xrefStats, 0, featureValues, j += 3, 3);
        System.arraycopy(uniprotnameStats, 0, featureValues, j += 3, 3);
        System.arraycopy(biothesaurusStats, 0, featureValues, j += 3, 3);

        for (int i = 0; i < keys.length; ++i) {
            double value = featureValues[i];
            // Keep the feature map sparse: zero scores are simply not stored.
            if (value == 0.0) {
                continue;
            }
            // The method may be called repeatedly with the same suffix; keep the best score per key.
            featureMap.merge(keys[i], value, Math::max);
        }
    }

    /**
     * Scores a name against the full-text fields of a candidate gene record and stores the results as features.
     * <p>
     * For every field in {@code fulltextFieldsToScore} the field values are read via
     * {@code geneRecordFulltextFieldGetters}, scored against {@code normalizedGeneName} and summarized as
     * max, mean and median. A field whose getter returns {@code null} is treated as having no values.
     * <p>
     * The feature key of a field does not contain the statistic name, so the non-zero statistics of one field
     * are folded into a single feature: they are added up when {@code contextGeneFeatureGeneration} equals
     * {@code "sum"} and reduced to their maximum otherwise. The same function also combines the value with one
     * already present in {@code featureMap}.
     * NOTE(review): confirm that folding max/mean/median into one key is intended.
     *
     * @param featureMap                   receives the features; modified in place
     * @param normalizedGeneName           the name to score against the field values
     * @param featureSuffix                inserted into every feature key
     * @param scorer                       the string similarity scorer; its {@code info()} becomes part of the keys
     * @param contextGeneFeatureGeneration aggregation mode; also used as prefix of the feature keys
     * @param fulltextFieldsToScore        names of the full-text fields to score
     * @param sh                           the candidate gene record
     */
    private void createRecordContextBasedFeatures(Map<String, Double> featureMap, String normalizedGeneName, String featureSuffix, Scorer scorer, String contextGeneFeatureGeneration, List<String> fulltextFieldsToScore, GeneRecordHit sh) {
        // One {max, mean, median} triple per field, in the order of fulltextFieldsToScore.
        // The scores are sorted because the median is read from the middle of the array.
        List<double[]> scoreStats = fulltextFieldsToScore.stream()
                .map(fulltextField -> geneRecordFulltextFieldGetters.get(fulltextField).apply(sh))
                .map(values -> values != null
                        ? Arrays.stream(values).mapToDouble(s -> scorer.getScore(normalizedGeneName, (String)s)).sorted().toArray()
                        : new double[0])
                .map(this::getMultiNameFieldScoreStatistics)
                .collect(Collectors.toList());
        BiFunction<Double, Double, Double> contextGeneScoreAggregationFunction = contextGeneFeatureGeneration.equals("sum") ? Double::sum : Double::max;
        for (int i = 0; i < fulltextFieldsToScore.size(); ++i) {
            // The key is the same for all statistics of a field, so build it once per field.
            String key = contextGeneFeatureGeneration + fulltextFieldsToScore.get(i) + "Score" + featureSuffix + scorer.info();
            for (double stat : scoreStats.get(i)) {
                // Keep the feature map sparse: zero scores are not stored.
                if (stat == 0.0) {
                    continue;
                }
                featureMap.merge(key, stat, contextGeneScoreAggregationFunction);
            }
        }
    }

    /**
     * Summarizes a set of scores as maximum, arithmetic mean and median.
     * <p>
     * The input may be in any order and is not modified; the median is taken from a sorted copy.
     *
     * @param nameScores the scores to summarize; may be empty but not {@code null}
     * @return a three-element array {@code {max, mean, median}}; all zeros for empty input
     */
    private double[] getMultiNameFieldScoreStatistics(double[] nameScores) {
        // Max and mean are computed on the array as given; only the median needs sorted data.
        double max = Arrays.stream(nameScores).max().orElse(0.0);
        double mean = Arrays.stream(nameScores).average().orElse(0.0);
        double median = 0.0;
        if (nameScores.length > 0) {
            // Sort a copy: callers do not always pass sorted scores and must not see their array reordered.
            double[] sorted = nameScores.clone();
            Arrays.sort(sorted);
            int mid = sorted.length / 2;
            median = sorted.length % 2 == 1 ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2.0;
        }
        return new double[]{max, mean, median};
    }

    /**
     * Discards the currently held model parameters by resetting the {@code parameters} field to {@code null}.
     */
    public void clear() {
        parameters = null;
    }

    /**
     * Reads the model parameters from a Java-serialized stream and stores them in this instance.
     * <p>
     * The stream is closed after reading. If the loaded parameters contain a data alphabet under the
     * {@code candidate_retrieval} / {@code data_alphabet} key, its growth is stopped.
     * <p>
     * NOTE(review): this uses Java native deserialization; presumably only trusted model files are passed
     * in - confirm against the callers.
     *
     * @param modelIs the stream to read the serialized {@link Parameters} from
     * @throws IOException            if reading from the stream fails
     * @throws ClassNotFoundException if a class of the serialized object graph cannot be found
     */
    public void loadModel(InputStream modelIs) throws IOException, ClassNotFoundException {
        try (ObjectInputStream objectStream = new ObjectInputStream(modelIs)) {
            this.parameters = (Parameters)objectStream.readObject();
        }
        if (this.parameters == null) {
            return;
        }
        final String dataAlphabetKey = Configuration.dot("candidate_retrieval", "data_alphabet");
        if (!this.parameters.containsKey(dataAlphabetKey)) {
            return;
        }
        Alphabet dataAlphabet = (Alphabet)this.parameters.get(dataAlphabetKey);
        dataAlphabet.stopGrowth();
    }

    /**
     * Returns the model parameters currently held by this instance.
     *
     * @return the value of the {@code parameters} field; {@code null} after {@link #clear()} has been called
     */
    public Parameters getModelParameters() {
        return parameters;
    }
}

