/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.speciesassignment.hpo;

import cc.mallet.classify.Classifier;
import cc.mallet.pipe.Pipe;
import cc.mallet.pipe.SerialPipes;
import cc.mallet.types.AlphabetCarrying;
import cc.mallet.types.InstanceList;
import com.google.common.collect.Sets;
import com.google.inject.Guice;
import com.google.inject.Injector;
import com.lahodiuk.ahocorasick.AhoCorasickOptimized;
import de.julielab.evaluation.entities.EntityEvaluationResult;
import de.julielab.evaluation.entities.EntityEvaluationResults;
import de.julielab.evaluation.entities.EntityEvaluator;
import de.julielab.evaluation.entities.EvaluationData;
import de.julielab.evaluation.entities.EvaluationDataEntry;
import de.julielab.evaluation.entities.EvaluationMode;
import de.julielab.gene.candidateretrieval.ioc.IndexModule;
import de.julielab.geneexpbase.GeneIdCorrectness;
import de.julielab.geneexpbase.candidateretrieval.CandidateRetrieval;
import de.julielab.geneexpbase.candidateretrieval.QueryGenerator;
import de.julielab.geneexpbase.configuration.Parameters;
import de.julielab.geneexpbase.genemodel.GeneDocument;
import de.julielab.geneexpbase.genemodel.GeneMention;
import de.julielab.geneexpbase.genemodel.SpeciesMention;
import de.julielab.geneexpbase.hpo.CachedModel;
import de.julielab.geneexpbase.hpo.HpoException;
import de.julielab.geneexpbase.hpo.HpoInstance;
import de.julielab.geneexpbase.hpo.HpoRoute;
import de.julielab.geneexpbase.hpo.InspectionFilePrinter;
import de.julielab.geneexpbase.hpo.SplitType;
import de.julielab.geneexpbase.scoring.JaroWinklerScorer;
import de.julielab.ml.RankLibRanker;
import de.julielab.speciesassignment.Configuration;
import de.julielab.speciesassignment.GeneSpeciesAssigner;
import de.julielab.speciesassignment.SpeciesAssignmentException;
import de.julielab.speciesassignment.SpeciesAssignmentRuntimeException;
import de.julielab.speciesassignment.candidateretrieval.SimpleGeneNameQueryGenerator;
import de.julielab.speciesassignment.evaluation.SpeciesAssignmentCorrectnessRenderer;
import de.julielab.speciesassignment.evaluation.SpeciesCountDocument;
import de.julielab.speciesassignment.evaluation.SpeciesCountDocumentPartitioning;
import de.julielab.speciesassignment.evaluation.SpeciesCrossValPartition;
import de.julielab.speciesassignment.ioc.SpeciesAssignmentModule;
import de.julielab.speciesassignment.mlcandidateranker.FeatureNormalization;
import de.julielab.speciesassignment.mlcandidateranker.MLSpeciesAssigner;
import de.julielab.speciesassignment.mlcandidateranker.SpeciesAPrioriScorePipe;
import de.julielab.speciesassignment.mlcandidateranker.SpeciesCandidates2TokenPipe;
import de.julielab.speciesassignment.mlcandidateranker.SpeciesInstanceTools;
import de.julielab.speciesassignment.spi.SpeciesAssignmentFilter;
import de.julielab.speciesassignment.spi.SpeciesDocumentScoringService;
import de.julielab.speciesassignment.spi.SynonymSpeciesCooccurrenceService;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import javax.cache.Cache;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public abstract class HpoSpeciesOptimizationRoute
extends HpoRoute
implements Serializable {
    // String identifiers exposed as public constants; they are not referenced in the visible part of this file.
    public static final String GET_SPECIES_CUSTOM_EVAL_SCORE = "get_species_custom_eval_score";
    public static final String GET_DECA_EVAL_SCORE = "get_deca_eval_score";
    private static final long serialVersionUID = 295952L;
    private static final Logger log = LoggerFactory.getLogger(HpoSpeciesOptimizationRoute.class);
    // Not referenced in the visible part of this file.
    private static final boolean PRINT_INSPECTION_FILES = true;
    // Configuration keys whose values are appended to the model cache key in trainMlModel().
    // trainMlModel() also fails with an IllegalStateException when a parameter tracked for
    // MLSpeciesAssigner is missing from this list.
    private static final List<String> parameters4model = List.of("species_assignment.ml.use_docleveltax_score_feature", "species_assignment.ml.standardize_features", "species_assignment.ml.use_syn_tax_score_feature", "species_assignment.ml.minmax_scale_features", "species_assignment.use_synonym_statistics_when_no_species_mentions", "species_assignment.synonym_apriori_scores.service.scope_weights.document", "species_assignment.synonym_apriori_scores.service.scope_weights.mesh", "species_assignment.synonym_apriori_scores.service.scope_weights.sentence", "species_assignment.weights.text", "species_assignment.weights.title", "species_assignment.tax_frequency_norm", "species_assignment.ml.algorithm");
    // Additional keys that printUnaccountedParameters() treats as accounted for, i.e. tracked
    // parameters contained here never show up in its "unaccounted parameters" warning.
    private static final Set<String> parameters4modelNoMissingReport = Set.of("species_assignment.synonym_apriori_scores.service.scope_weights.document", "species_assignment.synonym_apriori_scores.service.scope_weights.sentence", "species_assignment.synonym_apriori_scores.service.scope_weights.mesh", "species_assignment.weights.title", "species_assignment.weights.text", "species_assignment.tax_frequency_norm", "species_assignment.ml.ltr.k", "species_assignment.ml.ltr.algorithm", "species_assignment.ml.ltr.metric", "species_assignment.ml.svm.type", "species_assignment.ml.svm.c", "species_assignment.ml.svm.kerneltype", "species_assignment.ml.svm.gamma", "species_assignment.ml.svm.coef0", "species_assignment.ml.svm.degree");
    // Compares gold and predicted evaluation data in evaluateSpeciesAssigner(); created in the
    // constructor with the "grouping-type" property set to LABEL.
    private final transient EntityEvaluator entityEvaluator;
    // Fixed pool of five "SpeciesWorker-<n>" threads that runs the score calculation workers.
    private final transient ExecutorService executorService;
    // NOTE(review): never assigned or read in the visible part of this file; presumably meant to
    // hold trained models under the key built in trainMlModel() - confirm.
    private Cache<String, CachedModel> modelCache;
    // Writes inspection files for evaluation results; configured in the constructor.
    private final InspectionFilePrinter inspectionFilePrinter;

    /**
     * Sets up the route for species assignment optimization.
     * <p>
     * After delegating to the parent constructor this creates the entity evaluator (with the
     * {@code grouping-type} property set to {@code LABEL}), a fixed pool of five named worker
     * threads for score calculation, and the inspection file printer together with the
     * taxonomy-specific rendering, ID lookup and correctness functions it is given.
     *
     * @param configuration the species assignment configuration handed to the parent route
     */
    public HpoSpeciesOptimizationRoute(Configuration configuration) {
        super(log, configuration);

        Properties evaluatorProperties = new Properties();
        evaluatorProperties.setProperty("grouping-type", EvaluationData.GroupingType.LABEL.name());
        this.entityEvaluator = new EntityEvaluator(evaluatorProperties);

        // Worker threads get consecutive numbers starting at 1.
        AtomicInteger workerCounter = new AtomicInteger(1);
        this.executorService = Executors.newFixedThreadPool(5, task -> {
            String workerName = "SpeciesWorker-" + workerCounter.getAndIncrement();
            return new Thread(task, workerName);
        });

        // The injector is only consulted when the functions are applied, not when they are created.
        Function<GeneMention, GeneDocument.MentionCorrectness> correctness = mention -> {
            CandidateRetrieval retrieval = this.injector.getInstance(CandidateRetrieval.class);
            return GeneIdCorrectness.getIdCorrectnessLevel(mention, "taxonomy", retrieval, new SimpleGeneNameQueryGenerator(), new JaroWinklerScorer(), 0.9);
        };
        Function<GeneMention, String> renderCorrect = SpeciesAssignmentCorrectnessRenderer::renderCorrectTaxonomyGeneMention;
        Function<GeneMention, String> renderWrong = mention -> {
            CandidateRetrieval retrieval = this.injector.getInstance(CandidateRetrieval.class);
            return SpeciesAssignmentCorrectnessRenderer.renderWrongTaxonomyGeneMention(mention, retrieval, new SimpleGeneNameQueryGenerator());
        };
        Function<GeneMention, String> renderFalsePositive = SpeciesAssignmentCorrectnessRenderer::renderNoCorrectCandidateTaxonomyGeneMention;
        Function<GeneMention, Stream<String>> mentionGoldIds = mention -> mention.getTaxonomyIds().stream();
        Function<GeneDocument, Stream<String>> documentGoldIds = document -> document.getGoldTaxonomyIds().stream();
        Function<GeneMention, Stream<String>> predictedIds = mention -> mention.getTaxonomyIds().stream();

        this.inspectionFilePrinter = new InspectionFilePrinter(correctness, renderCorrect, renderWrong, renderFalsePositive, mentionGoldIds, documentGoldIds, predictedIds);
    }

    /**
     * Builds the Guice injector for this route from the species assignment module and the
     * index module, both created with the given configuration.
     *
     * @param configuration the configuration passed to both modules
     * @return the injector created from the two modules
     */
    @Override
    protected Injector createGuiceInjector(de.julielab.geneexpbase.configuration.Configuration configuration) {
        SpeciesAssignmentModule speciesAssignmentModule = new SpeciesAssignmentModule(configuration);
        IndexModule indexModule = new IndexModule(configuration);
        return Guice.createInjector(speciesAssignmentModule, indexModule);
    }

    /**
     * @return the fixed task name of this route, {@code "species-assignment"}
     */
    @Override
    protected String getTaskName() {
        final String taskName = "species-assignment";
        return taskName;
    }

    /**
     * Splits the training data into cross-validation partitions and an optional development set.
     * <p>
     * If {@code splitMapping} already exists, the split stored there is loaded and returned.
     * Otherwise, when the dev sampling frequency is positive, the first partition produced with
     * that frequency becomes the dev set and the remaining partitions form the training pool;
     * with a non-positive frequency all documents are used for training and the dev set is empty.
     * The training pool is then partitioned into {@code getNumSplits()} folds and the resulting
     * split is saved to {@code splitMapping}.
     *
     * @param allTrainingData all documents available for training
     * @param splitMapping    file from which an existing split is loaded or to which a new one is saved
     * @return the cross-validation folds (left) and the dev set (right)
     */
    @Override
    protected synchronized Pair<List<List<GeneDocument>>, List<GeneDocument>> getTrainDevSplit(List<GeneDocument> allTrainingData, File splitMapping) {
        log.info("Partitioning {} documents into {} cross validation partitions and a development set with a sampling frequency of {}.", allTrainingData.size(), this.getNumSplits(), this.getDevSamplingFrequency());
        if (splitMapping.exists()) {
            return this.loadDataSplits(allTrainingData, splitMapping);
        }

        // Unwraps the documents of a single partition.
        Function<SpeciesCrossValPartition, List<GeneDocument>> unwrap = part -> part.stream().map(SpeciesCountDocument::getDocument).collect(Collectors.toList());

        List<GeneDocument> devDocuments;
        List<GeneDocument> trainingPool;
        if (this.getDevSamplingFrequency() > 0) {
            log.info("Using 1/{} of the data as dev set.", this.getDevSamplingFrequency());
            List<SpeciesCrossValPartition> devAndRest = SpeciesCountDocumentPartitioning.partitionDocuments(allTrainingData.stream(), this.getDevSamplingFrequency());
            // The first partition is the dev set, all others are merged into the training pool.
            devDocuments = unwrap.apply(devAndRest.get(0));
            trainingPool = devAndRest.subList(1, devAndRest.size()).stream()
                    .flatMap(Collection::stream)
                    .map(SpeciesCountDocument::getDocument)
                    .collect(Collectors.toList());
        } else {
            log.info("Dev sampling frequency is set to {}. Creating {} cross-validation splits without a dev set.", this.getDevSamplingFrequency(), this.getNumSplits());
            devDocuments = Collections.emptyList();
            trainingPool = allTrainingData;
        }

        List<SpeciesCrossValPartition> foldPartitions = SpeciesCountDocumentPartitioning.partitionDocuments(trainingPool.stream(), this.getNumSplits());
        ArrayList<List<GeneDocument>> folds = new ArrayList<List<GeneDocument>>();
        for (SpeciesCrossValPartition foldPartition : foldPartitions) {
            folds.add(unwrap.apply(foldPartition));
        }
        this.saveDataSplit(folds, devDocuments, splitMapping);
        return new ImmutablePair<List<List<GeneDocument>>, List<GeneDocument>>(folds, devDocuments);
    }

    /**
     * @return the metric used when none is requested explicitly, {@link HpoRoute.Metric#F}
     */
    @Override
    protected HpoRoute.Metric getDefaultMetric() {
        final HpoRoute.Metric defaultMetric = HpoRoute.Metric.F;
        return defaultMetric;
    }

    /**
     * Runs the score calculation for one HPO instance and parameter configuration on the
     * worker pool and waits at most {@code cutoffTime} seconds for it to finish.
     * <p>
     * On timeout or interruption the worker object is interrupted and whatever score the
     * worker holds at that moment is returned (the worker's score field is initialised to
     * {@code "ERROR"}). If this thread is interrupted while waiting, its interrupt status is
     * restored so that callers can react to the interruption.
     *
     * @param hpoInstance       the instance (data split) to evaluate on
     * @param parameterMap      the parameter configuration to evaluate
     * @param seed              random seed passed to the worker
     * @param cutoffTime        maximum time to wait for the score, in seconds
     * @param resourceBudget    resource budget passed to the worker
     * @param maxResourceBudget maximum resource budget passed to the worker
     * @param returnMetric      requested metric; not used by this method itself
     * @param runId             identifier of the run passed to the worker
     * @return the result score reported by the worker
     */
    @Override
    protected String calculateScore(HpoInstance hpoInstance, Parameters parameterMap, int seed, int cutoffTime, int resourceBudget, int maxResourceBudget, HpoRoute.Metric returnMetric, int runId) {
        ScoreCalculationWorker scoreWorker = new ScoreCalculationWorker(hpoInstance, parameterMap, seed, resourceBudget, maxResourceBudget, runId);
        Future<?> scoreFuture = this.executorService.submit(scoreWorker);
        try {
            scoreFuture.get(cutoffTime, TimeUnit.SECONDS);
        } catch (TimeoutException e) {
            log.error("Could not obtain configuration score within {} seconds. Returning a score of 0 and continuing. Note that the started evaluation will continue to run and possibly create output until it can be safely terminated.", cutoffTime, e);
            // NOTE(review): the worker is executed by a pool thread and never started as a thread
            // of its own; presumably it polls its own interrupt flag - confirm.
            scoreWorker.interrupt();
        } catch (InterruptedException e) {
            log.error("Interrupted while waiting for the configuration score of instance {}. Note that the started evaluation will continue to run and possibly create output until it can be safely terminated.", hpoInstance, e);
            scoreWorker.interrupt();
            // Do not swallow the interruption: restore the flag for the calling code.
            Thread.currentThread().interrupt();
        } catch (ExecutionException e) {
            log.error("Error occurred during evaluation for instance {} and configuration {}.", hpoInstance, parameterMap, e);
        }
        return scoreWorker.getResultScore();
    }

    /**
     * Evaluates the given species assigner on copies of the documents of an HPO instance and
     * prints an inspection file for the result.
     * <p>
     * For {@code TRAIN} instances evaluated with an {@link MLSpeciesAssigner}, the model is
     * additionally saved and re-evaluated from disk. If {@code trainWithDev} is set, the model is
     * then retrained on the training data including the dev set, evaluated on that data and saved
     * as the "final" model. A failure while evaluating the retrained model is logged and does not
     * affect the returned result.
     *
     * @param hpoInstance       the instance whose documents are evaluated
     * @param speciesAssigner   the assigner to evaluate
     * @param trainWithDev      whether to retrain and save a model that includes the dev set
     * @param seed              random seed for retraining
     * @param runId             run identifier, used in the model file names
     * @param resourceBudget    resource budget for retraining
     * @param maxResourceBudget maximum resource budget for retraining
     * @param parameterMap      the parameter configuration
     * @return the evaluation result on the instance documents (never the dev-retrained result)
     * @throws HpoException if species assignment fails during the primary evaluation, or if
     *                      retraining fails
     */
    public EntityEvaluationResults evaluate(HpoInstance hpoInstance, GeneSpeciesAssigner speciesAssigner, boolean trainWithDev, int seed, int runId, int resourceBudget, int maxResourceBudget, Parameters parameterMap) throws HpoException {
        try {
            // Work on copies so that assignments do not leak into the shared documents.
            List<GeneDocument> copies = this.getDocuments4Instance(hpoInstance).stream().map(GeneDocument::new).collect(Collectors.toList());
            EntityEvaluationResults evalResult = this.evaluateSpeciesAssigner(speciesAssigner, copies, parameterMap);
            this.inspectionFilePrinter.printInspectionFile(evalResult.getOverallResult(), "taxonomy", HpoRoute.Metric.F, hpoInstance, copies, this.injector.getInstance(CandidateRetrieval.class), this.injector.getInstance(QueryGenerator.class));
            if (hpoInstance.getSplitType() == SplitType.TRAIN && speciesAssigner instanceof MLSpeciesAssigner) {
                MLSpeciesAssigner mlAssigner = (MLSpeciesAssigner) speciesAssigner;
                String algorithm = parameterMap.getString(de.julielab.geneexpbase.configuration.Configuration.dot("species_assignment", "ml", "algorithm"));
                DecimalFormat df = new DecimalFormat("0.####");
                String modelBaseName = "sa-model-" + algorithm + "-" + runId + "-" + df.format(evalResult.getOverallResult().getMicroFMeasureMentionWise());
                this.saveModelAndReevaluate(hpoInstance, mlAssigner, parameterMap, evalResult, false, modelBaseName);
                if (trainWithDev) {
                    log.info("Retraining model on complete training data, including the dev set.");
                    try {
                        // Drop the current model so that training starts from scratch.
                        mlAssigner.setClassifier(null);
                        mlAssigner.setRanker(null);
                        AlphabetCarrying model = this.trainMlModel(hpoInstance, speciesAssigner, true, seed, resourceBudget, maxResourceBudget, parameterMap);
                        this.setModelToMLSpeciesAssigner(mlAssigner, model);
                        EntityEvaluationResults evalResultsWithDev = this.evaluateSpeciesAssigner(speciesAssigner, this.getTrainingData(hpoInstance, true, true).stream().map(GeneDocument::new).collect(Collectors.toList()), parameterMap);
                        String modelBaseNameWithDev = "sa-final-model-" + algorithm + "-" + runId + "-" + df.format(evalResultsWithDev.getOverallResult().getMicroFMeasureMentionWise());
                        this.saveModelAndReevaluate(hpoInstance, mlAssigner, parameterMap, evalResultsWithDev, trainWithDev, modelBaseNameWithDev);
                    } catch (SpeciesAssignmentException e) {
                        // Best effort: the primary evaluation result is still valid, so only report the failure.
                        log.error("Could not evaluate the model retrained with the dev set for instance {}. The final model was not saved.", hpoInstance, e);
                    }
                }
            }
            return evalResult;
        } catch (SpeciesAssignmentException e) {
            throw new HpoException(e);
        }
    }

    /**
     * Saves the model of the given assigner to {@code <modelBaseName>.mod} and verifies the
     * saved file by loading it into a fresh assigner and repeating the evaluation.
     * <p>
     * If the model cannot be written, the error is logged and the verification is skipped:
     * reloading from a file that was not (completely) written could only fail or silently
     * check a stale model.
     *
     * @param hpoInstance     the instance whose documents are used for the re-evaluation
     * @param speciesAssigner the assigner holding the trained model
     * @param parameterMap    the parameter configuration stored with the model
     * @param evalResult      the original evaluation result to compare against
     * @param trainWithDev    whether to re-evaluate on the training data including the dev set
     *                        instead of the instance documents
     * @param modelBaseName   file name of the model without the {@code .mod} extension
     */
    public void saveModelAndReevaluate(HpoInstance hpoInstance, MLSpeciesAssigner speciesAssigner, Parameters parameterMap, EntityEvaluationResults evalResult, boolean trainWithDev, String modelBaseName) {
        File modelPath = new File(modelBaseName + ".mod");
        log.info("Saving ML species assignment model to {}", modelPath);
        try {
            speciesAssigner.saveModel(modelPath, parameterMap);
        } catch (IOException e) {
            log.error("Could not save the ML assignment model to {}. Skipping the check of the saved model.", modelPath, e);
            return;
        }
        log.info("Checking saved model by loading a new species assigner from it and redoing the evaluation.");
        List<GeneDocument> documents = trainWithDev ? this.getTrainingData(hpoInstance, true, true) : this.getDocuments4Instance(hpoInstance);
        // Evaluate on copies so that the assignments do not modify the shared documents.
        List<GeneDocument> copies = documents.stream().map(GeneDocument::new).collect(Collectors.toList());
        this.evalWithSavedModel(modelPath, parameterMap, copies, evalResult);
    }

    /**
     * Trains the ML species assignment model on the training data that corresponds to the
     * given HPO instance.
     * <p>
     * Every training document is copied and run through the assigner, the instance lists
     * attached to its gene mentions are collected, and the model is trained on the union of
     * them. The feature pipe of the collected instances is stored in {@code parameterMap}
     * under {@code "featurePipe"}.
     * <p>
     * NOTE(review): a model cache key is assembled but the cache lookup is disabled in this
     * version ({@code cachedModel} is always {@code null}), so the "reuse cached model" branch
     * is never taken and the freshly created {@link CachedModel} is not stored anywhere.
     *
     * @param si                the HPO instance that determines the training split
     * @param speciesAssigner   the assigner used to create the training instances
     * @param trainWithDev      whether the dev set is part of the training data
     * @param seed              random seed passed to the training routine
     * @param resourceBudget    resource budget used to select the training data
     * @param maxResourceBudget maximum resource budget used to select the training data
     * @param parameterMap      the parameter configuration; modified as described above
     * @return the trained model
     * @throws HpoException          if species assignment fails while creating the training instances
     * @throws IllegalStateException if a parameter tracked for {@link MLSpeciesAssigner} is not part of
     *                               the cache key, or if no training document yields any training instance
     */
    private AlphabetCarrying trainMlModel(HpoInstance si, GeneSpeciesAssigner speciesAssigner, boolean trainWithDev, int seed, int resourceBudget, int maxResourceBudget, Parameters parameterMap) throws HpoException {
        try {
            AlphabetCarrying model;
            boolean useAllActiveCorporaForTraining = parameterMap.getBoolean(de.julielab.geneexpbase.configuration.Configuration.dot("species_assignment", "ml", "use_all_corpus_trainsplits"), false);
            List<GeneDocument> trainPartition = this.getTrainingData4Budget4Instance(si, trainWithDev, useAllActiveCorporaForTraining, resourceBudget, maxResourceBudget);

            // Assemble the cache key from the training documents (IDs, gene texts, species mention
            // texts, MeSH candidates) and all parameters that influence the trained model.
            StringBuilder keyBuilder = new StringBuilder();
            keyBuilder.append(useAllActiveCorporaForTraining);
            Stream<?> documentEntitiesKey = trainPartition.stream().sorted(Comparator.comparing(GeneDocument::getId)).flatMap(doc -> Stream.concat(Stream.of(doc.getId()), Stream.concat(Stream.concat(doc.getGenes().map(GeneMention::getText), doc.getSpecies().getAllMentionCandidates().values().stream().map(SpeciesMention::getText)), doc.getSpecies().getMeshCandidates().stream())));
            documentEntitiesKey.forEach(keyBuilder::append);
            keyBuilder.append(parameterMap.get(de.julielab.geneexpbase.configuration.Configuration.dot("species_assignment", "ml", "standardize_features")));
            keyBuilder.append(parameterMap.get(de.julielab.geneexpbase.configuration.Configuration.dot("species_assignment", "ml", "minmax_scale_features")));
            parameters4model.stream().map(parameterMap::get).forEach(keyBuilder::append);
            if (parameterMap.getBoolean(de.julielab.geneexpbase.configuration.Configuration.dot("species_assignment", "ml", "use_docleveltax_score_feature"))) {
                keyBuilder.append(parameterMap.get(Configuration.PARAM_SYNONYM_APRIORI_SERVICE_SCOPE_WEIGHT_DOCUMENT));
                keyBuilder.append(parameterMap.get(Configuration.PARAM_SYNONYM_APRIORI_SERVICE_SCOPE_WEIGHT_SENTENCE));
                keyBuilder.append(parameterMap.get(Configuration.PARAM_SYNONYM_APRIORI_SERVICE_SCOPE_WEIGHT_MESH));
                keyBuilder.append(parameterMap.get(de.julielab.geneexpbase.configuration.Configuration.dot("species_assignment", "tax_frequency_norm")));
                keyBuilder.append(parameterMap.get(de.julielab.geneexpbase.configuration.Configuration.dot("species_assignment", "weights.title")));
                keyBuilder.append(parameterMap.get(de.julielab.geneexpbase.configuration.Configuration.dot("species_assignment", "weights.text")));
            }
            String algorithm = parameterMap.getString(de.julielab.geneexpbase.configuration.Configuration.dot("species_assignment", "ml", "algorithm"));
            if (algorithm.equals("ltr")) {
                keyBuilder.append(parameterMap.get(de.julielab.geneexpbase.configuration.Configuration.dot("species_assignment", "ml", "ltr.k")));
                keyBuilder.append(parameterMap.get(de.julielab.geneexpbase.configuration.Configuration.dot("species_assignment", "ml", "ltr.algorithm")));
                keyBuilder.append(parameterMap.get(de.julielab.geneexpbase.configuration.Configuration.dot("species_assignment", "ml", "ltr.metric")));
            }
            if (algorithm.equals("svm")) {
                keyBuilder.append(parameterMap.get(de.julielab.geneexpbase.configuration.Configuration.dot("species_assignment", "ml", "svm.type")));
                keyBuilder.append(parameterMap.get(de.julielab.geneexpbase.configuration.Configuration.dot("species_assignment", "ml", "svm.c")));
                keyBuilder.append(parameterMap.get(de.julielab.geneexpbase.configuration.Configuration.dot("species_assignment", "ml", "svm.kerneltype")));
                keyBuilder.append(parameterMap.get(de.julielab.geneexpbase.configuration.Configuration.dot("species_assignment", "ml", "svm.gamma")));
                keyBuilder.append(parameterMap.get(de.julielab.geneexpbase.configuration.Configuration.dot("species_assignment", "ml", "svm.coef0")));
                keyBuilder.append(parameterMap.get(de.julielab.geneexpbase.configuration.Configuration.dot("species_assignment", "ml", "svm.degree")));
            }

            // Cache lookup is disabled: no model is ever fetched for the key built above.
            CachedModel cachedModel = null;
            if (cachedModel == null) {
                log.info("Creating features on the training split corresponding to HPO instance {}", si);
                log.debug("Using all active corpora for training: {}", useAllActiveCorporaForTraining);
                if (log.isDebugEnabled()) {
                    log.debug("Received {} documents for training containing {} gene mentions.", trainPartition.size(), trainPartition.stream().flatMap(GeneDocument::getGenes).count());
                }
                InstanceList trainingInstances = null;
                for (GeneDocument trainDoc : trainPartition) {
                    GeneDocument copy = new GeneDocument(trainDoc);
                    speciesAssigner.setSpeciesHints(parameterMap, copy);
                    speciesAssigner.assign(copy, parameterMap);
                    // Every parameter read by the ML assigner must be reflected in the cache key.
                    Collection<String> trackedParameters = parameterMap.getTrackedParameters(MLSpeciesAssigner.class.getSimpleName());
                    List<String> unaccountedParameters = trackedParameters.stream().filter(p -> !parameters4model.contains(p)).collect(Collectors.toList());
                    if (!unaccountedParameters.isEmpty()) {
                        throw new IllegalStateException("The cache key used to cache species assignment models does not include the following configuration settings tracked in the training method: " + unaccountedParameters);
                    }
                    Optional<InstanceList> anyInstanceList = copy.getGenes().map(GeneMention::getInstances).filter(Objects::nonNull).filter(Predicate.not(Collection::isEmpty)).findAny();
                    if (!anyInstanceList.isPresent()) {
                        continue;
                    }
                    if (trainingInstances == null) {
                        // The first non-empty instance list provides the feature pipe for all training instances.
                        trainingInstances = new InstanceList(anyInstanceList.get().getPipe());
                        parameterMap.put("featurePipe", trainingInstances.getPipe());
                    }
                    for (GeneMention gm : copy.getGenesIterable()) {
                        if (gm.getInstances() == null) {
                            continue;
                        }
                        trainingInstances.addAll(gm.getInstances());
                    }
                }
                if (trainingInstances == null) {
                    // Previously only a warning was logged here and the next statement failed with a NullPointerException.
                    throw new IllegalStateException("Cannot train a species assignment model for HPO instance " + si + ": none of the " + trainPartition.size() + " training documents has a gene with a non-empty instance list.");
                }
                log.info("Training on training split corresponding to HPO instance {} with {} training instances from {} documents", si, trainingInstances.size(), trainPartition.size());
                parameterMap.startParameterUsageTracking(HpoSpeciesOptimizationRoute.class.getSimpleName());
                model = this.train(trainingInstances, parameterMap, de.julielab.geneexpbase.configuration.Configuration.dot("species_assignment", "ml"), seed);
                parameterMap.stopParameterUsageTracking(HpoSpeciesOptimizationRoute.class.getSimpleName());
                // Presumably train() stores the model and the scaling values in the parameter map - confirm.
                AlphabetCarrying classifier = (AlphabetCarrying) parameterMap.getOrDefault((Object) de.julielab.geneexpbase.configuration.Configuration.dot("species_assignment", "ml", "classifier"), (Object) null);
                AlphabetCarrying ranker = (AlphabetCarrying) parameterMap.getOrDefault((Object) de.julielab.geneexpbase.configuration.Configuration.dot("species_assignment", "ml", "ranker"), (Object) null);
                Object minMaxScalingValues = parameterMap.getOrDefault((Object) de.julielab.geneexpbase.configuration.Configuration.dot("species_assignment", "ml", "minmax_scaling_values"), (Object) null);
                Object standardizationValues = parameterMap.getOrDefault((Object) de.julielab.geneexpbase.configuration.Configuration.dot("species_assignment", "ml", "standardization_values"), (Object) null);
                cachedModel = new CachedModel(classifier != null ? classifier : ranker, minMaxScalingValues, standardizationValues);
            } else {
                model = cachedModel.getModel();
                this.setPipeServices(model);
                log.debug("Reusing cached model of {} for instance {}.", model.getClass(), si);
                parameterMap.put(de.julielab.geneexpbase.configuration.Configuration.dot("species_assignment", "ml", model instanceof RankLibRanker ? "ranker" : "classifier"), model);
                if (cachedModel.getMinMaxScalingValues() != null) {
                    parameterMap.put(de.julielab.geneexpbase.configuration.Configuration.dot("species_assignment", "ml", "minmax_scaling_values"), cachedModel.getMinMaxScalingValues());
                }
                if (cachedModel.getStandardizationValues() != null) {
                    parameterMap.put(de.julielab.geneexpbase.configuration.Configuration.dot("species_assignment", "ml", "standardization_values"), cachedModel.getStandardizationValues());
                }
                parameterMap.put("featurePipe", model instanceof RankLibRanker ? ((RankLibRanker) model).getInstancePipe() : ((Classifier) model).getInstancePipe());
            }
            return model;
        } catch (SpeciesAssignmentException e) {
            throw new HpoException(e);
        }
    }

    /**
     * Re-attaches the injected services to the feature pipes of a model.
     * <p>
     * For every {@link SpeciesAPrioriScorePipe} in the model's instance pipe the document
     * scoring service and the synonym/species cooccurrence service are set; for every
     * {@link SpeciesCandidates2TokenPipe} the species instance tools are set. Models that are
     * neither a {@link Classifier} nor a {@link RankLibRanker}, or that have no instance pipe,
     * are left untouched.
     *
     * @param model the model whose pipes should receive the services
     * @throws HpoException declared for callers; not thrown by the current implementation
     * @throws ClassCastException if the model's instance pipe is not a {@link SerialPipes}
     */
    private void setPipeServices(AlphabetCarrying model) throws HpoException {
        // Both model types expose getInstancePipe() directly, so no reflection is needed.
        Object instancePipe;
        if (model instanceof Classifier) {
            instancePipe = ((Classifier) model).getInstancePipe();
        } else if (model instanceof RankLibRanker) {
            instancePipe = ((RankLibRanker) model).getInstancePipe();
        } else {
            return;
        }
        if (instancePipe == null) {
            return;
        }
        for (Pipe p : ((SerialPipes) instancePipe).pipes()) {
            if (p instanceof SpeciesAPrioriScorePipe) {
                SpeciesAPrioriScorePipe aPrioriScorePipe = (SpeciesAPrioriScorePipe) p;
                SpeciesDocumentScoringService documentScoringService = this.injector.getInstance(SpeciesDocumentScoringService.class);
                SynonymSpeciesCooccurrenceService synonymSpeciesCooccurrenceService = this.injector.getInstance(SynonymSpeciesCooccurrenceService.class);
                aPrioriScorePipe.setDocumentScoringService(documentScoringService);
                aPrioriScorePipe.setSpeciesCooccurrenceService(synonymSpeciesCooccurrenceService);
            } else if (p instanceof SpeciesCandidates2TokenPipe) {
                SpeciesInstanceTools instanceTools = this.injector.getInstance(SpeciesInstanceTools.class);
                ((SpeciesCandidates2TokenPipe) p).setSpeciesInstanceTools(instanceTools);
            }
        }
    }

    /**
     * Runs the given species assigner on the documents and evaluates the predicted taxonomy
     * IDs against the gold IDs.
     * <p>
     * Before assignment, title and text species candidates whose lower-cased text matches
     * {@code (fe)?males?} are removed and the taxonomy IDs of all genes are cleared. Gold
     * entries are created per gold gene mention when the document has non-inferred gold
     * offsets, and per document otherwise. Predicted ID 559292 is mapped to 4932 and 8355 to
     * 8364 before comparison (presumably a normalization of taxonomy IDs - confirm).
     * <p>
     * The passed documents are modified; callers are expected to pass copies.
     *
     * @param speciesAssigner the assigner to evaluate
     * @param copies          the documents to assign species to; modified in place
     * @param parameterMap    the parameter configuration passed to the assigner
     * @return the evaluation results of predicted versus gold taxonomy IDs
     * @throws SpeciesAssignmentException if the species assignment fails for a document
     */
    public EntityEvaluationResults evaluateSpeciesAssigner(GeneSpeciesAssigner speciesAssigner, List<GeneDocument> copies, Parameters parameterMap) throws SpeciesAssignmentException {
        log.info("Evaluating previously trained species assignment model on {} documents.", copies.size());
        // NOTE(review): the filter is never used below; the lookup is kept only because it may
        // trigger initialization in the injector - remove once confirmed to be unnecessary.
        SpeciesAssignmentFilter assignmentFilter = this.injector.getInstance(SpeciesAssignmentFilter.class);
        EvaluationData goldData = new EvaluationData();
        EvaluationData predData = new EvaluationData();
        // Compile the pattern once instead of once per species mention.
        java.util.regex.Pattern genderTerm = java.util.regex.Pattern.compile("(fe)?males?");
        Predicate<Object> isGenderTerm = mention -> genderTerm.matcher(((SpeciesMention) mention).getText().toLowerCase()).matches();
        for (GeneDocument copy : copies) {
            // "male"/"female" mentions are not species mentions; drop them from both candidate pools.
            copy.getSpecies().getTitleCandidates().values().removeIf(isGenderTerm);
            copy.getSpecies().getTextCandidates().values().removeIf(isGenderTerm);
            // Make sure only the assigner's predictions end up on the genes.
            copy.getGenes().forEach(gm -> gm.setTaxonomyIds(Collections.emptyList()));
            speciesAssigner.assign(copy, parameterMap);
            if (copy.isGoldHasOffsets() && !copy.isGoldOffsetsInferred()) {
                copy.getGoldGenes().values().stream().flatMap(Collection::stream).flatMap(gm -> gm.getTaxonomyIds().stream().map(id -> new EvaluationDataEntry(gm.getDocId(), (String) id, gm.getBegin(), gm.getEnd(), gm.getText()))).forEach(goldData::add);
            } else {
                copy.getGoldTaxonomyIds().stream().map(id -> new EvaluationDataEntry(copy.getId(), (String) id)).forEach(goldData::add);
            }
            copy.getGenes().flatMap(gm -> gm.getTaxonomyIds().stream().map(id -> id.equals("559292") ? "4932" : id).map(id -> id.equals("8355") ? "8364" : id).map(id -> new EvaluationDataEntry(gm.getDocId(), (String) id, gm.getBegin(), gm.getEnd(), gm.getText()))).forEach(predData::add);
        }
        this.printUnaccountedParameters(parameterMap);
        log.debug("Evaluating on {} gold and {} test instances.", goldData.size(), predData.size());
        EntityEvaluationResults evalResult = this.entityEvaluator.evaluate(goldData, predData);
        log.info("Evaluation done.");
        return evalResult;
    }

    /**
     * Logs a warning listing all parameters that were tracked for this class but are neither
     * part of the model cache key ({@code parameters4model}) nor exempt from reporting
     * ({@code parameters4modelNoMissingReport}). Nothing is logged if there are none.
     *
     * @param parameterMap the parameter configuration providing the tracked parameters
     */
    private void printUnaccountedParameters(Parameters parameterMap) {
        Collection<String> trackedParameters = parameterMap.getTrackedParameters(HpoSpeciesOptimizationRoute.class.getSimpleName());
        // Reuse the collection if it already is a set, otherwise copy it into one.
        Set<String> tracked = trackedParameters instanceof Set ? (Set<String>) trackedParameters : new HashSet<String>(trackedParameters);
        Set<String> accounted = Sets.union(new HashSet<String>(parameters4model), parameters4modelNoMissingReport);
        Set<String> unaccountedParameters = Sets.difference(tracked, accounted);
        if (!unaccountedParameters.isEmpty()) {
            log.warn("Unaccounted parameters for model caching: {}", unaccountedParameters);
        }
    }

    /**
     * Loads the model stored at {@code modelPath} into a fresh ML species assigner, repeats the
     * evaluation on the given documents and warns if the mention-wise micro F-measure deviates
     * from the original result by more than 0.001.
     *
     * @param modelPath    the file the model was saved to
     * @param parameterMap the parameter configuration used for the evaluation
     * @param copies       the documents to evaluate on; modified by the evaluation
     * @param evalResult   the original evaluation result to compare against
     * @throws SpeciesAssignmentRuntimeException if the model cannot be reloaded or applied
     */
    private void evalWithSavedModel(File modelPath, Parameters parameterMap, List<GeneDocument> copies, EntityEvaluationResults evalResult) {
        try {
            MLSpeciesAssigner reloadedAssigner = (MLSpeciesAssigner) this.injector.getInstance(GeneSpeciesAssigner.class);
            reloadedAssigner.loadModel(modelPath);
            EntityEvaluationResults reloadedResult = this.evaluateSpeciesAssigner(reloadedAssigner, copies, parameterMap);

            double reloadedFScore = reloadedResult.getOverallResult().getMicroFMeasureMentionWise();
            log.info("Reloaded the saved assignment model from {} and got a micro FScore of {}", modelPath, reloadedFScore);

            double originalFScore = evalResult.getOverallResult().getMicroFMeasureMentionWise();
            boolean scoresDiffer = Math.abs(reloadedFScore - originalFScore) > 0.001;
            if (scoresDiffer) {
                log.warn("The evaluation result of the reloaded model differs from the original evaluation result. Reloaded: {}; original: {}", reloadedFScore, originalFScore);
            }
        } catch (SpeciesAssignmentException e) {
            log.error("Could not reload the saved model and do species assignment with it.");
            throw new SpeciesAssignmentRuntimeException(e);
        }
    }

    /**
     * Creates the species assigner whose class name is stored in the parameter map under
     * {@code "species_assigner"}. The instance is obtained from the injector.
     *
     * @param parameterMap the parameters; must contain the assigner class name
     * @return the species assigner instance
     * @throws NullPointerException              if no assigner class name is configured
     * @throws SpeciesAssignmentRuntimeException if the configured class cannot be found
     */
    @NotNull
    public GeneSpeciesAssigner getSpeciesAssigner(Map<String, Object> parameterMap) {
        String assignerClassName = Objects.requireNonNull((String) parameterMap.get("species_assigner"));
        Class<?> assignerClass;
        try {
            assignerClass = Class.forName(assignerClassName);
        } catch (ClassNotFoundException e) {
            throw new SpeciesAssignmentRuntimeException(e);
        }
        Object assigner = this.injector.getInstance(assignerClass);
        return (GeneSpeciesAssigner) assigner;
    }

    /**
     * Hands a trained model to the given ML-based species assigner and stops the growth of the alphabet of
     * the model's instance pipe. A {@link Classifier} is set as the assigner's classifier, a
     * {@link RankLibRanker} as its ranker. Any other model (including {@code null}) is ignored.
     *
     * @param speciesAssigner the assigner that receives the model
     * @param mlModel         the trained model
     */
    public void setModelToMLSpeciesAssigner(MLSpeciesAssigner speciesAssigner, AlphabetCarrying mlModel) {
        if (mlModel instanceof Classifier) {
            Classifier classifier = (Classifier) mlModel;
            speciesAssigner.setClassifier(classifier);
            classifier.getInstancePipe().getAlphabet().stopGrowth();
            return;
        }
        if (mlModel instanceof RankLibRanker) {
            RankLibRanker ranker = (RankLibRanker) mlModel;
            speciesAssigner.setRanker(ranker);
            ranker.getInstancePipe().getAlphabet().stopGrowth();
        }
    }

    private class ScoreCalculationWorker
    extends Thread
    implements Serializable {
        private static final long serialVersionUID = 295952L;
        private final HpoInstance hpoInstance;
        private final Parameters parameterMap;
        private final int seed;
        private final int resourceBudget;
        private final int maxResourceBudget;
        private final int runId;
        private String resultScore = "ERROR";

        public ScoreCalculationWorker(HpoInstance hpoInstance, Parameters parameterMap, int seed, int resourceBudget, int maxResourceBudget, int runId) {
            this.hpoInstance = hpoInstance;
            this.parameterMap = parameterMap;
            this.seed = seed;
            this.resourceBudget = resourceBudget;
            this.maxResourceBudget = maxResourceBudget;
            this.runId = runId;
            this.setName("Species_" + hpoInstance.toString());
        }

        @Override
        public void run() {
            try {
                GeneSpeciesAssigner speciesAssigner = HpoSpeciesOptimizationRoute.this.getSpeciesAssigner(this.parameterMap);
                String mlAlgorithmKey = de.julielab.geneexpbase.configuration.Configuration.dot("species_assignment", "ml", "algorithm");
                if (this.parameterMap.containsKey(mlAlgorithmKey) && Set.of("svm", "maxent", "ltr").contains(this.parameterMap.get(mlAlgorithmKey))) {
                    if (!(speciesAssigner instanceof MLSpeciesAssigner)) {
                        throw new IllegalArgumentException("The machine learning algorithm " + this.parameterMap.get(mlAlgorithmKey) + " is configured for use but the configured species assignment class " + speciesAssigner.getClass().getCanonicalName() + " is not not ML-based.");
                    }
                    try {
                        ((MLSpeciesAssigner)speciesAssigner).setInstanceTools(HpoSpeciesOptimizationRoute.this.injector.getInstance(SpeciesInstanceTools.class));
                        ((MLSpeciesAssigner)speciesAssigner).setFeatureNormalization(new FeatureNormalization(de.julielab.geneexpbase.configuration.Configuration.dot("species_assignment", "ml")));
                        AlphabetCarrying mlModel = HpoSpeciesOptimizationRoute.this.trainMlModel(this.hpoInstance, speciesAssigner, false, this.seed, this.resourceBudget, this.maxResourceBudget, this.parameterMap);
                        HpoSpeciesOptimizationRoute.this.setModelToMLSpeciesAssigner((MLSpeciesAssigner)speciesAssigner, mlModel);
                    }
                    catch (HpoException e) {
                        log.error("Error while training an ML model: ", e);
                        throw e;
                    }
                }
                if (!this.isInterrupted()) {
                    double fscore;
                    EntityEvaluationResults evaluate = HpoSpeciesOptimizationRoute.this.evaluate(this.hpoInstance, speciesAssigner, true, this.seed, this.runId, this.resourceBudget, this.maxResourceBudget, this.parameterMap);
                    EntityEvaluationResult overallResult = evaluate.getOverallResult();
                    double recall = overallResult.getEvaluationMode() == EvaluationMode.MENTION ? overallResult.getMicroRecallMentionWise() : overallResult.getMicroRecallDocWise();
                    double precision = overallResult.getEvaluationMode() == EvaluationMode.MENTION ? overallResult.getMicroPrecisionMentionWise() : overallResult.getMicroPrecisionDocWise();
                    double d = fscore = overallResult.getEvaluationMode() == EvaluationMode.MENTION ? overallResult.getMicroFMeasureMentionWise() : overallResult.getMicroFMeasureDocWise();
                    if (log.isDebugEnabled()) {
                        log.debug("Error statistics [TP / FP / FN]: {} / {} / {}", overallResult.getSumTpMentionWise(), overallResult.getSumFpMentionWise(), overallResult.getSumFnMentionWise());
                    }
                    log.info("Got [R/P/F] {} / {} / {} for {} ({} level)", new Object[]{recall, precision, fscore, this.hpoInstance, overallResult.getEvaluationMode()});
                    if (fscore < 1.0E-4) {
                        log.warn("Got null score. Configuration was:\n{}", (Object)this.parameterMap);
                    }
                    this.resultScore = String.valueOf(fscore * -1.0);
                }
            }
            catch (Throwable e) {
                this.resultScore = "ERROR: " + e.getMessage();
                throw new SpeciesAssignmentRuntimeException(e);
            }
        }

        public String getResultScore() {
            return this.resultScore;
        }
    }
}

