/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.geneexpbase.hpo;

import cc.mallet.classify.MaxEntTrainer;
import cc.mallet.types.Alphabet;
import cc.mallet.types.AlphabetCarrying;
import cc.mallet.types.FeatureVector;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;
import cc.mallet.types.Label;
import ciir.umass.edu.learning.RANKER_TYPE;
import ciir.umass.edu.metric.METRIC;
import com.google.inject.Injector;
import de.julielab.geneexpbase.GeneExpException;
import de.julielab.geneexpbase.GeneExpRuntimeException;
import de.julielab.geneexpbase.classification.FeatureUtils;
import de.julielab.geneexpbase.classification.MinMaxScalingStats;
import de.julielab.geneexpbase.classification.SVMClassifier;
import de.julielab.geneexpbase.classification.svm.SVMTrainOptions;
import de.julielab.geneexpbase.configuration.Configuration;
import de.julielab.geneexpbase.configuration.Parameters;
import de.julielab.geneexpbase.data.DocumentLoader;
import de.julielab.geneexpbase.data.DocumentSourceFiles;
import de.julielab.geneexpbase.data.UnknownCorpusException;
import de.julielab.geneexpbase.genemodel.GeneDocument;
import de.julielab.geneexpbase.hpo.HPOEvaluationException;
import de.julielab.geneexpbase.hpo.HpoCorpusRegistry;
import de.julielab.geneexpbase.hpo.HpoInstance;
import de.julielab.geneexpbase.hpo.SplitType;
import de.julielab.java.utilities.FileUtilities;
import de.julielab.ml.RankLibRanker;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.slf4j.Logger;
import spark.Request;
import spark.Response;
import spark.Route;

public abstract class HpoRoute
implements Route,
Serializable {
    /** Algorithm identifiers; these are the three values the {@code train} method has a branch for. */
    public static final Set<String> KNOWN_ML_ALGORITHMS = Set.of("maxent", "svm", "ltr");
    /**
     * Group 1 captures a key prefix up to and including the path element that directly follows {@code .ml.}.
     * Not referenced in the part of the class visible here.
     */
    private static final Pattern ML_PREFIX_PATTERN = Pattern.compile("(.*?\\.ml\\.[^.]+).*$");
    /** Counter shared by all routes; {@code handle} passes its value to {@code calculateScore} and increments it per request. */
    protected static AtomicInteger runCounter = new AtomicInteger(0);
    /**
     * Loader used by {@code loadData} to read corpus documents; obtained from the injector in the constructor.
     * NOTE(review): this is the only non-transient instance field of this Serializable class - confirm that
     * DocumentLoader is serializable.
     */
    private final DocumentLoader documentLoader;
    protected transient Configuration configuration;
    /**
     * Cache of loaded corpora. The outer key is the corpus name (with the suffix {@code -merged} for merged
     * instances), the inner key is the subcorpus. The value pairs the cross-validation partitions (left) with
     * the development set (right). Access is synchronized on the map itself in {@code getCorpusPartition}.
     */
    protected transient Map<String, Map<String, Pair<List<List<GeneDocument>>, List<GeneDocument>>>> loadedCorpora = new HashMap<String, Map<String, Pair<List<List<GeneDocument>>, List<GeneDocument>>>>();
    protected transient Logger log;
    /** Injector created by {@code createGuiceInjector} in the constructor. */
    protected transient Injector injector;

    public HpoRoute(Logger log, Configuration configuration) {
        this.log = log;
        this.configuration = configuration;
        this.injector = this.createGuiceInjector(configuration);
        this.documentLoader = this.injector.getInstance(DocumentLoader.class);
    }

    /**
     * Creates the Guice injector of this route. It is called from the constructor with the configuration
     * passed there, i.e. before the constructor body of the implementing subclass has run.
     */
    protected abstract Injector createGuiceInjector(Configuration var1);

    /**
     * Returns the endpoint of this route.
     * NOTE(review): presumably the path under which the route is registered with Spark - the caller is not visible here.
     */
    public abstract String getRouteEndpoint();

    /** Returns the number of cross-validation partitions the training data is divided into. */
    public abstract int getNumSplits();

    /**
     * Returns the sampling frequency of the development set: when positive, every n-th document (by list
     * position) is put into the development set; for values of 0 or less no development set is created.
     */
    public abstract int getDevSamplingFrequency();

    /**
     * Handles one score request.
     * <p>
     * A request carrying the query parameter {@code SHUTDOWN} is only logged and answered with {@code 0}.
     * Otherwise the reserved query parameters ({@code instance}, {@code instance_info}, {@code cutoff_time},
     * {@code cutoff_length}, {@code seed}, {@code TRIAL_BUDGET}, {@code max_budget}, {@code return_metric})
     * are extracted and every other query parameter is treated as a configuration parameter. For those, a
     * {@code D} between two lower case letters and a double underscore in the name as well as {@code _D_} in
     * the value are translated to a dot.
     * <p>
     * When the evaluation fails, the active configuration is written to a properties file for debugging. This
     * is skipped when the failure happened before the parameters were parsed, and a failure to write the file
     * is attached to the original error as a suppressed exception instead of replacing it.
     *
     * @param req the request whose query parameters describe the instance and the configuration
     * @param res the response; not used by this method
     * @return the score string computed by {@code calculateScore}, or {@code 0} for a shutdown request
     * @throws Exception the original failure, including an {@link HPOEvaluationException} when the score starts with {@code ERROR}
     */
    @Override
    public Object handle(Request req, Response res) throws Exception {
        if (req.queryParams().contains("SHUTDOWN")) {
            this.log.info("SHUTDOWN signal received, commit caches and shutting down.");
            this.log.info("Committing all caches is done, server can be shutdown.");
            return 0;
        }
        String score;
        Parameters parameterMap = null;
        HpoInstance hpoInstance = null;
        try {
            Set<String> queryParams = req.queryParams();
            String instanceName = null;
            String instanceInfo = null;
            int cutoffTime = Integer.MAX_VALUE;
            // Parsed for validation only; the value is not passed on to the score calculation.
            int cutoffLength = Integer.MAX_VALUE;
            int resourceBudget = Integer.MAX_VALUE;
            int maxResourceBudget = Integer.MAX_VALUE;
            int seed = 0;
            Metric returnMetric = this.getDefaultMetric();
            // Flat list of alternating parameter names and values.
            List<String> parameters = new ArrayList<String>(queryParams.size());
            for (String queryParam : queryParams) {
                String value = req.queryParams(queryParam);
                switch (queryParam) {
                    case "instance":
                        instanceName = value;
                        break;
                    case "instance_info":
                        instanceInfo = value;
                        break;
                    case "cutoff_time":
                        cutoffTime = (int)Math.round(Double.parseDouble(value));
                        break;
                    case "cutoff_length":
                        cutoffLength = (int)Math.round(Double.parseDouble(value));
                        break;
                    case "seed":
                        seed = Integer.parseInt(value);
                        break;
                    case "TRIAL_BUDGET":
                        resourceBudget = Integer.parseInt(value);
                        break;
                    case "max_budget":
                        maxResourceBudget = Integer.parseInt(value);
                        break;
                    case "return_metric":
                        // Locale.ROOT keeps the enum lookup independent of the default locale (e.g. Turkish 'i').
                        returnMetric = Metric.valueOf(value.toUpperCase(java.util.Locale.ROOT));
                        break;
                    default:
                        parameters.add(queryParam.replaceAll("([a-z])D([a-z])", "$1.$2").replaceAll("__", "."));
                        parameters.add(value.replaceAll("_D_", "."));
                        break;
                }
            }
            parameterMap = this.parseParameters(parameters, this.configuration);
            parameterMap.setKeysGivenBySmac();
            hpoInstance = this.parseInstanceName(instanceName, instanceInfo);
            if (resourceBudget < Integer.MAX_VALUE) {
                this.log.info("Processing score request for instance {} with a resource budget of {}", (Object)hpoInstance, (Object)resourceBudget);
            } else {
                this.log.info("Processing score request for instance {}", (Object)hpoInstance);
            }
            score = this.calculateScore(hpoInstance, parameterMap, seed, cutoffTime, resourceBudget, maxResourceBudget, returnMetric, runCounter.getAndIncrement());
            if (score.startsWith("ERROR")) {
                throw new HPOEvaluationException("Instance evaluation failed with error message '" + score + "'.");
            }
        }
        catch (Throwable e) {
            if (parameterMap == null) {
                // The failure happened while reading the query parameters, so there is no configuration to dump.
                this.log.error("Error occurred during evaluation for instance {} before the request parameters could be parsed. No configuration is stored.", hpoInstance, e);
            } else {
                File errorConfig = new File("genemapper-failed-" + hpoInstance + "-" + System.nanoTime() + ".properties");
                this.log.error("Error occurred during evaluation for instance {}. The active configuration is stored to {} for debugging purposes.", hpoInstance, errorConfig, e);
                try {
                    parameterMap.store(errorConfig, true);
                }
                catch (Exception storeFailure) {
                    // The debugging dump is best effort and must never hide the actual evaluation error.
                    e.addSuppressed(storeFailure);
                    this.log.warn("Could not store the active configuration to {}", errorConfig, storeFailure);
                }
            }
            throw e;
        }
        this.log.debug("Returning value {}", (Object)score);
        return score;
    }

    /**
     * Returns the corpora that take part in the optimization. Methods of this class change the split type and
     * the cross-validation round of the returned instances through their setters while iterating over them.
     */
    protected abstract List<HpoInstance> getActiveCorpora();

    /** Returns the task name; it is the first component of the split mapping file names. */
    protected abstract String getTaskName();

    /**
     * Computes how much of the overall cross-validation data the given instance represents.
     * <p>
     * For split types other than TESTSPLIT and TRAINSPLIT the pair {@code (1.0, 1)} is returned. Otherwise the
     * genes of every cross-validation round of every active corpus are counted for the split type of
     * {@code si}; the active corpus instances are modified through their setters while doing so.
     *
     * @param si the instance whose share is requested
     * @return left: genes of this instance divided by the total gene count; right: the number of counted
     *         corpus/round combinations
     */
    protected Pair<Double, Integer> getResultScalingFactors(HpoInstance si) {
        SplitType requestedType = si.getSplitType();
        boolean crossvalPartition = requestedType == SplitType.TESTSPLIT || requestedType == SplitType.TRAINSPLIT;
        if (!crossvalPartition) {
            return new ImmutablePair<>(1.0, 1);
        }
        int geneSum = 0;
        int instanceCount = 0;
        for (HpoInstance corpus : this.getActiveCorpora()) {
            File mappingFile = this.getSplitMappingFile(this.getTaskName(), corpus.getCorpus(), corpus.getSubcorpus(), si.isMergeCorpora());
            int round = 0;
            while (round < this.getNumSplits()) {
                corpus.setCrossvalRound(round);
                corpus.setSplitType(requestedType);
                List<GeneDocument> partition = this.getCorpusPartition(corpus, mappingFile);
                // The compound assignment narrows the long count back to int.
                geneSum += partition.stream().flatMap(GeneDocument::getGenes).count();
                instanceCount++;
                round++;
            }
        }
        List<GeneDocument> instanceDocuments = this.getDocuments4Instance(si);
        int instanceGenes = (int)instanceDocuments.stream().flatMap(GeneDocument::getGenes).count();
        double share = (double)instanceGenes / (double)geneSum;
        this.log.debug("Partition for instance {} has size {}. Total number of genes is {}. So the fraction of this data is {}", si, instanceGenes, geneSum, share);
        return new ImmutablePair<>(share, instanceCount);
    }

    /**
     * Collects the training documents for the given instance from all active corpora.
     * <p>
     * For a non-DEV instance the result starts with the TRAINSPLIT partition of the instance's own corpus for
     * its cross-validation round; all other active corpora contribute their TRAINSPLIT and TESTSPLIT
     * partitions of round 0. For a DEV instance every active corpus contributes these two partitions.
     * <p>
     * The documents are always copied into a fresh list. The partition lists handed out by
     * {@code getDocuments4Instance} can be the cached lists themselves (e.g. with a single split), so
     * appending to them directly would alter the cached corpus partitions.
     *
     * @param si the instance to collect training documents for
     * @return a new, modifiable list with the collected documents
     */
    protected List<GeneDocument> getAllCorporaTrainingDocuments4Instance(HpoInstance si) {
        boolean isDev = si.getSplitType() == SplitType.DEV;
        List<GeneDocument> allCorporaTrainDocs = new ArrayList<GeneDocument>();
        if (!isDev) {
            allCorporaTrainDocs.addAll(this.getDocuments4Instance(si.getCorpus(), si.getSubcorpus(), SplitType.TRAINSPLIT, si.isMergeCorpora(), si.getCrossvalRound()));
        }
        for (HpoInstance activeCorpus : this.getActiveCorpora()) {
            // The instance's own corpus has already been added above with its proper cross-validation round.
            if (!isDev && activeCorpus.getCorpus().equals(si.getCorpus()) && activeCorpus.getSubcorpus().equals(si.getSubcorpus())) {
                continue;
            }
            allCorporaTrainDocs.addAll(this.getDocuments4Instance(activeCorpus.getCorpus(), activeCorpus.getSubcorpus(), SplitType.TRAINSPLIT, si.isMergeCorpora(), 0));
            allCorporaTrainDocs.addAll(this.getDocuments4Instance(activeCorpus.getCorpus(), activeCorpus.getSubcorpus(), SplitType.TESTSPLIT, si.isMergeCorpora(), 0));
        }
        return allCorporaTrainDocs;
    }

    /**
     * Cuts the given document list down to the fraction {@code budget / maximumBudget}, keeping the documents
     * at the beginning of the list. The resulting length is rounded down.
     * <p>
     * The budgets are checked explicitly instead of with {@code assert}, so invalid values are rejected with a
     * clear message even when assertions are disabled.
     *
     * @param allDocuments4instance the complete document list
     * @param budget                the resource budget, must be positive
     * @param maximumBudget         the budget that corresponds to the complete list, must not be smaller than {@code budget}
     * @return a {@link List#subList(int, int) sub list view} on the first documents of the given list
     * @throws IllegalArgumentException if {@code budget} is not positive or larger than {@code maximumBudget}
     */
    protected List<GeneDocument> getDocuments4Budget(List<GeneDocument> allDocuments4instance, int budget, int maximumBudget) {
        this.log.debug("Shorting list of {} documents down for budget {}/{}", allDocuments4instance.size(), budget, maximumBudget);
        if (budget <= 0) {
            throw new IllegalArgumentException("The given resource budget is " + budget + " but it must be positive.");
        }
        if (maximumBudget < budget) {
            throw new IllegalArgumentException("The specified maximum budget of " + maximumBudget + " is not larger or equal to the current resource budget.");
        }
        // After the checks the fraction lies in (0, 1], so the computed length never exceeds the list size.
        double budgetFraction = (double)budget / (double)maximumBudget;
        int listLength = (int)((double)allDocuments4instance.size() * budgetFraction);
        List<GeneDocument> budgetedList = allDocuments4instance.subList(0, listLength);
        this.log.debug("Budgeted list has length {}", (Object)budgetedList.size());
        return budgetedList;
    }

    /**
     * Returns the documents of the given instance, reduced to the given budget.
     *
     * @param si            the instance whose documents are requested
     * @param budget        the resource budget; {@link Integer#MAX_VALUE} means "no restriction"
     * @param maximumBudget the budget that corresponds to the complete document list
     * @return all documents of the instance for an unrestricted budget, otherwise the result of
     *         {@code getDocuments4Budget}
     */
    protected List<GeneDocument> getDocuments4Budget4Instance(HpoInstance si, int budget, int maximumBudget) {
        List<GeneDocument> instanceDocuments = this.getDocuments4Instance(si);
        boolean unrestricted = budget == Integer.MAX_VALUE;
        return unrestricted ? instanceDocuments : this.getDocuments4Budget(instanceDocuments, budget, maximumBudget);
    }

    /**
     * Returns the documents that make up the data partition described by the given instance.
     * <ul>
     * <li>Non-merged DEV or TRAIN instance: the respective partitions of <em>all</em> active corpora are
     * concatenated. Each active corpus is resolved with its own split mapping file, so a corpus that is not
     * cached yet is never loaded against the mapping of the requested instance's corpus. The split type of the
     * active corpus instances is set as a side effect.</li>
     * <li>Merged TESTSPLIT instance whose corpus is known to the corpus registry: the partition of the
     * non-merged corpus is returned.</li>
     * <li>Everything else: the partition of the instance itself.</li>
     * </ul>
     *
     * @param si the instance describing corpus, subcorpus, split type and cross-validation round
     * @return the documents of the requested partition
     */
    protected List<GeneDocument> getDocuments4Instance(HpoInstance si) {
        String corpus = si.getCorpus();
        String subcorpus = si.getSubcorpus();
        SplitType splitType = si.getSplitType();
        boolean merged = si.isMergeCorpora();
        if (!merged && (splitType == SplitType.DEV || splitType == SplitType.TRAIN)) {
            List<GeneDocument> allSplitTypePartitions = new ArrayList<GeneDocument>();
            for (HpoInstance activeSi : this.getActiveCorpora()) {
                activeSi.setSplitType(splitType);
                // Use the mapping file that belongs to the corpus actually being loaded.
                File activeSplitMappingFile = this.getSplitMappingFile(this.getTaskName(), activeSi.getCorpus(), activeSi.getSubcorpus(), merged);
                allSplitTypePartitions.addAll(this.getCorpusPartition(activeSi, activeSplitMappingFile));
            }
            return allSplitTypePartitions;
        }
        File splitMappingFile = this.getSplitMappingFile(this.getTaskName(), corpus, subcorpus, merged);
        if (merged && splitType == SplitType.TESTSPLIT) {
            boolean isKnownCorpus;
            try {
                HpoCorpusRegistry.getCorpusFiles(si);
                isKnownCorpus = true;
            }
            catch (UnknownCorpusException ignored) {
                // The lookup only probes whether the registry knows the corpus; "unknown" is a regular outcome.
                isKnownCorpus = false;
            }
            if (!isKnownCorpus) {
                return this.getCorpusPartition(si, splitMappingFile);
            }
            HpoInstance nonMergedInstance = new HpoInstance(corpus, subcorpus, false, splitType, si.getCrossvalRound(), si.getInstanceInfo());
            return this.getCorpusPartition(nonMergedInstance, this.getSplitMappingFile(this.getTaskName(), corpus, subcorpus, false));
        }
        return this.getCorpusPartition(si, splitMappingFile);
    }

    /**
     * Builds the location of the file that stores the document-to-split assignment for a corpus. The file
     * lives in the directory {@code splitmappings} relative to the working directory, which is created when
     * it does not exist yet.
     *
     * @param task      the task name, first component of the file name
     * @param corpus    the corpus name
     * @param subcorpus the subcorpus name
     * @param merged    whether the mapping is for the merged corpora
     * @return the mapping file; the file itself is not created by this method
     */
    protected File getSplitMappingFile(String task, String corpus, String subcorpus, boolean merged) {
        File directory = new File("splitmappings");
        if (!directory.exists()) {
            directory.mkdirs();
        }
        StringBuilder fileName = new StringBuilder();
        fileName.append(task).append("-");
        fileName.append(corpus).append("-").append(subcorpus).append("-").append(this.getNumSplits());
        if (merged) {
            fileName.append("-merged-");
        }
        fileName.append("split-").append(this.getDevSamplingFrequency()).append("devfreq.txt");
        return new File(directory, fileName.toString());
    }

    /**
     * Converts a flat list of alternating names and values into a parameter map. A trailing name without a
     * value is ignored.
     *
     * @param parameters    the list in the form {@code name1, value1, name2, value2, ...}
     * @param configuration the configuration the parameter map is created on
     * @return the parameter map with one entry per name/value pair
     */
    protected Parameters parseParameters(List<String> parameters, Configuration configuration) {
        Parameters parsed = new Parameters(configuration);
        // Values sit at the odd positions, their names directly in front of them.
        for (int valueIdx = 1; valueIdx < parameters.size(); valueIdx += 2) {
            String value = parameters.get(valueIdx);
            parsed.put(parameters.get(valueIdx - 1), value);
        }
        return parsed;
    }

    /**
     * Convenience overload that wraps the given values into an instance without instance info and delegates
     * to {@link #getDocuments4Instance(HpoInstance)}.
     *
     * @param corpus        the corpus name
     * @param subcorpus     the subcorpus name
     * @param splitType     the requested split type
     * @param mergeCorpora  whether the merged corpora are requested
     * @param crossvalRound the cross-validation round
     * @return the documents of the described partition
     */
    protected List<GeneDocument> getDocuments4Instance(String corpus, String subcorpus, SplitType splitType, boolean mergeCorpora, int crossvalRound) {
        HpoInstance instance = new HpoInstance(corpus, subcorpus, mergeCorpora, splitType, crossvalRound, null);
        return this.getDocuments4Instance(instance);
    }

    /**
     * Selects the documents of one split type from a loaded corpus split.
     *
     * @param splitType      DEV: the development set; TRAIN: all cross-validation partitions concatenated;
     *                       TESTSPLIT: the partition of the given round; TRAINSPLIT: all partitions except the
     *                       one of the given round (with a single split, that one partition)
     * @param crossvalRound  the cross-validation round, only relevant for TESTSPLIT and TRAINSPLIT
     * @param subcorpusSplit the cross-validation partitions (left) and the development set (right)
     * @return the selected documents; for DEV, TESTSPLIT and the single-split TRAINSPLIT case this is the
     *         list stored in {@code subcorpusSplit}, otherwise a new list
     * @throws IllegalArgumentException for any other split type
     */
    protected List<GeneDocument> getCorpusSplitByType(SplitType splitType, int crossvalRound, Pair<List<List<GeneDocument>>, List<GeneDocument>> subcorpusSplit) {
        if (splitType == SplitType.DEV) {
            return subcorpusSplit.getRight();
        }
        if (splitType == SplitType.TRAIN) {
            List<GeneDocument> allPartitions = new ArrayList<>();
            for (List<GeneDocument> partition : subcorpusSplit.getLeft()) {
                allPartitions.addAll(partition);
            }
            return allPartitions;
        }
        if (splitType == SplitType.TESTSPLIT) {
            return subcorpusSplit.getLeft().get(crossvalRound);
        }
        if (splitType == SplitType.TRAINSPLIT) {
            int numSplits = this.getNumSplits();
            if (numSplits == 1) {
                return subcorpusSplit.getLeft().get(0);
            }
            List<GeneDocument> trainingRounds = new ArrayList<>();
            for (int round = 0; round < numSplits; ++round) {
                if (round == crossvalRound) {
                    // This partition is the held-out test data of the round.
                    continue;
                }
                trainingRounds.addAll(subcorpusSplit.getLeft().get(round));
            }
            return trainingRounds;
        }
        throw new IllegalArgumentException("Unsupported split type: " + splitType + ". Should be one of 'dev', 'train', 'trainsplit' or 'testsplit'.");
    }

    /**
     * Returns the partition of the instance's corpus that matches its split type and cross-validation round.
     * The corpus split is loaded through {@code loadData} on first access and cached afterwards; the whole
     * lookup runs while holding the lock on the cache map.
     *
     * @param si               the instance describing corpus, subcorpus, split type and round
     * @param splitMappingFile the split mapping file passed to {@code loadData} when the corpus is not cached
     * @return the documents of the requested partition
     * @throws GeneExpRuntimeException if loading the corpus fails
     */
    protected List<GeneDocument> getCorpusPartition(HpoInstance si, File splitMappingFile) {
        synchronized (this.loadedCorpora) {
            String corpusKey = si.getCorpus() + (si.isMergeCorpora() ? "-merged" : "");
            Map<String, Pair<List<List<GeneDocument>>, List<GeneDocument>>> subcorpora = this.loadedCorpora.computeIfAbsent(corpusKey, key -> new HashMap<>());
            Pair<List<List<GeneDocument>>, List<GeneDocument>> subcorpusSplit = subcorpora.computeIfAbsent(si.getSubcorpus(), key -> {
                try {
                    return this.loadData(si, splitMappingFile);
                }
                catch (GeneExpException | IOException e) {
                    // Checked exceptions cannot leave the mapping function.
                    throw new GeneExpRuntimeException(e);
                }
            });
            return this.getCorpusSplitByType(si.getSplitType(), si.getCrossvalRound(), subcorpusSplit);
        }
    }

    /**
     * Loads the documents for the given instance and partitions them into cross-validation splits and a
     * development set. For a merged instance the documents of all active corpora are loaded and united,
     * otherwise only the corpus of the instance is loaded. Document counts are logged on info level; for a
     * single corpus the gene counts per partition are logged as well.
     *
     * @param si           the instance whose corpus (or, when merged, all active corpora) is loaded
     * @param splitMapping the split mapping file handed to {@code getTrainDevSplit}
     * @return the cross-validation partitions (left) and the development set (right)
     */
    protected Pair<List<List<GeneDocument>>, List<GeneDocument>> loadData(HpoInstance si, File splitMapping) throws IOException, GeneExpException {
        boolean merged = si.isMergeCorpora();
        DocumentSourceFiles singleCorpusFiles = null;
        List<GeneDocument> documents;
        if (merged) {
            documents = new ArrayList<>();
            for (HpoInstance corpus : this.getActiveCorpora()) {
                DocumentSourceFiles corpusFiles = HpoCorpusRegistry.getCorpusFiles(corpus);
                List<GeneDocument> corpusDocuments = this.documentLoader.getDocuments(corpusFiles).collect(Collectors.toList());
                this.log.info("Loaded {} documents of {}", (Object)corpusDocuments.size(), (Object)corpusFiles.getName());
                documents.addAll(corpusDocuments);
            }
            this.log.info("Loaded all active corpora with a union of {} documents.", (Object)documents.size());
        } else {
            singleCorpusFiles = HpoCorpusRegistry.getCorpusFiles(si);
            documents = this.documentLoader.getDocuments(singleCorpusFiles).collect(Collectors.toList());
            this.log.info("Loaded {} documents of {}", (Object)documents.size(), (Object)singleCorpusFiles.getName());
        }

        Pair<List<List<GeneDocument>>, List<GeneDocument>> trainDevSplit = this.getTrainDevSplit(documents, splitMapping);
        int numTrainingDocuments = trainDevSplit.getLeft().stream().mapToInt(List::size).sum();
        this.log.info("TrainDev distribution - training: {} documents", (Object)numTrainingDocuments);
        this.log.info("TrainDev distribution - devset: {} documents", (Object)trainDevSplit.getRight().size());
        String partitionSizes = trainDevSplit.getLeft().stream().map(partition -> String.valueOf(partition.size())).collect(Collectors.joining(", "));
        this.log.info("TrainDev distribution - cross validation sets: {} documents, respectively", (Object)partitionSizes);

        // Gene statistics are only reported for a single corpus and only computed when they are logged.
        if (!merged && this.log.isInfoEnabled()) {
            String type = si.getSubcorpus();
            this.log.info("Gene number distribution for data at {}/{}:", (Object)singleCorpusFiles.getName(), (Object)type);
            List<List<GeneDocument>> partitions = trainDevSplit.getLeft();
            for (int round = 0; round < partitions.size(); ++round) {
                long genesInPartition = partitions.get(round).stream().flatMap(GeneDocument::getGenes).count();
                this.log.info("Split {} of data at {}/{}: {}", round, singleCorpusFiles.getName(), type, genesInPartition);
            }
            long genesInDev = trainDevSplit.getRight().stream().flatMap(GeneDocument::getGenes).count();
            this.log.info("Genes in dev set for data at {}/{}: {}", singleCorpusFiles.getName(), type, genesInDev);
        }
        return trainDevSplit;
    }

    /**
     * Distributes the given documents according to a stored split mapping file.
     * <p>
     * The file contains one line per document in the form {@code <document ID><TAB><split>} where the split is
     * either {@code dev} or the zero-based index of a cross-validation partition. Blank lines and lines
     * starting with {@code #} are skipped.
     *
     * @param allTrainingData the documents to distribute; each must have an entry in the mapping file
     * @param splitMapping    the split mapping file
     * @return the cross-validation partitions (left, {@code getNumSplits()} lists) and the development set (right)
     * @throws IllegalArgumentException if a line of the file has no split column or a split is neither
     *                                  {@code dev} nor an index in {@code [0, getNumSplits())}
     * @throws IllegalStateException    if a document has no entry in the mapping file or a document ID occurs twice
     * @throws GeneExpRuntimeException  if the file cannot be opened or closed
     */
    protected Pair<List<List<GeneDocument>>, List<GeneDocument>> loadDataSplits(List<GeneDocument> allTrainingData, File splitMapping) {
        String mappingPath = splitMapping.getAbsolutePath();
        Map<String, String> id2split;
        try (BufferedReader br = FileUtilities.getReaderFromFile(splitMapping)) {
            id2split = br.lines()
                    .filter(Predicate.not(String::isBlank))
                    .filter(l -> !l.startsWith("#"))
                    .map(l -> {
                        String[] columns = l.split("\t");
                        if (columns.length < 2) {
                            // Report the offending line instead of failing with a bare array index error.
                            throw new IllegalArgumentException("The split mapping file at " + mappingPath + " contains a malformed line, expected '<document ID><TAB><split>': " + l);
                        }
                        return columns;
                    })
                    .collect(Collectors.toMap(record -> record[0], record -> record[1]));
        }
        catch (IOException e) {
            throw new GeneExpRuntimeException(e);
        }
        int numSplits = this.getNumSplits();
        List<GeneDocument> devset = new ArrayList<GeneDocument>();
        List<List<GeneDocument>> crossvalSplit = new ArrayList<List<GeneDocument>>();
        for (int i = 0; i < numSplits; ++i) {
            crossvalSplit.add(new ArrayList<GeneDocument>());
        }
        for (GeneDocument doc : allTrainingData) {
            String split = id2split.get(doc.getId());
            if (split == null) {
                throw new IllegalStateException("The stored data split at " + mappingPath + " is not compatible to the current data. It does not have an entry for document ID " + doc.getId() + ". A common cause of this error is the evaluation of a corpus that is not configured in the active corpora of the optimization route.");
            }
            if (split.equals("dev")) {
                devset.add(doc);
                continue;
            }
            int partitionNum;
            try {
                partitionNum = Integer.parseInt(split);
            }
            catch (NumberFormatException e) {
                throw new IllegalArgumentException("The split mapping file at " + mappingPath + " defines an illegal split index: " + split, e);
            }
            if (partitionNum < 0 || partitionNum >= numSplits) {
                throw new IllegalArgumentException("The split mapping file at " + mappingPath + " defines an illegal split index: " + split);
            }
            crossvalSplit.get(partitionNum).add(doc);
        }
        return new ImmutablePair<List<List<GeneDocument>>, List<GeneDocument>>(crossvalSplit, devset);
    }

    /**
     * Writes the assignment of documents to splits to the given file, one line per document in the form
     * {@code <document ID><TAB><split>}. The split is the index of the cross-validation partition or
     * {@code dev} for documents of the development set. The number of written documents is reported through
     * the logger of this route, like all other messages of this class.
     *
     * @param trainTestSplit the cross-validation partitions; the list index is used as split index
     * @param devSplit       the development set
     * @param splitMapping   the file to write to
     * @throws GeneExpRuntimeException if writing fails
     */
    protected void saveDataSplit(List<List<GeneDocument>> trainTestSplit, List<GeneDocument> devSplit, File splitMapping) {
        int count = 0;
        try (BufferedWriter bw = FileUtilities.getWriterToFile(splitMapping)) {
            for (int i = 0; i < trainTestSplit.size(); ++i) {
                String splitIndex = String.valueOf(i);
                for (GeneDocument geneDocument : trainTestSplit.get(i)) {
                    bw.write(String.join("\t", geneDocument.getId(), splitIndex));
                    bw.newLine();
                    ++count;
                }
            }
            for (GeneDocument geneDocument : devSplit) {
                bw.write(String.join("\t", geneDocument.getId(), "dev"));
                bw.newLine();
                ++count;
            }
            this.log.info("Stored documents: {}", (Object)count);
        }
        catch (IOException e) {
            throw new GeneExpRuntimeException(e);
        }
    }

    /**
     * Partitions the given documents into cross-validation splits and a development set.
     * <p>
     * When the split mapping file exists, the stored assignment is applied. Otherwise a new assignment is
     * created and saved to that file: with a positive dev sampling frequency {@code n}, every document whose
     * list position is a multiple of {@code n} goes to the development set; the remaining documents are
     * dealt out to the cross-validation partitions in round-robin order.
     *
     * @param allTrainingData the documents to partition
     * @param splitMapping    the file the assignment is read from or written to
     * @return the cross-validation partitions (left) and the development set (right)
     */
    protected Pair<List<List<GeneDocument>>, List<GeneDocument>> getTrainDevSplit(List<GeneDocument> allTrainingData, File splitMapping) {
        this.log.info("Partitioning {} documents into {} cross validation partitions and a development set with a sampling frequency of {}.", allTrainingData.size(), this.getNumSplits(), this.getDevSamplingFrequency());
        if (splitMapping.exists()) {
            return this.loadDataSplits(allTrainingData, splitMapping);
        }
        int devFrequency = this.getDevSamplingFrequency();
        int numSplits = this.getNumSplits();
        List<GeneDocument> devset;
        List<GeneDocument> trainingWithoutDev;
        if (devFrequency > 0) {
            this.log.info("Using 1/{} of the data as dev set.", (Object)devFrequency);
            devset = new ArrayList<>();
            trainingWithoutDev = new ArrayList<>();
            int position = 0;
            for (GeneDocument document : allTrainingData) {
                List<GeneDocument> target = position % devFrequency == 0 ? devset : trainingWithoutDev;
                target.add(document);
                ++position;
            }
        } else {
            this.log.info("Dev sampling frequency is set to {}. Creating {} cross-validation splits without a dev set.", (Object)devFrequency, (Object)numSplits);
            trainingWithoutDev = allTrainingData;
            devset = Collections.emptyList();
        }
        List<List<GeneDocument>> crossvalSplit = new ArrayList<>();
        for (int round = 0; round < numSplits; ++round) {
            crossvalSplit.add(new ArrayList<>());
        }
        // Round-robin distribution keeps the partition sizes within one document of each other.
        int position = 0;
        for (GeneDocument document : trainingWithoutDev) {
            crossvalSplit.get(position % numSplits).add(document);
            ++position;
        }
        this.saveDataSplit(crossvalSplit, devset, splitMapping);
        return new ImmutablePair<>(crossvalSplit, devset);
    }

    /**
     * Returns the training documents for the given instance, reduced to the given budget.
     *
     * @param si                             the instance to obtain training data for
     * @param trainWithDev                   passed on to {@code getTrainingData}
     * @param useAllActiveCorporaForTraining passed on to {@code getTrainingData}
     * @param budget                         the resource budget; {@link Integer#MAX_VALUE} means "no restriction"
     * @param maximumBudget                  the budget that corresponds to the complete training data
     * @return the complete training data for an unrestricted budget, otherwise the result of
     *         {@code getDocuments4Budget}
     */
    protected List<GeneDocument> getTrainingData4Budget4Instance(HpoInstance si, boolean trainWithDev, boolean useAllActiveCorporaForTraining, int budget, int maximumBudget) {
        this.log.debug("Obtaining data for instance {} with budget {}/{}", si, budget, maximumBudget);
        List<GeneDocument> trainingDocuments = this.getTrainingData(si, trainWithDev, useAllActiveCorporaForTraining);
        boolean unrestricted = budget == Integer.MAX_VALUE;
        return unrestricted ? trainingDocuments : this.getDocuments4Budget(trainingDocuments, budget, maximumBudget);
    }

    /**
     * Determines the training documents for the given instance.
     * <ul>
     * <li>Non-DEV instance: the training documents of all active corpora when
     * {@code useAllActiveCorporaForTraining} is set and the instance is not merged, otherwise the TRAINSPLIT
     * partition of the instance's corpus for its cross-validation round.</li>
     * <li>DEV instance: the training documents of all active corpora (non-merged) or the TRAIN partition
     * (merged). With {@code trainWithDev} the documents of the instance itself are appended to a copy of
     * that list.</li>
     * </ul>
     *
     * @param si                             the instance to obtain training data for
     * @param trainWithDev                   whether the DEV documents are added to the training data of a DEV instance
     * @param useAllActiveCorporaForTraining whether non-DEV, non-merged instances train on all active corpora
     * @return the training documents
     */
    protected List<GeneDocument> getTrainingData(HpoInstance si, boolean trainWithDev, boolean useAllActiveCorporaForTraining) {
        boolean merged = si.isMergeCorpora();
        if (si.getSplitType() != SplitType.DEV) {
            if (useAllActiveCorporaForTraining && !merged) {
                return this.getAllCorporaTrainingDocuments4Instance(si);
            }
            return this.getDocuments4Instance(si.getCorpus(), si.getSubcorpus(), SplitType.TRAINSPLIT, merged, si.getCrossvalRound());
        }
        List<GeneDocument> trainOnly = merged
                ? this.getDocuments4Instance(si.getCorpus(), si.getSubcorpus(), SplitType.TRAIN, merged, -1)
                : this.getAllCorporaTrainingDocuments4Instance(si);
        if (!trainWithDev) {
            return trainOnly;
        }
        // Work on a copy so the list handed out for the training partition stays untouched.
        List<GeneDocument> trainPlusDev = new ArrayList<>(trainOnly);
        trainPlusDev.addAll(this.getDocuments4Instance(si));
        this.log.debug("Training with DEV data. Train size is {}, DEV size is {}, total: {}", trainOnly.size(), trainPlusDev.size() - trainOnly.size(), trainPlusDev.size());
        return trainPlusDev;
    }

    /** Returns the metric that {@code handle} uses when the request has no {@code return_metric} parameter. */
    protected abstract Metric getDefaultMetric();

    /**
     * Evaluates one configuration on one instance. {@code handle} passes the arguments in this order:
     * the parsed instance, the parsed parameters, the seed, the cutoff time, the resource budget, the maximum
     * resource budget, the metric to return and the value of the run counter. Budgets and cutoff time are
     * {@link Integer#MAX_VALUE} when the request did not specify them.
     *
     * @return the score as string; {@code handle} treats a result starting with {@code ERROR} as a failed evaluation
     */
    protected abstract String calculateScore(HpoInstance var1, Parameters var2, int var3, int var4, int var5, int var6, Metric var7, int var8);

    /**
     * Trains a model on the given instances with the algorithm configured under {@code PREFIX}.
     * <p>
     * The algorithm is read from the parameter {@code <PREFIX>.algorithm}. Before training, the features are
     * optionally standardized ({@code standardize_features}) and/or min-max scaled
     * ({@code minmax_scale_features}); the values computed for this are stored in {@code parameterMap} under
     * {@code standardization_values} and {@code minmax_scaling_values}. The trained model is stored in
     * {@code parameterMap} as well, under {@code classifier} for "maxent" and "svm" and under {@code ranker}
     * for "ltr".
     *
     * @param trainingInstances the training data; the instances are passed to the feature preprocessing utilities
     * @param parameterMap      the parameters to read the settings from; it also receives the preprocessing values and the model
     * @param PREFIX            the configuration key prefix of the settings
     * @param seed              only used for "ltr", where it is passed to the ranker training
     * @return the trained model, or {@code null} when the configured algorithm is none of "maxent", "svm", "ltr"
     */
    protected AlphabetCarrying train(InstanceList trainingInstances, Parameters parameterMap, String PREFIX, int seed) {
        // NOTE(review): the Serializable type of this variable and the uncast field accesses on it below look
        // like a decompilation artifact - confirm against the original source.
        Serializable normalizationValues;
        AlphabetCarrying classifier = null;
        String algorithmType = (String)parameterMap.get(Configuration.dot(PREFIX, "algorithm"));
        this.log.info("Got data alphabet of size {} for training", (Object)trainingInstances.getAlphabet().size());
        if (parameterMap.getBoolean(Configuration.dot(PREFIX, "standardize_features"))) {
            this.log.info("Performing train feature standardization (Z-score normalization)");
            normalizationValues = FeatureUtils.standardizeFeatures(trainingInstances);
            parameterMap.put(Configuration.dot(PREFIX, "standardization_values"), normalizationValues);
            this.log.info("Got standardization parameters of length {} (means), {} (stdevs)", (Object)normalizationValues.means.length, (Object)normalizationValues.stdDeviations.length);
        }
        if (parameterMap.getBoolean(Configuration.dot(PREFIX, "minmax_scale_features"))) {
            this.log.info("Performing train feature min-max scaling (potentially on the already Z-score normalized data).");
            normalizationValues = FeatureUtils.scaleFeatures(trainingInstances);
            parameterMap.put(Configuration.dot(PREFIX, "minmax_scaling_values"), normalizationValues);
            this.log.info("Got scaling parameters of length {} (max vals), {} (min vals)", (Object)((MinMaxScalingStats)normalizationValues).maxValues.length, (Object)((MinMaxScalingStats)normalizationValues).minValues.length);
        }
        if (!trainingInstances.isEmpty() && this.log.isDebugEnabled()) {
            this.log.debug("Example feature vector after potential preprocessing of gene {}: [focus tax {}] {}", ((Instance)trainingInstances.get(0)).getProperty("gm"), ((Instance)trainingInstances.get(0)).getSource(), ((FeatureVector)((Instance)trainingInstances.get(0)).getData()).toString(true));
        }
        if (algorithmType.equals("maxent")) {
            MaxEntTrainer maxEntTrainer = new MaxEntTrainer();
            this.log.info("Training maximum entropy model");
            classifier = maxEntTrainer.train(trainingInstances);
            parameterMap.put(Configuration.dot(PREFIX, "classifier"), classifier);
            this.log.info("Finished maximum entropy training.");
        } else if (algorithmType.equals("svm")) {
            // svm.type is mandatory; all other SVM options are only set when the parameter is present.
            SVMTrainOptions svmTrainOptions = new SVMTrainOptions();
            svmTrainOptions.svmType = Integer.parseInt((String)parameterMap.get(Configuration.dot(PREFIX, "svm.type")));
            if (parameterMap.containsKey(Configuration.dot(PREFIX, "svm.c"))) {
                svmTrainOptions.C = Double.parseDouble((String)parameterMap.get(Configuration.dot(PREFIX, "svm.c")));
            }
            if (parameterMap.containsKey(Configuration.dot(PREFIX, "svm.kerneltype"))) {
                svmTrainOptions.kernelType = Integer.parseInt((String)parameterMap.get(Configuration.dot(PREFIX, "svm.kerneltype")));
            }
            if (parameterMap.containsKey(Configuration.dot(PREFIX, "svm.coef0"))) {
                svmTrainOptions.coef0 = Double.parseDouble((String)parameterMap.get(Configuration.dot(PREFIX, "svm.coef0")));
            }
            if (parameterMap.containsKey(Configuration.dot(PREFIX, "svm.gamma"))) {
                svmTrainOptions.svmGamma = Double.parseDouble((String)parameterMap.get(Configuration.dot(PREFIX, "svm.gamma")));
            }
            if (parameterMap.containsKey(Configuration.dot(PREFIX, "svm.degree"))) {
                svmTrainOptions.svmDegree = Integer.parseInt((String)parameterMap.get(Configuration.dot(PREFIX, "svm.degree")));
            }
            svmTrainOptions.probability = true;
            // Instances whose target entry equals the Float 1.0 count as positive, all others as negative.
            List positiveInstances = trainingInstances.stream().filter(i -> ((Label)i.getTarget()).getEntry().equals(Float.valueOf(1.0f))).collect(Collectors.toList());
            long numPositive = positiveInstances.size();
            long numNegative = (long)trainingInstances.size() - numPositive;
            boolean moreNegative = numNegative > numPositive;
            // ratio = size of the smaller class divided by the size of the larger class.
            double ratio = moreNegative ? (double)numPositive / (double)numNegative : (double)numNegative / (double)numPositive;
            Alphabet targetAlphabet = trainingInstances.getTargetAlphabet();
            int posIndex = targetAlphabet.lookupIndex(Float.valueOf(1.0f));
            int negIndex = targetAlphabet.lookupIndex(Float.valueOf(0.0f));
            // The smaller class is weighted with ratio, the larger class with 1 - ratio.
            // NOTE(review): for ratio < 0.5 this gives the smaller class the lower weight - confirm this is intended.
            if (moreNegative) {
                svmTrainOptions.addClassWeight(posIndex, ratio);
                svmTrainOptions.addClassWeight(negIndex, 1.0 - ratio);
            } else {
                svmTrainOptions.addClassWeight(posIndex, 1.0 - ratio);
                svmTrainOptions.addClassWeight(negIndex, ratio);
            }
            SVMClassifier svmClassifier = new SVMClassifier();
            this.log.info("Training SVM with the following options: {}", (Object)svmTrainOptions);
            svmClassifier.train(trainingInstances, svmTrainOptions);
            parameterMap.put(Configuration.dot(PREFIX, "classifier"), svmClassifier);
            classifier = svmClassifier;
            this.log.info("Finished SVM training.");
        } else if (algorithmType.equals("ltr")) {
            RANKER_TYPE rankerType = RANKER_TYPE.valueOf((String)parameterMap.get(Configuration.dot(PREFIX, "ltr.algorithm")));
            METRIC metric = METRIC.valueOf((String)parameterMap.get(Configuration.dot(PREFIX, "ltr.metric")));
            int k = Integer.parseInt((String)parameterMap.get(Configuration.dot(PREFIX, "ltr.k")));
            RankLibRanker ranker = new RankLibRanker(rankerType, null, metric, k, null);
            this.log.info("Training {} model.", (Object)rankerType);
            // NOTE(review): the meaning of the arguments 'true' and '0.8f' is defined by RankLibRanker.train and
            // not visible here - presumably a validation flag and a train fraction; verify.
            ranker.train(trainingInstances, true, 0.8f, seed);
            parameterMap.put(Configuration.dot(PREFIX, "ranker"), ranker);
            classifier = ranker;
            this.log.info("Finished training {} model.", (Object)rankerType);
        }
        return classifier;
    }

    /**
     * Parses a hyphen-separated instance name into an {@link HpoInstance}.
     *
     * <p>The following layouts are recognized (tokens are the parts between hyphens):
     * <ul>
     *   <li>{@code <splittype>} (single token): corpus is set to {@code "allactive"} and the subcorpus to
     *       {@code "dev"}.</li>
     *   <li>{@code <corpus>-<subcorpus>-merged-<splittype>-<round>} (exactly five tokens, third token
     *       {@code "merged"}): the fifth token is parsed as the cross-validation round.</li>
     *   <li>{@code <corpus>-<subcorpus>-<splittype>[-<extra>...]} (three or more tokens otherwise): if a fourth
     *       token exists and is an integer it is used as the cross-validation round; if it is not an integer
     *       it replaces {@code instanceInfo}.</li>
     * </ul>
     * The merge flag passed to the {@link HpoInstance} is {@code true} when the second or the third token equals
     * {@code "merged"}. The cross-validation round defaults to {@code -1} when none is given.
     *
     * @param instanceName the instance name to parse
     * @param instanceInfo additional instance information handed to the {@link HpoInstance}; overridden by a
     *                     non-numeric fourth token, see above
     * @return the parsed instance description
     * @throws IllegalArgumentException if the name has an unsupported number of tokens (e.g. exactly two) or
     *                                  if the split type token is not one of {@code testsplit},
     *                                  {@code trainsplit}, {@code dev} or {@code train}; a
     *                                  {@link NumberFormatException} (a subclass) is thrown if the round token
     *                                  of the five-token merged layout is not an integer
     */
    protected HpoInstance parseInstanceName(String instanceName, String instanceInfo) {
        SplitType splitType;
        String splitTypeString;
        String subcorpus;
        String corpus;
        String[] split = instanceName.split("-");
        int crossvalRound = -1;
        // Only look at an index after checking that it exists. Previously split[2] was read for two-token
        // names, which raised an ArrayIndexOutOfBoundsException instead of the IllegalArgumentException below.
        boolean mergeActiveCorpora = (split.length > 1 && split[1].equals("merged"))
                || (split.length > 2 && split[2].equals("merged"));
        if (split.length == 5 && split[2].equals("merged")) {
            corpus = split[0];
            subcorpus = split[1];
            splitTypeString = split[3];
            crossvalRound = Integer.parseInt(split[4]);
        } else if (split.length > 2) {
            corpus = split[0];
            subcorpus = split[1];
            splitTypeString = split[2];
            if (split.length > 3) {
                try {
                    crossvalRound = Integer.parseInt(split[3]);
                }
                catch (NumberFormatException e) {
                    // Not a round number: the fourth token is treated as free-form instance information.
                    instanceInfo = split[3];
                }
            }
        } else if (split.length == 1) {
            corpus = "allactive";
            subcorpus = "dev";
            splitTypeString = instanceName;
        } else {
            // Zero tokens (name consists of hyphens only) or exactly two tokens.
            throw new IllegalArgumentException("Illegal instance name: " + instanceName);
        }
        switch (splitTypeString) {
            case "testsplit": {
                splitType = SplitType.TESTSPLIT;
                break;
            }
            case "trainsplit": {
                splitType = SplitType.TRAINSPLIT;
                break;
            }
            case "dev": {
                splitType = SplitType.DEV;
                break;
            }
            case "train": {
                splitType = SplitType.TRAIN;
                break;
            }
            default: {
                throw new IllegalArgumentException("Illegal split type '" + splitTypeString + "'.");
            }
        }
        return new HpoInstance(corpus, subcorpus, mergeActiveCorpora, splitType, crossvalRound, instanceInfo);
    }

    /**
     * Collects the distinct parameter name prefixes for which a known machine learning algorithm is configured.
     *
     * <p>Each parameter key is matched in full against {@code ML_PREFIX_PATTERN}; the first capture group of a
     * matching key is taken as the prefix. A prefix is kept only if the value stored under
     * {@code Configuration.dot(prefix, "algorithm")} (or the empty string if there is no such parameter) is
     * contained in {@code KNOWN_ML_ALGORITHMS}.
     *
     * @param parameters the parameters whose keys are inspected
     * @return the matching prefixes without duplicates
     */
    protected List<String> getMachineLearningPrefixes(Parameters parameters) {
        // A prefix qualifies when its "algorithm" parameter names one of the known algorithms.
        Predicate<String> hasKnownAlgorithm = candidate ->
                KNOWN_ML_ALGORITHMS.contains(parameters.getOrDefault((Object)Configuration.dot(candidate, "algorithm"), ""));
        return parameters.keySet().stream()
                .map(key -> ML_PREFIX_PATTERN.matcher(key))
                .filter(matcher -> matcher.matches())
                .map(matcher -> matcher.group(1))
                .filter(hasKnownAlgorithm)
                .distinct()
                .collect(Collectors.toList());
    }

    /**
     * Identifiers of the metrics known to this class.
     *
     * <p>NOTE(review): the exact definition of each metric is not visible here; presumably these are the
     * evaluation measures that can be selected for optimization. Confirm against the usages.
     * The declaration order is kept stable because {@link #ordinal()} depends on it.
     */
    public enum Metric {
        RECALL,
        MAX_RECALL,
        PRECISION,
        F,
        NDCG,
        P1,
        MAX_REC_10,
        RECALL_REJECTION,
        PRECISION_REJECTION,
        F_REJECTION
    }
}

