/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.genemapper.disambig;

import de.julielab.geneexpbase.candidateretrieval.SynHit;
import de.julielab.geneexpbase.configuration.Parameters;
import de.julielab.geneexpbase.genemodel.GeneDocument;
import de.julielab.geneexpbase.genemodel.GeneMention;
import de.julielab.geneexpbase.genemodel.GeneSet;
import de.julielab.geneexpbase.genemodel.GeneSpeciesOccurrence;
import de.julielab.geneexpbase.genemodel.MentionMappingResult;
import de.julielab.genemapper.Configuration;
import de.julielab.java.utilities.JavaStreamUtilities;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.OptionalDouble;
import java.util.Set;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Collection of static routines that make the mapping results of the gene mentions within a
 * gene set consistent with each other.
 * <p>
 * The single public entry point is {@link #homogenize(GeneDocument, Parameters)}, which flags
 * anchor candidates, homogenizes the rejection state of each gene set and finally homogenizes
 * the gene IDs assigned to each gene set. All behavior is driven by configuration values read
 * from the passed {@link Parameters}.
 */
public class DypsisGeneSetHomogenizator {
    /** Logger shared by all static routines of this class. */
    private static final Logger log = LoggerFactory.getLogger(DypsisGeneSetHomogenizator.class);

    /**
     * Runs the complete gene set homogenization for one document.
     * <p>
     * The three phases run in a fixed order: anchor candidates are flagged first, then the
     * rejection state of the gene sets is homogenized and finally the gene set IDs are
     * homogenized.
     *
     * @param document   the document whose gene mentions and gene sets are processed
     * @param parameters the configuration the individual phases read their settings from
     */
    public static void homogenize(GeneDocument document, Parameters parameters) {
        determineAnchorMentions(document, parameters);
        homogenizeRejection(document, parameters);
        homogenizeGenesetIds(document, parameters);
    }

    /**
     * Flags the top-ranked candidate of each non-rejected gene mention as an anchor when its
     * overall score reaches the configured anchoring threshold.
     * <p>
     * Nothing happens when both the rejection homogenization method and the gene set ID
     * homogenization method are {@code no_gs_homogenization} (which is also the default for
     * both). Exact matches are compared against the exact-match anchoring threshold, all other
     * candidates against the approximate-match anchoring threshold.
     * <p>
     * Taxonomy entries with an empty candidate list, a {@code null} top candidate or the
     * {@link MentionMappingResult#REJECTION} marker are skipped.
     *
     * @param document   the document whose non-rejected gene mentions are inspected
     * @param parameters the configuration holding the homogenization methods and thresholds
     */
    private static void determineAnchorMentions(GeneDocument document, Parameters parameters) {
        final String noHomogenization = "no_gs_homogenization";
        boolean rejectionHomogenizationOff = noHomogenization.equals(parameters.getString(Configuration.dot(new String[]{"disambiguation", Configuration.PARAM_GS_REJECTION_HOMOGENIZATION_METHOD}), noHomogenization));
        if (rejectionHomogenizationOff && noHomogenization.equals(parameters.getString(Configuration.dot(new String[]{"disambiguation", Configuration.PARAM_GS_HOMOGENIZATION_METHOD}), noHomogenization))) {
            // Anchors are only consumed by the homogenization steps; nothing to do without them.
            return;
        }
        String anchoringPrefix = Configuration.dot(new String[]{"disambiguation", "anchoring"});
        double exactMatchThreshold = getExactMatchThreshold(anchoringPrefix, parameters);
        // Resolved lazily: only read from the configuration once a non-exact candidate shows up.
        Double approxMatchThreshold = null;
        for (GeneMention gm : document.getNonRejectedGenesIterable()) {
            MentionMappingResult mmr = gm.getMentionMappingResult();
            for (List<?> rankedCandidates : mmr.tax2finalRankedCandidates.values()) {
                // A taxonomy ID may be present without any candidate; there is nothing to anchor then.
                if (rankedCandidates == null || rankedCandidates.isEmpty()) {
                    continue;
                }
                SynHit topCandidate = (SynHit) rankedCandidates.get(0);
                if (topCandidate == null || topCandidate == MentionMappingResult.REJECTION) {
                    continue;
                }
                double anchoringThreshold;
                if (topCandidate.isExactMatch()) {
                    anchoringThreshold = exactMatchThreshold;
                } else {
                    if (approxMatchThreshold == null) {
                        approxMatchThreshold = getApproxMatchThreshold(anchoringPrefix, parameters);
                    }
                    anchoringThreshold = approxMatchThreshold;
                }
                if (topCandidate.getOverallScore() >= anchoringThreshold) {
                    topCandidate.setAnchor(true);
                }
            }
        }
    }

    /**
     * Homogenizes the rejection state of every gene set of the document with the configured
     * rejection homogenization method.
     * <p>
     * The step is only active when low-score rejection or family rejection is enabled (both
     * default to {@code false}). The method defaults to {@code no_gs_homogenization}, matching
     * the default used when anchors are determined, so a missing setting leaves the gene sets
     * untouched.
     *
     * @param document   the document whose gene sets are processed
     * @param parameters the configuration holding the rejection switches and the method name
     * @throws IllegalArgumentException if the configured method is none of
     *                                  {@code majority_vote}, {@code mean_score} or
     *                                  {@code no_gs_homogenization}
     */
    private static void homogenizeRejection(GeneDocument document, Parameters parameters) {
        boolean lowScoreRejection = parameters.getBoolean(Configuration.dot(new String[]{"disambiguation", "do_low_score_rejection"}), false);
        if (!lowScoreRejection && !parameters.getBoolean(Configuration.dot(new String[]{"disambiguation", "do_family_rejection"}), false)) {
            return;
        }
        String homogenizationKey = Configuration.dot(new String[]{"disambiguation", Configuration.PARAM_GS_REJECTION_HOMOGENIZATION_METHOD});
        String homogenizationMethod = parameters.getString(homogenizationKey, "no_gs_homogenization");
        // NOTE(review): unrejectGeneset() may add new gene sets to a mention's document while
        // this loop iterates - confirm that getGeneSets() tolerates such modifications.
        for (GeneSet gs : document.getGeneSets()) {
            switch (homogenizationMethod) {
                case "majority_vote":
                    homogenizeGenesetRejectionByMajorityVote(gs, parameters);
                    break;
                case "mean_score":
                    homogenizeGenesetRejectionByMeanScore(gs, parameters);
                    break;
                case "no_gs_homogenization":
                    break;
                default:
                    throw new IllegalArgumentException("Illegal value for " + homogenizationKey + ": " + homogenizationMethod);
            }
        }
    }

    /**
     * Decides the rejection state of a mixed gene set by the mean overall score of the top
     * candidates of its mentions.
     * <p>
     * Only gene sets containing both rejected and non-rejected mentions are touched. When no
     * score is available or the mean lies below the configured gene set rejection threshold,
     * the set ID becomes {@link MentionMappingResult#REJECTION}; otherwise the rejected
     * mentions of the set are un-rejected.
     *
     * @param gs         the gene set to homogenize
     * @param parameters the configuration holding the gene set rejection threshold
     */
    private static void homogenizeGenesetRejectionByMeanScore(GeneSet gs, Parameters parameters) {
        if (!gs.hasRejectedAndNonRejectedMentions()) {
            return;
        }
        String thresholdKey = Configuration.dot(new String[]{"disambiguation", Configuration.PARAM_THRESHOLD_GS_REJECTION});
        double gsRejectionThreshold = parameters.getDouble(thresholdKey);
        OptionalDouble meanTopScore = gs.stream()
                .flatMap(DypsisGeneSetHomogenizator::topCandidatesForMeanScore)
                .mapToDouble(SynHit::getOverallScore)
                .average();
        if (meanTopScore.isPresent() && !(meanTopScore.getAsDouble() < gsRejectionThreshold)) {
            unrejectGeneset(gs, parameters);
        } else {
            gs.setSetId(List.of(MentionMappingResult.REJECTION));
            gs.setRejectReason(MentionMappingResult.RejectReason.DISAMBIGUATION_BELOW_THRESHOLD);
        }
    }

    /**
     * Yields the first candidate per taxonomy ID of a mention: taken from the original
     * candidates for rejected mentions and from the final ranked candidates (without
     * {@code null} and the rejection marker) for all other mentions.
     */
    private static Stream<SynHit> topCandidatesForMeanScore(GeneMention gm) {
        boolean rejected = gm.isRejected();
        MentionMappingResult mappingResult = gm.getMentionMappingResult();
        if (rejected) {
            return mappingResult.tax2originalCandidates.values().stream()
                    .filter(candidates -> !candidates.isEmpty())
                    .map(candidates -> (SynHit) candidates.get(0));
        }
        return mappingResult.tax2finalRankedCandidates.values().stream()
                .filter(candidates -> !candidates.isEmpty())
                .map(candidates -> (SynHit) candidates.get(0))
                .filter(Objects::nonNull)
                .filter(hit -> hit != MentionMappingResult.REJECTION);
    }

    /**
     * Reverts the rejection of the rejected mentions of a gene set.
     * <p>
     * For every rejected mention the original candidates of each of its taxonomy IDs are
     * restored as final ranked candidates and the reject reason for that taxonomy ID is
     * removed. If the mention is still rejected afterwards and candidate transfer is enabled
     * (default {@code false}), the highest scoring non-rejection candidate found in the gene
     * set is transferred to the mention:
     * <ul>
     * <li>directly, when the mention already carries the taxonomy ID of that candidate;</li>
     * <li>after registering the taxonomy ID as {@code CANDIDATE_TRANSFER}, when none of the
     * mention's taxonomy occurrences is {@code COMPOUND} or {@code SPECIES_PREFIX};</li>
     * <li>otherwise the mention cannot be un-rejected. It is removed from the gene set and
     * placed into a new, rejected gene set of its own that is added to the mention's
     * document.</li>
     * </ul>
     *
     * @param gs         the gene set whose rejected mentions are un-rejected
     * @param parameters the configuration holding the candidate transfer switch
     * @throws IllegalStateException if a candidate transfer is required but the gene set holds
     *                               no non-rejection candidate at all
     */
    private static void unrejectGeneset(GeneSet gs, Parameters parameters) {
        boolean allowCandidateTransfer = parameters.getBoolean(Configuration.dot(new String[]{"disambiguation", Configuration.PARAM_GS_HOMOGENIZATION_ALLOW_CANDIDATE_TRANSFER}), false);
        SynHit transferredCandidate = null;
        // Mentions are collected here and removed after the loop so that the gene set is not
        // modified while it is being iterated.
        List<GeneMention> toBeRemovedFromSet = new ArrayList<>();
        for (GeneMention gm : gs) {
            if (!gm.isRejected()) {
                continue;
            }
            MentionMappingResult mmr = gm.getMentionMappingResult();
            for (String taxId : gm.getTaxonomyIds()) {
                List<SynHit> candidates4tax = mmr.tax2originalCandidates.getOrDefault(taxId, Collections.emptyList());
                if (candidates4tax.isEmpty()) {
                    continue;
                }
                mmr.setFinalRankedCandidates(taxId, candidates4tax);
                mmr.removeRejectReason(taxId);
            }
            // Restoring the original candidates may already have un-rejected the mention.
            if (!gm.isRejected() || !allowCandidateTransfer) {
                continue;
            }
            if (transferredCandidate == null) {
                // Computed once, on first need: the best scored real candidate of the whole set.
                transferredCandidate = gs.stream()
                        .map(GeneMention::getMentionMappingResult)
                        .flatMap(result -> result.tax2finalRankedCandidates.values().stream())
                        .flatMap(Collection::stream)
                        .filter(hit -> hit != MentionMappingResult.REJECTION)
                        .max(Comparator.comparingDouble(SynHit::getOverallScore))
                        .orElseThrow(() -> new IllegalStateException("Could not find any non-rejected mention. This should not be possible at this point."));
            }
            String transferredCandidateTaxId = transferredCandidate.getTaxId();
            boolean doUnreject = false;
            if (gm.getTaxonomyIdsSet().contains(transferredCandidateTaxId)) {
                doUnreject = true;
            } else {
                Collection<?> geneSpeciesOccurrences = gm.getTaxonomyOccurrences().values();
                if (!geneSpeciesOccurrences.contains(GeneSpeciesOccurrence.COMPOUND) && !geneSpeciesOccurrences.contains(GeneSpeciesOccurrence.SPECIES_PREFIX)) {
                    gm.getTaxonomyOccurrences().put(transferredCandidateTaxId, GeneSpeciesOccurrence.CANDIDATE_TRANSFER);
                    doUnreject = true;
                } else {
                    toBeRemovedFromSet.add(gm);
                }
            }
            if (!doUnreject) {
                continue;
            }
            mmr.setFinalRankedCandidates(transferredCandidateTaxId, List.of(transferredCandidate));
            mmr.removeRejectReason(transferredCandidateTaxId);
        }
        for (GeneMention gm : toBeRemovedFromSet) {
            gs.remove(gm);
            GeneSet newGs = new GeneSet();
            newGs.add(gm);
            newGs.setSetId(List.of(MentionMappingResult.REJECTION));
            // The reject reason belongs to the new, rejected singleton set - not to the
            // original gene set, which has just been un-rejected.
            newGs.setRejectReason(MentionMappingResult.RejectReason.FAILED_TO_UNREJECT);
            gm.addGeneSet(newGs);
            gm.getGeneDocument().addGeneSet((Collection) newGs);
        }
    }

    /**
     * Decides the rejection state of a gene set by counting its rejected and non-rejected
     * mentions.
     * <p>
     * A majority of rejected mentions rejects the whole set, a majority of accepted mentions
     * un-rejects the set, and a tie is resolved by the mean score strategy.
     *
     * @param gs         the gene set to homogenize
     * @param parameters the configuration passed on to the un-rejection and mean score steps
     */
    private static void homogenizeGenesetRejectionByMajorityVote(GeneSet gs, Parameters parameters) {
        Map<Boolean, Long> votes = gs.stream()
                .collect(Collectors.partitioningBy(GeneMention::isRejected, Collectors.counting()));
        long rejectedVotes = votes.get(true);
        long acceptedVotes = votes.get(false);
        if (rejectedVotes == acceptedVotes) {
            homogenizeGenesetRejectionByMeanScore(gs, parameters);
            return;
        }
        if (rejectedVotes < acceptedVotes) {
            unrejectGeneset(gs, parameters);
            return;
        }
        gs.setSetId(List.of(MentionMappingResult.REJECTION));
        gs.setRejectReason(MentionMappingResult.RejectReason.REJECTION_MAJORITY_VOTE);
    }

    /**
     * Assigns a single gene ID to every non-rejected gene set of the document.
     * <p>
     * Does nothing when the configured method is {@code no_gs_homogenization} (the default).
     * Otherwise each non-rejected gene set is checked for rejection consistency, homogenized
     * by anchors, optionally by abbreviations or long forms, and finally by the configured
     * method.
     *
     * @param document   the document whose gene sets are processed
     * @param parameters the configuration holding the method name and the abbreviation and
     *                   long form switches
     * @throws IllegalArgumentException if the configured method is unknown
     */
    private static void homogenizeGenesetIds(GeneDocument document, Parameters parameters) {
        String homogenizationKey = Configuration.dot(new String[]{"disambiguation", Configuration.PARAM_GS_HOMOGENIZATION_METHOD});
        String homogenizationMethod = parameters.getString(homogenizationKey, "no_gs_homogenization");
        if (homogenizationMethod.equals("no_gs_homogenization")) {
            return;
        }
        boolean homogenizeByAbbreviations = parameters.getBoolean(Configuration.dot(new String[]{"disambiguation", Configuration.PARAM_GS_HOMOGENIZATION_BY_ABBREVIATION}));
        boolean homogenizeByLongforms = parameters.getBoolean(Configuration.dot(new String[]{"disambiguation", Configuration.PARAM_GS_HOMOGENIZATION_BY_LONGFORM}));
        assert !(homogenizeByAbbreviations && homogenizeByLongforms) : "Geneset ID homogenization is activated by abbreviation and by longform which is illegal.";
        for (GeneSet gs : document.getGeneSets()) {
            if (gs.isRejected()) {
                continue;
            }
            checkRejectionConsistency(gs, parameters);
            homogenizeGenesetIdsByAnchors(gs);
            // Abbreviations take precedence should both switches be set with assertions disabled.
            if (homogenizeByAbbreviations) {
                homogenizeGenesetIdsByAbbreviations(gs);
            } else if (homogenizeByLongforms) {
                homogenizeGenesetIdsByLongforms(gs);
            }
            switch (homogenizationMethod) {
                case "majority_vote":
                    homogenizeGenesetIdsByMajorityVote(gs, parameters);
                    break;
                case "max_balanced_score":
                    homogenizeGenesetIdsByMaxBalancedScore(gs);
                    break;
                case "max_balanced_score_sum":
                    homogenizeGenesetIdsByMaxBalancedScoreSum(gs);
                    break;
                case "no_gs_homogenization":
                    break;
                default:
                    throw new IllegalArgumentException("Illegal value for " + homogenizationKey + ": " + homogenizationMethod);
            }
        }
    }

    /**
     * Sets the gene set ID to the best scored result candidate of an abbreviation mention of
     * the set, provided the set has no ID yet.
     * <p>
     * The set is left unchanged when it contains no abbreviation mention or when the chosen
     * abbreviation mention has no result candidates.
     *
     * @param gs the gene set to homogenize
     */
    private static void homogenizeGenesetIdsByAbbreviations(GeneSet gs) {
        if (gs.getSetId() != null) {
            return;
        }
        gs.stream()
                .filter(GeneMention::isAbbreviation)
                .findAny()
                // flatMap instead of get(): a mention without candidates must not throw.
                .flatMap(abbreviation -> abbreviation.getResultCandidates().max(Comparator.comparingDouble(SynHit::getOverallScore)))
                .ifPresent(bestCandidate -> gs.setSetId(List.of(bestCandidate)));
    }

    /**
     * Sets the gene set ID to the best scored result candidate of an abbreviation long form
     * mention of the set, provided the set has no ID yet.
     * <p>
     * The set is left unchanged when it contains no long form mention or when the chosen long
     * form mention has no result candidates.
     *
     * @param gs the gene set to homogenize
     */
    private static void homogenizeGenesetIdsByLongforms(GeneSet gs) {
        if (gs.getSetId() != null) {
            return;
        }
        gs.stream()
                .filter(GeneMention::isAbbreviationLongForm)
                .findAny()
                // flatMap instead of get(): a mention without candidates must not throw.
                .flatMap(longForm -> longForm.getResultCandidates().max(Comparator.comparingDouble(SynHit::getOverallScore)))
                .ifPresent(bestCandidate -> gs.setSetId(List.of(bestCandidate)));
    }

    /**
     * Sets the gene set ID from the anchor candidates found among the result candidates of the
     * set's mentions.
     * <p>
     * Anchors are grouped by gene ID. With a single group its first hit becomes the set ID.
     * With several groups the group with the highest mean overall score wins and its first hit
     * becomes the set ID. Without anchors the gene set is left unchanged. An already existing
     * set ID is overwritten.
     *
     * @param gs the gene set to homogenize
     */
    private static void homogenizeGenesetIdsByAnchors(GeneSet gs) {
        Map<String, List<SynHit>> anchorsById = gs.stream()
                .flatMap(gm -> gm.getResultCandidates())
                .filter(SynHit::isAnchor)
                .collect(Collectors.groupingBy(SynHit::getId));
        if (anchorsById.isEmpty()) {
            return;
        }
        List<SynHit> winningGroup = null;
        if (anchorsById.size() == 1) {
            winningGroup = anchorsById.values().iterator().next();
        } else {
            // Start below every possible mean so that groups with a mean score <= 0 can win, too.
            double maxAvg = Double.NEGATIVE_INFINITY;
            for (List<SynHit> group : anchorsById.values()) {
                OptionalDouble average = group.stream().mapToDouble(SynHit::getOverallScore).average();
                if (average.isPresent() && average.getAsDouble() > maxAvg) {
                    maxAvg = average.getAsDouble();
                    winningGroup = group;
                }
            }
        }
        if (winningGroup != null && !winningGroup.isEmpty()) {
            gs.setSetId(List.of(winningGroup.get(0)));
        }
    }

    /**
     * Resolves contradicting gene ID mappings of a gene set without a set ID by choosing the
     * gene ID whose hits have the highest sum of overall scores.
     *
     * @param gs the gene set to homogenize; left unchanged when it already has a non-empty set
     *           ID or no contradicting mappings
     */
    private static void homogenizeGenesetIdsByMaxBalancedScoreSum(GeneSet gs) {
        if (gs.getSetId() != null && !gs.getSetId().isEmpty()) {
            return;
        }
        log.trace("Performing max balanced score sum gene set ID homogenization for document {}", gs.getDocId());
        if (!gs.hasContradictingGeneIdMappings()) {
            return;
        }
        SynHit winner = getMaxBalancedScoreSumId(gs.getResultSynHits());
        gs.setSetId(List.of(winner));
    }

    /**
     * Resolves contradicting gene ID mappings of a gene set without a set ID by choosing the
     * single hit with the highest overall score.
     *
     * @param gs the gene set to homogenize; left unchanged when it already has a non-empty set
     *           ID or no contradicting mappings
     */
    private static void homogenizeGenesetIdsByMaxBalancedScore(GeneSet gs) {
        if (gs.getSetId() != null && !gs.getSetId().isEmpty()) {
            return;
        }
        log.trace("Performing max balanced score gene set ID homogenization for document {}", gs.getDocId());
        if (!gs.hasContradictingGeneIdMappings()) {
            return;
        }
        SynHit winner = getMaxBalancedScoreId(gs.getResultSynHits());
        gs.setSetId(List.of(winner));
    }

    /**
     * Returns the hit with the highest overall score; on ties the hit encountered first wins.
     *
     * @param synHits the hits to choose from; the stream is consumed
     * @return the hit with the maximum overall score, never {@code null}
     * @throws IllegalStateException if no hit with a score above negative infinity exists,
     *                               e.g. because the stream is empty
     */
    private static SynHit getMaxBalancedScoreId(Stream<SynHit> synHits) {
        SynHit maxSh = null;
        double maxBalancedScore = Double.NEGATIVE_INFINITY;
        int numSynHits = 0;
        for (Iterator<SynHit> synIt = synHits.iterator(); synIt.hasNext(); ) {
            SynHit sh = synIt.next();
            ++numSynHits;
            double balancedScore = sh.getOverallScore();
            if (balancedScore > maxBalancedScore) {
                maxBalancedScore = balancedScore;
                maxSh = sh;
            }
        }
        // Checked explicitly: assertions are disabled by default and the failure would
        // otherwise surface as a bare NullPointerException in the trace statement below.
        if (maxSh == null) {
            throw new IllegalStateException("could not find any best SynHit hit with max balanced score. Number of SynHits: " + numSynHits);
        }
        log.trace("Got max balanced score of {} for SynHit with ID {}.", maxBalancedScore, maxSh.getId());
        return maxSh;
    }

    /**
     * Returns a hit representing the gene ID whose hits have the highest sum of overall
     * scores.
     * <p>
     * As a side effect the compare type of every passed hit is switched to
     * {@code SynHit.CompareType.ID}. NOTE(review): this presumably makes hits with the same
     * gene ID equal so that they share one map key - confirm against {@code SynHit}.
     *
     * @param synHits the hits to aggregate; the stream is consumed
     * @return the hit used as map key for the gene ID with the maximum score sum
     * @throws IllegalStateException if the stream contains no hits
     */
    private static SynHit getMaxBalancedScoreSumId(Stream<SynHit> synHits) {
        Map<SynHit, Double> scoreSumPerHit = synHits
                // The compare type has to be set before the hit is used as a map key below.
                .peek(sh -> sh.setCompareType(SynHit.CompareType.ID))
                .collect(Collectors.toMap(Function.identity(), SynHit::getOverallScore, Double::sum));
        Map.Entry<SynHit, Double> highestScoreSumHit = scoreSumPerHit.entrySet().stream()
                .max(Comparator.comparingDouble(Map.Entry::getValue))
                .orElseThrow(() -> new IllegalStateException("Could not determine the gene ID with the maximum balanced score sum because no SynHits were given."));
        log.trace("Got max balanced score sum of {} for gene ID ID {}.", highestScoreSumHit.getValue(), highestScoreSumHit.getKey().getId());
        return highestScoreSumHit.getKey();
    }

    /**
     * Resolves contradicting gene ID mappings of a gene set without a set ID by majority vote
     * over the result hits of the set.
     * <p>
     * The result hits are counted after their compare type has been switched to
     * {@code SynHit.CompareType.ID}. If exactly one entry remains as most frequent, it becomes
     * the set ID. Otherwise the tie is broken with the configured tie breaker method, which is
     * applied to all result hits whose ID is among the tied IDs.
     *
     * @param gs         the gene set to homogenize; left unchanged when it already has a
     *                   non-empty set ID or no contradicting mappings
     * @param parameters the configuration holding the tie breaker method
     * @throws IllegalStateException if a tie occurs and the configured tie breaker is neither
     *                               {@code max_balanced_score} nor {@code max_balanced_score_sum}
     */
    private static void homogenizeGenesetIdsByMajorityVote(GeneSet gs, Parameters parameters) {
        if (gs.getSetId() == null || gs.getSetId().isEmpty()) {
            log.trace("Performing majority vote gene set ID homogenization for document {}", (Object)gs.getDocId());
            if (gs.hasContradictingGeneIdMappings()) {
                SynHit homogenizedGsId;
                // Count the hits per map key and order the entries by descending count, then by
                // descending overall score. NOTE(review): equalsFirstSeenValue presumably keeps
                // entries only while their count equals the count of the first entry - confirm
                // against JavaStreamUtilities.
                List mostFrequentIds = gs.getResultSynHits().peek(sh -> sh.setCompareType(SynHit.CompareType.ID)).collect(Collectors.toMap(Function.identity(), x -> 1, Integer::sum)).entrySet().stream().sorted(Comparator.comparingInt(Map.Entry::getValue).thenComparingDouble(e -> ((SynHit)e.getKey()).getOverallScore()).reversed()).takeWhile(JavaStreamUtilities.equalsFirstSeenValue(Map.Entry::getValue)).map(Map.Entry::getKey).collect(Collectors.toList());
                if (mostFrequentIds.size() == 1) {
                    // Unambiguous winner of the vote.
                    homogenizedGsId = (SynHit)mostFrequentIds.get(0);
                } else {
                    // Tie: hand every result hit carrying one of the tied IDs to the tie breaker.
                    Set tiedIds = mostFrequentIds.stream().map(SynHit::getId).collect(Collectors.toSet());
                    // A fresh stream is requested because the one used for counting is consumed.
                    Stream<SynHit> synHitsWithTiedIds = gs.getResultSynHits().filter(sh -> tiedIds.contains(sh.getId()));
                    String tieBreakerKey = parameters.getString(Configuration.dot((String[])new String[]{"disambiguation", Configuration.PARAM_GS_MAJORITY_HOMOGENIZATION_TIE_BREAKER_METHOD}));
                    if (log.isTraceEnabled()) {
                        log.trace("Got a tie between {}. Using {} as a tie breaker.", (Object)mostFrequentIds.stream().map(SynHit::getId).collect(Collectors.joining(", ")), (Object)tieBreakerKey);
                    }
                    switch (tieBreakerKey) {
                        case "max_balanced_score": {
                            homogenizedGsId = DypsisGeneSetHomogenizator.getMaxBalancedScoreId(synHitsWithTiedIds);
                            break;
                        }
                        case "max_balanced_score_sum": {
                            homogenizedGsId = DypsisGeneSetHomogenizator.getMaxBalancedScoreSumId(synHitsWithTiedIds);
                            break;
                        }
                        default: {
                            throw new IllegalStateException("Unexpected value: " + tieBreakerKey);
                        }
                    }
                }
                gs.setSetId(List.of(homogenizedGsId));
            }
        }
    }

    /**
     * Verifies that a non-rejected gene set does not mix rejected and non-rejected mentions
     * after rejection homogenization has taken place.
     * <p>
     * The check only applies when low-score rejection or family rejection is enabled and a
     * rejection homogenization method other than {@code no_gs_homogenization} is configured.
     * The settings are read with the same defaults as in the rejection homogenization step
     * ({@code false} for the switches, {@code no_gs_homogenization} for the method), so
     * missing settings disable the check.
     *
     * @param gs         the gene set to check
     * @param parameters the configuration holding the rejection switches and the method name
     * @throws IllegalStateException if the gene set is not rejected but contains both rejected
     *                               and non-rejected mentions
     */
    private static void checkRejectionConsistency(GeneSet gs, Parameters parameters) {
        boolean rejectionActive = parameters.getBoolean(Configuration.dot(new String[]{"disambiguation", "do_low_score_rejection"}), false)
                || parameters.getBoolean(Configuration.dot(new String[]{"disambiguation", "do_family_rejection"}), false);
        if (!rejectionActive) {
            return;
        }
        String rejectionHomogenizationMethod = parameters.getString(Configuration.dot(new String[]{"disambiguation", Configuration.PARAM_GS_REJECTION_HOMOGENIZATION_METHOD}), "no_gs_homogenization");
        if ("no_gs_homogenization".equals(rejectionHomogenizationMethod)) {
            // Without rejection homogenization mixed gene sets are legitimate.
            return;
        }
        if (!gs.isRejected() && gs.hasRejectedAndNonRejectedMentions()) {
            throw new IllegalStateException("Unsolved case: A geneset contains rejected and not-rejected items although there was rejection homogenization by " + rejectionHomogenizationMethod + ". Document ID " + gs.getDocId() + ", gene names: " + gs.stream().map(GeneMention::getText).collect(Collectors.joining(", ")));
        }
    }

    /**
     * Reads the threshold for exact matches below the given configuration prefix.
     *
     * @param prefix     the configuration path the threshold key is appended to
     * @param parameters the configuration to read from
     * @return the value stored under {@code prefix} joined with {@code threshold_exact_matches}
     */
    private static double getExactMatchThreshold(String prefix, Parameters parameters) {
        String thresholdKey = Configuration.dot(new String[]{prefix, "threshold_exact_matches"});
        return parameters.getDouble(thresholdKey);
    }

    /**
     * Reads the threshold for approximate matches below the given configuration prefix.
     *
     * @param prefix     the configuration path the threshold key is appended to
     * @param parameters the configuration to read from
     * @return the value stored under {@code prefix} joined with {@code threshold_approx_matches}
     */
    private static double getApproxMatchThreshold(String prefix, Parameters parameters) {
        String thresholdKey = Configuration.dot(new String[]{prefix, "threshold_approx_matches"});
        return parameters.getDouble(thresholdKey);
    }
}

