/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.speciesassignment;

import com.google.common.collect.Multimap;
import de.julielab.geneexpbase.configuration.Parameters;
import de.julielab.geneexpbase.genemodel.GeneDocument;
import de.julielab.geneexpbase.genemodel.GeneMention;
import de.julielab.geneexpbase.genemodel.GeneSpeciesOccurrence;
import de.julielab.geneexpbase.genemodel.SpeciesCandidates;
import de.julielab.geneexpbase.genemodel.SpeciesMention;
import de.julielab.java.utilities.spanutils.OffsetMap;
import de.julielab.speciesassignment.GeneSpeciesAssigner;
import de.julielab.speciesassignment.SpeciesAssignmentException;
import de.julielab.speciesassignment.services.SpeciesHintSetter;
import de.julielab.speciesassignment.spi.SpeciesAssignmentFilter;
import de.julielab.speciesassignment.spi.SpeciesDocumentScoringService;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.inject.Inject;
import org.apache.commons.lang3.Range;

/**
 * {@link GeneSpeciesAssigner} that gives every gene mention of a document exactly one taxonomy ID.
 *
 * <p>For each gene mention the following sources are tried in order; the first one that yields a
 * taxonomy ID wins:
 * <ol>
 *   <li>a taxonomy candidate of the mention that was recorded with
 *       {@code GeneSpeciesOccurrence.SPECIES_PREFIX},</li>
 *   <li>the nearest species mention that starts at or before the gene mention and lies within the
 *       gene mention's sentence,</li>
 *   <li>the nearest species mention that ends at or after the gene mention and lies within the
 *       gene mention's sentence,</li>
 *   <li>the taxonomy ID with the highest document-level score (see {@code computeTaxDocScores}).</li>
 * </ol>
 */
public class GNormPlusGeneSpeciesAssigner
implements GeneSpeciesAssigner {
    /** Not referenced anywhere in this class; kept as-is from the original. */
    private static final boolean EXPLAIN = false;
    /** Taxonomy ID used in place of human-indicating virus IDs and as the score fallback for documents without species mentions. */
    private static final String HUMAN_TAX_ID = "9606";
    /** Score added per species mention found among the title candidates. */
    private static final double TITLE_MENTION_WEIGHT = 2.0;
    /** Score added per species mention found among the text candidates. */
    private static final double TEXT_MENTION_WEIGHT = 1.0;
    /** Divisor applied to the taxonomy frequency to form the initial score of a taxonomy ID. */
    private static final double TAX_FREQUENCY_DIVISOR = 2.0E8;

    private final SpeciesDocumentScoringService speciesDocumentScoringService;
    private final SpeciesAssignmentFilter speciesAssignmentFilter;

    /**
     * Creates the assigner.
     *
     * @param speciesDocumentScoringService source of taxonomy frequencies and of the
     *        human-indicating virus check used for the document-level scores
     * @param speciesAssignmentFilter filter applied to the document's species mentions and
     *        assignments before genes are processed
     */
    @Inject
    public GNormPlusGeneSpeciesAssigner(SpeciesDocumentScoringService speciesDocumentScoringService, SpeciesAssignmentFilter speciesAssignmentFilter) {
        this.speciesDocumentScoringService = speciesDocumentScoringService;
        this.speciesAssignmentFilter = speciesAssignmentFilter;
    }

    /**
     * Prepares the document's species information and then sets a single taxonomy ID on every
     * gene mention, following the cascade described on this class. Finally adds the state
     * {@code SPECIES_ASSIGNED_TO_GENES} to the document.
     *
     * @param document the document whose gene mentions receive taxonomy IDs
     * @param parameterMap parameters handed on to the species hint setting step
     * @throws SpeciesAssignmentException declared by the interface; not thrown directly by the code of this method
     */
    @Override
    public void assign(GeneDocument document, Parameters parameterMap) throws SpeciesAssignmentException {
        SpeciesHintSetter.setSpeciesMeshHeadings(document);
        this.speciesAssignmentFilter.filterSpeciesMentions(document);
        // This (Parameters, GeneDocument) overload is not declared in this class;
        // presumably it is inherited from GeneSpeciesAssigner -- TODO confirm.
        this.setSpeciesHints(parameterMap, document);
        this.speciesAssignmentFilter.filterAssignments(document);
        document.expectState(EnumSet.of(GeneDocument.State.SPECIES_CANDIDATES_ASSIGNED));
        // NOTE(review): second call with the same argument as above; kept because the effect of
        // the intermediate filtering on the MeSH headings cannot be judged from this file.
        SpeciesHintSetter.setSpeciesMeshHeadings(document);
        Map<String, Double> taxDocScores = this.computeTaxDocScores(document);
        // Determined on first need and then reused for all later gene mentions.
        String highestGlobalScoringTax = null;
        for (GeneMention gm : document.getGenesIterable()) {
            boolean taxIdAssigned = GNormPlusGeneSpeciesAssigner.assignSpeciesPrefixTaxId(gm);
            Range<Integer> gmSentence = document.getOverlappingSentence(gm);
            OffsetMap<SpeciesMention> allCandidates = document.getSpecies().getAllMentionCandidates();
            // Both same-sentence steps need the sentence range. Without one they are skipped and
            // the document-level fallback applies (the following-mention step used to dereference
            // a null sentence here).
            if (gmSentence != null) {
                if (!taxIdAssigned) {
                    taxIdAssigned = GNormPlusGeneSpeciesAssigner.assignPrecedingSpecies(document, gm, gmSentence, allCandidates);
                }
                if (!taxIdAssigned) {
                    taxIdAssigned = GNormPlusGeneSpeciesAssigner.assignFollowingSpecies(document, gm, gmSentence, allCandidates);
                }
            }
            if (taxIdAssigned || taxDocScores.isEmpty()) {
                continue;
            }
            if (highestGlobalScoringTax == null) {
                highestGlobalScoringTax = GNormPlusGeneSpeciesAssigner.findHighestScoringTax(taxDocScores);
            }
            gm.setTaxonomyIds(Collections.singletonList(highestGlobalScoringTax));
        }
        document.addState(GeneDocument.State.SPECIES_ASSIGNED_TO_GENES);
    }

    /**
     * Assigns a taxonomy ID whose occurrences for this gene mention include
     * {@code GeneSpeciesOccurrence.SPECIES_PREFIX}. If several IDs qualify, the last one iterated wins.
     *
     * @return {@code true} if a taxonomy ID was set on {@code gm}
     */
    private static boolean assignSpeciesPrefixTaxId(GeneMention gm) {
        Multimap<String, GeneSpeciesOccurrence> taxonomyCandidates = gm.getTaxonomyOccurrences();
        boolean assigned = false;
        // keys() is kept as in the original so that the iteration order -- and with it the
        // winning ID when several qualify -- does not change.
        for (String taxId : taxonomyCandidates.keys()) {
            if (!taxonomyCandidates.get(taxId).contains(GeneSpeciesOccurrence.SPECIES_PREFIX)) {
                continue;
            }
            gm.setTaxonomyIds(Collections.singletonList(taxId));
            assigned = true;
        }
        return assigned;
    }

    /**
     * Assigns the taxonomy ID of the species mention that overlaps or most closely precedes the
     * gene mention, provided that mention lies completely within the gene mention's sentence.
     *
     * @param gmSentence offsets of the sentence containing {@code gm}; must not be {@code null}
     * @return {@code true} if a taxonomy ID was set on {@code gm}
     */
    private static boolean assignPrecedingSpecies(GeneDocument document, GeneMention gm, Range<Integer> gmSentence, OffsetMap<SpeciesMention> allCandidates) {
        Map.Entry<Range<Integer>, SpeciesMention> previousMention = allCandidates.getOverlapping(gm.getOffsets()).firstEntry();
        // An overlapping mention only counts if it does not start after the gene mention;
        // otherwise ask the document for the nearest previous one.
        if (previousMention == null || previousMention.getKey().getMinimum() > gm.getBegin()) {
            previousMention = document.getNearestPreviousSpeciesMention(gm.getOffsets(), null);
        }
        if (previousMention == null) {
            return false;
        }
        int speciesBegin = previousMention.getKey().getMinimum();
        // NOTE(review): the "> 0" condition excludes a species mention starting at offset 0;
        // kept unchanged from the original -- confirm whether that is intended.
        boolean accepted = speciesBegin > 0
                && (speciesBegin < document.getTitleOffsets().getMaximum() || speciesBegin > document.getAbstractOffsets().getMinimum())
                && speciesBegin <= gm.getBegin()
                && gmSentence.containsRange(previousMention.getKey());
        if (!accepted) {
            return false;
        }
        gm.setTaxonomyIds(Collections.singletonList(previousMention.getValue().getTaxId()));
        return true;
    }

    /**
     * Assigns the taxonomy ID of the species mention that overlaps or most closely follows the
     * gene mention, provided that mention lies completely within the gene mention's sentence.
     *
     * @param gmSentence offsets of the sentence containing {@code gm}; must not be {@code null}
     * @return {@code true} if a taxonomy ID was set on {@code gm}
     */
    private static boolean assignFollowingSpecies(GeneDocument document, GeneMention gm, Range<Integer> gmSentence, OffsetMap<SpeciesMention> allCandidates) {
        Map.Entry<Range<Integer>, SpeciesMention> nextMention = allCandidates.getOverlapping(gm.getOffsets()).lastEntry();
        // An overlapping mention only counts if it does not end before the gene mention does;
        // otherwise ask the document for the nearest next one.
        if (nextMention == null || nextMention.getKey().getMaximum() < gm.getEnd()) {
            nextMention = document.getNearestNextSpeciesMention(gm.getOffsets(), null);
        }
        if (nextMention == null
                || nextMention.getKey().getMaximum() < gm.getEnd()
                || !gmSentence.containsRange(nextMention.getKey())) {
            return false;
        }
        gm.setTaxonomyIds(Collections.singletonList(nextMention.getValue().getTaxId()));
        return true;
    }

    /**
     * Finds the taxonomy ID with the highest score. Only scores strictly greater than 0 are
     * considered; on ties the ID iterated first is kept.
     *
     * @return the best-scoring taxonomy ID, or {@code null} if no score is greater than 0
     */
    private static String findHighestScoringTax(Map<String, Double> taxDocScores) {
        String bestTaxId = null;
        double bestScore = 0.0;
        for (Map.Entry<String, Double> taxScore : taxDocScores.entrySet()) {
            if (taxScore.getValue() > bestScore) {
                bestTaxId = taxScore.getKey();
                bestScore = taxScore.getValue();
            }
        }
        return bestTaxId;
    }

    /**
     * Sets species hints on a single gene mention by delegating to {@link SpeciesHintSetter}.
     *
     * @param gm the gene mention to set hints on
     * @param parameterMap parameters passed through to {@link SpeciesHintSetter}
     */
    @Override
    public void setSpeciesHints(GeneMention gm, Parameters parameterMap) {
        SpeciesHintSetter.setSpeciesHints(gm, parameterMap);
    }

    /**
     * Formats an explanation and a taxonomy ID as {@code explanation:tax}.
     * Not called anywhere in this class.
     */
    private String explain(String tax, String explanation) {
        return String.format("%s:%s", explanation, tax);
    }

    /**
     * Computes a score per taxonomy ID from the document's species mentions.
     *
     * <p>Taxonomy IDs for which the scoring service reports a human-indicating virus are counted
     * as {@code "9606"}. Each distinct ID starts with its taxonomy frequency divided by
     * {@code 2.0E8}; every title mention then adds {@code 2.0} and every text mention adds
     * {@code 1.0}. If the document yields no ID at all, the result contains only
     * {@code "9606"} with score {@code 1.0}.
     *
     * @param document the document whose title and text species candidates are scored
     * @return a mutable map from taxonomy ID to score; never empty
     */
    private Map<String, Double> computeTaxDocScores(GeneDocument document) {
        SpeciesCandidates speciesMentions = document.getSpecies();
        Collection<SpeciesMention> titleSpeciesMentions = speciesMentions.getTitleCandidates().values();
        Collection<SpeciesMention> textSpeciesMentions = speciesMentions.getTextCandidates().values();
        // Title mentions must not be counted again as text mentions.
        // NOTE(review): if values() is a live view of the document's text candidates, this
        // removal also alters the document -- confirm that this is intended.
        titleSpeciesMentions.forEach(textSpeciesMentions::remove);
        Function<String, String> virusToHumanFunction = taxId -> this.speciesDocumentScoringService.humanIndicatingVirusTaxIdsContain(taxId) ? HUMAN_TAX_ID : taxId;
        Map<String, Double> taxDocScores = Stream.concat(titleSpeciesMentions.stream(), textSpeciesMentions.stream())
                .map(SpeciesMention::getTaxId)
                .map(virusToHumanFunction)
                .distinct()
                .collect(Collectors.toMap(Function.identity(), taxId -> (double) this.speciesDocumentScoringService.getTaxFrequency(taxId) / TAX_FREQUENCY_DIVISOR));
        titleSpeciesMentions.stream()
                .map(SpeciesMention::getTaxId)
                .map(virusToHumanFunction)
                .forEach(taxId -> taxDocScores.merge(taxId, TITLE_MENTION_WEIGHT, Double::sum));
        textSpeciesMentions.stream()
                .map(SpeciesMention::getTaxId)
                .map(virusToHumanFunction)
                .forEach(taxId -> taxDocScores.merge(taxId, TEXT_MENTION_WEIGHT, Double::sum));
        if (taxDocScores.isEmpty()) {
            taxDocScores.put(HUMAN_TAX_ID, 1.0);
        }
        return taxDocScores;
    }

    /** No resources are held by this class; nothing to release. */
    @Override
    public void shutdown() {
    }

    /**
     * Convenience variant of {@link #assign(GeneDocument, Parameters)} that passes {@code null}
     * as parameters.
     *
     * @param document the document whose gene mentions receive taxonomy IDs
     * @throws SpeciesAssignmentException see {@link #assign(GeneDocument, Parameters)}
     */
    @Override
    public void assign(GeneDocument document) throws SpeciesAssignmentException {
        this.assign(document, null);
    }
}

