/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.speciesassignment.services;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import com.google.common.collect.TreeMultimap;
import de.julielab.geneexpbase.candidateretrieval.SynHit;
import de.julielab.geneexpbase.configuration.Parameters;
import de.julielab.geneexpbase.genemodel.GeneDocument;
import de.julielab.geneexpbase.genemodel.GeneMention;
import de.julielab.geneexpbase.genemodel.GeneSpeciesOccurrence;
import de.julielab.geneexpbase.genemodel.MentionMappingResult;
import de.julielab.geneexpbase.genemodel.MeshHeading;
import de.julielab.geneexpbase.genemodel.SpeciesCandidates;
import de.julielab.geneexpbase.genemodel.SpeciesMention;
import de.julielab.java.utilities.FileUtilities;
import de.julielab.java.utilities.IOStreamUtilities;
import de.julielab.java.utilities.spanutils.OffsetMap;
import de.julielab.speciesassignment.Configuration;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.NavigableSet;
import java.util.Set;
import java.util.SortedMap;
import java.util.function.BiConsumer;
import java.util.stream.Collectors;
import org.apache.commons.lang3.Range;
import org.apache.commons.lang3.StringUtils;
import org.jetbrains.annotations.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class SpeciesHintSetter {
    /** Logger shared by all static methods of this class. */
    private static final Logger log = LoggerFactory.getLogger(SpeciesHintSetter.class);
    /**
     * Taxonomy ID that {@code setSpeciesHints} assigns with occurrence {@code DEFAULT} when no other hint
     * was found for a gene mention.
     * NOTE(review): 9606 is presumably the NCBI Taxonomy ID of Homo sapiens - confirm.
     */
    private static final String defaultSpecies = "9606";
    /**
     * Species prefix -> taxonomy IDs. Filled by {@code readSpeciesPrefixes()} from the classpath resource
     * {@code /speciesprefixes.map}; the second column of each line is the key, the first column the value.
     */
    private static Multimap<String, String> prefix2TaxId;
    /**
     * Lines of the classpath resource {@code /speciesprefixexceptions.txt} (without '#' lines). Gene mentions whose
     * lower-cased text is contained here are not subject to the species prefix handling in {@code setSpeciesHints}.
     */
    private static Set<String> speciesPrefixExceptions;
    /**
     * First column -> list of second-column values of the tab-separated resource {@code /desc2tax.gz} or
     * {@code /desc2tax}, loaded by {@code readMeshHeadings2TaxIdMap()}.
     */
    private static Map<String, List<String>> meshHeadings2TaxId;

    /**
     * Returns the taxonomy ID that is used as fallback when no species hint could be derived for a gene mention.
     *
     * @return the value of the {@code defaultSpecies} constant
     */
    public static String getDefaultSpecies() {
        return SpeciesHintSetter.defaultSpecies;
    }

    /**
     * Looks up the taxonomy IDs that are mapped to a MeSH heading (or heading part).
     *
     * @param heading the heading text to look up
     * @return the mapped taxonomy IDs, or {@code null} if the heading is unknown or the mapping resource
     *         could not be loaded
     */
    public static List<String> getMeshHeadings2TaxId(String heading) {
        // The map stays null when loading the resource failed with an IOException; callers already treat
        // a null result as "no mapping", so report that instead of throwing a NullPointerException.
        if (meshHeadings2TaxId == null) {
            return null;
        }
        return meshHeadings2TaxId.get(heading);
    }

    /**
     * Adds taxonomy IDs to the MeSH headings of a document and marks the document with
     * {@code MESH_TAX_IDS_ASSIGNED}.
     * <p>
     * Each heading text is split at commas followed by whitespace; every trimmed part is looked up via
     * {@link #getMeshHeadings2TaxId(String)} and all taxonomy IDs found are added to the heading.
     *
     * @param document the document whose MeSH headings should receive taxonomy IDs
     */
    public static void setSpeciesMeshHeadings(GeneDocument document) {
        Collection<MeshHeading> headings = document.getMeshHeadings();
        // setSpeciesHints treats the heading collection as nullable, so a document without headings must
        // not fail here; it still receives the state flag because there is nothing left to assign.
        if (headings != null) {
            for (MeshHeading heading : headings) {
                for (String headingPart : heading.getHeading().split(",\\s+")) {
                    List<String> taxIds = SpeciesHintSetter.getMeshHeadings2TaxId(headingPart.trim());
                    if (taxIds == null) {
                        continue;
                    }
                    for (String taxId : taxIds) {
                        heading.addTaxonomyId(taxId);
                    }
                }
            }
        }
        document.addState(GeneDocument.State.MESH_TAX_IDS_ASSIGNED);
    }

    /**
     * Collects, for one gene mention, the taxonomy IDs hinted at by its document together with the kind of
     * occurrence that produced each hint, stores the result on the mention and returns it.
     * <p>
     * Hints are gathered from: species mentions in the gene's noun phrase and sentence, species mentions in the
     * title, the sentence determined from the title length (see the note in the body), any species mention in the
     * document, MeSH taxonomy IDs, a species prefix of the gene name, the default species (only if nothing else was
     * found) and the tax IDs of the top gene candidates of the mention mapping result.
     * <p>
     * Side effects: a recognized species prefix whose tax ID occurs in the document is removed from the text of
     * {@code gm}, and the hints are set on {@code gm} via {@code setTaxonomyOcurrences}.
     *
     * @param gm           the gene mention to compute hints for; its document must be in the states
     *                     {@code SPECIES_MENTIONS_SET} and {@code MESH_TAX_IDS_ASSIGNED}
     * @param parameterMap configuration passed on to the sentence-level hint computation
     * @return the multimap from taxonomy ID to occurrence kinds that was also stored on {@code gm}
     */
    public static Multimap<String, GeneSpeciesOccurrence> setSpeciesHints(GeneMention gm, Parameters parameterMap) {
        GeneDocument document = gm.getGeneDocument();
        document.expectState(EnumSet.of(GeneDocument.State.SPECIES_MENTIONS_SET, GeneDocument.State.MESH_TAX_IDS_ASSIGNED));
        OffsetMap<String> chunks = document.getChunks();
        NavigableSet<Range<Integer>> sentences = document.getSentences();
        SpeciesCandidates species = document.getSpecies();
        Collection<MeshHeading> meshHeadings = document.getMeshHeadings();
        String documentTitle = document.getDocumentTitle();
        Range<Integer> geneOffsets = gm.getOffsets();
        Range<Integer> sentence = document.getOverlappingSentence(geneOffsets);
        TreeMultimap<String, GeneSpeciesOccurrence> mentions = TreeMultimap.create();
        OffsetMap<SpeciesMention> candidates = species.getAllMentionCandidates();
        List<String> meshTaxIds = meshHeadings != null ? meshHeadings.stream().map(MeshHeading::getTaxonomyIds).flatMap(Collection::stream).collect(Collectors.toList()) : Collections.emptyList();
        Set<String> taxIdsInDocument = candidates.values().stream().map(SpeciesMention::getTaxId).collect(Collectors.toSet());
        taxIdsInDocument.addAll(meshTaxIds);

        // Sentence-local hints are only possible when a sentence fully covering the gene mention exists.
        if (sentence != null && sentence.containsRange(geneOffsets)) {
            NavigableMap<Range<Integer>, String> sentenceChunks = chunks.restrictTo(sentence);
            NavigableMap<Range<Integer>, SpeciesMention> sentenceSpecies = candidates.restrictTo(sentence);
            mentions.putAll(SpeciesHintSetter.speciesInNounPhrase(document, gm, sentenceSpecies, sentenceChunks));
            SpeciesHintSetter.speciesInSentenceExcludePhrases(mentions, candidates, chunks, sentence, gm, parameterMap);
        }

        OffsetMap<SpeciesMention> titleCandidates = species.getTitleCandidates();
        for (SpeciesMention titleSpecies : titleCandidates.values()) {
            mentions.put(titleSpecies.getTaxId(), GeneSpeciesOccurrence.TITLE);
        }

        // Find the sentence at or before the offset right behind the title; if that is the very first sentence of
        // the document, move on to the following one.
        // NOTE(review): presumably this skips the title sentence to reach the first body sentence - confirm.
        int offsetAfterTitle = documentTitle.length() + 1;
        Range<Integer> firstSentence = sentences.floor(Range.between(offsetAfterTitle, offsetAfterTitle));
        if (firstSentence != null && firstSentence.equals(sentences.first())) {
            firstSentence = sentences.higher(firstSentence);
        }
        if (firstSentence != null) {
            mentions.putAll(SpeciesHintSetter.speciesInSentence(candidates, firstSentence, GeneSpeciesOccurrence.FIRST));
        }

        for (SpeciesMention anySpecies : candidates.values()) {
            mentions.put(anySpecies.getTaxId(), GeneSpeciesOccurrence.ANYWHERE);
        }

        if (meshHeadings != null) {
            for (String meshTaxId : meshTaxIds) {
                mentions.put(meshTaxId, GeneSpeciesOccurrence.MESH);
            }
        } else {
            for (String meshTaxId : species.getMeshCandidates()) {
                mentions.put(meshTaxId, GeneSpeciesOccurrence.MESH);
            }
        }

        if (prefix2TaxId != null) {
            String prefix = SpeciesHintSetter.getSpeciesPrefix(gm);
            // The exception set is null when its resource could not be read; treat that as "no exceptions".
            if (prefix != null && (speciesPrefixExceptions == null || !speciesPrefixExceptions.contains(gm.getText().toLowerCase()))) {
                boolean prefixConfirmedByDocument = false;
                for (String taxId : SpeciesHintSetter.getTaxIdsForPrefix(prefix)) {
                    if (!taxIdsInDocument.contains(taxId)) {
                        continue;
                    }
                    mentions.put(taxId, GeneSpeciesOccurrence.SPECIES_PREFIX);
                    prefixConfirmedByDocument = true;
                }
                // Strip the prefix exactly once. Doing it per matching tax ID could remove it repeatedly when the
                // remaining name happens to start with the same characters again.
                if (prefixConfirmedByDocument && gm.getText().startsWith(prefix)) {
                    gm.setText(gm.getText().substring(prefix.length()));
                }
            }
        }

        if (mentions.isEmpty() && !StringUtils.isBlank(defaultSpecies)) {
            mentions.put(defaultSpecies, GeneSpeciesOccurrence.DEFAULT);
        }

        MentionMappingResult mmr = gm.getMentionMappingResult();
        if (mmr != null && mmr.candidatesNoTaxRestriction != null && !mmr.candidatesNoTaxRestriction.isEmpty()) {
            SynHit firstHit = mmr.candidatesNoTaxRestriction.get(0);
            double bestScore = firstHit.getLuceneScore();
            // NOTE(review): "score - bestScore < 0.001" also holds for every hit scoring below the first one. If
            // the list is sorted best-first this accepts all candidates - confirm whether that is intended.
            mmr.candidatesNoTaxRestriction.stream()
                    .filter(sh -> (double) sh.getLuceneScore() - bestScore < 0.001 || sh.isExactMatch())
                    .forEach(hit -> mentions.put(hit.getTaxId(), GeneSpeciesOccurrence.HIGHEST_RANKED_GENE_CANDIDATE));
        }
        gm.setTaxonomyOcurrences(mentions);
        return mentions;
    }

    /**
     * Returns the taxonomy IDs registered for a species prefix in the prefix multimap.
     *
     * @param prefix the species prefix to look up
     * @return the collection the multimap holds for {@code prefix}
     */
    public static Collection<String> getTaxIdsForPrefix(String prefix) {
        Collection<String> taxIdsOfPrefix = prefix2TaxId.get(prefix);
        return taxIdsOfPrefix;
    }

    /**
     * Determines whether the text of a gene mention starts with a known species prefix.
     * <p>
     * Prefixes of length three, two and one are tried in this order. A prefix is accepted when it is a key of the
     * prefix map and the character directly following it is upper case. The two-letter variant requires a text
     * longer than three characters, the one-letter variant a text longer than two characters.
     *
     * @param gm the gene mention whose text is inspected
     * @return the accepted prefix or {@code null} if none matched
     * @throws RuntimeException if the first character of the text cannot be read (e.g. empty text); the failure
     *                          is logged and rethrown unchanged
     */
    @Nullable
    public static String getSpeciesPrefix(GeneMention gm) {
        String text = gm.getText();
        int textLength = text.length();

        // Both the three- and the two-letter variant are only accepted for texts longer than three characters.
        if (textLength > 3) {
            String threeLetters = text.substring(0, 3);
            if (prefix2TaxId.containsKey(threeLetters) && Character.isUpperCase(text.charAt(3))) {
                return threeLetters;
            }
            String twoLetters = text.substring(0, 2);
            if (prefix2TaxId.containsKey(twoLetters) && Character.isUpperCase(text.charAt(2))) {
                return twoLetters;
            }
        }

        String oneLetter;
        try {
            oneLetter = String.valueOf(text.charAt(0));
        }
        catch (Exception e) {
            log.error("Could not get first char of gene text {}", (Object)gm, (Object)e);
            throw e;
        }
        boolean oneLetterMatches = prefix2TaxId.containsKey(oneLetter) && textLength > 2 && Character.isUpperCase(text.charAt(1));
        return oneLetterMatches ? oneLetter : null;
    }

    /**
     * Finds species mentions that are part of the gene mention itself or of the chunk enclosing it and reports
     * their taxonomy IDs with occurrence {@code COMPOUND}.
     *
     * @param document        the document of the gene mention; only used for its ID in an error message
     * @param gm              the gene mention
     * @param sentenceSpecies the species mentions of the gene's sentence, keyed by offset range
     * @param sentenceChunks  the chunks of the gene's sentence, keyed by offset range
     * @return a multimap from taxonomy ID to {@code COMPOUND}; empty if the sentence has no species mentions
     * @throws IllegalArgumentException if the gene offsets do not form a valid sub map range of
     *                                  {@code sentenceSpecies}
     */
    private static Multimap<String, GeneSpeciesOccurrence> speciesInNounPhrase(GeneDocument document, GeneMention gm, NavigableMap<Range<Integer>, SpeciesMention> sentenceSpecies, NavigableMap<Range<Integer>, String> sentenceChunks) {
        TreeMultimap<String, GeneSpeciesOccurrence> mentionMap = TreeMultimap.create();
        if (sentenceSpecies.isEmpty()) {
            return mentionMap;
        }
        Range<Integer> geneOffsets = gm.getOffsets();
        // Zero-length ranges serve as the lower and upper search keys for the sub map lookups.
        Range<Integer> geneBegin = Range.between(geneOffsets.getMinimum(), geneOffsets.getMinimum());
        Range<Integer> geneEnd = Range.between(geneOffsets.getMaximum(), geneOffsets.getMaximum());

        // 1. Species mentions located inside the gene mention.
        SortedMap<Range<Integer>, SpeciesMention> speciesInMention;
        try {
            speciesInMention = sentenceSpecies.subMap(geneBegin, geneEnd);
        }
        catch (IllegalArgumentException e) {
            log.error("Could not retrieve species mentions in range {} for document {} and source map {}", geneOffsets, document.getId(), sentenceSpecies);
            throw e;
        }
        for (SpeciesMention speciesMention : speciesInMention.values()) {
            mentionMap.put(speciesMention.getTaxId(), GeneSpeciesOccurrence.COMPOUND);
        }

        // 2. Species mentions in the chunk that encloses the gene mention.
        Range<Integer> enclosingChunk = sentenceChunks.floorKey(geneOffsets);
        if (enclosingChunk != null && enclosingChunk.isOverlappedBy(geneOffsets)) {
            Range<Integer> chunkBegin = Range.between(enclosingChunk.getMinimum(), enclosingChunk.getMinimum());
            Range<Integer> chunkEnd = Range.between(enclosingChunk.getMaximum(), enclosingChunk.getMaximum());

            // From the chunk start up to the end of the gene mention.
            for (SpeciesMention speciesMention : sentenceSpecies.subMap(chunkBegin, true, geneEnd, true).values()) {
                mentionMap.put(speciesMention.getTaxId(), GeneSpeciesOccurrence.COMPOUND);
            }

            // From the start of the gene mention up to the chunk end. An invalid range is tolerated here.
            NavigableMap<Range<Integer>, SpeciesMention> fromGeneToChunkEnd = null;
            try {
                fromGeneToChunkEnd = sentenceSpecies.subMap(geneBegin, true, chunkEnd, true);
            }
            catch (IllegalArgumentException e) {
                // Log the bounds that were actually used for the failed lookup (gene start, chunk end).
                log.trace("Cannot get species within a gene NP because of invalid offsets {}-{}. This might be due to missing chunks.", geneOffsets.getMinimum(), enclosingChunk.getMaximum(), e);
            }
            if (fromGeneToChunkEnd != null) {
                for (SpeciesMention speciesMention : fromGeneToChunkEnd.values()) {
                    mentionMap.put(speciesMention.getTaxId(), GeneSpeciesOccurrence.COMPOUND);
                }
            }
        }
        return mentionMap;
    }

    /**
     * Adds sentence-level species hints for a gene mention to {@code occurrenceAssignments}:
     * <ul>
     * <li>{@code SENTENCE_PRECED} for species mentions between sentence start and gene start; when the
     * omit-in-compound parameter is set, species whose noun phrase chunks overlap a gene are skipped,</li>
     * <li>{@code SENTENCE_SUCCED} for species mentions between gene end and sentence end that directly follow
     * one of the words "in", "of", "on" (only if succeeding-species hints are enabled),</li>
     * <li>{@code SENTENCE_NEAREST_PRECEDE} for the nearest species mention at or before the gene within the
     * sentence,</li>
     * <li>{@code SENTENCE_NEAREST_SUCCEED} for the nearest species mention at or after the gene within the
     * sentence (only if succeeding-species hints are enabled; see the review note in the body).</li>
     * </ul>
     *
     * @param occurrenceAssignments the multimap the hints are added to
     * @param speciesCandidates     the species mentions to search
     * @param chunks                the chunks of the document
     * @param sentence              the sentence containing the gene mention
     * @param gm                    the gene mention
     * @param parameterMap          configuration providing the two boolean switches read here
     */
    private static void speciesInSentenceExcludePhrases(Multimap<String, GeneSpeciesOccurrence> occurrenceAssignments, OffsetMap<SpeciesMention> speciesCandidates, OffsetMap<String> chunks, Range<Integer> sentence, GeneMention gm, Parameters parameterMap) {
        Range<Integer> geneOffsets = gm.getOffsets();
        GeneDocument document = gm.getGeneDocument();

        boolean omitSpeciesInGenePhrases = false;
        try {
            omitSpeciesInGenePhrases = parameterMap.getBoolean(Configuration.PARAM_SYNONYM_APRIORI_SINGULAR_OMIT_IN_COMPOUND_FOR_SENTENCES);
        }
        catch (Exception e) {
            // Best effort: keep the default (false) but report through the logger instead of stderr.
            log.warn("Could not read the boolean parameter {}; using false.", Configuration.PARAM_SYNONYM_APRIORI_SINGULAR_OMIT_IN_COMPOUND_FOR_SENTENCES, e);
        }

        // Species mentions preceding the gene mention within its sentence.
        NavigableMap<Range<Integer>, SpeciesMention> precedingSentenceSpecies = speciesCandidates.restrictTo(Range.between(sentence.getMinimum(), geneOffsets.getMinimum()));
        for (SpeciesMention precedingSpecies : precedingSentenceSpecies.values()) {
            if (omitSpeciesInGenePhrases && SpeciesHintSetter.nounPhrasesOverlapGene(document, chunks, precedingSpecies)) {
                continue;
            }
            occurrenceAssignments.put(precedingSpecies.getTaxId(), GeneSpeciesOccurrence.SENTENCE_PRECED);
        }

        // Species mentions following the gene mention within its sentence.
        boolean hintSucceedingSpecies = parameterMap.getBoolean(Configuration.PARAM_HINT_SUCCEEDING_SPECIES);
        if (hintSucceedingSpecies) {
            NavigableMap<Range<Integer>, SpeciesMention> successiveSentenceSpecies = speciesCandidates.restrictTo(Range.between(geneOffsets.getMaximum(), sentence.getMaximum()));
            for (Map.Entry<Range<Integer>, SpeciesMention> succeeding : successiveSentenceSpecies.entrySet()) {
                if (SpeciesHintSetter.followsInOfOn(document, succeeding.getKey())) {
                    occurrenceAssignments.put(succeeding.getValue().getTaxId(), GeneSpeciesOccurrence.SENTENCE_SUCCED);
                }
            }
        }

        // Nearest species mention at or before the gene mention: prefer one overlapping the gene, otherwise
        // ask the document for the nearest previous one.
        OffsetMap<SpeciesMention> allCandidates = document.getSpecies().getAllMentionCandidates();
        Map.Entry<Range<Integer>, SpeciesMention> nearestPreviousSpeciesMention = allCandidates.getOverlapping(geneOffsets).firstEntry();
        if (nearestPreviousSpeciesMention == null || nearestPreviousSpeciesMention.getKey().getMinimum() > geneOffsets.getMinimum()) {
            nearestPreviousSpeciesMention = document.getNearestPreviousSpeciesMention(geneOffsets, null);
        }
        if (nearestPreviousSpeciesMention != null) {
            int speciesBegin = nearestPreviousSpeciesMention.getKey().getMinimum();
            if (speciesBegin >= 0 && (speciesBegin < document.getTitleOffsets().getMaximum() || speciesBegin > document.getAbstractOffsets().getMinimum()) && speciesBegin <= geneOffsets.getMinimum() && sentence.containsRange(nearestPreviousSpeciesMention.getKey())) {
                occurrenceAssignments.put(nearestPreviousSpeciesMention.getValue().getTaxId(), GeneSpeciesOccurrence.SENTENCE_NEAREST_PRECEDE);
            }
        }

        // Nearest species mention at or after the gene mention.
        if (hintSucceedingSpecies) {
            Map.Entry<Range<Integer>, SpeciesMention> nearestNextSpeciesMention = allCandidates.getOverlapping(geneOffsets).lastEntry();
            if (nearestNextSpeciesMention == null || nearestNextSpeciesMention.getKey().getMaximum() < geneOffsets.getMaximum()) {
                nearestNextSpeciesMention = document.getNearestNextSpeciesMention(geneOffsets, null);
            }
            if (nearestNextSpeciesMention != null && nearestNextSpeciesMention.getKey().getMaximum() >= geneOffsets.getMaximum() && sentence.containsRange(nearestNextSpeciesMention.getKey())) {
                String nextTaxId = nearestNextSpeciesMention.getValue().getTaxId();
                // NOTE(review): this hint is only assigned when the tax ID is among the gold tax IDs of the
                // mention, i.e. it depends on gold annotations - confirm that this is intended outside of
                // experiments.
                if (SpeciesHintSetter.followsInOfOn(document, nearestNextSpeciesMention.getKey()) && gm.getAllGoldTaxonomyIdsAsSet().contains(nextTaxId)) {
                    occurrenceAssignments.put(nextTaxId, GeneSpeciesOccurrence.SENTENCE_NEAREST_SUCCEED);
                }
            }
        }
    }

    /**
     * Checks whether the span from the first to the last "ChunkNP" chunk overlapping a species mention also
     * overlaps a gene of the document.
     */
    private static boolean nounPhrasesOverlapGene(GeneDocument document, OffsetMap<String> chunks, SpeciesMention speciesMention) {
        List<Range<Integer>> nounPhrases = chunks.getOverlapping(speciesMention).entrySet().stream()
                .filter(chunk -> "ChunkNP".equals(chunk.getValue()))
                .map(Map.Entry::getKey)
                .collect(Collectors.toList());
        // NOTE(review): without any overlapping noun phrase the range (0, 0) is checked, as before - confirm that
        // a gene at document offset 0 should exclude such a species mention.
        int chunksStart = nounPhrases.isEmpty() ? 0 : nounPhrases.get(0).getMinimum();
        int chunksEnd = nounPhrases.isEmpty() ? 0 : nounPhrases.get(nounPhrases.size() - 1).getMaximum();
        return document.getOverlappingGenes(Range.between(chunksStart, chunksEnd)).findAny().isPresent();
    }

    /**
     * Checks whether the covered text of the POS tag entry directly before {@code offsets} is "in", "of" or "on".
     */
    private static boolean followsInOfOn(GeneDocument document, Range<Integer> offsets) {
        Map.Entry<Range<Integer>, ?> previousToken = document.getPosTags().lowerEntry(offsets);
        if (previousToken == null) {
            return false;
        }
        String previousWord = document.getCoveredText(previousToken.getKey());
        return "in".equals(previousWord) || "of".equals(previousWord) || "on".equals(previousWord);
    }

    /**
     * Collects the taxonomy IDs of all species mentions that lie within a sentence.
     *
     * @param speciesCandidates the species mentions to search
     * @param sentence          the offset range the candidates are restricted to
     * @param order             the occurrence kind to associate with every taxonomy ID found
     * @return a multimap from taxonomy ID to {@code order}; empty if there are no candidates
     */
    private static Multimap<String, GeneSpeciesOccurrence> speciesInSentence(OffsetMap<SpeciesMention> speciesCandidates, Range<Integer> sentence, GeneSpeciesOccurrence order) {
        TreeMultimap<String, GeneSpeciesOccurrence> hintsInSentence = TreeMultimap.create();
        if (!speciesCandidates.isEmpty()) {
            speciesCandidates.restrictTo(sentence).values()
                    .forEach(speciesMention -> hintsInSentence.put(speciesMention.getTaxId(), order));
        }
        return hintsInSentence;
    }

    /**
     * Loads the mapping from descriptor names to taxonomy IDs once. The resource {@code /desc2tax.gz} is tried
     * first, then {@code /desc2tax}. Each line is split at tabs; the first column becomes the key and the second
     * column is added to the key's value list.
     * <p>
     * If no resource is found or reading fails, a warning is logged and the mapping is set to an empty map so that
     * lookups keep working. Lines with fewer than two columns are skipped with a warning instead of aborting class
     * initialization.
     */
    private static synchronized void readMeshHeadings2TaxIdMap() {
        if (meshHeadings2TaxId != null) {
            return;
        }
        Map<String, List<String>> mapping = Collections.emptyMap();
        try {
            InputStream descriptorMapping = FileUtilities.findResource("/desc2tax.gz");
            if (descriptorMapping == null) {
                descriptorMapping = FileUtilities.findResource("/desc2tax");
            }
            if (descriptorMapping == null) {
                log.warn("Could not read the mapping from descriptor names to taxonomy IDs at the classpath resource /desc2tax.gz or /desc2tax. Taxonomy ID recognition quality will be decreased.");
            } else {
                // Make sure the stream is released even if reading fails half-way.
                try (InputStream in = descriptorMapping) {
                    mapping = IOStreamUtilities.getLinesFromInputStream(in).stream()
                            .map(line -> line.split("\t"))
                            .filter(columns -> {
                                if (columns.length < 2) {
                                    log.warn("Reading descriptor to taxonomy ID mapping; skipping line {} because it has less than two columns.", (Object)Arrays.toString(columns));
                                    return false;
                                }
                                return true;
                            })
                            .collect(Collectors.groupingBy(columns -> columns[0], Collectors.mapping(columns -> columns[1], Collectors.toList())));
                }
            }
        }
        catch (IOException e) {
            log.warn("Could not read the mapping from descriptor names to taxonomy IDs at the classpath resource /desc2tax.gz or /desc2tax. Taxonomy ID recognition quality will be decreased.", e);
        }
        meshHeadings2TaxId = mapping;
    }

    /**
     * Loads the species prefix map once from the classpath resource {@code /speciesprefixes.map}. Lines starting
     * with '#' are ignored. Every other line is split at tabs; the second column is stored as key and the first
     * column as value.
     * <p>
     * Lines that do not have exactly two columns are reported with a warning. Lines with fewer than two columns
     * are skipped because they cannot provide key and value. If the resource is missing or unreadable, a warning
     * is logged and the map stays empty.
     */
    private static synchronized void readSpeciesPrefixes() {
        if (prefix2TaxId != null) {
            return;
        }
        prefix2TaxId = HashMultimap.create();
        try (InputStream in = SpeciesHintSetter.class.getResourceAsStream("/speciesprefixes.map")) {
            if (in == null) {
                log.warn("Could not read the species prefixes map which helps with species disambiguation. Species recognition performance will be somewhat lower. This is not a critical error, execution can continue.");
                return;
            }
            for (String line : IOStreamUtilities.getLinesFromInputStream(in)) {
                if (line.startsWith("#")) {
                    continue;
                }
                String[] columns = line.split("\t");
                if (columns.length != 2) {
                    log.warn("Reading species prefix file; line {} does not have exactly two columns.", (Object)Arrays.toString(columns));
                }
                if (columns.length < 2) {
                    // e.g. a blank line: there is no second column to use as key
                    continue;
                }
                prefix2TaxId.put(columns[1], columns[0]);
            }
        }
        catch (IOException e) {
            log.warn("Could not read the species prefixes map which helps with species disambiguation. Species recognition performance will be somewhat lower. This is not a critical error, execution can continue.", e);
        }
    }

    /**
     * Loads the set of species prefix exceptions once from the classpath resource
     * {@code /speciesprefixexceptions.txt}. Lines starting with '#' are ignored; every other line becomes one
     * entry of the set.
     * <p>
     * If the resource is missing or unreadable, a warning is logged and the set is left empty, so that users of
     * the set do not have to deal with {@code null}.
     */
    private static synchronized void readSpeciesPrefixExceptions() {
        if (speciesPrefixExceptions != null) {
            return;
        }
        Set<String> exceptions = Collections.emptySet();
        try (InputStream in = SpeciesHintSetter.class.getResourceAsStream("/speciesprefixexceptions.txt")) {
            if (in == null) {
                log.warn("Could not find the species prefix exceptions at the classpath resource /speciesprefixexceptions.txt. No gene name will be exempt from species prefix detection. This is not a critical error, execution can continue.");
            } else {
                exceptions = IOStreamUtilities.getLinesFromInputStream(in).stream()
                        .filter(line -> !line.startsWith("#"))
                        .collect(Collectors.toSet());
            }
        }
        catch (IOException e) {
            log.warn("Could not read the species prefix exceptions at the classpath resource /speciesprefixexceptions.txt. No gene name will be exempt from species prefix detection. This is not a critical error, execution can continue.", e);
        }
        speciesPrefixExceptions = exceptions;
    }

    // Load all lookup resources once when the class is initialized.
    static {
        readSpeciesPrefixes();
        readSpeciesPrefixExceptions();
        readMeshHeadings2TaxIdMap();
    }
}

