/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.speciesassignment.services;

import de.julielab.geneexpbase.configuration.Parameters;
import de.julielab.geneexpbase.genemodel.GeneDocument;
import de.julielab.geneexpbase.genemodel.SpeciesCandidates;
import de.julielab.geneexpbase.genemodel.SpeciesMention;
import de.julielab.java.utilities.FileUtilities;
import de.julielab.java.utilities.IOStreamUtilities;
import de.julielab.speciesassignment.Configuration;
import de.julielab.speciesassignment.spi.SpeciesDocumentScoringService;
import de.julielab.speciesassignment.spi.SynonymSpeciesCooccurrenceService;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.Collection;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.inject.Inject;
import org.apache.commons.lang3.tuple.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Default {@link SpeciesDocumentScoringService} implementation.
 *
 * <p>On construction it loads two resources: a table of taxonomy ID frequencies and a list of
 * virus taxonomy IDs that are treated as indicating human. {@link #computeTaxDocScores} combines
 * the frequencies with configurable per-mention weights into one score per taxonomy ID found in
 * a document.
 *
 * <p>The class is expected to be used as a singleton; with assertions enabled, creating a second
 * instance fails.
 */
public class SpeciesDocumentScoringServiceImpl
implements SpeciesDocumentScoringService {
    public static final String TAX_FREQUENCIES_GNORM_PLUS = "taxonomy_freq_gnormplus.txt";
    public static final String TAX_FREQUENCIES_GENO_PUBMED = "genetaxstats-pubmed.tsv.gz";
    public static final String TAX_FREQUENCIES_GENO_PMC = "genetaxstats-pmc.tsv.gz";
    /** Pseudo location: load the PubMed and the PMC GeNo statistics and sum their counts. */
    public static final String TAX_FREQUENCIES_GENO_BOTH = "genetaxstats-both";
    private static final Logger log = LoggerFactory.getLogger(SpeciesDocumentScoringServiceImpl.class);
    private static final AtomicInteger numInstances = new AtomicInteger(0);
    /** Resource listing the virus taxonomy IDs that are mapped to {@link #HUMAN_TAX_ID}. */
    private static final String HUMAN_INDICATING_VIRUS_RESOURCE = "SP_Virus2HumanList.txt";
    /** NCBI Taxonomy ID of Homo sapiens; used for virus mapping and as the last-resort default. */
    private static final String HUMAN_TAX_ID = "9606";
    /** Configuration prefix of the parameters read in {@link #computeTaxDocScores}. */
    private static final String PARAM_PREFIX = "species_assignment";

    private Set<String> humanIndicatingVirusTaxIds;
    private Map<String, Integer> taxFrequencyMap;
    private final SynonymSpeciesCooccurrenceService synonymSpeciesCooccurrenceService;

    /**
     * Creates the service and eagerly loads its resources.
     *
     * @param taxFrequencyFileLocation one of the {@code TAX_FREQUENCIES_GENO_*} constants, which
     *        selects the tab-separated GeNo format, or any other resource location, which is read
     *        as a whitespace-separated two-column file
     * @param synonymSpeciesCooccurrenceService fallback used by {@link #computeTaxDocScores} when
     *        a document has no species mentions
     * @throws IOException if one of the resources cannot be read
     */
    @Inject
    public SpeciesDocumentScoringServiceImpl(String taxFrequencyFileLocation, SynonymSpeciesCooccurrenceService synonymSpeciesCooccurrenceService) throws IOException {
        this.synonymSpeciesCooccurrenceService = synonymSpeciesCooccurrenceService;
        if (taxFrequencyFileLocation.equals(TAX_FREQUENCIES_GENO_BOTH)) {
            // Each call adds its counts to what is already loaded, so both files contribute.
            this.readGeNoTaxFrequencyFile(TAX_FREQUENCIES_GENO_PUBMED);
            this.readGeNoTaxFrequencyFile(TAX_FREQUENCIES_GENO_PMC);
        } else if (taxFrequencyFileLocation.equals(TAX_FREQUENCIES_GENO_PUBMED) || taxFrequencyFileLocation.equals(TAX_FREQUENCIES_GENO_PMC)) {
            this.readGeNoTaxFrequencyFile(taxFrequencyFileLocation);
        } else {
            this.readTwoColumnTaxFrequencyFile(taxFrequencyFileLocation);
        }
        this.readHumanIndicatingVirusTaxIds(HUMAN_INDICATING_VIRUS_RESOURCE);
        // The increment lives inside the assert expression, so the counter only advances when
        // assertions are enabled (-ea); it has no other use.
        assert (numInstances.incrementAndGet() == 1) : "There is more than one instance of " + SpeciesDocumentScoringServiceImpl.class.getCanonicalName() + " but it should be a singleton.";
    }

    /**
     * @param taxId taxonomy ID to look up
     * @return {@code true} if the ID is contained in the loaded human-indicating virus list
     */
    @Override
    public boolean humanIndicatingVirusTaxIdsContain(String taxId) {
        return this.humanIndicatingVirusTaxIds.contains(taxId);
    }

    /**
     * Loads the human-indicating virus taxonomy IDs, one per line. Does nothing if the set has
     * already been loaded.
     *
     * @param location resource location resolved through {@code FileUtilities.findResource}
     * @throws IOException if reading fails; the failure is logged before it is rethrown
     */
    private void readHumanIndicatingVirusTaxIds(String location) throws IOException {
        if (this.humanIndicatingVirusTaxIds != null) {
            return;
        }
        try (BufferedReader br = IOStreamUtilities.getReaderFromInputStream(FileUtilities.findResource(location))) {
            this.humanIndicatingVirusTaxIds = br.lines().collect(Collectors.toSet());
        }
        catch (IOException | NullPointerException e) {
            log.error("Could not read human-indicating virus species taxonomy IDs from location {}", location, e);
            throw e;
        }
    }

    /**
     * Loads taxonomy frequencies from a whitespace-separated file whose first column is the
     * taxonomy ID and whose second column is its integer count. Counts of repeated IDs are
     * summed. Does nothing if frequencies have already been loaded.
     *
     * @param location resource location resolved through {@code FileUtilities.findResource}
     * @throws IOException if reading fails; the failure is logged before it is rethrown
     */
    private void readTwoColumnTaxFrequencyFile(String location) throws IOException {
        if (this.taxFrequencyMap != null) {
            return;
        }
        try (BufferedReader br = IOStreamUtilities.getReaderFromInputStream(FileUtilities.findResource(location))) {
            this.taxFrequencyMap = br.lines()
                    .map(line -> line.split("\\s+"))
                    .collect(Collectors.toMap(cols -> cols[0].intern(), cols -> Integer.parseInt(cols[1]), Integer::sum));
        }
        catch (IOException | NullPointerException e) {
            log.error("Could not read two-column tax frequency file from location {}", location, e);
            throw e;
        }
    }

    /**
     * Loads taxonomy frequencies from a tab-separated GeNo statistics file, taking the taxonomy
     * ID from the second column and the integer count from the fifth. Counts of repeated IDs are
     * summed, and the result is added to any frequencies that are already loaded, so calling
     * this method for several files accumulates their counts.
     *
     * @param location resource location resolved through {@code FileUtilities.findResource}
     * @throws IOException if reading fails; the failure is logged before it is rethrown
     */
    private void readGeNoTaxFrequencyFile(String location) throws IOException {
        try (BufferedReader br = IOStreamUtilities.getReaderFromInputStream(FileUtilities.findResource(location))) {
            Map<String, Integer> fileCounts = br.lines()
                    .map(line -> line.split("\\t"))
                    .collect(Collectors.toMap(cols -> cols[1].intern(), cols -> Integer.parseInt(cols[4]), Integer::sum));
            if (this.taxFrequencyMap == null) {
                this.taxFrequencyMap = fileCounts;
            } else {
                // Build a fresh map instead of mutating either input; a previous null-guard
                // here silently dropped every file after the first.
                this.taxFrequencyMap = Stream.concat(this.taxFrequencyMap.entrySet().stream(), fileCounts.entrySet().stream())
                        .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, Integer::sum));
            }
        }
        catch (IOException | NullPointerException e) {
            log.error("Could not read GeNo tax frequency file from location {}", location, e);
            throw e;
        }
    }

    /**
     * @param taxId taxonomy ID to look up
     * @return the loaded frequency of the ID, or {@code 0} if the ID is unknown
     */
    @Override
    public int getTaxFrequency(String taxId) {
        return this.taxFrequencyMap.getOrDefault(taxId, 0);
    }

    /**
     * Computes a document-level score for each taxonomy ID mentioned in the document.
     *
     * <p>Human-indicating virus IDs are first replaced by {@code "9606"}. Each resulting ID starts
     * with {@code frequency / tax_frequency_norm} and then gains {@code weights.title} for every
     * title mention and {@code weights.text} for every remaining text mention.
     *
     * <p>If the document yields no IDs and the synonym-statistics parameter is enabled, the best
     * a-priori taxonomy ID from the co-occurrence service is used, if there is one. If the result
     * is still empty, {@code "9606"} is returned with a score of {@code 1.0}.
     *
     * @param document the document whose species mentions are scored
     * @param parameters source of the normalization factor, the weights and the fallback switch
     * @return a non-empty map from taxonomy ID to score
     */
    @Override
    public Map<String, Double> computeTaxDocScores(GeneDocument document, Parameters parameters) {
        double frequencyNormalizationFactor = parameters.getDouble(de.julielab.geneexpbase.configuration.Configuration.dot(PARAM_PREFIX, "tax_frequency_norm"));
        double titleWeight = parameters.getDouble(de.julielab.geneexpbase.configuration.Configuration.dot(PARAM_PREFIX, "weights.title"));
        double textWeight = parameters.getDouble(de.julielab.geneexpbase.configuration.Configuration.dot(PARAM_PREFIX, "weights.text"));

        SpeciesCandidates speciesMentions = document.getSpecies();
        Collection<? extends SpeciesMention> titleSpeciesMentions = speciesMentions.getTitleCandidates().values();
        Collection<? extends SpeciesMention> textSpeciesMentions = speciesMentions.getTextCandidates().values();
        // Make sure a title mention is not weighted a second time as a text mention.
        // NOTE(review): if values() is a live view, this also removes the mentions from the
        // document's text candidates - confirm that this side effect is intended.
        titleSpeciesMentions.forEach(textSpeciesMentions::remove);

        // Seed every distinct ID with its normalized corpus frequency.
        Map<String, Double> taxDocScores = Stream.concat(this.effectiveTaxIds(titleSpeciesMentions), this.effectiveTaxIds(textSpeciesMentions))
                .distinct()
                .collect(Collectors.toMap(Function.identity(), taxId -> (double) this.getTaxFrequency(taxId) / frequencyNormalizationFactor));
        // Then add the positional weight once per mention.
        this.effectiveTaxIds(titleSpeciesMentions).forEach(taxId -> taxDocScores.merge(taxId, titleWeight, Double::sum));
        this.effectiveTaxIds(textSpeciesMentions).forEach(taxId -> taxDocScores.merge(taxId, textWeight, Double::sum));

        if (taxDocScores.isEmpty() && parameters.getBoolean(Configuration.PARAM_SPECIES_USE_SYNONYM_STATISTICS_WHEN_NO_SPECIES_MENTIONS)) {
            Optional<Pair<String, Double>> bestDocumentLevelAPrioriTaxId = this.synonymSpeciesCooccurrenceService.getBestDocumentLevelAPrioriTaxId(document, null, parameters);
            bestDocumentLevelAPrioriTaxId.ifPresent(best -> taxDocScores.put(best.getLeft(), best.getRight()));
        }
        if (taxDocScores.isEmpty()) {
            taxDocScores.put(HUMAN_TAX_ID, 1.0);
        }
        return taxDocScores;
    }

    /** Streams the taxonomy IDs of the given mentions with human-indicating viruses mapped to human. */
    private Stream<String> effectiveTaxIds(Collection<? extends SpeciesMention> mentions) {
        return mentions.stream().map(SpeciesMention::getTaxId).map(this::toEffectiveTaxId);
    }

    /** Returns {@code "9606"} for a human-indicating virus ID and the ID itself otherwise. */
    private String toEffectiveTaxId(String taxId) {
        return this.humanIndicatingVirusTaxIdsContain(taxId) ? HUMAN_TAX_ID : taxId;
    }
}

