/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.genemapper.resources.uima;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import de.julielab.costosys.configuration.FieldConfig;
import de.julielab.costosys.dbconnection.CoStoSysConnection;
import de.julielab.costosys.dbconnection.DataBaseConnector;
import de.julielab.costosys.dbconnection.util.CoStoSysSQLRuntimeException;
import de.julielab.geneexpbase.TermNormalizer;
import de.julielab.java.utilities.FileUtilities;
import de.julielab.java.utilities.IOStreamUtilities;
import de.julielab.jcore.types.ChunkNP;
import de.julielab.jcore.types.EntityMention;
import de.julielab.jcore.types.Gene;
import de.julielab.jcore.types.MeshHeading;
import de.julielab.jcore.types.Organism;
import de.julielab.jcore.types.Sentence;
import de.julielab.jcore.utility.JCoReTools;
import de.julielab.jcore.utility.index.JCoReOverlapAnnotationIndex;
import de.julielab.xml.JulieXMLTools;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Stream;
import org.apache.commons.lang3.tuple.ImmutableTriple;
import org.apache.commons.lang3.tuple.Triple;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CASException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * UIMA consumer that collects co-occurrences of normalized gene synonyms and species taxonomy IDs
 * and writes them as counts into a database table.
 *
 * <p>Only {@link Gene} annotations whose specific type is {@code GazetteerGene} and that overlap
 * with at least one gene annotation of a different (or no) specific type are counted. For each
 * such mention, taxonomy IDs are collected on the scopes defined by {@link OccurrenceScope}.
 * Occurrences are buffered in memory and flushed to the database every {@value #BATCH_SIZE}
 * processed documents as well as on batch and collection completion.
 */
@ResourceMetaData(name = "JCoRe Synonym Species Occurrences DB Writer", description = "Counts common occurrences of gene synonyms and species mentions and stores the counts in a Postgres database. The idea is to create an a priori distribution of species for each synonym to use for disambiguation. For this purpose, two sources of gene mentions are used: A gazetteer based on the synonym dictionary created by the _makeDictionary.sh script and a higher-precision machine learning-based approach for false positive filtering. The gazetteer gene mentions must have the specific type 'GazetteerGene' to be counted. Apart from the ML-based gene mentions for FP filtering there should not any other gene mentions in the CAS to avoid counting mistakes.")
@TypeCapability(inputs = {"de.julielab.jcore.types.Organism", "de.julielab.jcore.types.Gene"})
public class SynonymSpeciesOccurrencesConsumer extends JCasAnnotator_ImplBase {
    public static final String PARAM_COSTOSYS_FILE = "CostosysConfiguration";
    public static final String PARAM_TABLE_NAME = "TableName";

    private static final Logger log = LoggerFactory.getLogger(SynonymSpeciesOccurrencesConsumer.class);

    /** Specific type that marks a gene mention as coming from the gazetteer. */
    private static final String GAZETTEER_GENE_TYPE = "GazetteerGene";

    /** Number of processed documents after which the buffered occurrences are flushed. */
    private static final long BATCH_SIZE = 500L;

    /**
     * MeSH descriptor name to taxonomy IDs. Shared by all instances; loaded once under the map's
     * monitor in {@link #readDesc2TaxMap()}.
     */
    private static final Multimap<String, String> desc2TaxId = HashMultimap.create();

    /**
     * Maps the second column of {@code /reference_species.txt} to its first column. Shared by all
     * instances; loaded once under the map's monitor in {@link #readReferenceMap()}.
     */
    private static final Map<String, String> referenceSpeciesTaxIds = new HashMap<>();

    @ConfigurationParameter(name = PARAM_COSTOSYS_FILE, description = "Path to the CoStoSys configuration file the specifies the database to write to.")
    private String costosysConfiguration;
    @ConfigurationParameter(name = PARAM_TABLE_NAME, mandatory = false, description = "Optional. The name of the database table in which the species-genesynonym occurrence counts will be stored. Defaults to 'occurrences'.")
    private String tableName;

    private DataBaseConnector dbc;
    /** Buffer of (synonym, taxonomy ID, scope) triples that have not been written to the database yet. */
    private List<Triple<String, String, OccurrenceScope>> occurrences;
    private final TermNormalizer normalizer = new TermNormalizer();
    private long processed;

    /**
     * Reads the configuration parameters, creates the target table if it does not exist yet and
     * loads the descriptor-to-taxonomy and reference species mappings.
     *
     * @param aContext the UIMA context providing the configuration parameter values
     * @throws ResourceInitializationException if the CoStoSys configuration or a mapping resource
     *         cannot be read, or if table creation fails for a reason other than the table
     *         already existing
     */
    @Override
    public void initialize(UimaContext aContext) throws ResourceInitializationException {
        super.initialize(aContext);
        costosysConfiguration = (String) aContext.getConfigParameterValue(PARAM_COSTOSYS_FILE);
        tableName = Optional.ofNullable((String) aContext.getConfigParameterValue(PARAM_TABLE_NAME)).orElse("occurrences");
        try {
            dbc = new DataBaseConnector(costosysConfiguration);
            addOccurrencesTableFieldConfiguration();
            dbc.createTable(tableName, "occurrences", "Created by " + SynonymSpeciesOccurrencesConsumer.class.getCanonicalName() + " on " + new Date() + ". This table collects counts of common occurrences of gene synonyms and species taxonomy IDs. Its aim is to serve as a source of disambiguation for the jcore-gene-mapper-ae when it is not clear to which species a gene might belong and there are multiple (or even no) candidates to chose from.");
        } catch (FileNotFoundException e) {
            log.error("Could not instantiate DatabaseConnector", e);
            throw new ResourceInitializationException(e);
        } catch (CoStoSysSQLRuntimeException e) {
            // Only an "already exists" failure is benign; the message may be null for other failures.
            String message = e.getMessage();
            if (message == null || !message.contains("already exists")) {
                throw new ResourceInitializationException(e);
            }
            log.info("This exception is expected and is no cause for alarm.");
        }
        occurrences = new ArrayList<>();
        readDesc2TaxMap();
        try {
            readReferenceMap();
        } catch (IOException e) {
            log.error("Could not read the reference species taxonomy ID map", e);
            throw new ResourceInitializationException(e);
        }
        processed = 0L;
    }

    /**
     * Loads {@code /reference_species.txt} into {@link #referenceSpeciesTaxIds} unless the map has
     * already been populated. Each non-blank line must have at least two tab-separated columns;
     * the second column becomes the key and the first column the value.
     *
     * @throws IOException if the resource is missing, cannot be read or contains a malformed line
     */
    private void readReferenceMap() throws IOException {
        synchronized (referenceSpeciesTaxIds) {
            if (!referenceSpeciesTaxIds.isEmpty()) {
                return;
            }
            // Load into a local map first so that a failure midway never leaves a partially
            // filled shared map that later instances would mistake for a complete one.
            Map<String, String> loaded = new HashMap<>();
            try (InputStream referenceSpecies = FileUtilities.findResource("/reference_species.txt")) {
                if (referenceSpecies == null) {
                    throw new FileNotFoundException("Could not find the reference species mapping file expected on the classpath as /reference_species.txt.");
                }
                try (BufferedReader br = IOStreamUtilities.getReaderFromInputStream(referenceSpecies)) {
                    String line;
                    while ((line = br.readLine()) != null) {
                        if (line.trim().isEmpty()) {
                            continue;
                        }
                        String[] columns = line.split("\t");
                        if (columns.length < 2) {
                            throw new IOException("Malformed line in /reference_species.txt, expected two tab-separated columns: " + line);
                        }
                        loaded.put(columns[1], columns[0]);
                    }
                }
            }
            referenceSpeciesTaxIds.putAll(loaded);
        }
    }

    /**
     * Registers the field configuration named {@code occurrences} with the database connector.
     * The columns {@code synonym}, {@code tax_id} and {@code scope} form the primary key;
     * {@code count} holds the occurrence count.
     */
    private void addOccurrencesTableFieldConfiguration() {
        List<Map<String, String>> columnsDefinition = new ArrayList<>();
        columnsDefinition.add(JulieXMLTools.createField("name", "synonym", "type", "text", "retrieve", "true", "primaryKey", "true"));
        columnsDefinition.add(JulieXMLTools.createField("name", "tax_id", "type", "integer", "retrieve", "true", "primaryKey", "true"));
        columnsDefinition.add(JulieXMLTools.createField("name", "scope", "type", "text", "retrieve", "true", "primaryKey", "true"));
        columnsDefinition.add(JulieXMLTools.createField("name", "count", "type", "integer", "retrieve", "true"));
        FieldConfig occurrencesFieldConfig = new FieldConfig(columnsDefinition, null, "occurrences");
        dbc.addFieldConfiguration(occurrencesFieldConfig);
    }

    /**
     * Loads the MeSH descriptor to taxonomy ID mapping from {@code /desc2tax} (falling back to
     * {@code /desc2tax.gz}) into {@link #desc2TaxId} unless it has already been populated. Each
     * non-blank line must have at least two tab-separated columns: descriptor name and taxonomy ID.
     *
     * @throws ResourceInitializationException if the resource is missing, cannot be read or
     *         contains a malformed line
     */
    private void readDesc2TaxMap() throws ResourceInitializationException {
        synchronized (desc2TaxId) {
            if (!desc2TaxId.isEmpty()) {
                return;
            }
            try {
                InputStream mapping = FileUtilities.findResource("/desc2tax");
                if (mapping == null) {
                    mapping = FileUtilities.findResource("/desc2tax.gz");
                }
                if (mapping == null) {
                    throw new ResourceInitializationException(new FileNotFoundException("Could not find the desc2tax file that maps MeSH descriptor names to taxonomy IDs."));
                }
                // Same reasoning as for the reference map: publish only a completely loaded mapping.
                Multimap<String, String> loaded = HashMultimap.create();
                try (InputStream in = mapping; BufferedReader br = IOStreamUtilities.getReaderFromInputStream(in)) {
                    String line;
                    while ((line = br.readLine()) != null) {
                        if (line.trim().isEmpty()) {
                            continue;
                        }
                        String[] columns = line.split("\t");
                        if (columns.length < 2) {
                            throw new IOException("Malformed line in desc2tax, expected two tab-separated columns: " + line);
                        }
                        loaded.put(columns[0].trim(), columns[1].trim().intern());
                    }
                }
                desc2TaxId.putAll(loaded);
            } catch (IOException e) {
                log.error("IOException while searching for the descriptor to taxonomy ID mapping file", e);
                throw new ResourceInitializationException(e);
            }
        }
    }

    /**
     * Collects the synonym/species occurrences of one document into the in-memory buffer and
     * triggers {@link #batchProcessComplete()} after every {@value #BATCH_SIZE} documents.
     *
     * @param jCas the document to process
     * @throws AnalysisEngineProcessException if flushing the buffer to the database fails
     */
    @Override
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        FSIterator<Annotation> geneIt = jCas.getAnnotationIndex(Gene.type).iterator();
        if (geneIt.hasNext()) {
            collectOccurrences(jCas, geneIt);
        }
        ++processed;
        if (processed % BATCH_SIZE == 0L) {
            log.debug("Triggering batchProcessComplete after {} processed documents.", BATCH_SIZE);
            // Failures are logged where the flush happens; they simply propagate from here.
            batchProcessComplete();
        }
    }

    /** Buffers the occurrences of all eligible gazetteer gene mentions delivered by {@code geneIt}. */
    private void collectOccurrences(JCas jCas, FSIterator<Annotation> geneIt) {
        Set<String> docTaxIds = getAllTaxIdsInDocument(jCas);
        Set<String> meshTaxIds = getMeshTaxIds(jCas);
        // Synonym/taxonomy ID pairs already counted on NP or sentence level; these are not counted
        // again on document level.
        Multimap<String, String> synTaxPairs = HashMultimap.create();
        Set<String> synonyms = new HashSet<>();
        JCoReOverlapAnnotationIndex<Sentence> sentenceIndex = new JCoReOverlapAnnotationIndex<>(jCas, Sentence.type);
        JCoReOverlapAnnotationIndex<Organism> organismIndex = new JCoReOverlapAnnotationIndex<>(jCas, Organism.type);
        JCoReOverlapAnnotationIndex<ChunkNP> npIndex = new JCoReOverlapAnnotationIndex<>(jCas, ChunkNP.type);
        JCoReOverlapAnnotationIndex<Gene> flairGeneIndex = new JCoReOverlapAnnotationIndex<>(jCas, Gene.type);

        while (geneIt.hasNext()) {
            EntityMention gazSynonym = (EntityMention) geneIt.next();
            if (!isGazetteerGene(gazSynonym)) {
                continue;
            }
            // A gazetteer hit is only counted when a gene mention from another source overlaps it.
            boolean confirmed = flairGeneIndex.search(gazSynonym).stream().anyMatch(gm -> !isGazetteerGene(gm));
            if (!confirmed) {
                continue;
            }
            String synonym = getSynonym(gazSynonym);
            synonyms.add(synonym);

            int addedInNp = 0;
            Optional<ChunkNP> np = npIndex.search(gazSynonym).stream().findAny();
            if (np.isPresent()) {
                addedInNp = recordOrganisms(organismIndex.search(np.get()).stream(), synonym, OccurrenceScope.NP, synTaxPairs);
            }
            // The sentence is only consulted when the noun phrase did not yield any taxonomy ID.
            if (addedInNp == 0) {
                Optional<Sentence> sentence = sentenceIndex.search(gazSynonym).stream().findAny();
                if (sentence.isPresent()) {
                    recordOrganisms(organismIndex.search(sentence.get()).stream(), synonym, OccurrenceScope.SENTENCE, synTaxPairs);
                }
            }
            for (String taxId : meshTaxIds) {
                occurrences.add(new ImmutableTriple<>(synonym, taxId, OccurrenceScope.MESH));
            }
        }

        for (String synonym : synonyms) {
            for (String taxId : docTaxIds) {
                if (!synTaxPairs.containsEntry(synonym, taxId)) {
                    occurrences.add(new ImmutableTriple<>(synonym, taxId, OccurrenceScope.DOCUMENT));
                }
            }
        }
    }

    /**
     * Buffers one occurrence per non-null taxonomy ID of the given organisms (the organism's own
     * entry ID plus its mapped reference ID, if any) and remembers the synonym/taxonomy ID pair.
     *
     * @return the number of occurrences added to the buffer
     */
    private int recordOrganisms(Stream<Organism> organisms, String synonym, OccurrenceScope scope, Multimap<String, String> synTaxPairs) {
        int sizeBefore = occurrences.size();
        organisms.map(organism -> organism.getResourceEntryList(0).getEntryId())
                .flatMap(taxId -> Stream.of(taxId, referenceSpeciesTaxIds.get(taxId)))
                .filter(Objects::nonNull)
                .forEach(taxId -> {
                    synTaxPairs.put(synonym, taxId);
                    occurrences.add(new ImmutableTriple<>(synonym, taxId, scope));
                });
        return occurrences.size() - sizeBefore;
    }

    /** Returns whether the mention's specific type marks it as a gazetteer gene. */
    private static boolean isGazetteerGene(EntityMention mention) {
        return GAZETTEER_GENE_TYPE.equals(mention.getSpecificType());
    }

    /**
     * Returns the normalized covered text of the gene mention.
     *
     * @throws StringIndexOutOfBoundsException if the mention's offsets are invalid; the offsets
     *         are logged before the exception is rethrown
     */
    private String getSynonym(EntityMention gene) {
        try {
            return normalizer.normalize(gene.getCoveredText());
        } catch (StringIndexOutOfBoundsException e) {
            try {
                log.error("Gene mention in document {} has invalid offsets: {}-{} (document text length: {})", JCoReTools.getDocId(gene.getCAS().getJCas()), gene.getBegin(), gene.getEnd(), gene.getCAS().getDocumentText().length());
            } catch (CASException casException) {
                log.error("Gene mention has invalid offsets. Cannot output more details due to CASException when trying to obtain more information.");
            }
            throw e;
        }
    }

    /**
     * Returns the taxonomy IDs mapped from the document's MeSH heading descriptor names, together
     * with the reference IDs those taxonomy IDs map to.
     */
    private Set<String> getMeshTaxIds(JCas jCas) {
        Set<String> meshTaxIds = new HashSet<>();
        for (Annotation annotation : jCas.getAnnotationIndex(MeshHeading.type)) {
            MeshHeading heading = (MeshHeading) annotation;
            // Multimap.get never returns null; unknown descriptors yield an empty collection.
            for (String taxId : desc2TaxId.get(heading.getDescriptorName())) {
                meshTaxIds.add(taxId);
                String referenceId = referenceSpeciesTaxIds.get(taxId);
                if (referenceId != null) {
                    meshTaxIds.add(referenceId);
                }
            }
        }
        return meshTaxIds;
    }

    /**
     * Returns the entry IDs of all organism annotations in the document, together with the
     * reference IDs they map to. Organisms without an entry ID are skipped, matching the null
     * filtering applied on noun phrase and sentence level.
     */
    private Set<String> getAllTaxIdsInDocument(JCas jCas) {
        Set<String> taxIdMentions = new HashSet<>();
        for (Annotation annotation : jCas.getAnnotationIndex(Organism.type)) {
            String taxId = ((Organism) annotation).getResourceEntryList(0).getEntryId();
            if (taxId == null) {
                // A null ID would later fail in Integer.parseInt and abort the whole batch.
                continue;
            }
            taxIdMentions.add(taxId);
            String referenceId = referenceSpeciesTaxIds.get(taxId);
            if (referenceId != null) {
                taxIdMentions.add(referenceId);
            }
        }
        return taxIdMentions;
    }

    /**
     * Writes the buffered occurrences to the database in one batch, incrementing the count of
     * rows that already exist, and clears the buffer on success. On failure the buffer is kept.
     *
     * @throws SQLException if the batch cannot be executed or committed
     */
    private void sendOccurrencesToDatabase() throws SQLException {
        if (occurrences.isEmpty()) {
            log.debug("No gene synonym species occurrences to send to the database.");
            return;
        }
        log.debug("Sending {} gene synonym species occurrences to the database", occurrences.size());
        // The table name is an identifier and cannot be bound as a statement parameter; it comes
        // from the component configuration.
        String sql = "INSERT INTO " + tableName + "(synonym,tax_id,scope,count) values(?,?,?,1) ON CONFLICT ON CONSTRAINT " + tableName + "_pkey DO UPDATE SET count = " + tableName + ".count + 1";
        try (CoStoSysConnection coStoSysConnection = dbc.obtainOrReserveConnection()) {
            boolean wasAutoCommit = coStoSysConnection.getAutoCommit();
            coStoSysConnection.setAutoCommit(false);
            try (PreparedStatement ps = coStoSysConnection.prepareStatement(sql)) {
                for (Triple<String, String, OccurrenceScope> occurrence : occurrences) {
                    ps.setString(1, occurrence.getLeft());
                    ps.setInt(2, Integer.parseInt(occurrence.getMiddle()));
                    ps.setString(3, occurrence.getRight().name());
                    ps.addBatch();
                }
                ps.executeBatch();
                coStoSysConnection.commit();
            } finally {
                // Hand the connection back in the auto-commit mode it was obtained in, also on failure.
                // NOTE(review): no explicit rollback is issued on failure; confirm the driver discards
                // the failed transaction when auto-commit is switched back.
                coStoSysConnection.setAutoCommit(wasAutoCommit);
            }
            occurrences.clear();
        }
        log.debug("Finished sending gene synonym species occurrences to the database.");
    }

    /** Flushes the buffer, translating database failures into the UIMA processing exception. */
    private void flushOccurrences() throws AnalysisEngineProcessException {
        try {
            sendOccurrencesToDatabase();
        } catch (SQLException e) {
            log.error("Could not send the collected gene synonym species occurrences to the database", e);
            throw new AnalysisEngineProcessException(e);
        }
    }

    /**
     * Flushes the buffered occurrences to the database.
     *
     * @throws AnalysisEngineProcessException if the database write fails
     */
    @Override
    public void batchProcessComplete() throws AnalysisEngineProcessException {
        log.debug("Batch processing complete.");
        flushOccurrences();
    }

    /**
     * Flushes the remaining buffered occurrences to the database.
     *
     * @throws AnalysisEngineProcessException if the database write fails
     */
    @Override
    public void collectionProcessComplete() throws AnalysisEngineProcessException {
        log.debug("Collection processing complete.");
        flushOccurrences();
    }

    /** The context in which a species taxonomy ID was found for a gene synonym. */
    public enum OccurrenceScope {
        /** An organism overlaps the sentence of the gene mention; used only when the noun phrase yields none. */
        SENTENCE,
        /** An organism overlaps a noun phrase chunk that overlaps the gene mention. */
        NP,
        /** The taxonomy ID is derived from a MeSH heading of the document. */
        MESH,
        /** An organism occurs somewhere in the document and was not already counted for the synonym on NP or sentence level. */
        DOCUMENT
    }
}

