/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.genemapper.uima;

import com.lahodiuk.ahocorasick.AhoCorasickOptimized;
import de.julielab.jcore.types.Abbreviation;
import de.julielab.jcore.types.AbbreviationLongform;
import de.julielab.jcore.types.EntityMention;
import de.julielab.jcore.types.Gene;
import de.julielab.jcore.types.Protein;
import de.julielab.jcore.utility.index.Comparators;
import de.julielab.jcore.utility.index.IndexTermGenerator;
import de.julielab.jcore.utility.index.JCoReTreeMapAnnotationIndex;
import de.julielab.jcore.utility.index.TermGenerators;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Type;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.TOP;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * UIMA annotator that makes the {@link Protein} annotations of a document more consistent.
 *
 * <p>For each CAS, {@link #process(JCas)} runs three passes in this order:
 * <ol>
 *   <li>Proteins that are overlapped by a longer annotation of the offset expansion type are
 *       replaced by a protein spanning the longer annotation.</li>
 *   <li>If a protein overlaps an abbreviation (or its long form), the corresponding long form
 *       (or abbreviation) is tagged as a protein too, unless it already overlaps one.</li>
 *   <li>Every occurrence of the covered text of an existing protein in the document text is
 *       tagged as a protein, unless an at least equally long protein already overlaps it.</li>
 * </ol>
 *
 * <p>NOTE(review): the configured protein type is only validated against the type system; all
 * passes read and create {@code de.julielab.jcore.types.Protein} annotations directly.
 */
@ResourceMetaData(name="JCoRe Protein Consistency Tagger", description="Tags untagged protein names that have been tagged elsewhere in the document.")
@TypeCapability(inputs={"de.julielab.jcore.types.Protein"})
public class ProteinConsistencyTagger
extends JCasAnnotator_ImplBase {
    private static final Logger log = LoggerFactory.getLogger(ProteinConsistencyTagger.class);
    public static final String PARAMETER_PROTEIN_TYPE = "ProteinType";
    public static final String PARAM_OFFSET_EXPANSION_TYPE = "OffsetExpansionType";
    /** Default for {@link #PARAMETER_PROTEIN_TYPE}; shared by the annotation and {@link #initialize(UimaContext)}. */
    private static final String DEFAULT_PROTEIN_TYPE = "de.julielab.jcore.types.Protein";
    /** Default for {@link #PARAM_OFFSET_EXPANSION_TYPE}; shared by the annotation and {@link #initialize(UimaContext)}. */
    private static final String DEFAULT_OFFSET_EXPANSION_TYPE = "de.julielab.jcore.types.Gene";
    /**
     * Specific type that may overwrite the specific type of an already created replacement protein.
     * NOTE(review): the spelling is kept exactly as found; presumably it has to match a label
     * produced elsewhere - confirm before "correcting" it.
     */
    private static final String FAMILY_OR_GROUP_SPECIFIC_TYPE = "protein_familiy_or_group";

    private final TermGenerators.LongOffsetIndexTermGenerator longOffsetIndexTermGenerator = TermGenerators.longOffsetTermGenerator();
    @ConfigurationParameter(name=PARAMETER_PROTEIN_TYPE, mandatory=false, defaultValue={DEFAULT_PROTEIN_TYPE}, description="The UIMA type that is used to represent the protein annotations for which consistency tagging should be performed. Defaults to de.julielab.jcore.types.Protein.")
    private String proteinTypeName;
    @ConfigurationParameter(name=PARAM_OFFSET_EXPANSION_TYPE, mandatory=false, defaultValue={DEFAULT_OFFSET_EXPANSION_TYPE}, description="The UIMA type that is used to widen the offsets of annotations of the type given with the 'ProteinType' parameter. Annotations of the expansion type that completely overlap annotations of the protein type will cause the offsets of the protein annotations to be set to the expansion annotation offset values. Defaults to de.julielab.jcore.types.Gene which could be generated by the JCoRe Lingpipe Gazetteer component, for example.")
    private String offsetExpansionTypeName;
    private Type proteinType;
    private Type offsetExpansionType;

    /**
     * Reads the two type name parameters from the context, falling back to the defaults when a
     * parameter is not set.
     *
     * @param aContext the UIMA context carrying the configuration parameter values
     * @throws ResourceInitializationException if the superclass initialization fails
     */
    @Override
    public void initialize(UimaContext aContext) throws ResourceInitializationException {
        super.initialize(aContext);
        this.proteinTypeName = ProteinConsistencyTagger.stringParameter(aContext, PARAMETER_PROTEIN_TYPE, DEFAULT_PROTEIN_TYPE);
        this.offsetExpansionTypeName = ProteinConsistencyTagger.stringParameter(aContext, PARAM_OFFSET_EXPANSION_TYPE, DEFAULT_OFFSET_EXPANSION_TYPE);
    }

    /** Returns the value of the named configuration parameter, or {@code defaultValue} if it is unset. */
    private static String stringParameter(UimaContext context, String name, String defaultValue) {
        return Optional.ofNullable((String)context.getConfigParameterValue(name)).orElse(defaultValue);
    }

    /**
     * Runs the three consistency passes on the given CAS.
     *
     * @param jCas the CAS whose protein annotations are adjusted and extended
     * @throws AnalysisEngineProcessException if one of the configured types is not part of the
     *         type system of the CAS
     */
    @Override
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        this.resolveTypes(jCas);
        JCoReTreeMapAnnotationIndex<Long, Gene> gazetteerGenes = new JCoReTreeMapAnnotationIndex<>(Comparators.longOverlapComparator(), TermGenerators.longOffsetTermGenerator(), TermGenerators.longOffsetTermGenerator(), jCas, this.offsetExpansionType);
        JCoReTreeMapAnnotationIndex<Long, Abbreviation> abbreviations = new JCoReTreeMapAnnotationIndex<>(Comparators.longOverlapComparator(), TermGenerators.longOffsetTermGenerator(), TermGenerators.longOffsetTermGenerator(), jCas, Abbreviation.type);
        JCoReTreeMapAnnotationIndex<Long, AbbreviationLongform> abbLongforms = new JCoReTreeMapAnnotationIndex<>(Comparators.longOverlapComparator(), TermGenerators.longOffsetTermGenerator(), TermGenerators.longOffsetTermGenerator(), jCas, AbbreviationLongform.type);
        this.augmentProteinOffsetsWithDictionaryMatches(jCas, gazetteerGenes);
        // The protein index is rebuilt after each pass that changes the protein annotations in the CAS.
        JCoReTreeMapAnnotationIndex<Long, EntityMention> existingProteins = new JCoReTreeMapAnnotationIndex<>(Comparators.longOverlapComparator(), TermGenerators.longOffsetTermGenerator(), TermGenerators.longOffsetTermGenerator(), jCas, Protein.type);
        this.makeAbbreviationsConsistent(jCas, existingProteins, abbreviations, abbLongforms);
        existingProteins = new JCoReTreeMapAnnotationIndex<>(Comparators.longOverlapComparator(), TermGenerators.longOffsetTermGenerator(), TermGenerators.longOffsetTermGenerator(), jCas, Protein.type);
        this.tagSameSurfaceForms(jCas, existingProteins, abbLongforms);
    }

    /**
     * Looks up the configured types in the type system of the CAS. The lookup is repeated on the
     * next call as long as one of the two types has not been resolved.
     */
    private void resolveTypes(JCas jCas) throws AnalysisEngineProcessException {
        if (this.proteinType != null && this.offsetExpansionType != null) {
            return;
        }
        this.proteinType = jCas.getTypeSystem().getType(this.proteinTypeName);
        if (this.proteinType == null) {
            log.error("Could not find the annotation type {} for which consistency tagging should be applied in the type system.", this.proteinTypeName);
            throw new AnalysisEngineProcessException("unsupported_cas_type", new Object[]{this.proteinTypeName});
        }
        this.offsetExpansionType = jCas.getTypeSystem().getType(this.offsetExpansionTypeName);
        if (this.offsetExpansionType == null) {
            log.error("Could not find the annotation type {} that should be used to expand offsets of the {} annotation type.", this.offsetExpansionTypeName, this.proteinTypeName);
            throw new AnalysisEngineProcessException("unsupported_cas_type", new Object[]{this.offsetExpansionTypeName});
        }
    }

    /**
     * Tags every occurrence of the covered text of an existing protein in the document text.
     *
     * <p>A match is skipped when an overlapping protein is found that is at least as long as the
     * match. A shorter overlapping protein is removed from the CAS indexes and replaced by the
     * match. New proteins receive the specific type and confidence of the protein their surface
     * form was taken from (the last one seen when several proteins share a surface form).
     *
     * @param jCas the CAS to search and annotate
     * @param existingProteins overlap index of the proteins currently in the CAS; new proteins are added to it
     * @param abbLongforms not used by this method
     */
    private void tagSameSurfaceForms(JCas jCas, JCoReTreeMapAnnotationIndex<Long, EntityMention> existingProteins, JCoReTreeMapAnnotationIndex<Long, AbbreviationLongform> abbLongforms) {
        // surface form -> (specific type, confidence) of the protein that carries it
        Map<String, ImmutablePair<String, String>> taggedProteinNames = new HashMap<>();
        for (Annotation annotation : jCas.getAnnotationIndex(Protein.type)) {
            Protein p = (Protein)annotation;
            String surfaceForm = p.getCoveredText();
            if (surfaceForm == null || surfaceForm.isEmpty()) {
                // An empty pattern cannot identify a mention.
                continue;
            }
            taggedProteinNames.put(surfaceForm, new ImmutablePair<>(p.getSpecificType(), p.getConfidence()));
        }
        if (taggedProteinNames.isEmpty()) {
            // No protein names known, hence nothing to search for.
            return;
        }
        List<String> patterns = new ArrayList<>(taggedProteinNames.keySet());
        AhoCorasickOptimized ac = new AhoCorasickOptimized(patterns);
        ac.match(jCas.getDocumentText(), (start, end, matched) -> {
            // 'end' is the inclusive offset of the last matched character (the annotation below is
            // created with end + 1), so the match covers end - start + 1 characters.
            int matchLength = end - start + 1;
            // NOTE(review): the search key is still built from the inclusive end offset - confirm
            // against the overlap comparator whether end + 1 would be the right bound here.
            Long searchKey = this.longOffsetIndexTermGenerator.forOffsets(start, end);
            Optional<EntityMention> anyOverlappingProtein = existingProteins.searchFuzzy(searchKey).findAny();
            if (anyOverlappingProtein.isPresent()) {
                EntityMention overlapping = anyOverlappingProtein.get();
                if (overlapping.getEnd() - overlapping.getBegin() >= matchLength) {
                    return;
                }
                // The existing annotation is shorter than the match: the match replaces it.
                overlapping.removeFromIndexes();
            }
            ImmutablePair<String, String> typeAndConfidence = taggedProteinNames.get(matched);
            Protein protein = new Protein(jCas, start, end + 1);
            protein.setComponentId(ProteinConsistencyTagger.class.getSimpleName());
            protein.setSpecificType(typeAndConfidence.getLeft());
            protein.setConfidence(typeAndConfidence.getRight());
            protein.addToIndexes();
            existingProteins.add(protein);
        });
    }

    /**
     * Propagates protein annotations between abbreviations and their long forms.
     *
     * <p>For each protein overlapping an abbreviation, the long form of that abbreviation is tagged
     * as a protein if no protein overlaps it yet. Likewise, for each protein overlapping a long
     * form, the abbreviation is tagged if no protein overlaps it yet. New proteins copy the
     * specific type of the protein that triggered them.
     *
     * @param jCas the CAS to annotate
     * @param existingProteins overlap index of the proteins currently in the CAS; new proteins are added to it
     * @param abbreviations overlap index of the abbreviations in the CAS
     * @param abbLongforms overlap index of the abbreviation long forms in the CAS
     */
    private void makeAbbreviationsConsistent(JCas jCas, JCoReTreeMapAnnotationIndex<Long, EntityMention> existingProteins, JCoReTreeMapAnnotationIndex<Long, Abbreviation> abbreviations, JCoReTreeMapAnnotationIndex<Long, AbbreviationLongform> abbLongforms) {
        List<Protein> newProteins = new ArrayList<>();
        for (Annotation annotation : jCas.getAnnotationIndex(Protein.type)) {
            Protein existingProt = (Protein)annotation;

            Optional<Abbreviation> anyAbbreviation = abbreviations.searchFuzzy(existingProt).findAny();
            if (anyAbbreviation.isPresent()) {
                AbbreviationLongform longform = anyAbbreviation.get().getTextReference();
                if (longform != null && !existingProteins.searchFuzzy(longform).findAny().isPresent()) {
                    Protein longformProtein = this.createDerivedProtein(jCas, longform, existingProt);
                    newProteins.add(longformProtein);
                    // Registered right away so that later iterations see the span as covered.
                    existingProteins.add(longformProtein);
                }
            }

            Optional<AbbreviationLongform> anyLongform = abbLongforms.searchFuzzy(existingProt).findAny();
            if (anyLongform.isPresent()) {
                Abbreviation abbreviation = anyLongform.get().getAbbreviation();
                if (abbreviation != null && !existingProteins.searchFuzzy(abbreviation).findAny().isPresent()) {
                    Protein abbreviationProtein = this.createDerivedProtein(jCas, abbreviation, existingProt);
                    newProteins.add(abbreviationProtein);
                    existingProteins.add(abbreviationProtein);
                }
            }
        }
        // Added to the CAS only now so that the annotation index is not modified while iterating over it.
        newProteins.forEach(TOP::addToIndexes);
    }

    /**
     * Creates a protein (not yet added to the CAS indexes) that spans {@code span} and copies the
     * specific type of {@code source}.
     */
    private Protein createDerivedProtein(JCas jCas, Annotation span, Protein source) {
        Protein derived = new Protein(jCas, span.getBegin(), span.getEnd());
        derived.setComponentId(ProteinConsistencyTagger.class.getSimpleName());
        derived.setSpecificType(source.getSpecificType());
        return derived;
    }

    /**
     * Replaces proteins by longer overlapping annotations from the given index.
     *
     * <p>Each protein that is overlapped by a longer indexed annotation is removed from the CAS.
     * For each such longer annotation one replacement protein with its offsets is created,
     * carrying the specific type of the first protein that triggered it. If a further protein
     * with the family/group specific type overlaps the same longer annotation, its specific type
     * and confidence are written to the replacement.
     *
     * @param jCas the CAS to modify
     * @param gazetteerGenes overlap index of the annotations used to widen protein offsets
     */
    private void augmentProteinOffsetsWithDictionaryMatches(JCas jCas, JCoReTreeMapAnnotationIndex<Long, Gene> gazetteerGenes) {
        List<Protein> toRemove = new ArrayList<>();
        List<Protein> toAdd = new ArrayList<>();
        // longer annotation -> the replacement protein created for it
        Map<Gene, Protein> replacementsByGene = new HashMap<>();
        for (Annotation annotation : jCas.getAnnotationIndex(Protein.type)) {
            Protein protein = (Protein)annotation;
            int protLength = protein.getEnd() - protein.getBegin();
            boolean replaced = false;
            Iterator<Gene> geneIt = gazetteerGenes.searchFuzzy(protein).iterator();
            while (geneIt.hasNext()) {
                Gene gazGene = geneIt.next();
                int gazGeneLength = gazGene.getEnd() - gazGene.getBegin();
                if (gazGeneLength <= protLength) {
                    continue;
                }
                replaced = true;
                Protein replacement = replacementsByGene.get(gazGene);
                if (replacement == null) {
                    replacement = new Protein(jCas, gazGene.getBegin(), gazGene.getEnd());
                    replacement.setSpecificType(protein.getSpecificType());
                    replacement.setComponentId(ProteinConsistencyTagger.class.getSimpleName() + ",LongerOverlappingGazetteerGene");
                    toAdd.add(replacement);
                    // Remember the replacement itself: later updates must reach the annotation
                    // that is added to the CAS, not the protein that is about to be removed.
                    replacementsByGene.put(gazGene, replacement);
                } else if (FAMILY_OR_GROUP_SPECIFIC_TYPE.equals(protein.getSpecificType())) {
                    replacement.setSpecificType(protein.getSpecificType());
                    replacement.setConfidence(protein.getConfidence());
                }
            }
            if (replaced) {
                // Recorded once per protein, no matter how many longer annotations overlap it.
                toRemove.add(protein);
            }
        }
        // Index changes are deferred until the iteration over the annotation index has finished.
        toRemove.forEach(TOP::removeFromIndexes);
        toAdd.forEach(TOP::addToIndexes);
    }
}

