/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.genemapper.mappingcores;

import com.fulmicoton.multiregexp.MultiPatternSearcher;
import com.lahodiuk.ahocorasick.AhoCorasickOptimized;
import de.julielab.gene.candidateretrieval.CandidateRetrieval;
import de.julielab.gene.candidateretrieval.GeneRecordHit;
import de.julielab.gene.candidateretrieval.LuceneCandidateRetrieval;
import de.julielab.geneexpbase.CandidateFilter;
import de.julielab.geneexpbase.candidateretrieval.SynHit;
import de.julielab.geneexpbase.genemodel.GeneDocument;
import de.julielab.geneexpbase.genemodel.GeneMention;
import de.julielab.geneexpbase.genemodel.GeneName;
import de.julielab.genemapper.utils.SpeciesMultiPatternSearcher;
import de.julielab.java.utilities.spanutils.OffsetMap;
import de.julielab.java.utilities.spanutils.Span;
import de.julielab.speciesassignment.services.SpeciesHintSetter;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collection;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.NavigableMap;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import org.apache.commons.lang3.Range;

public class DypsisGeneSelector {
    /** Part-of-speech tag names that {@code removeGenesWithVerbs} counts as verbs. */
    private static final Set<String> POS_VERBS = Set.of("V", "VB", "VBZ", "VBD", "VBP", "VHD");

    /**
     * Resolves overlaps between gazetteer mentions and mentions of every other tagger in favour of
     * longer gazetteer matches.
     * <p>
     * All mentions whose tagger name does not contain "gazetteer" are indexed by their offsets.
     * Then, for each mention tagged {@code GAZETTEER}:
     * <ul>
     * <li>every overlapping indexed mention that lies inside the gazetteer mention without having
     * identical offsets is scheduled for removal, and the gazetteer mention is passed to
     * {@code document.selectGene};</li>
     * <li>for every other overlapping indexed mention, and when nothing overlaps at all, the
     * gazetteer mention itself is scheduled for removal.</li>
     * </ul>
     * Scheduled mentions are removed from {@code document.getAllGenes()} once the scan is done.
     *
     * @param document the document whose gene mentions are filtered in place
     */
    @SuppressWarnings({"rawtypes", "unchecked"}) // OffsetMap is used as a raw type here; element types are restored by casts
    public static void selectLongerGazetteerMatchesOverFlair(GeneDocument document) {
        // Index the non-gazetteer mentions; mentions with the same offsets share one bucket.
        OffsetMap flairGms = new OffsetMap();
        for (GeneMention gm : document.getAllGenes()) {
            if (gm.getTagger().name().toLowerCase().contains("gazetteer")) {
                continue;
            }
            List<GeneMention> sameOffsetMentions = (List<GeneMention>) flairGms.get((Object) gm.getOffsets());
            if (sameOffsetMentions == null) {
                sameOffsetMentions = new ArrayList<>();
                flairGms.put((Object) gm.getOffsets(), sameOffsetMentions);
            }
            sameOffsetMentions.add(gm);
        }

        List<GeneMention> flairGenesToRemove = new ArrayList<>();
        List<GeneMention> gazetteerGenesToRemove = new ArrayList<>();
        // Kept as a lazy iterator, as before: selectGene() is called while the scan is in progress.
        Iterator<GeneMention> gazetteerGenesIt = document.getAllGenes().stream()
                .filter(gm -> gm.getTagger() == GeneMention.GeneTagger.GAZETTEER)
                .iterator();
        while (gazetteerGenesIt.hasNext()) {
            GeneMention gazGm = gazetteerGenesIt.next();
            NavigableMap<?, ?> overlappingBuckets = flairGms.getOverlapping((Span) gazGm);
            if (overlappingBuckets.isEmpty()) {
                // A gazetteer match without any other tagger's support is dropped.
                gazetteerGenesToRemove.add(gazGm);
                continue;
            }
            for (Object bucket : overlappingBuckets.values()) {
                for (GeneMention flairGm : (List<GeneMention>) bucket) {
                    boolean gazetteerIsStrictlyLonger = gazGm.getOffsets().containsRange(flairGm.getOffsets())
                            && !gazGm.getOffsets().equals(flairGm.getOffsets());
                    if (gazetteerIsStrictlyLonger) {
                        flairGenesToRemove.add(flairGm);
                        document.selectGene(gazGm);
                    } else {
                        // NOTE(review): a gazetteer mention that contains one mention but merely
                        // overlaps (or equals) another is both selected and scheduled for removal;
                        // behaviour kept as is - confirm this is intended.
                        gazetteerGenesToRemove.add(gazGm);
                    }
                }
            }
        }
        // Removal is deferred so the collection is not modified while it is being scanned.
        flairGenesToRemove.forEach(document.getAllGenes()::remove);
        gazetteerGenesToRemove.forEach(document.getAllGenes()::remove);
    }

    /**
     * Splits non-gazetteer mentions whose text contains a slash into one mention per slash-separated
     * part (despite the method name, the separator handled here is {@code "/"}).
     * <p>
     * A mention obtained from {@code document.getGenes()} is considered for splitting when its
     * tagger is not {@code GAZETTEER}, its text contains {@code "/"}, no gazetteer mention overlaps
     * it, and none of its retrieved candidates has a synonym that contains, or is contained in, the
     * mention's normalized text.
     * <p>
     * The split is only applied if every part passes all of these checks: it is not rejected by
     * {@code CandidateFilter.isNumberGreekOrLatin}, candidate retrieval returns at least one hit, and
     * the first hit is an exact match or has a synonym similarity score above 0.8. In that case the
     * parts are passed to {@code document.selectGene} and the original mention is removed.
     *
     * @param document           the document whose mentions are inspected and modified in place
     * @param candidateRetrieval used to look up candidates for whole mentions and for their parts
     */
    @SuppressWarnings({"rawtypes", "unchecked"}) // OffsetMap is used as a raw type here; element types are restored by casts
    public static void splitDashGenes(GeneDocument document, CandidateRetrieval candidateRetrieval) {
        OffsetMap gazetteerMentions = new OffsetMap();
        for (GeneMention gm : document.getAllGenes()) {
            if (gm.getTagger() == GeneMention.GeneTagger.GAZETTEER) {
                gazetteerMentions.put(gm);
            }
        }

        Iterator<GeneMention> flairGenes = document.getGenes()
                .filter(gm -> gm.getTagger() != GeneMention.GeneTagger.GAZETTEER)
                .filter(gm -> gm.getText().contains("/"))
                .iterator();
        // Collected first and processed afterwards so the document is not changed during the scan.
        List<GeneMention> toSplit = new ArrayList<>();
        while (flairGenes.hasNext()) {
            GeneMention flairGene = flairGenes.next();
            Collection<?> overlappingGazetteerMentions = gazetteerMentions.getOverlapping((Span) flairGene).values();
            boolean geneNameContainsSlash = overlappingGazetteerMentions.stream()
                    .anyMatch(gm -> ((GeneMention) gm).getText().contains("/"));
            // NOTE(review): the slash test is subsumed by the emptiness test - any overlapping
            // gazetteer mention already prevents the split. Kept as in the original; confirm intent.
            if (geneNameContainsSlash || !overlappingGazetteerMentions.isEmpty()) {
                continue;
            }
            List<?> candidates = candidateRetrieval.getCandidates(flairGene, LuceneCandidateRetrieval.GENE_RECORDS_SYNONYMS_APPROX);
            boolean foundCandidate = false;
            for (Object candidate : candidates) {
                String matchedSynonym = ((SynHit) candidate).getSynonym();
                String flairText = flairGene.getNormalizedText();
                if (matchedSynonym.contains(flairText) || flairText.contains(matchedSynonym)) {
                    foundCandidate = true;
                    break;
                }
            }
            if (!foundCandidate) {
                toSplit.add(flairGene);
            }
        }

        for (GeneMention gmToSplit : toSplit) {
            List<GeneMention> newGenes = splitAtSlashes(gmToSplit);
            if (!everyPartResolves(newGenes, candidateRetrieval)) {
                continue;
            }
            for (GeneMention newGene : newGenes) {
                document.selectGene(newGene);
            }
            document.removeGene(gmToSplit);
        }
    }

    /**
     * Creates one copy of {@code gmToSplit} per slash-separated part of its text. Each copy gets the
     * part's offsets, a new gene name built from the part's text, and this class as composite resolver.
     */
    private static List<GeneMention> splitAtSlashes(GeneMention gmToSplit) {
        String text = gmToSplit.getText();
        List<GeneMention> parts = new ArrayList<>();
        int partStart = 0;
        while (true) {
            int slash = text.indexOf('/', partStart);
            boolean lastPart = slash < 0;
            int partEnd = lastPart ? text.length() : slash;
            // The last part ends at the mention's own end offset rather than at a text-derived one.
            int absoluteEnd = lastPart ? gmToSplit.getEnd() : gmToSplit.getBegin() + partEnd;

            GeneMention splitGene = new GeneMention(gmToSplit);
            splitGene.setOffsets(Range.<Integer>between(gmToSplit.getBegin() + partStart, absoluteEnd));
            splitGene.setGeneName(new GeneName(text.substring(partStart, partEnd), gmToSplit.getNormalizer()));
            splitGene.setText(splitGene.getGeneName().getText());
            splitGene.setCompositeResolver(DypsisGeneSelector.class.getCanonicalName());
            parts.add(splitGene);

            if (lastPart) {
                return parts;
            }
            partStart = slash + 1;
        }
    }

    /**
     * Tells whether every part is acceptable on its own: not number/Greek/Latin according to
     * {@code CandidateFilter}, with at least one retrieved candidate whose first hit is an exact
     * match or scores above 0.8. Stops at the first part that fails.
     */
    private static boolean everyPartResolves(List<GeneMention> parts, CandidateRetrieval candidateRetrieval) {
        for (GeneMention part : parts) {
            if (CandidateFilter.isNumberGreekOrLatin(part.getText())) {
                return false;
            }
            List<?> candidates = candidateRetrieval.getCandidates(part, LuceneCandidateRetrieval.GENE_RECORDS_SYNONYMS_APPROX);
            if (candidates.isEmpty()) {
                return false;
            }
            SynHit firstCandidate = (SynHit) candidates.get(0);
            // Written as !(score > 0.8) so that a NaN score is rejected, exactly as before.
            if (!firstCandidate.isExactMatch() && !(((GeneRecordHit) firstCandidate).getSynonymSimilarityScore() > 0.8)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Merges runs of gene mentions in which each mention begins exactly one position after the end
     * of its predecessor ({@code previous.getEnd() == next.getBegin() - 1}).
     * <p>
     * While iterating {@code document.getAllGenes()}, a run is closed as soon as a mention begins
     * more than one position after the run's last mention. A mention that begins earlier than that
     * (touching or overlapping the run's last mention) is neither added to the run nor starts a new
     * one. Every run of two or more mentions is replaced by a single merged mention.
     * <p>
     * NOTE(review): this presumably relies on {@code getAllGenes()} iterating in offset order - confirm.
     *
     * @param document the document whose mentions are merged in place
     */
    public static void mergeContinuousGeneAnnotations(GeneDocument document) {
        List<GeneMention> toRemove = new ArrayList<>();
        List<GeneMention> newGenes = new ArrayList<>();
        ArrayDeque<GeneMention> currentSpan = new ArrayDeque<>();
        for (GeneMention gm : document.getAllGenes()) {
            if (!currentSpan.isEmpty() && currentSpan.peekLast().getEnd() < gm.getBegin() - 1) {
                // Gap of more than one position: the current run is complete.
                collectMergedMention(document, currentSpan, newGenes, toRemove);
                currentSpan.clear();
            }
            if (currentSpan.isEmpty() || currentSpan.peekLast().getEnd() == gm.getBegin() - 1) {
                currentSpan.add(gm);
            }
        }
        // The last run is still open when the iteration ends.
        collectMergedMention(document, currentSpan, newGenes, toRemove);

        // The document is only modified after the iteration over its mentions has finished.
        toRemove.forEach(document.getAllGenes()::remove);
        for (GeneMention newGene : newGenes) {
            document.addGene(newGene);
        }
    }

    /**
     * If {@code span} holds at least two mentions, builds the mention covering the whole span,
     * appends it to {@code mergedOut} and appends the span's members to {@code replacedOut}.
     * The merged mention is a copy of the span's first mention; its tagger is overwritten with the
     * tagger of some non-gazetteer member if the span has one.
     */
    private static void collectMergedMention(GeneDocument document, ArrayDeque<GeneMention> span,
            List<GeneMention> mergedOut, List<GeneMention> replacedOut) {
        if (span.size() <= 1) {
            return;
        }
        int begin = span.getFirst().getBegin();
        int end = span.getLast().getEnd();
        GeneMention newMention = new GeneMention(span.getFirst());
        newMention.setText(document.getCoveredText(begin, end));
        newMention.setOffsets(Range.<Integer>between(begin, end));
        span.stream()
                .map(GeneMention::getTagger)
                .filter(tagger -> tagger != GeneMention.GeneTagger.GAZETTEER)
                .findAny()
                .ifPresent(tagger -> newMention.setTagger(tagger));
        mergedOut.add(newMention);
        replacedOut.addAll(span);
    }

    /**
     * Cuts every gene mention's text at its first asterisk, provided the asterisk is at index 2 or
     * later, and shortens the mention's offsets to the length of the remaining text. Mentions
     * without an asterisk, or with one at index 0 or 1, are left untouched.
     *
     * @param document the document whose mentions are clipped in place
     */
    public static void clipAsterisksFromGeneNames(GeneDocument document) {
        for (GeneMention mention : document.getAllGenes()) {
            String fullText = mention.getText();
            int asteriskAt = fullText.indexOf('*');
            if (asteriskAt > 1) {
                String clipped = fullText.substring(0, asteriskAt);
                mention.setText(clipped);
                // The begin offset stays; only the end moves to match the shortened text.
                mention.setOffsets(Range.<Integer>between(mention.getBegin(), mention.getBegin() + clipped.length()));
            }
        }
    }

    /**
     * Adds spelling alternatives to gene names: a mention whose text contains "embryonal" gets an
     * alternative name with "embryonic" in its place, and a mention whose text contains "embryonic"
     * gets one with "embryonal". Both checks are applied to every mention.
     *
     * @param document the document whose gene names receive the alternatives
     */
    public static void addSynonyms(GeneDocument document) {
        for (GeneMention mention : document.getGenesIterable()) {
            if (mention.getText().contains("embryonal")) {
                GeneName embryonicVariant = new GeneName(mention.getText().replace("embryonal", "embryonic"), mention.getNormalizer());
                mention.getGeneName().addAlternative(embryonicVariant);
            }
            if (mention.getText().contains("embryonic")) {
                GeneName embryonalVariant = new GeneName(mention.getText().replace("embryonic", "embryonal"), mention.getNormalizer());
                mention.getGeneName().addAlternative(embryonalVariant);
            }
        }
    }

    /**
     * Removes gene mentions that overlap more than one POS tag, at least one of which is listed in
     * {@code POS_VERBS}, and re-adds the gene names found inside each removed mention.
     * <p>
     * After a mention is removed, its text is searched with the document's gene name dictionary.
     * For every distinct match range a copy of the removed mention is created with that range as
     * offsets and the covered document text as text, and is passed to {@code document.selectGene}.
     *
     * @param document the document whose mentions are filtered in place
     */
    public static void removeGenesWithVerbs(GeneDocument document) {
        // Collected first: the document must not be modified while its mentions are iterated.
        List<GeneMention> toRemove = new ArrayList<>();
        for (GeneMention gm : document.getGenesIterable()) {
            if (severalAndAnyMatch(document.getOverlappingPosTags(gm.getOffsets()), pos -> POS_VERBS.contains(pos.getTag()))) {
                toRemove.add(gm);
            }
        }
        for (GeneMention gm : toRemove) {
            document.removeGene(gm);
            AhoCorasickOptimized geneNameAC = document.getGeneNameDictionary();
            // Insertion-ordered set: identical match ranges are only processed once.
            Set<Range<Integer>> genesInRemoved = new LinkedHashSet<>();
            // Match positions are relative to the mention text and are shifted to document offsets.
            // NOTE(review): the "+ 1" suggests the matcher reports an inclusive end index - confirm.
            geneNameAC.match(gm.getText(), (start, end, match) -> genesInRemoved.add(Range.<Integer>between(gm.getBegin() + start, gm.getBegin() + end + 1)));
            for (Range<Integer> range : genesInRemoved) {
                GeneMention subMention = new GeneMention(gm);
                subMention.setOffsets(range);
                subMention.setText(document.getCoveredText((Span) subMention));
                document.selectGene(subMention);
            }
        }
    }

    /**
     * Returns whether {@code items} has more than one element and at least one of them satisfies
     * {@code test}. Generic so that the element type is inferred from the argument and does not
     * have to be named by the caller.
     */
    private static <T> boolean severalAndAnyMatch(Collection<T> items, java.util.function.Predicate<? super T> test) {
        return items.size() > 1 && items.stream().anyMatch(test);
    }

    /**
     * Strips species prefixes from gene names. For each mention the prefix reported by
     * {@code SpeciesHintSetter.getSpeciesPrefix} is looked up; if one of the taxonomy IDs belonging
     * to that prefix is among the mention's own taxonomy IDs, the mention receives a copy of its gene
     * name whose text is the mention text without the leading prefix characters. The mention's text
     * and offsets are not changed.
     * <p>
     * The document is first checked via {@code expectState} for {@code SPECIES_ASSIGNED_TO_GENES}.
     *
     * @param document the document whose gene names are adjusted in place
     */
    public static void removeSpeciesPrefixesFromGenes(GeneDocument document) {
        document.expectState(EnumSet.of(GeneDocument.State.SPECIES_ASSIGNED_TO_GENES));
        for (GeneMention mention : document.getGenesIterable()) {
            String prefix = SpeciesHintSetter.getSpeciesPrefix(mention);
            Collection<?> prefixTaxIds = SpeciesHintSetter.getTaxIdsForPrefix(prefix);
            boolean prefixSpeciesAssigned = false;
            for (Object taxId : prefixTaxIds) {
                if (mention.getTaxonomyIds().contains(taxId)) {
                    prefixSpeciesAssigned = true;
                    break;
                }
            }
            if (prefixSpeciesAssigned) {
                GeneName strippedName = new GeneName(mention.getGeneName());
                strippedName.setText(mention.getText().substring(prefix.length()));
                mention.setGeneName(strippedName);
            }
        }
    }

    /**
     * Convenience overload of {@link #removeSpeciesMention(MultiPatternSearcher, GeneDocument)} that
     * uses the shared {@code SpeciesMultiPatternSearcher.searcher}.
     *
     * @param document the document whose gene mentions are processed in place
     */
    public static void removeSpeciesMention(GeneDocument document) {
        DypsisGeneSelector.removeSpeciesMention(SpeciesMultiPatternSearcher.searcher, document);
    }

    /**
     * Removes a leading pattern match from gene mentions. Each mention's text is searched with
     * {@code searcher}; if the first match starts at index 0, the matched characters are cut off
     * the front of the text and the mention's begin offset is advanced by the match length. The end
     * offset is kept. Mentions whose first match starts later, or that have no match, stay as is.
     *
     * @param searcher the multi-pattern searcher applied to every mention text
     * @param document the document whose gene mentions are processed in place
     */
    public static void removeSpeciesMention(MultiPatternSearcher searcher, GeneDocument document) {
        for (GeneMention mention : document.getAllGenes()) {
            String mentionText = mention.getText();
            MultiPatternSearcher.Cursor firstMatch = searcher.search((CharSequence) mentionText);
            if (!firstMatch.next() || firstMatch.start() != 0) {
                continue;
            }
            int matchEnd = firstMatch.end();
            mention.setText(mentionText.substring(matchEnd));
            Range<Integer> previousOffsets = mention.getOffsets();
            int shiftedBegin = previousOffsets.getMinimum() + matchEnd;
            mention.setOffsets(Range.<Integer>between(shiftedBegin, previousOffsets.getMaximum()));
        }
    }
}

