/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.speciesassignment.services;

import com.google.common.collect.Multimap;
import de.julielab.geneexpbase.candidateretrieval.GeneCandidateRetrievalException;
import de.julielab.geneexpbase.genemodel.GeneDocument;
import de.julielab.geneexpbase.genemodel.GeneMention;
import de.julielab.geneexpbase.genemodel.GeneSpeciesOccurrence;
import de.julielab.geneexpbase.genemodel.SpeciesMention;
import de.julielab.java.utilities.FileUtilities;
import de.julielab.java.utilities.IOStreamUtilities;
import de.julielab.speciesassignment.services.SpeciesHintSetter;
import de.julielab.speciesassignment.spi.SpeciesAssignmentFilter;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.commons.lang3.Range;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * {@link SpeciesAssignmentFilter} implementation that
 * <ul>
 * <li>removes species mentions whose surrounding chunk text indicates a method or sequence
 * description rather than an organism (e.g. "two-hybrid", "interaction trap", chunks ending
 * with "region" or "sequence"), and</li>
 * <li>removes taxonomy ID candidates of gene mentions that are not listed in the
 * <code>organisms_in_gene_info.txt[.gz]</code> resource.</li>
 * </ul>
 */
public class SpeciesAssignmentFilterImpl
implements SpeciesAssignmentFilter {
    private static final Logger log = LoggerFactory.getLogger(SpeciesAssignmentFilterImpl.class);
    /** Matches expressions like "two-hybrid" or "tri hybrid" anywhere in a lower-cased chunk text. */
    private static final Pattern HYBRID_PATTERN = Pattern.compile("(one|two|bi|three|tri)(-|\\s)hybrid");
    /** Taxonomy IDs read from the resource file; {@code null} if the resource could not be loaded. */
    private Set<String> taxIdsInNCBIGene;

    /**
     * Creates the filter and tries to load the taxonomy ID list from the classpath resource.
     * A load failure is logged and only surfaces when {@link #filterAssignments(GeneDocument)}
     * actually needs the list.
     */
    public SpeciesAssignmentFilterImpl() {
        this.readTaxIdsInNCBIGene();
    }

    /**
     * Removes, through the given iterator, all species mentions whose overlapping chunks look like
     * a method or sequence description. Callers must make sure that the document's chunks are set.
     *
     * @param textSpeciesIt iterator over the species mentions to check; must support {@code remove()}
     * @param document      the document that provides the chunks and the covered text
     */
    private void filterSpeciesMentions(Iterator<Map.Entry<Range<Integer>, SpeciesMention>> textSpeciesIt, GeneDocument document) {
        while (textSpeciesIt.hasNext()) {
            Map.Entry<Range<Integer>, SpeciesMention> e = textSpeciesIt.next();
            NavigableMap<Range<Integer>, String> overlapping = document.getChunks().getOverlapping(e.getKey());
            StringBuilder sb = new StringBuilder();
            for (Range<Integer> chunk : overlapping.keySet()) {
                sb.append(document.getCoveredText(chunk)).append(" ");
            }
            // Trim the separator appended after the last chunk; otherwise the endsWith() checks below
            // could never succeed. Locale.ROOT keeps the lower-casing independent of the default locale.
            String chunkText = sb.toString().trim().toLowerCase(Locale.ROOT);
            // find() instead of matches(".*…​.*") so that line breaks in the chunk text do not prevent a match.
            boolean isNonOrganismContext = HYBRID_PATTERN.matcher(chunkText).find()
                    || chunkText.contains("interaction trap")
                    || chunkText.endsWith("region")
                    || chunkText.endsWith("sequence");
            if (isNonOrganismContext) {
                textSpeciesIt.remove();
            }
        }
    }

    /**
     * Filters the title and text species candidates of the document. Without chunks no filtering
     * is possible; in that case a warning is logged and the species mentions are left unchanged.
     *
     * @param document the document whose species candidates are filtered in place
     */
    @Override
    public void filterSpeciesMentions(GeneDocument document) {
        if (document.getChunks() == null || document.getChunks().isEmpty()) {
            log.warn("To filter organism mentions that should be removed for gene species assignments, chunking is required. Thus, the chunks must be set before the species mentions. At this moment, there are no chunks set and species filtering might be ineffective. Document ID: {}", document.getId());
            // Nothing can overlap with a missing or empty chunk map, so there is nothing to remove.
            return;
        }
        this.filterSpeciesMentions(document.getSpecies().getTitleCandidates().entrySet().iterator(), document);
        this.filterSpeciesMentions(document.getSpecies().getTextCandidates().entrySet().iterator(), document);
    }

    /**
     * Removes from each gene mention all taxonomy candidates whose ID is not contained in the loaded
     * taxonomy ID list. If a gene mention has no candidates left and a non-blank default species is
     * configured, the default species is added. Finally, the document is marked with
     * {@link GeneDocument.State#SPECIES_CANDIDATES_FILTERED}.
     *
     * @param doc the document whose gene mentions are filtered in place
     * @throws IllegalStateException if a taxonomy ID must be checked but the ID list could not be loaded
     */
    @Override
    public void filterAssignments(GeneDocument doc) {
        try {
            Iterator<GeneMention> geneIt = doc.getGenes().iterator();
            while (geneIt.hasNext()) {
                GeneMention gm = geneIt.next();
                Multimap<String, GeneSpeciesOccurrence> taxonomyCandidates = gm.getTaxonomyOccurrences();
                // Collect first, remove afterwards: the key set must not be modified while iterating over it.
                List<String> toRemove = new ArrayList<>();
                for (String taxId : taxonomyCandidates.keySet()) {
                    if (!this.isKnownTaxId(taxId)) {
                        toRemove.add(taxId);
                    }
                }
                for (String taxId : toRemove) {
                    taxonomyCandidates.removeAll(taxId);
                }
                if (taxonomyCandidates.isEmpty()) {
                    String defaultSpecies = SpeciesHintSetter.getDefaultSpecies();
                    if (!StringUtils.isBlank(defaultSpecies)) {
                        taxonomyCandidates.put(defaultSpecies, GeneSpeciesOccurrence.DEFAULT);
                    }
                }
            }
            doc.addState(GeneDocument.State.SPECIES_CANDIDATES_FILTERED);
        }
        catch (GeneCandidateRetrievalException e) {
            log.error("Could not filter the species assignments of document {}", doc.getId(), e);
        }
    }

    /**
     * Checks whether the given taxonomy ID is contained in the loaded taxonomy ID list.
     *
     * @param taxId the taxonomy ID to look up
     * @return {@code true} if the ID is contained in the list
     * @throws IllegalStateException if the list could not be loaded at construction time
     */
    private boolean isKnownTaxId(String taxId) {
        if (this.taxIdsInNCBIGene == null) {
            throw new IllegalStateException("The taxonomy IDs could not be loaded from the resource /organisms_in_gene_info.txt[.gz]; species assignments cannot be filtered.");
        }
        return this.taxIdsInNCBIGene.contains(taxId);
    }

    /**
     * Reads the taxonomy ID list, one trimmed ID per line, from the resource
     * <code>/organisms_in_gene_info.txt.gz</code> or, if that is not found,
     * <code>/organisms_in_gene_info.txt</code>. On failure an error is logged and the list stays unset.
     */
    private void readTaxIdsInNCBIGene() {
        try {
            InputStream is = FileUtilities.findResource("/organisms_in_gene_info.txt.gz");
            if (is == null) {
                is = FileUtilities.findResource("/organisms_in_gene_info.txt");
            }
            if (is == null) {
                log.error("Neither the resource /organisms_in_gene_info.txt.gz nor /organisms_in_gene_info.txt could be found. Species assignments cannot be filtered.");
                return;
            }
            // Make sure the stream is closed even if reading fails.
            try (InputStream in = is) {
                this.taxIdsInNCBIGene = IOStreamUtilities.getLinesFromInputStream(in).stream().map(String::trim).collect(Collectors.toSet());
            }
        }
        catch (IOException e) {
            log.error("Could not read the taxonomy IDs from the resource /organisms_in_gene_info.txt[.gz]", e);
        }
    }
}

