/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.genemapper.evaluation.tools;

import com.google.common.collect.Multimap;
import de.julielab.evaluation.entities.EntityEvaluator;
import de.julielab.gene.candidateretrieval.CandidateRetrieval;
import de.julielab.geneexpbase.configuration.Parameters;
import de.julielab.geneexpbase.data.CorpusReader;
import de.julielab.geneexpbase.genemodel.GeneDocument;
import de.julielab.geneexpbase.genemodel.GeneMention;
import de.julielab.geneexpbase.genemodel.SpeciesCandidates;
import de.julielab.genemapper.GeneMapper;
import de.julielab.java.utilities.FileUtilities;
import de.julielab.java.utilities.spanutils.OffsetMap;
import de.julielab.java.utilities.spanutils.OffsetSet;
import de.julielab.speciesassignment.GNATSpeciesAssigner;
import de.julielab.speciesassignment.GeneSpeciesAssigner;
import java.io.BufferedWriter;
import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Command-line tool that runs species assignment on the gold gene mentions of an
 * evaluation corpus, writes one tab-separated line per gene mention (document ID,
 * assigned taxonomy ID, begin offset, end offset, mention text) to a
 * {@code <dataType>.taxlist} file in the working directory, and finally hands that
 * file together with the gold taxonomy list to {@link EntityEvaluator}.
 *
 * <p>All corpus locations are hard-coded relative to the working directory.</p>
 */
public class GenelistTaxonomyPredictor {
    private static final Logger log = LoggerFactory.getLogger(GenelistTaxonomyPredictor.class);

    /**
     * Reads the corpus annotations, assigns species to the gold gene mentions document by
     * document, writes the predictions and starts the evaluation.
     *
     * @param args ignored; every path is derived from constants in this method
     * @throws Exception if reading the corpus, writing the prediction file or the evaluation fails
     */
    // The element types of several CorpusReader results are not visible in this file,
    // so those collections are deliberately left raw.
    @SuppressWarnings({"rawtypes", "unchecked"})
    public static void main(String[] args) throws Exception {
        String dataType = "bc2train";
        File predictedTaxList = new File(dataType + ".taxlist");
        String propertiesFile = "data/eval_data/gnormplus_data/genemapper.properties";
        // NOTE(review): no GeneMapper is ever constructed here. propertiesFile is presumably
        // meant to configure one -- confirm the proper way to create it and instantiate it.
        GeneMapper mapper = null;

        String basePath = "data/eval_data/gnormplus_data/" + dataType;
        String genesPath = basePath + "/annotated/goldgenes.tsv.gz";
        // Sentences, chunks, species and POS tags are all read from the same mixed annotation file.
        String annotationsPath = basePath + "/annotated/annotations.tsv.gz";
        String acronymsPath = basePath + "/annotated/acronyms.tsv.gz";
        // The MeSH directory name is the data type without its three-character prefix.
        String meshPath = "data/eval_data/bc2_data/" + dataType.substring(3) + "/mesh.tsv.gz";
        String docTextPath = basePath + "/annotated/text";
        // Derived from dataType so that it cannot drift from the other corpus paths.
        String goldTaxListPath = basePath + "/" + dataType + ".taxlist";

        Multimap<String, GeneMention> goldData = CorpusReader.readMentionsWithOffsets(genesPath);
        // Keep only mentions whose specific type is GENE; values() is a live view of the multimap.
        goldData.values().removeIf(gm -> gm.getSpecificType() != GeneMention.SpecificType.GENE);

        Multimap acronyms = CorpusReader.readAcronymAnnotations(acronymsPath);
        Map<String, String> documentContexts = CorpusReader.readGeneContexts(docTextPath);
        Multimap sentences = CorpusReader.readMixedFileForSentenceOffsets(annotationsPath);
        Map species = CorpusReader.readMixedFileForTextSpecies(annotationsPath);
        Map chunks = CorpusReader.readMixedFileForChunkOffsets(annotationsPath);
        Multimap posTags = CorpusReader.readMixedFileForPosTags(annotationsPath);
        Multimap meshHeadings = CorpusReader.readMeshHeadings(meshPath);

        Set<String> docIds = documentContexts.keySet();
        GNATSpeciesAssigner speciesAssigner = new GNATSpeciesAssigner();

        if (mapper == null) {
            // Previously the first document triggered a NullPointerException on the null mapper.
            // NOTE(review): whether species assignment works without a term normalizer is not
            // visible from this file -- confirm.
            log.warn("No GeneMapper is available (expected configuration: {}); documents are processed without a term normalizer.", propertiesFile);
        }

        try (BufferedWriter bw = FileUtilities.getWriterToFile(predictedTaxList)) {
            for (String docId : docIds) {
                String documentText = documentContexts.get(docId);

                GeneDocument document = new GeneDocument(docId);
                if (mapper != null) {
                    document.setTermNormalizer(mapper.getMappingCore().getTermNormalizer());
                }
                document.setAcronyms(new HashSet(acronyms.get(docId)));
                document.setDocumentText(documentText);
                // The first line of the document text is used as the title.
                document.setDocumentTitle(documentText.split("\\n")[0]);
                document.setChunks((OffsetMap) chunks.get(docId));
                document.setPosTags(posTags.get(docId));
                document.setSpecies(new SpeciesCandidates(0, document.getDocumentTitle().length() - 1, Collections.emptySet(), (OffsetMap) species.get(docId)));
                document.setSentences(new OffsetSet(sentences.get(docId)));
                document.setMeshHeadings(meshHeadings.get(docId));
                document.setGenes(new HashSet(goldData.get(docId)));
                document.getAllGenes().forEach(gm -> gm.setDocumentContext(document.getDocumentText()));
                document.selectAllGenes();
                document.getGenes().forEach(gm -> speciesAssigner.setSpeciesHints(gm, new Parameters()));
                document.getAllGenes().forEach(gm -> System.out.println(gm.getDocId() + ", " + gm.getText() + ": " + gm.getTaxonomyOccurrences()));
                speciesAssigner.assign(document, Parameters.of(new Object[0]));

                for (GeneMention gm : document.getAllGenes()) {
                    bw.write(String.join("\t",
                            docId,
                            gm.getTaxonomyId(),
                            String.valueOf(gm.getBegin()),
                            String.valueOf(gm.getEnd()),
                            gm.getText()));
                    bw.newLine();
                }
            }
        }

        EntityEvaluator.main(new String[]{"-g", goldTaxListPath, "-p", predictedTaxList.getAbsolutePath()});
    }

    /**
     * Replaces the gene ID in the second column of a record with the taxonomy ID that the
     * mapper's candidate retrieval returns for it. The array is modified in place.
     *
     * @param record record whose element at index 1 is a gene ID
     * @param mapper mapper whose mapping core supplies the candidate retrieval
     * @return the same {@code record} array, with index 1 now holding the taxonomy ID
     * @throws IllegalStateException if no non-blank taxonomy ID is found for the gene ID
     */
    private static String[] replaceGeneByTaxId(String[] record, GeneMapper mapper) {
        String geneId = record[1];
        CandidateRetrieval candidateRetrieval = mapper.getMappingCore().getCandidateRetrieval();
        String taxId = candidateRetrieval.mapGeneIdToTaxId(geneId);
        if (taxId == null || taxId.trim().isEmpty()) {
            throw new IllegalStateException("Gene ID " + geneId + " does not have a taxonomy ID in the mention index.");
        }
        record[1] = taxId;
        return record;
    }
}

