/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.genemapper.evaluation.tools;

import com.google.common.collect.Multimap;
import de.julielab.evaluation.entities.EntityEvaluator;
import de.julielab.gene.candidateretrieval.CandidateRetrieval;
import de.julielab.geneexpbase.configuration.Parameters;
import de.julielab.geneexpbase.data.CorpusReader;
import de.julielab.geneexpbase.genemodel.Acronym;
import de.julielab.geneexpbase.genemodel.GeneDocument;
import de.julielab.geneexpbase.genemodel.GeneMention;
import de.julielab.geneexpbase.genemodel.MeshHeading;
import de.julielab.geneexpbase.genemodel.PosTag;
import de.julielab.geneexpbase.genemodel.SpeciesCandidates;
import de.julielab.geneexpbase.genemodel.SpeciesMention;
import de.julielab.genemapper.GeneMapper;
import de.julielab.java.utilities.FileUtilities;
import de.julielab.java.utilities.spanutils.OffsetMap;
import de.julielab.java.utilities.spanutils.OffsetSet;
import de.julielab.speciesassignment.GNATSpeciesAssigner;
import java.io.BufferedWriter;
import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.lang3.Range;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Command line tool that assigns taxonomy IDs to gold gene mentions of an evaluation corpus,
 * writes the result as a tab-separated list and evaluates that list against a gold taxonomy list.
 */
public class GenelistTaxonomyPredictor {
    private static final Logger log = LoggerFactory.getLogger(GenelistTaxonomyPredictor.class);

    /**
     * Reads the corpus annotations, builds one {@link GeneDocument} per document, runs the
     * {@link GNATSpeciesAssigner} on it and writes one line per gene mention to
     * <code>&lt;dataType&gt;.taxlist</code> in the working directory. The columns are document ID,
     * taxonomy ID, begin offset, end offset and mention text. Finally, {@link EntityEvaluator} is
     * invoked with the gold list (<code>-g</code>) and the predicted list (<code>-p</code>).
     *
     * @param args not used
     * @throws Exception if reading the corpus, writing the list or the evaluation fails
     */
    public static void main(String[] args) throws Exception {
        String dataType = "bc2train";
        File predictedTaxList = new File(dataType + ".taxlist");
        String propertiesFile = "data/eval_data/gnormplus_data/genemapper.properties";
        // No GeneMapper is constructed by this tool. Dereferencing this reference unconditionally
        // used to abort the run with a NullPointerException on the first document.
        GeneMapper mapper = null;
        String basePath = "data/eval_data/gnormplus_data/" + dataType;
        String genesPath = basePath + "/annotated/goldgenes.tsv.gz";
        // Sentences, chunks, POS tags and species are all read from the same mixed annotation file.
        String sentencesPath = basePath + "/annotated/annotations.tsv.gz";
        String chunksPath = basePath + "/annotated/annotations.tsv.gz";
        String speciesPath = basePath + "/annotated/annotations.tsv.gz";
        String acronymsPath = basePath + "/annotated/acronyms.tsv.gz";
        // "bc2train" -> "train": the data type without its three-character prefix.
        String meshPath = "data/eval_data/bc2_data/" + dataType.substring(3) + "/mesh.tsv.gz";
        String docTextPath = basePath + "/annotated/text";
        // Derived from the data type so that it cannot drift from the paths above.
        String goldTaxListPath = basePath + "/" + dataType + ".taxlist";

        Multimap<String, GeneMention> goldData = CorpusReader.readMentionsWithOffsets(genesPath);
        // Keep only mentions whose specific type is GENE; entries() is a view, removal writes through.
        goldData.entries().removeIf(e -> e.getValue().getSpecificType() != GeneMention.SpecificType.GENE);

        Multimap<String, Acronym> acronyms = CorpusReader.readAcronymAnnotations(acronymsPath);
        Map<String, String> documentContexts = CorpusReader.readGeneContexts(docTextPath);
        Multimap<String, Range<Integer>> sentences = CorpusReader.readMixedFileForSentenceOffsets(sentencesPath);
        Map<String, OffsetMap<SpeciesMention>> species = CorpusReader.readMixedFileForTextSpecies(speciesPath);
        Map<String, OffsetMap<String>> chunks = CorpusReader.readMixedFileForChunkOffsets(chunksPath);
        Multimap<String, PosTag> posTags = CorpusReader.readMixedFileForPosTags(chunksPath);
        Multimap<String, MeshHeading> meshHeadings = CorpusReader.readMeshHeadings(meshPath);
        Set<String> docIds = documentContexts.keySet();
        GNATSpeciesAssigner speciesAssigner = new GNATSpeciesAssigner();

        if (mapper == null) {
            log.warn("No GeneMapper is available ({} is not loaded); documents are built without a term normalizer.", propertiesFile);
        }

        try (BufferedWriter bw = FileUtilities.getWriterToFile(predictedTaxList)) {
            for (String docId : docIds) {
                String documentText = documentContexts.get(docId);
                GeneDocument document = new GeneDocument(docId);
                if (mapper != null) {
                    document.setTermNormalizer(mapper.getMappingCore().getTermNormalizer());
                }
                document.setAcronyms(new HashSet<Acronym>(acronyms.get(docId)));
                document.setDocumentText(documentText);
                // The first line of the document text is used as the title.
                document.setDocumentTitle(documentText.split("\\n")[0]);
                document.setChunks(chunks.get(docId));
                document.setPosTags(posTags.get(docId));
                document.setSpecies(new SpeciesCandidates(0, document.getDocumentTitle().length() - 1, Collections.emptySet(), species.get(docId)));
                document.setSentences(new OffsetSet(sentences.get(docId)));
                document.setMeshHeadings(meshHeadings.get(docId));
                document.setGenes(new HashSet<GeneMention>(goldData.get(docId)));
                document.getAllGenes().forEach(gm -> gm.setDocumentContext(document.getDocumentText()));
                document.selectAllGenes();
                document.getGenes().forEach(gm -> speciesAssigner.setSpeciesHints((GeneMention)gm, new Parameters()));
                document.getAllGenes().forEach(gm -> System.out.println(gm.getDocId() + ", " + gm.getText() + ": " + gm.getTaxonomyOccurrences()));
                speciesAssigner.assign(document, Parameters.of(new Object[0]));
                for (GeneMention gene : document.getAllGenes()) {
                    // A null element is rendered as "null", exactly as the former stream joining did.
                    bw.write(String.join("\t",
                            docId,
                            gene.getTaxonomyId(),
                            String.valueOf(gene.getBegin()),
                            String.valueOf(gene.getEnd()),
                            gene.getText()));
                    bw.newLine();
                }
            }
        }
        EntityEvaluator.main(new String[]{"-g", goldTaxListPath, "-p", predictedTaxList.getAbsolutePath()});
    }

    /**
     * Replaces the gene ID in the second column of <code>record</code> with the taxonomy ID the
     * mapper's candidate retrieval returns for it. The array is modified in place.
     *
     * @param record the record whose element at index 1 holds a gene ID
     * @param mapper the gene mapper providing the candidate retrieval
     * @return the same array instance with index 1 overwritten by the taxonomy ID
     * @throws IllegalStateException if no non-blank taxonomy ID is returned for the gene ID
     */
    private static String[] replaceGeneByTaxId(String[] record, GeneMapper mapper) {
        CandidateRetrieval candidateRetrieval = mapper.getMappingCore().getCandidateRetrieval();
        String geneId = record[1];
        String taxId = candidateRetrieval.mapGeneIdToTaxId(geneId);
        if (taxId == null || taxId.trim().isEmpty()) {
            throw new IllegalStateException("Gene ID " + geneId + " does not have a taxonomy ID in the mention index.");
        }
        record[1] = taxId;
        return record;
    }
}

