/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.geneexpbase.data;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.LinkedHashMultimap;
import com.google.common.collect.Multimap;
import de.julielab.geneexpbase.data.GeneInformation;
import de.julielab.geneexpbase.genemodel.Acronym;
import de.julielab.geneexpbase.genemodel.AcronymLongform;
import de.julielab.geneexpbase.genemodel.Apposition;
import de.julielab.geneexpbase.genemodel.CoreferenceExpression;
import de.julielab.geneexpbase.genemodel.CoreferenceSet;
import de.julielab.geneexpbase.genemodel.DictionaryGeneIdRecord;
import de.julielab.geneexpbase.genemodel.GeneMention;
import de.julielab.geneexpbase.genemodel.MeshHeading;
import de.julielab.geneexpbase.genemodel.PosTag;
import de.julielab.geneexpbase.genemodel.SpeciesMention;
import de.julielab.java.utilities.FileUtilities;
import de.julielab.java.utilities.spanutils.OffsetMap;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Stream;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.LineIterator;
import org.apache.commons.lang3.Range;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class CorpusReader {
    private static final Logger log = LoggerFactory.getLogger(CorpusReader.class);

    /**
     * Reads a tab-separated gold gene list and groups the resulting mentions by document ID.
     * <p>
     * Column 0 is the document ID, column 1 the gene ID and the optional column 2 the mention text.
     * Records whose {@code getGoldMentionId()} value is contained in {@link GeneInformation#DISCONTINUED}
     * are dropped; for all others the ID is mapped through {@link GeneInformation#REPLACED}.
     *
     * @param geneList path to the gold list file
     * @return the gold mentions keyed by document ID
     * @throws IOException if the file cannot be opened or read
     * @throws ArrayIndexOutOfBoundsException if a line has fewer than two columns
     */
    public static Multimap<String, GeneMention> readGoldIds(String geneList) throws IOException {
        HashMultimap<String, GeneMention> ids = HashMultimap.create();
        // Owning the stream in try-with-resources guarantees the file handle is released,
        // also when a malformed line aborts the loop with an exception.
        try (InputStream in = FileUtilities.getInputStreamFromFile(new File(geneList))) {
            LineIterator lineIterator = IOUtils.lineIterator(in, "UTF-8");
            while (lineIterator.hasNext()) {
                String line = lineIterator.next();
                String[] split = line.split("\t");
                GeneMention geneMention = new GeneMention();
                geneMention.setDocId(split[0]);
                geneMention.setId(split[1]);
                geneMention.setIds(List.of(split[1]));
                if (split.length > 2) {
                    geneMention.setText(split[2]);
                }
                if (GeneInformation.DISCONTINUED.contains(geneMention.getGoldMentionId())) {
                    continue;
                }
                geneMention.setId(GeneInformation.REPLACED.getOrDefault(geneMention.getGoldMentionId(), geneMention.getGoldMentionId()));
                ids.put(split[0], geneMention);
            }
        }
        return ids;
    }

    /**
     * Reads predicted mention strings from a directory that holds one file per document.
     * <p>
     * The document ID is the file name up to (excluding) its first dot, or the complete file name
     * when it contains no dot. From every line only the first tab-separated column is kept.
     * Files named {@code .DS_Store} are ignored.
     *
     * @param mentionListDocumentsDirectory path to the directory with the per-document mention files
     * @return the predicted mention strings keyed by document ID
     * @throws IllegalArgumentException if the path does not denote a directory
     * @throws IOException if the directory cannot be listed or a file cannot be read
     */
    public static Multimap<String, String> readPredictedMentions(String mentionListDocumentsDirectory) throws IOException {
        HashMultimap<String, String> predictedMentions = HashMultimap.create();
        File directory = new File(mentionListDocumentsDirectory);
        if (!directory.isDirectory()) {
            throw new IllegalArgumentException("The path \"" + mentionListDocumentsDirectory + "\" does not point to a directory. A directory holding separate files for each gene mention evaluation document is expected.");
        }
        File[] predictedMentionFiles = directory.listFiles((dir, name) -> !name.equals(".DS_Store"));
        // listFiles() signals an I/O error by returning null instead of throwing.
        if (predictedMentionFiles == null) {
            throw new IOException("Could not list the files of directory \"" + mentionListDocumentsDirectory + "\".");
        }
        for (File mentionFile : predictedMentionFiles) {
            String fileName = mentionFile.getName();
            int firstDot = fileName.indexOf('.');
            // A name without any dot previously caused substring(0, -1) to throw.
            String docId = firstDot < 0 ? fileName : fileName.substring(0, firstDot);
            try (InputStream in = FileUtilities.getInputStreamFromFile(mentionFile)) {
                LineIterator lineIterator = IOUtils.lineIterator(in, "UTF-8");
                while (lineIterator.hasNext()) {
                    String mentionString = lineIterator.next();
                    predictedMentions.put(docId, mentionString.split("\\t")[0]);
                }
            }
        }
        return predictedMentions;
    }

    /**
     * Convenience overload that delegates to the single-tagger variant with the only allowed
     * type suffix {@code "Gene"} and a {@code null} tagger argument.
     *
     * @param geneListFile path to the mixed annotation file
     * @return whatever the delegate returns for these arguments
     * @throws IOException if the file cannot be read
     */
    public static Multimap<String, GeneMention> readMixedFileForGenesWithOffsets(String geneListFile) throws IOException {
        Collection<String> geneSuffixOnly = Collections.singletonList("Gene");
        // The declared type of this local selects the GeneTagger overload rather than the Set one.
        GeneMention.GeneTagger unspecifiedTagger = null;
        return CorpusReader.readMixedFileForGenesWithOffsets(geneListFile, geneSuffixOnly, unspecifiedTagger);
    }

    /**
     * Reads gene mentions from a mixed annotation file, optionally restricted to one tagger.
     *
     * @param geneListFile        path to the mixed annotation file
     * @param allowedTypeSuffixes type-column suffixes to accept; {@code null} or empty accepts all
     * @param allowedTagger       the only tagger whose mentions are returned, or {@code null} for
     *                            no tagger restriction
     * @return the accepted mentions keyed by document ID
     * @throws IOException if the file cannot be read
     */
    public static Multimap<String, GeneMention> readMixedFileForGenesWithOffsets(String geneListFile, Collection<String> allowedTypeSuffixes, GeneMention.GeneTagger allowedTagger) throws IOException {
        // Wrapping null in a singleton set would make the delegate keep only mentions whose
        // tagger is null; a null set is what the delegate treats as "no tagger filter".
        Set<GeneMention.GeneTagger> allowedTaggers = allowedTagger == null ? null : Collections.singleton(allowedTagger);
        return CorpusReader.readMixedFileForGenesWithOffsets(geneListFile, allowedTypeSuffixes, allowedTaggers);
    }

    /**
     * Reads gene mentions from a mixed annotation file, filtered by type suffix and tagger.
     * <p>
     * Blank lines are skipped. When type suffixes are given, a line is accepted if its seventh
     * tab-separated column (index 6) ends with one of them. Accepted lines are parsed with
     * {@link #createGeneMention(String)}.
     *
     * @param geneListFile        path to the mixed annotation file
     * @param allowedTypeSuffixes type-column suffixes to accept; {@code null} or empty accepts all
     * @param allowedTaggers      taggers whose mentions are kept; {@code null} disables the tagger
     *                            filter, whereas an empty set rejects every mention
     * @return the accepted mentions keyed by document ID, in file order
     * @throws IllegalArgumentException if the path does not denote a regular file
     * @throws ArrayIndexOutOfBoundsException if suffix filtering is active and a line has fewer
     *                                        than seven columns
     * @throws IOException if the file cannot be read
     */
    public static Multimap<String, GeneMention> readMixedFileForGenesWithOffsets(String geneListFile, Collection<String> allowedTypeSuffixes, Set<GeneMention.GeneTagger> allowedTaggers) throws IOException {
        LinkedHashMultimap<String, GeneMention> mentionsWithOffsets = LinkedHashMultimap.create();
        File mentionFile = new File(geneListFile);
        if (!mentionFile.isFile()) {
            throw new IllegalArgumentException("The path \"" + geneListFile + "\" does not point to a file. A file holding one documentId, gene id, begin, end and gene mention record per line is required.");
        }
        boolean acceptAllTypes = allowedTypeSuffixes == null || allowedTypeSuffixes.isEmpty();
        try (InputStream in = FileUtilities.getInputStreamFromFile(mentionFile)) {
            LineIterator lineIterator = IOUtils.lineIterator(in, "UTF-8");
            while (lineIterator.hasNext()) {
                String line = lineIterator.nextLine();
                if (line.isBlank()) {
                    continue;
                }
                if (!acceptAllTypes) {
                    // Split once per line instead of once per candidate suffix.
                    String type;
                    try {
                        type = line.split("\t")[6];
                    }
                    catch (ArrayIndexOutOfBoundsException e) {
                        log.error("Format error 'Not enough columns' in line '{}'", line, e);
                        throw e;
                    }
                    boolean foundSuffix = false;
                    for (String allowedSuffix : allowedTypeSuffixes) {
                        if (type.endsWith(allowedSuffix)) {
                            foundSuffix = true;
                            break;
                        }
                    }
                    if (!foundSuffix) {
                        continue;
                    }
                }
                GeneMention geneMention = CorpusReader.createGeneMention(line);
                if (allowedTaggers != null && !allowedTaggers.contains(geneMention.getTagger())) {
                    continue;
                }
                mentionsWithOffsets.put(geneMention.getDocId(), geneMention);
            }
        }
        return mentionsWithOffsets;
    }

    /**
     * Convenience overload that delegates to the three-argument variant, passing {@code null}
     * as the set of allowed taggers.
     *
     * @param geneListFile path to the mixed annotation file
     * @param types        the specific mention types to keep
     * @return the mentions of the requested types keyed by document ID
     * @throws IOException if the file cannot be read
     */
    public static Multimap<String, GeneMention> readMixedFileForMentionTypesWithOffsets(String geneListFile, Set<GeneMention.SpecificType> types) throws IOException {
        Set<GeneMention.GeneTagger> noTaggerRestriction = null;
        return CorpusReader.readMixedFileForMentionTypesWithOffsets(geneListFile, types, noTaggerRestriction);
    }

    /**
     * Reads mentions of selected specific types from a mixed annotation file.
     * <p>
     * The seventh tab-separated column (index 6) is mapped to a {@code SpecificType}:
     * family names, domain motifs, and {@code GENE} for everything else. Lines whose type is not
     * in {@code types} are skipped, as are blank lines. Remaining lines are parsed with
     * {@link #createGeneMention(String)}.
     *
     * @param geneListFile   path to the mixed annotation file
     * @param types          the specific mention types to keep
     * @param allowedTaggers taggers whose mentions are kept; {@code null} or empty disables the
     *                       tagger filter
     * @return the accepted mentions keyed by document ID, in file order
     * @throws IllegalArgumentException if the path does not denote a regular file
     * @throws ArrayIndexOutOfBoundsException if a non-blank line has fewer than seven columns
     * @throws IOException if the file cannot be read
     */
    public static Multimap<String, GeneMention> readMixedFileForMentionTypesWithOffsets(String geneListFile, Set<GeneMention.SpecificType> types, Set<GeneMention.GeneTagger> allowedTaggers) throws IOException {
        LinkedHashMultimap<String, GeneMention> mentionsWithOffsets = LinkedHashMultimap.create();
        File mentionFile = new File(geneListFile);
        if (!mentionFile.isFile()) {
            throw new IllegalArgumentException("The path \"" + geneListFile + "\" does not point to a file. A file holding one documentId, gene id, begin, end and gene mention record per line is required.");
        }
        boolean filterByTagger = allowedTaggers != null && !allowedTaggers.isEmpty();
        try (InputStream in = FileUtilities.getInputStreamFromFile(mentionFile)) {
            LineIterator lineIterator = IOUtils.lineIterator(in, "UTF-8");
            while (lineIterator.hasNext()) {
                String line = lineIterator.nextLine();
                // Blank lines (e.g. a trailing newline) carry no record; the sibling gene reader skips them too.
                if (line.isBlank()) {
                    continue;
                }
                String type = line.split("\\t")[6];
                GeneMention.SpecificType specificType;
                if (type.equalsIgnoreCase("protein_familiy_or_group") || type.equalsIgnoreCase("familyname")) {
                    specificType = GeneMention.SpecificType.FAMILYNAME;
                } else if (type.equalsIgnoreCase("domainmotif")) {
                    specificType = GeneMention.SpecificType.DOMAINMOTIF;
                } else {
                    specificType = GeneMention.SpecificType.GENE;
                }
                if (!types.contains(specificType)) {
                    continue;
                }
                GeneMention geneMention = CorpusReader.createGeneMention(line);
                if (filterByTagger && !allowedTaggers.contains(geneMention.getTagger())) {
                    continue;
                }
                mentionsWithOffsets.put(geneMention.getDocId(), geneMention);
            }
        }
        return mentionsWithOffsets;
    }

    /**
     * Reads every line that ends with {@code "Gene"} from a mixed annotation file and parses it
     * with {@link #createGeneMention(String)}.
     *
     * @param geneListFile path to the mixed annotation file
     * @return the parsed mentions keyed by document ID, in file order
     * @throws IllegalArgumentException if the path does not denote a regular file
     * @throws IOException if the file cannot be read
     */
    public static Multimap<String, GeneMention> readMixedFileForChunks(String geneListFile) throws IOException {
        LinkedHashMultimap<String, GeneMention> mentionsWithOffsets = LinkedHashMultimap.create();
        File mentionFile = new File(geneListFile);
        if (!mentionFile.isFile()) {
            throw new IllegalArgumentException("The path \"" + geneListFile + "\" does not point to a file. A file holding one documentId, gene id, begin, end and gene mention record per line is required.");
        }
        // The stream is closed here so the file handle does not stay open after reading.
        try (InputStream in = FileUtilities.getInputStreamFromFile(mentionFile)) {
            LineIterator lineIterator = IOUtils.lineIterator(in, "UTF-8");
            while (lineIterator.hasNext()) {
                String line = lineIterator.nextLine();
                if (!line.endsWith("Gene")) {
                    continue;
                }
                GeneMention geneMention = CorpusReader.createGeneMention(line);
                mentionsWithOffsets.put(geneMention.getDocId(), geneMention);
            }
        }
        return mentionsWithOffsets;
    }

    /**
     * Reads gene mention records with offsets from a tab-separated file.
     * <p>
     * Lines starting with {@code #} are skipped. All other lines are parsed with
     * {@link #createGeneMention(String)}. Mentions whose {@code getGoldMentionId()} value is in
     * {@link GeneInformation#DISCONTINUED} are dropped; for the rest the ID is mapped through
     * {@link GeneInformation#REPLACED}.
     *
     * @param geneListFile path to the mention file
     * @return the mentions keyed by document ID, in file order
     * @throws IllegalArgumentException if the path does not denote a regular file
     * @throws IOException if the file cannot be read
     */
    public static Multimap<String, GeneMention> readMentionsWithOffsets(String geneListFile) throws IOException {
        LinkedHashMultimap<String, GeneMention> mentionsWithOffsets = LinkedHashMultimap.create();
        File mentionFile = new File(geneListFile);
        if (!mentionFile.isFile()) {
            throw new IllegalArgumentException("The path \"" + geneListFile + "\" does not point to a file. A file holding one documentId, gene id, begin, end and gene mention record per line is required.");
        }
        // The stream is closed here so the file handle does not stay open after reading.
        try (InputStream in = FileUtilities.getInputStreamFromFile(mentionFile)) {
            LineIterator lineIterator = IOUtils.lineIterator(in, "UTF-8");
            while (lineIterator.hasNext()) {
                String mentionString = lineIterator.next();
                if (mentionString.startsWith("#")) {
                    continue;
                }
                GeneMention geneMention = CorpusReader.createGeneMention(mentionString);
                if (GeneInformation.DISCONTINUED.contains(geneMention.getGoldMentionId())) {
                    continue;
                }
                geneMention.setId(GeneInformation.REPLACED.getOrDefault(geneMention.getGoldMentionId(), geneMention.getGoldMentionId()));
                mentionsWithOffsets.put(geneMention.getDocId(), geneMention);
            }
        }
        return mentionsWithOffsets;
    }

    /**
     * Parses one tab-separated mention record into a {@link GeneMention}.
     * <p>
     * Columns by index: 0 document ID, 1 gene ID, 2 begin offset, 3 end offset, 4 mention text
     * (optional), 5 identifier of the recognition system (optional), 6 specific type (optional),
     * 7 specific-type confidence (optional).
     *
     * @param mentionString the tab-separated record
     * @return the populated mention
     * @throws ArrayIndexOutOfBoundsException if the record has fewer than four columns
     * @throws NumberFormatException if the begin or end column is not an integer
     * @throws IllegalArgumentException if column 5 names a recognition system none of the
     *                                  checks below matches
     */
    public static GeneMention createGeneMention(String mentionString) {
        String[] split = mentionString.split("\\t");
        String docId = split[0];
        String geneId = split[1];
        int begin = Integer.parseInt(split[2]);
        int end = Integer.parseInt(split[3]);
        // Defaults that stay in effect when the optional columns are absent.
        String text = null;
        GeneMention.GeneTagger tagger = null;
        GeneMention.SpecificType specificType = GeneMention.SpecificType.UNKNOWN;
        double confidence = 0.0;
        if (split.length > 4) {
            text = split[4];
        }
        if (split.length > 5) {
            // Map the system identifier to a tagger. Most checks are substring matches, so the
            // order matters: a longer name must be tested before any shorter name it contains
            // (e.g. "FlairJPGCollapsedVarCompEnum" before "FlairJPGCollapsedVar").
            String systemId = split[5];
            if (systemId.contains("ProteinConsistencyTagger")) {
                tagger = GeneMention.GeneTagger.CONSISTENCY_TAGGER;
            } else if (systemId.contains("ExtendedProteinsMerger")) {
                tagger = GeneMention.GeneTagger.EXPANSION_TAGGER;
            } else if (systemId.contains("GazetteerAnnotator")) {
                tagger = GeneMention.GeneTagger.GAZETTEER;
            } else if (systemId.contains("EntityAnnotator")) {
                tagger = GeneMention.GeneTagger.JNET;
            } else if (systemId.contains("JNET ConsistencyPreservation")) {
                tagger = GeneMention.GeneTagger.JNET;
            } else if (systemId.contains("BANNER")) {
                tagger = GeneMention.GeneTagger.BANNER;
            } else if (systemId.contains("Reader")) {
                tagger = GeneMention.GeneTagger.GOLD;
            } else if (systemId.equalsIgnoreCase("gold")) {
                tagger = GeneMention.GeneTagger.GOLD;
            } else if (systemId.contains("FlairNerAnnotator")) {
                tagger = GeneMention.GeneTagger.FLAIR;
            } else if (systemId.contains("FlairBC2GMTrain1024")) {
                tagger = GeneMention.GeneTagger.FLAIR;
            } else if (systemId.contains("FlairGNormPlusNLMIAT")) {
                tagger = GeneMention.GeneTagger.FLAIR_GNORMPLUSNLMIAT;
            } else if (systemId.contains("FlairBC2GMTrainTest")) {
                tagger = GeneMention.GeneTagger.FLAIR_BC2TRAINTEST;
            } else if (systemId.contains("FlairJPGCollapsedVarCompEnum")) {
                tagger = GeneMention.GeneTagger.FLAIR_JPG_COLLAPSED_VARCOMPENUM;
            } else if (systemId.contains("FlairJPGCollapsedVar")) {
                tagger = GeneMention.GeneTagger.FLAIR_JPG_COLLAPSED_VAR;
            } else if (systemId.contains("FlairJPGNoBC2TestNoTest")) {
                tagger = GeneMention.GeneTagger.FLAIR_JPG_NOBC2TEST_NOTEST;
            } else if (systemId.contains("FlairJPGNoBc2TestNoTestCollapsedVar")) {
                // Note the lower-case "c" in "Bc2": the previous branch (upper-case "BC2") does not match this name.
                tagger = GeneMention.GeneTagger.FLAIR_JPG_NOBC2TEST_NOTEST_COLLAPSED_VAR;
            } else if (systemId.contains("FlairProGeneBC2TrainIsDevGNormPlusEntities")) {
                tagger = GeneMention.GeneTagger.FLAIR_JPG_GNP_ENTITIES;
            } else if (systemId.equals("GNormPlusTagger") || systemId.equals("GNormPlus")) {
                tagger = GeneMention.GeneTagger.GNORM_PLUS;
            } else {
                throw new IllegalArgumentException("The gene recognition system " + systemId + " is unknown. Mention record: " + mentionString);
            }
        }
        if (split.length > 6) {
            String stype = split[6];
            if (tagger != GeneMention.GeneTagger.GAZETTEER) {
                // Family names are recognised by label or by one of the numeric suffixes; a
                // "domainmotif" label maps to DOMAINMOTIF; everything else counts as GENE.
                specificType = stype.equalsIgnoreCase("protein_familiy_or_group") || stype.equalsIgnoreCase("familyname") || stype.endsWith("-222") || stype.endsWith("-333") || stype.endsWith("-444") || stype.endsWith("-555") ? GeneMention.SpecificType.FAMILYNAME : (stype.equalsIgnoreCase("domainmotif") ? GeneMention.SpecificType.DOMAINMOTIF : GeneMention.SpecificType.GENE);
            } else if (stype.contains(":")) {
                // NOTE(review): the record is constructed but never used, so specificType stays
                // UNKNOWN on this path. Possibly logic lost in decompilation - TODO confirm
                // against the original source.
                DictionaryGeneIdRecord dictionaryGeneIdRecord = new DictionaryGeneIdRecord(stype);
            } else {
                specificType = GeneMention.SpecificType.GENE;
            }
        }
        if (split.length > 7) {
            try {
                confidence = Double.parseDouble(split[7]);
            }
            catch (NumberFormatException stype) {
                // An unparsable confidence value is ignored on purpose; confidence keeps its
                // default of 0.0. (The variable name "stype" is a decompiler artifact.)
            }
        }
        GeneMention geneMention = new GeneMention();
        geneMention.setDocId(docId);
        // A blank gene ID or the literal string "null" leaves the mention without an ID.
        if (!StringUtils.isBlank(geneId) && !geneId.equals("null")) {
            geneMention.setId(geneId);
            geneMention.setIds(Collections.singletonList(geneId));
        }
        geneMention.setOffsets(Range.between(begin, end));
        geneMention.setText(text);
        geneMention.setTagger(tagger);
        geneMention.setSpecificType(specificType);
        geneMention.setSpecificTypeConfidence(confidence);
        return geneMention;
    }

    /**
     * Reads gene mention records that additionally carry a taxonomy ID.
     * <p>
     * Columns by index: 0 document ID, 1 gene ID, 2 begin offset, 3 end offset, 4 mention text
     * (optional), 5 taxonomy ID (optional), 6 identifier of the recognition system (optional),
     * 7 label confidence (optional; the literal string {@code "null"} leaves the confidence at
     * {@code 0.0}).
     *
     * @param geneListFile path to the mention file
     * @return the mentions keyed by document ID, in file order
     * @throws IllegalArgumentException if the path does not denote a regular file or a record
     *                                  names an unknown recognition system
     * @throws NumberFormatException if an offset or a non-"null" confidence is not numeric
     * @throws IOException if the file cannot be read
     */
    public static Multimap<String, GeneMention> readMentionsWithOffsetsAndSpecies(String geneListFile) throws IOException {
        LinkedHashMultimap<String, GeneMention> mentionsWithOffsets = LinkedHashMultimap.create();
        File mentionFile = new File(geneListFile);
        if (!mentionFile.isFile()) {
            throw new IllegalArgumentException("The path \"" + geneListFile + "\" does not point to a file. A file holding one documentId, gene id, begin, end and gene mention record per line is required.");
        }
        try (InputStream in = FileUtilities.getInputStreamFromFile(mentionFile)) {
            LineIterator lineIterator = IOUtils.lineIterator(in, "UTF-8");
            while (lineIterator.hasNext()) {
                String mentionString = lineIterator.next();
                String[] split = mentionString.split("\\t");
                String docId = split[0];
                String geneId = split[1];
                int begin = Integer.parseInt(split[2]);
                int end = Integer.parseInt(split[3]);
                String text = null;
                String taxId = null;
                GeneMention.GeneTagger tagger = null;
                double labelConfidence = 0.0;
                if (split.length > 4) {
                    text = split[4];
                }
                if (split.length > 5) {
                    taxId = split[5];
                }
                if (split.length > 6) {
                    String systemId = split[6];
                    if (systemId.contains("GazetteerAnnotator")) {
                        tagger = GeneMention.GeneTagger.GAZETTEER;
                    } else if (systemId.endsWith("EntityAnnotator")) {
                        tagger = GeneMention.GeneTagger.JNET;
                    } else if (systemId.contains("JNET ConsistencyPreservation")) {
                        tagger = GeneMention.GeneTagger.JNET;
                    } else if (systemId.contains("BANNER")) {
                        tagger = GeneMention.GeneTagger.BANNER;
                    } else if (systemId.contains("FlairNerAnnotator")) {
                        tagger = GeneMention.GeneTagger.FLAIR;
                    } else if (systemId.contains("FlairJPGCollapsedVarCompEnum")) {
                        // Must be tested before the shorter "FlairJPGCollapsedVar", which is a
                        // substring of this name and would otherwise shadow this branch.
                        tagger = GeneMention.GeneTagger.FLAIR_JPG_COLLAPSED_VARCOMPENUM;
                    } else if (systemId.contains("FlairJPGCollapsedVar")) {
                        tagger = GeneMention.GeneTagger.FLAIR_JPG_COLLAPSED_VAR;
                    } else {
                        throw new IllegalArgumentException("The gene recognition system " + systemId + " is unknown.");
                    }
                }
                // Compare by value: a reference comparison against the literal never matches a
                // substring produced by split(), so "null" used to reach Double.parseDouble.
                if (split.length > 7 && !"null".equals(split[7])) {
                    labelConfidence = Double.parseDouble(split[7]);
                }
                GeneMention geneMention = new GeneMention();
                geneMention.setDocId(docId);
                if (!StringUtils.isBlank(geneId)) {
                    geneMention.setId(geneId);
                }
                geneMention.setOffsets(Range.between(begin, end));
                geneMention.setText(text);
                geneMention.setTaxonomyId(taxId);
                geneMention.setTagger(tagger);
                geneMention.setSpecificTypeConfidence(labelConfidence);
                mentionsWithOffsets.put(docId, geneMention);
            }
        }
        return mentionsWithOffsets;
    }

    /**
     * Reads the text of every file in a directory into a map keyed by document ID.
     * <p>
     * The document ID is the file name with a trailing {@code .txt}, {@code .txt.gz} or
     * {@code .gz} removed. File contents are decoded as UTF-8 and trimmed. Files named
     * {@code .DS_Store} are ignored.
     *
     * @param contextDocumentsDirectory path to the directory holding the context documents
     * @return document text keyed by document ID
     * @throws IllegalArgumentException if the path does not denote a directory
     * @throws IOException if the directory cannot be listed or a file cannot be read
     */
    public static Map<String, String> readGeneContexts(String contextDocumentsDirectory) throws IOException {
        HashMap<String, String> documentContexts = new HashMap<String, String>();
        File directory = new File(contextDocumentsDirectory);
        if (!directory.isDirectory()) {
            throw new IllegalArgumentException("The path \"" + contextDocumentsDirectory + "\" does not point to a directory. A directory holding separate files for each gene mention evaluation document is expected.");
        }
        File[] contextFiles = directory.listFiles((dir, name) -> !name.equals(".DS_Store"));
        // listFiles() signals an I/O error by returning null instead of throwing.
        if (contextFiles == null) {
            throw new IOException("Could not list the files of directory \"" + contextDocumentsDirectory + "\".");
        }
        for (File contextFile : contextFiles) {
            String docId = contextFile.getName().replaceAll("\\.txt$", "").replaceAll("\\.txt\\.gz$", "").replaceAll("\\.gz$", "");
            try (InputStream is = FileUtilities.getInputStreamFromFile(contextFile)) {
                String context = IOUtils.toString(is, StandardCharsets.UTF_8).trim();
                documentContexts.put(docId, context);
            }
        }
        return documentContexts;
    }

    /**
     * Projects a document-to-mention multimap onto the {@code getGoldMentionId()} values of its
     * mentions.
     *
     * @param bc2TrainGold mentions keyed by document ID
     * @return for each document ID the distinct gold mention IDs of its mentions
     */
    public static Multimap<String, String> convertGoldMentionsToIdsPerDocument(Multimap<String, GeneMention> bc2TrainGold) {
        HashMultimap<String, String> goldIdsByDocument = HashMultimap.create();
        // entries() visits each (document, mention) pair exactly once.
        for (Map.Entry<String, GeneMention> documentAndMention : bc2TrainGold.entries()) {
            String goldId = documentAndMention.getValue().getGoldMentionId();
            goldIdsByDocument.put(documentAndMention.getKey(), goldId);
        }
        return goldIdsByDocument;
    }

    /**
     * Projects a document-to-mention multimap onto the {@code getText()} values of its mentions.
     *
     * @param bc2TrainGold mentions keyed by document ID
     * @return for each document ID the distinct mention texts of its mentions
     */
    public static Multimap<String, String> convertGoldMentionsToMentionTextPerDocument(Multimap<String, GeneMention> bc2TrainGold) {
        HashMultimap<String, String> textsByDocument = HashMultimap.create();
        // entries() visits each (document, mention) pair exactly once.
        for (Map.Entry<String, GeneMention> documentAndMention : bc2TrainGold.entries()) {
            String mentionText = documentAndMention.getValue().getText();
            textsByDocument.put(documentAndMention.getKey(), mentionText);
        }
        return textsByDocument;
    }

    /**
     * Collects the {@code getGoldMentionId()} values of the given mentions into a set.
     *
     * @param goldMentionsForDoc the mentions to read the IDs from
     * @return a new mutable set with the distinct IDs
     */
    public static Set<String> getIdsOfMentions(Collection<GeneMention> goldMentionsForDoc) {
        Set<String> distinctIds = new HashSet<>(goldMentionsForDoc.size());
        goldMentionsForDoc.forEach(mention -> distinctIds.add(mention.getGoldMentionId()));
        return distinctIds;
    }

    /**
     * Selects the mentions whose begin/end range overlaps the range spanned by {@code begin}
     * and {@code end}, as decided by {@code Range.isOverlappedBy}.
     *
     * @param mentions the mentions to filter
     * @param begin    one bound of the sought range
     * @param end      the other bound of the sought range
     * @return a new list with the overlapping mentions in iteration order of {@code mentions}
     */
    public static List<GeneMention> getGeneMentionsInRange(Collection<GeneMention> mentions, int begin, int end) {
        Range<Integer> window = Range.between(begin, end);
        List<GeneMention> overlapping = new ArrayList<>();
        for (GeneMention candidate : mentions) {
            Range<Integer> candidateSpan = Range.between(candidate.getBegin(), candidate.getEnd());
            if (candidateSpan.isOverlappedBy(window)) {
                overlapping.add(candidate);
            }
        }
        return overlapping;
    }

    /**
     * Selects the candidates whose begin/end range overlaps that of the reference mention, as
     * decided by {@code Range.isOverlappedBy}.
     *
     * @param referenceMention  the mention whose position is sought
     * @param candidateMentions the mentions to filter
     * @return a new set with the overlapping candidates
     */
    public static Set<GeneMention> getGeneMentionsAtPosition(GeneMention referenceMention, Collection<GeneMention> candidateMentions) {
        Range<Integer> referenceSpan = Range.between(referenceMention.getBegin(), referenceMention.getEnd());
        Set<GeneMention> atSamePosition = new HashSet<>();
        candidateMentions.forEach(candidate -> {
            Range<Integer> candidateSpan = Range.between(candidate.getBegin(), candidate.getEnd());
            if (candidateSpan.isOverlappedBy(referenceSpan)) {
                atSamePosition.add(candidate);
            }
        });
        return atSamePosition;
    }

    /**
     * Reads acronym and long-form annotations from a tab-separated file.
     * <p>
     * Columns by index: 0 document ID, 1 annotation ID, 2 begin offset, 3 end offset. A record
     * whose ID starts with {@code F} defines a long form; a record whose ID starts with
     * {@code A} defines an acronym and names its long form's ID in column 4. A long form must
     * appear on an earlier line than the acronyms referring to it. Long forms are looked up by
     * ID only, so a later definition with the same ID replaces an earlier one.
     *
     * @param acronymAnnotationPath path to the annotation file
     * @return the acronyms, linked to their long forms, keyed by document ID
     * @throws IllegalArgumentException if an acronym refers to a long form that was not defined
     *                                  on an earlier line
     * @throws IOException if the file cannot be read
     */
    public static Multimap<String, Acronym> readAcronymAnnotations(String acronymAnnotationPath) throws IOException {
        File acronymAnnotationFile = new File(acronymAnnotationPath);
        HashMultimap<String, Acronym> acronyms = HashMultimap.create();
        Map<String, AcronymLongform> longforms = new HashMap<>();
        // The reader itself is the managed resource: closing a stream obtained from
        // BufferedReader.lines() would not close the underlying reader.
        try (BufferedReader reader = FileUtilities.getReaderFromFile(acronymAnnotationFile)) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] split = line.split("\\t");
                String docId = split[0];
                String id = split[1];
                int begin = Integer.parseInt(split[2]);
                int end = Integer.parseInt(split[3]);
                if (id.startsWith("A")) {
                    String longformId = split[4];
                    AcronymLongform acronymLongform = longforms.get(longformId);
                    if (acronymLongform == null) {
                        // Fail with context instead of a bare NullPointerException further down.
                        throw new IllegalArgumentException("Acronym " + id + " of document " + docId + " in " + acronymAnnotationPath + " refers to the long form " + longformId + " which is not defined on an earlier line.");
                    }
                    Acronym acronym = new Acronym();
                    acronym.setOffsets(Range.between(begin, end));
                    acronym.setLongform(acronymLongform);
                    acronymLongform.addAcronym(acronym);
                    acronyms.put(docId, acronym);
                } else if (id.startsWith("F")) {
                    AcronymLongform longform = new AcronymLongform();
                    longform.setOffsets(Range.between(begin, end));
                    longforms.put(id, longform);
                }
            }
        }
        return acronyms;
    }

    /**
     * Reads document titles from a two-column, tab-separated file (document ID, title).
     *
     * @param titlesPath path to the titles file
     * @return titles keyed by document ID; a later line replaces an earlier one with the same ID
     * @throws IllegalArgumentException if a line does not split into exactly two columns
     * @throws IOException if the file cannot be read
     */
    public static Map<String, String> readTitles(String titlesPath) throws IOException {
        File titlesFile = new File(titlesPath);
        Map<String, String> titles = new HashMap<>();
        // The reader itself is the managed resource: closing a stream obtained from
        // BufferedReader.lines() would not close the underlying reader.
        try (BufferedReader reader = FileUtilities.getReaderFromFile(titlesFile)) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] split = line.split("\\t");
                if (split.length != 2) {
                    throw new IllegalArgumentException(titlesPath + " should have exactly two columns.");
                }
                titles.put(split[0], split[1]);
            }
        }
        return titles;
    }

    /**
     * Reads MeSH terms from a tab-separated file whose first column is the document ID and
     * whose remaining columns each hold one term.
     *
     * @param meshPath path to the MeSH term file
     * @return the distinct terms keyed by document ID
     * @throws IllegalArgumentException if a line has fewer than two columns
     * @throws IOException if the file cannot be read
     */
    public static Multimap<String, String> readMeshterms(String meshPath) throws IOException {
        File meshFile = new File(meshPath);
        HashMultimap<String, String> meshterms = HashMultimap.create();
        // The reader itself is the managed resource: closing a stream obtained from
        // BufferedReader.lines() would not close the underlying reader.
        try (BufferedReader reader = FileUtilities.getReaderFromFile(meshFile)) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] split = line.split("\\t");
                if (split.length < 2) {
                    throw new IllegalArgumentException(meshPath + " should have at least two columns in each line.");
                }
                String docId = split[0];
                for (int i = 1; i < split.length; ++i) {
                    // Index with i: reading split[1] on every pass dropped all terms after the first.
                    meshterms.put(docId, split[i]);
                }
            }
        }
        return meshterms;
    }

    /**
     * Reads the species found in document titles from a tab-separated file. Column 0 is the
     * document ID, column 1 a semicolon-separated list of species; further columns are ignored.
     *
     * @param titlesPath path to the title species file
     * @return the distinct species entries keyed by document ID
     * @throws IllegalArgumentException if a line has fewer than two columns
     * @throws IOException if the file cannot be read
     */
    public static Multimap<String, String> readTitleSpecies(String titlesPath) throws IOException {
        File titleSpeciesFile = new File(titlesPath);
        HashMultimap<String, String> titleSpecies = HashMultimap.create();
        // The reader itself is the managed resource: closing a stream obtained from
        // BufferedReader.lines() would not close the underlying reader.
        try (BufferedReader reader = FileUtilities.getReaderFromFile(titleSpeciesFile)) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] split = line.split("\\t");
                if (split.length < 2) {
                    // The message now states what is actually checked (at least, not exactly, two).
                    throw new IllegalArgumentException(titlesPath + " should have at least two columns in each line.");
                }
                String docId = split[0];
                for (String species : split[1].split(";")) {
                    titleSpecies.put(docId, species);
                }
            }
        }
        return titleSpecies;
    }

    /**
     * Reads the species derived from MeSH headings from a tab-separated file. Column 0 is the
     * document ID; every further non-empty column holds a semicolon-separated list of species.
     *
     * @param meshPath path to the MeSH species file
     * @return the distinct species entries keyed by document ID
     * @throws IllegalArgumentException if a line has fewer than two columns
     * @throws IOException if the file cannot be read
     */
    public static Multimap<String, String> readMeshSpecies(String meshPath) throws IOException {
        File meshFile = new File(meshPath);
        HashMultimap<String, String> meshSpecies = HashMultimap.create();
        // The reader itself is the managed resource: closing a stream obtained from
        // BufferedReader.lines() would not close the underlying reader.
        try (BufferedReader reader = FileUtilities.getReaderFromFile(meshFile)) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] split = line.split("\\t");
                if (split.length < 2) {
                    throw new IllegalArgumentException(meshPath + " should have at least two columns in each line.");
                }
                String docId = split[0];
                for (int i = 1; i < split.length; ++i) {
                    if (split[i].equals("")) {
                        continue;
                    }
                    for (String species : split[i].split(";")) {
                        meshSpecies.put(docId, species);
                    }
                }
            }
        }
        return meshSpecies;
    }

    /**
     * Reads species mentions from a mixed annotation file. Only lines ending with
     * {@code "Organism"} or {@code "Species"} are used; they must have exactly seven
     * tab-separated columns, of which column 0 is the document ID, column 1 the taxonomy ID,
     * columns 2 and 3 the begin and end offsets and column 4 is passed to the
     * {@code SpeciesMention} constructor as its second argument.
     *
     * @param textPath path to the mixed annotation file
     * @return per document ID an offset map of its species mentions
     * @throws IllegalArgumentException if a relevant line does not have exactly seven columns
     * @throws IOException if the file cannot be read
     */
    public static Map<String, OffsetMap<SpeciesMention>> readMixedFileForTextSpecies(String textPath) throws IOException {
        File textFile = new File(textPath);
        Map<String, OffsetMap<SpeciesMention>> species = new HashMap<>();
        int lineNumber = 0;
        // The reader itself is the managed resource: closing a stream obtained from
        // BufferedReader.lines() would not close the underlying reader.
        try (BufferedReader reader = FileUtilities.getReaderFromFile(textFile)) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Count every line, not only the relevant ones, so error messages point at the real line.
                ++lineNumber;
                if (!line.endsWith("Organism") && !line.endsWith("Species")) {
                    continue;
                }
                String[] split = line.split("\\t");
                if (split.length != 7) {
                    throw new IllegalArgumentException("Line " + lineNumber + ": " + textPath + " should have exactly seven columns in each line.");
                }
                String docId = split[0];
                String taxId = split[1];
                int start = Integer.parseInt(split[2]);
                int end = Integer.parseInt(split[3]);
                Range<Integer> offsets = Range.between(start, end);
                OffsetMap<SpeciesMention> docSpecies = species.computeIfAbsent(docId, key -> new OffsetMap<SpeciesMention>());
                docSpecies.put(offsets, new SpeciesMention(taxId, split[4], start, end));
            }
        }
        return species;
    }

    /**
     * Reads part-of-speech tags from a mixed annotation file. Only lines ending with
     * {@code "PennBioIEPOSTag"} are used; they must have exactly seven tab-separated columns,
     * of which column 0 is the document ID, column 1 the tag and columns 2 and 3 the begin and
     * end offsets.
     *
     * @param textPath path to the mixed annotation file
     * @return the tags keyed by document ID, in file order
     * @throws IllegalArgumentException if a relevant line does not have exactly seven columns
     * @throws IOException if the file cannot be read
     */
    public static Multimap<String, PosTag> readMixedFileForPosTags(String textPath) throws IOException {
        File annotationFile = new File(textPath);
        LinkedHashMultimap<String, PosTag> posTags = LinkedHashMultimap.create();
        int lineNumber = 0;
        // The reader itself is the managed resource: closing a stream obtained from
        // BufferedReader.lines() would not close the underlying reader.
        try (BufferedReader reader = FileUtilities.getReaderFromFile(annotationFile)) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Count every line, not only the relevant ones, so error messages point at the real line.
                ++lineNumber;
                if (!line.endsWith("PennBioIEPOSTag")) {
                    continue;
                }
                String[] split = line.split("\\t");
                if (split.length != 7) {
                    throw new IllegalArgumentException("Line " + lineNumber + ": " + textPath + " should have exactly seven columns in each line.");
                }
                String docId = split[0];
                String tag = split[1];
                int start = Integer.parseInt(split[2]);
                int end = Integer.parseInt(split[3]);
                Range<Integer> offsets = Range.between(start, end);
                posTags.put(docId, new PosTag(tag, offsets));
            }
        }
        return posTags;
    }

    /**
     * Reads sentence offsets from a mixed annotation file. Only lines whose last tab-separated
     * column contains {@code "Sentence"} are used; column 0 is the document ID and columns 2
     * and 3 are the begin and end offsets.
     *
     * @param sentencePath path to the mixed annotation file
     * @return the distinct sentence ranges keyed by document ID
     * @throws ArrayIndexOutOfBoundsException if a line splits into no columns at all (e.g. a
     *                                        line consisting only of tabs)
     * @throws IllegalArgumentException if a relevant line has fewer than four columns
     * @throws IOException if the file cannot be read
     */
    public static Multimap<String, Range<Integer>> readMixedFileForSentenceOffsets(String sentencePath) throws IOException {
        File annotationFile = new File(sentencePath);
        HashMultimap<String, Range<Integer>> sentences = HashMultimap.create();
        int lineNumber = 0;
        // The reader itself is the managed resource: closing a stream obtained from
        // BufferedReader.lines() would not close the underlying reader.
        try (BufferedReader reader = FileUtilities.getReaderFromFile(annotationFile)) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Count every line, not only the relevant ones, so error messages point at the real line.
                ++lineNumber;
                String[] split = line.split("\\t");
                String lastColumn;
                try {
                    lastColumn = split[split.length - 1];
                }
                catch (ArrayIndexOutOfBoundsException e) {
                    log.error("Illegal line for sentence reading: '{}'", line, e);
                    throw e;
                }
                if (!lastColumn.contains("Sentence")) {
                    continue;
                }
                if (split.length < 4) {
                    throw new IllegalArgumentException("Line " + lineNumber + ": " + sentencePath + " should have at least four columns in each line.");
                }
                String docId = split[0];
                int start = Integer.parseInt(split[2]);
                int end = Integer.parseInt(split[3]);
                sentences.put(docId, Range.between(start, end));
            }
        }
        return sentences;
    }

    /**
     * Reads non-gene phrase offsets from a tab-separated annotation file that may contain other annotation types
     * as well.
     * <p>
     * Only lines whose last column contains {@code NonGenePhrase} are used; every other line is skipped. For a
     * used line, column 0 is taken as the document ID and columns 2 and 3 are parsed as the start and end offset.
     *
     * @param nonGenePhrasePath path of the annotation file to read
     * @return the non-gene phrase offset ranges, keyed by document ID
     * @throws IOException                    if obtaining or closing the file reader fails
     * @throws IllegalArgumentException       if a non-gene phrase line has fewer than four columns or an offset
     *                                        column is not an integer
     * @throws ArrayIndexOutOfBoundsException if a line splits into zero columns
     */
    public static Multimap<String, Range<Integer>> readMixedFileForNonGenePhraseOffsets(String nonGenePhrasePath) throws IOException {
        File nonGenePhraseFile = new File(nonGenePhrasePath);
        HashMultimap<String, Range<Integer>> nonGenePhrases = HashMultimap.create();
        // Counts every line read (not only the accepted ones) so that error messages point to the real file line.
        int lineNumber = 0;
        // The reader itself is the managed resource: closing the Stream returned by BufferedReader.lines()
        // does not close the underlying reader.
        try (BufferedReader reader = FileUtilities.getReaderFromFile(nonGenePhraseFile)) {
            Iterator<String> lineIterator = reader.lines().iterator();
            while (lineIterator.hasNext()) {
                String line = lineIterator.next();
                ++lineNumber;
                String[] split = line.split("\\t");
                try {
                    if (!split[split.length - 1].contains("NonGenePhrase")) {
                        continue;
                    }
                } catch (ArrayIndexOutOfBoundsException e) {
                    // String.split drops trailing empty strings, so a line made up only of tabs yields no columns.
                    log.error("Illegal line for non-gene phrase reading: '" + line + "'", e);
                    throw e;
                }
                if (split.length < 4) {
                    throw new IllegalArgumentException("Line " + lineNumber + ": " + nonGenePhrasePath + " should have at least four columns in each line.");
                }
                String docId = split[0];
                int start = Integer.parseInt(split[2]);
                int end = Integer.parseInt(split[3]);
                nonGenePhrases.put(docId, Range.between(start, end));
            }
        }
        return nonGenePhrases;
    }

    /**
     * Reads chunk annotations from a tab-separated annotation file that may contain other annotation types as
     * well.
     * <p>
     * Only lines whose last column contains {@code Chunk} are used; every other line is skipped. A used line must
     * have exactly seven columns: column 0 is taken as the document ID, columns 2 and 3 are parsed as the start
     * and end offset, and the chunk type is the part of column 6 after its last dot.
     *
     * @param chunkPath path of the annotation file to read
     * @return for each document ID, a map from chunk offset range to chunk type
     * @throws IOException                    if obtaining or closing the file reader fails
     * @throws IllegalArgumentException       if a chunk line does not have exactly seven columns or an offset
     *                                        column is not an integer
     * @throws ArrayIndexOutOfBoundsException if a line splits into zero columns
     */
    public static Map<String, OffsetMap<String>> readMixedFileForChunkOffsets(String chunkPath) throws IOException {
        File chunkFile = new File(chunkPath);
        HashMap<String, OffsetMap<String>> chunks = new HashMap<String, OffsetMap<String>>();
        // Counts every line read (not only the accepted ones) so that error messages point to the real file line.
        int lineNumber = 0;
        // The reader itself is the managed resource: closing the Stream returned by BufferedReader.lines()
        // does not close the underlying reader.
        try (BufferedReader reader = FileUtilities.getReaderFromFile(chunkFile)) {
            Iterator<String> lineIterator = reader.lines().iterator();
            while (lineIterator.hasNext()) {
                String line = lineIterator.next();
                ++lineNumber;
                String[] split = line.split("\\t");
                if (!split[split.length - 1].contains("Chunk")) {
                    continue;
                }
                if (split.length != 7) {
                    throw new IllegalArgumentException("Line " + lineNumber + ": " + chunkPath + " should have exactly seven columns in each line.");
                }
                String docId = split[0];
                // Keep only the segment after the last dot of the type column.
                String[] typeSegments = split[6].split("\\.");
                String type = typeSegments[typeSegments.length - 1];
                int start = Integer.parseInt(split[2]);
                int end = Integer.parseInt(split[3]);
                Range<Integer> offsets = Range.between(start, end);
                chunks.computeIfAbsent(docId, key -> new OffsetMap<String>()).put(offsets, type);
            }
        }
        return chunks;
    }

    /**
     * Reads ontology class mentions from a tab-separated annotation file that may contain other annotation types
     * as well.
     * <p>
     * Only lines whose last column contains {@code OntClassMention} are used; every other line is skipped. A used
     * line must have exactly seven columns: column 0 is taken as the document ID, column 1 as the mention type and
     * columns 2 and 3 are parsed as the start and end offset.
     *
     * @param ontologyClassFilePath path of the annotation file to read; may be {@code null}
     * @return for each document ID, a map from mention offset range to mention type; an empty map if the path is
     *         {@code null} or the file does not exist
     * @throws IOException                    if obtaining or closing the file reader fails
     * @throws IllegalArgumentException       if a mention line does not have exactly seven columns or an offset
     *                                        column is not an integer
     * @throws ArrayIndexOutOfBoundsException if a line splits into zero columns
     */
    public static Map<String, OffsetMap<String>> readMixedFileForOntologyClassMentions(String ontologyClassFilePath) throws IOException {
        if (ontologyClassFilePath == null) {
            return Collections.emptyMap();
        }
        File ontClassFile = new File(ontologyClassFilePath);
        if (!ontClassFile.exists()) {
            return Collections.emptyMap();
        }
        HashMap<String, OffsetMap<String>> classMentions = new HashMap<String, OffsetMap<String>>();
        // Counts every line read (not only the accepted ones) so that error messages point to the real file line.
        int lineNumber = 0;
        // The reader itself is the managed resource: closing the Stream returned by BufferedReader.lines()
        // does not close the underlying reader.
        try (BufferedReader reader = FileUtilities.getReaderFromFile(ontClassFile)) {
            Iterator<String> lineIterator = reader.lines().iterator();
            while (lineIterator.hasNext()) {
                String line = lineIterator.next();
                ++lineNumber;
                String[] split = line.split("\\t");
                if (!split[split.length - 1].contains("OntClassMention")) {
                    continue;
                }
                if (split.length != 7) {
                    throw new IllegalArgumentException("Line " + lineNumber + ": " + ontologyClassFilePath + " should have exactly seven columns in each line.");
                }
                String docId = split[0];
                String type = split[1];
                int start = Integer.parseInt(split[2]);
                int end = Integer.parseInt(split[3]);
                Range<Integer> offsets = Range.between(start, end);
                classMentions.computeIfAbsent(docId, key -> new OffsetMap<String>()).put(offsets, type);
            }
        }
        return classMentions;
    }

    /**
     * Reads MeSH headings from a tab-separated file.
     * <p>
     * For every line, column 0 is used as the key and a {@link MeshHeading} is created from column 1.
     *
     * @param meshPath path of the file to read
     * @return the MeSH headings, keyed by the value of the first column
     * @throws IOException if obtaining or closing the file reader fails
     */
    public static Multimap<String, MeshHeading> readMeshHeadings(String meshPath) throws IOException {
        HashMultimap<String, MeshHeading> headingsByKey = HashMultimap.create();
        File meshFile = new File(meshPath);
        try (BufferedReader reader = FileUtilities.getReaderFromFile(meshFile)) {
            Iterator<String> lineIterator = reader.lines().iterator();
            while (lineIterator.hasNext()) {
                String[] columns = lineIterator.next().split("\t");
                headingsByKey.put(columns[0], new MeshHeading(columns[1]));
            }
        }
        return headingsByKey;
    }

    /**
     * Reads coreference sets from a tab-separated annotation file.
     * <p>
     * For every line, column 0 is taken as the document ID, column 1 as the annotation ID and columns 2 and 3 are
     * parsed as the begin and end offset. Only lines whose annotation ID starts with {@code Ana} or {@code Ant}
     * contribute an expression; the remainder of the ID after this prefix is the set ID. A new set is started
     * whenever the set ID or the document ID differs from that of the previously accepted line, so the members
     * of one set are expected on consecutive lines.
     *
     * @param coreferenceAnnotationPath path of the annotation file to read; may be {@code null}
     * @return the coreference sets, keyed by document ID; empty if the path is {@code null} or the file does not
     *         exist
     * @throws IOException if obtaining or closing the file reader fails
     */
    public static Multimap<String, CoreferenceSet> readCoreferenceAnnotations(String coreferenceAnnotationPath) throws IOException {
        HashMultimap<String, CoreferenceSet> coreferenceSets = HashMultimap.create();
        if (coreferenceAnnotationPath == null) {
            return coreferenceSets;
        }
        File coreferenceFile = new File(coreferenceAnnotationPath);
        if (!coreferenceFile.exists()) {
            return coreferenceSets;
        }
        // The reader itself is the managed resource: closing the Stream returned by BufferedReader.lines()
        // does not close the underlying reader.
        try (BufferedReader reader = FileUtilities.getReaderFromFile(coreferenceFile)) {
            Iterator<String> lineIterator = reader.lines().iterator();
            CoreferenceSet currentSet = null;
            while (lineIterator.hasNext()) {
                String line = lineIterator.next();
                String[] split = line.split("\\t");
                String docId = split[0];
                String id = split[1];
                // NOTE(review): the offsets are parsed before the ID filter below, so every line - including
                // skipped ones - needs at least four columns with integer offsets. Confirm this is intended.
                int begin = Integer.parseInt(split[2]);
                int end = Integer.parseInt(split[3]);
                if (!id.startsWith("Ana") && !id.startsWith("Ant")) {
                    continue;
                }
                // Both accepted prefixes are three characters long.
                String setId = id.substring(3);
                if (currentSet == null || !currentSet.getId().equals(setId) || !currentSet.getDocId().equals(docId)) {
                    currentSet = new CoreferenceSet(docId, setId);
                    coreferenceSets.put(docId, currentSet);
                }
                // NOTE(review): the set is modified after it was stored in a hash-based multimap; verify that
                // CoreferenceSet's hashCode/equals do not depend on its member expressions.
                currentSet.add(new CoreferenceExpression(Range.between(begin, end)));
            }
        }
        return coreferenceSets;
    }

    /**
     * Reads apposition annotations from a tab-separated annotation file.
     * <p>
     * For every line, column 0 is taken as the document ID, columns 1 and 2 are parsed as the offsets of the
     * {@code InApposition} expression and columns 3 and 4 as the offsets of the {@code Appositive} expression.
     * The two resulting {@link Apposition} objects reference each other via {@code setOther} and are both stored
     * under the document ID.
     *
     * @param appositionAnnotationPath path of the annotation file to read; may be {@code null}
     * @return the appositions, keyed by document ID; empty if the path is {@code null} or the file does not exist
     * @throws IOException if obtaining or closing the file reader fails
     */
    public static Multimap<String, Apposition> readAppositionAnnotations(String appositionAnnotationPath) throws IOException {
        HashMultimap<String, Apposition> appositions = HashMultimap.create();
        if (appositionAnnotationPath == null) {
            return appositions;
        }
        File appositionFile = new File(appositionAnnotationPath);
        if (!appositionFile.exists()) {
            return appositions;
        }
        // The reader itself is the managed resource: closing the Stream returned by BufferedReader.lines()
        // does not close the underlying reader.
        try (BufferedReader reader = FileUtilities.getReaderFromFile(appositionFile)) {
            Iterator<String> lineIterator = reader.lines().iterator();
            while (lineIterator.hasNext()) {
                String line = lineIterator.next();
                String[] split = line.split("\\t");
                String docId = split[0];
                int inAppositionBegin = Integer.parseInt(split[1]);
                int inAppositionEnd = Integer.parseInt(split[2]);
                int appositionBegin = Integer.parseInt(split[3]);
                int appositionEnd = Integer.parseInt(split[4]);
                Apposition inApposition = new Apposition(Range.between(inAppositionBegin, inAppositionEnd), Apposition.AppositionType.InApposition);
                Apposition apposition = new Apposition(Range.between(appositionBegin, appositionEnd), Apposition.AppositionType.Appositive);
                // Link both halves to each other before they are stored.
                inApposition.setOther(apposition);
                apposition.setOther(inApposition);
                appositions.put(docId, inApposition);
                appositions.put(docId, apposition);
            }
        }
        return appositions;
    }
}

