/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.jcore.reader.ign;

import bioc.BioCAnnotation;
import bioc.BioCDocument;
import bioc.BioCLocation;
import bioc.BioCPassage;
import bioc.BioCSentence;
import bioc.io.BioCDocumentReader;
import bioc.io.BioCFactory;
import de.julielab.jcore.types.Date;
import de.julielab.jcore.types.Gene;
import de.julielab.jcore.types.GeneResourceEntry;
import de.julielab.jcore.types.Journal;
import de.julielab.jcore.types.pubmed.Header;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.collection.CollectionReader_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.FSArray;
import org.apache.uima.jcas.cas.StringArray;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class IGNReader
extends CollectionReader_ImplBase {
    private static final Logger LOGGER = LoggerFactory.getLogger(IGNReader.class);
    public static final String PARAM_INPUTDIR_TEXT = "InputDirectoryText";
    public static final String PARAM_INPUTDIR_ANNO = "InputDirectoryAnnotations";
    public static final String PUBLICATION_DATES_FILE = "PublicationDatesFile";
    @ConfigurationParameter(name="InputDirectoryText", description="Directory containing files in BioC-format that comprise the actual text.")
    private File dirTextFiles;
    @ConfigurationParameter(name="InputDirectoryAnnotations", description="Directory containing files in BioC-format that comprise the annotations.")
    private File dirAnnoFiles;
    @ConfigurationParameter(name="PublicationDatesFile", defaultValue={"/de/julielab/jcore/reader/ign/pubdates/IGN_publicationDates"}, description="File containing a mapping between article ids and publication years.")
    private String publicationDatesFile;
    HashMap<String, BioCDocument> mapAnnoFiles = new HashMap();
    List<BioCDocument> biocDocuments = new ArrayList<BioCDocument>();
    private int currentIndex;
    private Map<String, String> pubDates;

    public void initialize() throws ResourceInitializationException {
        String configParameterValue;
        LOGGER.info("initialize() - Initializing BioC Reader...");
        this.dirTextFiles = new File((String)this.getConfigParameterValue(PARAM_INPUTDIR_TEXT));
        if (!this.dirTextFiles.exists() || !this.dirTextFiles.isDirectory()) {
            LOGGER.error("Input directory of files comprising the text doesn't exist or is not a directory ({}).", (Object)this.dirTextFiles.getAbsolutePath());
        }
        if (null != (configParameterValue = (String)this.getConfigParameterValue(PARAM_INPUTDIR_ANNO))) {
            this.dirAnnoFiles = new File(configParameterValue);
        }
        if (!(null == configParameterValue || this.dirAnnoFiles.exists() && this.dirAnnoFiles.isDirectory())) {
            LOGGER.error("Input directory of files comprising the annotations doesn't exist or is not a directory ({}).", (Object)configParameterValue);
            throw new ResourceInitializationException((Throwable)new FileNotFoundException("Annotation input directory not found: " + configParameterValue));
        }
        if (this.getConfigParameterValue(PUBLICATION_DATES_FILE) != null) {
            this.publicationDatesFile = ((String)this.getConfigParameterValue(PUBLICATION_DATES_FILE)).trim();
        }
        try {
            int i;
            this.pubDates = this.readIgnPubDates(this.publicationDatesFile);
            BioCFactory biocFactory = BioCFactory.newFactory((String)"STANDARD");
            FileReader reader = null;
            BioCDocumentReader biocReader = null;
            if (null != configParameterValue) {
                File[] arrayAnnoFiles = this.dirAnnoFiles.listFiles();
                for (i = 0; i < arrayAnnoFiles.length; ++i) {
                    File annoFile = arrayAnnoFiles[i];
                    reader = new FileReader(annoFile);
                    biocReader = biocFactory.createBioCDocumentReader((Reader)reader);
                    BioCDocument annoDoc = biocReader.readDocument();
                    String pmid = annoDoc.getID();
                    this.mapAnnoFiles.put(pmid, annoDoc);
                }
            }
            File[] arrayTextFiles = this.dirTextFiles.listFiles();
            for (i = 0; i < arrayTextFiles.length; ++i) {
                File textFile = arrayTextFiles[i];
                reader = new FileReader(textFile);
                biocReader = biocFactory.createBioCDocumentReader((Reader)reader);
                BioCDocument textDoc = biocReader.readDocument();
                this.biocDocuments.add(textDoc);
            }
        }
        catch (Exception e) {
            throw new ResourceInitializationException((Throwable)e);
        }
        this.currentIndex = 0;
    }

    public void getNext(CAS aCas) throws IOException, CollectionException {
        JCas aJCas;
        try {
            aJCas = aCas.getJCas();
        }
        catch (CASException e) {
            throw new CollectionException((Throwable)e);
        }
        BioCDocument textDoc = this.biocDocuments.get(this.currentIndex++);
        String pmid = textDoc.getID();
        LOGGER.info("getNext(CAS) - Reading text for PMID " + pmid);
        String text = "";
        List passageListText = textDoc.getPassages();
        for (BioCPassage passage : passageListText) {
            List sentList = passage.getSentences();
            for (BioCSentence sent : sentList) {
                String textPart = sent.getText();
                text = text + textPart + " ";
            }
        }
        Header header = new Header(aJCas);
        header.setDocId(pmid);
        this.addDateForID(header, aJCas, pmid);
        header.addToIndexes();
        aJCas.setDocumentText(text);
        if (null != this.mapAnnoFiles && !this.mapAnnoFiles.isEmpty()) {
            LOGGER.info("getNext(CAS) - Reading annotations for PMID " + pmid);
            BioCDocument annoDoc = this.mapAnnoFiles.get(pmid);
            List passageListAnno = annoDoc.getPassages();
            for (BioCPassage passage : passageListAnno) {
                List annos = passage.getAnnotations();
                for (BioCAnnotation anno : annos) {
                    BioCLocation loc;
                    int begin;
                    Map infons = anno.getInfons();
                    String egId = (String)infons.get("entrez_id");
                    String taxId = (String)infons.get("taxonomy_id");
                    List locs = anno.getLocations();
                    if (locs.size() > 1) {
                        LOGGER.warn("Discontinuous annotation! Will be ignored, as only the first location is considered.");
                    }
                    if ((begin = (loc = (BioCLocation)locs.get(0)).getOffset()) != 0 && text.charAt(++begin - 1) != ' ') {
                        --begin;
                    }
                    int end = begin + loc.getLength();
                    GeneResourceEntry resEntry = new GeneResourceEntry(aJCas);
                    resEntry.setBegin(begin);
                    resEntry.setEnd(end);
                    resEntry.setEntryId(egId);
                    resEntry.setSource("NCBI Gene");
                    resEntry.setTaxonomyId(taxId);
                    FSArray resList = new FSArray(aJCas, 1);
                    resList.set(0, (FeatureStructure)resEntry);
                    Gene gene = new Gene(aJCas);
                    gene.setBegin(begin);
                    gene.setEnd(end);
                    gene.setResourceEntryList(resList);
                    StringArray s1 = new StringArray(aJCas, 1);
                    s1.set(0, taxId);
                    gene.setSpecies(s1);
                    gene.addToIndexes();
                }
            }
        }
    }

    private void addDateForID(Header header, JCas jCas, String id) {
        if (this.pubDates.isEmpty()) {
            return;
        }
        Journal pubType = new Journal(jCas);
        FSArray pubTypeList = new FSArray(jCas, 1);
        pubTypeList.set(0, (FeatureStructure)pubType);
        header.setPubTypeList(pubTypeList);
        Date pubDate = new Date(jCas);
        String dateString = this.pubDates.get(id);
        int year = 0;
        int month = 0;
        try {
            year = Integer.parseInt(dateString.substring(0, 4));
            month = Integer.parseInt(dateString.substring(5));
        }
        catch (NumberFormatException numberFormatException) {
            // empty catch block
        }
        if (month != 0) {
            pubDate.setMonth(month);
        }
        if (year != 0) {
            pubDate.setYear(year);
            pubType.setPubDate(pubDate);
            LOGGER.debug("pubmed-id: {}, publication date: {}-{}", new Object[]{header.getDocId(), year, month});
        }
    }

    private Map<String, String> readIgnPubDates(String publicationDatesFilePath) throws FileNotFoundException {
        HashMap<String, String> pubDates = new HashMap<String, String>();
        if (publicationDatesFilePath != null) {
            String publicationDatesFileResource = publicationDatesFilePath.startsWith("/") ? publicationDatesFilePath : "/" + publicationDatesFilePath;
            InputStream is = ((Object)((Object)this)).getClass().getResourceAsStream(publicationDatesFileResource);
            if (null == is) {
                File f = new File(publicationDatesFilePath);
                if (f.exists()) {
                    LOGGER.debug("Loading IGN publication dates from file {}", (Object)f);
                    is = new FileInputStream(f);
                }
            } else {
                LOGGER.debug("Loading resource \"{}\" from the classpath", (Object)publicationDatesFileResource);
            }
            if (null == is) {
                LOGGER.warn("Could not find {}. Publication dates will not be annotated.", (Object)publicationDatesFilePath);
            } else {
                try (BufferedReader br = new BufferedReader(new InputStreamReader(is, Charset.forName("UTF-8")));){
                    String line = "";
                    while ((line = br.readLine()) != null) {
                        String[] tokens = line.split("\\s+");
                        if (tokens.length != 2 || tokens[1].length() != 7) {
                            throw new IllegalArgumentException("Format error in IGN publication date file. Make sure that there are two tab- or whitespace separated columns, first the PubMed ID, second the date and that the date is exactly of length 8, e.g. 2001-05. The errorneous line was: " + line);
                        }
                        pubDates.put(tokens[0].trim(), tokens[1].trim());
                    }
                }
                catch (FileNotFoundException e) {
                    e.printStackTrace();
                }
                catch (IOException e) {
                    e.printStackTrace();
                }
            }
        } else {
            LOGGER.debug("Since the pubmedID2publication file is not given, publication dates will not be annotated.");
        }
        return pubDates;
    }

    public boolean hasNext() throws IOException, CollectionException {
        return this.currentIndex < this.biocDocuments.size();
    }

    public Progress[] getProgress() {
        return null;
    }

    public void close() throws IOException {
    }
}

