/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.jcore.consumer.bc2gmformat;

import de.julielab.java.utilities.FileUtilities;
import de.julielab.jcore.types.Gene;
import de.julielab.jcore.types.Sentence;
import de.julielab.jcore.utility.JCoReTools;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.cas.text.AnnotationIndex;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@ResourceMetaData(name="JCoRe BioCreative II Gene Mention Format writer", description="This component writes gene annotations in the CAS to the format employed by the BioCreative II Gene Mention challenge.")
@TypeCapability(inputs={"de.julielab.jcore.types.Sentence", "de.julielab.jcore.types.Gene"})
public class BC2GMFormatWriter
extends JCasAnnotator_ImplBase {
    public static final String PARAM_OUTPUT_DIR = "OutputDirectory";
    public static final String PARAM_SENTENCES_FILE = "SentencesFileName";
    public static final String PARAM_GENE_FILE = "GenesFileName";
    private static final Logger log = LoggerFactory.getLogger(BC2GMFormatWriter.class);
    private Matcher wsMatcher;
    @ConfigurationParameter(name="OutputDirectory", description="The directory to store the sentence and gene annotation files.")
    private File outputDir;
    @ConfigurationParameter(name="SentencesFileName", description="The name of the file that will contain the sentences, one per line.")
    private String sentencesFile;
    @ConfigurationParameter(name="GenesFileName", description="The name of the file that will contain the gene mention offsets for each sentence.")
    private String genesFile;
    private BufferedWriter sentenceWriter;
    private BufferedWriter genesWriter;

    public void initialize(UimaContext aContext) throws ResourceInitializationException {
        super.initialize(aContext);
        this.outputDir = new File((String)aContext.getConfigParameterValue(PARAM_OUTPUT_DIR));
        this.sentencesFile = (String)aContext.getConfigParameterValue(PARAM_SENTENCES_FILE);
        this.genesFile = (String)aContext.getConfigParameterValue(PARAM_GENE_FILE);
        try {
            this.sentenceWriter = FileUtilities.getWriterToFile((File)Path.of(this.outputDir.getAbsolutePath(), this.sentencesFile).toFile());
            this.genesWriter = FileUtilities.getWriterToFile((File)Path.of(this.outputDir.getAbsolutePath(), this.genesFile).toFile());
        }
        catch (IOException e) {
            log.error("IO error when trying to open the output files", (Throwable)e);
        }
        this.wsMatcher = Pattern.compile("\\s").matcher("");
        log.info("{}: {}", (Object)PARAM_OUTPUT_DIR, (Object)this.outputDir);
        log.info("{}: {}", (Object)PARAM_SENTENCES_FILE, (Object)this.sentencesFile);
        log.info("{}: {}", (Object)PARAM_GENE_FILE, (Object)this.genesFile);
    }

    public void process(JCas aJCas) throws AnalysisEngineProcessException {
        try {
            String docId = JCoReTools.getDocId((JCas)aJCas);
            int sentNum = 0;
            FSIterator sentIt = aJCas.getAnnotationIndex(Sentence.class).iterator();
            AnnotationIndex geneIndex = aJCas.getAnnotationIndex(Gene.class);
            while (sentIt.hasNext()) {
                Sentence sentence = (Sentence)sentIt.next();
                String sentId = docId + ":" + sentNum++;
                String coveredText = sentence.getCoveredText();
                this.sentenceWriter.write(sentId + " " + coveredText);
                this.sentenceWriter.newLine();
                TreeMap<Integer, Integer> wsNumMap = this.buildWSMap(coveredText);
                FSIterator tokenIt = geneIndex.subiterator((AnnotationFS)sentence);
                while (tokenIt.hasNext()) {
                    Gene gene = (Gene)tokenIt.next();
                    int begin = gene.getBegin() - sentence.getBegin();
                    int end = gene.getEnd() - sentence.getBegin();
                    int beginWOWs = begin - wsNumMap.floorEntry(begin).getValue();
                    int endWOWs = end - wsNumMap.floorEntry(end).getValue() - 1;
                    String entry = sentId + "|" + beginWOWs + " " + endWOWs + "|" + gene.getCoveredText();
                    this.genesWriter.write(entry);
                    this.genesWriter.newLine();
                }
            }
        }
        catch (IOException e) {
            throw new AnalysisEngineProcessException((Throwable)e);
        }
    }

    public void collectionProcessComplete() throws AnalysisEngineProcessException {
        super.collectionProcessComplete();
        try {
            this.sentenceWriter.close();
            this.genesWriter.close();
        }
        catch (IOException e) {
            throw new AnalysisEngineProcessException((Throwable)e);
        }
    }

    public TreeMap<Integer, Integer> buildWSMap(String text) {
        TreeMap<Integer, Integer> wsNumMap = new TreeMap<Integer, Integer>();
        String sentenceText = text;
        int pos = 0;
        int numWs = 0;
        this.wsMatcher.reset(sentenceText);
        wsNumMap.put(0, 0);
        while (this.wsMatcher.find(pos)) {
            pos = this.wsMatcher.end();
            wsNumMap.put(pos, ++numWs);
        }
        return wsNumMap;
    }
}

