/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.genemapper.uima;

import de.julielab.java.utilities.FileUtilities;
import de.julielab.jcore.types.Organism;
import de.julielab.jcore.types.ResourceEntry;
import de.julielab.jcore.types.Sentence;
import de.julielab.jcore.types.Token;
import de.julielab.jcore.utility.JCoReAnnotationIndexMerger;
import de.julielab.jcore.utility.index.JCoReOverlapAnnotationIndex;
import java.io.File;
import java.io.IOException;
import java.io.Writer;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Locale;
import java.util.Optional;
import java.util.Set;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.FSArray;
import org.apache.uima.resource.ResourceInitializationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@ResourceMetaData(name="JCoRe TaxId Sentence Label Writer", description="This component writes the CAS text sentence-wise to file. For each sentence, the taxonomy IDs of organism mentions found in the respective sentence is added in FastText format.")
@TypeCapability(inputs={"de.julielab.jcore.types.Organism", "de.julielab.jcore.types.Sentence"})
public class TaxIdSentenceLabelWriter
extends JCasAnnotator_ImplBase {
    public static final String PARAM_BASE_PATH = "OutputBasePath";
    public static final String PARAM_TOKENIZATION = "UseTokenization";
    public static final String PARAM_LEMMAS = "UseLemmas";
    public static final String PARAM_LOWERCASING = "DoLowercase";
    public static final String PARAM_MASK_ORGANISM_NAMES = "MaskOrganismNames";
    private static final Logger log = LoggerFactory.getLogger(TaxIdSentenceLabelWriter.class);
    @ConfigurationParameter(name="OutputBasePath", description="A base path to the output file containing the augmented document texts. The path will be extended by the name of the writing thread and the .txt file extension.")
    private String outputBasePath;
    @ConfigurationParameter(name="UseTokenization", mandatory=false, defaultValue={"false"}, description="When set to true, existing token annotations are used to write tokens delimited by a whitespace into the output file.")
    private boolean useTokenization;
    @ConfigurationParameter(name="UseLemmas", mandatory=false, defaultValue={"false"}, description="When set to true, existing token annotations with their lemmas set are used to write lemmas delimited by a whitespace into the output file.")
    private boolean useLemmas;
    @ConfigurationParameter(name="DoLowercase", mandatory=false, defaultValue={"false"}, description="When set to true, all output is lowercased.")
    private boolean doLowercase;
    @ConfigurationParameter(name="MaskOrganismNames", mandatory=false, defaultValue={"false"}, description="When set to true, the actual organism names in the text are replaced with a place holder.")
    private boolean maskOrganismNames;
    private Writer writer;

    @Override
    public void initialize(UimaContext aContext) throws ResourceInitializationException {
        super.initialize(aContext);
        this.outputBasePath = (String)aContext.getConfigParameterValue(PARAM_BASE_PATH);
        this.useTokenization = Optional.ofNullable((Boolean)aContext.getConfigParameterValue(PARAM_TOKENIZATION)).orElse(false);
        this.useLemmas = Optional.ofNullable((Boolean)aContext.getConfigParameterValue(PARAM_LEMMAS)).orElse(false);
        this.doLowercase = Optional.ofNullable((Boolean)aContext.getConfigParameterValue(PARAM_LOWERCASING)).orElse(false);
        this.maskOrganismNames = Optional.ofNullable((Boolean)aContext.getConfigParameterValue(PARAM_MASK_ORGANISM_NAMES)).orElse(false);
    }

    @Override
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        StringBuilder sb = new StringBuilder();
        String linesep = System.getProperty("line.separator");
        JCoReOverlapAnnotationIndex<Organism> organismIndex = this.maskOrganismNames ? new JCoReOverlapAnnotationIndex<Organism>(jCas, Organism.type) : null;
        for (Sentence s2 : jCas.getAnnotationIndex(Sentence.type)) {
            JCoReAnnotationIndexMerger merger;
            try {
                LinkedHashSet<Integer> types = new LinkedHashSet<Integer>();
                types.add(Organism.type);
                types.add(Token.type);
                merger = new JCoReAnnotationIndexMerger(types, true, s2, jCas);
            }
            catch (ClassNotFoundException e) {
                log.error("Could not create annotation index merger", e);
                throw new AnalysisEngineProcessException(e);
            }
            if (this.useTokenization || this.useLemmas) {
                this.addTokenizedOutput(sb, merger, organismIndex);
            } else {
                this.addUntokenizedOutput(jCas, sb, s2);
            }
            if (sb.length() <= 1 || String.valueOf(sb.charAt(sb.length() - 1)).equals(linesep)) continue;
            sb.append(linesep);
        }
        try {
            if (this.writer == null) {
                File outputfile = new File(this.outputBasePath + Thread.currentThread().getName() + ".txt");
                if (!outputfile.getParentFile().exists()) {
                    outputfile.getParentFile().mkdirs();
                }
                this.writer = FileUtilities.getWriterToFile(outputfile);
            }
            this.writer.write(sb.toString());
        }
        catch (IOException e) {
            log.error("Could not write to {}", (Object)(this.outputBasePath + Thread.currentThread().getName() + ".txt"));
            throw new AnalysisEngineProcessException(e);
        }
    }

    public void addUntokenizedOutput(JCas jCas, StringBuilder sb, Sentence s2) {
        LinkedHashSet<String> ids = new LinkedHashSet<String>();
        for (Organism o : () -> jCas.getAnnotationIndex(Organism.type).subiterator(s2)) {
            FSArray resourceEntryList = o.getResourceEntryList();
            for (FeatureStructure fs : resourceEntryList) {
                ResourceEntry re = (ResourceEntry)fs;
                ids.add(re.getEntryId());
            }
        }
        for (String id : ids) {
            sb.append("__label__").append(id).append(" ");
        }
        if (!ids.isEmpty()) {
            sb.append(s2.getCoveredText());
        }
    }

    public void addTokenizedOutput(StringBuilder sb, JCoReAnnotationIndexMerger merger, JCoReOverlapAnnotationIndex<Organism> organismIndex) {
        LinkedHashSet<String> ids = new LinkedHashSet<String>();
        StringBuilder tokenSb = new StringBuilder();
        int skipTo = 0;
        while (merger.incrementAnnotation()) {
            AnnotationFS a = (AnnotationFS)((Object)merger.getAnnotation());
            if (a.getBegin() < skipTo) continue;
            if (a instanceof Token) {
                String output;
                if (this.maskOrganismNames && !organismIndex.search((Token)a).isEmpty()) {
                    output = "SPECIES";
                    skipTo = a.getEnd();
                } else {
                    output = this.doLowercase && !this.useLemmas ? a.getCoveredText().toLowerCase() : (this.doLowercase && this.useLemmas ? ((Token)a).getLemma().getValue().toLowerCase() : (!this.doLowercase && this.useLemmas ? ((Token)a).getLemma().getValue() : a.getCoveredText()));
                }
                tokenSb.append(output).append(" ");
            }
            if (!(a instanceof Organism)) continue;
            Organism o = (Organism)a;
            FSArray resourceEntryList = o.getResourceEntryList();
            for (FeatureStructure fs : resourceEntryList) {
                ResourceEntry re = (ResourceEntry)fs;
                ids.add(re.getEntryId());
            }
        }
        if (tokenSb.length() > 0) {
            tokenSb.deleteCharAt(tokenSb.length() - 1);
        }
        for (String id : ids) {
            sb.append("__label__").append(id).append(" ");
        }
        if (!ids.isEmpty()) {
            sb.append((CharSequence)tokenSb);
        }
    }

    @Override
    public void collectionProcessComplete() throws AnalysisEngineProcessException {
        try {
            this.writer.close();
        }
        catch (IOException e) {
            log.error("Could not close the writer", e);
            throw new AnalysisEngineProcessException(e);
        }
    }
}

