/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.genemapper.uima;

import de.julielab.java.utilities.FileUtilities;
import de.julielab.jcore.types.Organism;
import de.julielab.jcore.types.ResourceEntry;
import de.julielab.jcore.types.Token;
import de.julielab.jcore.utility.JCoReAnnotationIndexMerger;
import java.io.File;
import java.io.IOException;
import java.io.Writer;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.Locale;
import java.util.Optional;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.cas.text.AnnotationIndex;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.FSArray;
import org.apache.uima.resource.ResourceInitializationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@ResourceMetaData(name="JCoRe TaxId-Integrated Text Writer", description="This component creates a version of the CAS document text where after each Organism annotation, the NCBI Taxonomy ID of the organism is added to the text with the 'taxid' prefix (e.g. taxid10090).")
@TypeCapability(inputs={"de.julielab.jcore.types.Organism", "de.julielab.jcore.types.Token", "de.julielab.jcore.types.Sentence"})
public class TaxIdIntegratedTextWriter
extends JCasAnnotator_ImplBase {
    public static final String PARAM_BASE_PATH = "OutputBasePath";
    public static final String PARAM_MODE = "TaxIdInsertionMode";
    public static final String PARAM_TOKENIZATION = "UseTokenization";
    public static final String PARAM_LEMMAS = "UseLemmas";
    public static final String PARAM_LOWERCASING = "DoLowercase";
    private static final Logger log = LoggerFactory.getLogger(TaxIdIntegratedTextWriter.class);
    @ConfigurationParameter(name="OutputBasePath", description="A base path to the output file containing the augmented document texts. The path will be extended by the name of the writing thread and the .txt file extension.")
    private String outputBasePath;
    @ConfigurationParameter(name="TaxIdInsertionMode", description="One of 'mask' and 'add'. Masking will cause the taxonomy ID to replace the organism mention while addition will add the ID to the text directly following the mention.")
    private String mode;
    @ConfigurationParameter(name="UseTokenization", mandatory=false, defaultValue={"false"}, description="When set to true, existing token annotations are used to write tokens delimited by a whitespace into the output file.")
    private boolean useTokenization;
    @ConfigurationParameter(name="UseLemmas", mandatory=false, defaultValue={"false"}, description="When set to true, existing token annotations with their lemmas set are used to write lemmas delimited by a whitespace into the output file.")
    private boolean useLemmas;
    @ConfigurationParameter(name="DoLowercase", mandatory=false, defaultValue={"false"}, description="When set to true, all output is lowercased.")
    private boolean doLowercase;
    private Writer writer;

    public void initialize(UimaContext aContext) throws ResourceInitializationException {
        super.initialize(aContext);
        this.outputBasePath = (String)aContext.getConfigParameterValue(PARAM_BASE_PATH);
        this.mode = ((String)aContext.getConfigParameterValue(PARAM_MODE)).toLowerCase();
        this.useTokenization = Optional.ofNullable((Boolean)aContext.getConfigParameterValue(PARAM_TOKENIZATION)).orElse(false);
        this.useLemmas = Optional.ofNullable((Boolean)aContext.getConfigParameterValue(PARAM_LEMMAS)).orElse(false);
        this.doLowercase = Optional.ofNullable((Boolean)aContext.getConfigParameterValue(PARAM_LOWERCASING)).orElse(false);
        if (!this.mode.equals("mask") && !this.mode.equals("add")) {
            String msg = String.format("Illegal value for parameter %s: %s. Allowed values: 'mask' and 'add'.", PARAM_MODE, this.mode);
            log.error(msg);
            throw new ResourceInitializationException((Throwable)new IllegalArgumentException(msg));
        }
        log.info("{}: {}", (Object)PARAM_BASE_PATH, (Object)this.outputBasePath);
        log.info("{}: {}", (Object)PARAM_MODE, (Object)this.mode);
        log.info("{}: {}", (Object)PARAM_TOKENIZATION, (Object)this.useTokenization);
        log.info("{}: {}", (Object)PARAM_LEMMAS, (Object)this.useLemmas);
        log.info("{}: {}", (Object)PARAM_LOWERCASING, (Object)this.doLowercase);
    }

    public void process(JCas jCas) throws AnalysisEngineProcessException {
        String docText = jCas.getDocumentText();
        if (this.doLowercase) {
            docText = docText.toLowerCase();
        }
        StringBuilder sb = new StringBuilder();
        if (this.useTokenization || this.useLemmas) {
            this.writeTokenizedText(sb, jCas);
        } else {
            this.writeUntokenizedText(jCas, docText, sb);
        }
        try {
            if (this.writer == null) {
                File outputfile = new File(this.outputBasePath + Thread.currentThread().getName() + ".txt");
                if (!outputfile.getParentFile().exists()) {
                    outputfile.getParentFile().mkdirs();
                }
                this.writer = FileUtilities.getWriterToFile((File)outputfile);
            }
            this.writer.write(sb.toString());
        }
        catch (IOException e) {
            log.error("Could not write to {}", (Object)(this.outputBasePath + Thread.currentThread().getName() + ".txt"));
            throw new AnalysisEngineProcessException((Throwable)e);
        }
    }

    private void writeTokenizedText(StringBuilder sb, JCas jCas) {
        try {
            JCoReAnnotationIndexMerger merger = new JCoReAnnotationIndexMerger(new LinkedHashSet<Integer>(Arrays.asList(Organism.type, Token.type)), true, null, jCas);
            int currentEnd = 0;
            while (merger.incrementAnnotation()) {
                AnnotationFS a = (AnnotationFS)merger.getAnnotation();
                if (a.getBegin() < currentEnd) continue;
                Object output = null;
                boolean isOrganism = a instanceof Organism;
                if (isOrganism) {
                    Organism o = (Organism)a;
                    FSArray resourceEntryList = o.getResourceEntryList();
                    if (resourceEntryList != null) {
                        for (FeatureStructure fs : resourceEntryList) {
                            ResourceEntry re = (ResourceEntry)fs;
                            output = "taxid" + re.getEntryId();
                        }
                    }
                } else {
                    Token t = (Token)a;
                    Object object = output = this.useLemmas ? t.getLemma().getValue() : t.getCoveredText();
                    if (this.doLowercase) {
                        output = ((String)output).toLowerCase();
                    }
                }
                if (!merger.firstToken()) {
                    sb.append(" ");
                }
                if (isOrganism && this.mode.equals("add")) {
                    sb.append(a.getCoveredText()).append(" ");
                }
                sb.append((String)output);
                currentEnd = a.getEnd();
            }
        }
        catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
    }

    public void writeUntokenizedText(JCas jCas, String docText, StringBuilder sb) {
        AnnotationIndex organismIndex = jCas.getAnnotationIndex(Organism.type);
        int lastend = 0;
        for (Organism o : organismIndex) {
            FSArray resourceEntryList;
            if (o.getEnd() <= lastend || (resourceEntryList = o.getResourceEntryList()) == null) continue;
            if (this.mode.equals("add")) {
                sb.append(docText, lastend, o.getEnd());
            }
            if (this.mode.equals("mask")) {
                sb.append(docText, lastend, Math.max(lastend, o.getBegin()));
            }
            lastend = o.getEnd();
            for (FeatureStructure fs : resourceEntryList) {
                ResourceEntry re = (ResourceEntry)fs;
                if (this.mode.equals("add")) {
                    sb.append(" ");
                }
                sb.append("taxid").append(re.getEntryId());
                if (!this.mode.equals("add") || lastend >= docText.length() || Character.isWhitespace(docText.charAt(lastend))) continue;
                sb.append(" ");
            }
        }
        sb.append(docText, lastend, docText.length());
    }

    public void collectionProcessComplete() throws AnalysisEngineProcessException {
        try {
            this.writer.close();
        }
        catch (IOException e) {
            log.error("Could not close the writer", (Throwable)e);
            throw new AnalysisEngineProcessException((Throwable)e);
        }
    }
}

