/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.jcore.ae.jtbd.main;

import de.julielab.jcore.types.Sentence;
import de.julielab.jcore.types.Token;
import de.julielab.jtbd.EOSSymbols;
import de.julielab.jtbd.Tokenizer;
import de.julielab.jtbd.Unit;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.JFSIndexRepository;
import org.apache.uima.resource.ResourceInitializationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class TokenAnnotator
extends JCasAnnotator_ImplBase {
    private static final Logger LOGGER = LoggerFactory.getLogger(TokenAnnotator.class);
    public static final String PARAM_MODEL = "ModelFilename";
    private static final String COMPONENT_ID = "JULIE Token Boundary Detector";
    public static final String USE_DOC_TEXT_PARAM = "UseDocText";
    private Tokenizer tokenizer;
    @ConfigurationParameter(name="UseDocText", defaultValue={"false"})
    private static boolean useCompleteDocText = false;
    private int tokenNumber;
    @ConfigurationParameter(name="ModelFilename", mandatory=true, description="Path to the tokenizer model.")
    private String modelFilename;

    private void createToken(JCas jcas, int begin, int end) {
        Token annotation = new Token(jcas);
        annotation.setBegin(begin);
        annotation.setEnd(end);
        annotation.setId("" + this.tokenNumber);
        annotation.setComponentId(COMPONENT_ID);
        annotation.addToIndexes();
        LOGGER.debug("createToken() - created token: " + jcas.getDocumentText().substring(begin, end) + " " + begin + " - " + end);
        ++this.tokenNumber;
    }

    public void initialize(UimaContext aContext) throws ResourceInitializationException {
        LOGGER.info("[JTBD] initializing JTBD Annotator ...");
        super.initialize(aContext);
        this.tokenizer = new Tokenizer();
        InputStream is = null;
        try {
            try {
                this.modelFilename = (String)aContext.getConfigParameterValue(PARAM_MODEL);
                try {
                    is = new FileInputStream(this.modelFilename);
                }
                catch (IOException e) {
                    LOGGER.debug("File \"{}\" does not exist. Searching for the model as a classpath resource.", (Object)this.modelFilename);
                    is = ((Object)((Object)this)).getClass().getResourceAsStream(this.modelFilename.startsWith("/") ? this.modelFilename : "/" + this.modelFilename);
                    if (is == null) {
                        throw new IllegalArgumentException("The model file \"" + this.modelFilename + "\" could be found neither in the file system nor in the classpath.");
                    }
                    LOGGER.info("Loading model as classpathresource");
                }
                this.tokenizer.readModel(is);
            }
            catch (Exception e) {
                throw new ResourceInitializationException((Throwable)e);
            }
        }
        finally {
            if (is != null) {
                try {
                    is.close();
                }
                catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        Object useDocTextParam = aContext.getConfigParameterValue(USE_DOC_TEXT_PARAM);
        if (useDocTextParam != null) {
            useCompleteDocText = (Boolean)useDocTextParam;
        }
        if (useCompleteDocText) {
            LOGGER.info("initialize() - whole documentText is tokenized");
        } else {
            LOGGER.info("initialize() - will tokenize only text covered by sentence annotations");
        }
    }

    public void process(JCas aJCas) throws AnalysisEngineProcessException {
        LOGGER.debug("process() - starting processing document");
        this.tokenNumber = 1;
        if (useCompleteDocText) {
            LOGGER.debug("process() - tokenizing whole document text!");
            String text = aJCas.getDocumentText();
            this.writeTokensToCAS(text, 0, aJCas);
        } else {
            JFSIndexRepository indexes = aJCas.getJFSIndexRepository();
            for (Sentence sentence : indexes.getAnnotationIndex(Sentence.type)) {
                LOGGER.debug("process() - going to next sentence having length: " + (sentence.getEnd() - sentence.getBegin()));
                String text = sentence.getCoveredText();
                this.writeTokensToCAS(text, sentence.getBegin(), aJCas);
            }
        }
    }

    private void writeTokensToCAS(String text, int offset, JCas aJCas) throws AnalysisEngineProcessException {
        if (text == null || text.isEmpty()) {
            LOGGER.debug("writeTokensToCAS() - input for JTBD tokenizer is null or empty!");
        } else {
            Character lastChar;
            int end;
            if (text.length() > 1 || !EOSSymbols.contains(Character.valueOf(text.charAt(text.length() - 1)))) {
                LOGGER.debug("writeTokensToCAS() - tokenizing input: " + text);
                ArrayList<Unit> units = this.tokenizer.predict(text);
                LOGGER.debug("+++predition done!++++");
                if (units == null || units.size() == 0) {
                    LOGGER.error("writeTokensToCAS() - no units found by JTBD for: " + text);
                    throw new AnalysisEngineProcessException();
                }
                int begin = 0;
                end = 0;
                boolean startNewToken = true;
                for (Unit unit : units) {
                    if (startNewToken) {
                        begin = unit.begin + offset;
                    }
                    end = unit.end + offset;
                    if (unit.label.equals("N")) {
                        startNewToken = false;
                        continue;
                    }
                    if (unit.label.equals("P")) {
                        this.createToken(aJCas, begin, end);
                        startNewToken = true;
                        continue;
                    }
                    LOGGER.error("writeTokensToCAS() - found unit label '" + unit.label + "' (only 'N' and 'P' are allowed");
                    throw new AnalysisEngineProcessException();
                }
                if (!startNewToken) {
                    this.createToken(aJCas, begin, end);
                    LOGGER.debug("writeTokensToCAS() - found terminal unit with label 'N' (expected 'P'). Check behaviour of JTBD! Token text: " + aJCas.getDocumentText().subSequence(begin, end));
                }
            }
            if (EOSSymbols.contains(lastChar = Character.valueOf(text.charAt(text.length() - 1)))) {
                int start = offset + text.length() - 1;
                end = offset + text.length();
                this.createToken(aJCas, start, end);
            }
        }
    }
}

