/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.jcore.ae.lingscope;

import de.julielab.java.utilities.FileUtilities;
import de.julielab.jcore.ae.lingscope.LikelihoodUtils;
import de.julielab.jcore.types.Lemma;
import de.julielab.jcore.types.LikelihoodIndicator;
import de.julielab.jcore.types.POSTag;
import de.julielab.jcore.types.Scope;
import de.julielab.jcore.types.Sentence;
import de.julielab.jcore.types.Token;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import lingscope.algorithms.Annotator;
import lingscope.drivers.CueAndPosFilesMerger;
import lingscope.drivers.SentenceTagger;
import lingscope.structures.AnnotatedSentence;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.cas.text.AnnotationIndex;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * UIMA analysis engine that runs the Lingscope cue and scope annotators on every sentence.
 *
 * <p>For each {@link Sentence} the covered {@link Token}s are collected, their text is sent to the cue
 * annotator, the cue tags are merged with the tokens' first PoS tags and the result is sent to the scope
 * annotator. Cue spans become {@link LikelihoodIndicator} annotations and scope spans become {@link Scope}
 * annotations. When a sentence yields the same number of cues and scopes, the i-th scope is linked to the
 * i-th cue.
 */
@ResourceMetaData(name="JCoRe Lingscope AE", description="This component uses the Lingscope negation/hedge detection algorithm and models to annotate negation/hedge cues and the scope to which the cues apply.")
@TypeCapability(inputs={"de.julielab.jcore.types.Token", "de.julielab.jcore.types.PennBioIEPOSTag"}, outputs={"de.julielab.jcore.types.LikelihoodIndicator", "de.julielab.jcore.types.Scope"})
public class LingscopePosAnnotator
extends JCasAnnotator_ImplBase {
    public static final String PARAM_CUE_MODEL = "CueModel";
    public static final String PARAM_SCOPE_MODEL = "ScopeModel";
    public static final String PARAM_LIKELIHOOD_DICT_PATH = "LikelihoodDict";
    public static final String PARAM_IS_NEGATION_ANNOTATOR = "IsNegationAnnotator";
    private static final Logger log = LoggerFactory.getLogger(LingscopePosAnnotator.class);
    /** Placeholder written to the log when a value is not available. */
    private static final String NULL_PLACEHOLDER = "<null>";
    private Annotator cueAnnotator;
    private Annotator scopeAnnotator;
    /** Maps a lower-cased, space-separated lemma expression to its likelihood category. */
    private final Map<String, String> likelihoodDict = new HashMap<>();
    @ConfigurationParameter(name=PARAM_CUE_MODEL, description="The model that is used to recognize the negation or hedge cue words in text. There are different models for negation and hedge detection in Lingscope, indicated by the directory names 'negation_models' and 'hedge_models' in the respective downloads from the Lingscope SourceForge page. The cue detection models are always those where the string 'cue' follows the 'baseline' or 'crf' string in the filename. Thus, all 'baseline_cue_*' and 'crf_cue_*' files are cue identification models. The 'crf_scope_cue_*' models, in contrast, are scope detection models that replace the cue words by the string CUE.")
    private String cueModelLocation;
    @ConfigurationParameter(name=PARAM_SCOPE_MODEL, description="The model that is used to detect the scope of a previously found negation or hedge cue word. There are different models for negation and hedge detection in Lingscope, indicated by the directory names 'negation_models' and 'hedge_models' in the respective downloads from the Lingscope SourceForge page. The cue detection models are always those where the string 'cue' follows the 'baseline' or 'crf' string in the filename. Thus, all 'baseline_cue_*' and 'crf_cue_*' files are cue identification models. The 'crf_scope_cue_*' models, in contrast, are scope detection models that replace the cue words by the string CUE.")
    private String scopeModelLocation;
    @ConfigurationParameter(name=PARAM_LIKELIHOOD_DICT_PATH, mandatory=false, description="String parameter indicating path to likelihood dictionary (One entry per line; Entries consist of tab-separated lemmatized likelihood indicators and assigned likelihood category). The dictionary passed here is only used to assign likelihood scores (low, medium, high) to negation and hedge cues. It is not used to detect the cues in the first place.")
    private String likelihoodDictFile;
    @ConfigurationParameter(name=PARAM_IS_NEGATION_ANNOTATOR, mandatory=false, defaultValue={"false"}, description="If set to true, the recognized cue words will all be assigned the 'negation' likelihood, even if the model used is a hedge model.")
    private boolean isNegationAnnotator;
    /** Whether cue words are replaced by the CUE string before scope detection; inferred from the scope model name. */
    private boolean replaceCue;

    /**
     * Reads the configuration parameters, infers the cue model type and the cue replacement strategy from
     * the model file names and loads both Lingscope annotators.
     *
     * @param aContext the UIMA context that provides the configuration parameter values
     * @throws ResourceInitializationException if a mandatory model parameter is missing or a model cannot be loaded
     */
    @Override
    public void initialize(UimaContext aContext) throws ResourceInitializationException {
        // Let the base class store the context so that getContext() works for this component.
        super.initialize(aContext);
        this.cueModelLocation = LingscopePosAnnotator.requireParameter(aContext, PARAM_CUE_MODEL);
        this.scopeModelLocation = LingscopePosAnnotator.requireParameter(aContext, PARAM_SCOPE_MODEL);

        String dictPath = (String)aContext.getConfigParameterValue(PARAM_LIKELIHOOD_DICT_PATH);
        if (dictPath != null) {
            LikelihoodUtils.loadLikelihoodDict(dictPath, this.likelihoodDict);
        }
        Boolean negationFlag = (Boolean)aContext.getConfigParameterValue(PARAM_IS_NEGATION_ANNOTATOR);
        if (negationFlag != null) {
            this.isNegationAnnotator = negationFlag;
        }

        // The cue model type is derived from the file name prefix; anything else falls back to "negex".
        String cueModelFileName = new File(this.cueModelLocation).getName();
        String cueModelType;
        if (cueModelFileName.startsWith("baseline")) {
            cueModelType = "baseline";
        } else if (cueModelFileName.startsWith("crf")) {
            cueModelType = "crf";
        } else {
            cueModelType = "negex";
        }
        log.info("Inferred the cue detection type '{}' from the cue model file '{}'", cueModelType, this.cueModelLocation);

        // Scope models whose location contains "words" keep the cue words; all others expect the CUE string.
        this.replaceCue = !this.scopeModelLocation.contains("words");
        log.info("Inferred the strategy as to whether to replace found cue words with the CUE string or not from the scope model file '{}' to: Replace: {}", this.scopeModelLocation, this.replaceCue);

        String scopeModelType = "crf";
        try {
            // NOTE(review): whatever findResource returns is handed to loadAnnotator and never closed here;
            // confirm whether loadAnnotator takes ownership of it.
            this.cueAnnotator = SentenceTagger.getAnnotator(cueModelType, "cue");
            this.cueAnnotator.loadAnnotator(FileUtilities.findResource(this.cueModelLocation));
            this.scopeAnnotator = SentenceTagger.getAnnotator(scopeModelType, "scope");
            this.scopeAnnotator.loadAnnotator(FileUtilities.findResource(this.scopeModelLocation));
        }
        catch (IOException e) {
            log.error("Could not initialize Lingscope annotators", e);
            throw new ResourceInitializationException(e);
        }
    }

    /**
     * Annotates cues and scopes for every sentence of the CAS.
     *
     * <p>Sentences without tokens are skipped. Any error raised while tagging a sentence is logged together
     * with the intermediate results and then rethrown.
     *
     * @param aJCas the CAS to process
     * @throws AnalysisEngineProcessException if a token has no PoS tag or annotation creation fails
     */
    @Override
    public void process(JCas aJCas) throws AnalysisEngineProcessException {
        AnnotationIndex tokenIndex = aJCas.getAnnotationIndex(Token.type);
        for (Annotation sent : aJCas.getAnnotationIndex(Sentence.type)) {
            FSIterator tokenIterator = tokenIndex.subiterator((AnnotationFS)sent);
            List<Token> tokens = new ArrayList<>();
            List<String> posValues = new ArrayList<>();
            while (tokenIterator.hasNext()) {
                Token token = (Token)tokenIterator.next();
                POSTag posTag = token.getPosTag(0);
                if (posTag == null) {
                    throw new AnalysisEngineProcessException(new IllegalArgumentException("PoS tags are required but the current token has none."));
                }
                posValues.add(posTag.getValue());
                tokens.add(token);
            }
            if (tokens.isEmpty()) {
                continue;
            }
            String posSentence = String.join(" ", posValues);
            AnnotatedSentence cueTaggedSentence = null;
            AnnotatedSentence posCueMerged = null;
            AnnotatedSentence scopeMarkedSentence = null;
            try {
                // The pipe character is replaced before tagging; presumably Lingscope uses it as a delimiter.
                String tokenizedText = tokens.stream().map(Annotation::getCoveredText).collect(Collectors.joining(" ")).replace("|", "/");
                cueTaggedSentence = this.cueAnnotator.annotateSentence(tokenizedText, true);
                posCueMerged = CueAndPosFilesMerger.merge(cueTaggedSentence, posSentence, this.replaceCue);
                scopeMarkedSentence = this.scopeAnnotator.annotateSentence(posCueMerged.getSentenceText(), true);
                List<LikelihoodIndicator> likelihoodIndicators = this.addAnnotationToCas(tokens, cueTaggedSentence, () -> new LikelihoodIndicator(aJCas));
                List<Scope> scopes = this.addAnnotationToCas(tokens, scopeMarkedSentence, () -> new Scope(aJCas));
                if (likelihoodIndicators.size() == scopes.size()) {
                    // Cues and scopes are paired purely by their order of appearance in the sentence.
                    for (int i = 0; i < scopes.size(); ++i) {
                        scopes.get(i).setCue((de.julielab.jcore.types.Annotation)likelihoodIndicators.get(i));
                    }
                } else {
                    log.debug("Not assigning negation or hedge cues to their scopes because the number of cues and scopes differs.");
                    log.trace("The respective sentence is: '{}'. Cue tags: '{}', Scope tags: '{}'", new Object[]{sent.getCoveredText(), cueTaggedSentence.getTags(), scopeMarkedSentence.getTags()});
                }
            }
            catch (Throwable t) {
                log.error("Lingscope error in sentence '{}'", sent.getCoveredText(), t);
                log.error("PosCueMerged Sent Text: {}", posCueMerged != null ? posCueMerged.getSentenceText() : NULL_PLACEHOLDER);
                log.error("Tokens: {}", tokens.stream().map(Annotation::getCoveredText).collect(Collectors.joining(" ")));
                // Lemmas are optional, so a missing lemma must not raise a new exception that hides 't'.
                log.error("Lemmas: {}", tokens.stream().map(LingscopePosAnnotator::lemmaValueForLog).collect(Collectors.joining(" ")));
                log.error("PoS: {}", posSentence);
                log.error("Cue tags: {}", cueTaggedSentence != null ? cueTaggedSentence.getTags() : NULL_PLACEHOLDER);
                log.error("POS Cue merged: {}", posCueMerged != null ? posCueMerged.getTags() : NULL_PLACEHOLDER);
                log.error("Scope tags: {}", scopeMarkedSentence != null ? scopeMarkedSentence.getTags() : NULL_PLACEHOLDER);
                log.error("StackTrace:", t);
                throw t;
            }
        }
    }

    /**
     * Returns the value of a mandatory string parameter.
     *
     * @throws ResourceInitializationException if the parameter has no value
     */
    private static String requireParameter(UimaContext context, String parameterName) throws ResourceInitializationException {
        String value = (String)context.getConfigParameterValue(parameterName);
        if (value == null) {
            throw new ResourceInitializationException(new IllegalArgumentException("The mandatory parameter '" + parameterName + "' is not set."));
        }
        return value;
    }

    /** Returns the lemma value of the token for logging, or a placeholder when the token has no lemma. */
    private static String lemmaValueForLog(Token token) {
        Lemma lemma = token.getLemma();
        return lemma != null ? lemma.getValue() : NULL_PLACEHOLDER;
    }

    /**
     * Converts the B/I/O tags of a tagged sentence into annotations and adds them to the CAS.
     *
     * <p>A tag starting with "B" opens a new annotation (closing an open one first), a tag starting with "I"
     * extends the open annotation and the tag "O" closes it. An annotation that is still open after the last
     * token is closed at that token.
     *
     * @param tokens the tokens of the sentence, in the same order as the tags
     * @param taggedSentence the Lingscope output whose tags are read by token position
     * @param annotationSupplier creates a new, empty annotation for each detected span
     * @return the created annotations in order of appearance
     * @throws AnalysisEngineProcessException if a required lemma is missing
     */
    private <T extends Annotation> List<T> addAnnotationToCas(List<Token> tokens, AnnotatedSentence taggedSentence, Supplier<T> annotationSupplier) throws AnalysisEngineProcessException {
        List<T> created = new ArrayList<>();
        List<String> tags = taggedSentence.getTags();
        List<Token> tokensInCurrentExpression = new ArrayList<>();
        T current = null;
        for (int i = 0; i < tokens.size(); ++i) {
            Token token = tokens.get(i);
            String tag = tags.get(i);
            if (tag.startsWith("B")) {
                if (current != null) {
                    this.endAnnotation(tokens, created, current, tokensInCurrentExpression, i);
                }
                // Start every expression with a clean token list so earlier tokens cannot leak into it.
                tokensInCurrentExpression.clear();
                current = annotationSupplier.get();
                current.setBegin(token.getBegin());
                tokensInCurrentExpression.add(token);
            } else if (tag.startsWith("I")) {
                // An inside tag without an open annotation has nothing to extend and is ignored.
                if (current != null) {
                    tokensInCurrentExpression.add(token);
                }
            } else if (tag.equals("O")) {
                if (current != null) {
                    this.endAnnotation(tokens, created, current, tokensInCurrentExpression, i);
                }
                current = null;
            }
        }
        if (current != null) {
            // The span reaches the end of the sentence: close it at the last token instead of dropping it.
            this.endAnnotation(tokens, created, current, tokensInCurrentExpression, tokens.size());
        }
        return created;
    }

    /**
     * Finishes an annotation: sets its end offset, assigns a likelihood to cue annotations, adds it to the
     * CAS indexes and appends it to the result list.
     *
     * <p>For a {@link LikelihoodIndicator}, the likelihood is "negation" when the negation switch is set.
     * Otherwise, if a dictionary was loaded, the lower-cased lemmas of the expression are looked up in it
     * and the likelihood is set only when an entry exists.
     *
     * @param tokens the tokens of the sentence
     * @param allIndicators the list the finished annotation is appended to
     * @param annotation the annotation to finish
     * @param tokensInCurrentExpression the tokens covered by the annotation; emptied by this method
     * @param i index of the first token after the annotation, so the annotation ends at token i - 1
     * @throws AnalysisEngineProcessException if the dictionary is used and a token has no lemma or lemma value
     */
    private <T extends Annotation> void endAnnotation(List<Token> tokens, List<T> allIndicators, T annotation, List<Token> tokensInCurrentExpression, int i) throws AnalysisEngineProcessException {
        annotation.setEnd(tokens.get(i - 1).getEnd());
        if (annotation instanceof LikelihoodIndicator) {
            LikelihoodIndicator indicator = (LikelihoodIndicator)annotation;
            if (this.isNegationAnnotator) {
                indicator.setLikelihood("negation");
            } else if (!this.likelihoodDict.isEmpty()) {
                List<String> lemmaValues = new ArrayList<>(tokensInCurrentExpression.size());
                for (Token t : tokensInCurrentExpression) {
                    Lemma lemma = t.getLemma();
                    String lemmaValue = lemma != null ? lemma.getValue() : null;
                    if (lemmaValue == null) {
                        throw new AnalysisEngineProcessException(new IllegalArgumentException("Lemmas are required when a likelihood dictionary is passed but the current token has none."));
                    }
                    lemmaValues.add(lemmaValue.toLowerCase());
                }
                String likelihoodScore = this.likelihoodDict.get(String.join(" ", lemmaValues));
                if (likelihoodScore != null) {
                    indicator.setLikelihood(likelihoodScore);
                }
            }
        }
        // The expression is complete; never carry its tokens over to the next one.
        tokensInCurrentExpression.clear();
        annotation.addToIndexes();
        allIndicators.add(annotation);
    }
}

