package org.apache.ctakes.temporal.ae;

import com.google.common.base.Charsets;
import com.google.common.collect.Maps;
import com.google.common.io.Resources;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.ctakes.temporal.ae.feature.TimeWordTypeExtractor;
import org.apache.ctakes.temporal.duration.PreserveUMLSEventTimeRelationsInGold;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.syntax.TopTreebankNode;
import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
import org.apache.ctakes.typesystem.type.textsem.TimeMention;
import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CASException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.classifier.CleartkAnnotator;
import org.cleartk.classifier.DataWriter;
import org.cleartk.classifier.Feature;
import org.cleartk.classifier.Instance;
import org.cleartk.classifier.ScoredOutcome;
import org.cleartk.classifier.feature.extractor.CleartkExtractor;
import org.cleartk.classifier.feature.extractor.simple.CharacterCategoryPatternExtractor;
import org.cleartk.classifier.feature.extractor.simple.CombinedExtractor;
import org.cleartk.classifier.feature.extractor.simple.CoveredTextExtractor;
import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
import org.cleartk.classifier.feature.extractor.simple.TypePathExtractor;
import org.cleartk.classifier.jar.DefaultDataWriterFactory;
import org.cleartk.classifier.jar.DirectoryDataWriterFactory;
import org.cleartk.classifier.jar.GenericJarClassifierFactory;
import org.cleartk.timeml.util.TimeWordsExtractor;
import org.uimafit.descriptor.ConfigurationParameter;
import org.uimafit.factory.AnalysisEngineFactory;
import org.uimafit.util.JCasUtil;

/* loaded from: input_file:org/apache/ctakes/temporal/ae/ConstituencyBasedTimeAnnotator.class */
/**
 * ClearTK annotator that walks the constituency parse of each segment and classifies every
 * visited {@link TreebankNode} as either a time mention or not.
 *
 * <p>In training mode, a node whose span exactly matches a {@link TimeMention} in the CAS is
 * written out as a positive instance and every other node as a negative one (nodes covering
 * {@value #SPAN_LIMIT} or more tokens are not written at all). In classification mode, each node
 * labelled as a mention produces a new {@link TimeMention} in the view named by
 * {@link #PARAM_TIMEX_VIEW}. In both modes the walk does not descend below a node that was
 * labelled as a mention.
 */
public class ConstituencyBasedTimeAnnotator extends TemporalEntityAnnotator_ImplBase {
    private static final String NON_MENTION = "NON_TIME_MENTION";
    private static final String MENTION = "TIME_MENTION";
    private static final Logger logger = Logger.getLogger(ConstituencyBasedTimeAnnotator.class);

    /** Nodes covering this many tokens or more are not written as training instances. */
    private static final int SPAN_LIMIT = 12;

    public static final String PARAM_TIMEX_VIEW = "TimexView";

    @ConfigurationParameter(name = PARAM_TIMEX_VIEW, mandatory = false, description = "View to write timexes to (used for ensemble methods)")
    protected String timexView = PreserveUMLSEventTimeRelationsInGold.SYSTEM_VIEW_NAME;

    /** Extractors applied to every node, leaf or not. */
    protected List<SimpleFeatureExtractor> featureExtractors;

    /** Extractor applied to leaf nodes only. */
    protected SimpleFeatureExtractor wordTypeExtractor;

    private static final String LOOKUP_PATH = "/org/apache/ctakes/temporal/time_word_types.txt";

    /**
     * Word-to-type lookup read from {@link #LOOKUP_PATH} during {@link #initialize(UimaContext)}.
     * Nothing in this class reads it afterwards; loading it still validates the resource.
     */
    private Map<String, String> wordTypes;

    /**
     * Builds a training-mode description of this annotator.
     *
     * @param dataWriterClass data writer implementation used to record training instances
     * @param outputDirectory directory handed to the data writer factory as its output directory
     * @return the analysis engine description
     * @throws ResourceInitializationException if the description cannot be created
     */
    public static AnalysisEngineDescription createDataWriterDescription(Class<? extends DataWriter<String>> dataWriterClass, File outputDirectory) throws ResourceInitializationException {
        return AnalysisEngineFactory.createPrimitiveDescription(
                ConstituencyBasedTimeAnnotator.class,
                CleartkAnnotator.PARAM_IS_TRAINING, true,
                DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, dataWriterClass,
                DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, outputDirectory);
    }

    /**
     * Builds a classification-mode description that loads its model from the given jar path.
     *
     * @param modelPath value passed as the classifier jar path
     * @return the analysis engine description
     * @throws ResourceInitializationException if the description cannot be created
     */
    public static AnalysisEngineDescription createAnnotatorDescription(String modelPath) throws ResourceInitializationException {
        return AnalysisEngineFactory.createPrimitiveDescription(
                ConstituencyBasedTimeAnnotator.class,
                CleartkAnnotator.PARAM_IS_TRAINING, false,
                GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, modelPath);
    }

    /**
     * Builds a classification-mode description that writes its time mentions to a named view.
     *
     * @param modelPath value passed as the classifier jar path
     * @param viewName  name of the view that receives the created time mentions
     * @return the analysis engine description
     * @throws ResourceInitializationException if the description cannot be created
     */
    public static AnalysisEngineDescription createEnsembleDescription(String modelPath, String viewName) throws ResourceInitializationException {
        return AnalysisEngineFactory.createPrimitiveDescription(
                ConstituencyBasedTimeAnnotator.class,
                CleartkAnnotator.PARAM_IS_TRAINING, false,
                PARAM_TIMEX_VIEW, viewName,
                GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, modelPath);
    }

    /**
     * Builds a classification-mode description that loads {@code model.jar} from a directory.
     *
     * @param modelDirectory directory containing {@code model.jar}
     * @return the analysis engine description
     * @throws ResourceInitializationException if the description cannot be created
     */
    public static AnalysisEngineDescription createAnnotatorDescription(File modelDirectory) throws ResourceInitializationException {
        return AnalysisEngineFactory.createPrimitiveDescription(
                ConstituencyBasedTimeAnnotator.class,
                CleartkAnnotator.PARAM_IS_TRAINING, false,
                GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, new File(modelDirectory, "model.jar"));
    }

    /**
     * Builds a classification-mode description that loads {@code model.jar} from a directory and
     * writes its time mentions to a named view.
     *
     * @param modelDirectory directory containing {@code model.jar}
     * @param viewName       name of the view that receives the created time mentions
     * @return the analysis engine description
     * @throws ResourceInitializationException if the description cannot be created
     */
    public static AnalysisEngineDescription createEnsembleDescription(File modelDirectory, String viewName) throws ResourceInitializationException {
        return AnalysisEngineFactory.createPrimitiveDescription(
                ConstituencyBasedTimeAnnotator.class,
                CleartkAnnotator.PARAM_IS_TRAINING, false,
                PARAM_TIMEX_VIEW, viewName,
                GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, new File(modelDirectory, "model.jar"));
    }

    /**
     * Loads the word-type lookup and assembles the feature extractors.
     *
     * @param uimaContext the UIMA context, forwarded to the superclass
     * @throws ResourceInitializationException if the lookup resource is missing or unreadable
     * @throws IllegalArgumentException        if a lookup line does not consist of exactly two
     *                                         whitespace-separated columns
     */
    @Override
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);

        this.wordTypes = loadWordTypes();

        SimpleFeatureExtractor characterPatterns = new CombinedExtractor(
                new CharacterCategoryPatternExtractor(CharacterCategoryPatternExtractor.PatternType.REPEATS_MERGED),
                new CharacterCategoryPatternExtractor(CharacterCategoryPatternExtractor.PatternType.ONE_PER_CHAR));
        SimpleFeatureExtractor tokenFeatures = new CombinedExtractor(
                new CoveredTextExtractor(),
                characterPatterns,
                new TypePathExtractor(BaseToken.class, "partOfSpeech"));

        this.featureExtractors = new ArrayList<SimpleFeatureExtractor>();
        this.featureExtractors.add(new CleartkExtractor(
                BaseToken.class,
                tokenFeatures,
                new CleartkExtractor.Bag(new CleartkExtractor.Covered())));
        this.wordTypeExtractor = new CleartkExtractor(
                BaseToken.class,
                new TimeWordTypeExtractor(),
                new CleartkExtractor.Bag(new CleartkExtractor.Covered()));
    }

    /**
     * Reads {@link #LOOKUP_PATH} from the classpath into a word-to-type map. Each line must hold
     * exactly two whitespace-separated columns, {@code <type> <word>}.
     */
    private static Map<String, String> loadWordTypes() throws ResourceInitializationException {
        Map<String, String> typesByWord = Maps.newHashMap();
        try {
            java.net.URL lookup = TimeWordsExtractor.class.getResource(LOOKUP_PATH);
            if (lookup == null) {
                // getResource signals "not found" with null; report the path rather than letting a
                // bare NullPointerException escape from the reader.
                throw new IOException("Classpath resource not found: " + LOOKUP_PATH);
            }
            for (String line : Resources.readLines(lookup, Charsets.US_ASCII)) {
                String[] columns = line.split("\\s+");
                if (columns.length != 2) {
                    throw new IllegalArgumentException("Expected '<type> <word>', found: " + line);
                }
                typesByWord.put(columns[1], columns[0]);
            }
        } catch (IOException e) {
            throw new ResourceInitializationException(e);
        }
        return typesByWord;
    }

    /**
     * Processes every parse tree covered by the segment, starting at the first child of each
     * {@link TopTreebankNode}.
     *
     * @param jCas    the CAS being processed
     * @param segment the segment whose parse trees are walked
     * @throws AnalysisEngineProcessException if feature extraction, instance writing, or view
     *                                        lookup fails
     */
    @Override // org.apache.ctakes.temporal.ae.TemporalEntityAnnotator_ImplBase
    public void process(JCas jCas, Segment segment) throws AnalysisEngineProcessException {
        // Mentions are removed from this set as tree nodes matching their span are found.
        Set<TimeMention> unmatchedMentions = new HashSet<TimeMention>(JCasUtil.selectCovered(TimeMention.class, segment));
        for (TopTreebankNode root : JCasUtil.selectCovered(TopTreebankNode.class, segment)) {
            recursivelyProcessNode(jCas, root.getChildren(0), unmatchedMentions, 0.0d);
        }
    }

    /**
     * Labels one node and, unless it is a leaf or was labelled as a mention, recurses into its
     * children.
     *
     * @param jCas        the CAS being processed
     * @param treebankNode the node to label; it must have a parent
     * @param set         mentions not yet matched to a node; a matched mention is removed
     * @param d           score of the parent node; currently not read by this method
     * @return in training mode, 1.0 for a node matching a mention and 0.0 otherwise; in
     *         classification mode, the top classifier score for a mention and one minus that score
     *         for a non-mention
     * @throws AnalysisEngineProcessException if feature extraction, instance writing, or view
     *                                        lookup fails
     */
    private double recursivelyProcessNode(JCas jCas, TreebankNode treebankNode, Set<TimeMention> set, double d) throws AnalysisEngineProcessException {
        List<Feature> features = extractNodeFeatures(jCas, treebankNode);
        String category = NON_MENTION;
        double score = 0.0d;

        if (isTraining()) {
            for (TimeMention gold : JCasUtil.selectCovered(TimeMention.class, treebankNode)) {
                boolean sameSpan = gold.getBegin() == treebankNode.getBegin() && gold.getEnd() == treebankNode.getEnd();
                if (!sameSpan) {
                    continue;
                }
                category = MENTION;
                score = 1.0d;
                set.remove(gold);
                if (logger.isDebugEnabled()) {
                    logger.debug(describePositiveExample(treebankNode, features));
                }
            }
            int tokenCount = JCasUtil.selectCovered(BaseToken.class, treebankNode).size();
            if (tokenCount < SPAN_LIMIT) {
                this.dataWriter.write(new Instance<String>(category, features));
            }
        } else {
            score = ((ScoredOutcome<?>) this.classifier.score(features, 1).get(0)).getScore();
            category = (String) this.classifier.classify(features);
            if (MENTION.equals(category)) {
                try {
                    TimeMention created = new TimeMention(jCas.getView(this.timexView), treebankNode.getBegin(), treebankNode.getEnd());
                    created.setConfidence((float) score);
                    created.addToIndexes();
                } catch (CASException e) {
                    throw new AnalysisEngineProcessException(e);
                }
            } else {
                // The top-scored outcome was the non-mention label, so flip the score.
                score = 1.0d - score;
            }
        }

        // Nothing below a leaf, and nothing nested inside an accepted mention is examined.
        if (treebankNode.getLeaf() || MENTION.equals(category)) {
            return score;
        }

        int childCount = treebankNode.getChildren().size();
        for (int i = 0; i < childCount; i++) {
            recursivelyProcessNode(jCas, treebankNode.getChildren(i), set, Math.max(score, 0.0d));
        }
        return score;
    }

    /**
     * Collects the features of one node: root marker, node and parent labels, leaf-only or
     * production features, followed by the output of every configured extractor.
     */
    private List<Feature> extractNodeFeatures(JCas jCas, TreebankNode node) throws AnalysisEngineProcessException {
        List<Feature> features = new ArrayList<Feature>();
        TreebankNode parent = node.getParent();

        // A node whose parent has no parent sits directly under the top of the tree.
        if (parent.getParent() == null) {
            features.add(new Feature("IS_ROOT"));
        }
        features.add(new Feature("NODE_LABEL", node.getNodeType()));
        features.add(new Feature("PARENT_LABEL", parent.getNodeType()));

        if (node.getLeaf()) {
            features.add(new Feature("IS_LEAF"));
            features.addAll(this.wordTypeExtractor.extract(jCas, node));
        } else {
            StringBuilder production = new StringBuilder();
            int childCount = node.getChildren().size();
            for (int i = 0; i < childCount; i++) {
                String childLabel = node.getChildren(i).getNodeType();
                production.append(childLabel).append("_");
                features.add(new Feature("CHILD_BAG", childLabel));
            }
            features.add(new Feature("PRODUCTION", production.toString()));
        }

        for (SimpleFeatureExtractor extractor : this.featureExtractors) {
            features.addAll(extractor.extract(jCas, node));
        }
        return features;
    }

    /** Renders a positive training example (node text, parent text, features) for debug logging. */
    private static String describePositiveExample(TreebankNode node, List<Feature> features) {
        StringBuilder text = new StringBuilder();
        text.append("Positive example: ").append(node.getCoveredText()).append('\n');
        text.append("\tParent: ").append(node.getParent().getCoveredText()).append('\n');
        for (Feature feature : features) {
            text.append('\t').append(feature.getName()).append(" => ").append(feature.getValue()).append('\n');
        }
        return text.toString();
    }
}
