package org.apache.ctakes.temporal.ae;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.ctakes.temporal.ae.feature.ParseSpanFeatureExtractor;
import org.apache.ctakes.temporal.ae.feature.TimeWordTypeExtractor;
import org.apache.ctakes.temporal.ae.feature.selection.Chi2FeatureSelection;
import org.apache.ctakes.temporal.ae.feature.selection.FeatureSelection;
import org.apache.ctakes.temporal.duration.PreserveUMLSEventTimeRelationsInGold;
import org.apache.ctakes.temporal.utils.SMOTEplus;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.textsem.TimeMention;
import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CASException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.classifier.CleartkAnnotator;
import org.cleartk.classifier.Feature;
import org.cleartk.classifier.Instance;
import org.cleartk.classifier.chunking.BIOChunking;
import org.cleartk.classifier.feature.extractor.CleartkExtractor;
import org.cleartk.classifier.feature.extractor.simple.CharacterCategoryPatternExtractor;
import org.cleartk.classifier.feature.extractor.simple.CombinedExtractor;
import org.cleartk.classifier.feature.extractor.simple.CoveredTextExtractor;
import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
import org.cleartk.classifier.feature.extractor.simple.TypePathExtractor;
import org.cleartk.classifier.jar.DefaultDataWriterFactory;
import org.cleartk.classifier.jar.DirectoryDataWriterFactory;
import org.cleartk.classifier.jar.GenericJarClassifierFactory;
import org.uimafit.descriptor.ConfigurationParameter;
import org.uimafit.factory.AnalysisEngineFactory;
import org.uimafit.util.JCasUtil;

/* loaded from: input_file:org/apache/ctakes/temporal/ae/TimeAnnotator.class */
/**
 * Token-level annotator for {@link TimeMention}s.
 *
 * <p>For every {@link Sentence} covered by a {@link Segment}, each {@link BaseToken} is turned
 * into a feature vector. In training mode the vector is written to the data writer together with
 * the outcome produced by {@link BIOChunking#createOutcomes}; otherwise the classifier's outcome
 * is collected and the outcomes of the sentence are converted into {@link TimeMention} chunks in
 * the view named by {@link #PARAM_TIMEX_VIEW}.
 */
public class TimeAnnotator extends TemporalEntityAnnotator_ImplBase {
    public static final String PARAM_FEATURE_SELECTION_THRESHOLD = "WhetherToDoFeatureSelection";
    public static final String PARAM_FEATURE_SELECTION_URI = "FeatureSelectionURI";
    public static final String PARAM_SMOTE_NUM_NEIGHBORS = "NumOfNeighborForSMOTE";
    public static final String PARAM_TIMEX_VIEW = "TimexView";

    private static final String FEATURE_SELECTION_NAME = "SelectNeighborFeatures";

    /** Outcome label used in this class for a token that is not part of any chunk. */
    private static final String OUTSIDE_OUTCOME = "O";

    /** Number of preceding token outcomes that are added as features. */
    private static final int PREVIOUS_OUTCOME_COUNT = 2;

    @ConfigurationParameter(
            name = PARAM_FEATURE_SELECTION_URI,
            mandatory = false,
            description = "provides a URI where the feature selection data will be written")
    protected URI featureSelectionURI;

    /** A value of exactly 1 disables feature selection (see {@link #initialize}). */
    @ConfigurationParameter(
            name = PARAM_FEATURE_SELECTION_THRESHOLD,
            mandatory = false,
            description = "the Chi-squared threshold at which features should be removed")
    protected Float featureSelectionThreshold = Float.valueOf(1.0f);

    /** Values below 1 disable the generation of synthetic instances (see {@link #process}). */
    @ConfigurationParameter(
            name = PARAM_SMOTE_NUM_NEIGHBORS,
            mandatory = false,
            description = "the number of neighbors used for minority instances for SMOTE algorithm")
    protected Float smoteNumOfNeighbors = Float.valueOf(0.0f);

    @ConfigurationParameter(
            name = PARAM_TIMEX_VIEW,
            mandatory = false,
            description = "View to write timexes to (used for ensemble methods)")
    protected String timexView = PreserveUMLSEventTimeRelationsInGold.SYSTEM_VIEW_NAME;

    protected List<SimpleFeatureExtractor> tokenFeatureExtractors;
    protected List<CleartkExtractor> contextFeatureExtractors;
    protected ParseSpanFeatureExtractor parseExtractor;
    private BIOChunking<BaseToken, TimeMention> timeChunking;
    private FeatureSelection<String> featureSelection;

    /**
     * Builds a training-mode description of this annotator.
     *
     * @param cls  data writer class passed as the data writer class name parameter
     * @param file output directory for the training data
     * @param f    value for {@link #PARAM_FEATURE_SELECTION_THRESHOLD}
     * @param f2   value for {@link #PARAM_SMOTE_NUM_NEIGHBORS}
     * @return the analysis engine description
     * @throws ResourceInitializationException if the description cannot be created
     */
    public static AnalysisEngineDescription createDataWriterDescription(Class<?> cls, File file, float f, float f2) throws ResourceInitializationException {
        return AnalysisEngineFactory.createPrimitiveDescription(TimeAnnotator.class, new Object[]{
                CleartkAnnotator.PARAM_IS_TRAINING, true,
                DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME, cls,
                DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY, file,
                PARAM_FEATURE_SELECTION_THRESHOLD, Float.valueOf(f),
                PARAM_SMOTE_NUM_NEIGHBORS, Float.valueOf(f2)});
    }

    /**
     * Builds a classification-mode description that loads the classifier from a jar path.
     *
     * @param str value for the classifier jar path parameter
     * @return the analysis engine description
     * @throws ResourceInitializationException if the description cannot be created
     */
    public static AnalysisEngineDescription createAnnotatorDescription(String str) throws ResourceInitializationException {
        return AnalysisEngineFactory.createPrimitiveDescription(TimeAnnotator.class, new Object[]{
                CleartkAnnotator.PARAM_IS_TRAINING, false,
                GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, str});
    }

    /**
     * Builds a classification-mode description from a model directory.
     *
     * @param file directory that holds {@code model.jar} and the feature selection data file
     * @return the analysis engine description
     * @throws ResourceInitializationException if the description cannot be created
     */
    public static AnalysisEngineDescription createAnnotatorDescription(File file) throws ResourceInitializationException {
        return AnalysisEngineFactory.createPrimitiveDescription(TimeAnnotator.class, new Object[]{
                CleartkAnnotator.PARAM_IS_TRAINING, false,
                GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, new File(file, "model.jar"),
                PARAM_FEATURE_SELECTION_URI, createFeatureSelectionURI(file)});
    }

    /**
     * Builds a classification-mode description that writes its chunks to a named view.
     *
     * @param file directory that holds {@code model.jar} and the feature selection data file
     * @param str  value for {@link #PARAM_TIMEX_VIEW}
     * @return the analysis engine description
     * @throws ResourceInitializationException if the description cannot be created
     */
    public static AnalysisEngineDescription createEnsembleDescription(File file, String str) throws ResourceInitializationException {
        return AnalysisEngineFactory.createPrimitiveDescription(TimeAnnotator.class, new Object[]{
                CleartkAnnotator.PARAM_IS_TRAINING, false,
                GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, new File(file, "model.jar"),
                PARAM_TIMEX_VIEW, str,
                PARAM_FEATURE_SELECTION_URI, createFeatureSelectionURI(file)});
    }

    /**
     * Creates the Chi-squared feature selection used by this annotator.
     *
     * @param d threshold handed to the {@link Chi2FeatureSelection} constructor
     * @return a new feature selection named {@code SelectNeighborFeatures}
     */
    public static FeatureSelection<String> createFeatureSelection(double d) {
        // NOTE(review): raw constructor call kept as found; parameterize it if
        // Chi2FeatureSelection is declared generic.
        return new Chi2FeatureSelection(FEATURE_SELECTION_NAME, d, true);
    }

    /**
     * Returns the location of the feature selection data file inside a directory.
     *
     * @param file directory containing (or receiving) the data file
     * @return URI of {@code SelectNeighborFeatures_Chi2_extractor.dat} within {@code file}
     */
    public static URI createFeatureSelectionURI(File file) {
        return new File(file, FEATURE_SELECTION_NAME + "_Chi2_extractor.dat").toURI();
    }

    /**
     * Sets up the chunking, the feature extractors and, unless the threshold is exactly 1,
     * the feature selection (loading its data when a URI was configured).
     *
     * @param uimaContext the UIMA context passed on to the superclass
     * @throws ResourceInitializationException if the superclass fails to initialize or the
     *         feature selection data cannot be loaded
     */
    @Override
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        this.timeChunking = new BIOChunking<>(BaseToken.class, TimeMention.class);

        SimpleFeatureExtractor tokenExtractor = new CombinedExtractor(new SimpleFeatureExtractor[]{
                new CoveredTextExtractor(),
                new CharacterCategoryPatternExtractor(CharacterCategoryPatternExtractor.PatternType.REPEATS_MERGED),
                new CharacterCategoryPatternExtractor(CharacterCategoryPatternExtractor.PatternType.ONE_PER_CHAR),
                new TypePathExtractor(BaseToken.class, "partOfSpeech"),
                new TimeWordTypeExtractor()});

        this.tokenFeatureExtractors = new ArrayList<>();
        this.tokenFeatureExtractors.add(tokenExtractor);

        // The same token-level extractor is applied to the 3 preceding and 3 following tokens.
        this.contextFeatureExtractors = new ArrayList<>();
        this.contextFeatureExtractors.add(new CleartkExtractor(BaseToken.class, tokenExtractor, new CleartkExtractor.Context[]{
                new CleartkExtractor.Preceding(3),
                new CleartkExtractor.Following(3)}));

        this.parseExtractor = new ParseSpanFeatureExtractor();

        // The default value 1 acts as the "feature selection off" sentinel.
        if (this.featureSelectionThreshold.floatValue() == 1.0f) {
            this.featureSelection = null;
            return;
        }
        this.featureSelection = createFeatureSelection(this.featureSelectionThreshold.floatValue());
        if (this.featureSelectionURI != null) {
            try {
                this.featureSelection.load(this.featureSelectionURI);
            } catch (IOException e) {
                throw new ResourceInitializationException(e);
            }
        }
    }

    /**
     * Processes every sentence of a segment token by token.
     *
     * <p>In training mode each token's features are written with its gold outcome, and instances
     * whose outcome is not {@code "O"} are additionally handed to {@link SMOTEplus}; when the
     * configured neighbor count is at least 1 the instances returned by
     * {@code populateMinorityClass()} are written after all sentences. In classification mode the
     * predicted outcomes of a sentence are converted into chunks in the configured view.
     *
     * @param jCas    the CAS being processed
     * @param segment the segment whose sentences are processed
     * @throws AnalysisEngineProcessException if feature extraction, writing, classification or
     *         view access fails
     */
    @Override
    public void process(JCas jCas, Segment segment) throws AnalysisEngineProcessException {
        SMOTEplus smote = new SMOTEplus((int) Math.ceil(this.smoteNumOfNeighbors.floatValue()));

        for (Sentence sentence : JCasUtil.selectCovered(jCas, Sentence.class, segment)) {
            List<BaseToken> tokens = JCasUtil.selectCovered(jCas, BaseToken.class, sentence);

            // Training: all gold outcomes of the sentence. Classification: outcomes predicted so far.
            List<String> outcomes;
            if (isTraining()) {
                List<TimeMention> times = JCasUtil.selectCovered(jCas, TimeMention.class, sentence);
                outcomes = this.timeChunking.createOutcomes(jCas, tokens, times);
            } else {
                outcomes = new ArrayList<>();
            }

            int tokenIndex = -1;
            for (BaseToken token : tokens) {
                tokenIndex++;
                List<Feature> features = extractFeatures(jCas, segment, sentence, tokens, outcomes, tokenIndex);

                if (isTraining()) {
                    String outcome = outcomes.get(tokenIndex);
                    Instance<String> instance = new Instance<>(outcome, features);
                    this.dataWriter.write(instance);
                    // Only non-"O" instances are offered to SMOTE.
                    if (!outcome.equals(OUTSIDE_OUTCOME)) {
                        smote.addInstance(instance);
                    }
                } else {
                    outcomes.add(this.classifier.classify(features));
                }
            }

            if (!isTraining()) {
                try {
                    this.timeChunking.createChunks(jCas.getView(this.timexView), tokens, outcomes);
                } catch (CASException e) {
                    throw new AnalysisEngineProcessException(e);
                }
            }
        }

        if (!isTraining() || this.smoteNumOfNeighbors.floatValue() < 1.0f) {
            return;
        }
        for (Instance<String> synthetic : smote.populateMinorityClass()) {
            this.dataWriter.write(synthetic);
        }
    }

    /** Builds the (optionally feature-selected) feature list for the token at {@code tokenIndex}. */
    private List<Feature> extractFeatures(JCas jCas, Segment segment, Sentence sentence, List<BaseToken> tokens,
            List<String> outcomes, int tokenIndex) throws AnalysisEngineProcessException {
        BaseToken token = tokens.get(tokenIndex);
        List<Feature> features = new ArrayList<>();

        for (SimpleFeatureExtractor extractor : this.tokenFeatureExtractors) {
            features.addAll(extractor.extract(jCas, token));
        }
        for (CleartkExtractor extractor : this.contextFeatureExtractors) {
            features.addAll(extractor.extractWithin(jCas, token, sentence));
        }

        // Outcomes of the preceding tokens; positions before the sentence start count as "O".
        for (int distance = PREVIOUS_OUTCOME_COUNT; distance > 0; distance--) {
            int previousIndex = tokenIndex - distance;
            String previousOutcome = previousIndex < 0 ? OUTSIDE_OUTCOME : outcomes.get(previousIndex);
            features.add(new Feature("PreviousOutcome_" + distance, previousOutcome));
        }

        features.add(new Feature("SegmentID", segment.getId()));

        // Parse features over the span from the start of the current non-"O" run to this token.
        BaseToken startToken = findChunkStart(tokens, outcomes, tokenIndex);
        features.addAll(this.parseExtractor.extract(jCas, startToken.getBegin(), token.getEnd()));

        if (this.featureSelection != null) {
            features = this.featureSelection.transform(features);
        }
        return features;
    }

    /** Walks back from {@code tokenIndex} over directly preceding non-"O" tokens and returns the earliest one. */
    private static BaseToken findChunkStart(List<BaseToken> tokens, List<String> outcomes, int tokenIndex) {
        BaseToken start = tokens.get(tokenIndex);
        for (int index = tokenIndex - 1; index >= 0; index--) {
            if (outcomes.get(index).equals(OUTSIDE_OUTCOME)) {
                break;
            }
            start = tokens.get(index);
        }
        return start;
    }
}
