package org.apache.ctakes.assertion.medfacts.cleartk;

import java.io.File;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import org.apache.commons.io.FilenameUtils;
import org.apache.ctakes.assertion.attributes.features.selection.FeatureSelection;
import org.apache.ctakes.assertion.medfacts.cleartk.extractors.FedaFeatureFunction;
import org.apache.ctakes.core.util.DocumentIDAnnotationUtil;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.temporary.assertion.AssertionCuePhraseAnnotation;
import org.apache.ctakes.typesystem.type.textsem.EntityMention;
import org.apache.ctakes.typesystem.type.textsem.EventMention;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.ctakes.typesystem.type.textspan.Sentence;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CASException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.classifier.CleartkAnnotator;
import org.cleartk.classifier.Feature;
import org.cleartk.classifier.Instance;
import org.cleartk.classifier.TreeFeature;
import org.cleartk.classifier.feature.extractor.CleartkExtractor;
import org.cleartk.classifier.feature.extractor.simple.CombinedExtractor;
import org.cleartk.classifier.feature.extractor.simple.CoveredTextExtractor;
import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
import org.cleartk.classifier.feature.extractor.simple.TypePathExtractor;
import org.cleartk.classifier.feature.function.FeatureFunctionExtractor;
import org.uimafit.descriptor.ConfigurationParameter;
import org.uimafit.factory.AnalysisEngineFactory;
import org.uimafit.factory.ConfigurationParameterFactory;
import org.uimafit.util.JCasUtil;
import scala.actors.threadpool.Arrays;

/* loaded from: input_file:org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine.class */
public abstract class AssertionCleartkAnalysisEngine extends CleartkAnnotator<String> {
    /** Configuration parameter name bound to {@link #goldViewName}. */
    public static final String PARAM_GOLD_VIEW_NAME = "GoldViewName";

    /** Mutable static counter; it is neither read nor written inside this class. */
    public static int relationId;

    /** Configuration parameter name bound to {@link #fileDomainMap}. */
    public static final String FILE_TO_DOMAIN_MAP = "mapTrainFileToDomain";

    /** Name of the CAS view that {@code process} reads annotations from while training. */
    @ConfigurationParameter(name = PARAM_GOLD_VIEW_NAME, mandatory = false, description = "view containing the manual identified annotations (especially EntityMention and EventMention annotations); needed for training")
    protected String goldViewName;

    /** Configuration parameter name bound to {@link #printErrors}. */
    public static final String PARAM_PRINT_ERRORS = "PrintErrors";

    /** Error-printing switch (defaults to {@code false}); not consulted inside this class. */
    @ConfigurationParameter(name = PARAM_PRINT_ERRORS, mandatory = false, description = "Print errors true/false", defaultValue = {"false"})
    boolean printErrors;

    /** Configuration parameter name bound to {@link #probabilityOfKeepingADefaultExample}. */
    public static final String PARAM_PROBABILITY_OF_KEEPING_DEFAULT_EXAMPLE = "ProbabilityOfKeepingADefaultExample";

    /**
     * Configuration parameter name bound to {@link #featureSelectionThreshold}. Note that the
     * parameter's string value ("WhetherToDoFeatureSelection") does not mirror the constant name.
     */
    public static final String PARAM_FEATURE_SELECTION_THRESHOLD = "WhetherToDoFeatureSelection";

    /** Configuration parameter name bound to {@link #featConfig}. */
    public static final String PARAM_FEATURE_CONFIG = "FEATURE_CONFIG";

    /** Configuration parameter name bound to {@link #featureSelectionURI}. */
    public static final String PARAM_FEATURE_SELECTION_URI = "FeatureSelectionURI";

    /** Location for feature selection data; not read inside this class. */
    @ConfigurationParameter(mandatory = false, name = PARAM_FEATURE_SELECTION_URI, description = "provides a URI where the feature selection data will be written")
    protected URI featureSelectionURI;

    /** Feature selection identifier; not referenced inside this class (presumably used by subclasses). */
    protected static final String FEATURE_SELECTION_NAME = "SelectNeighborFeatures";

    /**
     * Raw parameter string listing directories, separated by {@code ';'} or {@code ':'}.
     * {@code initialize} expands it into {@link #fileToDomain}.
     */
    @ConfigurationParameter(name = FILE_TO_DOMAIN_MAP, mandatory = false, description = "a map of filenames to their respective domains (i.e., directories that contain them)")
    protected String fileDomainMap;

    /** Reset to {@code "<BEGIN>"} at the start of every {@code process} call. */
    protected String lastLabel;

    /** Never assigned inside this class. */
    protected List<CleartkExtractor> contextFeatureExtractors;

    /** Never assigned inside this class. */
    protected List<CleartkExtractor> tokenContextFeatureExtractors;

    /** Token context extractors built in {@code initialize}; applied in {@code process} only when no domain adaptor exists. */
    protected List<CleartkExtractor> tokenCleartkExtractors;

    /** Created empty in {@code initialize}; applied in {@code process} only when no domain adaptor exists. */
    protected List<SimpleFeatureExtractor> entityFeatureExtractors;

    /** Created empty in {@code initialize}; always applied in {@code process}. */
    protected List<SimpleFeatureExtractor> entityTreeExtractors;

    /** Built in {@code initialize}; not invoked by {@code process} in this class. */
    protected CleartkExtractor cuePhraseInWindowExtractor;

    /**
     * Never assigned inside this class, yet iterated by {@code process} when domain adaptation is
     * active -- presumably subclasses are expected to populate it.
     */
    protected List<FeatureFunctionExtractor> featureFunctionExtractors;

    /** Domain adaptation feature function; created in {@code initialize} when {@link #fileToDomain} is non-empty. */
    protected FedaFeatureFunction ffDomainAdaptor;

    /** When non-null, its {@code transform} is applied to the features of each training instance. */
    protected FeatureSelection<String> featureSelection;

    /** Logger used for per-document and per-annotation diagnostics. */
    Logger logger = Logger.getLogger(AssertionCleartkAnalysisEngine.class);

    /** Defaults to {@code 1.0}; not consulted inside this class. */
    @ConfigurationParameter(name = PARAM_PROBABILITY_OF_KEEPING_DEFAULT_EXAMPLE, mandatory = false, description = "probability that a default example should be retained for training")
    protected double probabilityOfKeepingADefaultExample = 1.0d;

    /** Defaults to {@code 0}; not consulted inside this class. */
    @ConfigurationParameter(name = PARAM_FEATURE_SELECTION_THRESHOLD, mandatory = false, description = "the Chi-squared threshold at which features should be removed")
    protected Float featureSelectionThreshold = Float.valueOf(0.0f);

    /** Selected feature configuration; defaults to {@link FEATURE_CONFIG#ALL_SYN}. */
    @ConfigurationParameter(name = PARAM_FEATURE_CONFIG, description = "Feature configuration to use (for experiments)", mandatory = false)
    protected FEATURE_CONFIG featConfig = FEATURE_CONFIG.ALL_SYN;

    /** Random source with a fixed seed of 0; not consulted inside this class. */
    protected Random coin = new Random(0);

    /** Maps a file's base name (extension removed) to the domain derived from its directory. */
    protected Map<String, String> fileToDomain = new HashMap<>();

    /* loaded from: input_file:org/apache/ctakes/assertion/medfacts/cleartk/AssertionCleartkAnalysisEngine$FEATURE_CONFIG.class */
    /**
     * Feature configurations selectable through the {@code FEATURE_CONFIG} parameter
     * (described there as "for experiments"). This class only stores the chosen value;
     * how each constant changes feature extraction is not visible here.
     * NOTE(review): the names suggest semantic/syntactic ablations and tree-kernel or
     * dependency-regex variants -- confirm against the subclasses that read {@code featConfig}.
     */
    public enum FEATURE_CONFIG {
        NO_SEM,
        NO_SYN,
        STK,
        STK_FRAGS,
        PTK,
        PTK_FRAGS,
        DEP_REGEX,
        DEP_REGEX_FRAGS,
        /** Default value of the {@code featConfig} field. */
        ALL_SYN
    }

    /**
     * Hook invoked by {@code process} once per entity/event mention, after the features for
     * that mention have been added to {@code instance}. During training, {@code process}
     * writes the instance only when its outcome is non-null after this call.
     *
     * @param identifiedAnnotation the mention currently being processed
     * @param instance the instance holding the mention's extracted features
     * @throws AnalysisEngineProcessException if the implementation fails
     */
    public abstract void setClassLabel(IdentifiedAnnotation identifiedAnnotation, Instance<String> instance) throws AnalysisEngineProcessException;

    /**
     * Subclass hook for setting up feature selection. It is not called from within this class.
     * NOTE(review): presumably implementations assign {@code featureSelection} -- confirm in subclasses.
     *
     * @throws ResourceInitializationException if feature selection cannot be set up
     */
    protected abstract void initializeFeatureSelection() throws ResourceInitializationException;

    /**
     * Initializes the annotator: expands the configured directory list into the
     * file-to-domain map, validates the training configuration and builds the feature
     * extractors owned by this base class.
     *
     * <p>Each entry of {@code fileDomainMap} (entries are separated by {@code ';'} or
     * {@code ':'}) is treated as a directory. Every file directly inside it is registered under
     * its base name (extension removed) with the domain returned by
     * {@link #normalizeToDomain(String)} for that entry. Entries that cannot be listed are
     * skipped silently.
     *
     * @param uimaContext the UIMA context handed to the superclass initializer
     * @throws ResourceInitializationException if superclass initialization fails
     * @throws IllegalArgumentException if the annotator is training and no gold view name is set
     */
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);

        if (this.fileDomainMap != null) {
            for (String directoryPath : this.fileDomainMap.split("[;:]")) {
                String domain = normalizeToDomain(directoryPath);
                // listFiles() returns null for a missing or unreadable directory. It is called
                // exactly once so the null check and the iteration see the same snapshot.
                File[] children = new File(directoryPath).listFiles();
                if (children == null) {
                    continue;
                }
                for (File child : children) {
                    this.fileToDomain.put(FilenameUtils.removeExtension(child.getName()), domain);
                }
            }
        }

        if (isTraining() && this.goldViewName == null) {
            throw new IllegalArgumentException("GoldViewName must be defined during training");
        }

        this.entityFeatureExtractors = new ArrayList<SimpleFeatureExtractor>();

        // Covered text of the tokens in and around the mention, both positionally and as bags
        // of words over windows of 3, 5 and 10 tokens on either side.
        this.tokenCleartkExtractors = new ArrayList<CleartkExtractor>();
        this.tokenCleartkExtractors.add(new CleartkExtractor(
                BaseToken.class,
                new CoveredTextExtractor(),
                new CleartkExtractor.LastCovered(2),
                new CleartkExtractor.Preceding(5),
                new CleartkExtractor.Following(4),
                new CleartkExtractor.Bag(new CleartkExtractor.Preceding(3)),
                new CleartkExtractor.Bag(new CleartkExtractor.Following(3)),
                new CleartkExtractor.Bag(new CleartkExtractor.Preceding(5)),
                new CleartkExtractor.Bag(new CleartkExtractor.Following(5)),
                new CleartkExtractor.Bag(new CleartkExtractor.Preceding(10)),
                new CleartkExtractor.Bag(new CleartkExtractor.Following(10))));

        // Bag of the covered text of all tokens inside the focus annotation.
        this.cuePhraseInWindowExtractor = new CleartkExtractor(
                BaseToken.class,
                new CoveredTextExtractor(),
                new CleartkExtractor.Bag(new CleartkExtractor.Covered()));

        if (!this.fileToDomain.isEmpty()) {
            // The adaptor receives each distinct domain once.
            this.ffDomainAdaptor = new FedaFeatureFunction(
                    new ArrayList<String>(new HashSet<String>(this.fileToDomain.values())));
        }

        this.entityTreeExtractors = new ArrayList<SimpleFeatureExtractor>();
    }

    /**
     * Builds one classification instance for every {@link EntityMention} and
     * {@link EventMention} and, when training, writes it to the data writer.
     *
     * <p>Mentions are read from the gold view while training and from {@code jCas} itself
     * otherwise. For each mention the method, in this order:
     * <ol>
     *   <li>adds token context features (only when no domain adaptor is configured);</li>
     *   <li>adds features describing the closest assertion cue phrase in the covering sentence;</li>
     *   <li>adds an entity type marker for mentions whose type id is 6;</li>
     *   <li>adds entity features (only when no domain adaptor is configured) and tree features;</li>
     *   <li>lower-cases eligible string feature values;</li>
     *   <li>adds feature-function features when domain adaptation is active;</li>
     *   <li>delegates to {@link #setClassLabel} and, in training, writes the instance if it has
     *       an outcome.</li>
     * </ol>
     *
     * @param jCas the CAS of the document being processed
     * @throws AnalysisEngineProcessException if the gold view cannot be obtained or a
     *         downstream extractor, label setter or data writer fails
     */
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        String documentID = DocumentIDAnnotationUtil.getDocumentID(jCas);
        if (documentID == null) {
            this.logger.warn("processing next doc (doc id is null)");
        } else {
            this.logger.debug("processing next doc: " + documentID);
            // The map can be replaced through setTrainFileToDomain without an adaptor ever
            // being created, so both conditions must hold before touching the adaptor.
            if (isDomainAdaptationActive()) {
                this.ffDomainAdaptor.setDomain(this.fileToDomain.get(documentID));
            }
        }
        this.lastLabel = "<BEGIN>";

        JCas annotationView = jCas;
        if (isTraining()) {
            try {
                annotationView = jCas.getView(this.goldViewName);
            } catch (CASException e) {
                throw new AnalysisEngineProcessException(e);
            }
        }

        for (IdentifiedAnnotation mention : JCasUtil.select(annotationView, IdentifiedAnnotation.class)) {
            if (!(mention instanceof EntityMention) && !(mention instanceof EventMention)) {
                continue;
            }
            if (mention.getPolarity() == -1) {
                this.logger.debug(String.format(" - identified annotation: [%d-%d] polarity %d (%s)", Integer.valueOf(mention.getBegin()), Integer.valueOf(mention.getEnd()), Integer.valueOf(mention.getPolarity()), mention.getClass().getName()));
            }

            Instance<String> instance = new Instance<>();

            if (this.ffDomainAdaptor == null) {
                // Token context is extracted from the view the mention came from.
                for (CleartkExtractor tokenExtractor : this.tokenCleartkExtractors) {
                    instance.addAll(tokenExtractor.extract(annotationView, mention));
                }
            }

            addClosestCueFeatures(jCas, mention, instance);

            // NOTE(review): 6 is presumably the anatomical-site type id, judging by the
            // feature value below -- confirm against the type system constants.
            if (mention.getTypeID() == 6) {
                instance.add(new Feature("ENTITY_TYPE_ANAT_SITE"));
                if (isDomainAdaptationActive()) {
                    instance.addAll(this.ffDomainAdaptor.apply(new Feature("ENTITY_TYPE_ANAT_SITE")));
                }
            }

            if (this.ffDomainAdaptor == null) {
                for (SimpleFeatureExtractor entityExtractor : this.entityFeatureExtractors) {
                    instance.addAll(entityExtractor.extract(jCas, mention));
                }
            }
            for (SimpleFeatureExtractor treeExtractor : this.entityTreeExtractors) {
                instance.addAll(treeExtractor.extract(jCas, mention));
            }

            // Obtained before the feature-function features are added, exactly as before;
            // whether later additions are visible through this reference depends on
            // Instance.getFeatures().
            List<Feature> features = instance.getFeatures();
            for (Feature feature : features) {
                if (shouldLowercaseValue(feature)) {
                    feature.setValue(((String) feature.getValue()).toLowerCase());
                }
            }

            if (isDomainAdaptationActive()) {
                for (FeatureFunctionExtractor functionExtractor : this.featureFunctionExtractors) {
                    instance.addAll(functionExtractor.extract(jCas, mention));
                }
            }

            setClassLabel(mention, instance);

            if (isTraining()) {
                if (this.featureSelection != null) {
                    features = this.featureSelection.transform(features);
                }
                if (instance.getOutcome() != null) {
                    // A fresh instance guarantees the (possibly transformed) features are written.
                    this.dataWriter.write(new Instance<String>(instance.getOutcome(), features));
                }
            }
        }
    }

    /** Tells whether a file-to-domain map is present and a domain adaptor has been created. */
    private boolean isDomainAdaptationActive() {
        return !this.fileToDomain.isEmpty() && this.ffDomainAdaptor != null;
    }

    /**
     * Adds word, family and category features for the cue phrase that has the fewest tokens
     * between itself and the mention, looking only inside the first sentence covering the
     * mention and only when at most 20 tokens lie in between.
     */
    private void addClosestCueFeatures(JCas jCas, IdentifiedAnnotation mention, Instance<String> instance) {
        List<Sentence> coveringSentences = new ArrayList<Sentence>(
                JCasUtil.selectCovering(jCas, Sentence.class, mention.getBegin(), mention.getEnd()));
        if (coveringSentences.isEmpty()) {
            return;
        }

        int fewestTokensBetween = Integer.MAX_VALUE;
        AssertionCuePhraseAnnotation closestCue = null;
        for (AssertionCuePhraseAnnotation cue : JCasUtil.selectCovered(AssertionCuePhraseAnnotation.class, coveringSentences.get(0))) {
            int tokensBetween = JCasUtil.selectBetween(BaseToken.class, cue, mention).size();
            // Strict comparison: on a tie the cue encountered first wins.
            if (tokensBetween < fewestTokensBetween) {
                closestCue = cue;
                fewestTokensBetween = tokensBetween;
            }
        }
        if (closestCue == null || fewestTokensBetween >= 21) {
            return;
        }

        instance.add(new Feature("ClosestCue_Word", closestCue.getCoveredText()));
        instance.add(new Feature("ClosestCue_PhraseFamily", closestCue.getCuePhraseAssertionFamily()));
        instance.add(new Feature("ClosestCue_PhraseCategory", closestCue.getCuePhraseCategory()));
        if (isDomainAdaptationActive()) {
            instance.addAll(this.ffDomainAdaptor.apply(new Feature("ClosestCue_Word", closestCue.getCoveredText())));
            instance.addAll(this.ffDomainAdaptor.apply(new Feature("ClosestCue_PhraseFamily", closestCue.getCuePhraseAssertionFamily())));
            instance.addAll(this.ffDomainAdaptor.apply(new Feature("ClosestCue_PhraseCategory", closestCue.getCuePhraseCategory())));
        }
    }

    /**
     * Tells whether a feature's value should be lower-cased: it must hold a {@code String},
     * must not be a {@link TreeFeature}, and its name (if any) must neither start with
     * {@code TreeFrag}, {@code WORD} or {@code NEG} nor contain those markers preceded by an
     * underscore.
     */
    private static boolean shouldLowercaseValue(Feature feature) {
        if (feature instanceof TreeFeature) {
            return false;
        }
        String name = feature.getName();
        if (name != null) {
            if (name.startsWith("TreeFrag") || name.startsWith("WORD") || name.startsWith("NEG")) {
                return false;
            }
            if (name.contains("_TreeFrag") || name.contains("_WORD") || name.contains("_NEG")) {
                return false;
            }
        }
        return feature.getValue() instanceof String;
    }

    /**
     * Creates a primitive analysis engine description for this class and, when any are given,
     * attaches the supplied configuration data to it.
     * NOTE(review): the description targets this abstract class rather than a concrete
     * subclass -- confirm that callers expect this.
     *
     * @param objArr configuration data forwarded to the description when non-empty
     * @return the created description
     * @throws ResourceInitializationException if the description cannot be created
     */
    public static AnalysisEngineDescription getDescription(Object... objArr) throws ResourceInitializationException {
        final AnalysisEngineDescription description =
                AnalysisEngineFactory.createPrimitiveDescription(AssertionCleartkAnalysisEngine.class);
        if (objArr.length != 0) {
            ConfigurationParameterFactory.addConfigurationParameters(description, objArr);
        }
        return description;
    }

    /**
     * Returns the live file-to-domain map held by this annotator (not a copy), so changes made
     * through the returned reference are visible to the annotator.
     *
     * @return the map from file base name to domain
     */
    public Map<String, String> getTrainFileToDomain() {
        return fileToDomain;
    }

    /**
     * Replaces the file-to-domain map. The given map is stored as-is (not copied).
     *
     * <p>A {@code null} argument is stored as a new empty map, because the rest of the class
     * calls {@code isEmpty()} on the field without a null check.
     *
     * @param map the new map from file base name to domain; {@code null} means "no mappings"
     */
    public void setTrainFileToDomain(Map<String, String> map) {
        this.fileToDomain = (map != null) ? map : new HashMap<String, String>();
    }

    /**
     * Derives a domain name from a directory path: the last non-empty {@code '/'}-separated
     * segment that does not start with {@code test}, {@code train} or {@code dev}
     * (case-insensitively, independent of the default locale).
     *
     * <p>Only {@code '/'} is recognised as a separator. When no segment qualifies, the input
     * is returned unchanged.
     *
     * @param str the directory path; must not be {@code null}
     * @return the selected path segment, or {@code str} itself when every segment is empty or
     *         names a data split
     */
    public static String normalizeToDomain(String str) {
        String[] segments = str.split("/");
        // Walk from the innermost directory outwards.
        for (int idx = segments.length - 1; idx >= 0; idx--) {
            String segment = segments[idx];
            // A leading or doubled slash yields an empty segment, which is not a usable domain.
            if (segment.isEmpty()) {
                continue;
            }
            if (startsWithIgnoreCase(segment, "test")
                    || startsWithIgnoreCase(segment, "train")
                    || startsWithIgnoreCase(segment, "dev")) {
                continue;
            }
            return segment;
        }
        return str;
    }

    /** Case-insensitive, locale-independent prefix test. */
    private static boolean startsWithIgnoreCase(String text, String prefix) {
        return text.regionMatches(true, 0, prefix, 0, prefix.length());
    }
}
