package org.apache.ctakes.coreference.ae.features.cluster;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import org.apache.ctakes.core.resource.FileLocator;
import org.apache.ctakes.core.util.ListIterable;
import org.apache.ctakes.coreference.ae.features.StringMatchingFeatureExtractor;
import org.apache.ctakes.dependency.parser.util.DependencyUtility;
import org.apache.ctakes.relationextractor.ae.features.RelationFeaturesExtractor;
import org.apache.ctakes.typesystem.type.relation.CollectionTextRelation;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.ctakes.typesystem.type.textsem.Markable;
import org.apache.ctakes.utils.distsem.WordEmbeddings;
import org.apache.ctakes.utils.distsem.WordVector;
import org.apache.ctakes.utils.distsem.WordVectorReader;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.cleartk.ml.Feature;

/* loaded from: input_file:org/apache/ctakes/coreference/ae/features/cluster/MentionClusterDistSemExtractor.class */
public class MentionClusterDistSemExtractor implements RelationFeaturesExtractor<CollectionTextRelation, IdentifiedAnnotation> {
    public static final double DEFAULT_SIM = 0.5d;
    private WordEmbeddings words;

    public MentionClusterDistSemExtractor() throws FileNotFoundException, IOException {
        this("org/apache/ctakes/coreference/distsem/mimic_vectors.txt");
    }

    public MentionClusterDistSemExtractor(String str) throws FileNotFoundException, IOException {
        this.words = null;
        this.words = WordVectorReader.getEmbeddings(FileLocator.getAsStream(str));
    }

    public List<Feature> extract(JCas jCas, CollectionTextRelation collectionTextRelation, IdentifiedAnnotation identifiedAnnotation) throws AnalysisEngineProcessException {
        ArrayList arrayList = new ArrayList();
        if (StringMatchingFeatureExtractor.isPronoun(identifiedAnnotation)) {
            return arrayList;
        }
        double d = 0.0d;
        double d2 = 0.0d;
        ConllDependencyNode nominalHeadNode = DependencyUtility.getNominalHeadNode(jCas, identifiedAnnotation);
        double[] phraseVec = getPhraseVec(identifiedAnnotation);
        boolean z = false;
        String lowerCase = nominalHeadNode != null ? nominalHeadNode.getCoveredText().toLowerCase() : null;
        if (lowerCase != null) {
            Iterator it = new ListIterable(collectionTextRelation.getMembers()).iterator();
            while (it.hasNext()) {
                Markable markable = (Markable) it.next();
                if (identifiedAnnotation.getBegin() < markable.getEnd()) {
                    break;
                }
                double[] phraseVec2 = getPhraseVec(markable);
                double d3 = 0.0d;
                for (int i = 0; i < phraseVec2.length; i++) {
                    d3 += phraseVec[i] * phraseVec2[i];
                }
                if (d3 > d2) {
                    d2 = d3;
                }
                ConllDependencyNode nominalHeadNode2 = DependencyUtility.getNominalHeadNode(jCas, markable);
                String lowerCase2 = nominalHeadNode2 != null ? nominalHeadNode2.getCoveredText().toLowerCase() : null;
                if (lowerCase.equals(lowerCase2)) {
                    z = true;
                }
                if (nominalHeadNode2 != null && this.words.containsKey(lowerCase2) && this.words.containsKey(lowerCase)) {
                    double similarity = this.words.getSimilarity(lowerCase, lowerCase2);
                    if (similarity > d) {
                        d = similarity;
                    }
                }
            }
        }
        if (z) {
            d = 0.0d;
        }
        arrayList.add(new Feature("HEAD_SIMILARITY_WORD2VEC", Double.valueOf(d)));
        return arrayList;
    }

    private double[] getPhraseVec(Annotation annotation) {
        double[] dArr = new double[this.words.getDimensionality()];
        double d = 0.0d;
        Iterator it = JCasUtil.selectCovered(BaseToken.class, annotation).iterator();
        while (it.hasNext()) {
            String lowerCase = ((BaseToken) it.next()).getCoveredText().toLowerCase();
            if (this.words.containsKey(lowerCase)) {
                WordVector vector = this.words.getVector(lowerCase);
                for (int i = 0; i < dArr.length; i++) {
                    double value = vector.getValue(i);
                    int i2 = i;
                    dArr[i2] = dArr[i2] + value;
                    d = value * value;
                }
            }
        }
        for (double d2 : dArr) {
            d += d2 * d2;
        }
        double sqrt = Math.sqrt(d);
        if (sqrt > 0.0d) {
            for (int i3 = 0; i3 < dArr.length; i3++) {
                int i4 = i3;
                dArr[i4] = dArr[i4] / sqrt;
            }
        }
        return dArr;
    }
}
