package org.apache.ctakes.coreference.ae;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.dependency.parser.util.DependencyUtility;
import org.apache.ctakes.typesystem.type.syntax.BaseToken;
import org.apache.ctakes.typesystem.type.syntax.ConllDependencyNode;
import org.apache.ctakes.typesystem.type.syntax.TerminalTreebankNode;
import org.apache.ctakes.typesystem.type.syntax.TreebankNode;
import org.apache.ctakes.typesystem.type.textsem.Markable;
import org.apache.ctakes.typesystem.type.textsem.TimeMention;
import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;

/**
 * Rule-based annotator that adds {@link Markable} annotations to the CAS.
 *
 * <p>Markables are created from three sources, all visible in {@link #process(JCas)}:
 * <ul>
 *   <li>noun-headed dependency subtrees (the span of the noun plus its retained progeny),</li>
 *   <li>stand-alone determiners and the pronoun "it" in a subject/object-like relation,</li>
 *   <li>every {@link TimeMention} that does not already have a markable with the same span.</li>
 * </ul>
 */
@PipeBitInfo(name = "Markable Annotator (Deterministic)", description = "Annotates Markables for use by Coreference Annotators. degree_of type and a single modifier.", dependencies = {PipeBitInfo.TypeProduct.SECTION, PipeBitInfo.TypeProduct.SENTENCE, PipeBitInfo.TypeProduct.IDENTIFIED_ANNOTATION, PipeBitInfo.TypeProduct.DEPENDENCY_NODE, PipeBitInfo.TypeProduct.TIMEX}, products = {PipeBitInfo.TypeProduct.MARKABLE})
public class DeterministicMarkableAnnotator extends JCasAnnotator_ImplBase {
    /**
     * Matches a list/section marker at the start of a string ("A.", "#3", "12)" ...) together
     * with any spaces that follow it. Left package-visible and non-final so that any existing
     * same-package users keep compiling.
     */
    static Pattern headerPatt = Pattern.compile("^(([A-Z][\\.\\:\\)])|(#\\d+)|(\\d+[\\.\\:\\)])) *");

    /** Token text consisting solely of punctuation characters. */
    private static final Pattern PUNCTUATION_ONLY = Pattern.compile("\\p{Punct}+");

    /** Token form consisting solely of whitespace. */
    private static final Pattern WHITESPACE_ONLY = Pattern.compile("\\s+");

    /** A token that is, in its entirety, a list/section marker (same alternatives as {@link #headerPatt}). */
    private static final Pattern LIST_MARKER = Pattern.compile("(([A-Z][\\.\\:\\)])|(#\\d+)|(\\d+[\\.\\:\\)]))");

    /** Lower-cased nouns that never head a markable. */
    private static final String[] SKIPPED_NOUNS = {"date", "tablet", "hg", "lb", "status", "capsule", "mg", "cm"};

    /**
     * Delegates to the superclass; this annotator has no configuration of its own.
     *
     * @param uimaContext the UIMA context handed in by the framework
     * @throws ResourceInitializationException if the superclass initialization fails
     */
    @Override
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
    }

    /**
     * Creates the dependency-based markables, then adds one markable per {@link TimeMention}
     * unless a markable with exactly the same begin/end offsets already exists.
     *
     * @param jCas the CAS to read from and add markables to
     * @throws AnalysisEngineProcessException declared by the framework contract
     */
    @Override
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        createMarkablesUsingDependencyTrees(jCas);
        for (TimeMention timeMention : JCasUtil.select(jCas, TimeMention.class)) {
            if (!hasMarkableWithSpan(jCas, timeMention.getBegin(), timeMention.getEnd())) {
                new Markable(jCas, timeMention.getBegin(), timeMention.getEnd()).addToIndexes(jCas);
            }
        }
    }

    /** Returns true when some indexed markable has exactly the offsets {@code [begin, end)}. */
    private static boolean hasMarkableWithSpan(JCas jCas, int begin, int end) {
        for (Markable markable : JCasUtil.selectCovered(jCas, Markable.class, begin, end)) {
            if (markable.getBegin() == begin && markable.getEnd() == end) {
                return true;
            }
        }
        return false;
    }

    /**
     * Walks every dependency node inside every {@link Segment} and creates markables for
     * qualifying nouns, non-"det" determiners, and "it" whose relation label contains "bj".
     * Nodes with id 0 and punctuation-only nodes are ignored.
     *
     * @param jCas the CAS to read from and add markables to
     */
    private static void createMarkablesUsingDependencyTrees(JCas jCas) {
        for (Segment segment : JCasUtil.select(jCas, Segment.class)) {
            for (ConllDependencyNode node : JCasUtil.selectCovered(jCas, ConllDependencyNode.class, segment)) {
                // Locale.ROOT keeps the comparisons below stable regardless of the JVM default
                // locale (e.g. Turkish lower-cases "I" to a dotless i, which would break "it").
                String nodeText = node.getCoveredText().toLowerCase(Locale.ROOT);
                if (node.getId() == 0 || PUNCTUATION_ONLY.matcher(nodeText).matches()) {
                    continue;
                }
                if (isNoun(node)) {
                    // A noun that is filtered out here is dropped entirely; it must not fall
                    // through to the determiner / pronoun rules below.
                    if (!WHITESPACE_ONLY.matcher(node.getForm()).matches() && !isSkippedNoun(nodeText)) {
                        addNounMarkable(jCas, node, nodeText);
                    }
                } else if (node.getPostag().equals("DT") && !node.getDeprel().equals("det")) {
                    new Markable(jCas, node.getBegin(), node.getEnd()).addToIndexes();
                } else if (nodeText.equals("it") && node.getDeprel().contains("bj")) {
                    new Markable(jCas, node.getBegin(), node.getEnd()).addToIndexes();
                }
            }
        }
    }

    /**
     * A node counts as a noun when its dependency POS tag starts with "NN" and the first
     * terminal treebank node it covers exists and has a node type starting with "N".
     */
    private static boolean isNoun(ConllDependencyNode node) {
        if (!node.getPostag().startsWith("NN")) {
            return false;
        }
        List<TerminalTreebankNode> terminals = JCasUtil.selectCovered(TerminalTreebankNode.class, node);
        return !terminals.isEmpty() && terminals.get(0).getNodeType().startsWith("N");
    }

    /** Returns true when the lower-cased token text is one of {@link #SKIPPED_NOUNS}. */
    private static boolean isSkippedNoun(String lowerCaseText) {
        for (String skipped : SKIPPED_NOUNS) {
            if (skipped.equals(lowerCaseText)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Adds a markable for a noun node: starts from the noun's own span, widens it to cover the
     * retained progeny, pulls it back so it does not swallow the noun's own head, and finally
     * skips a leading header marker.
     *
     * @param jCas     the CAS to add the markable to
     * @param node     the noun dependency node heading the markable
     * @param nodeText the lower-cased covered text of {@code node}
     */
    private static void addNounMarkable(JCas jCas, ConllDependencyNode node, String nodeText) {
        int begin = node.getBegin();
        int end = node.getEnd();

        List<ConllDependencyNode> sentenceNodes = DependencyUtility.getDependencyNodes(jCas, DependencyUtility.getSentence(jCas, node));
        List<ConllDependencyNode> progeny = removeUnannotatedNodes(node, DependencyUtility.getProgeny(node, sentenceNodes));
        for (ConllDependencyNode descendant : progeny) {
            if (descendant.getBegin() < begin) {
                begin = descendant.getBegin();
            }
            if (descendant.getEnd() > end) {
                end = descendant.getEnd();
            }
        }

        // If the widened span has grown past the noun's own (non-root) head, trim it to the
        // token right after / right before that head so the head stays outside the markable.
        ConllDependencyNode head = node.getHead();
        if (head != null && head.getId() != 0) {
            if (head.getBegin() < node.getBegin() && head.getBegin() > begin) {
                List<BaseToken> afterHead = JCasUtil.selectFollowing(BaseToken.class, head, 1);
                // Guard: without a following token there is nothing to trim to.
                if (!afterHead.isEmpty()) {
                    begin = afterHead.get(0).getBegin();
                }
            }
            if (head.getEnd() > node.getEnd() && head.getEnd() < end) {
                List<BaseToken> beforeHead = JCasUtil.selectPreceding(BaseToken.class, head, 1);
                // Guard: without a preceding token there is nothing to trim to.
                if (!beforeHead.isEmpty()) {
                    end = beforeHead.get(0).getEnd();
                }
            }
        }

        // NOTE(review): the header pattern is matched against the lower-cased text of the head
        // token only, yet the match length is added to 'begin', which may already have been
        // moved left to a descendant. Kept as-is to preserve existing output - confirm intent.
        Matcher headerMatcher = headerPatt.matcher(nodeText);
        if (headerMatcher.find()) {
            begin += headerMatcher.end();
        }
        new Markable(jCas, begin, end).addToIndexes();
    }

    /**
     * Filters a progeny list down to the nodes that should contribute to a markable's span.
     *
     * <p>{@code conllDependencyNode} itself is always kept. Any other node is dropped when some
     * node on its dependency path to {@code conllDependencyNode} (excluding
     * {@code conllDependencyNode}) has relation "conj", "cc", "punct" or "meta", has POS tag
     * "." or ",", or has covered text that is a list marker.
     *
     * @param conllDependencyNode the node heading the subtree
     * @param list                the progeny of {@code conllDependencyNode}; also passed to the path lookup
     * @return a new list with the retained nodes, in the iteration order of {@code list}
     */
    private static List<ConllDependencyNode> removeUnannotatedNodes(ConllDependencyNode conllDependencyNode, List<ConllDependencyNode> list) {
        List<ConllDependencyNode> retained = new ArrayList<ConllDependencyNode>();
        for (ConllDependencyNode candidate : list) {
            if (candidate == conllDependencyNode) {
                retained.add(candidate);
                // The head is unconditionally kept; skipping the path check avoids adding it a
                // second time and saves a needless path computation.
                continue;
            }
            if (!isBlocked(candidate, conllDependencyNode, list)) {
                retained.add(candidate);
            }
        }
        return retained;
    }

    /** Returns true when the path from {@code candidate} to {@code root} crosses a blocking node. */
    private static boolean isBlocked(ConllDependencyNode candidate, ConllDependencyNode root, List<ConllDependencyNode> progeny) {
        for (ConllDependencyNode pathNode : DependencyUtility.getPath(progeny, candidate, root)) {
            if (pathNode == root) {
                continue;
            }
            String deprel = pathNode.getDeprel();
            String postag = pathNode.getPostag();
            if (deprel.equals("conj")
                    || deprel.equals("cc")
                    || postag.equals(".")
                    || postag.equals(",")
                    || deprel.equals("punct")
                    || deprel.equals("meta")
                    || LIST_MARKER.matcher(pathNode.getCoveredText()).matches()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Alternative markable creation based on constituency trees: one markable per "NP" node
     * (minus a leading header marker and a trailing "." or ":"), plus one per non-final terminal
     * child whose type starts with "N" but is not "NNP". NPs whose only child is "PRP", "EX" or
     * "CD" are skipped. Not called from {@link #process(JCas)} in this class.
     *
     * @param jCas the CAS to read from and add markables to
     */
    private static void createMarkablesUsingConstituencyTrees(JCas jCas) {
        for (TreebankNode treebankNode : JCasUtil.select(jCas, TreebankNode.class)) {
            if (!treebankNode.getNodeType().equals("NP")) {
                continue;
            }
            if (treebankNode.getChildren().size() == 1) {
                String onlyChildType = treebankNode.getChildren(0).getNodeType();
                if (onlyChildType.equals("PRP") || onlyChildType.equals("EX") || onlyChildType.equals("CD")) {
                    continue;
                }
            }

            String coveredText = treebankNode.getCoveredText();
            int begin = treebankNode.getBegin();
            int end = treebankNode.getEnd();
            Matcher headerMatcher = headerPatt.matcher(coveredText);
            if (headerMatcher.find()) {
                begin += headerMatcher.end();
            }
            // Drop one trailing "." / ":" as long as at least one character remains.
            if ((coveredText.endsWith(".") || coveredText.endsWith(":")) && end - 1 > begin) {
                end--;
            }
            new Markable(jCas, begin, end).addToIndexes();

            // The last child is deliberately not visited (loop bound is size - 1);
            // presumably it is the phrase head already covered above - TODO confirm.
            int lastChildIndex = treebankNode.getChildren().size() - 1;
            for (int i = 0; i < lastChildIndex; i++) {
                TreebankNode child = treebankNode.getChildren(i);
                if ((child instanceof TerminalTreebankNode) && child.getNodeType().startsWith("N") && !child.getNodeType().equals("NNP")) {
                    new Markable(jCas, child.getBegin(), child.getEnd()).addToIndexes();
                }
            }
        }
    }
}
