package org.apache.ctakes.relationextractor.eval;

import com.google.common.collect.Sets;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.ctakes.chunker.ae.Chunker;
import org.apache.ctakes.chunker.ae.adjuster.ChunkAdjuster;
import org.apache.ctakes.constituency.parser.ae.ConstituencyParser;
import org.apache.ctakes.context.tokenizer.ae.ContextDependentTokenizerAnnotator;
import org.apache.ctakes.core.ae.SentenceDetectorAnnotatorBIO;
import org.apache.ctakes.core.ae.SimpleSegmentAnnotator;
import org.apache.ctakes.core.ae.TokenizerAnnotatorPTB;
import org.apache.ctakes.core.util.doc.DocIdUtil;
import org.apache.ctakes.dependency.parser.ae.ClearNLPDependencyParserAE;
import org.apache.ctakes.dictionary.lookup2.ae.DefaultJCasTermAnnotator;
import org.apache.ctakes.lvg.ae.LvgAnnotator;
import org.apache.ctakes.postagger.POSTagger;
import org.apache.ctakes.relationextractor.data.GoldAnnotationStatsCalculator;
import org.apache.ctakes.relationextractor.eval.CorpusXMI;
import org.apache.ctakes.typesystem.type.relation.LocationOfTextRelation;
import org.apache.ctakes.typesystem.type.relation.RelationArgument;
import org.apache.ctakes.typesystem.type.textsem.AnatomicalSiteMention;
import org.apache.ctakes.typesystem.type.textsem.DiseaseDisorderMention;
import org.apache.ctakes.typesystem.type.textsem.EventMention;
import org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation;
import org.apache.ctakes.typesystem.type.textsem.ProcedureMention;
import org.apache.ctakes.typesystem.type.textsem.SignSymptomMention;
import org.apache.log4j.Logger;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.impl.XmiCasSerializer;
import org.apache.uima.collection.CollectionReader;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.component.ViewCreatorAnnotator;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.factory.AggregateBuilder;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.pipeline.JCasIterator;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.XMLSerializer;
import org.cleartk.util.ViewUriUtil;
import org.cleartk.util.ae.UriToDocumentTextAnnotator;
import org.cleartk.util.cr.UriCollectionReader;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.input.SAXBuilder;

/* loaded from: input_file:org/apache/ctakes/relationextractor/eval/DeepPheXMI.class */
public class DeepPheXMI extends CorpusXMI {
    /**
     * Matches corpus directory names of the form {@code patientN_reportM_rest}; group 1 is the
     * patient number that decides which data split a directory belongs to.
     */
    private static final Pattern dirPatt = Pattern.compile("patient(\\d+)_report(\\d+)_(.*)");

    /**
     * Class-level matcher slot. It is static and mutable, so any code that assigns it is not
     * thread-safe; prefer a method-local {@link Matcher} in new code.
     */
    private static Matcher matcher = null;

    /** Patient numbers whose report directories make up the training split. */
    public static final Set<Integer> trainPatients = Sets.newHashSet(new Integer[]{3, 11, 92, 93});

    /** Patient numbers whose report directories make up the development split. */
    public static final Set<Integer> devPatients = Sets.newHashSet(new Integer[]{2, 21});

    /** Patient numbers whose report directories make up the test split. */
    public static final Set<Integer> testPatients = Sets.newHashSet(new Integer[]{1, 16});

    /* loaded from: input_file:org/apache/ctakes/relationextractor/eval/DeepPheXMI$DeepPheAnaforaXMLReader.class */
    public static class DeepPheAnaforaXMLReader extends JCasAnnotator_ImplBase {
        /** Logger shared by every instance of this reader; never reassigned, hence final. */
        private static final Logger LOGGER = Logger.getLogger(DeepPheAnaforaXMLReader.class);

        /** Name of the configuration parameter bound to {@code anaforaDirectory}. */
        public static final String PARAM_ANAFORA_DIRECTORY = "anaforaDirectory";

        /**
         * Root directory of the Anafora annotations.
         * NOTE(review): process() only tests this field for null and builds the same candidate path
         * in both cases - confirm whether it was meant to relocate the XML lookup.
         */
        @ConfigurationParameter(name = PARAM_ANAFORA_DIRECTORY, description = "root directory of the Anafora-annotated files, with one subdirectory for each annotated file")
        private File anaforaDirectory;

        /** Name of the configuration parameter bound to {@code anaforaXMLSuffixes}. */
        public static final String PARAM_ANAFORA_XML_SUFFIXES = "anaforaSuffixes";

        /**
         * Suffixes appended to the text file path to locate its Anafora XML file; the first suffix
         * that yields an existing file is used. Defaults to a single DeepPhe suffix.
         */
        @ConfigurationParameter(name = PARAM_ANAFORA_XML_SUFFIXES, mandatory = false, description = "list of suffixes that might be added to a file name to identify the Anafora XML annotations file; only the first suffix corresponding to a file will be used")
        private String[] anaforaXMLSuffixes = {".UmlsDeepPhe.dave.completed.xml"};

        /**
         * Builds an engine description for this reader with no configuration values set.
         *
         * @return description of a {@code DeepPheAnaforaXMLReader} using its default parameters
         * @throws ResourceInitializationException if the description cannot be created
         */
        public static AnalysisEngineDescription getDescription() throws ResourceInitializationException {
            final AnalysisEngineDescription readerDescription =
                    AnalysisEngineFactory.createEngineDescription(DeepPheAnaforaXMLReader.class);
            return readerDescription;
        }

        /**
         * Builds an engine description for this reader with the Anafora directory parameter set.
         *
         * @param file value passed for {@link #PARAM_ANAFORA_DIRECTORY}
         * @return description of a {@code DeepPheAnaforaXMLReader} configured with {@code file}
         * @throws ResourceInitializationException if the description cannot be created
         */
        public static AnalysisEngineDescription getDescription(File file) throws ResourceInitializationException {
            final AnalysisEngineDescription readerDescription = AnalysisEngineFactory.createEngineDescription(
                    DeepPheAnaforaXMLReader.class,
                    PARAM_ANAFORA_DIRECTORY, file);
            return readerDescription;
        }

        /**
         * Locates the Anafora XML file that belongs to the document in {@code jCas} and loads its
         * annotations into the CAS.
         *
         * <p>Candidate XML paths are the document's own path with each configured suffix appended;
         * the first candidate that exists on disk is used.
         *
         * @param jCas view whose URI (as returned by {@code ViewUriUtil.getURI}) identifies the text file
         * @throws AnalysisEngineProcessException if the XML file cannot be read or parsed
         * @throws IllegalArgumentException if suffixes are configured but none yields an existing file
         */
        public void process(JCas jCas) throws AnalysisEngineProcessException {
            File textFile = new File(ViewUriUtil.getURI(jCas));
            LOGGER.info("processing " + textFile);

            // The original branched on anaforaDirectory == null, but both branches produced the same
            // path (File.toString() is getPath()), so a single expression is equivalent.
            List<File> candidates = new ArrayList<File>();
            for (String suffix : this.anaforaXMLSuffixes) {
                candidates.add(new File(textFile.getPath() + suffix));
            }

            File xmlFile = null;
            for (File candidate : candidates) {
                if (candidate.exists()) {
                    xmlFile = candidate;
                    break;
                }
            }

            if (xmlFile == null) {
                if (this.anaforaXMLSuffixes.length > 0) {
                    throw new IllegalArgumentException("no Anafora XML file found from " + candidates);
                }
                // No suffixes configured: nothing to load for this document.
                return;
            }
            processXmlFile(jCas, xmlFile);
        }

        /**
         * Parses one Anafora XML file and adds the entities and location relations it describes to
         * {@code jCas}.
         *
         * <p>For every {@code annotations} element, each {@code entity} child is turned into a mention
         * whose class depends on its {@code type} text; entities carrying a non-empty
         * {@code body_location} property are then linked to the referenced entity with a
         * {@link LocationOfTextRelation}. Problems with an individual entity are logged through
         * {@code error} and that entity (or relation) is skipped.
         *
         * @param jCas CAS view that receives the annotations; its document text bounds the offsets
         * @param file Anafora XML file to read
         * @throws AnalysisEngineProcessException if the file cannot be read or is not well-formed XML
         */
        private static void processXmlFile(JCas jCas, File file) throws AnalysisEngineProcessException {
            Element root;
            try {
                root = new SAXBuilder().build(file.toURI().toURL()).getRootElement();
            } catch (IOException e) {
                // Also covers MalformedURLException, which is an IOException.
                throw new AnalysisEngineProcessException(e);
            } catch (JDOMException e) {
                throw new AnalysisEngineProcessException(e);
            }

            int docLength = jCas.getDocumentText().length();
            for (Element annotationsElem : root.getChildren("annotations")) {
                // Both collections are scoped to one <annotations> element so that links recorded for
                // one element are never replayed against the id map of another.
                HashMap<String, IdentifiedAnnotation> mentionsById = new HashMap<String, IdentifiedAnnotation>();
                ArrayList<String[]> locationLinks = new ArrayList<String[]>();

                for (Element entityElem : annotationsElem.getChildren("entity")) {
                    String id = removeSingleChildText(entityElem, "id", null);
                    Element spanElem = removeSingleChild(entityElem, "span", id);
                    String type = removeSingleChildText(entityElem, "type", id);
                    Element propertiesElem = removeSingleChild(entityElem, "properties", id);

                    if (spanElem == null) {
                        // getSingleChild has already logged the missing span.
                        continue;
                    }
                    int[] bounds = parseSpanBounds(spanElem.getText(), id);
                    if (bounds == null) {
                        continue;
                    }
                    int begin = bounds[0];
                    int end = bounds[1];
                    // The end offset is exclusive, so end == docLength (annotation reaching the last
                    // character) is legal; only offsets beyond the text are rejected.
                    if (begin < 0 || end > docLength || begin > end) {
                        error("Illegal begin or end boundary", id);
                        continue;
                    }

                    // Constant-first equals keeps a null type (empty <type>) from throwing.
                    IdentifiedAnnotation mention;
                    boolean mayHaveBodyLocation = true;
                    if ("Disease_Disorder".equals(type)) {
                        mention = new DiseaseDisorderMention(jCas, begin, end);
                    } else if ("Procedure".equals(type)) {
                        mention = new ProcedureMention(jCas, begin, end);
                    } else if ("Sign_symptom".equals(type)) {
                        mention = new SignSymptomMention(jCas, begin, end);
                    } else if ("Metastasis".equals(type)) {
                        mention = new EventMention(jCas, begin, end);
                    } else if ("Anatomical_site".equals(type)) {
                        mention = new AnatomicalSiteMention(jCas, begin, end);
                        mayHaveBodyLocation = false;
                        if (propertiesElem != null) {
                            removeSingleChildText(propertiesElem, "associatedCode", id);
                            extractAttributeValues(propertiesElem, mention, id);
                        }
                    } else {
                        LOGGER.info("This entity type is not being extracted yet!");
                        continue;
                    }

                    if (mayHaveBodyLocation && propertiesElem != null) {
                        String locationId = removeSingleChildText(propertiesElem, "body_location", id);
                        if (locationId != null && !locationId.isEmpty()) {
                            locationLinks.add(new String[]{id, locationId});
                        }
                    }

                    mention.addToIndexes();
                    mentionsById.put(id, mention);
                }

                for (String[] link : locationLinks) {
                    IdentifiedAnnotation source = mentionsById.get(link[0]);
                    IdentifiedAnnotation location = mentionsById.get(link[1]);
                    if (source == null || location == null) {
                        // The referenced entity was skipped or never defined; a relation with a null
                        // argument would only fail later, so report it and move on.
                        error(String.format("a body_location reference to unextracted entity '%s'", link[1]), link[0]);
                        continue;
                    }
                    LocationOfTextRelation relation = new LocationOfTextRelation(jCas);
                    relation.setCategory(GoldAnnotationStatsCalculator.targetRelationType);
                    RelationArgument sourceArg = new RelationArgument(jCas);
                    sourceArg.setArgument(source);
                    relation.setArg1(sourceArg);
                    RelationArgument locationArg = new RelationArgument(jCas);
                    locationArg.setArgument(location);
                    relation.setArg2(locationArg);
                    relation.setDiscoveryTechnique(1);
                    relation.addToIndexes();
                }
            }
        }

        /**
         * Computes the overall extent of a span string of the form {@code b1,e1;b2,e2;...}: the
         * smallest begin and the largest end over all segments.
         *
         * @param spanText text of the {@code span} element
         * @param id entity id, used only for error reporting
         * @return a two-element array {@code {begin, end}}, or {@code null} if any segment is not
         *     exactly two comma-separated values (the problem is logged)
         */
        private static int[] parseSpanBounds(String spanText, String id) {
            int begin = Integer.MAX_VALUE;
            int end = Integer.MIN_VALUE;
            for (String segment : spanText.split(";")) {
                String[] offsets = segment.split(",");
                if (offsets.length != 2) {
                    error("span not of the format 'number,number'", id);
                    return null;
                }
                begin = Math.min(begin, Integer.parseInt(offsets[0]));
                end = Math.max(end, Integer.parseInt(offsets[1]));
            }
            return new int[]{begin, end};
        }

        /**
         * Placeholder: the body is empty, so no attribute values are read from {@code element} or
         * written to {@code identifiedAnnotation}.
         *
         * @param element properties element of the entity (unused)
         * @param identifiedAnnotation annotation that would receive the attributes (unused)
         * @param str entity id (unused)
         */
        private static void extractAttributeValues(Element element, IdentifiedAnnotation identifiedAnnotation, String str) {
        }

        /**
         * Looks up the child of {@code element} named {@code str}, expecting exactly one.
         *
         * @param element parent element to search
         * @param str name of the child element
         * @param str2 id of the enclosing annotation, used only in the logged error message
         * @return the first matching child, or {@code null} if there is none; when the number of
         *     matches is not exactly one, an error is logged as well
         */
        private static Element getSingleChild(Element element, String str, String str2) {
            List<Element> matches = element.getChildren(str);
            if (matches.size() != 1) {
                error(String.format("not exactly one '%s' child", str), str2);
            }
            return matches.isEmpty() ? null : matches.get(0);
        }

        /**
         * Fetches the child named {@code str} via {@code getSingleChild} and then removes every
         * child of that name from {@code element}.
         *
         * @param element parent element; modified by this call
         * @param str name of the child element
         * @param str2 id of the enclosing annotation, used only for error reporting
         * @return whatever {@code getSingleChild} returned (possibly {@code null})
         */
        private static Element removeSingleChild(Element element, String str, String str2) {
            final Element onlyChild = getSingleChild(element, str, str2);
            // Remove by name, so any unexpected duplicates go away together with the one returned.
            element.removeChildren(str);
            return onlyChild;
        }

        /**
         * Returns the text of the child named {@code str} and removes every child of that name from
         * {@code element}.
         *
         * @param element parent element; modified by this call
         * @param str name of the child element
         * @param str2 id of the enclosing annotation, used only for error reporting
         * @return the child's text, or {@code null} when the child is missing or its text is empty
         *     (both cases are logged)
         */
        private static String removeSingleChildText(Element element, String str, String str2) {
            Element child = getSingleChild(element, str, str2);
            String text = null;
            // getSingleChild returns null (after logging) when there is no such child; guard it
            // instead of dereferencing.
            if (child != null) {
                text = child.getText();
                if (text.isEmpty()) {
                    error(String.format("an empty '%s' child", str), str2);
                    text = null;
                }
            }
            element.removeChildren(str);
            return text;
        }

        /**
         * Logs a problem found while reading an annotation; it does not throw.
         *
         * @param str description of what was found
         * @param str2 id of the annotation in which it was found
         */
        private static void error(String str, String str2) {
            final String message = String.format("found %s in annotation with ID %s", str, str2);
            LOGGER.error(message);
        }
    }

    /**
     * Runs the preprocessing pipeline and the Anafora gold reader over every train, dev and test
     * document under {@code file2} and writes one XMI file per document into {@code file}.
     *
     * @param file output directory for the XMI files; created if it does not exist
     * @param file2 corpus root directory passed to the text-file getters and to the Anafora reader
     * @throws IOException if the output directory cannot be created
     * @throws IllegalArgumentException if a processed CAS has no document ID in its GoldView
     * @throws Exception if pipeline construction, processing or serialization fails
     */
    public static void generateXMI(File file, File file2) throws Exception {
        if (!file.isDirectory() && !file.mkdirs()) {
            // Fail here rather than on the first FileOutputStream with a less helpful message.
            throw new IOException("could not create XMI output directory " + file);
        }

        List<File> textFiles = new ArrayList<File>();
        textFiles.addAll(getTrainTextFiles(file2));
        textFiles.addAll(getDevTextFiles(file2));
        textFiles.addAll(getTestTextFiles(file2));
        CollectionReader reader = UriCollectionReader.getCollectionReaderFromFiles(textFiles);

        AggregateBuilder builder = new AggregateBuilder();
        builder.add(UriToDocumentTextAnnotator.getDescription());
        builder.add(getDeepPhePreprocessingPipeline());
        builder.add(AnalysisEngineFactory.createEngineDescription(ViewCreatorAnnotator.class, "viewName", "GoldView"));
        builder.add(AnalysisEngineFactory.createEngineDescription(CorpusXMI.CopyDocumentTextToGoldView.class));
        // The last two engines run with the system view mapped onto GoldView.
        builder.add(
                AnalysisEngineFactory.createEngineDescription(CorpusXMI.DocumentIDAnnotator.class),
                GoldAnnotationStatsCalculator.systemViewName, "GoldView");
        builder.add(
                AnalysisEngineFactory.createEngineDescription(DeepPheAnaforaXMLReader.getDescription(file2)),
                GoldAnnotationStatsCalculator.systemViewName, "GoldView");

        JCasIterator casIterator = new JCasIterator(reader, builder.createAggregate());
        while (casIterator.hasNext()) {
            JCas jCas = casIterator.next();
            String documentID = DocIdUtil.getDocumentID(jCas.getView("GoldView"));
            if (documentID == null) {
                throw new IllegalArgumentException("No documentID for CAS:\n" + jCas);
            }
            FileOutputStream xmiStream = new FileOutputStream(toXMIFile(file, new File(documentID)));
            try {
                new XmiCasSerializer(jCas.getTypeSystem())
                        .serialize(jCas.getCas(), new XMLSerializer(xmiStream).getContentHandler());
            } finally {
                // Close even when serialization throws, so the file handle is not leaked.
                xmiStream.close();
            }
        }
    }

    /**
     * Lists the text files of the training split.
     *
     * @param file corpus root directory
     * @return text files of the directories whose patient number is in {@link #trainPatients}
     */
    public static List<File> getTrainTextFiles(File file) {
        final List<File> trainingFiles = getSetTextFiles(file, trainPatients);
        return trainingFiles;
    }

    /**
     * Lists the text files of the development split.
     *
     * @param file corpus root directory
     * @return text files of the directories whose patient number is in {@link #devPatients}
     */
    public static List<File> getDevTextFiles(File file) {
        final List<File> developmentFiles = getSetTextFiles(file, devPatients);
        return developmentFiles;
    }

    /**
     * Lists the text files of the test split.
     *
     * @param file corpus root directory
     * @return text files of the directories whose patient number is in {@link #testPatients}
     */
    public static List<File> getTestTextFiles(File file) {
        final List<File> heldOutFiles = getSetTextFiles(file, testPatients);
        return heldOutFiles;
    }

    /**
     * Collects the text files of all report directories that belong to the given patients.
     *
     * <p>A subdirectory of {@code file} qualifies when its name matches {@code dirPatt} and the
     * patient number captured by group 1 is contained in {@code set}. The text file is the entry
     * inside that subdirectory carrying the same name as the subdirectory.
     *
     * @param file corpus root directory
     * @param set patient numbers to include
     * @return one {@link File} per qualifying subdirectory, in the order {@code listFiles()} returned them
     * @throws IllegalArgumentException if {@code file} cannot be listed as a directory
     */
    private static List<File> getSetTextFiles(File file, Set<Integer> set) {
        File[] entries = file.listFiles();
        if (entries == null) {
            // listFiles() yields null for a non-directory or on an I/O error; name the path
            // instead of failing with a bare NullPointerException.
            throw new IllegalArgumentException("cannot list corpus directory " + file);
        }

        List<File> textFiles = new ArrayList<File>();
        for (File entry : entries) {
            if (!entry.isDirectory()) {
                continue;
            }
            // A local matcher avoids writing to shared static state.
            Matcher nameMatcher = dirPatt.matcher(entry.getName());
            if (nameMatcher.matches() && set.contains(Integer.valueOf(nameMatcher.group(1)))) {
                textFiles.add(new File(entry, entry.getName()));
            }
        }
        return textFiles;
    }

    /**
     * Assembles the aggregate of preprocessing engines applied to every document before the gold
     * annotations are read. The engines are added in the order listed below.
     *
     * @return aggregate description containing all preprocessing stages
     * @throws ResourceInitializationException if a stage description cannot be created
     * @throws MalformedURLException declared for the stage factories that resolve resources by URL
     */
    private static AnalysisEngineDescription getDeepPhePreprocessingPipeline() throws ResourceInitializationException, MalformedURLException {
        AggregateBuilder pipeline = new AggregateBuilder();
        AnalysisEngineDescription[] stages = {
            SimpleSegmentAnnotator.createAnnotatorDescription(),
            SentenceDetectorAnnotatorBIO.getDescription(),
            TokenizerAnnotatorPTB.createAnnotatorDescription(),
            LvgAnnotator.createAnnotatorDescription(),
            ContextDependentTokenizerAnnotator.createAnnotatorDescription(),
            POSTagger.createAnnotatorDescription(),
            ConstituencyParser.createAnnotatorDescription(),
            ClearNLPDependencyParserAE.createAnnotatorDescription(),
            Chunker.createAnnotatorDescription(),
            ChunkAdjuster.createAnnotatorDescription(new String[]{"NP", "NP"}, 1),
            ChunkAdjuster.createAnnotatorDescription(new String[]{"NP", "PP", "NP"}, 2),
            DefaultJCasTermAnnotator.createAnnotatorDescription(),
        };
        for (AnalysisEngineDescription stage : stages) {
            pipeline.add(stage);
        }
        return pipeline.createAggregateDescription();
    }
}
