package org.apache.ctakes.lvg.ae;

import gov.nih.nlm.nls.lvg.Api.LvgCmdApi;
import gov.nih.nlm.nls.lvg.Api.LvgLexItemApi;
import gov.nih.nlm.nls.lvg.Lib.Category;
import gov.nih.nlm.nls.lvg.Lib.LexItem;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import org.apache.ctakes.core.util.ListFactory;
import org.apache.ctakes.lvg.resource.LvgCmdApiResource;
import org.apache.ctakes.lvg.resource.LvgCmdApiResourceImpl;
import org.apache.ctakes.typesystem.type.syntax.Lemma;
import org.apache.ctakes.typesystem.type.syntax.WordToken;
import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ExternalResource;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.factory.ExternalResourceFactory;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;

/* loaded from: input_file:org/apache/ctakes/lvg/ae/LvgAnnotator.class */
public class LvgAnnotator extends JCasAnnotator_ImplBase {
    public static final String[] defaultExclusionWords = {"And", "and", "By", "by", "For", "for", "In", "in", "Of", "of", "On", "on", "The", "the", "To", "to", "With", "with"};
    public static final String[] defaultTreebankMap = {"adj|JJ", "adv|RB", "aux|AUX", "compl|CS", "conj|CC", "det|DET", "modal|MD", "noun|NN", "prep|IN", "pron|PRP", "verb|VB"};
    public static final String PARAM_POST_LEMMAS = "PostLemmas";

    @ConfigurationParameter(name = "PostLemmas", mandatory = false, defaultValue = {"false"}, description = "Whether to extract the lexical variants and write to cas (creates large files)")
    private boolean postLemmas;
    public static final String PARAM_USE_LEMMA_CACHE = "UseLemmaCache";

    @ConfigurationParameter(name = "UseLemmaCache", mandatory = false, defaultValue = {"false"}, description = "Whether to use a cache for lemmas")
    private boolean useLemmaCache;
    public static final String PARAM_LEMMA_CACHE_FILE_LOCATION = "LemmaCacheFileLocation";
    public static final String PARAM_LEMMA_CACHE_FREQUENCY_CUTOFF = "LemmaCacheFrequencyCutoff";

    @ConfigurationParameter(name = "LemmaCacheFrequencyCutoff", mandatory = false, description = "Threshold for the frequency of a lemma to be loaded into the cache", defaultValue = {"20"})
    private int cmdCacheFreqCutoff;
    public static final String PARAM_USE_SEGMENTS = "UseSegments";

    @ConfigurationParameter(name = PARAM_USE_SEGMENTS, mandatory = false, defaultValue = {"false"}, description = "Whether to use segments found in upstream cTAKES components")
    private boolean useSegments;
    public static final String PARAM_SKIP_SEGMENTS = "SegmentsToSkip";

    @ConfigurationParameter(name = PARAM_SKIP_SEGMENTS, mandatory = false, defaultValue = {}, description = "Segment IDs to skip during processing")
    private String[] skipSegmentIDs;
    private Set<String> skipSegmentsSet;
    public static final String PARAM_XT_MAP = "XeroxTreebankMap";
    private Map<String, String> xeroxTreebankMap;
    public static final String PARAM_USE_CMD_CACHE = "UseCmdCache";

    @ConfigurationParameter(name = PARAM_USE_CMD_CACHE, mandatory = false, defaultValue = {"false"}, description = "Use cache to track canonical forms")
    private boolean useCmdCache;
    public static final String PARAM_CMD_CACHE_FILE = "CmdCacheFileLocation";

    @ConfigurationParameter(name = PARAM_CMD_CACHE_FILE, mandatory = false, defaultValue = {"/org/apache/ctakes/lvg/2005_norm.voc"}, description = "File with stored cache of canonical forms")
    private String cmdCacheFileLocation;
    public static final String PARAM_LEMMA_FREQ_CUTOFF = "CmdCacheFrequencyCutoff";

    @ConfigurationParameter(name = PARAM_LEMMA_FREQ_CUTOFF, mandatory = false, description = "Minimum frequency required for loading from cache", defaultValue = {"20"})
    private int lemmaCacheFreqCutoff;
    public static final String PARAM_EXCLUSION_WORDS = "ExclusionSet";
    private Set<String> exclusionSet;
    public static final String PARAM_LVGCMDAPI_RESRC_KEY = "LvgCmdApi";

    @ExternalResource(key = PARAM_LVGCMDAPI_RESRC_KEY, mandatory = true)
    private LvgCmdApiResource lvgResource;
    private LvgCmdApi lvgCmd;
    private LvgLexItemApi lvgLexItem;
    private Map<String, String> normCacheMap;
    private Map<String, Set<LemmaLocalClass>> lemmaCacheMap;

    @ConfigurationParameter(name = "LemmaCacheFileLocation", mandatory = false, defaultValue = {"/org/apache/ctakes/lvg/2005_lemma.voc"}, description = "Path to lemma cache file -- if useLemmaCache and postLemmas are true")
    private String lemmaCacheFileLocation = null;

    @ConfigurationParameter(name = PARAM_XT_MAP, mandatory = false, description = "Mapping from Xerox parts of speech to Treebank equivalents")
    private String[] xtMaps = defaultTreebankMap;

    @ConfigurationParameter(name = PARAM_EXCLUSION_WORDS, mandatory = false, description = "Words to exclude when doing LVG normalization")
    private String[] wordsToExclude = defaultExclusionWords;
    private Logger logger = Logger.getLogger(getClass().getName());

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:org/apache/ctakes/lvg/ae/LvgAnnotator$LemmaLocalClass.class */
    public class LemmaLocalClass {
        public String word;
        public Set<String> posSet;

        LemmaLocalClass() {
        }
    }

    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        configInit();
        try {
            this.lvgCmd = this.lvgResource.getLvg();
            if (this.useCmdCache) {
                this.logger.info("Loading Cmd cache=" + this.cmdCacheFileLocation);
                loadCmdCacheFile(this.cmdCacheFileLocation);
                this.logger.info("Loaded " + this.normCacheMap.size() + " entries");
            }
            if (this.postLemmas) {
                this.lvgLexItem = this.lvgResource.getLvgLex();
                if (this.useLemmaCache) {
                    this.logger.info("Loading Lemma cache=" + this.lemmaCacheFileLocation);
                    loadLemmaCacheFile(this.lemmaCacheFileLocation);
                    this.logger.info("Loaded " + this.lemmaCacheMap.size() + " entries");
                }
            }
        } catch (IOException e) {
            throw new ResourceInitializationException(e);
        }
    }

    private void configInit() {
        this.skipSegmentsSet = new HashSet();
        for (int i = 0; i < this.skipSegmentIDs.length; i++) {
            this.skipSegmentsSet.add(this.skipSegmentIDs[i]);
        }
        this.xeroxTreebankMap = new HashMap();
        for (int i2 = 0; i2 < this.xtMaps.length; i2++) {
            StringTokenizer stringTokenizer = new StringTokenizer(this.xtMaps[i2], "|");
            if (stringTokenizer.countTokens() == 2) {
                this.xeroxTreebankMap.put(stringTokenizer.nextToken(), stringTokenizer.nextToken());
            }
        }
        this.exclusionSet = new HashSet();
        for (int i3 = 0; i3 < this.wordsToExclude.length; i3++) {
            this.exclusionSet.add(this.wordsToExclude[i3]);
        }
    }

    public void process(JCas jCas) throws AnalysisEngineProcessException {
        this.logger.info("process(JCas)");
        String documentText = jCas.getDocumentText();
        try {
            if (this.useSegments) {
                FSIterator it = jCas.getJFSIndexRepository().getAnnotationIndex(Segment.type).iterator();
                while (it.hasNext()) {
                    Segment segment = (Segment) it.next();
                    if (!this.skipSegmentsSet.contains(segment.getId())) {
                        annotateRange(jCas, documentText, segment.getBegin(), segment.getEnd());
                    }
                }
            } else {
                annotateRange(jCas, documentText, 0, documentText.length());
            }
        } catch (Exception e) {
            throw new AnalysisEngineProcessException(e);
        }
    }

    protected void annotateRange(JCas jCas, String str, int i, int i2) throws AnalysisEngineProcessException {
        FSIterator it = jCas.getJFSIndexRepository().getAnnotationIndex(WordToken.type).iterator();
        while (it.hasNext()) {
            WordToken wordToken = (WordToken) it.next();
            if (wordToken.getBegin() >= i && wordToken.getEnd() <= i2) {
                String substring = str.substring(wordToken.getBegin(), wordToken.getEnd());
                String suggestion = wordToken.getSuggestion();
                if (suggestion != null && suggestion.length() > 0) {
                    substring = suggestion;
                }
                if (!this.exclusionSet.contains(substring)) {
                    setCanonicalForm(wordToken, substring);
                    if (this.postLemmas) {
                        setLemma(wordToken, substring, jCas);
                    }
                }
            }
        }
    }

    private void setCanonicalForm(WordToken wordToken, String str) throws AnalysisEngineProcessException {
        String str2 = null;
        if (this.useCmdCache) {
            str2 = this.normCacheMap.get(str);
            if (str2 == null) {
            }
        }
        if (str2 == null) {
            try {
                String[] split = this.lvgCmd.MutateToString(str).split("\\|");
                if (split != null && split.length >= 2 && !split[1].matches("No Output")) {
                    str2 = split[1];
                }
            } catch (Exception e) {
                throw new AnalysisEngineProcessException(e);
            }
        }
        if (str2 != null) {
            wordToken.setCanonicalForm(str2);
        }
    }

    /* JADX WARN: Multi-variable type inference failed */
    /* JADX WARN: Type inference failed for: r0v73, types: [java.util.Set] */
    private void setLemma(WordToken wordToken, String str, JCas jCas) throws AnalysisEngineProcessException {
        Set<LemmaLocalClass> set;
        HashMap hashMap = null;
        if (this.useLemmaCache && (set = this.lemmaCacheMap.get(str)) != null) {
            hashMap = new HashMap();
            for (LemmaLocalClass lemmaLocalClass : set) {
                hashMap.put(lemmaLocalClass.word, lemmaLocalClass.posSet);
            }
        }
        if (hashMap == null) {
            hashMap = new HashMap();
            try {
                Iterator it = this.lvgLexItem.MutateLexItem(str).iterator();
                while (it.hasNext()) {
                    LexItem lexItem = (LexItem) it.next();
                    Category GetTargetCategory = lexItem.GetTargetCategory();
                    String GetTargetTerm = lexItem.GetTargetTerm();
                    for (long j : Category.ToValuesArray(GetTargetCategory.GetValue())) {
                        String str2 = this.xeroxTreebankMap.get(Category.ToName(j));
                        if (str2 != null) {
                            HashSet hashSet = hashMap.containsKey(GetTargetTerm) ? (Set) hashMap.get(GetTargetTerm) : new HashSet();
                            hashSet.add(str2);
                            hashMap.put(GetTargetTerm, hashSet);
                        }
                    }
                }
            } catch (Exception e) {
                throw new AnalysisEngineProcessException(e);
            }
        }
        ArrayList arrayList = new ArrayList(hashMap.keySet().size());
        for (String str3 : hashMap.keySet()) {
            for (String str4 : (Set) hashMap.get(str3)) {
                Lemma lemma = new Lemma(jCas);
                lemma.setKey(str3);
                lemma.setPosTag(str4);
                arrayList.add(lemma);
            }
        }
        wordToken.setLemmaEntries(ListFactory.buildList(jCas, (Lemma[]) arrayList.toArray(new Lemma[arrayList.size()])));
    }

    private void loadCmdCacheFile(String str) throws FileNotFoundException, IOException {
        InputStream resourceAsStream = getClass().getResourceAsStream(str);
        Throwable th = null;
        try {
            BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(resourceAsStream));
            Throwable th2 = null;
            try {
                try {
                    this.normCacheMap = new HashMap();
                    for (String readLine = bufferedReader.readLine(); readLine != null; readLine = bufferedReader.readLine()) {
                        StringTokenizer stringTokenizer = new StringTokenizer(readLine, "|");
                        if (stringTokenizer.countTokens() != 7) {
                            this.logger.warn("Invalid LVG norm cache line: " + readLine);
                        } else if (Integer.parseInt(stringTokenizer.nextToken()) > this.cmdCacheFreqCutoff) {
                            String nextToken = stringTokenizer.nextToken();
                            String nextToken2 = stringTokenizer.nextToken();
                            if (!this.normCacheMap.containsKey(nextToken)) {
                                this.normCacheMap.put(nextToken, nextToken2);
                            }
                        } else {
                            this.logger.debug("Discarding norm cache line due to frequency cutoff: " + readLine);
                        }
                    }
                    if (bufferedReader != null) {
                        if (0 != 0) {
                            try {
                                bufferedReader.close();
                            } catch (Throwable th3) {
                                th2.addSuppressed(th3);
                            }
                        } else {
                            bufferedReader.close();
                        }
                    }
                    if (resourceAsStream != null) {
                        if (0 == 0) {
                            resourceAsStream.close();
                            return;
                        }
                        try {
                            resourceAsStream.close();
                        } catch (Throwable th4) {
                            th.addSuppressed(th4);
                        }
                    }
                } catch (Throwable th5) {
                    th2 = th5;
                    throw th5;
                }
            } catch (Throwable th6) {
                if (bufferedReader != null) {
                    if (th2 != null) {
                        try {
                            bufferedReader.close();
                        } catch (Throwable th7) {
                            th2.addSuppressed(th7);
                        }
                    } else {
                        bufferedReader.close();
                    }
                }
                throw th6;
            }
        } catch (Throwable th8) {
            if (resourceAsStream != null) {
                if (0 != 0) {
                    try {
                        resourceAsStream.close();
                    } catch (Throwable th9) {
                        th.addSuppressed(th9);
                    }
                } else {
                    resourceAsStream.close();
                }
            }
            throw th8;
        }
    }

    private void loadLemmaCacheFile(String str) throws FileNotFoundException, IOException {
        InputStream resourceAsStream = getClass().getResourceAsStream(str);
        Throwable th = null;
        try {
            BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(resourceAsStream));
            Throwable th2 = null;
            try {
                try {
                    this.lemmaCacheMap = new HashMap();
                    for (String readLine = bufferedReader.readLine(); readLine != null; readLine = bufferedReader.readLine()) {
                        StringTokenizer stringTokenizer = new StringTokenizer(readLine, "|");
                        if (stringTokenizer.countTokens() != 4) {
                            this.logger.warn("Invalid LVG lemma cache line: " + readLine);
                        } else if (Integer.parseInt(stringTokenizer.nextToken()) > this.lemmaCacheFreqCutoff) {
                            String nextToken = stringTokenizer.nextToken();
                            String nextToken2 = stringTokenizer.nextToken();
                            String nextToken3 = stringTokenizer.nextToken();
                            String substring = nextToken3.substring(1, nextToken3.length() - 1);
                            LemmaLocalClass lemmaLocalClass = new LemmaLocalClass();
                            lemmaLocalClass.word = nextToken2;
                            lemmaLocalClass.posSet = new HashSet();
                            for (long j : Category.ToValuesArray(Category.ToValue(substring))) {
                                String str2 = this.xeroxTreebankMap.get(Category.ToName(j));
                                if (str2 != null) {
                                    lemmaLocalClass.posSet.add(str2);
                                }
                            }
                            Set<LemmaLocalClass> hashSet = !this.lemmaCacheMap.containsKey(nextToken) ? new HashSet() : this.lemmaCacheMap.get(nextToken);
                            hashSet.add(lemmaLocalClass);
                            this.lemmaCacheMap.put(nextToken, hashSet);
                        } else {
                            this.logger.debug("Discarding lemma cache line due to frequency cutoff: " + readLine);
                        }
                    }
                    if (bufferedReader != null) {
                        if (0 != 0) {
                            try {
                                bufferedReader.close();
                            } catch (Throwable th3) {
                                th2.addSuppressed(th3);
                            }
                        } else {
                            bufferedReader.close();
                        }
                    }
                    if (resourceAsStream != null) {
                        if (0 == 0) {
                            resourceAsStream.close();
                            return;
                        }
                        try {
                            resourceAsStream.close();
                        } catch (Throwable th4) {
                            th.addSuppressed(th4);
                        }
                    }
                } catch (Throwable th5) {
                    th2 = th5;
                    throw th5;
                }
            } catch (Throwable th6) {
                if (bufferedReader != null) {
                    if (th2 != null) {
                        try {
                            bufferedReader.close();
                        } catch (Throwable th7) {
                            th2.addSuppressed(th7);
                        }
                    } else {
                        bufferedReader.close();
                    }
                }
                throw th6;
            }
        } catch (Throwable th8) {
            if (resourceAsStream != null) {
                if (0 != 0) {
                    try {
                        resourceAsStream.close();
                    } catch (Throwable th9) {
                        th.addSuppressed(th9);
                    }
                } else {
                    resourceAsStream.close();
                }
            }
            throw th8;
        }
    }

    public static AnalysisEngineDescription createAnnotatorDescription() throws ResourceInitializationException {
        try {
            return AnalysisEngineFactory.createEngineDescription(LvgAnnotator.class, new Object[]{PARAM_USE_CMD_CACHE, false, "UseLemmaCache", false, PARAM_USE_SEGMENTS, false, "LemmaCacheFrequencyCutoff", 20, PARAM_LEMMA_FREQ_CUTOFF, 20, "PostLemmas", false, PARAM_LVGCMDAPI_RESRC_KEY, ExternalResourceFactory.createExternalResourceDescription(LvgCmdApiResourceImpl.class, new File(LvgCmdApiResourceImpl.class.getResource("/org/apache/ctakes/lvg/data/config/lvg.properties").toURI()), new Object[0])});
        } catch (URISyntaxException e) {
            throw new ResourceInitializationException(e);
        }
    }
}
