package org.apache.ctakes.core.ae;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.ctakes.core.pipeline.PipeBitInfo;
import org.apache.ctakes.core.util.Pair;
import org.apache.ctakes.core.util.regex.RegexSpanFinder;
import org.apache.ctakes.core.util.regex.TimeoutMatcher;
import org.apache.ctakes.typesystem.type.textspan.Segment;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;

@PipeBitInfo(name = "Regex Sectionizer (A)", description = "Annotates Document Sections by detecting Section Headers using Regular Expressions.", products = {PipeBitInfo.TypeProduct.SECTION})
/* loaded from: input_file:org/apache/ctakes/core/ae/RegexSectionizer.class */
public abstract class RegexSectionizer extends JCasAnnotator_ImplBase {
    /** Name of the configuration parameter that switches divider-line detection on or off. */
    public static final String PARAM_TAG_DIVIDERS = "TagDividers";

    // When true, process() also searches the document for divider lines and hands them to createSegments.
    @ConfigurationParameter(name = PARAM_TAG_DIVIDERS, description = "True if lines of divider characters ____ , ---- , === should divide sections", defaultValue = {"true"}, mandatory = false)
    private boolean _tagDividers = true;
    // Same value as the id / preferred text that createSegments assigns to segments not introduced by a header.
    private static final String DEFAULT_SEGMENT_ID = "SIMPLE_SEGMENT";
    // Name of the optional named capturing group that findSectionTags reads to obtain a tag's display name.
    private static final String SECTION_NAME_EX = "SECTION_NAME";
    private static final Logger LOGGER = Logger.getLogger("RegexSectionizer");
    // Four or more '_', '-' or '=' characters followed by a line break.
    // NOTE(review): compiled without Pattern.MULTILINE, so '^' anchors only at the very start of the input
    // as far as java.util.regex is concerned - confirm whether dividers further down a document should match.
    private static final Pattern DIVIDER_LINE_PATTERN = Pattern.compile("^[_\\-=]{4,}\\r?\\n");
    /** Name and type name carried by the shared divider-line tag. */
    public static final String DIVIDER_LINE_NAME = "DIVIDER_LINE";
    // Single tag instance that findDividerLines associates with every divider span it finds.
    protected static final SectionTag LINE_DIVIDER_TAG = new SectionTag(DIVIDER_LINE_NAME, DIVIDER_LINE_NAME, TagType.DIVIDER);
    // Registered section types keyed by section type name; filled through addSectionType.
    private static final Map<String, SectionType> _sectionTypes = new ConcurrentHashMap();

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:org/apache/ctakes/core/ae/RegexSectionizer$SectionTag.class */
    /**
     * Immutable record of one tag (header, footer or divider) located in the document text.
     * Instances are only created inside the enclosing sectionizer.
     */
    public static final class SectionTag {
        /** Display name; used as the preferred text of a segment introduced by a header tag. */
        private final String __name;
        /** Name of the section type that produced the tag; used as the id of a header segment. */
        private final String __typeName;
        /** Whether the tag is a header, a footer or a divider line. */
        private final TagType __tagType;

        /**
         * @param name     display name of the tag
         * @param typeName name of the section type the tag belongs to
         * @param tagType  kind of tag
         */
        private SectionTag(final String name, final String typeName, final TagType tagType) {
            __name = name;
            __typeName = typeName;
            __tagType = tagType;
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:org/apache/ctakes/core/ae/RegexSectionizer$SectionType.class */
    /**
     * Definition of a section type: its name, optional header and footer expressions, and whether
     * text inside sections of this type should be parsed.
     */
    public static final class SectionType {
        /** Fallback type used by shouldParseSegment for names that were never registered. */
        private static final SectionType DEFAULT_TYPE = new SectionType("SIMPLE_SEGMENT", null, null, true);
        private final String __name;
        private final Pattern __headerPattern;
        private final Pattern __footerPattern;
        private final boolean __shouldParse;

        /**
         * @param str  name of the section type
         * @param str2 regular expression for the section header, or null when there is none
         * @param str3 regular expression for the section footer, or null when there is none
         * @param z    true if sections of this type should be parsed
         * @throws java.util.regex.PatternSyntaxException if a non-null expression is not a valid regex
         */
        public SectionType(String str, String str2, String str3, boolean z) {
            __name = str;
            __headerPattern = compileOrNull(str2);
            __footerPattern = compileOrNull(str3);
            __shouldParse = z;
        }

        /** Compiles a case-insensitive, multiline pattern; a null expression yields a null pattern. */
        private static Pattern compileOrNull(final String regex) {
            if (regex == null) {
                return null;
            }
            return Pattern.compile(regex, Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/apache/ctakes/core/ae/RegexSectionizer$TagType.class */
    /**
     * Kind of tag found in the document text. Only text following a HEADER tag receives that
     * tag's id and name in createSegments; text following the other kinds becomes a default segment.
     */
    public enum TagType {
        /** Tag produced by a section type's header pattern. */
        HEADER,
        /** Tag produced by a section type's footer pattern. */
        FOOTER,
        /** Tag produced for a line of divider characters. */
        DIVIDER
    }

    /**
     * Tells whether text in a segment with the given id should be parsed.
     *
     * @param str name of the section type (segment id)
     * @return the parse flag of the registered section type, or that of the default type when
     *         no section type is registered under the name
     */
    public static boolean shouldParseSegment(String str) {
        final SectionType sectionType = _sectionTypes.getOrDefault(str, SectionType.DEFAULT_TYPE);
        return sectionType.__shouldParse;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Registers a section type under its name, replacing any type previously registered
     * under the same name.
     *
     * @param sectionType section type to register
     */
    public static void addSectionType(SectionType sectionType) {
        final String typeName = sectionType.__name;
        _sectionTypes.put(typeName, sectionType);
    }

    /**
     * @return a read-only view of the registered section types, keyed by section type name
     */
    public static Map<String, SectionType> getSectionTypes() {
        final Map<String, SectionType> readOnlyView = Collections.unmodifiableMap(_sectionTypes);
        return readOnlyView;
    }

    /**
     * Runs the superclass initialization and then asks the subclass to load its section types.
     *
     * @param uimaContext context passed on to the superclass
     * @throws ResourceInitializationException if the superclass or loadSections() fails
     */
    public void initialize(final UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        this.loadSections();
    }

    /**
     * Finds header, footer and (when enabled) divider-line tags in the document text and
     * creates segments from them. Does nothing when no section types are registered.
     *
     * @param jCas cas holding the document text; receives the created segments
     * @throws AnalysisEngineProcessException declared by the annotator contract
     */
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        LOGGER.info("Starting processing");
        if (_sectionTypes.isEmpty()) {
            LOGGER.info("Finished processing, no section types defined");
            return;
        }
        final String docText = jCas.getDocumentText();

        final Map<Pair<Integer>, SectionTag> headerTags = findHeaderTags(docText);
        if (headerTags.isEmpty()) {
            LOGGER.debug("No section headers found");
        }
        final Map<Pair<Integer>, SectionTag> footerTags = findFooterTags(docText);

        // Divider lines are optional; the map stays empty when the parameter is false.
        final Map<Pair<Integer>, SectionTag> dividerTags = new HashMap<>();
        if (_tagDividers) {
            dividerTags.putAll(findDividerLines(docText));
        }

        createSegments(jCas, headerTags, footerTags, dividerTags);
        LOGGER.info("Finished processing");
    }

    /**
     * Called once from initialize(UimaContext) after the superclass has been initialized.
     * NOTE(review): presumably implementations register their section types through
     * addSectionType(SectionType) - confirm against the subclasses.
     *
     * @throws ResourceInitializationException if the section definitions cannot be loaded
     */
    protected abstract void loadSections() throws ResourceInitializationException;

    /**
     * Collects header tags for every registered section type that defines a header pattern.
     *
     * @param text document text to search
     * @return header tags keyed by their begin/end offsets in the text
     */
    private static Map<Pair<Integer>, SectionTag> findHeaderTags(final String text) {
        final Map<Pair<Integer>, SectionTag> headerTags = new HashMap<>();
        for (final SectionType type : _sectionTypes.values()) {
            final Pattern headerPattern = type.__headerPattern;
            if (headerPattern == null) {
                continue;
            }
            headerTags.putAll(findSectionTags(text, type.__name, headerPattern, TagType.HEADER));
        }
        return headerTags;
    }

    /**
     * Collects footer tags for every registered section type that defines a footer pattern.
     *
     * @param text document text to search
     * @return footer tags keyed by their begin/end offsets in the text
     */
    private static Map<Pair<Integer>, SectionTag> findFooterTags(final String text) {
        final Map<Pair<Integer>, SectionTag> footerTags = new HashMap<>();
        for (final SectionType type : _sectionTypes.values()) {
            final Pattern footerPattern = type.__footerPattern;
            if (footerPattern == null) {
                continue;
            }
            footerTags.putAll(findSectionTags(text, type.__name, footerPattern, TagType.FOOTER));
        }
        return footerTags;
    }

    /**
     * Finds every match of a section pattern in the text and turns it into a tag.
     * The matcher is always closed, also when matching is aborted by an exception.
     *
     * @param str     document text to search
     * @param str2    name of the section type; also the tag name when the match has no usable
     *                "SECTION_NAME" group
     * @param pattern header or footer pattern of the section type
     * @param tagType kind of tag to create for each match
     * @return tags keyed by the begin/end offsets of their match; when matching is aborted by an
     *         IllegalArgumentException the error is logged and the tags found so far are returned
     */
    static Map<Pair<Integer>, SectionTag> findSectionTags(String str, String str2, Pattern pattern, TagType tagType) {
        final Map<Pair<Integer>, SectionTag> sectionTags = new HashMap<>();
        try (TimeoutMatcher finder = new TimeoutMatcher(pattern, str)) {
            Matcher tagMatcher = finder.nextMatch();
            while (tagMatcher != null) {
                final Pair<Integer> tagBounds = new Pair<>(tagMatcher.start(), tagMatcher.end());
                final String tagName = resolveTagName(tagMatcher, str2);
                sectionTags.put(tagBounds, new SectionTag(tagName, str2, tagType));
                tagMatcher = finder.nextMatch();
            }
        } catch (IllegalArgumentException iaE) {
            // Keep the exception as the cause so the log shows where matching failed.
            LOGGER.error(iaE.getMessage(), iaE);
        }
        return sectionTags;
    }

    /**
     * Reads the optional "SECTION_NAME" group of a match, falling back to the section type name
     * when the pattern declares no such group or the group did not capture any text.
     */
    private static String resolveTagName(final Matcher tagMatcher, final String typeName) {
        try {
            final String captured = tagMatcher.group(SECTION_NAME_EX);
            if (captured != null && !captured.isEmpty()) {
                return captured;
            }
        } catch (IllegalArgumentException ignored) {
            // Matcher.group(String) throws when the pattern has no group with that name.
        }
        return typeName;
    }

    /**
     * Creates segments for the text between tags and adds them to the cas indexes.
     * <ul>
     * <li>Without any tag the whole document becomes one default segment.</li>
     * <li>Non-blank text before the first tag becomes a default segment.</li>
     * <li>Text between the end of a tag and the start of the next tag (or the end of the document)
     * becomes a segment when it is longer than one character and not blank. After a header tag the
     * segment gets the tag's type name as id and the tag's name as preferred text; after a footer
     * or divider it is a default segment.</li>
     * </ul>
     *
     * @param jCas        cas holding the document text; receives the segments
     * @param headerTags  header tags keyed by their begin/end offsets
     * @param footerTags  footer tags keyed by their begin/end offsets
     * @param dividerTags divider tags keyed by their begin/end offsets
     */
    private static void createSegments(JCas jCas, Map<Pair<Integer>, SectionTag> headerTags, Map<Pair<Integer>, SectionTag> footerTags, Map<Pair<Integer>, SectionTag> dividerTags) {
        final String docText = jCas.getDocumentText();
        final Map<Pair<Integer>, SectionTag> allTags = new HashMap<>(headerTags.size() + footerTags.size() + dividerTags.size());
        allTags.putAll(headerTags);
        allTags.putAll(footerTags);
        allTags.putAll(dividerTags);
        if (allTags.isEmpty()) {
            // Annotation end offsets are exclusive, so the full text ends at length(), not length() - 1.
            addSegment(jCas, 0, docText.length(), "SIMPLE_SEGMENT", "SIMPLE_SEGMENT");
            return;
        }
        final List<Pair<Integer>> tagBounds = createBoundsList(allTags.keySet());
        final int firstTagBegin = tagBounds.get(0).getValue1();
        if (firstTagBegin > 0 && !docText.substring(0, firstTagBegin).trim().isEmpty()) {
            addSegment(jCas, 0, firstTagBegin, "SIMPLE_SEGMENT", "SIMPLE_SEGMENT");
        }
        final int tagCount = tagBounds.size();
        for (int i = 0; i < tagCount; i++) {
            final Pair<Integer> bounds = tagBounds.get(i);
            int begin = bounds.getValue2();
            final int end = i + 1 < tagCount ? tagBounds.get(i + 1).getValue1() : docText.length();
            if (end - begin <= 1 || docText.substring(begin, end).trim().isEmpty()) {
                continue;
            }
            // trim() and Character.isWhitespace disagree on some characters (e.g. U+2003), so the
            // scan is bounded by the section end instead of relying on the blank check above.
            while (begin < end && Character.isWhitespace(docText.charAt(begin))) {
                begin++;
            }
            if (begin >= end) {
                continue;
            }
            final SectionTag tag = allTags.get(bounds);
            if (tag.__tagType == TagType.HEADER) {
                addSegment(jCas, begin, end, tag.__typeName, tag.__name);
            } else {
                addSegment(jCas, begin, end, "SIMPLE_SEGMENT", "SIMPLE_SEGMENT");
            }
        }
    }

    /** Creates a segment with the given offsets, id and preferred text and adds it to the indexes. */
    private static void addSegment(final JCas jCas, final int begin, final int end, final String id, final String preferredText) {
        final Segment segment = new Segment(jCas, begin, end);
        segment.setId(id);
        segment.setPreferredText(preferredText);
        segment.addToIndexes();
    }

    /**
     * Orders tag spans by position and drops spans that are nested inside another span.
     * Spans that only partially overlap are both kept.
     *
     * @param collection begin/end offsets of all tags
     * @return the remaining spans sorted by begin offset, then by end offset
     */
    private static List<Pair<Integer>> createBoundsList(Collection<Pair<Integer>> collection) {
        final List<Pair<Integer>> boundsList = new ArrayList<>(collection);
        // Compare begin with begin (then end with end); Integer.compare avoids subtraction overflow.
        boundsList.sort((left, right) -> {
            final int byBegin = Integer.compare(left.getValue1(), right.getValue1());
            return byBegin != 0 ? byBegin : Integer.compare(left.getValue2(), right.getValue2());
        });
        final Collection<Pair<Integer>> nested = new HashSet<>();
        final int count = boundsList.size();
        for (int i = 0; i < count - 1; i++) {
            final Pair<Integer> current = boundsList.get(i);
            final int currentBegin = current.getValue1();
            final int currentEnd = current.getValue2();
            for (int j = i + 1; j < count; j++) {
                final Pair<Integer> later = boundsList.get(j);
                final int laterBegin = later.getValue1();
                final int laterEnd = later.getValue2();
                if (laterBegin >= currentEnd) {
                    // Sorted by begin: no span from here on can overlap the current one.
                    break;
                }
                if (currentEnd >= laterEnd) {
                    // The later span lies inside the current one; keep scanning for further nested spans.
                    nested.add(later);
                    continue;
                }
                if (currentBegin >= laterBegin) {
                    // Same begin but the later span reaches further: the current span is the nested one.
                    nested.add(current);
                    break;
                }
                // Partial overlap: keep both and look at the next span.
            }
        }
        boundsList.removeAll(nested);
        return boundsList;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Tells whether a text is the word "true" or "false", ignoring case and surrounding whitespace.
     *
     * @param str text to check; may be null
     * @return true if the trimmed text equals "true" or "false" ignoring case; false otherwise,
     *         including for null
     */
    public static boolean isBoolean(String str) {
        if (str == null) {
            return false;
        }
        // equalsIgnoreCase already ignores case, so no locale-sensitive toLowerCase() is needed.
        final String trimmed = str.trim();
        return trimmed.equalsIgnoreCase("true") || trimmed.equalsIgnoreCase("false");
    }

    /**
     * Finds the spans matched by the divider-line pattern and maps each one to the shared
     * divider tag. The span finder is always closed, also when the search fails.
     *
     * @param text document text to search
     * @return divider tags keyed by the begin/end offsets of their span; an empty map when the
     *         search throws an IllegalArgumentException
     */
    private static Map<Pair<Integer>, SectionTag> findDividerLines(final String text) {
        try (RegexSpanFinder dividerFinder = new RegexSpanFinder(DIVIDER_LINE_PATTERN)) {
            return dividerFinder.findSpans(text)
                    .stream()
                    .collect(Collectors.toMap(Function.identity(), span -> LINE_DIVIDER_TAG));
        } catch (IllegalArgumentException ignored) {
            // Divider detection is best-effort: a failed search simply yields no dividers.
            return Collections.emptyMap();
        }
    }
}
