package org.apache.ctakes.gui.dictionary.util;

import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.ctakes.gui.dictionary.umls.Concept;

/* loaded from: input_file:org/apache/ctakes/gui/dictionary/util/RareWordUtil.class */
/**
 * Static helpers for picking the "rare word" of a term: the whitespace-separated token of a text
 * that has the lowest count in a supplied token-count table.
 *
 * <p>Tokens that are a single character, contain no letter, or appear in the unwanted-term set
 * (auxiliary verbs, number words, conjunctions, determiners, prepositions, pronouns, ...) are
 * never counted by {@link #getTokenCounts(Collection)}.
 */
public final class RareWordUtil {
    /** Lower-case terms that must never be used as a rare word. */
    private static final Set<String> BAD_POS_TERM_SET = new HashSet<>(Arrays.asList("be", "has", "have", "had", "do", "does", "did", "is", "isn", "am", "are", "was", "were", "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "and", "or", "but", "for", "nor", "so", "yet", "while", "because", "this", "that", "these", "those", "the", "an", "a", "there", "can", "should", "will", "may", "shall", "might", "must", "could", "would", "some", "many", "any", "each", "all", "few", "most", "both", "half", "none", "twice", "at", "before", "after", "behind", "beneath", "beside", "between", "into", "through", "across", "of", "concerning", "like", "unlike", "except", "with", "within", "without", "toward", "to", "past", "against", "during", "until", "throughout", "below", "besides", "beyond", "from", "inside", "near", "outside", "since", "upon", "my", "our", "your", "her", "their", "whose", "i", "you", "he", "she", "it", "them", "they", "we", "us", "mine", "yours", "his", "hers", "its", "ours", "theirs", "about", "off", "up", "along", "away", "back", "by", "down", "forward", "in", "on", "out", "over", "around", "under", "to", "what", "whatever", "which", "whichever", "who", "whom", "which", "that", "whoever", "whomever", "how", "where", "when", "however", "wherever", "whenever", "no", "not", "oh", "mr", "mrs", "miss", "dr", "as", "only", "also", "either", "neither", "whether", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen", "eighteen", "nineteen", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety", "hundred", "thousand", "million", "billion", "trillion"));

    /** Splits a text into tokens on runs of whitespace. */
    private static final Pattern SPACE_PATTERN = Pattern.compile("\\s+");

    /**
     * Tokens of this length or longer are never selected as the rare word.
     * NOTE(review): the reason for the limit is not visible here (presumably a storage/column
     * width constraint) - confirm before changing.
     */
    private static final int MAX_RARE_WORD_LENGTH = 48;

    /** Sentinel returned when no token of a text has an entry in the count table. */
    public static final IndexedRareWord NULL_RARE_WORD = new IndexedRareWord(null, -1, -1);

    /** Immutable holder for a selected rare word and its position within the tokenized text. */
    public static final class IndexedRareWord {
        /** The selected token; {@code null} for {@link RareWordUtil#NULL_RARE_WORD}. */
        public final String __word;
        /** Zero-based index of the token within the text; {@code -1} for the sentinel. */
        public final int __index;
        /** Total number of tokens in the text; {@code -1} for the sentinel. */
        public final int __tokenCount;

        private IndexedRareWord(String word, int index, int tokenCount) {
            this.__word = word;
            this.__index = index;
            this.__tokenCount = tokenCount;
        }
    }

    private RareWordUtil() {
        // static utility class, not instantiable
    }

    /**
     * @return a read-only view of the terms that are excluded from rare-word selection
     */
    public static Collection<String> getUnwantedPosTexts() {
        return Collections.unmodifiableCollection(BAD_POS_TERM_SET);
    }

    /**
     * Decides whether a token may serve as a rare word.
     *
     * @param str a single token; compared against the unwanted-term set exactly as given
     * @return {@code true} if the token is longer than one character, contains at least one
     *     letter, and is not in the unwanted-term set
     */
    public static boolean isRarableToken(String str) {
        if (str.length() <= 1) {
            return false;
        }
        return containsLetter(str) && !BAD_POS_TERM_SET.contains(str);
    }

    /** Returns {@code true} if any character of {@code text} is a letter. */
    private static boolean containsLetter(String text) {
        for (int i = 0; i < text.length(); i++) {
            if (Character.isLetter(text.charAt(i))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Counts how often each rarable token occurs across all texts of the given concepts.
     *
     * @param collection concepts whose texts are tokenized on whitespace
     * @return map from token to its number of occurrences; only tokens accepted by
     *     {@link #isRarableToken(String)} are present
     */
    public static Map<String, Long> getTokenCounts(Collection<Concept> collection) {
        return collection.stream()
                .map(Concept::getTexts)
                .flatMap(texts -> texts.stream())
                .map(SPACE_PATTERN::split)
                .flatMap(Arrays::stream)
                .filter(RareWordUtil::isRarableToken)
                .collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));
    }

    /** Adds one to the count stored for {@code str}, starting from zero if absent or null. */
    private static void incrementCount(Map<String, Integer> map, String str) {
        map.merge(str, 1, Integer::sum);
    }

    /**
     * Finds the token of a text with the smallest count in the supplied table.
     *
     * <p>Tokens without an entry in {@code map}, and tokens of {@value #MAX_RARE_WORD_LENGTH}
     * or more characters, are ignored. On equal counts the earliest token wins.
     *
     * @param str text to tokenize on whitespace
     * @param map token counts, e.g. as produced by {@link #getTokenCounts(Collection)}
     * @return the rarest token with its index and the total token count, or
     *     {@link #NULL_RARE_WORD} if no token qualifies
     */
    public static IndexedRareWord getIndexedRareWord(String str, Map<String, Long> map) {
        final String[] tokens = SPACE_PATTERN.split(str);
        int bestIndex = -1;
        long bestCount = Long.MAX_VALUE;
        for (int i = 0; i < tokens.length; i++) {
            if (tokens[i].length() >= MAX_RARE_WORD_LENGTH) {
                continue;
            }
            final Long count = map.get(tokens[i]);
            // strict '<' keeps the first token on ties; a count of Long.MAX_VALUE never qualifies
            if (count != null && count < bestCount) {
                bestIndex = i;
                bestCount = count;
            }
        }
        if (bestIndex < 0) {
            return NULL_RARE_WORD;
        }
        return new IndexedRareWord(tokens[bestIndex], bestIndex, tokens.length);
    }
}
