package org.apache.ctakes.gui.dictionary.util;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/* loaded from: input_file:org/apache/ctakes/gui/dictionary/util/TextTokenizer.class */
/**
 * Static helpers that split text into tokens.
 *
 * <p>Runs of letters and digits form a token. A hyphen is kept inside a token when the text
 * before it is a known prefix (e.g. {@code "non-"}) or the alphanumeric run after it is a known
 * suffix (e.g. {@code "-fold"}). A trailing possessive {@code 's} and a trailing single-digit
 * decimal (e.g. {@code ".5"}) each start a new token that keeps the punctuation character.
 * Every other non-alphanumeric character becomes its own one-character token.
 */
public final class TextTokenizer {
    /** Prefixes, each ending with a hyphen, that keep the hyphen attached to the token. */
    private static final String[] PREFIXES = {"e-", "a-", "u-", "x-", "agro-", "ante-", "anti-", "arch-", "be-", "bi-", "bio-", "co-", "counter-", "cross-", "cyber-", "de-", "eco-", "ex-", "extra-", "inter-", "intra-", "macro-", "mega-", "micro-", "mid-", "mini-", "multi-", "neo-", "non-", "over-", "pan-", "para-", "peri-", "post-", "pre-", "pro-", "pseudo-", "quasi-", "re-", "semi-", "sub-", "super-", "tri-", "ultra-", "un-", "uni-", "vice-", "electro-", "gasto-", "homo-", "hetero-", "ortho-", "phospho-"};

    /**
     * Suffixes, each starting with a hyphen, that keep the hyphen attached to the token.
     * NOTE(review): "-o-torium" can never match, because the lookup term built by
     * {@link #getNextCharTerm(String)} stops at the first non-alphanumeric character.
     */
    private static final String[] SUFFIXES = {"-esque", "-ette", "-fest", "-fold", "-gate", "-itis", "-less", "-most", "-o-torium", "-rama", "-wise"};

    private static final Set<String> PREFIX_SET = new HashSet<>(Arrays.asList(PREFIXES));
    private static final Set<String> SUFFIX_SET = new HashSet<>(Arrays.asList(SUFFIXES));
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    private TextTokenizer() {
    }

    /**
     * Returns the leading run of letters and digits in the given text.
     *
     * @param text text to scan from its first character
     * @return the leading alphanumeric run; empty if the text is empty or starts with
     *         a non-alphanumeric character
     */
    private static String getNextCharTerm(String text) {
        int end = 0;
        while (end < text.length() && Character.isLetterOrDigit(text.charAt(end))) {
            end++;
        }
        return text.substring(0, end);
    }

    /**
     * @param term characters collected so far for the current token, without a hyphen
     * @return true if {@code term} followed by a hyphen is a known prefix
     */
    private static boolean isPrefix(String term) {
        return PREFIX_SET.contains(term + "-");
    }

    /**
     * @param word       text being tokenized
     * @param startIndex index of the first character after a hyphen
     * @return true if the alphanumeric run starting at {@code startIndex} is a known suffix
     */
    private static boolean isSuffix(String word, int startIndex) {
        if (word.length() <= startIndex) {
            return false;
        }
        String nextTerm = getNextCharTerm(word.substring(startIndex));
        return !nextTerm.isEmpty() && SUFFIX_SET.contains("-" + nextTerm);
    }

    /**
     * @param word       text being tokenized
     * @param startIndex index of the first character after an apostrophe
     * @return true only if exactly one character remains and it is {@code 's'}
     */
    private static boolean isOwnerApostrophe(CharSequence word, int startIndex) {
        return word.length() == startIndex + 1 && word.charAt(startIndex) == 's';
    }

    /**
     * @param word       text being tokenized
     * @param startIndex index of the first character after a period
     * @return true only if exactly one character remains and it is a digit; longer digit
     *         runs (".55") are intentionally not matched by this check
     */
    private static boolean isNumberDecimal(CharSequence word, int startIndex) {
        return word.length() == startIndex + 1 && Character.isDigit(word.charAt(startIndex));
    }

    /**
     * Appends the pending token, if any, to the token list and clears the buffer.
     */
    private static void flush(StringBuilder pending, List<String> tokens) {
        if (pending.length() != 0) {
            tokens.add(pending.toString());
            pending.setLength(0);
        }
    }

    /**
     * Tokenizes text without splitting a digit from a letter that follows it.
     *
     * @param str text to tokenize
     * @return mutable list of tokens in order of appearance
     */
    public static List<String> getTokens(String str) {
        return getTokens(str, false);
    }

    /**
     * Tokenizes text. The text is not split on whitespace first and is not lower-cased;
     * whitespace characters are emitted as one-character tokens like any other
     * non-alphanumeric character.
     *
     * @param str            text to tokenize
     * @param separateDigits when true, a token is also broken wherever a digit is directly
     *                       followed by a non-digit letter ("5mg" becomes "5", "mg")
     * @return mutable list of tokens in order of appearance
     */
    public static List<String> getTokens(String str, boolean separateDigits) {
        List<String> tokens = new ArrayList<>();
        StringBuilder pending = new StringBuilder();
        boolean previousWasDigit = false;
        int length = str.length();
        for (int i = 0; i < length; i++) {
            char c = str.charAt(i);
            if (Character.isLetterOrDigit(c)) {
                boolean digit = Character.isDigit(c);
                if (separateDigits && previousWasDigit && !digit) {
                    flush(pending, tokens);
                }
                previousWasDigit = digit;
                pending.append(c);
                continue;
            }
            previousWasDigit = false;
            if (c == '-' && (isPrefix(pending.toString()) || isSuffix(str, i + 1))) {
                // Hyphen belongs to a known prefix/suffix: keep it inside the current token.
                pending.append(c);
                continue;
            }
            flush(pending, tokens);
            if ((c == '\'' && isOwnerApostrophe(str, i + 1)) || (c == '.' && isNumberDecimal(str, i + 1))) {
                // Start a new token that keeps the punctuation: "'s" or ".5".
                pending.append(c);
            } else {
                tokens.add(String.valueOf(c));
            }
        }
        flush(pending, tokens);
        return tokens;
    }

    /**
     * Lower-cases and tokenizes text without splitting a digit from a letter that follows it.
     *
     * @param str text to tokenize
     * @return tokens joined by single spaces
     */
    public static String getTokenizedText(String str) {
        return getTokenizedText(str, false);
    }

    /**
     * Lower-cases the text, splits it on whitespace, drops one trailing {@code ","},
     * {@code ";"} or {@code "."} from the last word, tokenizes each word with
     * {@link #getTokens(String, boolean)} and joins all tokens with single spaces.
     *
     * @param str            text to tokenize
     * @param separateDigits passed through to {@link #getTokens(String, boolean)}
     * @return tokens joined by single spaces; the input itself if it is empty
     */
    public static String getTokenizedText(String str, boolean separateDigits) {
        if (str.isEmpty()) {
            return str;
        }
        // Locale.ROOT keeps the result independent of the JVM default locale
        // (e.g. under a Turkish locale 'I' would otherwise lower-case to dotless 'ı').
        String[] words = WHITESPACE.split(str.toLowerCase(Locale.ROOT));
        if (words.length == 0) {
            return "";
        }
        int lastIndex = words.length - 1;
        String lastWord = words[lastIndex];
        if (lastWord.endsWith(",") || lastWord.endsWith(";") || lastWord.endsWith(".")) {
            words[lastIndex] = lastWord.substring(0, lastWord.length() - 1);
        }
        return Arrays.stream(words)
                .map(word -> getTokens(word, separateDigits))
                .flatMap(List::stream)
                .collect(Collectors.joining(" "));
    }
}
