package smile.nlp.tokenizer;

import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import smile.nlp.dictionary.EnglishDictionary;

/* loaded from: input_file:smile/nlp/tokenizer/SimpleSentenceSplitter.class */
/**
 * A rule-based sentence splitter driven by regular expressions.
 *
 * <p>The text is scanned for the punctuation marks {@code . ! ? :} that are
 * followed by whitespace (optionally preceded by closing quotes or brackets).
 * Each candidate is then classified as a real sentence boundary or not:
 * <ul>
 *   <li>{@code !} and {@code ?} always end a sentence;</li>
 *   <li>{@code :} ends a sentence only when more than
 *       {@value #SHORT_SENTENCE_WORDS} words have been accumulated;</li>
 *   <li>{@code .} ends a sentence unless the word before it looks like an
 *       abbreviation (see {@link #isAbbreviation(String)}); after an
 *       abbreviation it still ends the sentence when the next word is in
 *       {@code EnglishDictionary.CONCISE} and more than
 *       {@value #SHORT_SENTENCE_WORDS} words have been accumulated.</li>
 * </ul>
 *
 * <p>Instances are obtained through {@link #getInstance()}.
 */
public class SimpleSentenceSplitter implements SentenceSplitter {
    /**
     * Control character U+0019 used internally to mark the places where a
     * space was missing after a punctuation mark. It is removed from every
     * returned sentence.
     */
    private static final char MISSING_SPACE_MARK = (char) 25;

    /** Word-count threshold used by the colon and abbreviation heuristics. */
    private static final int SHORT_SENTENCE_WORDS = 6;

    private static final Pattern REGEX_CARRIAGE_RETURN = Pattern.compile("[\\n\\r]+");
    private static final Pattern REGEX_FORGOTTEN_SPACE = Pattern.compile("(.)([\\.!?])([\\D&&\\S&&[^\\.\"'`\\)\\}\\]]])");
    private static final Pattern REGEX_SENTENCE = Pattern.compile("(['\"`]*[\\(\\{\\[]?[a-zA-Z0-9]+.*?)([\\.!?:])(?:(?=([\\(\\[\\{\"'`\\)\\}\\]<]*[ \u0019]+)[\\(\\[\\{\"'`\\)\\}\\] ]*([A-Z0-9][a-z]*))|(?=([\\(\\)\"'`\\)\\}<\\] \u0019]+)\\s))");
    private static final Pattern REGEX_WHITESPACE = Pattern.compile("\\s+");
    private static final Pattern REGEX_LAST_WORD = Pattern.compile("\\b([\\w0-9\\.']+)$");

    // Patterns of the abbreviation heuristic, compiled once instead of on
    // every period encountered.
    private static final Pattern REGEX_HAS_VOWEL = Pattern.compile(".*[AEIOUaeiou]+.*");
    private static final Pattern REGEX_HAS_LOWERCASE = Pattern.compile(".*[a-z]+.*");
    private static final Pattern REGEX_HAS_Y = Pattern.compile(".*[y]+.*");
    private static final Pattern REGEX_INITIALS = Pattern.compile("([a-zA-Z][\\.])+");
    private static final Pattern REGEX_SINGLE_LETTER = Pattern.compile("^[A-Za-z]$");
    private static final Pattern REGEX_PRONOUN_I = Pattern.compile("^[I]$");

    private static final SimpleSentenceSplitter singleton = new SimpleSentenceSplitter();

    private SimpleSentenceSplitter() {
    }

    /**
     * Returns the shared splitter instance.
     *
     * @return the singleton instance.
     */
    public static SimpleSentenceSplitter getInstance() {
        return singleton;
    }

    /**
     * Splits a text into sentences.
     *
     * @param str the text to split; line breaks are treated as spaces.
     * @return the sentences in order of appearance, each trimmed; an empty
     *         array if the text contains nothing but whitespace.
     */
    @Override
    public String[] split(String str) {
        ArrayList<String> sentences = new ArrayList<>();

        // Line breaks become spaces, and any pre-existing U+0019 is blanked
        // so that it cannot be confused with the marker inserted below.
        String text = REGEX_CARRIAGE_RETURN.matcher(str).replaceAll(" ").replace(MISSING_SPACE_MARK, ' ');
        // Mark a forgotten space after punctuation ("end.Next") with U+0019.
        text = REGEX_FORGOTTEN_SPACE.matcher(text).replaceAll("$1$2\u0019$3");
        // A trailing newline lets the second look-ahead of REGEX_SENTENCE
        // match punctuation that is followed only by spaces at the very end.
        text = text + "\n";

        Matcher matcher = REGEX_SENTENCE.matcher(text);
        StringBuilder pending = new StringBuilder(); // pieces of the sentence being built
        int wordCount = 0;                           // words collected in 'pending'
        int consumed = 0;                            // offset up to which 'text' has been handled

        while (matcher.find()) {
            String body = matcher.group(1).trim();
            String punctuation = matcher.group(2);

            // Whatever follows the punctuation up to and including the
            // whitespace is captured by group 3 or group 5, depending on
            // which look-ahead alternative matched.
            String trailing;
            if (matcher.group(3) != null) {
                trailing = matcher.group(3);
                consumed = matcher.end(3);
            } else if (matcher.group(5) != null) {
                trailing = matcher.group(5);
                consumed = matcher.end(5);
            } else {
                trailing = "";
                consumed = matcher.end();
            }

            wordCount += REGEX_WHITESPACE.split(body).length;

            String nextWord = matcher.group(4);
            if (nextWord == null) {
                nextWord = "";
            }

            boolean sentenceEnds;
            if (".".equals(punctuation)) {
                Matcher lastWordMatcher = REGEX_LAST_WORD.matcher(body);
                String lastWord = lastWordMatcher.find() ? lastWordMatcher.group() : "";
                if (isAbbreviation(lastWord)) {
                    // An abbreviation may still close a sentence: accept the
                    // break only if the next word is a dictionary word and
                    // enough words have been collected.
                    sentenceEnds = EnglishDictionary.CONCISE.contains(nextWord) && wordCount > SHORT_SENTENCE_WORDS;
                } else {
                    sentenceEnds = true;
                }
            } else if ("!".equals(punctuation) || "?".equals(punctuation)) {
                sentenceEnds = true;
            } else {
                // Only ':' is left.
                sentenceEnds = wordCount > SHORT_SENTENCE_WORDS;
            }

            pending.append(body).append(punctuation);
            if (sentenceEnds) {
                pending.append(trailing.trim());
                sentences.add(pending.toString());
                pending = new StringBuilder();
                wordCount = 0;
            } else if (trailing.indexOf(MISSING_SPACE_MARK) == -1) {
                // Restore the separating space, except where the original
                // text had none (marker present).
                pending.append(' ');
            }
        }

        // Remainder after the last match. It is appended to the pending
        // buffer rather than emitted on its own, so that pieces collected
        // after a non-breaking match (e.g. "Mr. ") are not lost.
        if (consumed < text.length()) {
            pending.append(text.substring(consumed));
        }
        String last = pending.toString().trim();
        if (!last.isEmpty()) {
            sentences.add(last);
        }

        String[] result = new String[sentences.size()];
        String mark = String.valueOf(MISSING_SPACE_MARK);
        for (int k = 0; k < result.length; k++) {
            result[k] = sentences.get(k).replace(mark, "");
        }
        return result;
    }

    /**
     * Tells whether the word preceding a period looks like an abbreviation.
     * A word qualifies when any of the following holds: it has lowercase
     * letters but neither a vowel nor a 'y'; it is a sequence of
     * letter-period pairs (e.g. "U.S."); it is a single letter other than
     * "I"; or its lowercase form is listed in {@code EnglishAbbreviations}.
     *
     * @param lastWord the last word before the period, possibly empty.
     * @return true if the word is treated as an abbreviation.
     */
    private static boolean isAbbreviation(String lastWord) {
        boolean consonantsOnly = !REGEX_HAS_VOWEL.matcher(lastWord).matches()
                && REGEX_HAS_LOWERCASE.matcher(lastWord).matches()
                && !REGEX_HAS_Y.matcher(lastWord).matches();
        if (consonantsOnly) {
            return true;
        }
        if (REGEX_INITIALS.matcher(lastWord).matches()) {
            return true;
        }
        if (REGEX_SINGLE_LETTER.matcher(lastWord).matches() && !REGEX_PRONOUN_I.matcher(lastWord).matches()) {
            return true;
        }
        return EnglishAbbreviations.contains(lastWord.toLowerCase());
    }
}
