package com.microsoft.recognizers.text.matcher;

import com.microsoft.recognizers.text.utilities.StringUtility;
import java.util.ArrayList;
import java.util.List;

/* loaded from: input_file:com/microsoft/recognizers/text/matcher/SimpleTokenizer.class */
/**
 * Tokenizer that splits text on whitespace and emits every punctuation/symbol
 * character and every CJK character (as defined by {@link #isChinese(char)},
 * {@link #isJapanese(char)} and {@link #isKorean(char)}) as its own token.
 *
 * <p>The class holds no fields; all state used while tokenizing is local to
 * {@link #tokenize(String)}.
 */
public class SimpleTokenizer implements ITokenizer {
    /**
     * Splits {@code str} into tokens.
     *
     * <p>Rules, applied to each UTF-16 {@code char} in order:
     * <ul>
     *   <li>whitespace ends the pending token (if any) and is itself discarded;</li>
     *   <li>a char that is not a letter or digit, or that is CJK, ends the pending
     *       token (if any) and is emitted as a one-char token;</li>
     *   <li>any other char starts or extends the pending token.</li>
     * </ul>
     *
     * <p>Each token is created as {@code new Token(start, length, text)} where
     * {@code start} is the index into {@code str} and {@code text} is the matching
     * substring.
     *
     * <p>NOTE(review): the scan works on individual {@code char} values, so the two
     * halves of a surrogate pair are neither whitespace nor letter/digit and each
     * becomes its own one-char token. This matches the previous behaviour and is
     * kept as-is.
     *
     * @param str the text to tokenize
     * @return a new mutable list of tokens in order of appearance; empty when
     *         {@code StringUtility.isNullOrEmpty(str)} is true
     */
    @Override // com.microsoft.recognizers.text.matcher.ITokenizer
    public List<Token> tokenize(String str) {
        List<Token> tokens = new ArrayList<Token>();
        if (StringUtility.isNullOrEmpty(str)) {
            return tokens;
        }

        boolean inToken = false;
        int tokenStart = 0;
        int length = str.length();

        for (int index = 0; index < length; index++) {
            char current = str.charAt(index);

            if (Character.isWhitespace(current)) {
                // Whitespace only terminates a pending token; it is never emitted.
                if (inToken) {
                    addToken(tokens, str, tokenStart, index);
                    inToken = false;
                }
            } else if (!Character.isLetterOrDigit(current) || isCjk(current)) {
                // Symbols and CJK characters are standalone tokens; flush whatever
                // was being accumulated first so ordering is preserved.
                if (inToken) {
                    addToken(tokens, str, tokenStart, index);
                    inToken = false;
                }
                addToken(tokens, str, index, index + 1);
            } else if (!inToken) {
                // First letter/digit of a new multi-char token.
                tokenStart = index;
                inToken = true;
            }
        }

        // Flush a token that runs to the end of the input.
        if (inToken) {
            addToken(tokens, str, tokenStart, length);
        }
        return tokens;
    }

    /** Appends the token covering {@code text[start, end)} to {@code tokens}. */
    private static void addToken(List<Token> tokens, String text, int start, int end) {
        tokens.add(new Token(start, end - start, text.substring(start, end)));
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Tests whether {@code c} lies in U+4E00..U+9FBF (within the CJK Unified
     * Ideographs block) or U+3400..U+4DBF (CJK Unified Ideographs Extension A).
     *
     * @param c the UTF-16 code unit to test
     * @return {@code true} if {@code c} falls in one of those ranges
     */
    public boolean isChinese(char c) {
        return (c >= 0x4E00 && c <= 0x9FBF) || (c >= 0x3400 && c <= 0x4DBF);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Tests whether {@code c} lies in U+3040..U+309F (Hiragana), U+30A0..U+30FF
     * (Katakana) or U+FF66..U+FF9D (within the halfwidth Katakana forms).
     *
     * @param c the UTF-16 code unit to test
     * @return {@code true} if {@code c} falls in one of those ranges
     */
    public boolean isJapanese(char c) {
        return (c >= 0x3040 && c <= 0x309F)
                || (c >= 0x30A0 && c <= 0x30FF)
                || (c >= 0xFF66 && c <= 0xFF9D);
    }

    /**
     * Tests whether {@code c} lies in U+AC00..U+D7AF (Hangul Syllables),
     * U+1100..U+11FF (Hangul Jamo), U+3130..U+318F (Hangul Compatibility Jamo)
     * or U+FFB0..U+FFDC (within the halfwidth Hangul forms).
     *
     * @param c the UTF-16 code unit to test
     * @return {@code true} if {@code c} falls in one of those ranges
     */
    protected boolean isKorean(char c) {
        return (c >= 0xAC00 && c <= 0xD7AF)
                || (c >= 0x1100 && c <= 0x11FF)
                || (c >= 0x3130 && c <= 0x318F)
                || (c >= 0xFFB0 && c <= 0xFFDC);
    }

    /** Returns {@code true} if {@code c} is Chinese, Japanese or Korean per the checks above. */
    private boolean isCjk(char c) {
        return isChinese(c) || isJapanese(c) || isKorean(c);
    }
}
