package uk.ac.shef.dcs.sti.util;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
import org.apache.jena.sparql.sse.Tags;
import org.apache.xalan.templates.Constants;
import org.apache.zookeeper.server.quorum.QuorumStats;
import uk.ac.shef.dcs.util.StringUtils;

/* loaded from: input_file:uk/ac/shef/dcs/sti/util/DataTypeClassifier.class */
public class DataTypeClassifier implements Serializable {
    private static List<String> MONTHS = new ArrayList(Arrays.asList("january", "jan", "february", "feb", "march", "mar", "april", "apr", "may", "june", "jun", "july", "jul", "august", "aug", "september", "sep", "october", "oct", "november", "nov", "december", "dec"));
    private static Pattern[] DATES = {Pattern.compile("([0-9]{4})[\\.\\/\\-]([0-9]{1,2})[\\.\\/\\-]([0-9]{1,2})"), Pattern.compile("^(?=\\d)(?:(?!(?:(?:0?[5-9]|1[0-4])(?:\\.|-|\\/)10(?:\\.|-|\\/)(?:1582))|(?:(?:0?[3-9]|1[0-3])(?:\\.|-|\\/)0?9(?:\\.|-|\\/)(?:1752)))(31(?!(?:\\.|-|\\/)(?:0?[2469]|11))|30(?!(?:\\.|-|\\/)0?2)|(?:29(?:(?!(?:\\.|-|\\/)0?2(?:\\.|-|\\/))|(?=\\D0?2\\D(?:(?!000[04]|(?:(?:1[^0-6]|[2468][^048]|[3579][^26])00))(?:(?:(?:\\d\\d)(?:[02468][048]|[13579][26])(?!\\x20BC))|(?:00(?:42|3[0369]|2[147]|1[258]|09)\\x20BC))))))|2[0-8]|1\\d|0?[1-9])([-.\\/])(1[012]|(?:0?[1-9]))\\2((?=(?:00(?:4[0-5]|[0-3]?\\d)\\x20BC)|(?:\\d{4}(?:$|(?=\\x20\\d)\\x20)))\\d{4}(?:\\x20BC)?)(?:$|(?=\\x20\\d)\\x20))?((?:(?:0?[1-9]|1[012])(?::[0-5]\\d){0,2}(?:\\x20[aApP][mM]))|(?:[01]\\d|2[0-3])(?::[0-5]\\d){1,2})?$"), Pattern.compile("^(?:(?:(?:(?:(?:1[6-9]|[2-9]\\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00)))(\\/|-|\\.)(?:0?2\\1(?:29)))|(?:(?:(?:1[6-9]|[2-9]\\d)?\\d{2})(\\/|-|\\.)(?:(?:(?:0?[13578]|1[02])\\2(?:31))|(?:(?:0?[1,3-9]|1[0-2])\\2(29|30))|(?:(?:0?[1-9])|(?:1[0-2]))\\2(?:0?[1-9]|1\\d|2[0-8]))))$"), Pattern.compile("^(((0?[1-9]|[12]\\d|3[01])[\\.\\-\\/](0?[13578]|1[02])[\\.\\-\\/]((1[6-9]|[2-9]\\d)?\\d{2}))|((0?[1-9]|[12]\\d|30)[\\.\\-\\/](0?[13456789]|1[012])[\\.\\-\\/]((1[6-9]|[2-9]\\d)?\\d{2}))|((0?[1-9]|1\\d|2[0-8])[\\.\\-\\/]0?2[\\.\\-\\/]((1[6-9]|[2-9]\\d)?\\d{2}))|(29[\\.\\-\\/]0?2[\\.\\-\\/]((1[6-9]|[2-9]\\d)?(0[48]|[2468][048]|[13579][26])|((16|[2468][048]|[3579][26])00)|00)))$"), Pattern.compile("^([0]?[1-9]|[1][0-2])[./-]([0]?[1-9]|[1|2][0-9]|[3][0|1])[./-]([0-9]{4}|[0-9]{2})$")};

    /* loaded from: input_file:uk/ac/shef/dcs/sti/util/DataTypeClassifier$DataType.class */
    public enum DataType implements Serializable {
        UNKNOWN(QuorumStats.Provider.UNKNOWN_STATE),
        EMPTY(Constants.ELEMNAME_EMPTY_STRING),
        ORDERED_NUMBER("onumber"),
        NUMBER("number"),
        DATE("date"),
        SHORT_TEXT("stext"),
        LONG_TEXT("ltext"),
        LONG_STRING("lstring"),
        NAMED_ENTITY(Tags.tagNE);

        private String value;
        private static final long serialVersionUID = -1208425578110405913L;

        DataType(String str) {
            this.value = str;
        }

        public String getValue() {
            return this.value;
        }

        public void setValue(String str) {
            this.value = str;
        }
    }

    public static boolean isOrderedNumber(String... strArr) {
        int i = 0;
        int i2 = Integer.MIN_VALUE;
        for (String str : strArr) {
            if (str.length() != 0) {
                try {
                    int intValue = Integer.valueOf(str).intValue();
                    if (i2 == Integer.MIN_VALUE) {
                        i2 = intValue;
                    } else {
                        if (intValue != i2 + 1 && intValue != i2) {
                            i++;
                        }
                        i2 = intValue;
                    }
                } catch (NumberFormatException e) {
                    return false;
                }
            }
        }
        int length = (int) (0.2d * strArr.length);
        return i <= (length < 2 ? 2 : length) && ((double) i) <= ((double) strArr.length) * 0.5d;
    }

    public static DataType classify(String str) {
        String trim = str.trim();
        for (Pattern pattern : DATES) {
            if (pattern.matcher(trim).matches()) {
                return DataType.DATE;
            }
        }
        String[] split = trim.split("\\s+");
        if (split.length == 1 && split[0].length() > 25) {
            return DataType.LONG_STRING;
        }
        String trim2 = StringUtils.toAlphaNumericWhitechar(trim).trim();
        if (trim2.length() < 1) {
            return DataType.EMPTY;
        }
        String[] split2 = trim2.split("\\s+");
        int i = 0;
        for (String str2 : split2) {
            if (MONTHS.contains(str2.toLowerCase())) {
                i++;
            }
        }
        if (i > split2.length * 0.2d) {
            return DataType.DATE;
        }
        if (StringUtils.isNumericArray(split2)) {
            for (String str3 : split2) {
                if (str3.equals("AD") || str3.equals("BC") || str3.equals("A.D.") || str3.equals("B.C.")) {
                    return DataType.DATE;
                }
            }
            try {
                Integer valueOf = Integer.valueOf(trim2.trim());
                if (valueOf.intValue() >= 1800 && valueOf.intValue() < 2050) {
                    return DataType.DATE;
                }
            } catch (NumberFormatException e) {
            }
            return DataType.NUMBER;
        }
        if (split2.length >= 10) {
            return (split2.length >= 15 || (split2.length >= 5 && isLikelySentence(str))) ? DataType.LONG_TEXT : DataType.UNKNOWN;
        }
        int i2 = 0;
        int i3 = 0;
        for (int i4 = 0; i4 < split2.length; i4++) {
            if (StringUtils.isCapitalized(split2[i4])) {
                i2++;
            } else if (Character.isLowerCase(split2[i4].charAt(0))) {
                i3++;
            }
        }
        boolean z = false;
        boolean z2 = false;
        if (StringUtils.isCapitalized(split2[0])) {
            z = true;
        } else if (isCapitalizedIDString(split2[0])) {
            z = true;
        }
        if (StringUtils.isCapitalized(split2[split2.length - 1])) {
            z2 = true;
        } else if (isCapitalizedIDString(split2[split2.length - 1])) {
            z2 = true;
        }
        return (!z || i2 < split2.length - i2) ? (z && z2) ? DataType.NAMED_ENTITY : !isLikelySentence(str) ? DataType.SHORT_TEXT : DataType.LONG_TEXT : DataType.NAMED_ENTITY;
    }

    private static boolean isLikelySentence(String str) {
        return str.endsWith(".") || str.endsWith("?") || str.endsWith("!");
    }

    public static boolean isCapitalizedIDString(String str) {
        boolean z = true;
        int i = 0;
        int i2 = 0;
        while (true) {
            if (i2 >= str.length()) {
                break;
            }
            if (Character.isAlphabetic(str.charAt(i2))) {
                i++;
                if (!Character.isUpperCase(str.charAt(i2))) {
                    z = false;
                    break;
                }
            }
            i2++;
        }
        if (i > 0) {
            return z;
        }
        return false;
    }
}
