package edu.pitt.dbmi.nlp.noble.tools;

import edu.pitt.dbmi.nlp.noble.util.FileTools;
import edu.pitt.dbmi.nlp.noble.util.Sender;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.lang.reflect.Array;
import java.net.URL;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.Formatter;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.TreeSet;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/pitt/dbmi/nlp/noble/tools/TextTools.class */
public class TextTools {
    /** URL used by the no-argument constructor to create its {@link Sender}. */
    public static final String DEFAULT_TEXT_TOOLS_URL = "http://slidetutor.upmc.edu/term/servlet/TextToolsServlet";
    /** Sentinel returned by {@link #parseIntegerValue(String)} when the text is not a recognized number. */
    public static final int NO_VALUE = Integer.MIN_VALUE;
    /** Column separator (a tab) passed to {@link #loadResourceAsMap(String, String)} by {@link #loadResource(String)}. */
    private static final String separator = "\t";
    // The static maps below are lazily populated caches: each stays null until its accessor is first called.
    // Loaded from /resources/PluralTable.lst by getPluralTable().
    private static Map<String, String> plurals;
    // Character -> percent-encoded replacement, built by getURLEscapeCode().
    private static Map<Character, String> urlEscapeCodeMap;
    // Character -> entity replacement, built by getHTMLEscapeCode().
    private static Map<Character, String> xmlEscapeCodeMap;
    // Loaded from /resources/StopWords.lst by getStopWords(); only the keys are consulted.
    private static Map<String, String> stopWords;
    // Loaded from /resources/PrepositionWords.lst by getPrepostitionWords(); only the keys are consulted.
    private static Map<String, String> prepostionWords;
    // Normalized entries of /resources/CommonWords.lst, built by getCommonWords(); values are empty strings.
    private static Map<String, String> commonWords;
    // Regular expression -> SimpleDateFormat pattern, built by parseDate().
    private static Map<String, String> timePatterns;
    // Assigned by the constructors; no other use is visible in this class.
    private Sender sender;

    /* loaded from: input_file:edu/pitt/dbmi/nlp/noble/tools/TextTools$StringStats.class */
    /**
     * Simple character statistics for a string, as filled in by
     * {@code TextTools.getStringStats(String)}.
     */
    public static class StringStats {
        /** Number of upper-case characters. */
        public int upperCase;
        /** Number of lower-case characters. */
        public int lowerCase;
        /** Number of digit characters. */
        public int digits;
        /** Total number of characters. */
        public int length;
        /** Number of whitespace characters. */
        public int whiteSpace;
        /** Number of alphabetic characters. */
        public int alphabetic;
        /** First character is upper case and every other alphabetic character is lower case. */
        public boolean isCapitalized;
        /** Every character is lower case. */
        public boolean isLowercase;
        /** Every character is upper case. */
        public boolean isUppercase;

        /**
         * Renders all counters and flags as a comma separated list of {@code name=value} pairs.
         *
         * @return textual form of these statistics
         */
        @Override
        public String toString() {
            StringBuilder text = new StringBuilder();
            text.append("upperCase=").append(upperCase);
            text.append(",lowerCase=").append(lowerCase);
            text.append(",digits=").append(digits);
            text.append(",length=").append(length);
            text.append(",whiteSpace=").append(whiteSpace);
            text.append(",alphabetic=").append(alphabetic);
            text.append(",isCapitalized=").append(isCapitalized);
            text.append(",isLowercase=").append(isLowercase);
            text.append(",isUppercase=").append(isUppercase);
            return text.toString();
        }
    }

    /**
     * Returns the plural table, loading it from {@code /resources/PluralTable.lst}
     * on first use and caching it afterwards.
     *
     * @return the cached plural table
     */
    private static Map<String, String> getPluralTable() {
        if (plurals != null) {
            return plurals;
        }
        plurals = loadResource("/resources/PluralTable.lst");
        return plurals;
    }

    /**
     * Returns the stop words, loading them from {@code /resources/StopWords.lst}
     * on first use and caching them afterwards.
     *
     * @return key set of the cached stop word table
     */
    public static Set<String> getStopWords() {
        if (stopWords != null) {
            return stopWords.keySet();
        }
        stopWords = loadResource("/resources/StopWords.lst");
        return stopWords.keySet();
    }

    /**
     * Returns the preposition words, loading them from
     * {@code /resources/PrepositionWords.lst} on first use and caching them afterwards.
     *
     * @return key set of the cached preposition table
     */
    public static Set<String> getPrepostitionWords() {
        if (prepostionWords != null) {
            return prepostionWords.keySet();
        }
        prepostionWords = loadResource("/resources/PrepositionWords.lst");
        return prepostionWords.keySet();
    }

    /**
     * Returns the common words. On first use every entry of
     * {@code /resources/CommonWords.lst} is passed through {@link #normalize(String)}
     * and the non-empty results are cached.
     *
     * @return key set of the cached, normalized common word table
     */
    public static Set<String> getCommonWords() {
        if (commonWords == null) {
            commonWords = new HashMap<String, String>();
            for (String rawWord : loadResource("/resources/CommonWords.lst").keySet()) {
                String key = normalize(rawWord);
                if (!key.isEmpty()) {
                    // only the key matters; the value is a placeholder
                    commonWords.put(key, "");
                }
            }
        }
        return commonWords.keySet();
    }

    /**
     * Loads a delimited text resource into an insertion-ordered map.
     *
     * <p>The location is resolved in this order: an existing file on disk, an
     * {@code http://} or {@code https://} URL, otherwise a classpath resource relative
     * to this class. Blank lines and lines starting with {@code #} are skipped. Each
     * remaining line is split with {@code str2} (a regular expression): when there are
     * at least two columns the <em>second</em> column becomes the key and the first
     * column the value; otherwise the single column becomes a key mapped to {@code null}.
     *
     * <p>I/O failures are reported on standard error and whatever was read up to that
     * point is returned; the underlying stream is always closed.
     *
     * @param str  file path, URL or classpath location of the resource
     * @param str2 column separator, interpreted as a regular expression
     * @return map of the loaded entries, empty if the resource could not be found
     */
    public static Map<String, String> loadResourceAsMap(String str, String str2) {
        Map<String, String> entries = new LinkedHashMap<String, String>();
        try {
            File file = new File(str);
            InputStream input;
            if (file.exists()) {
                input = new FileInputStream(file);
            } else if (str.startsWith("http://") || str.startsWith("https://")) {
                input = new URL(str).openStream();
            } else {
                input = TextTools.class.getResourceAsStream(str);
            }
            if (input == null) {
                System.err.println("ERROR: Could not load resource: " + str);
                return entries;
            }
            // closing the reader also closes the wrapped input stream
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(input))) {
                for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                    String content = line.trim();
                    if (content.length() == 0 || content.startsWith("#")) {
                        continue;
                    }
                    String[] columns = content.split(str2);
                    if (columns.length >= 2) {
                        entries.put(columns[1].trim(), columns[0].trim());
                    } else if (columns.length == 1) {
                        entries.put(columns[0].trim(), null);
                    }
                    // columns.length == 0: the line consisted only of separators, nothing to store
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return entries;
    }

    /**
     * Loads a tab separated resource via {@link #loadResourceAsMap(String, String)}.
     *
     * @param str location of the resource
     * @return map of the loaded entries
     */
    public static Map<String, String> loadResource(String str) {
        Map<String, String> table = loadResourceAsMap(str, separator);
        return table;
    }

    /**
     * Loads the non-blank, non-comment lines of a text resource.
     *
     * <p>The location is resolved in this order: an existing file on disk, an
     * {@code http://} or {@code https://} URL, otherwise a classpath resource relative
     * to this class. Lines are trimmed; blank lines and lines starting with {@code #}
     * are skipped.
     *
     * <p>I/O failures are reported on standard error and whatever was read up to that
     * point is returned; the underlying stream is always closed.
     *
     * @param str file path, URL or classpath location of the resource
     * @return the trimmed lines in file order, empty if the resource could not be found
     */
    public static List<String> loadResourceAsList(String str) {
        List<String> lines = new ArrayList<String>();
        try {
            File file = new File(str);
            InputStream input;
            if (file.exists()) {
                input = new FileInputStream(file);
            } else if (str.startsWith("http://") || str.startsWith("https://")) {
                input = new URL(str).openStream();
            } else {
                input = TextTools.class.getResourceAsStream(str);
            }
            if (input == null) {
                System.err.println("ERROR: Could not load resource: " + str);
                return lines;
            }
            // closing the reader also closes the wrapped input stream
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(input))) {
                for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                    String content = line.trim();
                    if (content.length() > 0 && !content.startsWith("#")) {
                        lines.add(content);
                    }
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return lines;
    }

    /**
     * Creates an instance whose {@link Sender} targets the given URL.
     *
     * @param url address handed to the {@link Sender} constructor
     */
    public TextTools(URL url) {
        Sender target = new Sender(url);
        this.sender = target;
    }

    /**
     * Creates an instance whose {@link Sender} targets {@link #DEFAULT_TEXT_TOOLS_URL}.
     * Any exception raised while doing so is printed and the sender is left unset.
     */
    public TextTools() {
        try {
            URL defaultUrl = new URL(DEFAULT_TEXT_TOOLS_URL);
            this.sender = new Sender(defaultUrl);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Tells whether the word ends with any key of the plural table.
     *
     * @param str word to test
     * @return {@code true} if some plural table key is a suffix of {@code str}
     */
    public static boolean isPlural(String str) {
        for (String suffix : getPluralTable().keySet()) {
            if (str.endsWith(suffix)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether the trimmed text is one of the stop words.
     *
     * @param str text to look up
     * @return {@code true} if the trimmed text is contained in {@link #getStopWords()}
     */
    public static boolean isStopWord(String str) {
        // getStopWords() triggers the lazy load before the lookup
        return getStopWords().contains(str.trim());
    }

    /**
     * Tells whether the trimmed text is one of the preposition words.
     *
     * @param str text to look up
     * @return {@code true} if the trimmed text is contained in {@link #getPrepostitionWords()}
     */
    public static boolean isPrepositionWord(String str) {
        // getPrepostitionWords() triggers the lazy load before the lookup
        return getPrepostitionWords().contains(str.trim());
    }

    /**
     * Tells whether the text, after trimming and {@link #normalize(String)}, is one
     * of the common words.
     *
     * @param str text to look up
     * @return {@code true} if the normalized text is contained in {@link #getCommonWords()}
     */
    public static boolean isCommonWord(String str) {
        // getCommonWords() triggers the lazy load before the lookup
        return getCommonWords().contains(normalize(str.trim()));
    }

    /**
     * Converts a word to singular form using the plural table.
     *
     * <p>A trailing {@code 's} is simply dropped. Otherwise the first plural table key
     * (in table order) that is a suffix of the word is replaced by its mapped value.
     * Words matching no rule are returned unchanged.
     *
     * @param str word to convert
     * @return the converted word
     */
    public static String convertToSingularForm(String str) {
        if (str.endsWith("'s")) {
            return str.substring(0, str.length() - 2);
        }
        for (Map.Entry<String, String> rule : getPluralTable().entrySet()) {
            String suffix = rule.getKey();
            if (!str.endsWith(suffix)) {
                continue;
            }
            String base = str.substring(0, str.length() - suffix.length());
            return base + rule.getValue();
        }
        return str;
    }

    /**
     * Stems a word by feeding its lower-cased form to a {@link Stemmer}.
     *
     * @param str word to stem, may be {@code null}
     * @return the stemmer's result, or an empty string for {@code null} or empty input
     */
    public static String stem(String str) {
        boolean nothingToStem = str == null || str.isEmpty();
        if (nothingToStem) {
            return "";
        }
        Stemmer worker = new Stemmer();
        worker.add(str.toLowerCase());
        worker.stem();
        return worker.getResultString();
    }

    /**
     * Splits text into words on whitespace and a fixed set of punctuation characters.
     * Periods are not delimiters, but trailing periods are stripped from every token;
     * tokens that become empty are dropped.
     *
     * @param str text to split
     * @return the words in order of appearance
     */
    public static List<String> getWords(String str) {
        List<String> words = new ArrayList<String>();
        StringTokenizer tokens = new StringTokenizer(str, " ,!?;:-–—~_\\/|\t\n\r<>^()[]\"");
        while (tokens.hasMoreTokens()) {
            String token = tokens.nextToken();
            // find the end of the token once all trailing periods are ignored
            int end = token.length();
            while (end > 0 && token.charAt(end - 1) == '.') {
                end--;
            }
            if (end > 0) {
                words.add(token.substring(0, end));
            }
        }
        return words;
    }

    /**
     * Builds all word n-grams of the text, from size {@code i} down to size 1.
     * Words come from {@link #getWords(String)}; within one size the n-grams are
     * listed left to right, with words joined by single spaces.
     *
     * @param str text to analyze
     * @param i   largest n-gram size; values below 1 produce an empty result
     * @return the n-grams, largest size first
     */
    public static String[] getNGrams(String str, int i) {
        List<String> words = getWords(str);
        List<String> grams = new ArrayList<String>();
        for (int size = i; size >= 1; size--) {
            int lastStart = words.size() - size;
            for (int start = 0; start <= lastStart; start++) {
                StringBuilder gram = new StringBuilder();
                for (String word : words.subList(start, start + size)) {
                    gram.append(word).append(' ');
                }
                grams.add(gram.toString().trim());
            }
        }
        return grams.toArray(new String[grams.size()]);
    }

    /**
     * Reads text from a stream via {@link #getText(InputStream, String)}, passing
     * {@code "\n"} as the second argument.
     *
     * @param inputStream stream to read
     * @return the text produced by the two-argument overload
     * @throws IOException if reading fails
     */
    public static String getText(InputStream inputStream) throws IOException {
        String text = getText(inputStream, "\n");
        return text;
    }

    /**
     * Reads text from a stream by delegating to {@code FileTools.getText}.
     *
     * @param inputStream stream to read
     * @param str         passed through unchanged; presumably the line separator used
     *                    when assembling the text (confirm against {@code FileTools})
     * @return whatever {@code FileTools.getText} returns
     * @throws IOException if reading fails
     */
    public static String getText(InputStream inputStream, String str) throws IOException {
        String text = FileTools.getText(inputStream, str);
        return text;
    }

    /**
     * Replaces a fixed set of accented Latin letters with their unaccented
     * counterparts; all other characters are kept as they are.
     *
     * @param str text to convert, may be {@code null}
     * @return the converted text, or {@code null} if the input was {@code null}
     */
    public static String stripDiacritics(String str) {
        if (str == null) {
            return null;
        }
        // the two tables are parallel: position n of 'accented' maps to position n of 'plain'
        final String accented = "ÀàÈèÌìÒòÙùÁáÉéÍíÓóÚúÝýÂâÊêÎîÔôÛûŶŷÃãÕõÑñÄäËëÏïÖöÜüŸÿÅåÇçŐőŰű";
        final String plain = "AaEeIiOoUuAaEeIiOoUuYyAaEeIiOoUuYyAaOoNnAaEeIiOoUuYyAaCcOoUu";
        char[] chars = str.toCharArray();
        for (int pos = 0; pos < chars.length; pos++) {
            int match = accented.indexOf(chars[pos]);
            if (match >= 0) {
                chars[pos] = plain.charAt(match);
            }
        }
        return new String(chars);
    }

    /**
     * Normalizes text without stemming; shorthand for {@code normalize(str, false)}.
     *
     * @param str text to normalize
     * @return the normalized text
     */
    public static String normalize(String str) {
        final boolean stemWords = false;
        return normalize(str, stemWords);
    }

    /**
     * Normalizes text with stop word removal enabled; shorthand for
     * {@code normalize(str, z, z2, true)}.
     *
     * @param str text to normalize
     * @param z   whether words are stemmed
     * @param z2  whether purely numeric words are removed
     * @return the normalized text
     */
    public static String normalize(String str, boolean z, boolean z2) {
        final boolean stripStopWords = true;
        return normalize(str, z, z2, stripStopWords);
    }

    /**
     * Normalizes text keeping duplicate words; shorthand for
     * {@code normalize(str, z, z2, z3, false)}.
     *
     * @param str text to normalize
     * @param z   whether words are stemmed
     * @param z2  whether purely numeric words are removed (only effective when {@code z3} is set)
     * @param z3  whether stop words are removed
     * @return the normalized text
     */
    public static String normalize(String str, boolean z, boolean z2, boolean z3) {
        final boolean uniqueWords = false;
        return normalize(str, z, z2, z3, uniqueWords);
    }

    /**
     * Normalizes text into a space separated string of words.
     *
     * <p>The words come from {@link #normalizeWords(String, boolean, boolean, boolean)}.
     * They are then optionally de-duplicated and/or sorted and finally joined with
     * single spaces.
     *
     * @param str text to normalize
     * @param z   whether words are stemmed
     * @param z2  whether purely numeric words are removed (only effective when {@code z3} is set)
     * @param z3  whether stop words are removed
     * @param z4  whether duplicate words are dropped
     * @param z5  whether words are sorted in natural order
     * @return the normalized text
     */
    public static String normalize(String str, boolean z, boolean z2, boolean z3, boolean z4, boolean z5) {
        List<String> words = normalizeWords(str, z, z2, z3);
        Collection<String> ordered;
        if (z4 && z5) {
            // unique and sorted
            ordered = new TreeSet<String>(words);
        } else if (z4) {
            // unique, original order kept
            ordered = new LinkedHashSet<String>(words);
        } else {
            if (z5) {
                Collections.sort(words);
            }
            ordered = words;
        }
        StringBuilder joined = new StringBuilder();
        for (String word : ordered) {
            joined.append(word).append(' ');
        }
        return joined.toString().trim();
    }

    /**
     * Normalizes text with sorted words; shorthand for
     * {@code normalize(str, z, z2, z3, z4, true)}.
     *
     * @param str text to normalize
     * @param z   whether words are stemmed
     * @param z2  whether purely numeric words are removed (only effective when {@code z3} is set)
     * @param z3  whether stop words are removed
     * @param z4  whether duplicate words are dropped
     * @return the normalized text
     */
    public static String normalize(String str, boolean z, boolean z2, boolean z3, boolean z4) {
        final boolean sortWords = true;
        return normalize(str, z, z2, z3, z4, sortWords);
    }

    /**
     * Normalizes text with numeric word removal enabled; shorthand for
     * {@code normalize(str, z, true)}.
     *
     * @param str text to normalize
     * @param z   whether words are stemmed
     * @return the normalized text
     */
    public static String normalize(String str, boolean z) {
        final boolean stripDigits = true;
        return normalize(str, z, stripDigits);
    }

    /**
     * Normalizes text into words with numeric word removal enabled; shorthand for
     * {@code normalizeWords(str, z, true)}.
     *
     * @param str text to normalize
     * @param z   whether words are stemmed
     * @return the normalized words
     */
    public static List<String> normalizeWords(String str, boolean z) {
        final boolean stripDigits = true;
        return normalizeWords(str, z, stripDigits);
    }

    /**
     * Normalizes text into words with stop word removal enabled; shorthand for
     * {@code normalizeWords(str, z, z2, true)}.
     *
     * @param str text to normalize
     * @param z   whether words are stemmed
     * @param z2  whether purely numeric words are removed
     * @return the normalized words
     */
    public static List<String> normalizeWords(String str, boolean z, boolean z2) {
        final boolean stripStopWords = true;
        return normalizeWords(str, z, z2, stripStopWords);
    }

    /**
     * Normalizes text into a list of words.
     *
     * <p>The text is trimmed, stripped of diacritics and lower-cased; possessive
     * endings and parenthesized plural markers are removed; every non-word character
     * becomes a word break, except that decimal points inside numbers are preserved.
     * Note that text starting with a non-word character yields a leading empty word.
     *
     * @param str text to normalize
     * @param z   whether words are stemmed (likely abbreviations are left alone)
     * @param z2  whether purely numeric words are removed; only effective when {@code z3} is set
     * @param z3  whether stop words are removed
     * @return mutable list of the normalized words, in text order
     */
    public static List<String> normalizeWords(String str, boolean z, boolean z2, boolean z3) {
        String text = stripDiacritics(str.trim()).toLowerCase();
        // drop possessive endings: "patient's" -> "patient"
        text = text.replaceAll("\\b([a-z]+)'s?", "$1");
        // drop optional plural markers such as "(s)" or "(es)"
        text = text.replaceAll("\\(i?e?s\\)", "");
        // protect decimal points with '_' (a word character) so the next step keeps them
        text = text.replaceAll("(\\d+)\\.(\\d+)", "$1_$2");
        text = text.replaceAll("\\.(\\d+)", "_$1");
        // every remaining non-word character, with surrounding spaces, becomes one space
        text = text.replaceAll("\\s*\\W\\s*", " ");
        // restore the protected decimal points
        text = text.replaceAll("(\\d+)_(\\d+)", "$1.$2");
        text = text.replaceAll("_(\\d+)", ".$1");

        List<String> words = new ArrayList<String>(Arrays.asList(text.split("\\s+")));
        if (z3) {
            Iterator<String> cursor = words.iterator();
            while (cursor.hasNext()) {
                String word = cursor.next();
                if (isStopWord(word) || (z2 && word.matches("\\d+"))) {
                    cursor.remove();
                }
            }
        }
        if (z) {
            for (int idx = 0; idx < words.size(); idx++) {
                String word = words.get(idx);
                if (!isLikelyAbbreviation(word)) {
                    words.set(idx, stem(word));
                }
            }
        }
        return words;
    }

    /**
     * Heuristically decides whether a word looks like an abbreviation.
     *
     * <p>A word qualifies when any of the following holds: it mixes letters with
     * digits, brackets, parentheses, {@code +} or commas; it is at most five
     * characters long and entirely upper case; or it contains both upper and lower
     * case letters without being simply capitalized.
     *
     * @param str word to inspect
     * @return {@code true} if the word looks like an abbreviation
     */
    public static boolean isLikelyAbbreviation(String str) {
        boolean hasSymbolOrDigit = Pattern.compile("[\\(\\)\\[\\]+,0-9]").matcher(str).find();
        if (hasSymbolOrDigit && Pattern.compile("[A-Za-z]").matcher(str).find()) {
            return true;
        }
        StringStats stats = getStringStats(str);
        if (str.length() <= 5 && stats.isUppercase) {
            return true;
        }
        return !stats.isCapitalized && stats.lowerCase > 0 && stats.upperCase > 0;
    }

    /**
     * Computes the Levenshtein edit distance between two character sequences, i.e.
     * the minimum number of single-character insertions, deletions and substitutions
     * needed to turn one into the other. The comparison is case-sensitive.
     *
     * @param charSequence  first sequence
     * @param charSequence2 second sequence
     * @return the edit distance
     */
    public static int getLevenshteinDistance(CharSequence charSequence, CharSequence charSequence2) {
        int rows = charSequence.length();
        int cols = charSequence2.length();
        // only the previous and the current row of the distance table are kept
        int[] previous = new int[cols + 1];
        int[] current = new int[cols + 1];
        for (int col = 0; col <= cols; col++) {
            previous[col] = col;
        }
        for (int row = 1; row <= rows; row++) {
            current[0] = row;
            char left = charSequence.charAt(row - 1);
            for (int col = 1; col <= cols; col++) {
                int deletion = previous[col] + 1;
                int insertion = current[col - 1] + 1;
                int substitution = previous[col - 1] + (left == charSequence2.charAt(col - 1) ? 0 : 1);
                current[col] = Math.min(Math.min(deletion, insertion), substitution);
            }
            int[] swap = previous;
            previous = current;
            current = swap;
        }
        return previous[cols];
    }

    /**
     * Decides whether two strings are close enough to be considered the same term.
     *
     * <p>Rules, applied in order:
     * <ol>
     * <li>two {@code null}s are similar; one {@code null} is not;</li>
     * <li>lengths differing by more than 3 are not similar;</li>
     * <li>strings equal ignoring case are similar;</li>
     * <li>strings of 4 or fewer characters are otherwise not similar;</li>
     * <li>both must split (on whitespace or {@code _}) into the same number of
     *     words, and corresponding words must start with the same character
     *     (case-sensitive);</li>
     * <li>the case-insensitive Levenshtein distance must not exceed 1, 2 or 3 for
     *     first-argument lengths below 7, below 20, and 20 or more respectively.</li>
     * </ol>
     *
     * <p>Leading or repeated separators produce empty words; these are compared
     * safely (an empty word only matches another empty word) instead of raising
     * {@link StringIndexOutOfBoundsException}.
     *
     * @param str  first string, may be {@code null}
     * @param str2 second string, may be {@code null}
     * @return {@code true} if the strings are considered similar
     */
    public static boolean similar(String str, String str2) {
        if (str == null && str2 == null) {
            return true;
        }
        if (str == null || str2 == null || Math.abs(str.length() - str2.length()) > 3) {
            return false;
        }
        if (str.equalsIgnoreCase(str2)) {
            return true;
        }
        if (str.length() <= 4 || str2.length() <= 4) {
            return false;
        }
        String[] words = str.split("[\\s_]");
        String[] words2 = str2.split("[\\s_]");
        if (words.length != words2.length) {
            return false;
        }
        for (int i = 0; i < words.length; i++) {
            String word = words[i];
            String word2 = words2[i];
            if (word.isEmpty() || word2.isEmpty()) {
                // split() yields empty words for leading/adjacent separators
                if (word.isEmpty() != word2.isEmpty()) {
                    return false;
                }
                continue;
            }
            if (word.charAt(0) != word2.charAt(0)) {
                return false;
            }
        }
        int allowedEdits = str.length() < 7 ? 1 : str.length() >= 20 ? 3 : 2;
        return getLevenshteinDistance(str.toLowerCase(), str2.toLowerCase()) <= allowedEdits;
    }

    /**
     * Returns the table of characters that {@link #escapeURL(String)} replaces,
     * mapped to their percent-encoded form. The table is built on first use and
     * cached; the returned map is the shared cache itself.
     *
     * @return character to percent-code map
     */
    public static Map<Character, String> getURLEscapeCode() {
        if (urlEscapeCodeMap == null) {
            // parallel tables: symbols[n] is replaced by codes[n]
            char[] symbols = {
                ' ', '<', '>', '#', '%', '{', '}', '|', '\\', '^', '~',
                '[', ']', '`', ';', '/', '?', ':', '@', '=', '&', '$'
            };
            String[] codes = {
                "%20", "%3C", "%3E", "%23", "%25", "%7B", "%7D", "%7C", "%5C", "%5E", "%7E",
                "%5B", "%5D", "%60", "%3B", "%2F", "%3F", "%3A", "%40", "%3D", "%26", "%24"
            };
            urlEscapeCodeMap = new HashMap<Character, String>();
            for (int idx = 0; idx < symbols.length; idx++) {
                urlEscapeCodeMap.put(symbols[idx], codes[idx]);
            }
        }
        return urlEscapeCodeMap;
    }

    /**
     * Returns the table of characters that {@link #escapeHTML(String)} replaces,
     * mapped to their entity references. The table is built on first use and
     * cached; the returned map is the shared cache itself.
     *
     * <p>The ampersand maps to exactly {@code &amp;} with no surrounding
     * whitespace, so escaping never inserts extra characters into the text.
     *
     * @return character to entity reference map
     */
    public static Map<Character, String> getHTMLEscapeCode() {
        if (xmlEscapeCodeMap == null) {
            xmlEscapeCodeMap = new HashMap<Character, String>();
            xmlEscapeCodeMap.put('\"', "&quot;");
            xmlEscapeCodeMap.put('\'', "&apos;");
            xmlEscapeCodeMap.put('<', "&lt;");
            xmlEscapeCodeMap.put('>', "&gt;");
            xmlEscapeCodeMap.put('&', "&amp;");
        }
        return xmlEscapeCodeMap;
    }

    /**
     * Replaces every character listed in {@link #getURLEscapeCode()} with its
     * mapped code; all other characters are copied unchanged.
     *
     * @param str text to escape
     * @return the escaped text
     */
    public static String escapeURL(String str) {
        Map<Character, String> codes = getURLEscapeCode();
        StringBuilder escaped = new StringBuilder();
        for (int idx = 0; idx < str.length(); idx++) {
            char ch = str.charAt(idx);
            String code = codes.get(ch);
            if (code == null) {
                escaped.append(ch);
            } else {
                escaped.append(code);
            }
        }
        return escaped.toString();
    }

    /**
     * Replaces every character listed in {@link #getHTMLEscapeCode()} with its
     * mapped code; all other characters are copied unchanged.
     *
     * @param str text to escape
     * @return the escaped text
     */
    public static String escapeHTML(String str) {
        Map<Character, String> codes = getHTMLEscapeCode();
        StringBuilder escaped = new StringBuilder();
        for (int idx = 0; idx < str.length(); idx++) {
            char ch = str.charAt(idx);
            String code = codes.get(ch);
            if (code == null) {
                escaped.append(ch);
            } else {
                escaped.append(code);
            }
        }
        return escaped.toString();
    }

    /**
     * Returns a new array holding all elements of {@code tArr} followed by {@code t}.
     *
     * <p>The result has the same runtime component type as {@code tArr}, so any
     * element that fits the input array also fits the result. The input array may
     * contain {@code null} elements and {@code t} may be {@code null}.
     *
     * @param tArr source array, not modified
     * @param t    element to append
     * @param <T>  element type
     * @return a new array one element longer than {@code tArr}
     */
    public static <T> T[] addAll(T[] tArr, T t) {
        // copyOf keeps the array's own component type instead of guessing it from tArr[0]
        T[] grown = Arrays.copyOf(tArr, tArr.length + 1);
        grown[tArr.length] = t;
        return grown;
    }

    /**
     * Returns a new array holding all elements of {@code tArr} followed by all
     * elements of {@code tArr2}.
     *
     * <p>The result has the same runtime component type as {@code tArr}. Either
     * array may be empty or contain {@code null} elements.
     *
     * @param tArr  first source array, not modified
     * @param tArr2 second source array, not modified
     * @param <T>   element type
     * @return a new array of length {@code tArr.length + tArr2.length}
     */
    public static <T> T[] addAll(T[] tArr, T[] tArr2) {
        // copyOf keeps the array's own component type instead of guessing it from tArr[0]
        T[] joined = Arrays.copyOf(tArr, tArr.length + tArr2.length);
        System.arraycopy(tArr2, 0, joined, tArr.length, tArr2.length);
        return joined;
    }

    /**
     * Counts how often a character occurs in a string.
     *
     * @param str text to scan
     * @param c   character to count
     * @return number of occurrences of {@code c} in {@code str}
     */
    public static int charCount(String str, char c) {
        int total = 0;
        for (int at = str.indexOf(c); at >= 0; at = str.indexOf(c, at + 1)) {
            total++;
        }
        return total;
    }

    /**
     * Applies the letter case of a template string to another string, position by
     * position.
     *
     * <p>Each character of {@code str2} is upper-cased or lower-cased according to
     * the character of {@code str} at the same index; template characters that are
     * neither upper nor lower case leave the character unchanged. When {@code str2}
     * is longer than the template, the case of the template's last character is
     * applied to the remainder (or nothing is changed if the template is empty).
     *
     * @param str  template providing the case
     * @param str2 text whose characters are re-cased
     * @return {@code str2} with the template's case applied
     */
    public static String copyCharacterCase(String str, String str2) {
        char[] template = str.toCharArray();
        char[] source = str2.toCharArray();
        StringBuilder result = new StringBuilder();
        boolean upper = false;
        boolean lower = false;
        for (int pos = 0; pos < source.length; pos++) {
            if (pos < template.length) {
                // past the end of the template the last observed flags stay in effect
                upper = Character.isUpperCase(template[pos]);
                lower = Character.isLowerCase(template[pos]);
            }
            String letter = "" + source[pos];
            if (upper) {
                result.append(letter.toUpperCase());
            } else if (lower) {
                result.append(letter.toLowerCase());
            } else {
                result.append(letter);
            }
        }
        return result.toString();
    }

    /**
     * Parses a numeric value from text.
     *
     * <p>Text of the form {@code digits.digits} is parsed as a decimal number; any
     * other text is handed to {@link #parseIntegerValue(String)} and its result is
     * widened to {@code double}.
     *
     * @param str text to parse, may be {@code null}
     * @return the parsed value; {@code 0.0} for {@code null} input or if decimal parsing fails
     */
    public static double parseDecimalValue(String str) {
        if (str == null) {
            return 0.0d;
        }
        if (!str.matches("\\d+\\.\\d+")) {
            return parseIntegerValue(str);
        }
        try {
            return Double.parseDouble(str);
        } catch (Exception e) {
            e.printStackTrace();
            return 0.0d;
        }
    }

    /**
     * Parses an integer from Roman numerals, English number words or digits.
     *
     * <ul>
     * <li>Text made only of {@code I}, {@code V} and {@code X} (any case) is read as
     *     a Roman numeral; a {@code V} or {@code X} directly after an {@code I}
     *     counts as 4 or 9 together with that {@code I}.</li>
     * <li>Other purely alphabetic text is matched, ignoring case, against the words
     *     "zero" to "twelve".</li>
     * <li>Anything else is parsed with {@link Integer#parseInt(String)}.</li>
     * </ul>
     *
     * @param str text to parse
     * @return the parsed value, or {@link #NO_VALUE} if the text is not recognized
     */
    public static int parseIntegerValue(String str) {
        if (str.matches("[IiVvXx]+")) {
            int total = 0;
            boolean afterOne = false;
            for (char symbol : str.toCharArray()) {
                char numeral = Character.toLowerCase(symbol);
                if (numeral == 'i') {
                    total += 1;
                    afterOne = true;
                } else if (numeral == 'v') {
                    // the preceding 'i' already added 1, so "iv" needs 3 more
                    total += afterOne ? 3 : 5;
                    afterOne = false;
                } else if (numeral == 'x') {
                    // the preceding 'i' already added 1, so "ix" needs 8 more
                    total += afterOne ? 8 : 10;
                    afterOne = false;
                }
            }
            return total;
        }
        if (str.matches("[a-zA-Z]+")) {
            // the index of each word is its numeric value
            String[] names = {
                "zero", "one", "two", "three", "four", "five", "six",
                "seven", "eight", "nine", "ten", "eleven", "twelve"
            };
            for (int value = 0; value < names.length; value++) {
                if (names[value].equalsIgnoreCase(str)) {
                    return value;
                }
            }
            return NO_VALUE;
        }
        try {
            return Integer.parseInt(str);
        } catch (NumberFormatException e) {
            return NO_VALUE;
        }
    }

    /**
     * Tells whether the text is an unsigned integer or decimal number, i.e. digits
     * optionally followed by a period and more digits.
     *
     * @param str text to test
     * @return {@code true} if the whole text is such a number
     */
    public static boolean isNumber(String str) {
        return Pattern.matches("\\d+(\\.\\d+)?", str);
    }

    /**
     * Formats a number compactly: whole values are printed without a fractional
     * part, all other values with exactly two decimals.
     *
     * <p>Whole values outside the {@code int} range are printed in full rather than
     * being clamped to {@link Integer#MAX_VALUE} or {@link Integer#MIN_VALUE}.
     * Formatting uses the default locale.
     *
     * @param d value to format
     * @return the formatted value
     */
    public static String toString(double d) {
        boolean whole = (d * 10.0d) % 10.0d == 0.0d;
        if (!whole) {
            return String.format("%.2f", Double.valueOf(d));
        }
        if (Math.abs(d) < 1.0e18d) {
            // safely inside the long range, so the cast cannot saturate
            return String.format("%d", Long.valueOf((long) d));
        }
        // too large for a long: print the whole value without a fractional part
        return String.format("%.0f", Double.valueOf(d));
    }

    /**
     * Re-cases the words of a phrase: words longer than two characters get an
     * upper-case first letter and lower-case remainder, shorter words are
     * lower-cased entirely. Words are separated by whitespace or underscores in the
     * input and by single spaces in the result.
     *
     * @param str phrase to convert
     * @return the re-cased phrase
     */
    public static String getCapitalizedWords(String str) {
        StringBuilder phrase = new StringBuilder();
        for (String word : str.split("[\\s_]+")) {
            if (word.length() <= 2) {
                phrase.append(word.toLowerCase());
            } else {
                phrase.append(Character.toUpperCase(word.charAt(0)));
                phrase.append(word.substring(1).toLowerCase());
            }
            phrase.append(' ');
        }
        return phrase.toString().trim();
    }

    /**
     * Splits a comma separated line into fields; shorthand for
     * {@code parseCSVline(str, ',')}.
     *
     * @param str line to split
     * @return the fields of the line
     */
    public static List<String> parseCSVline(String str) {
        final char comma = ',';
        return parseCSVline(str, comma);
    }

    /**
     * Splits a delimited line into fields, honoring double quotes.
     *
     * <p>Delimiters inside a double-quoted section do not split the line. Every
     * field is trimmed, and a field fully enclosed in double quotes has that pair
     * of quotes removed. A lone quote character is kept as it is. An empty field
     * after a trailing delimiter is not reported.
     *
     * @param str line to split
     * @param c   field delimiter
     * @return the fields in order of appearance
     */
    public static List<String> parseCSVline(String str, char c) {
        List<String> fields = new ArrayList<String>();
        boolean quoted = false;
        int start = 0;
        for (int pos = 0; pos < str.length(); pos++) {
            char ch = str.charAt(pos);
            if (ch == '\"') {
                quoted = !quoted;
            }
            if (!quoted && ch == c) {
                fields.add(unquoteCsvField(str.substring(start, pos)));
                start = pos + 1;
            }
        }
        if (start < str.length()) {
            fields.add(unquoteCsvField(str.substring(start)));
        }
        return fields;
    }

    /** Trims a raw CSV field and removes one enclosing pair of double quotes, if present. */
    private static String unquoteCsvField(String raw) {
        String field = raw.trim();
        // length check: a single '"' both starts and ends with a quote but is not a pair
        if (field.length() >= 2 && field.startsWith("\"") && field.endsWith("\"")) {
            field = field.substring(1, field.length() - 1);
        }
        return field.trim();
    }

    /**
     * Splits text into sentences by delegating to {@code SentenceDetector.getSentences}.
     *
     * @param str text to split
     * @return whatever the sentence detector returns
     */
    public static List<String> getSentences(String str) {
        List<String> sentences = SentenceDetector.getSentences(str);
        return sentences;
    }

    /**
     * Tells whether a line looks like a report section heading: either letters,
     * spaces and hyphens enclosed in square brackets, or upper-case letters, spaces
     * and hyphens followed by a colon.
     *
     * @param str line to test
     * @return {@code true} if the line matches one of the two heading forms
     */
    public static boolean isReportSection(String str) {
        if (str.matches("^\\[[A-Za-z \\-]*\\]$")) {
            return true;
        }
        return str.matches("^[A-Z \\-]*:$");
    }

    /**
     * Collects character statistics for a string.
     *
     * <p>Besides the per-category counts, three flags are derived: the string is
     * "uppercase" (or "lowercase") when every character is upper (or lower) case,
     * which is also true for the empty string; it is "capitalized" when it starts
     * with an upper-case character and all remaining alphabetic characters are
     * lower case.
     *
     * @param str text to analyze
     * @return the statistics for {@code str}
     */
    public static StringStats getStringStats(String str) {
        StringStats stats = new StringStats();
        int size = str.length();
        stats.length = size;
        for (int pos = 0; pos < size; pos++) {
            char ch = str.charAt(pos);
            if (Character.isUpperCase(ch)) {
                stats.upperCase++;
            }
            if (Character.isLowerCase(ch)) {
                stats.lowerCase++;
            }
            if (Character.isDigit(ch)) {
                stats.digits++;
            }
            if (Character.isWhitespace(ch)) {
                stats.whiteSpace++;
            }
            if (Character.isAlphabetic(ch)) {
                stats.alphabetic++;
            }
        }
        stats.isUppercase = stats.upperCase == size;
        stats.isLowercase = stats.lowerCase == size;
        boolean startsUpper = size > 0 && Character.isUpperCase(str.charAt(0));
        stats.isCapitalized = startsUpper && stats.lowerCase == stats.alphabetic - 1;
        return stats;
    }

    /**
     * Parses a date written in one of a few known layouts.
     *
     * <p>The trimmed text is matched against a table of regular expressions, each
     * paired with a {@link SimpleDateFormat} pattern ({@code yyyyMMdd HHmm},
     * {@code yyyyMMdd}, {@code MM/dd/yyyy} and
     * {@code EEE MMM dd kk:mm:ss z yyyy}). The table is built on first use.
     *
     * @param str text to parse
     * @return the parsed date, or {@code null} if no layout matches or parsing fails
     */
    public static Date parseDate(String str) {
        if (timePatterns == null) {
            timePatterns = new HashMap<String, String>();
            timePatterns.put("[12][09]\\d{2}[01]\\d{3} \\d{4}", "yyyyMMdd HHmm");
            timePatterns.put("[12][09]\\d{2}[01]\\d{3}", "yyyyMMdd");
            timePatterns.put("\\d{1,2}/\\d{1,2}/\\d{4}", "MM/dd/yyyy");
            timePatterns.put("([A-Z][a-z]+ ){2}\\d{1,2} [\\d:]+ [A-Z]+ \\d{4}", "EEE MMM dd kk:mm:ss z yyyy");
        }
        String text = str.trim();
        for (Map.Entry<String, String> layout : timePatterns.entrySet()) {
            if (!text.matches(layout.getKey())) {
                continue;
            }
            try {
                return new SimpleDateFormat(layout.getValue()).parse(text);
            } catch (ParseException e) {
                // this layout did not fit after all; fall through to the next one
            }
        }
        return null;
    }

    /**
     * Small manual check: prints the string statistics of a sample word.
     *
     * @param strArr ignored
     * @throws Exception declared for convenience
     */
    public static void main(String[] strArr) throws Exception {
        StringStats sample = getStringStats("Cancer.");
        System.out.println(sample);
    }
}
