package jp.go.nict.langrid.wrapper.ws_1_2.util;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:jp/go/nict/langrid/wrapper/ws_1_2/util/ExceptionWord.class */
public class ExceptionWord {
    private static ArrayList<HashMap<String, String>> replace = new ArrayList<>();
    private static HashMap<String, ArrayList<ArrayList<String>>> exceptionWords = new HashMap<>();
    private static HashMap<String, ArrayList<String>> separatorArrays = new HashMap<>();
    private static String patternForEncode = "[.!?][-_~*.!?()a-zA-Z0-9;\\/:@&=+$,%#]*[-_~*()a-zA-Z0-9;\\/:@&=+$,%#]";

    public static String encodeExceptionWord(String str) {
        for (int i = 0; i < replace.size(); i++) {
            String[] flatten = flatten(exceptionWords.get(replace.get(i).get("name")));
            if (flatten.length != 0) {
                for (int i2 = 0; i2 < flatten.length; i2++) {
                    flatten[i2] = flatten[i2].toLowerCase();
                }
                Arrays.sort(flatten);
                Matcher matcher = Pattern.compile("(\\s+)|(<[^>]*>)").matcher(str);
                if (matcher.find()) {
                    ArrayList arrayList = new ArrayList();
                    int i3 = 0;
                    do {
                        HashMap hashMap = new HashMap();
                        hashMap.put("head", Integer.valueOf(i3));
                        hashMap.put("tail", Integer.valueOf(matcher.start()));
                        arrayList.add(hashMap);
                        i3 = matcher.end();
                    } while (matcher.find());
                    HashMap hashMap2 = new HashMap();
                    hashMap2.put("head", Integer.valueOf(i3));
                    hashMap2.put("tail", Integer.valueOf(str.length()));
                    arrayList.add(hashMap2);
                    Collections.reverse(arrayList);
                    Iterator it = arrayList.iterator();
                    while (it.hasNext()) {
                        HashMap hashMap3 = (HashMap) it.next();
                        String substring = str.substring(((Integer) hashMap3.get("head")).intValue(), ((Integer) hashMap3.get("tail")).intValue());
                        if (Arrays.binarySearch(flatten, substring.toLowerCase()) >= 0) {
                            str = str.substring(0, ((Integer) hashMap3.get("head")).intValue()) + substring.replace(replace.get(i).get("symbol"), replace.get(i).get("rule")) + str.substring(((Integer) hashMap3.get("tail")).intValue());
                        }
                    }
                }
            }
        }
        return str;
    }

    public static String encodeInvalidSeparatorWithLanguage(String str, String str2) {
        String encode = encode(str, patternForEncode);
        if (str2.equals("ja") || str2.equals("zh") || str2.equals("ko") || str2.startsWith("zh-")) {
            Iterator<HashMap<String, String>> it = replace.iterator();
            while (it.hasNext()) {
                HashMap<String, String> next = it.next();
                encode = encode.replace(next.get("symbol") + "」", next.get("rule") + "」");
            }
        }
        if (str2.equals("en") || str2.equals("de") || str2.equals("es") || str2.equals("pt") || str2.equals("fr") || str2.equals("it") || str2.equals("ko")) {
            String[] strArr = (String[]) getSeparators(str2).toArray(new String[0]);
            Arrays.sort(strArr);
            Iterator<HashMap<String, String>> it2 = replace.iterator();
            while (it2.hasNext()) {
                HashMap<String, String> next2 = it2.next();
                if (Arrays.binarySearch(strArr, next2.get("symbol")) >= 0) {
                    encode = encode.replace(next2.get("symbol") + "\"", next2.get("rule") + "\"");
                }
            }
        }
        Matcher matcher = Pattern.compile("([.!?>]|^)\\s+(\\d+\\.?)+\\.").matcher(encode);
        while (matcher.find()) {
            encode = encode.substring(0, matcher.end() - 1) + "[[#dot]]" + encode.substring(matcher.end());
        }
        return encode;
    }

    public static String encode(String str, String str2) {
        Matcher matcher = Pattern.compile(str2, 2).matcher(str);
        while (matcher.find()) {
            String group = matcher.group();
            int indexOf = str.indexOf(group);
            String substring = str.substring(0, indexOf);
            String substring2 = str.substring(indexOf + group.length());
            Iterator<HashMap<String, String>> it = replace.iterator();
            while (it.hasNext()) {
                HashMap<String, String> next = it.next();
                group = group.replace(next.get("symbol"), next.get("rule"));
            }
            str = substring + group + substring2;
        }
        return str;
    }

    public static String decode(String str) {
        Iterator<HashMap<String, String>> it = replace.iterator();
        while (it.hasNext()) {
            HashMap<String, String> next = it.next();
            str = str.replace(next.get("rule"), next.get("symbol"));
        }
        return str;
    }

    public static ArrayList<String> getSeparators(String str) {
        ArrayList<String> arrayList = separatorArrays.get(str);
        return arrayList != null ? arrayList : str.startsWith("zh-") ? separatorArrays.get("zh") : separatorArrays.get("en");
    }

    public static String[] flatten(ArrayList<ArrayList<String>> arrayList) {
        ArrayList arrayList2 = new ArrayList();
        if (arrayList == null) {
            return (String[]) arrayList2.toArray(new String[0]);
        }
        Iterator<ArrayList<String>> it = arrayList.iterator();
        while (it.hasNext()) {
            arrayList2.addAll(it.next());
        }
        return (String[]) arrayList2.toArray(new String[0]);
    }

    static {
        separatorArrays.put("bg", new ArrayList<>(Arrays.asList(".", "?", "!")));
        separatorArrays.put("de", new ArrayList<>(Arrays.asList(".", "?", "!")));
        separatorArrays.put("en", new ArrayList<>(Arrays.asList(".", "?", "!")));
        separatorArrays.put("es", new ArrayList<>(Arrays.asList(".", "?", "!")));
        separatorArrays.put("fr", new ArrayList<>(Arrays.asList(".", "?", "!")));
        separatorArrays.put("it", new ArrayList<>(Arrays.asList(".", "?", "!")));
        separatorArrays.put("ja", new ArrayList<>(Arrays.asList("。", "．", ".", "？", "！", "?", "!")));
        separatorArrays.put("ko", new ArrayList<>(Arrays.asList(".", "?", "!")));
        separatorArrays.put("pt", new ArrayList<>(Arrays.asList(".", "?", "!")));
        separatorArrays.put("zh", new ArrayList<>(Arrays.asList("。", "？", "?", "!", "！", "．", ".")));
        ArrayList<ArrayList<String>> arrayList = new ArrayList<>();
        arrayList.add(new ArrayList<>(Arrays.asList("a.", "Abb.", "accel.", "ahd.", "Al.", "Anm.", "Anon.", "approx.", "Apr.", "Apt.", "art.", "Aug.", "av.", "Ave.")));
        arrayList.add(new ArrayList<>(Arrays.asList("b.", "Bd.", "Bde.", "bld.", "bldg.", "Blvd.", "bzw.")));
        arrayList.add(new ArrayList<>(Arrays.asList("c.", "ca.", "cf.", "chap.", "chaps.", "cho.", "Co.", "col.", "col Ped.", "Corp.", "corp.", "cresc.")));
        arrayList.add(new ArrayList<>(Arrays.asList("d.", "Dec.", "decresc.", "ders.", "dept.", "dimin.", "do.", "Dr.")));
        arrayList.add(new ArrayList<>(Arrays.asList("e.", "ea.", "ed.", "eds.", "enc.", "env.", "etc.", "exp.", "ex.")));
        arrayList.add(new ArrayList<>(Arrays.asList("f.", "Feb.", "ff.", "fig.", "Fl.", "figs.", "fol.", "Fri.")));
        arrayList.add(new ArrayList<>(Arrays.asList("g.", "Gl.", "govt.")));
        arrayList.add(new ArrayList<>(Arrays.asList("h.", "Hg.", "hg.", "Hgg.", "Hrsg.", "hmhge.", "hrsg.")));
        arrayList.add(new ArrayList<>(Arrays.asList("i.", "ib.", "ibid.", "id.", "Inc.", "inc.", "inv.")));
        arrayList.add(new ArrayList<>(Arrays.asList("j.", "Jan.", "Jg.", "Jul.", "Jun.", "Jr.")));
        arrayList.add(new ArrayList<>(Arrays.asList("k.")));
        arrayList.add(new ArrayList<>(Arrays.asList("l.", "ll.", "Ln.", "Ltd.", "ltd.", "lib.")));
        arrayList.add(new ArrayList<>(Arrays.asList("m.", "Mar.", "mdse.", "Messers.", "mhd.", "mo.", "Mon.", "Mr.", "Mrs.", "Ms.")));
        arrayList.add(new ArrayList<>(Arrays.asList("n.", "nd.", "nhd.", "Nm.", "nn.", "no.", "nos.", "Nov.", "Nr.")));
        arrayList.add(new ArrayList<>(Arrays.asList("o.", "Oct.", "od.", "op.cit.")));
        arrayList.add(new ArrayList<>(Arrays.asList("p.", "par.", "pars.", "Ph.D.", "pl.", "p.m.", "pmk.", "po.", "policli.", "pp.", "Prof.", "pseud.")));
        arrayList.add(new ArrayList<>(Arrays.asList("q.", "qtr.")));
        arrayList.add(new ArrayList<>(Arrays.asList("r.", "rall.", "Rd.", "Re.", "rec.", "REG.", "Ret.", "rinforz.", "rinfz.", "rit.", "ritard.", "Rm.", "Rp.")));
        arrayList.add(new ArrayList<>(Arrays.asList("s.", "Sat.", "sec.", "Seg.", "Sep.", "Sept.", "SFOR.", "Sig.", "smorz.", "Sp.", "spp.", "so.", "St.", "st.", "Sun.", "Syn.")));
        arrayList.add(new ArrayList<>(Arrays.asList("t.", "Taf.", "T.B.", "T.H.I.", "Thu.", "t.i.d.", "trans.", "transl.", "Tue")));
        arrayList.add(new ArrayList<>(Arrays.asList("u.", "ut.", "UVs.")));
        arrayList.add(new ArrayList<>(Arrays.asList("v.", "vgl.", "viz.", "Vol.", "vol.", "volz.", "vs.", "ver.")));
        arrayList.add(new ArrayList<>(Arrays.asList("w.", "WC.", "Wed.", "wk.", "wks.")));
        arrayList.add(new ArrayList<>(Arrays.asList("x.")));
        arrayList.add(new ArrayList<>(Arrays.asList("y.")));
        arrayList.add(new ArrayList<>(Arrays.asList("z.")));
        arrayList.add(new ArrayList<>(Arrays.asList("übers.")));
        exceptionWords.put("dot", arrayList);
        ArrayList<ArrayList<String>> arrayList2 = new ArrayList<>();
        arrayList2.add(new ArrayList<>(new ArrayList()));
        exceptionWords.put("question", arrayList2);
        exceptionWords.put("exlamation", arrayList2);
        exceptionWords.put("kuten", arrayList2);
        exceptionWords.put("mbdot", arrayList2);
        exceptionWords.put("mbexclamation", arrayList2);
        exceptionWords.put("mbquestion", arrayList2);
        HashMap<String, String> hashMap = new HashMap<>();
        hashMap.put("name", "dot");
        hashMap.put("symbol", ".");
        hashMap.put("rule", "[[#dot]]");
        replace.add(hashMap);
        HashMap<String, String> hashMap2 = new HashMap<>();
        hashMap2.put("name", "question");
        hashMap2.put("symbol", "?");
        hashMap2.put("rule", "[[#question]]");
        replace.add(hashMap2);
        HashMap<String, String> hashMap3 = new HashMap<>();
        hashMap3.put("name", "exclamation");
        hashMap3.put("symbol", "!");
        hashMap3.put("rule", "[[#exclamation]]");
        replace.add(hashMap3);
        HashMap<String, String> hashMap4 = new HashMap<>();
        hashMap4.put("name", "kuten");
        hashMap4.put("symbol", "。");
        hashMap4.put("rule", "[[#kuten]]");
        replace.add(hashMap4);
        HashMap<String, String> hashMap5 = new HashMap<>();
        hashMap5.put("name", "mbdot");
        hashMap5.put("symbol", "．");
        hashMap5.put("rule", "[[#mbdot]]");
        replace.add(hashMap5);
        HashMap<String, String> hashMap6 = new HashMap<>();
        hashMap6.put("name", "mbexclamation");
        hashMap6.put("symbol", "！");
        hashMap6.put("rule", "[[#mbexclamation]]");
        replace.add(hashMap6);
        HashMap<String, String> hashMap7 = new HashMap<>();
        hashMap7.put("name", "mbquestion");
        hashMap7.put("symbol", "？");
        hashMap7.put("rule", "[[#mbquestion]]");
        replace.add(hashMap7);
    }
}
