package vn.hungnt.postagger;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Pattern;

/* loaded from: input_file:vn/hungnt/postagger/InitialTagger_Vn.class */
/**
 * Assigns an initial part-of-speech tag to every token of a Vietnamese sentence
 * using dictionary lookups followed by a few surface-form heuristics.
 *
 * <p>Input sentences are expected to be already tokenised, with tokens separated
 * by a single space character.
 */
public class InitialTagger_Vn {
    /** Matches any token that contains at least one ASCII digit. */
    private static final Pattern NUMP = Pattern.compile("[0-9]");

    /** Word-to-tag dictionary consulted first; loaded from {@code Dicts/VNFREQ.DICT}. */
    public static HashMap<String, String> FREQDICT = Utils.getDictionary("Dicts/VNFREQ.DICT");

    /** Word-to-tag dictionary consulted second; loaded from {@code Dicts/VNOTHERS.DICT}. */
    public static HashMap<String, String> UNKNWORDSDICT = Utils.getDictionary("Dicts/VNOTHERS.DICT");

    /**
     * Dictionary loaded from {@code Dicts/VNNAMES.DICT}. Only its keys are used here:
     * any token found in it is tagged {@code "Np"}.
     */
    public static HashMap<String, String> VNNAMES = Utils.getDictionary("Dicts/VNNAMES.DICT");

    /**
     * Tags every line of a UTF-8 text file and writes the result to another UTF-8 file.
     *
     * <p>Each input line is trimmed. A blank line produces an empty output line; any
     * other line produces its tokens as {@code word/tag} pairs, each followed by a
     * space, terminated by {@code "\n"}.
     *
     * @param str  path of the input file to read
     * @param str2 path of the output file to create or overwrite
     * @throws IOException if either file cannot be opened, read or written
     */
    public static void VnInitTagger4Corpus(String str, String str2) throws IOException {
        // try-with-resources guarantees both streams are closed even when reading,
        // tagging or writing fails part-way through the corpus.
        try (BufferedReader reader = new BufferedReader(
                     new InputStreamReader(new FileInputStream(new File(str)), "UTF-8"));
             BufferedWriter writer = new BufferedWriter(
                     new OutputStreamWriter(new FileOutputStream(str2), "UTF-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String sentence = line.trim();
                if (sentence.length() != 0) {
                    for (WordTag wordTag : VnInitTagger4Sentence(sentence)) {
                        writer.write(String.valueOf(wordTag.word) + "/" + wordTag.tag + " ");
                    }
                }
                // Every input line, blank or not, yields exactly one output line.
                writer.write("\n");
            }
        }
    }

    /**
     * Tags each token of a single sentence.
     *
     * @param str the sentence, with tokens separated by single spaces
     * @return one {@link WordTag} per token, in sentence order
     */
    public static List<WordTag> VnInitTagger4Sentence(String str) {
        List<WordTag> tagged = new ArrayList<WordTag>();
        for (String word : str.split(" ")) {
            tagged.add(new WordTag(word, initialTag(word)));
        }
        return tagged;
    }

    /**
     * Chooses the initial tag for one token. The checks are ordered by priority and
     * the first one that matches wins; {@code "N"} is the fallback.
     */
    private static String initialTag(String word) {
        if (FREQDICT.containsKey(word)) {
            return FREQDICT.get(word);
        }
        if (UNKNWORDSDICT.containsKey(word)) {
            return UNKNWORDSDICT.get(word);
        }
        if (VNNAMES.containsKey(word)) {
            return "Np";
        }
        if (NUMP.matcher(word).find()) {
            return "M";
        }
        // A single upper-case character.
        if (word.length() == 1 && Character.isUpperCase(word.charAt(0))) {
            return "Y";
        }
        if (Utils.isAbbre(word)) {
            return "Ny";
        }
        if (Utils.isVnProperNoun(word)) {
            return "Np";
        }
        return "N";
    }

    /**
     * Demo entry point: tags a fixed sample sentence and prints the
     * {@code word/tag} pairs to standard output.
     *
     * @param strArr command-line arguments (unused)
     * @throws IOException declared for interface compatibility
     */
    public static void main(String[] strArr) throws IOException {
        for (WordTag wordTag : VnInitTagger4Sentence("Chiến_tranh đi qua để lại quê_hương Thái_Mỹ , huyện Củ_Chi LBKT TP._HCM RBKT hộ gia_đình chính_sách và hơn 2.000 ha \" đất thép \" .")) {
            System.out.print(String.valueOf(wordTag.word) + "/" + wordTag.tag + " ");
        }
    }
}
