package vn.hungnt.util;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import vn.hungnt.postagger.RDRPOSTagger;

/* loaded from: input_file:vn/hungnt/util/CoNLLFormatCreator.class */
/**
 * Converts POS-tagged, word-segmented text into a ten-column, tab-separated
 * CoNLL-style layout.
 *
 * <p>Input tokens have the form {@code word/TAG} and are separated by single
 * spaces. Each token becomes one output row with the columns: running index,
 * word, {@code _}, coarse tag, tag, {@code _}, {@code 0}, {@code root},
 * {@code _}, {@code _}. The head and relation columns are therefore constant
 * placeholders ({@code 0} / {@code root}), not a real dependency analysis.
 */
public class CoNLLFormatCreator {
    /** Filler written into columns for which no value is available. */
    private static final String PLACEHOLDER = "_";

    /**
     * Converts one tagged sentence into CoNLL-style rows.
     *
     * <p>The sentence is split on single spaces; empty tokens (produced by
     * repeated spaces) are skipped and do not consume an index. Each remaining
     * token is split at its <em>last</em> {@code /} into word and tag. The
     * token {@code ///} is treated specially as the word {@code /} tagged
     * {@code /}. The coarse tag is the first character of the tag, except for
     * {@code LBKT} and {@code RBKT}, which are kept whole.
     *
     * <p>Malformed tokens no longer raise
     * {@link StringIndexOutOfBoundsException}: a token without any {@code /}
     * is emitted as a word whose tag is {@code _}, and a token whose tag part
     * is empty (for example {@code word/}) also gets the tag {@code _}.
     *
     * @param str the tagged sentence; must not be {@code null}
     * @return one newline-terminated row per token, or an empty string when
     *         the sentence contains no tokens
     */
    public static String fromTaggedSen2CoNLL(String str) {
        StringBuilder rows = new StringBuilder();
        int index = 0;
        for (String rawToken : str.split(" ")) {
            String token = rawToken.trim();
            if (token.isEmpty()) {
                continue;
            }
            index++;

            String word;
            String tag;
            if (token.equals("///")) {
                // A literal slash tagged as a slash; splitting at the last '/'
                // would otherwise yield the word "//" with an empty tag.
                word = "/";
                tag = "/";
            } else {
                int slash = token.lastIndexOf('/');
                if (slash < 0) {
                    // Untagged token: keep the whole token as the word.
                    word = token;
                    tag = PLACEHOLDER;
                } else {
                    word = token.substring(0, slash);
                    tag = token.substring(slash + 1);
                    if (tag.isEmpty()) {
                        // Trailing slash, nothing after it to use as a tag.
                        tag = PLACEHOLDER;
                        if (word.isEmpty()) {
                            // The token is a lone "/": use it as the word.
                            word = token;
                        }
                    }
                }
            }
            appendRow(rows, index, word, tag);
        }
        return rows.toString();
    }

    /** Appends a single ten-column, newline-terminated row for one token. */
    private static void appendRow(StringBuilder rows, int index, String word, String tag) {
        boolean bracketTag = tag.equals("LBKT") || tag.equals("RBKT");
        String coarseTag = bracketTag ? tag : String.valueOf(tag.charAt(0));

        rows.append(index).append("\t")
            .append(word).append("\t")
            .append(PLACEHOLDER).append("\t")
            .append(coarseTag).append("\t")
            .append(tag).append("\t")
            .append(PLACEHOLDER).append("\t")
            .append("0").append("\t")
            .append("root").append("\t")
            .append(PLACEHOLDER).append("\t")
            .append(PLACEHOLDER).append("\n");
    }

    /**
     * Tags a word-segmented sentence with
     * {@code RDRPOSTagger.tagVnWSSentence} and converts the tagged result
     * with {@link #fromTaggedSen2CoNLL(String)}.
     *
     * @param str the word-segmented sentence to tag
     * @return the CoNLL-style rows for the tagged sentence
     * @throws IOException declared for callers; propagated from the tagger if
     *         it raises one
     */
    public static String toCoNLL4Sentence(String str) throws IOException {
        return fromTaggedSen2CoNLL(RDRPOSTagger.tagVnWSSentence(str));
    }

    /**
     * Tags a word-segmented corpus file and writes a CoNLL-style version of
     * the tagged output.
     *
     * <p>{@code RDRPOSTagger.tagVnWSCorpus} is invoked on {@code str}; the
     * file {@code str + ".TAGGED"} is then read line by line as UTF-8 and each
     * line is converted with {@link #fromTaggedSen2CoNLL(String)} and written,
     * followed by an extra newline, to {@code str + ".TAGGED.CONLL"} (UTF-8).
     * Progress messages are printed to standard output.
     *
     * <p>Both streams are closed even when reading, converting or writing
     * fails part-way through.
     *
     * @param str path of the word-segmented corpus file
     * @throws IOException if tagging, reading or writing fails
     */
    public static void toCoNLL4Corpus(String str) throws IOException {
        String taggedPath = str + ".TAGGED";
        String conllPath = str + ".TAGGED.CONLL";

        System.out.println("Tagging word-segmented corpus: " + str);
        RDRPOSTagger.tagVnWSCorpus(str);
        System.out.println("\tOutput POS-tagged corpus: " + taggedPath);
        System.out.println("Converting POS-tagged corpus to be in CoNLL format...");

        try (BufferedReader reader = new BufferedReader(
                     new InputStreamReader(new FileInputStream(new File(taggedPath)), "UTF-8"));
             BufferedWriter writer = new BufferedWriter(
                     new OutputStreamWriter(new FileOutputStream(conllPath), "UTF-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                writer.write(fromTaggedSen2CoNLL(line));
                // Blank line separating consecutive sentences.
                writer.write("\n");
            }
        }
        System.out.println("\tOutput POS-tagged corpus in CoNLL format: " + conllPath);
    }

    /**
     * Entry point that intentionally does nothing.
     *
     * @param strArr command-line arguments (ignored)
     * @throws IOException never thrown by the current empty body
     */
    public static void main(String[] strArr) throws IOException {
    }
}
