package org.apache.ctakes.smokingstatus.MLutil;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.regex.Pattern;
import org.apache.ctakes.smokingstatus.Const;

/* loaded from: input_file:org/apache/ctakes/smokingstatus/MLutil/GenerateTrainingData.class */
/**
 * Converts a file of {@code sentence|label} records into binary feature vectors and writes
 * them out in libsvm's sparse text format.
 *
 * <p>Each vector holds one 0/1 column per keyword (a keyword containing {@code '_'} is matched
 * against adjacent-token bigrams, any other keyword against single tokens), followed by one
 * 0/1 column telling whether any token looks like a year or date, followed by the class label.
 */
public class GenerateTrainingData {
    /** Default keyword list used by {@link #main(String[])} when no argument is given. */
    private static final String DEFAULT_KEYWORDS_FILE =
            "C:/cTAKES-1.0.5/smoking status/resources/ss/data/PCS/keywords_PCS_NHGRI.txt";
    /** Default stop-word list used by {@link #main(String[])} when no argument is given. */
    private static final String DEFAULT_STOPWORDS_FILE =
            "C:/cTAKES-1.0.5/smoking status/resources/ss/data/PCS/stopwords_PCS.txt";
    /** Default labelled-sentence input used by {@link #main(String[])}. */
    private static final String DEFAULT_INPUT_FILE = "C:/Temp/SentenceLevelSmokingStatus_PCS.txt";
    /** Default libsvm output used by {@link #main(String[])}. */
    private static final String DEFAULT_OUTPUT_FILE = "C:/Temp/libsvm_data.txt";

    // Patterns are compiled once; the original recompiled each of them for every line/token.
    private static final Pattern PUNCTUATION = Pattern.compile("[.?!:;()',\"{}<>#+]");
    private static final Pattern DASH_RUN = Pattern.compile("-{2,}");
    private static final Pattern WHITESPACE = Pattern.compile("\\s");
    /** Years ("1998", "1990s", "80s") and numeric dates ("3/04", "3/2004", "3/4/04", "3/4/2004"). */
    private static final Pattern DATE_TOKEN = Pattern.compile(
            "19\\d\\d|19\\d\\ds|20\\d\\d|20\\d\\ds|[1-9]0s"
            + "|\\d{1,2}[/-]\\d{1,2}|\\d{1,2}[/-]\\d{4}"
            + "|\\d{1,2}[/-]\\d{1,2}[/-]\\d{2}|\\d{1,2}[/-]\\d{1,2}[/-]\\d{4}");

    // NOTE(review): the order of the keyword columns is the iteration order of this set.
    // It is deliberately still a HashSet; presumably whatever consumes the generated data
    // enumerates the keywords the same way - confirm before switching to an ordered set.
    Set<String> keywords;
    Set<String> stopwords;
    /** One row per processed sentence: Integer feature flags followed by the String class label. */
    @SuppressWarnings("rawtypes")
    List<List<Comparable>> features = new ArrayList<>();

    /**
     * Loads the keyword and stop-word lists. Loading is best-effort: if a file cannot be read,
     * the stack trace is printed and the corresponding set is left empty.
     *
     * @param str  path of the keyword file
     * @param str2 path of the stop-word file
     */
    GenerateTrainingData(String str, String str2) {
        this.keywords = loadOrEmpty(str);
        this.stopwords = loadOrEmpty(str2);
    }

    /** Reads a word list, falling back to an empty set (after printing the failure) on error. */
    private static Set<String> loadOrEmpty(String path) {
        try {
            return readLinesFromFile(path);
        } catch (IOException | RuntimeException e) {
            // RuntimeException is included so that e.g. a null path stays non-fatal, as before.
            e.printStackTrace();
            return new HashSet<>();
        }
    }

    /**
     * Reads a file into a set of lower-cased lines, skipping blank lines and lines that start
     * with {@code //}. The reader is always closed, including when reading fails.
     *
     * @param str path of the file to read
     * @return the distinct, lower-cased, non-comment lines of the file
     * @throws IOException if the file cannot be opened or read
     */
    private static Set<String> readLinesFromFile(String str) throws IOException {
        Set<String> lines = new HashSet<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(new File(str)))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (!line.startsWith("//") && line.trim().length() > 0) {
                    lines.add(line.toLowerCase());
                }
            }
        }
        return lines;
    }

    /**
     * Reads {@code sentence|label} records from a file and appends one feature row per record
     * to {@link #features}.
     *
     * <p>Empty lines are ignored. A line that does not split into a non-zero, even number of
     * {@code '|'}-separated fields is reported on {@code System.err} and skipped; when a line
     * holds several pairs, the last pair is used. A label that starts with none of
     * {@code p}, {@code c} or {@code s} (case-insensitive) terminates the JVM with status 1.
     * I/O and runtime failures are printed and end processing of the file.
     *
     * @param str path of the labelled-sentence file
     */
    public void makeFeatures(String str) {
        try (BufferedReader reader = new BufferedReader(new FileReader(str))) {
            String line;
            int lineNumber = 0;
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                if (line.length() == 0) {
                    continue;
                }
                String[] record = parseRecord(line);
                if (record == null) {
                    System.err.println("Skipping malformed line " + lineNumber
                            + " (expected sentence|label): " + line);
                    continue;
                }
                // The label is resolved first so an unknown label aborts before any feature work.
                String label = toClassLabel(record[1]);
                List<String> unigrams = tokenize(record[0]);
                List<String> bigrams = toBigrams(unigrams);
                this.features.add(buildRow(unigrams, bigrams, label));
            }
        } catch (IOException | RuntimeException e) {
            e.printStackTrace();
        }
    }

    /**
     * Splits a line on {@code '|'} into a trimmed {sentence, label} pair.
     *
     * @return the last pair on the line, or {@code null} if the field count is zero or odd
     */
    private static String[] parseRecord(String line) {
        StringTokenizer fields = new StringTokenizer(line, "|");
        int fieldCount = fields.countTokens();
        if (fieldCount == 0 || fieldCount % 2 != 0) {
            return null;
        }
        String sentence = "";
        String label = "";
        while (fields.hasMoreTokens()) {
            sentence = fields.nextToken().trim();
            label = fields.nextToken().trim();
        }
        return new String[] {sentence, label};
    }

    /**
     * Maps a raw label to the matching {@code Const.CLASS_*} constant by its first letter.
     * Prints a message and exits the JVM with status 1 when the label is not recognised.
     */
    private static String toClassLabel(String rawLabel) {
        String lowered = rawLabel.toLowerCase();
        if (lowered.startsWith("p")) {
            return Const.CLASS_PAST_SMOKER;
        }
        if (lowered.startsWith("c")) {
            return Const.CLASS_CURR_SMOKER;
        }
        if (lowered.startsWith("s")) {
            return Const.CLASS_SMOKER;
        }
        System.out.println("Undefined class label:" + rawLabel);
        System.exit(1);
        return rawLabel; // only reached if exit is prevented from terminating the JVM
    }

    /**
     * Lower-cases the sentence, blanks out punctuation and runs of two or more dashes, splits
     * on whitespace and drops stop words and empty tokens.
     */
    private List<String> tokenize(String sentence) {
        String cleaned = PUNCTUATION.matcher(sentence.toLowerCase()).replaceAll(" ").trim();
        cleaned = DASH_RUN.matcher(cleaned).replaceAll(" ").trim();
        List<String> tokens = new ArrayList<>();
        for (String candidate : WHITESPACE.split(cleaned)) {
            if (!this.stopwords.contains(candidate) && candidate.trim().length() > 0) {
                tokens.add(candidate);
            }
        }
        return tokens;
    }

    /** Joins each pair of adjacent tokens with {@code '_'}. */
    private static List<String> toBigrams(List<String> tokens) {
        List<String> bigrams = new ArrayList<>();
        for (int i = 0; i + 1 < tokens.size(); i++) {
            bigrams.add(tokens.get(i) + "_" + tokens.get(i + 1));
        }
        return bigrams;
    }

    /** Builds one row: a 0/1 flag per keyword, the 0/1 date flag, then the class label. */
    @SuppressWarnings("rawtypes")
    private List<Comparable> buildRow(List<String> unigrams, List<String> bigrams, String label) {
        List<Comparable> row = new ArrayList<>();
        for (String keyword : this.keywords) {
            // Keywords written with '_' are bigram keywords; all others are single words.
            List<String> candidates = keyword.contains("_") ? bigrams : unigrams;
            row.add(Integer.valueOf(containsIgnoreCase(candidates, keyword) ? 1 : 0));
        }
        row.add(Integer.valueOf(mentionsDate(unigrams) ? 1 : 0));
        row.add(label);
        return row;
    }

    /** Returns whether any element equals {@code wanted}, ignoring case. */
    private static boolean containsIgnoreCase(List<String> values, String wanted) {
        for (String value : values) {
            if (wanted.equalsIgnoreCase(value)) {
                return true;
            }
        }
        return false;
    }

    /** Returns whether any token is, in its entirety, a year or numeric date expression. */
    private static boolean mentionsDate(List<String> tokens) {
        for (String token : tokens) {
            if (DATE_TOKEN.matcher(token).matches()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Writes every row of {@link #features} as {@code <class> 1:<v1> 2:<v2> ... } followed by
     * a {@code '\n'}. The stream is closed even if writing fails part-way.
     *
     * <p>A row whose label is none of the three {@code Const.CLASS_*} constants terminates the
     * JVM with status 1. A file that cannot be opened is reported via its stack trace.
     *
     * @param str path of the output file (overwritten if it exists)
     */
    @SuppressWarnings("rawtypes")
    private void printLibsvmDataToFile(String str) {
        try (PrintStream out = new PrintStream(new FileOutputStream(str))) {
            for (List<Comparable> row : this.features) {
                int labelIndex = row.size() - 1;
                String label = (String) row.get(labelIndex);
                StringBuilder line = new StringBuilder();
                line.append(toLibsvmClass(label)).append(' ');
                for (int i = 0; i < labelIndex; i++) {
                    // libsvm feature indices are 1-based
                    line.append(i + 1).append(':').append(row.get(i)).append(' ');
                }
                line.append('\n');
                out.print(line.toString());
            }
            // PrintStream swallows IOExceptions; surface them instead of failing silently.
            if (out.checkError()) {
                System.err.println("Error while writing " + str);
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }
    }

    /**
     * Maps a class label to its numeric libsvm class (current = 1, past = 2, smoker = 3).
     * Prints a message and exits the JVM with status 1 when the label is not recognised.
     */
    private static int toLibsvmClass(String label) {
        if (label.equals(Const.CLASS_CURR_SMOKER)) {
            return 1;
        }
        if (label.equals(Const.CLASS_PAST_SMOKER)) {
            return 2;
        }
        if (label.equals(Const.CLASS_SMOKER)) {
            return 3;
        }
        System.out.println("Undefined class label:" + label);
        System.exit(1);
        return -1; // only reached if exit is prevented from terminating the JVM
    }

    /** Returns {@code args[index]} when present, otherwise {@code fallback}. */
    private static String argOrDefault(String[] args, int index, String fallback) {
        return (args != null && args.length > index) ? args[index] : fallback;
    }

    /**
     * Generates the libsvm training file.
     *
     * @param strArr optional overrides, in order: keyword file, stop-word file, labelled
     *               sentence file, output file; any omitted argument falls back to the
     *               built-in default path
     */
    public static void main(String[] strArr) {
        GenerateTrainingData generator = new GenerateTrainingData(
                argOrDefault(strArr, 0, DEFAULT_KEYWORDS_FILE),
                argOrDefault(strArr, 1, DEFAULT_STOPWORDS_FILE));
        generator.makeFeatures(argOrDefault(strArr, 2, DEFAULT_INPUT_FILE));
        generator.printLibsvmDataToFile(argOrDefault(strArr, 3, DEFAULT_OUTPUT_FILE));
    }
}
