package uk.ac.shef.dcs.sti.TODO.evaluation;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.HashSet;
import java.util.List;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.any23.util.FileUtils;
import org.apache.commons.lang3.StringEscapeUtils;
import org.w3c.dom.Node;
import org.xml.sax.SAXException;
import uk.ac.shef.dcs.sti.STIException;
import uk.ac.shef.dcs.sti.core.model.Table;
import uk.ac.shef.dcs.sti.parser.table.TableParser;
import uk.ac.shef.dcs.sti.parser.table.TableParserMusicBrainz;
import uk.ac.shef.dcs.sti.parser.table.creator.TableObjCreatorMusicBrainz;
import uk.ac.shef.dcs.sti.parser.table.hodetector.TableHODetectorByHTMLTag;
import uk.ac.shef.dcs.sti.parser.table.normalizer.TableNormalizerSimple;
import uk.ac.shef.dcs.sti.parser.table.validator.TableValidatorGeneric;

/* loaded from: input_file:uk/ac/shef/dcs/sti/TODO/evaluation/DataStats_TableSize_NameLength_Analysis_MB_IMDB.class */
/**
 * One-off statistics script over a directory of raw table files and their ground-truth
 * ".keys" files.
 *
 * <p>For every raw file that has a matching ".keys" file, the first table extracted from the raw
 * file is inspected and two CSV files are appended to:
 * <ul>
 *   <li>{@link #TABLE_SIZE_CSV}: one line per table, "distinct first indices,distinct second indices"
 *       of the annotated cells;</li>
 *   <li>{@link #NAME_LENGTH_CSV}: one line per annotated cell, the number of whitespace-separated
 *       tokens in the cell text (after replacing '-' and '_' with spaces).</li>
 * </ul>
 * All input and output locations are hard-coded.
 */
public class DataStats_TableSize_NameLength_Analysis_MB_IMDB {
    /** Directory holding the raw table files. */
    private static final String RAW_DIR = "E:\\Data\\table_annotation\\freebase_crawl\\music_record_label\\raw";

    /** Directory prefix under which the ground-truth ".keys" files are looked up. */
    private static final String GS_DIR = "E:\\Data\\table_annotation\\freebase_crawl\\music_record_label\\gs\\musicbrainz_gs(entity)_reformatted";

    /** Output CSV receiving the per-table index counts (opened in append mode). */
    private static final String TABLE_SIZE_CSV = "D:\\Work\\lodie\\tmp_result/out.csv";

    /** Output CSV receiving the per-cell token counts (opened in append mode). */
    private static final String NAME_LENGTH_CSV = "D:\\Work\\lodie\\tmp_result/out_name.csv";

    /** Opening tags searched for, in priority order, when stripping markup from cell text. */
    private static final String[] OPENING_TAGS = {"<td>", "<th>", "<thead>"};

    /** Closing tags searched for, in priority order, when stripping markup from cell text. */
    private static final String[] CLOSING_TAGS = {"</td>", "</th>", "</thead>"};

    /** Table parser shared by {@link #checkGroundTruth}; assigned in {@link #main}. */
    private static TableParser xtractor;

    /**
     * Walks {@link #RAW_DIR}, and for each file with an existing ground-truth ".keys" file runs
     * {@link #checkGroundTruth} and appends its statistics to the two output CSV files.
     *
     * @param strArr command-line arguments (unused)
     * @throws IOException if the raw directory cannot be listed or a file cannot be read/written
     */
    public static void main(String[] strArr) throws IOException, SAXException, ParserConfigurationException, STIException {
        xtractor = new TableParserMusicBrainz(new TableNormalizerSimple(), new TableHODetectorByHTMLTag(), new TableObjCreatorMusicBrainz(), new TableValidatorGeneric());

        // File.listFiles() returns null (not an empty array) when the path is not a readable directory.
        File[] rawFiles = new File(RAW_DIR).listFiles();
        if (rawFiles == null) {
            throw new IOException("Cannot list raw table directory: " + RAW_DIR);
        }

        int processed = 0;
        for (File rawFile : rawFiles) {
            // The ".keys" path is the raw file's path relative to RAW_DIR, appended to GS_DIR.
            String relativePath = rawFile.getAbsolutePath().replaceAll("\\\\", "/").substring(RAW_DIR.length());
            String keysPath = GS_DIR + relativePath + ".keys";
            if (!new File(keysPath).exists()) {
                continue;
            }
            processed++;
            System.out.println(processed);
            // try-with-resources so both writers are flushed and closed even if parsing fails.
            try (PrintWriter tableSizeOut = new PrintWriter(new FileWriter(TABLE_SIZE_CSV, true));
                 PrintWriter nameLengthOut = new PrintWriter(new FileWriter(NAME_LENGTH_CSV, true))) {
                checkGroundTruth(rawFile.toString(), keysPath, tableSizeOut, nameLengthOut);
            }
        }
        System.out.println();
    }

    /**
     * Extracts the first table from the file {@code str} and, if the ground-truth file
     * {@code str2} exists, writes statistics about the annotated cells.
     *
     * <p>Each non-blank line of the ground-truth file is expected to look like
     * {@code "<int>,<int>=..."}; the two integers are passed, in that order, to
     * {@code Table.getContentCell}. A malformed line makes this method throw
     * ({@link NumberFormatException} or {@link ArrayIndexOutOfBoundsException}).
     *
     * <p>NOTE(review): the two integers are presumably (row, column) — confirm against
     * {@code Table.getContentCell}. Relies on the static parser assigned in {@link #main}.
     *
     * @param str          path of the raw table file; also echoed in console diagnostics
     * @param str2         path of the ground-truth ".keys" file, or {@code null} to skip the statistics
     * @param printWriter  receives one line "distinct first indices,distinct second indices"
     * @param printWriter2 receives one line per annotated cell holding its token count
     * @return the first extracted table, or {@code null} if no table was extracted
     */
    public static Table checkGroundTruth(String str, String str2, PrintWriter printWriter, PrintWriter printWriter2) throws IOException, ParserConfigurationException, SAXException, STIException {
        List<Table> tables = xtractor.extract(FileUtils.readFileContent(new File(str)), str);
        if (tables.isEmpty()) {
            return null;
        }
        Table table = tables.get(0);
        if (str2 == null) {
            return table;
        }
        File keysFile = new File(str2);
        if (!keysFile.exists()) {
            return table;
        }

        HashSet<Integer> firstIndices = new HashSet<>();
        HashSet<Integer> secondIndices = new HashSet<>();
        for (String line : FileUtils.readFileLines(keysFile)) {
            if (line.trim().isEmpty()) {
                continue;
            }
            // Only the part before '=' carries the cell position.
            String[] position = line.split("=")[0].trim().split(",");
            int first = Integer.parseInt(position[0].trim());
            int second = Integer.parseInt(position[1].trim());
            firstIndices.add(first);
            secondIndices.add(second);

            // Token count of the cell text, treating '-' and '_' as word separators.
            int tokenCount = table.getContentCell(first, second).getText().replaceAll("[\\-_]", " ").trim().split("\\s+").length;
            printWriter2.println(tokenCount);
            if (tokenCount > 10) {
                System.out.println(">10:" + str);
            }
            if (tokenCount > 20) {
                System.out.println(">20:" + str);
            }
            if (tokenCount > 120) {
                System.out.println(">120:" + str);
            }
        }

        printWriter.println(firstIndices.size() + "," + secondIndices.size());
        if (secondIndices.size() > 6) {
            System.out.println("c>6:" + str);
        }
        if (secondIndices.size() > 10) {
            // Previously printed "c>6:" here as well, making the two thresholds indistinguishable.
            System.out.println("c>10:" + str);
        }
        return table;
    }

    /**
     * Returns the HTML-unescaped text of the first node, with a leading {@code <td>}, {@code <th>}
     * or {@code <thead>} tag (and everything before it) and the matching trailing content from the
     * first closing tag onwards removed when such tags are present in the text.
     *
     * @param list nodes to read from; only the first element is used
     * @return the cleaned text, or an empty string if {@code list} is null or empty
     */
    private static String extract_text_content_from_html(List<Node> list) {
        if (list == null || list.isEmpty()) {
            return "";
        }
        String text = list.get(0).getTextContent();

        for (String openingTag : OPENING_TAGS) {
            int start = text.indexOf(openingTag);
            if (start == -1) {
                continue;
            }
            // Skip past the tag that actually matched; a fixed offset of 4 is wrong for "<thead>".
            text = text.substring(start + openingTag.length());
            for (String closingTag : CLOSING_TAGS) {
                int end = text.indexOf(closingTag);
                if (end != -1) {
                    text = text.substring(0, end).trim();
                    break;
                }
            }
            break;
        }
        return StringEscapeUtils.unescapeHtml4(text);
    }
}
