package uk.ac.shef.dcs.sti.TODO.evaluation;

import com.gargoylesoftware.htmlunit.html.HtmlTableHeaderCell;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.List;
import org.apache.any23.extractor.html.DomUtils;
import org.apache.any23.extractor.html.TagSoupParser;
import org.apache.hadoop.util.StringUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/* loaded from: input_file:uk/ac/shef/dcs/sti/TODO/evaluation/KeyFileGenerator_from_HTMLOutput.class */
/**
 * Generates plain-text "key" files from an HTML file that contains annotated tables.
 *
 * <p>For each input HTML file three files are written to the output directory:
 * {@code <name>.entity.keys}, {@code <name>.header.keys} (both derived from the first
 * {@code TABLE} element) and {@code <name>.relation.keys} (derived from the second
 * {@code TABLE} element). Annotated cells are recognised by the presence of a
 * {@code bgcolor} attribute.
 *
 * <p>NOTE(review): all row lookups use the XPath {@code //TR}. Under standard XPath
 * semantics that expression is absolute and matches every {@code TR} of the owning
 * document, not only those below the node passed in - confirm against
 * {@code DomUtils.findAll}. The existing logic is kept unchanged in that respect.
 *
 * <p>Output files are written with {@link PrintWriter#PrintWriter(String)}, i.e. using the
 * platform default charset.
 */
public class KeyFileGenerator_from_HTMLOutput {
    /** Input directory used by {@link #main(String[])} when no argument is given. */
    private static final String DEFAULT_INPUT_DIR =
            "E:\\Data\\table annotation\\freebase_crawl\\music_record_label\\musicbrainz_computed";

    /** Output directory used by {@link #main(String[])} when no second argument is given. */
    private static final String DEFAULT_OUTPUT_DIR =
            "E:\\Data\\table annotation\\freebase_crawl\\music_record_label\\musicbrainz_computed_reformatted";

    /** Suffix removed from file names that end in {@code htm.html}. */
    private static final String HTML_SUFFIX = ".html";

    /**
     * Number of leading characters dropped from an {@code href} value to obtain the entity id.
     * NOTE(review): presumably the length of a URL prefix such as
     * {@code http://www.freebase.com} - confirm against the input data.
     */
    private static final int HREF_PREFIX_LENGTH = 23;

    /** Name of the attribute that marks an annotated cell. */
    private static final String BGCOLOR = "bgcolor";

    /** Parser of the file most recently passed to {@link #createKeyFiles(String, String)}. */
    protected TagSoupParser parser;

    /**
     * Creates key files for every regular file in the input directory whose name does not end
     * in {@code attributes.html}.
     *
     * @param strArr optional arguments: {@code [0]} input directory, {@code [1]} output
     *               directory; the built-in defaults are used for missing arguments
     * @throws FileNotFoundException if the input directory cannot be listed, or an input or
     *                               output file cannot be opened
     */
    public static void main(String[] strArr) throws FileNotFoundException {
        String inputDir = strArr != null && strArr.length > 0 ? strArr[0] : DEFAULT_INPUT_DIR;
        String outputDir = strArr != null && strArr.length > 1 ? strArr[1] : DEFAULT_OUTPUT_DIR;

        // listFiles() returns null (not an empty array) for a missing or unreadable directory.
        File[] candidates = new File(inputDir).listFiles();
        if (candidates == null) {
            throw new FileNotFoundException("Input directory cannot be listed: " + inputDir);
        }

        KeyFileGenerator_from_HTMLOutput generator = new KeyFileGenerator_from_HTMLOutput();
        for (File candidate : candidates) {
            // Subdirectories cannot be opened as HTML input, so only regular files are processed.
            if (candidate.isFile() && !candidate.toString().endsWith("attributes.html")) {
                generator.createKeyFiles(candidate.toString(), outputDir);
            }
        }
    }

    /**
     * Parses one HTML file and writes its entity, header and relation key files.
     *
     * @param str  path of the HTML input file
     * @param str2 directory the three key files are written to
     * @throws FileNotFoundException if the input file or one of the output files cannot be opened
     * @throws IllegalStateException if the file cannot be parsed or contains fewer than two
     *                               {@code TABLE} elements
     */
    public void createKeyFiles(String str, String str2) throws FileNotFoundException {
        Document document;
        // try-with-resources guarantees the input stream is released even if parsing fails.
        try (FileInputStream input = new FileInputStream(str)) {
            this.parser = new TagSoupParser(input, str);
            document = this.parser.getDOM();
        } catch (FileNotFoundException e) {
            throw e;
        } catch (IOException e) {
            // Continuing with a null document would only fail later with a less useful error.
            throw new IllegalStateException("Failed to parse HTML file: " + str, e);
        }

        File file = new File(str);
        System.out.println(file);
        String name = file.getName();
        if (name.endsWith("htm.html")) {
            // Strip only the trailing ".html", even if ".html" also occurs earlier in the name.
            name = name.substring(0, name.length() - HTML_SUFFIX.length()).trim();
        }
        String outputPrefix = str2 + File.separator + name;

        List<Node> tables = DomUtils.findAll(document, "//TABLE");
        if (tables.size() < 2) {
            throw new IllegalStateException(
                    "Expected at least 2 TABLE elements but found " + tables.size() + " in " + str);
        }
        Node annotatedTable = tables.get(0);
        Node relationTable = tables.get(1);
        generateEntity_annotationKeys(annotatedTable, outputPrefix + ".entity.keys");
        generateHeader_annotationKeys(annotatedTable, outputPrefix + ".header.keys");
        generateRelation_annotationKeys(relationTable, outputPrefix + ".relation.keys");
    }

    /**
     * Writes one {@code row,column=id} line for every annotated {@code TD} cell that links to an
     * entity.
     *
     * <p>Rows are numbered so that the row following the first header row is row 0. Processing
     * stops when a second header row is reached.
     *
     * @param node node the {@code //TR} lookup is evaluated against
     * @param str  path of the file to write
     * @throws FileNotFoundException if the output file cannot be opened
     */
    public void generateEntity_annotationKeys(Node node, String str) throws FileNotFoundException {
        try (PrintWriter out = new PrintWriter(str)) {
            // Starts at -2 so that the first header row becomes -1 and the first data row 0.
            int rowIndex = -2;
            boolean headerSeen = false;
            for (Node row : DomUtils.findAll(node, "//TR")) {
                rowIndex++;
                if (isHeaderRow(row)) {
                    if (headerSeen) {
                        break;
                    }
                    headerSeen = true;
                } else {
                    writeEntityKeysForRow(row, rowIndex, out);
                }
            }
        }
    }

    /**
     * Writes the relation key lines for the header row that contains a {@code TH} cell whose
     * {@code bgcolor} is {@code yellow}.
     *
     * <p>Each line has the form {@code yellowColumn,otherColumn=annotation}, one per other
     * annotated {@code TH} cell of that row.
     *
     * @param node node the {@code //TR} lookup is evaluated against
     * @param str  path of the file to write
     * @return the adjusted column index of the yellow header cell, or {@code -1} if no such cell
     *         exists (in which case the output file is left empty)
     * @throws FileNotFoundException if the output file cannot be opened
     */
    public int generateRelation_annotationKeys(Node node, String str) throws FileNotFoundException {
        try (PrintWriter out = new PrintWriter(str)) {
            List<Node> rows = DomUtils.findAll(node, "//TR");

            // Pass 1: locate the first row holding a yellow header cell.
            int yellowColumn = -1;
            int yellowRow = -1;
            for (int r = 0; r < rows.size(); r++) {
                int column = findYellowHeaderColumn(rows.get(r));
                if (column != -1) {
                    yellowColumn = column;
                    yellowRow = r;
                    break;
                }
            }
            if (yellowRow == -1) {
                // Nothing to relate to; previously this case failed with an index error.
                return -1;
            }

            // Pass 2: emit one line per non-yellow annotated header cell of that row.
            NodeList cells = rows.get(yellowRow).getChildNodes();
            int headerIndex = -1;
            int unannotated = 0;
            for (int k = 0; k < cells.getLength(); k++) {
                Node cell = cells.item(k);
                if (!cell.getNodeName().equals("TH")) {
                    continue;
                }
                headerIndex++;
                Node bgcolor = bgcolorOf(cell);
                if (bgcolor == null) {
                    if (!cell.getTextContent().trim().equals("-")) {
                        unannotated++;
                    }
                } else if (bgcolor.getTextContent().equals("yellow")) {
                    // The yellow cell itself does not occupy a position in the numbering.
                    headerIndex--;
                } else {
                    out.println(yellowColumn + StringUtils.COMMA_STR + (headerIndex - unannotated)
                            + "=" + extractAnnotation(cell.getTextContent()));
                }
            }
            return yellowColumn;
        }
    }

    /**
     * Writes one {@code column=annotation} line for every annotated header cell of the first
     * {@code TR} returned by the lookup.
     *
     * @param node node the {@code //TR} lookup is evaluated against
     * @param str  path of the file to write
     * @throws FileNotFoundException if the output file cannot be opened
     */
    public void generateHeader_annotationKeys(Node node, String str) throws FileNotFoundException {
        try (PrintWriter out = new PrintWriter(str)) {
            List<Node> rows = DomUtils.findAll(node, "//TR");
            if (rows.isEmpty()) {
                // No rows at all: leave the key file empty instead of failing on get(0).
                return;
            }
            NodeList cells = rows.get(0).getChildNodes();
            int headerIndex = -1;
            int unannotated = 0;
            for (int k = 0; k < cells.getLength(); k++) {
                Node cell = cells.item(k);
                if (!cell.getNodeName().equalsIgnoreCase(HtmlTableHeaderCell.TAG_NAME)) {
                    continue;
                }
                headerIndex++;
                if (bgcolorOf(cell) != null) {
                    out.println((headerIndex - unannotated) + "=" + extractAnnotation(cell.getTextContent()));
                } else if (!cell.getTextContent().trim().equals("-")) {
                    unannotated++;
                }
            }
        }
    }

    /** Tells whether the second child node of the row is a table header cell. */
    private static boolean isHeaderRow(Node row) {
        Node secondChild = row.getChildNodes().item(1);
        return secondChild != null
                && secondChild.getNodeName().equalsIgnoreCase(HtmlTableHeaderCell.TAG_NAME);
    }

    /** Writes the entity key lines for the annotated {@code TD} cells of a single row. */
    private static void writeEntityKeysForRow(Node row, int rowIndex, PrintWriter out) {
        NodeList cells = row.getChildNodes();
        int cellIndex = -1;
        int bracketed = 0;
        for (int k = 0; k < cells.getLength(); k++) {
            Node cell = cells.item(k);
            if (!cell.getNodeName().equals("TD")) {
                continue;
            }
            cellIndex++;
            if (bgcolorOf(cell) == null) {
                // Unannotated cells containing both '[' and ']' are excluded from column numbering.
                String text = cell.getTextContent();
                if (text.indexOf("[") != -1 && text.indexOf("]") != -1) {
                    bracketed++;
                }
                continue;
            }
            // Resolved per cell so that a cell without a link never reuses an earlier cell's id.
            String entityId = extractEntityId(cell);
            if (entityId != null) {
                out.println(rowIndex + StringUtils.COMMA_STR + (cellIndex - bracketed) + "=" + entityId);
            }
        }
    }

    /**
     * Reads the entity id from the {@code href} attribute of the first child of the cell's second
     * child node.
     *
     * @return the {@code href} value without its first {@link #HREF_PREFIX_LENGTH} characters, or
     *         {@code null} if the expected node, the attribute, or enough characters are missing
     */
    private static String extractEntityId(Node cell) {
        Node wrapper = cell.getChildNodes().item(1);
        if (wrapper == null) {
            return null;
        }
        Node link = wrapper.getChildNodes().item(0);
        if (link == null || link.getAttributes() == null) {
            return null;
        }
        Node href = link.getAttributes().getNamedItem("href");
        if (href == null) {
            return null;
        }
        String target = href.getTextContent();
        if (target == null || target.length() < HREF_PREFIX_LENGTH) {
            return null;
        }
        return target.substring(HREF_PREFIX_LENGTH);
    }

    /**
     * Finds the adjusted column index of the first {@code TH} cell in the row whose
     * {@code bgcolor} is {@code yellow}.
     *
     * @return the index of that cell among the row's {@code TH} cells, minus the number of
     *         preceding {@code TH} cells that have no {@code bgcolor} and whose text is not
     *         {@code "-"}; {@code -1} if the row has no yellow header cell
     */
    private static int findYellowHeaderColumn(Node row) {
        NodeList cells = row.getChildNodes();
        int headerIndex = -1;
        int unannotated = 0;
        for (int k = 0; k < cells.getLength(); k++) {
            Node cell = cells.item(k);
            if (!cell.getNodeName().equals("TH")) {
                continue;
            }
            headerIndex++;
            Node bgcolor = bgcolorOf(cell);
            if (bgcolor != null) {
                if (bgcolor.getTextContent().equals("yellow")) {
                    return headerIndex - unannotated;
                }
            } else if (!cell.getTextContent().trim().equals("-")) {
                unannotated++;
            }
        }
        return -1;
    }

    /** Returns the {@code bgcolor} attribute node of the cell, or {@code null} if it has none. */
    private static Node bgcolorOf(Node cell) {
        return cell.getAttributes() == null ? null : cell.getAttributes().getNamedItem(BGCOLOR);
    }

    /**
     * Extracts the annotation label from a cell's text: the part before the first {@code '='},
     * cut at the first {@code '('} and trimmed.
     *
     * <p>Uses {@code indexOf} rather than {@code split("=")[0]}, which throws for text that
     * consists solely of {@code '='} characters.
     */
    private static String extractAnnotation(String str) {
        int equalsAt = str.indexOf('=');
        String head = (equalsAt == -1 ? str : str.substring(0, equalsAt)).trim();
        int parenAt = head.indexOf('(');
        return (parenAt == -1 ? head : head.substring(0, parenAt)).trim();
    }
}
