package uk.ac.shef.dcs.sti.TODO.gs;

import com.gargoylesoftware.htmlunit.html.HtmlTableColumn;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.any23.extractor.html.DomUtils;
import org.apache.any23.util.FileUtils;
import org.apache.commons.io.filefilter.SuffixFileFilter;
import org.apache.hadoop.util.StringUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import uk.ac.shef.dcs.kbsearch.model.Clazz;
import uk.ac.shef.dcs.sti.STIEnum;
import uk.ac.shef.dcs.sti.core.model.TCell;
import uk.ac.shef.dcs.sti.core.model.TColumnHeader;
import uk.ac.shef.dcs.sti.core.model.TColumnHeaderAnnotation;
import uk.ac.shef.dcs.sti.core.model.TContext;
import uk.ac.shef.dcs.sti.core.model.Table;

/* loaded from: input_file:uk/ac/shef/dcs/sti/TODO/gs/GS_Stats_Limaye.class */
public class GS_Stats_Limaye {
    /** Statistics file used by {@link #main(String[])} when no third argument is supplied. */
    private static final String DEFAULT_STATS_FILE = "E:\\Data\\table annotation\\workspace\\WWT_GroundTruth\\annotation/stats.csv";

    /**
     * Walks a directory of table XML files and, for every file that has a counterpart under the
     * annotation directory, calls {@link #readTable(String, String, String)} so that one line of
     * statistics is appended to the statistics file.
     *
     * @param strArr {@code [0]} directory scanned recursively for {@code .xml} table files;
     *               {@code [1]} directory holding the matching annotation files (same relative
     *               layout); optional {@code [2]} statistics file to append to, defaulting to
     *               {@link #DEFAULT_STATS_FILE}
     * @throws IOException                  if a file cannot be read or the statistics file cannot be written
     * @throws SAXException                 if an XML file cannot be parsed
     * @throws ParserConfigurationException if no XML parser can be created
     */
    public static void main(String[] strArr) throws IOException, SAXException, ParserConfigurationException {
        if (strArr == null || strArr.length < 2) {
            System.err.println("usage: GS_Stats_Limaye <tableDir> <annotationDir> [statsFile]");
            return;
        }
        String tableDir = strArr[0];
        String annotationDir = strArr[1].replaceAll("\\\\", "/");
        String statsFile = strArr.length > 2 ? strArr[2] : DEFAULT_STATS_FILE;

        for (File tableFile : FileUtils.listFilesRecursively(new File(tableDir), new SuffixFileFilter(".xml"))) {
            // The relative part is obtained by cutting tableDir.length() characters off the
            // absolute path, so this assumes tableDir was given as an absolute path.
            String relativePath = tableFile.getAbsolutePath().replaceAll("\\\\", "/").substring(tableDir.length());
            String annotationFile = annotationDir + relativePath;
            if (new File(annotationFile).exists()) {
                System.out.println(tableFile);
                readTable(tableFile.toString(), annotationFile, statsFile);
            } else {
                System.err.println("clean file for annotation does not exist: " + annotationFile);
            }
        }
        System.out.println();
    }

    /**
     * Builds a {@link Table} from a table XML file and, when an annotation file is given, attaches
     * its column annotations and appends a statistics line to {@code str3}.
     *
     * <p>The statistics line has the form {@code <number of colAnnos elements>,<number of
     * non-empty entity elements>}.
     *
     * @param str  path/URI of the table XML file (rows are read from {@code //logicalTable/content})
     * @param str2 path/URI of the annotation XML file, or {@code null} to skip annotations and
     *             statistics
     * @param str3 file the statistics line is appended to (only used when {@code str2} is not null)
     * @return the populated table, or {@code null} if the table file has no
     *         {@code //logicalTable/content} node
     * @throws IOException                  if a file cannot be read or the statistics file cannot be written
     * @throws ParserConfigurationException if no XML parser can be created
     * @throws SAXException                 if an XML file cannot be parsed
     */
    public static Table readTable(String str, String str2, String str3) throws IOException, ParserConfigurationException, SAXException {
        DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        Document tableDoc = builder.parse(str);
        List<Node> contentNodes = DomUtils.findAll(tableDoc, "//logicalTable/content");
        if (contentNodes == null || contentNodes.isEmpty()) {
            return null;
        }

        // Collect the cell texts of every non-text child of the content node, one array per row.
        boolean hasHeader = false;
        List<String[]> rows = new ArrayList<>();
        NodeList rowNodes = contentNodes.get(0).getChildNodes();
        for (int r = 0; r < rowNodes.getLength(); r++) {
            Node rowNode = rowNodes.item(r);
            if (rowNode.getNodeName().equals("#text")) {
                continue;
            }
            if (rowNode.getNodeName().equals("header")) {
                hasHeader = true;
            }
            rows.add(readCellTexts(rowNode));
        }

        // Rows may have different lengths; the table is as wide as the widest one.
        int columnCount = 0;
        for (String[] row : rows) {
            columnCount = Math.max(columnCount, row.length);
        }

        // When a header element was seen, the first collected row is used as the header row.
        int firstContentRow = hasHeader ? 1 : 0;
        Table table = new Table(String.valueOf(str.hashCode()), str, rows.size() - firstContentRow, columnCount);
        String[] headerRow = hasHeader ? rows.get(0) : new String[0];
        for (int c = 0; c < columnCount; c++) {
            // A header row shorter than the widest row gets the "unknown" placeholder for the
            // missing columns instead of failing with an ArrayIndexOutOfBoundsException.
            TColumnHeader header = c < headerRow.length
                    ? new TColumnHeader(headerRow[c])
                    : new TColumnHeader(STIEnum.TABLE_HEADER_UNKNOWN.getValue());
            table.setColumnHeader(c, header);
        }
        for (int r = firstContentRow; r < rows.size(); r++) {
            String[] row = rows.get(r);
            for (int c = 0; c < row.length; c++) {
                table.setContentCell(r - firstContentRow, c, new TCell(row[c]));
            }
        }

        // Both conditions must hold before get(0) is safe (the original used "||" here).
        List<Node> contextNodes = DomUtils.findAll(tableDoc, "//logicalTable/tableContext");
        if (contextNodes != null && !contextNodes.isEmpty()) {
            NodeList contextChildren = contextNodes.get(0).getChildNodes();
            for (int i = 0; i < contextChildren.getLength(); i++) {
                Node child = contextChildren.item(i);
                if (child.getNodeName().equals("#text")) {
                    continue;
                }
                List<Node> textNodes = DomUtils.findAllByTag(child, "text");
                if (textNodes == null || textNodes.isEmpty()) {
                    continue;
                }
                String text = textNodes.get(0).getTextContent();
                if (text == null) {
                    continue;
                }
                // The child at index 1 is recorded as the page title, every other as a paragraph.
                TContext.TableContextType type = i == 1
                        ? TContext.TableContextType.PAGETITLE
                        : TContext.TableContextType.PARAGRAPH_BEFORE;
                table.addContext(new TContext(text, type, 1.0d));
            }
        }
        // NOTE(review): the second collected context is always dropped; the reason is not
        // visible in this file - confirm before changing.
        if (table.getContexts().size() > 1) {
            table.getContexts().remove(1);
        }
        if (str2 == null) {
            return table;
        }

        Document annotationDoc = builder.parse(str2);
        List<Node> columnAnnotationNodes = DomUtils.findAll(annotationDoc, "//columnAnnotations/colAnnos");
        for (Node columnNode : columnAnnotationNodes) {
            // The column index is read from the attribute named by HtmlTableColumn.TAG_NAME.
            int column = Integer.parseInt(columnNode.getAttributes().getNamedItem(HtmlTableColumn.TAG_NAME).getTextContent());
            String headerText = table.getColumnHeader(column).getHeaderText();
            List<TColumnHeaderAnnotation> annotations = new ArrayList<>();
            NodeList annotationChildren = columnNode.getChildNodes();
            for (int i = 0; i < annotationChildren.getLength(); i++) {
                Node anno = annotationChildren.item(i);
                if (!anno.getNodeName().equals("anno")) {
                    continue;
                }
                String name = anno.getAttributes().getNamedItem("name").getTextContent();
                double score = Double.parseDouble(anno.getAttributes().getNamedItem("value").getTextContent().trim());
                // The "name" attribute is passed for both Clazz constructor arguments.
                annotations.add(new TColumnHeaderAnnotation(headerText, new Clazz(name, name), score));
            }
            table.getTableAnnotations().setHeaderAnnotation(column, annotations.toArray(new TColumnHeaderAnnotation[0]));
        }

        String statsLine = columnAnnotationNodes.size() + StringUtils.COMMA_STR + countAnnotatedEntities(annotationDoc);
        appendLine(str3, statsLine);
        return table;
    }

    /** Returns the text of each "cell" under the given row node, using "" for cells without a "text" node. */
    private static String[] readCellTexts(Node rowNode) {
        List<Node> cellNodes = DomUtils.findAll(rowNode, "cell");
        String[] texts = new String[cellNodes.size()];
        for (int c = 0; c < texts.length; c++) {
            List<Node> textNodes = DomUtils.findAll(cellNodes.get(c), "text");
            texts[c] = (textNodes != null && !textNodes.isEmpty()) ? textNodes.get(0).getTextContent() : "";
        }
        return texts;
    }

    /** Counts the "entity" nodes with non-empty text content under the annotation rows. */
    private static int countAnnotatedEntities(Document annotationDoc) {
        // NOTE(review): "cellAnnotatoons" is reproduced exactly as in the original query;
        // presumably it matches the element name used in the data files - verify before "fixing".
        List<Node> annotationRows = DomUtils.findAll(annotationDoc, "//cellAnnotatoons/row");
        int count = 0;
        for (Node row : annotationRows) {
            for (Node entity : DomUtils.findAll(row, "entity")) {
                String text = entity.getTextContent();
                if (text != null && text.length() != 0) {
                    count++;
                }
            }
        }
        return count;
    }

    /** Appends one line to the given file, closing the writer even if writing fails. */
    private static void appendLine(String file, String line) throws IOException {
        try (PrintWriter writer = new PrintWriter(new FileWriter(file, true))) {
            writer.println(line);
        }
    }
}
