package uk.ac.shef.dcs.sti.parser.table.creator;

import cern.colt.matrix.ObjectMatrix2D;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import org.apache.any23.extractor.html.DomUtils;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import uk.ac.shef.dcs.kbsearch.model.Clazz;
import uk.ac.shef.dcs.kbsearch.model.Entity;
import uk.ac.shef.dcs.sti.STIEnum;
import uk.ac.shef.dcs.sti.core.model.TCell;
import uk.ac.shef.dcs.sti.core.model.TCellAnnotation;
import uk.ac.shef.dcs.sti.core.model.TColumnHeader;
import uk.ac.shef.dcs.sti.core.model.TColumnHeaderAnnotation;
import uk.ac.shef.dcs.sti.core.model.TContext;
import uk.ac.shef.dcs.sti.core.model.Table;
import uk.ac.shef.dcs.util.StringUtils;

/* loaded from: input_file:uk/ac/shef/dcs/sti/parser/table/creator/TableObjCreatorWikipedia.class */
/**
 * {@link TableObjCreator} that turns a matrix of DOM nodes into a {@link Table}.
 *
 * <p>Row 0 of the matrix supplies the column headers; every following row becomes a content
 * row of the table (matrix row {@code r} is stored as content row {@code r - 1}). When
 * annotations are enabled, each {@code A} element found in a header or cell whose parent is
 * not a {@code sub}/{@code sup} element and whose text is non-empty is recorded as an
 * annotation carrying the link's {@code href} value (or {@code null} when it has none).
 */
public class TableObjCreatorWikipedia implements TableObjCreator {
    /** En dash (U+2013); replaced by a plain hyphen in cell text. */
    private static final char EN_DASH = '\u2013';

    /**
     * Minimum fraction of the cell's tokens that must come from link anchors before the
     * cell is treated as a pure list of links and reduced to its first link.
     */
    private static final double LINK_TOKEN_RATIO_THRESHOLD = 0.9d;

    private final boolean firstLinkOnlyFromListInCell;
    private final boolean addAnnotations;

    /**
     * @param firstLinkOnlyFromListInCell when {@code true}, a cell with several directly
     *        nested links keeps only the first link (text and annotation)
     * @param addAnnotations when {@code true}, links found in headers and cells are stored
     *        as annotations on the table
     */
    public TableObjCreatorWikipedia(boolean firstLinkOnlyFromListInCell, boolean addAnnotations) {
        this.firstLinkOnlyFromListInCell = firstLinkOnlyFromListInCell;
        this.addAnnotations = addAnnotations;
    }

    /**
     * Builds a table from the given matrix of DOM nodes.
     *
     * @param objectMatrix2D matrix of {@link Node}s; row 0 holds the headers, the remaining
     *        rows hold the content cells
     * @param str passed through as the first argument of the {@link Table} constructor
     * @param str2 passed through as the second argument of the {@link Table} constructor
     * @param tContextArr contexts added to the table in the given order
     * @return the populated table, with {@code rows() - 1} content rows
     */
    @Override // uk.ac.shef.dcs.sti.parser.table.creator.TableObjCreator
    public Table create(ObjectMatrix2D objectMatrix2D, String str, String str2, TContext... tContextArr) {
        Table table = new Table(str, str2, objectMatrix2D.rows() - 1, objectMatrix2D.columns());
        for (TContext context : tContextArr) {
            table.addContext(context);
        }
        for (int col = 0; col < objectMatrix2D.columns(); col++) {
            populateHeader(objectMatrix2D.get(0, col), col, table);
        }
        for (int row = 1; row < objectMatrix2D.rows(); row++) {
            for (int col = 0; col < objectMatrix2D.columns(); col++) {
                extract((Node) objectMatrix2D.get(row, col), row, col, table);
            }
        }
        return table;
    }

    /** Sets the header (and, if enabled, the header annotations) of column {@code col}. */
    private void populateHeader(Object headerObj, int col, Table table) {
        if (headerObj == null) {
            table.setColumnHeader(col, new TColumnHeader(STIEnum.TABLE_HEADER_UNKNOWN.getValue()));
            return;
        }
        Node headerNode = (Node) headerObj;
        TColumnHeader header = new TColumnHeader(headerNode.getTextContent());
        header.setHeaderXPath(DomUtils.getXPathForNode(headerNode));
        table.setColumnHeader(col, header);
        if (!this.addAnnotations) {
            return;
        }
        List<TColumnHeaderAnnotation> annotations = new ArrayList<TColumnHeaderAnnotation>();
        for (Node link : DomUtils.findAllByTag(headerNode, "A")) {
            if (isDirectChildOfSubOrSup(link)) {
                continue;
            }
            String anchorText = link.getTextContent();
            if (anchorText.length() != 0) {
                String href = hrefOf(link);
                annotations.add(new TColumnHeaderAnnotation(anchorText, new Clazz(href, href), 1.0d));
            }
        }
        table.getTableAnnotations().setHeaderAnnotation(col, annotations.toArray(new TColumnHeaderAnnotation[0]));
    }

    /**
     * Stores the text (and, if enabled, the link annotations) of one content cell.
     *
     * @param node DOM node of the cell; may be {@code null}
     * @param row matrix row of the cell (header row included), so the content row is {@code row - 1}
     * @param col column of the cell
     * @param table table being populated
     */
    private void extract(Node node, int row, int col, Table table) {
        int contentRow = row - 1;
        if (node == null) {
            // The header row tolerates missing matrix entries; do the same for content
            // cells by storing an empty cell instead of failing with a NullPointerException.
            table.setContentCell(contentRow, col, new TCell(""));
            if (this.addAnnotations) {
                table.getTableAnnotations().setContentCellAnnotations(contentRow, col, new TCellAnnotation[0]);
            }
            return;
        }
        String cellText = getCellTextByLinkAnchor(node).replace(EN_DASH, '-');
        table.setContentCell(contentRow, col, new TCell(cellText));
        if (!this.addAnnotations) {
            return;
        }
        // Insertion-ordered set: duplicates (as defined by TCellAnnotation equality) collapse.
        LinkedHashSet<TCellAnnotation> annotations = new LinkedHashSet<TCellAnnotation>();
        for (Node link : DomUtils.findAllByTag(node, "A")) {
            if (isDirectChildOfSubOrSup(link)) {
                continue;
            }
            String anchorText = link.getTextContent();
            if (anchorText.length() == 0) {
                continue;
            }
            annotations.add(newCellAnnotation(anchorText, hrefOf(link)));
            if (this.firstLinkOnlyFromListInCell) {
                break;
            }
        }
        table.getTableAnnotations().setContentCellAnnotations(contentRow, col, annotations.toArray(new TCellAnnotation[0]));
    }

    /**
     * Creates a cell annotation with score 1.0 and a fresh, empty map.
     * The map is left raw because the constructor's declared map type is not visible here.
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    private static TCellAnnotation newCellAnnotation(String anchorText, String href) {
        return new TCellAnnotation(anchorText, new Entity(href, href), 1.0d, new HashMap());
    }

    /** Returns whether the immediate parent of {@code link} is a {@code sub} or {@code sup} element. */
    private static boolean isDirectChildOfSubOrSup(Node link) {
        Node parent = link.getParentNode();
        if (parent == null) {
            return false;
        }
        String parentName = parent.getNodeName();
        return "sub".equalsIgnoreCase(parentName) || "sup".equalsIgnoreCase(parentName);
    }

    /** Returns the named attribute node, or {@code null} if the node has no such attribute. */
    private static Node attributeOf(Node node, String name) {
        // getAttributes() is null for anything that is not an element (e.g. text nodes).
        return node.getAttributes() == null ? null : node.getAttributes().getNamedItem(name);
    }

    /** Returns the value of the {@code href} attribute, or {@code null} if it is absent. */
    private static String hrefOf(Node link) {
        Node href = attributeOf(link, "href");
        return href == null ? null : href.getNodeValue();
    }

    /**
     * Derives the display text of a cell.
     *
     * <p>If the cell has {@code A} elements as direct children, their texts are joined. Otherwise
     * the texts of all direct children except {@code SUP}/{@code SUB} are joined, with special
     * handling for {@code SPAN} children. Segments are separated by {@code ", "} in the result,
     * unless the cell is judged to be a list of links and only the first one is requested.
     */
    private String getCellTextByLinkAnchor(Node node) {
        NodeList children = node.getChildNodes();
        StringBuilder collected = new StringBuilder();
        int directLinkCount = 0;
        for (int idx = 0; idx < children.getLength(); idx++) {
            Node child = children.item(idx);
            if ("A".equalsIgnoreCase(child.getNodeName())) {
                collected.append(child.getTextContent()).append('|');
                directLinkCount++;
            }
        }
        if (collected.length() == 0) {
            // No direct link children: fall back to the text of the other children.
            for (int idx = 0; idx < children.getLength(); idx++) {
                Node child = children.item(idx);
                String name = child.getNodeName();
                if (name == null || name.equalsIgnoreCase("SUP") || name.equalsIgnoreCase("SUB")) {
                    continue;
                }
                if (!name.equalsIgnoreCase("SPAN")) {
                    collected.append(child.getTextContent()).append('|');
                    continue;
                }
                // SPANs without a class attribute, or with class "sortkey", are skipped.
                Node classAttr = attributeOf(child, "class");
                String cssClass = classAttr == null ? null : classAttr.getTextContent();
                if (cssClass != null && !cssClass.equals("sortkey")) {
                    // Existing behaviour: the SPAN text replaces whatever was collected so
                    // far rather than being appended to it.
                    collected.setLength(0);
                    collected.append(child.getTextContent());
                }
            }
        }
        String text = collected.toString();
        if (text.endsWith("|")) {
            text = text.substring(0, text.length() - 1).trim();
        }
        if (this.firstLinkOnlyFromListInCell && directLinkCount > 1) {
            String fullCellText = node.getTextContent().replaceAll("\\|", " ");
            List<String> cellTokens = new ArrayList<String>(
                    Arrays.asList(StringUtils.toAlphaNumericWhitechar(fullCellText).split("\\s+")));
            List<String> linkTokens = new ArrayList<String>(
                    Arrays.asList(StringUtils.toAlphaNumericWhitechar(text).split("\\s+")));
            linkTokens.retainAll(cellTokens);
            // Floating-point division: with integer division the ratio would truncate to 0
            // for anything below 100% and the 0.9 threshold could never take effect.
            if (!cellTokens.isEmpty()
                    && (double) linkTokens.size() / cellTokens.size() > LINK_TOKEN_RATIO_THRESHOLD) {
                String[] segments = text.split("\\|");
                // split() drops trailing empty segments, so the array can be empty.
                return segments.length == 0 ? "" : segments[0].trim();
            }
        }
        return text.replaceAll("\\|", ", ");
    }
}
