package uk.ac.shef.dcs.sti.parser.list;

import com.gargoylesoftware.htmlunit.html.HtmlBody;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.any23.extractor.html.DomUtils;
import org.apache.any23.extractor.html.TagSoupParser;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import uk.ac.shef.dcs.sti.parser.list.splitter.ListItemSplitter;
import uk.ac.shef.dcs.sti.parser.list.validator.ListValidator;

/* loaded from: input_file:uk/ac/shef/dcs/sti/parser/list/ListXtractorHTML.class */
/**
 * {@link ListXtractor} that pulls lists out of an HTML string.
 *
 * <p>The markup is parsed into a DOM with {@link TagSoupParser}; every element whose tag is one
 * of {@code UL} / {@code OL} and which passes {@link #isValidPosition(Node)} is handed to
 * {@code extractList} (inherited) and, when that yields a non-null result, collected.
 */
public class ListXtractorHTML extends ListXtractor {
    /**
     * Charset used both to turn the input string into bytes and as the encoding given to the
     * parser, so that encoding and decoding agree regardless of the JVM's platform default.
     */
    private static final java.nio.charset.Charset INPUT_CHARSET = java.nio.charset.StandardCharsets.UTF_8;

    /** Tag names searched for, in this order. */
    private final String[] listTagSelectors = {"UL", "OL"};

    /**
     * Creates an extractor; both arguments are forwarded unchanged to the superclass.
     *
     * @param listItemSplitter splitter passed to {@link ListXtractor}
     * @param listValidatorArr validators passed to {@link ListXtractor}
     */
    public ListXtractorHTML(ListItemSplitter listItemSplitter, ListValidator... listValidatorArr) {
        super(listItemSplitter, listValidatorArr);
    }

    /**
     * Parses {@code str} as HTML and extracts every acceptable {@code UL}/{@code OL} list.
     *
     * <p>Each candidate is identified by a running 1-based counter that is incremented for
     * <em>every</em> matching element (all {@code UL} elements first, then all {@code OL}
     * elements), including those rejected by {@link #isValidPosition(Node)}; the counter value is
     * passed to {@code extractList} as a string.
     *
     * <p>Extraction is best-effort: if the parser raises an {@link IOException} the lists
     * collected so far (possibly none) are returned instead of propagating the failure.
     *
     * @param str  the HTML content to parse; must not be {@code null}
     * @param str2 passed to the parser as its second constructor argument and to
     *             {@code extractList} as its third argument (presumably the source document's
     *             identifier/IRI; confirm against {@link ListXtractor})
     * @return the extracted lists, never {@code null}
     */
    @Override // uk.ac.shef.dcs.sti.parser.list.ListXtractor
    public List extract(String str, String str2) {
        List<uk.ac.shef.dcs.sti.core.model.List> extracted = new ArrayList<uk.ac.shef.dcs.sti.core.model.List>();
        // Encode with an explicit charset and tell the parser the same one: the no-arg
        // String.getBytes() depends on the platform default and corrupts non-ASCII content
        // on hosts whose default differs from what the parser assumes.
        this.parser = new TagSoupParser(
                new ByteArrayInputStream(str.getBytes(INPUT_CHARSET)), str2, INPUT_CHARSET.name());
        try {
            Document dom = this.parser.getDOM();
            int candidateIndex = 0;
            for (String tagName : this.listTagSelectors) {
                for (Node candidate : DomUtils.findAllByTag(dom, tagName)) {
                    // Count before filtering so identifiers stay tied to document position.
                    candidateIndex++;
                    if (!isValidPosition(candidate)) {
                        continue;
                    }
                    uk.ac.shef.dcs.sti.core.model.List extractedList =
                            extractList(candidate, String.valueOf(candidateIndex), str2, new String[0]);
                    if (extractedList != null) {
                        extracted.add(extractedList);
                    }
                }
            }
        } catch (IOException ignored) {
            // Deliberate best-effort: a parse failure yields whatever was collected so far.
        }
        return extracted;
    }

    /**
     * Decides whether a list element sits at an acceptable place in the document.
     *
     * @param node the candidate list element
     * @return {@code true} when the node has no parent, or when its parent's node name equals
     *         {@link HtmlBody#TAG_NAME} ignoring case; lists nested inside any other element
     *         are rejected
     */
    protected boolean isValidPosition(Node node) {
        Node parentNode = node.getParentNode();
        if (parentNode == null) {
            return true;
        }
        return parentNode.getNodeName().equalsIgnoreCase(HtmlBody.TAG_NAME);
    }
}
