package uk.ac.shef.dcs.sti.parser.table.context;

import java.io.File;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.any23.extractor.html.DomUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import uk.ac.shef.dcs.sti.STIException;
import uk.ac.shef.dcs.sti.core.model.TContext;

/* loaded from: input_file:uk/ac/shef/dcs/sti/parser/table/context/TableContextExtractorGeneric.class */
/**
 * {@link TableContextExtractor} that builds table contexts from the paragraph
 * and title elements found in a parsed document.
 *
 * <p>Every non-blank {@code P} element becomes a
 * {@link TContext.TableContextType#PARAGRAPH_BEFORE} context and every
 * non-blank {@code TITLE} element becomes a
 * {@link TContext.TableContextType#PAGETITLE} context.
 */
public class TableContextExtractorGeneric implements TableContextExtractor {

    /**
     * Extracts paragraph and page-title contexts from {@code document}.
     *
     * <p>Paragraphs are looked up with two XPath expressions ({@code //P} and
     * {@code P}); whichever yields more nodes is used, with {@code //P}
     * winning ties. Paragraph contexts are added first, followed by title
     * contexts, each in the order returned by the lookup.
     *
     * @param file     not read by this implementation
     * @param document the DOM to search; element names are matched in upper
     *                 case (NOTE(review): presumably the upstream HTML parser
     *                 upper-cases element names — confirm)
     * @return a new mutable list of contexts; empty when no element has
     *         non-blank text
     * @throws STIException declared by the interface; this implementation
     *                      contains no statement that throws it directly
     */
    @Override
    public List<TContext> extract(File file, Document document) throws STIException {
        List<TContext> contexts = new ArrayList<>();

        // Try both the descendant and the relative form of the query and keep
        // the one that matched more nodes.
        List<Node> paragraphs = DomUtils.findAll(document, "//P");
        List<Node> relativeParagraphs = DomUtils.findAll(document, "P");
        if (paragraphs.size() < relativeParagraphs.size()) {
            paragraphs = relativeParagraphs;
        }
        addContexts(contexts, paragraphs, TContext.TableContextType.PARAGRAPH_BEFORE);

        List<Node> titles = DomUtils.findAll(document, "//TITLE");
        addContexts(contexts, titles, TContext.TableContextType.PAGETITLE);

        return contexts;
    }

    /**
     * Appends one context of the given type to {@code target} for every node
     * whose trimmed text content is not empty.
     */
    private static void addContexts(List<TContext> target, List<Node> nodes,
                                    TContext.TableContextType type) {
        for (Node node : nodes) {
            String text = node.getTextContent().trim();
            if (!text.isEmpty()) {
                target.add(new TContext(text, type, 1.0d));
            }
        }
    }
}
