package uk.ac.shef.dcs.sti.parser.table;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.any23.extractor.html.DomUtils;
import org.apache.any23.extractor.html.TagSoupParser;
import org.apache.commons.io.FileUtils;
import org.w3c.dom.Node;
import uk.ac.shef.dcs.sti.STIException;
import uk.ac.shef.dcs.sti.core.model.TContext;
import uk.ac.shef.dcs.sti.core.model.Table;
import uk.ac.shef.dcs.sti.parser.table.creator.TableObjCreator;
import uk.ac.shef.dcs.sti.parser.table.hodetector.TableHODetector;
import uk.ac.shef.dcs.sti.parser.table.normalizer.TableNormalizer;
import uk.ac.shef.dcs.sti.parser.table.validator.TableValidator;

/* loaded from: input_file:uk/ac/shef/dcs/sti/parser/table/TableParserHTML.class */
/**
 * {@link TableParser} that reads an HTML file from disk, locates every
 * {@code TABLE} element in its DOM and converts each one to a {@link Table}
 * via the inherited {@code extractTable} method.
 */
public class TableParserHTML extends TableParser {

    /**
     * Creates a parser; all collaborators are forwarded unchanged to the
     * {@link TableParser} constructor.
     *
     * @param tableNormalizer    forwarded to the superclass
     * @param tableHODetector    forwarded to the superclass
     * @param tableObjCreator    forwarded to the superclass
     * @param tableValidatorArr  forwarded to the superclass
     */
    public TableParserHTML(TableNormalizer tableNormalizer, TableHODetector tableHODetector, TableObjCreator tableObjCreator, TableValidator... tableValidatorArr) {
        super(tableNormalizer, tableHODetector, tableObjCreator, tableValidatorArr);
    }

    /**
     * Extracts all tables found in the HTML file at the given path.
     *
     * <p>Tables are numbered from 1 in the order returned by the
     * {@code //TABLE} XPath query; that number (as a string) is passed to
     * {@code extractTable} as the table identifier. Tables for which
     * {@code extractTable} returns {@code null} are omitted from the result.
     *
     * @param str  path of the HTML file to read
     * @param str2 value handed to {@link TagSoupParser} as the document
     *             identifier and to {@code extractTable} as its third argument
     *             (presumably the source id/URL of the page; confirm against callers)
     * @return the extracted tables, possibly empty, never {@code null}
     * @throws STIException if the file cannot be read
     */
    @Override
    public List<Table> extract(String str, String str2) throws STIException {
        final byte[] htmlBytes;
        try {
            // Feed the file's raw bytes to the parser. Decoding to a String and
            // re-encoding with the platform default charset (as was done before)
            // can corrupt characters on JVMs whose default charset cannot
            // represent them, even though the parser is told the input is UTF-8.
            htmlBytes = FileUtils.readFileToByteArray(new File(str));
        } catch (IOException e) {
            throw new STIException(e);
        }

        List<Table> tables = new ArrayList<Table>();
        // The parser is stored in the inherited field, as before.
        this.parser = new TagSoupParser(new ByteArrayInputStream(htmlBytes), str2, "UTF-8");
        try {
            int tableIndex = 0;
            for (Node tableNode : DomUtils.findAll(this.parser.getDOM(), "//TABLE")) {
                tableIndex++;
                Table table = extractTable(tableNode, String.valueOf(tableIndex), str2, new TContext[0]);
                if (table != null) {
                    tables.add(table);
                }
            }
        } catch (IOException ignored) {
            // Best-effort, preserved from the original behaviour: an I/O failure
            // while building or walking the DOM does not abort the call; whatever
            // tables were collected so far are returned.
            // NOTE(review): this hides parse failures from callers; consider
            // logging or rethrowing as STIException once callers are audited.
        }
        return tables;
    }
}
