package uk.ac.shef.dcs.sti.parser.table;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.List;
import org.apache.any23.extractor.html.TagSoupParser;
import org.apache.commons.io.FileUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import uk.ac.shef.dcs.sti.STIException;
import uk.ac.shef.dcs.sti.core.model.TContext;
import uk.ac.shef.dcs.sti.core.model.Table;
import uk.ac.shef.dcs.sti.parser.table.creator.TableObjCreator;
import uk.ac.shef.dcs.sti.parser.table.hodetector.TableHODetector;
import uk.ac.shef.dcs.sti.parser.table.normalizer.TableNormalizer;
import uk.ac.shef.dcs.sti.parser.table.validator.TableValidator;

/* loaded from: input_file:uk/ac/shef/dcs/sti/parser/table/TableParser.class */
/**
 * Base class for table parsers. It wires together a normalizer, a header/orientation
 * detector, a table-object creator and zero or more validators, and offers helpers for
 * loading an HTML file into a DOM and for (de)serializing {@link Table} objects.
 *
 * <p>Subclasses implement {@link #extract(String, String)}.
 */
public abstract class TableParser {
    /** Converts a table node into a list of node lists (see {@link #extractTable}). */
    protected TableNormalizer normalizer;
    /** Processes the normalizer output before it is handed to {@link #creator}. */
    protected TableHODetector hoDetector;
    /** Builds the {@link Table} object from the detector output. */
    protected TableObjCreator creator;
    /** Validators applied in order to every created table; the first rejection discards the table. */
    protected TableValidator[] validators;
    /** Parser created by the most recent successful read in {@link #createDocument(String, String)}. */
    protected TagSoupParser parser;

    /**
     * Creates a parser from its collaborating components.
     *
     * @param tableNormalizer normalizer applied to each table node
     * @param tableHODetector detector applied to the normalizer output
     * @param tableObjCreator creator that builds the {@link Table}
     * @param tableValidatorArr validators every created table must pass (may be empty)
     */
    public TableParser(TableNormalizer tableNormalizer, TableHODetector tableHODetector, TableObjCreator tableObjCreator, TableValidator... tableValidatorArr) {
        this.normalizer = tableNormalizer;
        this.hoDetector = tableHODetector;
        this.creator = tableObjCreator;
        this.validators = tableValidatorArr;
    }

    /**
     * Extracts all tables from an input.
     *
     * @param str  input to parse; NOTE(review): presumably a file path or raw content, defined by the subclass
     * @param str2 second argument; NOTE(review): presumably a source identifier, confirm against subclasses
     * @return the extracted tables
     * @throws STIException if extraction fails
     */
    public abstract List<Table> extract(String str, String str2) throws STIException;

    /**
     * Builds a {@link Table} from a single DOM node by running it through the normalizer,
     * the detector, the creator and finally every validator.
     *
     * @param node        node to convert
     * @param str         forwarded unchanged to the creator (NOTE(review): presumably the table id)
     * @param str2        forwarded unchanged to the creator (NOTE(review): presumably the source id)
     * @param tContextArr contexts forwarded unchanged to the creator
     * @return the created table, or {@code null} when normalization yields nothing or any
     *         validator rejects the table
     */
    public Table extractTable(Node node, String str, String str2, TContext... tContextArr) {
        List<List<Node>> normalized = this.normalizer.normalize(node);
        if (normalized.isEmpty()) {
            return null;
        }
        Table table = this.creator.create(this.hoDetector.detect(normalized), str, str2, tContextArr);
        for (TableValidator validator : this.validators) {
            if (!validator.validate(table)) {
                return null;
            }
        }
        return table;
    }

    /**
     * Writes a table to a file inside the given directory using Java object serialization.
     * The file name is the table's source id with every character that is not a digit or a
     * word character replaced by {@code '_'}, followed by {@code '_'} and the table id.
     *
     * @param table table to write
     * @param str   target directory; created (including parents) when missing
     * @throws IOException if the directory cannot be created or the file cannot be written
     */
    public static void serialize(Table table, String str) throws IOException {
        File dir = new File(str);
        // mkdirs() returns false both on failure and when the directory already exists,
        // so re-check with isDirectory() before reporting an error.
        if (!dir.mkdirs() && !dir.isDirectory()) {
            throw new IOException("Cannot create output directory: " + str);
        }
        String fileName = table.getSourceId().replaceAll("[^\\d\\w]", "_") + "_" + table.getTableId();
        // Both streams are declared as resources so the file handle is released even when
        // the ObjectOutputStream constructor or writeObject throws.
        try (FileOutputStream fileOut = new FileOutputStream(str + File.separator + fileName);
             ObjectOutputStream objectOut = new ObjectOutputStream(fileOut)) {
            objectOut.writeObject(table);
        }
    }

    /**
     * Reads a table previously written with {@link #serialize(Table, String)}.
     *
     * <p>SECURITY NOTE(review): this uses native Java deserialization, which can execute
     * arbitrary code when fed a crafted file. Only call it on files from a trusted source.
     *
     * @param str path of the serialized file
     * @return the deserialized table
     * @throws IOException            if the file cannot be opened or read
     * @throws ClassNotFoundException if a class of the serialized object graph is unavailable
     */
    public static Table deserialize(String str) throws IOException, ClassNotFoundException {
        // try-with-resources closes the file even when readObject fails.
        try (FileInputStream fileIn = new FileInputStream(str);
             ObjectInputStream objectIn = new ObjectInputStream(fileIn)) {
            return (Table) objectIn.readObject();
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Loads a file and parses it into a DOM document. The parser that was used is kept in
     * {@link #parser}.
     *
     * @param str  path of the file to read
     * @param str2 passed to {@link TagSoupParser} as its second constructor argument
     *             (NOTE(review): presumably the document URI / source id, confirm)
     * @return the parsed document
     * @throws STIException if the file cannot be read or the DOM cannot be built
     */
    public Document createDocument(String str, String str2) throws STIException {
        try {
            // Hand the raw bytes to the parser: it is told the content is UTF-8, so decoding
            // and re-encoding through the platform default charset could corrupt the input.
            byte[] content = FileUtils.readFileToByteArray(new File(str));
            this.parser = new TagSoupParser(new ByteArrayInputStream(content), str2, "UTF-8");
            // A DOM-building failure is reported like a read failure instead of being
            // swallowed and surfacing later as a null document.
            return this.parser.getDOM();
        } catch (IOException e) {
            throw new STIException(e);
        }
    }
}
