package uk.ac.shef.dcs.sti.parser.list;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.List;
import org.apache.any23.extractor.html.TagSoupParser;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import uk.ac.shef.dcs.sti.core.model.ListItem;
import uk.ac.shef.dcs.sti.parser.list.splitter.ListItemSplitter;
import uk.ac.shef.dcs.sti.parser.list.validator.ListValidator;

/* loaded from: input_file:uk/ac/shef/dcs/sti/parser/list/ListXtractor.class */
/**
 * Base class for extractors that turn a DOM list node into a
 * {@link uk.ac.shef.dcs.sti.core.model.List} model object.
 *
 * <p>Each non-text child of the list node is handed to a {@link ListItemSplitter}
 * to produce a {@link ListItem}; the assembled list is then checked against every
 * configured {@link ListValidator}. Static helpers persist and restore list
 * objects using Java native serialization.
 */
public abstract class ListXtractor {
    /** Converts a single DOM child node into a list item (or {@code null} to skip it). */
    private final ListItemSplitter tokenizer;
    /** Validators that an extracted list must all pass before it is returned. */
    protected ListValidator[] validators;
    /** Not assigned in this class; presumably initialised by subclasses before use. */
    protected TagSoupParser parser;

    /**
     * @param listItemSplitter  splitter used to convert each child node into a {@link ListItem}
     * @param listValidatorArr  validators applied, in order, to every extracted list
     */
    public ListXtractor(ListItemSplitter listItemSplitter, ListValidator... listValidatorArr) {
        this.validators = listValidatorArr;
        this.tokenizer = listItemSplitter;
    }

    /**
     * Extracts lists from the given input. The meaning of the two string arguments
     * is defined by the concrete subclass.
     */
    public abstract List extract(String str, String str2);

    /**
     * Builds a list model from the children of {@code node}.
     *
     * @param node    DOM node whose child nodes are the candidate list items
     * @param str     passed as the second constructor argument of the list model
     * @param str2    passed as the first constructor argument of the list model
     * @param strArr  context strings attached to the list via {@code addContext}
     * @return the populated list, or {@code null} if any validator rejects it
     */
    /* JADX INFO: Access modifiers changed from: protected */
    public uk.ac.shef.dcs.sti.core.model.List extractList(Node node, String str, String str2, String... strArr) {
        uk.ac.shef.dcs.sti.core.model.List list = new uk.ac.shef.dcs.sti.core.model.List(str2, str);
        for (String context : strArr) {
            list.addContext(context);
        }

        NodeList childNodes = node.getChildNodes();
        for (int i = 0; i < childNodes.getLength(); i++) {
            Node child = childNodes.item(i);
            // Text nodes directly under the list node are not list items.
            if (child.getNodeType() == Node.TEXT_NODE) {
                continue;
            }
            ListItem listItem = this.tokenizer.tokenize(child);
            // The splitter signals "nothing usable here" by returning null.
            if (listItem != null) {
                list.addItem(listItem);
            }
        }

        // A single failing validator discards the whole list.
        for (ListValidator listValidator : this.validators) {
            if (!listValidator.isValid(list)) {
                return null;
            }
        }
        return list;
    }

    /**
     * Writes {@code list} to a file inside the directory {@code str} using Java
     * serialization. The directory is created if it does not exist. The file name
     * is the list's source id with every character that is neither a digit nor a
     * word character replaced by {@code _}, followed by {@code _} and the list id.
     *
     * @param list  the list to persist
     * @param str   path of the target directory
     * @throws IOException if the directory cannot be created or the file cannot be written
     */
    public static void serialize(uk.ac.shef.dcs.sti.core.model.List list, String str) throws IOException {
        File directory = new File(str);
        // mkdirs() also returns false when a concurrent caller created the directory
        // first, so re-check before treating it as a failure.
        if (!directory.exists() && !directory.mkdirs() && !directory.isDirectory()) {
            throw new IOException("Cannot create output directory: " + str);
        }
        String target = str + File.separator + list.getSourceId().replaceAll("[^\\d\\w]", "_") + "_" + list.getListId();
        // try-with-resources closes both streams even when writing fails.
        try (FileOutputStream fileOutputStream = new FileOutputStream(target);
             ObjectOutputStream objectOutputStream = new ObjectOutputStream(fileOutputStream)) {
            objectOutputStream.writeObject(list);
        }
    }

    /**
     * Reads a list previously written by {@link #serialize} from the file {@code str}.
     *
     * <p>NOTE(review): this uses Java native deserialization, which is unsafe on
     * untrusted input; only call it on files produced by this application.
     *
     * @param str  path of the serialized file
     * @return the deserialized list
     * @throws IOException            if the file cannot be opened or read
     * @throws ClassNotFoundException if a class of the serialized object graph cannot be found
     */
    public static uk.ac.shef.dcs.sti.core.model.List deserialize(String str) throws IOException, ClassNotFoundException {
        // try-with-resources closes both streams even when reading fails.
        try (FileInputStream fileInputStream = new FileInputStream(str);
             ObjectInputStream objectInputStream = new ObjectInputStream(fileInputStream)) {
            return (uk.ac.shef.dcs.sti.core.model.List) objectInputStream.readObject();
        }
    }
}
