package de.l3s.boilerpipe.extractors;

import de.l3s.boilerpipe.BoilerpipeExtractor;
import de.l3s.boilerpipe.BoilerpipeProcessingException;
import de.l3s.boilerpipe.document.TextDocument;
import de.l3s.boilerpipe.sax.BoilerpipeSAXInput;
import de.l3s.boilerpipe.sax.HTMLFetcher;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.net.URL;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/* loaded from: input_file:resources/install/10/tika-bundle-1.14.jar:boilerpipe-1.1.0.jar:de/l3s/boilerpipe/extractors/ExtractorBase.class */
public abstract class ExtractorBase implements BoilerpipeExtractor {

    /**
     * Extracts text from markup supplied as a string.
     *
     * <p>The string is wrapped in a {@link StringReader}-backed {@link InputSource}, turned into a
     * {@link TextDocument} via {@link BoilerpipeSAXInput}, and handed to
     * {@link #getText(TextDocument)}.
     *
     * @param str the markup to process
     * @return the content of the processed document
     * @throws BoilerpipeProcessingException if processing fails; a {@link SAXException} raised
     *     while building the document is wrapped as the cause
     */
    @Override
    public String getText(final String str) throws BoilerpipeProcessingException {
        final InputSource source = new InputSource(new StringReader(str));
        try {
            final TextDocument parsed = new BoilerpipeSAXInput(source).getTextDocument();
            return getText(parsed);
        } catch (SAXException saxFailure) {
            throw new BoilerpipeProcessingException(saxFailure);
        }
    }

    /**
     * Extracts text from the given SAX input source.
     *
     * <p>The source is turned into a {@link TextDocument} via {@link BoilerpipeSAXInput} and handed
     * to {@link #getText(TextDocument)}.
     *
     * @param inputSource the SAX input source to read from
     * @return the content of the processed document
     * @throws BoilerpipeProcessingException if processing fails; a {@link SAXException} raised
     *     while building the document is wrapped as the cause
     */
    @Override
    public String getText(final InputSource inputSource) throws BoilerpipeProcessingException {
        try {
            final BoilerpipeSAXInput saxInput = new BoilerpipeSAXInput(inputSource);
            final TextDocument parsed = saxInput.getTextDocument();
            return getText(parsed);
        } catch (SAXException saxFailure) {
            throw new BoilerpipeProcessingException(saxFailure);
        }
    }

    /**
     * Extracts text from the resource behind the given URL.
     *
     * <p>The resource is retrieved with {@link HTMLFetcher#fetch(URL)}, converted to an
     * {@link InputSource}, and passed on to {@link #getText(InputSource)}.
     *
     * @param url the location to fetch
     * @return the content of the processed document
     * @throws BoilerpipeProcessingException if processing fails; an {@link IOException} raised
     *     while fetching is wrapped as the cause
     */
    public String getText(final URL url) throws BoilerpipeProcessingException {
        try {
            final InputSource fetched = HTMLFetcher.fetch(url).toInputSource();
            return getText(fetched);
        } catch (IOException ioFailure) {
            throw new BoilerpipeProcessingException(ioFailure);
        }
    }

    /**
     * Extracts text from the given character stream.
     *
     * <p>Wraps the reader in an {@link InputSource} and delegates to
     * {@link #getText(InputSource)}.
     *
     * @param reader the character stream to read from
     * @return the content of the processed document
     * @throws BoilerpipeProcessingException if processing fails
     */
    @Override
    public String getText(final Reader reader) throws BoilerpipeProcessingException {
        final InputSource source = new InputSource(reader);
        return getText(source);
    }

    /**
     * Runs {@code process} on the given document and returns its content afterwards.
     *
     * <p>{@code process} is not declared in this class; presumably it is inherited through
     * {@link BoilerpipeExtractor} and supplied by concrete subclasses (confirm against the
     * interface).
     *
     * @param textDocument the document to process; it is passed to {@code process} before its
     *     content is read
     * @return the value of {@code textDocument.getContent()} after processing
     * @throws BoilerpipeProcessingException if processing fails
     */
    @Override
    public String getText(final TextDocument textDocument) throws BoilerpipeProcessingException {
        process(textDocument);
        final String content = textDocument.getContent();
        return content;
    }
}
