package org.apache.tika.parser.html;

import de.l3s.boilerpipe.BoilerpipeExtractor;
import de.l3s.boilerpipe.BoilerpipeProcessingException;
import de.l3s.boilerpipe.document.TextBlock;
import de.l3s.boilerpipe.document.TextDocument;
import de.l3s.boilerpipe.extractors.DefaultExtractor;
import de.l3s.boilerpipe.sax.BoilerpipeHTMLContentHandler;
import java.io.Writer;
import org.apache.fontbox.ttf.HeaderTable;
import org.apache.tika.sax.WriteOutContentHandler;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;

/* loaded from: input_file:WEB-INF/lib/tika-parsers-0.8.jar:org/apache/tika/parser/html/BoilerpipeContentHandler.class */
/**
 * SAX content handler that runs a Boilerpipe extractor over the parsed HTML
 * and forwards only the extracted content to a delegate handler.
 *
 * <p>Incoming SAX events are accumulated by the {@link BoilerpipeHTMLContentHandler}
 * superclass. Nothing is sent to the delegate until {@link #endDocument()} is
 * called; at that point the accumulated {@link TextDocument} is processed by
 * the configured {@link BoilerpipeExtractor} and a minimal XHTML document
 * (title plus one {@code <p>} per content block) is emitted to the delegate.
 */
public class BoilerpipeContentHandler extends BoilerpipeHTMLContentHandler {
    /** Single newline emitted as ignorable whitespace between output items. */
    private static final char[] NL = {'\n'};

    /** Receives the generated XHTML events. */
    private final ContentHandler delegate;

    /** Decides which text blocks of the document count as content. */
    private final BoilerpipeExtractor extractor;

    /**
     * Creates a handler that uses {@link DefaultExtractor#INSTANCE}.
     *
     * @param contentHandler handler that receives the extracted XHTML events
     */
    public BoilerpipeContentHandler(ContentHandler contentHandler) {
        this(contentHandler, DefaultExtractor.INSTANCE);
    }

    /**
     * Creates a handler that writes the extracted content to the given writer
     * through a {@link WriteOutContentHandler}, using the default extractor.
     *
     * @param writer destination for the extracted content
     */
    public BoilerpipeContentHandler(Writer writer) {
        this(new WriteOutContentHandler(writer));
    }

    /**
     * Creates a handler with an explicit extractor.
     *
     * @param contentHandler      handler that receives the extracted XHTML events
     * @param boilerpipeExtractor extractor applied to the parsed document
     */
    public BoilerpipeContentHandler(ContentHandler contentHandler, BoilerpipeExtractor boilerpipeExtractor) {
        this.delegate = contentHandler;
        this.extractor = boilerpipeExtractor;
    }

    /**
     * Finishes the underlying Boilerpipe parse, runs the extractor and replays
     * the result to the delegate as a complete XHTML document.
     *
     * @throws SAXException if the delegate fails, or wrapping the
     *         {@link BoilerpipeProcessingException} raised by the extractor
     */
    @Override // de.l3s.boilerpipe.sax.BoilerpipeHTMLContentHandler, org.xml.sax.ContentHandler
    public void endDocument() throws SAXException {
        super.endDocument();
        TextDocument textDocument = toTextDocument();
        try {
            // Only this call can raise BoilerpipeProcessingException; if it
            // fails, nothing has been sent to the delegate yet.
            this.extractor.process(textDocument);
        } catch (BoilerpipeProcessingException e) {
            throw new SAXException(e);
        }

        // One empty attribute set is shared by every element we emit.
        AttributesImpl noAttributes = new AttributesImpl();

        this.delegate.startDocument();
        this.delegate.startPrefixMapping("", XHTMLContentHandler.XHTML);
        openTag("html", noAttributes);
        emitHead(textDocument, noAttributes);
        emitBody(textDocument, noAttributes);
        closeTag("html");
        this.delegate.endPrefixMapping("");
        this.delegate.endDocument();
    }

    /** Emits {@code <head><title>…</title></head>}; the title is left empty when the document has none. */
    private void emitHead(TextDocument textDocument, AttributesImpl noAttributes) throws SAXException {
        // "head" is the XHTML element name, written as a plain literal.
        openTag("head", noAttributes);
        openTag("title", noAttributes);
        if (textDocument.getTitle() != null) {
            emitText(textDocument.getTitle());
            emitNewline();
        }
        closeTag("title");
        closeTag("head");
    }

    /** Emits {@code <body>} containing one {@code <p>} per block the extractor marked as content. */
    private void emitBody(TextDocument textDocument, AttributesImpl noAttributes) throws SAXException {
        openTag("body", noAttributes);
        for (TextBlock textBlock : textDocument.getTextBlocks()) {
            if (!textBlock.isContent()) {
                continue;
            }
            openTag("p", noAttributes);
            emitText(textBlock.getText());
            closeTag("p");
            emitNewline();
        }
        closeTag("body");
    }

    /** Starts an XHTML-namespaced element whose qualified name equals its local name. */
    private void openTag(String name, AttributesImpl attributes) throws SAXException {
        this.delegate.startElement(XHTMLContentHandler.XHTML, name, name, attributes);
    }

    /** Ends an XHTML-namespaced element whose qualified name equals its local name. */
    private void closeTag(String name) throws SAXException {
        this.delegate.endElement(XHTMLContentHandler.XHTML, name, name);
    }

    /** Sends the whole string to the delegate as character data. */
    private void emitText(String text) throws SAXException {
        char[] chars = text.toCharArray();
        this.delegate.characters(chars, 0, chars.length);
    }

    /** Sends a single newline to the delegate as ignorable whitespace. */
    private void emitNewline() throws SAXException {
        this.delegate.ignorableWhitespace(NL, 0, NL.length);
    }
}
