package org.apache.tika.parser.html;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.CloseShieldInputStream;
import org.apache.tika.metadata.HttpHeaders;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/* loaded from: input_file:WEB-INF/lib/tika-parsers-0.5.jar:org/apache/tika/parser/html/HtmlParser.class */
/**
 * HTML parser that feeds the input through a TagSoup parser and forwards the
 * resulting SAX events to the supplied content handler via
 * {@link XHTMLDowngradeHandler} and {@link HtmlHandler}.
 *
 * <p>{@link #mapSafeElement(String)} and {@link #isDiscardElement(String)}
 * describe which upper-case HTML element names are mapped to output elements
 * and which are discarded; this instance is handed to the {@link HtmlHandler}
 * created during parsing.</p>
 */
public class HtmlParser implements Parser {

    /**
     * Upper-case HTML element names mapped to the lower-case element names
     * returned by {@link #mapSafeElement(String)}. Note that {@code MENU} is
     * deliberately mapped to {@code ul}.
     */
    private static final Map<String, String> SAFE_ELEMENTS = createSafeElements();

    /** Builds the immutable lookup table behind {@link #mapSafeElement(String)}. */
    private static Map<String, String> createSafeElements() {
        Map<String, String> elements = new HashMap<String, String>();
        elements.put("H1", "h1");
        elements.put("H2", "h2");
        elements.put("H3", "h3");
        elements.put("H4", "h4");
        elements.put("H5", "h5");
        elements.put("H6", "h6");
        elements.put("P", "p");
        elements.put("PRE", "pre");
        elements.put("BLOCKQUOTE", "blockquote");
        elements.put("UL", "ul");
        elements.put("OL", "ol");
        elements.put("MENU", "ul");
        elements.put("LI", "li");
        elements.put("DL", "dl");
        elements.put("DT", "dt");
        elements.put("DD", "dd");
        elements.put("TABLE", "table");
        elements.put("THEAD", "thead");
        elements.put("TBODY", "tbody");
        elements.put("TR", "tr");
        elements.put("TH", "th");
        elements.put("TD", "td");
        return Collections.unmodifiableMap(elements);
    }

    /**
     * Tells whether the given charset name can be used as an encoding hint.
     *
     * <p>{@link Charset#isSupported(String)} throws an unchecked
     * {@link IllegalCharsetNameException} for syntactically illegal names
     * (for example an empty string or a name containing spaces). Since the
     * name comes from document metadata, such values are treated as
     * "not supported" instead of aborting the parse.</p>
     *
     * @param name charset name to check, may be {@code null}
     * @return {@code true} only if the name is non-null, legal and supported
     */
    private static boolean isSupportedCharset(String name) {
        if (name == null) {
            return false;
        }
        try {
            return Charset.isSupported(name);
        } catch (IllegalCharsetNameException e) {
            // Malformed hint: ignore it and let the parser pick the encoding.
            return false;
        }
    }

    /**
     * Parses the given HTML stream and sends the resulting SAX events to the
     * given content handler.
     *
     * <p>The value of the {@link HttpHeaders#CONTENT_ENCODING} metadata entry,
     * when present and naming a supported charset, is passed to the parser as
     * the input encoding. Missing, malformed or unsupported values are
     * ignored.</p>
     *
     * @param inputStream    HTML input; it is wrapped in a
     *                       {@link CloseShieldInputStream} before being handed
     *                       to the underlying parser
     * @param contentHandler receiver of the generated SAX events
     * @param metadata       document metadata; read for the encoding hint and
     *                       passed on to the {@link HtmlHandler}
     * @param parseContext   parse context; not used by this implementation
     * @throws IOException   if the stream cannot be read
     * @throws SAXException  if the SAX events cannot be processed
     * @throws TikaException declared by the {@link Parser} interface
     */
    @Override // org.apache.tika.parser.Parser
    public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws IOException, SAXException, TikaException {
        InputSource inputSource = new InputSource(new CloseShieldInputStream(inputStream));

        // Use the encoding hint only when it names a charset this JVM supports.
        String encoding = metadata.get(HttpHeaders.CONTENT_ENCODING);
        if (isSupportedCharset(encoding)) {
            inputSource.setEncoding(encoding);
        }

        // Fully qualified because the simple name Parser is taken by the Tika interface.
        org.ccil.cowan.tagsoup.Parser parser = new org.ccil.cowan.tagsoup.Parser();
        parser.setContentHandler(new XHTMLDowngradeHandler(new HtmlHandler(this, contentHandler, metadata)));
        parser.parse(inputSource);
    }

    /**
     * Parses the given HTML stream using a fresh, empty {@link ParseContext}.
     *
     * @param inputStream    HTML input
     * @param contentHandler receiver of the generated SAX events
     * @param metadata       document metadata
     * @throws IOException   if the stream cannot be read
     * @throws SAXException  if the SAX events cannot be processed
     * @throws TikaException declared by the {@link Parser} interface
     */
    @Override // org.apache.tika.parser.Parser
    public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata) throws IOException, SAXException, TikaException {
        parse(inputStream, contentHandler, metadata, new ParseContext());
    }

    /**
     * Maps an upper-case HTML element name to the element name to emit.
     *
     * <p>Matching is case-sensitive against upper-case names such as
     * {@code "H1"} or {@code "TABLE"}. {@code "MENU"} maps to {@code "ul"}.</p>
     *
     * @param str upper-case HTML element name, may be {@code null}
     * @return the mapped lower-case element name, or {@code null} if the name
     *         is {@code null} or not one of the known safe elements
     */
    /* JADX INFO: Access modifiers changed from: protected */
    public String mapSafeElement(String str) {
        return SAFE_ELEMENTS.get(str);
    }

    /**
     * Tells whether the named element is one of the discarded elements.
     *
     * @param str upper-case HTML element name, may be {@code null}
     * @return {@code true} exactly when the name is {@code "STYLE"} or
     *         {@code "SCRIPT"}
     */
    /* JADX INFO: Access modifiers changed from: protected */
    public boolean isDiscardElement(String str) {
        return "STYLE".equals(str) || "SCRIPT".equals(str);
    }
}
