package net.ontopia.topicmaps.classify;

import java.util.Arrays;
import org.ccil.cowan.tagsoup.Parser;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;

/* loaded from: input_file:net/ontopia/topicmaps/classify/HTMLFormatModule.class */
/**
 * Format module for HTML content. It reuses the XML pipeline of
 * {@link XMLFormatModule} but plugs in a TagSoup {@link Parser} as the
 * {@link XMLReader} and an HTML-specific SAX handler that forwards element
 * regions and character data to a {@link TextHandlerIF}, leaving out
 * everything nested inside the configured skip elements.
 */
public class HTMLFormatModule extends XMLFormatModule {
    /**
     * Byte prefixes used by {@link #matchesContent(ClassifiableContentIF)} to
     * recognise HTML content. The comparison is done on exact bytes, so each
     * supported upper/lower-case spelling of the root tag and of the DOCTYPE
     * declaration has to be listed explicitly.
     */
    protected byte[][] magicBytes = FormatModule.getBytes(new String[]{
        "<HTML",
        "<html",
        "<!DOCTYPE html ",
        "<!DOCTYPE HTML ",
        // Previously a second copy of "<!DOCTYPE HTML "; the lowercase-keyword /
        // uppercase-name spelling was the one combination that was not covered.
        "<!doctype HTML ",
        "<!doctype html "
    });

    /**
     * SAX handler that turns element events into regions and character events
     * into text on the wrapped {@link TextHandlerIF}. While inside one or more
     * skip elements (tracked by {@code skipLevel}) nothing is forwarded.
     */
    private class HTMLHandler extends DefaultHandler {
        /** Receiver of the region and text callbacks. */
        private final TextHandlerIF thandler;

        /** Number of currently open skip elements; 0 means content is forwarded. */
        private int skipLevel;

        private HTMLHandler(TextHandlerIF thandler) {
            this.thandler = thandler;
        }

        /**
         * Opens a region named after the element's local name, or increases
         * the skip depth when the element is one of the skip elements.
         */
        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes)
                throws SAXException {
            if (isSkipElement(localName)) {
                this.skipLevel++;
            } else if (this.skipLevel == 0) {
                this.thandler.startRegion(localName);
            }
        }

        /** Forwards character data unless it occurs inside a skip element. */
        @Override
        public void characters(char[] ch, int start, int length) {
            if (this.skipLevel == 0) {
                this.thandler.text(ch, start, length);
            }
        }

        /**
         * Closes the current region, or decreases the skip depth when the
         * element being closed is one of the skip elements.
         */
        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            if (isSkipElement(localName)) {
                this.skipLevel--;
            } else if (this.skipLevel == 0) {
                this.thandler.endRegion();
            }
        }

        /** Tells whether the given local name is in the module's skip-element collection. */
        private boolean isSkipElement(String localName) {
            return HTMLFormatModule.this.skipElements != null
                    && HTMLFormatModule.this.skipElements.contains(localName);
        }
    }

    /**
     * Creates the module with the HTML file extensions and with the
     * {@code style}, {@code pre} and {@code script} elements (lower- and
     * upper-case spellings) registered as skip elements.
     */
    public HTMLFormatModule() {
        this.extensions = new String[]{".htm", ".html", ".xhtml", ".shtml"};
        setSkipElements(Arrays.asList("style", "STYLE", "pre", "PRE", "script", "SCRIPT"));
    }

    /**
     * Checks whether the content starts with one of the {@link #magicBytes}
     * prefixes, using {@code FormatModule.startsWithSkipWhitespace}.
     *
     * @param classifiableContentIF the content to inspect
     * @return {@code true} if the content is recognised as HTML
     */
    @Override
    public boolean matchesContent(ClassifiableContentIF classifiableContentIF) {
        return FormatModule.startsWithSkipWhitespace(classifiableContentIF.getContent(), this.magicBytes);
    }

    /**
     * Supplies a new TagSoup {@link Parser} as the reader used for HTML input.
     *
     * @return a fresh TagSoup parser instance
     */
    @Override
    protected XMLReader createXMLReader() throws SAXException {
        return new Parser();
    }

    /**
     * Wraps the given text handler in the HTML-specific SAX content handler.
     *
     * @param textHandlerIF receiver of the extracted regions and text
     * @return a new content handler bound to {@code textHandlerIF}
     */
    @Override
    protected ContentHandler getContentHandler(TextHandlerIF textHandlerIF) {
        return new HTMLHandler(textHandlerIF);
    }
}
