package org.apache.any23.extractor.html;

import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.any23.extractor.ExtractionContext;
import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.extractor.ExtractionParameters;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.Extractor;
import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.rdf.RDFUtils;
import org.apache.any23.vocab.SINDICE;
import org.openrdf.model.URI;
import org.openrdf.model.impl.LiteralImpl;
import org.openrdf.model.impl.URIImpl;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;

/* loaded from: input_file:WEB-INF/lib/apache-any23-core-1.0.jar:org/apache/any23/extractor/html/HTMLMetaExtractor.class */
/**
 * Extractor that turns the {@code META} elements found under {@code /HTML/HEAD} into triples.
 *
 * <p>For every {@code META} element carrying both a {@code name} and a {@code content}
 * attribute, one triple is written whose subject is the document URI, whose predicate is
 * derived from the {@code name} attribute and whose object is a literal holding the
 * {@code content} attribute.
 *
 * <p>The predicate is resolved as follows:
 * <ul>
 *   <li>if the name has the form {@code prefix.local} and {@code prefix} matches the
 *       {@code rel} attribute of a {@code /HTML/HEAD/LINK} element whose {@code href} is
 *       accepted by {@link RDFUtils#isAbsoluteURI(String)}, the predicate is
 *       {@code href + local};</li>
 *   <li>otherwise the predicate is the value of {@code /HTML/@profile} (or
 *       {@link SINDICE#NS} when no profile is declared) followed by the name.</li>
 * </ul>
 *
 * <p>The instance keeps per-document state in its fields while {@link #run} executes.
 */
public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor {
    private static final SINDICE vSINDICE = SINDICE.getInstance();

    /** Value of {@code /HTML/@profile} for the document being processed, or {@code null}. */
    private URI profile;

    /** Maps a {@code LINK} element's {@code rel} value to the URI given by its {@code href}. */
    private final Map<String, URI> prefixes = new HashMap<String, URI>();

    /** Value of {@code /HTML/@lang} for the document being processed, or {@code null}. */
    private String documentLang;

    /**
     * A single {@code META} element: its location, resolved property URI, content and
     * optional language. Two instances are equal when they share the same XPath.
     */
    public class Meta {
        private String xpath;
        private URI name;
        private String lang;
        private String content;

        /**
         * @param str  XPath locating the {@code META} element; used as identity
         * @param uri  property URI resolved from the {@code name} attribute
         * @param str2 value of the {@code content} attribute
         */
        public Meta(String str, URI uri, String str2) {
            this.xpath = str;
            this.name = uri;
            this.content = str2;
        }

        /**
         * @param hTMLMetaExtractor not read by this constructor; kept so the existing
         *                          signature stays available
         * @param str               XPath locating the {@code META} element
         * @param uri               property URI resolved from the {@code name} attribute
         * @param str2              value of the {@code content} attribute
         * @param str3              language to attach to the content
         */
        public Meta(HTMLMetaExtractor hTMLMetaExtractor, String str, URI uri, String str2, String str3) {
            this(str, uri, str2);
            this.lang = str3;
        }

        public URI getName() {
            return this.name;
        }

        public void setName(URI uri) {
            this.name = uri;
        }

        public String getLang() {
            return this.lang;
        }

        public void setLang(String str) {
            this.lang = str;
        }

        public String getContent() {
            return this.content;
        }

        public void setContent(String str) {
            this.content = str;
        }

        /** Equality is based on the XPath only. */
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (obj == null || getClass() != obj.getClass()) {
                return false;
            }
            Meta other = (Meta) obj;
            return this.xpath != null ? this.xpath.equals(other.xpath) : other.xpath == null;
        }

        /** Hash code consistent with {@link #equals(Object)}: derived from the XPath only. */
        @Override
        public int hashCode() {
            return this.xpath != null ? this.xpath.hashCode() : 0;
        }
    }

    /**
     * Reads profile, language and link-defined prefixes from the document, then writes one
     * triple per usable {@code META} element.
     *
     * @param extractionParameters not read by this method
     * @param extractionContext    supplies the document URI used as triple subject
     * @param document             DOM of the HTML page to inspect
     * @param extractionResult     sink receiving the generated triples
     */
    @Override // org.apache.any23.extractor.Extractor
    public void run(ExtractionParameters extractionParameters, ExtractionContext extractionContext, Document document, ExtractionResult extractionResult) throws IOException, ExtractionException {
        // Prefixes are per-document; without this reset, prefixes declared by a previously
        // processed document would still resolve names in the current one.
        this.prefixes.clear();
        this.profile = extractProfile(document);
        this.documentLang = getDocumentLanguage(document);
        extractLinkDefinedPrefixes(document);

        String baseProfile = this.profile != null ? this.profile.toString() : SINDICE.NS;
        URI documentURI = extractionContext.getDocumentURI();
        for (Meta meta : extractMetaElement(document, baseProfile)) {
            // A language set on the element itself takes precedence over the document one.
            String lang = meta.getLang() != null ? meta.getLang() : this.documentLang;
            extractionResult.writeTriple(documentURI, meta.getName(), new LiteralImpl(meta.getContent(), lang));
        }
    }

    /**
     * @return the value of {@code /HTML/@lang}, or {@code null} when it is absent or empty
     */
    private String getDocumentLanguage(Document document) {
        String lang = DomUtils.find(document, "string(/HTML/@lang)");
        if (lang == null || lang.isEmpty()) {
            return null;
        }
        return lang;
    }

    /**
     * @return the value of {@code /HTML/@profile} as a URI, or {@code null} when it is absent
     *         or empty
     */
    private URI extractProfile(Document document) {
        String profileValue = DomUtils.find(document, "string(/HTML/@profile)");
        if (profileValue == null || profileValue.isEmpty()) {
            return null;
        }
        return new URIImpl(profileValue);
    }

    /**
     * Registers in {@link #prefixes} every {@code /HTML/HEAD/LINK} element that has both a
     * {@code rel} and an {@code href} attribute and whose {@code href} passes
     * {@link RDFUtils#isAbsoluteURI(String)}. Elements lacking either attribute are skipped.
     */
    private void extractLinkDefinedPrefixes(Document document) {
        for (Node link : DomUtils.findAll(document, "/HTML/HEAD/LINK")) {
            NamedNodeMap attributes = link.getAttributes();
            // getNamedItem yields null for a missing attribute, so test the nodes before
            // dereferencing them.
            Node relNode = attributes.getNamedItem("rel");
            Node hrefNode = attributes.getNamedItem("href");
            if (relNode == null || hrefNode == null) {
                continue;
            }
            String rel = relNode.getTextContent();
            String href = hrefNode.getTextContent();
            if (rel != null && href != null && RDFUtils.isAbsoluteURI(href)) {
                this.prefixes.put(rel, new URIImpl(href));
            }
        }
    }

    /**
     * Collects the {@code /HTML/HEAD/META} elements that have both a {@code name} and a
     * {@code content} attribute.
     *
     * @param document DOM to inspect
     * @param str      namespace prepended to names that do not resolve through a prefix
     * @return the collected elements, de-duplicated by XPath
     */
    private Set<Meta> extractMetaElement(Document document, String str) {
        List<Node> metaNodes = DomUtils.findAll(document, "/HTML/HEAD/META");
        Set<Meta> result = new HashSet<Meta>();
        for (Node metaNode : metaNodes) {
            NamedNodeMap attributes = metaNode.getAttributes();
            Node nameNode = attributes.getNamedItem("name");
            Node contentNode = attributes.getNamedItem("content");
            if (nameNode == null || contentNode == null) {
                continue;
            }
            String name = nameNode.getTextContent();
            String content = contentNode.getTextContent();
            String xpath = DomUtils.getXPathForNode(metaNode);
            URI property = getPrefixIfExists(name);
            if (property == null) {
                property = new URIImpl(str + name);
            }
            result.add(new Meta(xpath, property, content));
        }
        return result;
    }

    /**
     * Resolves a name of the form {@code prefix.local} against {@link #prefixes}.
     *
     * @param str value of a {@code name} attribute
     * @return the prefix URI followed by {@code local}, or {@code null} when the name does not
     *         consist of exactly two dot-separated parts or the prefix is not registered
     */
    private URI getPrefixIfExists(String str) {
        String[] parts = str.split("\\.");
        if (parts.length != 2) {
            return null;
        }
        URI namespace = this.prefixes.get(parts[0]);
        if (namespace == null) {
            return null;
        }
        return new URIImpl(namespace.toString() + parts[1]);
    }

    @Override // org.apache.any23.extractor.Extractor
    public ExtractorDescription getDescription() {
        return HTMLMetaExtractorFactory.getDescriptionInstance();
    }
}
