package org.apache.any23.extractor.microdata;

import com.gargoylesoftware.htmlunit.html.HtmlArea;
import com.gargoylesoftware.htmlunit.html.HtmlBlockQuote;
import com.gargoylesoftware.htmlunit.html.HtmlCitation;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.apache.any23.extractor.ExtractionContext;
import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.extractor.ExtractionParameters;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.Extractor;
import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.IssueReport;
import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.extractor.html.DomUtils;
import org.apache.any23.extractor.microdata.ItemPropValue;
import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.rdf.RDFUtils;
import org.apache.any23.vocab.DCTERMS;
import org.apache.any23.vocab.XHTML;
import org.openrdf.model.Resource;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.vocabulary.RDF;
import org.openrdf.model.vocabulary.XMLSchema;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/* loaded from: input_file:org/apache/any23/extractor/microdata/MicrodataExtractor.class */
/**
 * Extractor that turns the microdata items of an HTML DOM into RDF triples.
 *
 * <p>Besides the item scopes reported by {@link MicrodataParser}, it also emits triples
 * for the document title, for {@code a}/{@code area}/{@code link} elements carrying both
 * {@code rel} and {@code href}, for {@code meta} elements carrying both {@code name} and
 * {@code content}, and for {@code blockquote}/{@code q} elements carrying a citation
 * attribute.
 *
 * <p>{@link #run} stores per-document state (language, strict flag, default namespace) in
 * instance fields, so a single instance must not be used by concurrent runs.
 */
public class MicrodataExtractor implements Extractor.TagSoupDOMExtractor {

    /** Predicate linking the document URI to every top-level item that was detected. */
    private static final URI MICRODATA_ITEM = RDFUtils.uri("http://www.w3.org/1999/xhtml/microdata#item");

    /** Factory (and description) of this extractor, registered under the name {@code html-microdata}. */
    public static final ExtractorFactory<MicrodataExtractor> factory = SimpleExtractorFactory.create("html-microdata", PopularPrefixes.createSubset("rdf", "doac", "foaf"), Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"), "example-microdata.html", MicrodataExtractor.class);

    /** Language declared on the root element of the current document, or {@code null} if none. */
    private String documentLanguage;

    /** Value of the {@code any23.microdata.strict} flag for the current run. */
    private boolean isStrict;

    /** Value of {@code any23.microdata.ns.default}; only read when the run is not strict. */
    private String defaultNamespace;

    /**
     * Returns the description of this extractor.
     *
     * @return the shared {@link #factory} instance.
     */
    @Override
    public ExtractorDescription getDescription() {
        return factory;
    }

    /**
     * Extracts microdata from the given document and writes the resulting triples.
     *
     * <p>Parser errors are reported as issues on {@code extractionResult}. When no item scope
     * is detected the method returns without writing anything, including the title, link,
     * meta and citation triples.
     *
     * @param extractionParameters source of the {@code any23.microdata.strict} flag and of the
     *                             {@code any23.microdata.ns.default} property.
     * @param extractionContext    provides the URI of the document being processed.
     * @param document             DOM to extract from.
     * @param extractionResult     sink for triples and issues.
     * @throws IOException         declared by the extractor contract.
     * @throws ExtractionException if a property of an item cannot be converted to a URL.
     */
    @Override
    public void run(ExtractionParameters extractionParameters, ExtractionContext extractionContext, Document document, ExtractionResult extractionResult) throws IOException, ExtractionException {
        final MicrodataParserReport report = MicrodataParser.getMicrodata(document);
        final MicrodataParserException[] errors = report.getErrors();
        if (errors.length > 0) {
            notifyError(errors, extractionResult);
        }

        final ItemScope[] itemScopes = report.getDetectedItemScopes();
        if (itemScopes.length == 0) {
            return;
        }

        this.isStrict = extractionParameters.getFlag("any23.microdata.strict");
        if (!this.isStrict) {
            this.defaultNamespace = extractionParameters.getProperty("any23.microdata.ns.default");
        }
        this.documentLanguage = getDocumentLanguage(document);

        final URI documentURI = extractionContext.getDocumentURI();
        // Remembers the subject chosen for every item scope so repeated scopes share one resource.
        final Map<ItemScope, Resource> subjectsByScope = new HashMap<ItemScope, Resource>();
        for (ItemScope itemScope : itemScopes) {
            final Resource subject = processType(itemScope, documentURI, extractionResult, subjectsByScope);
            extractionResult.writeTriple(documentURI, MICRODATA_ITEM, subject);
        }

        processTitle(document, documentURI, extractionResult);
        processHREFElements(document, documentURI, extractionResult);
        processMetaElements(document, documentURI, extractionResult);
        processCiteElements(document, documentURI, extractionResult);
    }

    /**
     * Reads the {@code lang} attribute of the root {@code HTML} element.
     *
     * @param document the document to inspect.
     * @return the declared language, or {@code null} when it is absent or empty.
     */
    private String getDocumentLanguage(Document document) {
        final String lang = DomUtils.find(document, "string(/HTML/@lang)");
        return (lang == null || lang.isEmpty()) ? null : lang;
    }

    /**
     * Returns the language applicable to a node.
     *
     * @param node the node to inspect.
     * @return the node's own {@code lang} attribute if present, otherwise the document language
     *         (which may be {@code null}).
     */
    private String getLanguage(Node node) {
        final Node lang = attribute(node, "lang");
        return lang == null ? this.documentLanguage : lang.getTextContent();
    }

    /**
     * Looks up an attribute node, tolerating nodes that have no attribute map.
     *
     * @param node the node whose attributes are read.
     * @param name the attribute name.
     * @return the attribute node, or {@code null} if the node has no such attribute.
     */
    private static Node attribute(Node node, String name) {
        // Node#getAttributes() returns null for anything that is not an element.
        return node.getAttributes() == null ? null : node.getAttributes().getNamedItem(name);
    }

    /**
     * Writes a {@code dcterms:title} triple when the document has exactly one {@code title} element.
     *
     * @param document         the document to inspect.
     * @param uri              the document URI, used as subject.
     * @param extractionResult sink for the triple.
     */
    private void processTitle(Document document, URI uri, ExtractionResult extractionResult) {
        final NodeList titles = document.getElementsByTagName("title");
        if (titles.getLength() != 1) {
            return;
        }
        final Node title = titles.item(0);
        final String text = title.getTextContent();
        final String language = getLanguage(title);
        extractionResult.writeTriple(uri, DCTERMS.getInstance().title, language == null ? RDFUtils.literal(text) : RDFUtils.literal(text, language));
    }

    /**
     * Processes every {@code a}, {@code area} and {@code link} element of the document, in that order.
     *
     * @param document         the document to inspect.
     * @param uri              the document URI.
     * @param extractionResult sink for the triples.
     */
    private void processHREFElements(Document document, URI uri, ExtractionResult extractionResult) {
        // HtmlArea.TAG_NAME is used only for its string value (the tag name of area elements).
        final String[] tagNames = {"a", HtmlArea.TAG_NAME, "link"};
        for (String tagName : tagNames) {
            final NodeList elements = document.getElementsByTagName(tagName);
            for (int i = 0; i < elements.getLength(); i++) {
                processHREFElement(elements.item(i), uri, extractionResult);
            }
        }
    }

    /**
     * Writes one triple per distinct {@code rel} token of an element that has both {@code rel}
     * and {@code href}.
     *
     * <p>The object is the {@code href} resolved against the document URI; the predicate is the
     * XHTML namespace followed by the token. Tokens containing a colon are ignored, the tokens
     * {@code alternate} and {@code stylesheet} are both mapped to {@code ALTERNATE-STYLESHEET},
     * and any other token is lower-cased. Elements whose {@code href} cannot be turned into a
     * URL are skipped silently.
     *
     * @param node             the element to inspect.
     * @param uri              the document URI, used as subject and as resolution base.
     * @param extractionResult sink for the triples.
     */
    private void processHREFElement(Node node, URI uri, ExtractionResult extractionResult) {
        final Node rel = attribute(node, "rel");
        if (rel == null) {
            return;
        }
        final Node href = attribute(node, "href");
        if (href == null) {
            return;
        }

        final URL target;
        try {
            // toAbsoluteURL returns absolute references unchanged and resolves the others.
            target = toAbsoluteURL(uri.toString(), href.getTextContent(), '/');
        } catch (MalformedURLException ignored) {
            // Best effort: an unusable href simply yields no triple.
            return;
        }

        final Set<String> predicateNames = new HashSet<String>();
        // rel is a set of whitespace-separated tokens; split on runs of whitespace so that
        // repeated spaces, tabs or line breaks do not produce empty or malformed tokens.
        for (String token : rel.getTextContent().trim().split("\\s+")) {
            if (token.isEmpty() || token.contains(":")) {
                continue;
            }
            if (token.equals("alternate") || token.equals("stylesheet")) {
                predicateNames.add("ALTERNATE-STYLESHEET");
            } else {
                // Locale.ROOT keeps the result independent of the JVM default locale.
                predicateNames.add(token.toLowerCase(Locale.ROOT));
            }
        }

        final URI object = RDFUtils.uri(target.toString());
        for (String predicateName : predicateNames) {
            // No token can be an absolute URL here because tokens containing ':' were dropped.
            extractionResult.writeTriple(uri, RDFUtils.uri(XHTML.NS + predicateName), object);
        }
    }

    /**
     * Processes every {@code meta} element that has both a {@code name} and a {@code content}
     * attribute.
     *
     * @param document         the document to inspect.
     * @param uri              the document URI.
     * @param extractionResult sink for the triples.
     */
    private void processMetaElements(Document document, URI uri, ExtractionResult extractionResult) {
        final NodeList metas = document.getElementsByTagName("meta");
        for (int i = 0; i < metas.getLength(); i++) {
            final Node meta = metas.item(i);
            final String name = DomUtils.readAttribute(meta, "name", null);
            final String content = DomUtils.readAttribute(meta, "content", null);
            if (name == null || content == null) {
                continue;
            }
            final String language = getLanguage(meta);
            if (isAbsoluteURL(name)) {
                processMetaElement(RDFUtils.uri(name), content, language, uri, extractionResult);
            } else {
                processMetaElement(name, content, language, uri, extractionResult);
            }
        }
    }

    /**
     * Writes a meta triple whose predicate is the given URI.
     *
     * <p>Nothing is written when the content value contains a colon.
     * NOTE(review): the colon test is applied to the content value rather than to the name;
     * confirm that this is intended.
     *
     * @param uri              the predicate.
     * @param str              the content value, used as literal.
     * @param str2             the literal language, or {@code null} for a plain literal.
     * @param uri2             the document URI, used as subject.
     * @param extractionResult sink for the triple.
     */
    private void processMetaElement(URI uri, String str, String str2, URI uri2, ExtractionResult extractionResult) {
        if (str.contains(":")) {
            return;
        }
        extractionResult.writeTriple(uri2, uri, str2 == null ? RDFUtils.literal(str) : RDFUtils.literal(str, str2));
    }

    /**
     * Writes a meta triple whose predicate is the XHTML namespace followed by the lower-cased name.
     *
     * @param str              the meta name.
     * @param str2             the content value, used as literal.
     * @param str3             the literal language, or {@code null} for a plain literal.
     * @param uri              the document URI, used as subject.
     * @param extractionResult sink for the triple.
     */
    private void processMetaElement(String str, String str2, String str3, URI uri, ExtractionResult extractionResult) {
        // Locale.ROOT keeps the predicate independent of the JVM default locale.
        final URI predicate = RDFUtils.uri(XHTML.NS + str.toLowerCase(Locale.ROOT));
        extractionResult.writeTriple(uri, predicate, str3 == null ? RDFUtils.literal(str2) : RDFUtils.literal(str2, str3));
    }

    /**
     * Processes every {@code blockquote} and {@code q} element of the document, in that order.
     *
     * @param document         the document to inspect.
     * @param uri              the document URI.
     * @param extractionResult sink for the triples.
     */
    private void processCiteElements(Document document, URI uri, ExtractionResult extractionResult) {
        // HtmlBlockQuote.TAG_NAME is used only for its string value (the tag name of block quotes).
        final String[] tagNames = {HtmlBlockQuote.TAG_NAME, "q"};
        for (String tagName : tagNames) {
            final NodeList elements = document.getElementsByTagName(tagName);
            for (int i = 0; i < elements.getLength(); i++) {
                processCiteElement(elements.item(i), uri, extractionResult);
            }
        }
    }

    /**
     * Writes a {@code dcterms:source} triple for an element that carries a citation attribute.
     *
     * <p>The attribute value is used as the object URI as-is.
     * NOTE(review): the value is not resolved against the document URI; relative citation
     * values presumably need handling by the caller or by {@code RDFUtils.uri} - confirm.
     *
     * @param node             the element to inspect.
     * @param uri              the document URI, used as subject.
     * @param extractionResult sink for the triple.
     */
    private void processCiteElement(Node node, URI uri, ExtractionResult extractionResult) {
        // HtmlCitation.TAG_NAME doubles as the attribute name here (presumably "cite").
        final Node cite = attribute(node, HtmlCitation.TAG_NAME);
        if (cite != null) {
            extractionResult.writeTriple(uri, DCTERMS.getInstance().source, RDFUtils.uri(cite.getTextContent()));
        }
    }

    /**
     * Writes the triples describing one item scope and returns the resource standing for it.
     *
     * <p>The subject is, in order of preference: the resource already recorded in {@code map},
     * the item id when it is an absolute URL, or a blank node named after the scope's hash code.
     *
     * @param itemScope        the scope to convert.
     * @param uri              the document URI, used to resolve link values.
     * @param extractionResult sink for the triples.
     * @param map              subjects already assigned to scopes; updated by this call.
     * @return the subject representing {@code itemScope}.
     * @throws ExtractionException if one of the properties cannot be converted to a URL.
     */
    private Resource processType(ItemScope itemScope, URI uri, ExtractionResult extractionResult, Map<ItemScope, Resource> map) throws ExtractionException {
        final Resource subject;
        if (map.containsKey(itemScope)) {
            subject = map.get(itemScope);
        } else if (isAbsoluteURL(itemScope.getItemId())) {
            subject = RDFUtils.uri(itemScope.getItemId());
        } else {
            subject = RDFUtils.getBNode(Integer.toString(itemScope.hashCode()));
        }
        map.put(itemScope, subject);

        // Empty string means "the item has no type"; processProperty relies on that marker.
        String itemType = "";
        if (itemScope.getType() != null) {
            itemType = itemScope.getType().toString();
            extractionResult.writeTriple(subject, RDF.TYPE, RDFUtils.uri(itemType));
        }

        for (String propertyName : itemScope.getProperties().keySet()) {
            for (ItemProp itemProp : itemScope.getProperties().get(propertyName)) {
                try {
                    processProperty(subject, propertyName, itemProp, itemType, uri, map, extractionResult);
                } catch (MalformedURLException e) {
                    // Keep the original failure attached so the offending URL is not lost.
                    throw new ExtractionException("Error while processing on subject '" + subject + "' the itemProp: '" + itemProp + "' ", e);
                }
            }
        }
        return subject;
    }

    /**
     * Writes the triple for a single item property.
     *
     * <p>The predicate is the property name itself when it is an absolute URL; otherwise the
     * name is appended to the item type, or to the default namespace when the item has no type.
     * In strict mode, non-absolute names of untyped items are skipped.
     *
     * @param resource         the subject of the triple.
     * @param str              the property name.
     * @param itemProp         the property whose value is written.
     * @param str2             the item type, or the empty string when the item has no type.
     * @param uri              the document URI, used to resolve link values.
     * @param map              subjects already assigned to scopes, passed on for nested items.
     * @param extractionResult sink for the triple.
     * @throws MalformedURLException if the predicate or a link value cannot be built.
     * @throws ExtractionException   if a nested item fails to be processed.
     * @throws RuntimeException      if the value has a type other than nested, plain, link or date.
     */
    private void processProperty(Resource resource, String str, ItemProp itemProp, String str2, URI uri, Map<ItemScope, Resource> map, ExtractionResult extractionResult) throws MalformedURLException, ExtractionException {
        final boolean absoluteName = isAbsoluteURL(str);
        final boolean untyped = str2.equals("");
        if (!absoluteName && untyped && this.isStrict) {
            return;
        }

        // Past the guard above, an untyped item with a relative name implies non-strict mode,
        // which is the only case where the default namespace is used.
        final String predicateBase = (absoluteName || !untyped) ? str2 : this.defaultNamespace;
        final URI predicate = RDFUtils.uri(toAbsoluteURL(predicateBase, str, '/').toString());

        final Object content = itemProp.getValue().getContent();
        final ItemPropValue.Type type = itemProp.getValue().getType();
        final Value object;
        if (type.equals(ItemPropValue.Type.Nested)) {
            object = processType((ItemScope) content, uri, extractionResult, map);
        } else if (type.equals(ItemPropValue.Type.Plain)) {
            // Same null-language handling as the title and meta literals.
            object = this.documentLanguage == null
                    ? RDFUtils.literal((String) content)
                    : RDFUtils.literal((String) content, this.documentLanguage);
        } else if (type.equals(ItemPropValue.Type.Link)) {
            object = RDFUtils.uri(toAbsoluteURL(uri.toString(), (String) content, '/').toString());
        } else if (type.equals(ItemPropValue.Type.Date)) {
            object = RDFUtils.literal(ItemPropValue.formatDateTime((Date) content), XMLSchema.DATE);
        } else {
            throw new RuntimeException("Invalid Type '" + type + "' for ItemPropValue with name: '" + str + "'");
        }
        extractionResult.writeTriple(resource, predicate, object);
    }

    /**
     * Tells whether a string parses as a URL with a non-blank protocol.
     *
     * @param str the candidate string; may be {@code null}.
     * @return {@code true} if {@link URL} accepts the string and reports a non-blank protocol.
     */
    private boolean isAbsoluteURL(String str) {
        try {
            final String protocol = new URL(str).getProtocol();
            return protocol != null && protocol.trim().length() > 0;
        } catch (MalformedURLException e) {
            // Not parseable as a URL, hence not absolute.
            return false;
        }
    }

    /**
     * Builds an absolute URL from a base and a reference by string concatenation.
     *
     * <p>An absolute reference is returned unchanged. Otherwise the reference is appended to the
     * base, inserting {@code c} unless the base already ends with {@code #} or {@code /}.
     *
     * @param str  the base; only used when {@code str2} is not absolute.
     * @param str2 the reference to resolve.
     * @param c    the separator inserted between base and reference when needed.
     * @return the resulting URL.
     * @throws MalformedURLException if the result is not a valid URL, or if a relative reference
     *                               is given together with a {@code null} or empty base.
     */
    private URL toAbsoluteURL(String str, String str2, char c) throws MalformedURLException {
        if (isAbsoluteURL(str2)) {
            return new URL(str2);
        }
        if (str == null || str.isEmpty()) {
            // Report a missing base as the declared checked exception instead of failing on charAt.
            throw new MalformedURLException("Cannot resolve '" + str2 + "' against an empty base");
        }
        final char last = str.charAt(str.length() - 1);
        final boolean hasSeparator = last == '#' || last == '/';
        return new URL(hasSeparator ? str + str2 : str + c + str2);
    }

    /**
     * Reports every parser error as an issue of level {@code Error}, using the error's JSON form
     * as message and its begin row/column as location.
     *
     * @param microdataParserExceptionArr the errors to report.
     * @param extractionResult            sink for the issues.
     */
    private void notifyError(MicrodataParserException[] microdataParserExceptionArr, ExtractionResult extractionResult) {
        for (MicrodataParserException error : microdataParserExceptionArr) {
            extractionResult.notifyIssue(IssueReport.IssueLevel.Error, error.toJSON(), error.getErrorLocationBeginRow(), error.getErrorLocationBeginCol());
        }
    }
}
