package org.apache.any23.extractor.html;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.any23.extractor.ExtractionContext;
import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.extractor.ExtractionParameters;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.Extractor;
import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.extractor.rdf.JSONLDExtractor;
import org.apache.any23.extractor.rdf.JSONLDExtractorFactory;
import org.apache.any23.extractor.rdfa.RDFa11Parser;
import org.apache.any23.rdf.RDFUtils;
import org.apache.any23.vocab.SINDICE;
import org.apache.commons.io.IOUtils;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;

/* loaded from: input_file:org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.class */
/**
 * DOM extractor that locates {@code SCRIPT} elements carrying the
 * {@code application/ld+json} media type and hands their text content to a
 * {@link JSONLDExtractor}.
 *
 * <p>Each call to {@link #run} also records the document's {@code profile} and
 * {@code lang} attributes and the prefixes declared through
 * {@code /HTML/HEAD/LINK} elements. These are stored in instance fields, so an
 * instance carries mutable per-run state.
 */
public class EmbeddedJSONLDExtractor implements Extractor.TagSoupDOMExtractor {
    private static final SINDICE vSINDICE = SINDICE.getInstance();

    /** Attribute value that marks a {@code SCRIPT} element as JSON-LD. */
    private static final String JSON_LD_MEDIA_TYPE = "application/ld+json";

    /** Base namespace used when the document declares no {@code profile} attribute. */
    private static final String DEFAULT_PROFILE = "http://vocab.sindice.net/any23#";

    /** Value of {@code /HTML/@profile} for the current run, or {@code null} when empty. */
    private IRI profile;

    /** Prefixes declared via {@code LINK} elements ({@code rel} -> absolute {@code href}). */
    private final Map<String, IRI> prefixes = new HashMap<>();

    /** Value of {@code /HTML/@lang} for the current run, or {@code null} when empty. */
    private String documentLang;

    /** Delegate that parses the JSON-LD payloads; re-created on every run. */
    private JSONLDExtractor extractor;

    /**
     * Descriptor of a {@code SCRIPT} element, identified solely by the XPath of
     * the node it was built from.
     */
    public static class JSONLDScript {
        private final String xpath;

        /**
         * @param str  XPath of the originating node; used as the identity of this object
         * @param iri  accepted for signature compatibility; not stored
         * @param str2 accepted for signature compatibility; not stored
         */
        public JSONLDScript(String str, IRI iri, String str2) {
            this.xpath = str;
        }

        /** Two scripts are equal when their XPaths are equal (both {@code null} counts as equal). */
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            // instanceof is already false for null, so no separate null check is needed.
            if (!(obj instanceof JSONLDScript)) {
                return false;
            }
            JSONLDScript other = (JSONLDScript) obj;
            return this.xpath != null ? this.xpath.equals(other.xpath) : other.xpath == null;
        }

        /** Hash of the XPath, or {@code 0} when the XPath is {@code null}. */
        @Override
        public int hashCode() {
            return this.xpath != null ? this.xpath.hashCode() : 0;
        }
    }

    /**
     * Processes one document: refreshes the per-document state (profile,
     * language, link-defined prefixes) and forwards every embedded JSON-LD
     * script to the JSON-LD extractor.
     *
     * @param extractionParameters parameters forwarded to the JSON-LD extractor
     * @param extractionContext    context forwarded to the JSON-LD extractor
     * @param document             DOM of the document to inspect
     * @param extractionResult     sink forwarded to the JSON-LD extractor
     * @throws IOException         propagated from the JSON-LD extractor
     * @throws ExtractionException propagated from the JSON-LD extractor
     */
    public void run(ExtractionParameters extractionParameters, ExtractionContext extractionContext, Document document, ExtractionResult extractionResult) throws IOException, ExtractionException {
        this.profile = extractProfile(document);
        this.documentLang = getDocumentLanguage(document);
        // profile and documentLang are reset per run; reset the prefixes too so that
        // declarations from a previously processed document do not leak into this one.
        this.prefixes.clear();
        extractLinkDefinedPrefixes(document);
        String baseProfile = this.profile != null ? this.profile.toString() : DEFAULT_PROFILE;
        // The returned descriptors are not used here; the call is made for the
        // extraction it performs on the JSON-LD scripts.
        extractJSONLDScript(document, baseProfile, extractionParameters, extractionContext, extractionResult);
    }

    /**
     * Reads the {@code lang} attribute of the root {@code HTML} element.
     *
     * @return the attribute value, or {@code null} when the lookup yields an empty string
     */
    private String getDocumentLanguage(Document document) {
        String lang = DomUtils.find(document, "string(/HTML/@lang)");
        if ("".equals(lang)) {
            return null;
        }
        return lang;
    }

    /**
     * Reads the {@code profile} attribute of the root {@code HTML} element.
     *
     * @return the attribute value as an IRI, or {@code null} when the lookup yields an empty string
     */
    private IRI extractProfile(Document document) {
        String profileValue = DomUtils.find(document, "string(/HTML/@profile)");
        if ("".equals(profileValue)) {
            return null;
        }
        return SimpleValueFactory.getInstance().createIRI(profileValue);
    }

    /**
     * Registers a prefix for every {@code /HTML/HEAD/LINK} element that has both
     * a {@code rel} and an {@code href} attribute and whose {@code href} is an
     * absolute IRI. The {@code rel} value is the prefix name.
     */
    private void extractLinkDefinedPrefixes(Document document) {
        for (Node linkNode : DomUtils.findAll(document, "/HTML/HEAD/LINK")) {
            NamedNodeMap attributes = linkNode.getAttributes();
            Node relNode = attributes.getNamedItem(RDFa11Parser.REL_ATTRIBUTE);
            Node hrefNode = attributes.getNamedItem(RDFa11Parser.HREF_ATTRIBUTE);
            if (relNode == null || hrefNode == null) {
                continue;
            }
            String rel = relNode.getTextContent();
            String href = hrefNode.getTextContent();
            if (rel != null && href != null && RDFUtils.isAbsoluteIRI(href)) {
                this.prefixes.put(rel, SimpleValueFactory.getInstance().createIRI(href));
            }
        }
    }

    /**
     * Visits every {@code SCRIPT} element of the document. Elements marked as
     * JSON-LD are passed to the JSON-LD extractor; elements that carry both a
     * {@code name} and a {@code content} attribute are additionally collected
     * as {@link JSONLDScript} descriptors.
     *
     * @param document             DOM of the document to inspect
     * @param str                  base namespace used to build an IRI for names without a known prefix
     * @param extractionParameters parameters forwarded to the JSON-LD extractor
     * @param extractionContext    context forwarded to the JSON-LD extractor
     * @param extractionResult     sink forwarded to the JSON-LD extractor
     * @return the collected descriptors (distinct by XPath); possibly empty
     * @throws IOException         propagated from the JSON-LD extractor
     * @throws ExtractionException propagated from the JSON-LD extractor
     */
    private Set<JSONLDScript> extractJSONLDScript(Document document, String str, ExtractionParameters extractionParameters, ExtractionContext extractionContext, ExtractionResult extractionResult) throws IOException, ExtractionException {
        List<Node> scriptNodes = DomUtils.findAll(document, "//SCRIPT");
        Set<JSONLDScript> scripts = new HashSet<>();
        // NOTE(review): the "m109" prefix looks like a decompiler-generated rename of the
        // factory method; confirm the real method name against the library before building.
        this.extractor = new JSONLDExtractorFactory().m109createExtractor();
        for (Node scriptNode : scriptNodes) {
            NamedNodeMap attributes = scriptNode.getAttributes();
            // Run the delegate at most once per script, even if several attributes
            // happen to carry the JSON-LD media type.
            if (hasJsonLdAttribute(attributes)) {
                this.extractor.run(extractionParameters, extractionContext, IOUtils.toInputStream(scriptNode.getTextContent(), StandardCharsets.UTF_8), extractionResult);
            }
            Node nameNode = attributes.getNamedItem("name");
            Node contentNode = attributes.getNamedItem(RDFa11Parser.CONTENT_ATTRIBUTE);
            if (nameNode != null && contentNode != null) {
                String name = nameNode.getTextContent();
                String content = contentNode.getTextContent();
                String xpath = DomUtils.getXPathForNode(scriptNode);
                IRI nameIri = getPrefixIfExists(name);
                if (nameIri == null) {
                    nameIri = SimpleValueFactory.getInstance().createIRI(str + name);
                }
                scripts.add(new JSONLDScript(xpath, nameIri, content));
            }
        }
        return scripts;
    }

    /**
     * Tells whether any attribute in the given map has the JSON-LD media type
     * as its value (compared case-insensitively). The attribute name is not
     * inspected.
     */
    private static boolean hasJsonLdAttribute(NamedNodeMap attributes) {
        for (int i = 0; i < attributes.getLength(); i++) {
            if (JSON_LD_MEDIA_TYPE.equalsIgnoreCase(attributes.item(i).getTextContent())) {
                return true;
            }
        }
        return false;
    }

    /**
     * Resolves a name of the form {@code prefix.local} against the link-defined
     * prefixes.
     *
     * @param str the name to resolve
     * @return the prefix IRI concatenated with the local part, or {@code null} when
     *         the name does not split into exactly two dot-separated parts or the
     *         prefix is not registered
     */
    private IRI getPrefixIfExists(String str) {
        String[] parts = str.split("\\.");
        if (parts.length == 2 && this.prefixes.containsKey(parts[0])) {
            return SimpleValueFactory.getInstance().createIRI(this.prefixes.get(parts[0]) + parts[1]);
        }
        return null;
    }

    /** Returns the shared description instance provided by {@link EmbeddedJSONLDExtractorFactory}. */
    public ExtractorDescription getDescription() {
        return EmbeddedJSONLDExtractorFactory.getDescriptionInstance();
    }
}
