package org.apache.any23.extractor.microdata;

import java.io.FileReader;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.any23.Any23;
import org.apache.any23.Any23OnlineTestBase;
import org.apache.any23.configuration.DefaultConfiguration;
import org.apache.any23.configuration.ModifiableConfiguration;
import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.IssueReport;
import org.apache.any23.extractor.html.AbstractExtractorTestCase;
import org.apache.any23.rdf.RDFUtils;
import org.apache.any23.source.HTTPDocumentSource;
import org.apache.any23.writer.TripleWriterHandler;
import org.eclipse.rdf4j.model.BNode;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.impl.TreeModel;
import org.eclipse.rdf4j.model.util.Models;
import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.eclipse.rdf4j.model.vocabulary.RDFS;
import org.eclipse.rdf4j.repository.RepositoryException;
import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.RDFHandler;
import org.eclipse.rdf4j.rio.RDFHandlerException;
import org.eclipse.rdf4j.rio.RDFParseException;
import org.eclipse.rdf4j.rio.RDFParser;
import org.eclipse.rdf4j.rio.Rio;
import org.junit.Assert;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/any23/extractor/microdata/MicrodataExtractorTest.class */
public class MicrodataExtractorTest extends AbstractExtractorTestCase {
    /** Class-wide SLF4J logger; the tests in this class use it for debug-level output. */
    private static final Logger logger = LoggerFactory.getLogger(MicrodataExtractorTest.class);
    /** Names of online manifest tests that {@code runOnlineTests()} counts as ignored instead of executing. */
    private static final List<String> ignoredOnlineTestNames = Arrays.asList("Test 0073", "Test 0074");

    /* loaded from: input_file:org/apache/any23/extractor/microdata/MicrodataExtractorTest$TestRDFHandler.class */
    /**
     * {@link RDFHandler} that records every statement it is handed, in arrival order.
     *
     * <p>Start/end and comment callbacks are no-ops; namespace declarations are rejected with
     * {@link UnsupportedOperationException}.
     */
    public static class TestRDFHandler implements RDFHandler {
        /** Statements received so far through {@link #handleStatement(Statement)}. */
        private final List<Statement> statements = new ArrayList<>();

        /**
         * Returns the live (not copied) list of collected statements.
         *
         * @return the statements received so far
         */
        protected List<Statement> getStatements() {
            return this.statements;
        }

        /** No-op. */
        public void startRDF() throws RDFHandlerException {
        }

        /** No-op. */
        public void endRDF() throws RDFHandlerException {
        }

        /**
         * Namespace declarations are not supported by this handler.
         *
         * @throws UnsupportedOperationException always
         */
        public void handleNamespace(String str, String str2) throws RDFHandlerException {
            throw new UnsupportedOperationException();
        }

        /**
         * Appends the given statement to the collected list.
         *
         * @param statement the statement to record
         */
        public void handleStatement(Statement statement) throws RDFHandlerException {
            this.statements.add(statement);
        }

        /** No-op; comments are discarded. */
        public void handleComment(String str) throws RDFHandlerException {
        }
    }

    /**
     * Supplies the factory of the extractor under test.
     *
     * @return a new {@link MicrodataExtractorFactory}
     */
    @Override
    protected ExtractorFactory<?> getExtractorFactory() {
        final MicrodataExtractorFactory factory = new MicrodataExtractorFactory();
        return factory;
    }

    /**
     * Extracts {@code microdata-nested.html}, verifies the result against
     * {@code microdata-nested-expected.nquads}, then logs the output of
     * {@code dumpModelToNQuads()} at debug level.
     */
    @Test
    public void testSchemaOrgNestedProps() throws RepositoryException, RDFHandlerException, IOException, RDFParseException, ExtractionException {
        final String inputPage = "microdata-nested.html";
        final String expectedQuads = "microdata-nested-expected.nquads";
        extractAndVerifyAgainstNQuads(inputPage, expectedQuads);

        final String modelDump = dumpModelToNQuads();
        logger.debug(modelDump);
    }

    /**
     * Extracts {@code /microdata/unused-itemprop.html} and checks that some subject is typed
     * as {@code http://schema.org/Offer}.
     */
    @Test
    public void testUnusedItemprop() {
        assertExtract("/microdata/unused-itemprop.html");

        // A null subject is passed where the original used an explicit (Resource) null.
        final Resource anySubject = null;
        final Value offerType = RDFUtils.iri("http://schema.org/Offer");
        assertContains(anySubject, RDF.TYPE, offerType);
    }

    /**
     * Extracts {@code /microdata/example2.html} and checks for an hCard type statement plus
     * statements using the hCard {@code given-name} and {@code n} predicates.
     */
    @Test
    public void testExample2() {
        assertExtract("/microdata/example2.html");

        // Null subject/object are passed with the same static types the original casts used.
        final Resource anySubject = null;
        final Value anyObject = null;
        final Value hcardType = RDFUtils.iri("http://microformats.org/profile/hcard");

        assertContains(anySubject, RDF.TYPE, hcardType);
        assertContains(anySubject, RDFUtils.iri("http://microformats.org/profile/hcard#given-name"), anyObject);
        assertContains(anySubject, RDFUtils.iri("http://microformats.org/profile/hcard#n"), anyObject);
    }

    /**
     * Extracts {@code /microdata/example5.html} and checks the schema.org and FOAF person types,
     * the {@code additionalType} link between them, and the {@code mailto:} value under both the
     * schema.org {@code email} and FOAF {@code mbox} predicates.
     */
    @Test
    public void testExample5() {
        assertExtract("/microdata/example5.html");

        final Resource anySubject = null;
        final Value schemaPerson = RDFUtils.iri("http://schema.org/Person");
        final Value foafPerson = RDFUtils.iri("http://xmlns.com/foaf/0.1/Person");
        final Value mailbox = RDFUtils.iri("mailto:mail@gmail.com");

        assertContains(anySubject, RDF.TYPE, schemaPerson);
        assertContains(anySubject, RDF.TYPE, foafPerson);
        assertContains(anySubject, RDFUtils.iri("http://schema.org/additionalType"), foafPerson);
        assertContains(anySubject, RDFUtils.iri("http://schema.org/email"), mailbox);
        assertContains(anySubject, RDFUtils.iri("http://xmlns.com/foaf/0.1/mbox"), mailbox);
    }

    /**
     * Builds an {@link Any23} instance from a copy of the default configuration with
     * {@code any23.microdata.strict} set to {@code "on"} and a fixed test HTTP user agent.
     *
     * @param str the single element of the string array handed to the {@link Any23} constructor
     *            (callers in this class pass {@code "rdf-turtle"} or {@code "html-microdata"};
     *            presumably an extractor name)
     * @return the configured runner
     */
    private static Any23 createRunner(String str) {
        final ModifiableConfiguration config = DefaultConfiguration.copy();
        config.setProperty("any23.microdata.strict", "on");

        final String[] constructorNames = {str};
        final Any23 runner = new Any23(config, constructorNames);
        runner.setHTTPUserAgent("apache-any23-test-user-agent");
        return runner;
    }

    /**
     * Runs the online test manifest at
     * {@code https://w3c.github.io/microdata-rdf/tests/manifest.ttl} against the microdata extractor.
     *
     * <p>The manifest is read with an {@code rdf-turtle} runner and indexed by subject and predicate.
     * Every manifest entry whose {@code rdf:type} starts with
     * {@code http://www.w3.org/ns/rdftest#TestMicrodataNegative} (negative test) or
     * {@code http://www.w3.org/ns/rdftest#TestMicrodata} (positive test) is processed in a parallel
     * stream; entries with no such type are skipped. For each test the action document is extracted
     * with an {@code html-microdata} runner, the result document (if any) with an {@code rdf-turtle}
     * runner, and the test passes when graph isomorphism of the two models equals the expected
     * polarity. Tests named in {@code ignoredOnlineTestNames} are only counted as ignored.
     *
     * <p>Fails with a summary of all mismatches and exceptions when at least one test did not pass.
     *
     * @throws Exception if reading the manifest fails
     */
    @Test
    public void runOnlineTests() throws Exception {
        Any23OnlineTestBase.assumeOnlineAllowed();

        Any23 manifestRunner = createRunner("rdf-turtle");
        HTTPDocumentSource manifestSource = new HTTPDocumentSource(manifestRunner.getHTTPClient(), "https://w3c.github.io/microdata-rdf/tests/manifest.ttl");

        // subject -> predicate -> objects, in the order the triples were written
        final Map<Resource, Map<IRI, ArrayDeque<Value>>> manifest = new HashMap<>(256);
        manifestRunner.extract(manifestSource, new TripleWriterHandler() {
            public void writeTriple(Resource resource, IRI iri, Value value, Resource resource2) {
                manifest.computeIfAbsent(resource, key -> new HashMap<>()).computeIfAbsent(iri, key -> new ArrayDeque<>()).add(value);
            }

            public void writeNamespace(String str, String str2) {
            }

            public void close() {
            }
        });
        Assert.assertFalse(manifest.isEmpty());

        final IRI actionPred = RDFUtils.iri("http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#action");
        final IRI resultPred = RDFUtils.iri("http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#result");
        final IRI namePred = RDFUtils.iri("http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#name");

        final AtomicInteger passed = new AtomicInteger();
        final AtomicInteger ignored = new AtomicInteger();
        // Sorted by test title; synchronized because entries are added from the parallel stream.
        final Map<String, String> failures = Collections.synchronizedMap(new TreeMap<>());

        manifest.values().parallelStream().forEach(entry -> {
            ArrayDeque<Value> types = entry.get(RDF.TYPE);
            if (types == null) {
                return;
            }
            // The first recognized type decides the polarity. The negative prefix must be tested
            // first because the positive prefix is itself a prefix of it.
            boolean recognized = false;
            boolean positive = false;
            for (Value type : types) {
                String typeIri = type.stringValue();
                if (typeIri.startsWith("http://www.w3.org/ns/rdftest#TestMicrodataNegative")) {
                    recognized = true;
                    positive = false;
                    break;
                }
                if (typeIri.startsWith("http://www.w3.org/ns/rdftest#TestMicrodata")) {
                    recognized = true;
                    positive = true;
                    break;
                }
            }
            if (!recognized) {
                // Not a microdata test entry (e.g. some other manifest resource).
                return;
            }

            IRI action = (IRI) entry.get(actionPred).pop();
            ArrayDeque<Value> results = entry.get(resultPred);
            IRI result = results == null ? null : (IRI) results.pop();
            String name = ((Literal) entry.get(namePred).pop()).getLabel();
            if (ignoredOnlineTestNames.contains(name)) {
                ignored.incrementAndGet();
                return;
            }
            try {
                String title = name + ": " + ((Literal) entry.get(RDFS.COMMENT).pop()).getLabel();
                TreeModel actual = extractOnlineTestAction(action);
                TreeModel expected = extractOnlineTestResult(result);
                if (positive == Models.isomorphic(expected, actual)) {
                    passed.incrementAndGet();
                } else {
                    failures.put(title, describeOnlineTestFailure(title, action, result, positive, expected, actual));
                }
            } catch (Exception e) {
                // Recorded per test so one failing document does not abort the whole run.
                failures.put(name, "\n" + e.toString() + "\n");
            }
        });

        logger.debug("passed={}; ignored={}", passed.get(), ignored.get());
        if (failures.isEmpty()) {
            return;
        }
        Assert.fail(failures.size() + " failures out of " + (failures.size() + passed.get()) + " total tests\n" + String.join("\n", failures.keySet()) + "\n\n" + String.join("\n", failures.values()));
    }

    /**
     * Extracts the test's action document with an {@code html-microdata} runner, dropping every
     * triple whose predicate equals {@code MicrodataExtractor.MICRODATA_ITEM}.
     */
    private static TreeModel extractOnlineTestAction(IRI action) throws Exception {
        final TreeModel actual = new TreeModel();
        createRunner("html-microdata").extract(action.stringValue(), new TripleWriterHandler() {
            public void writeTriple(Resource resource, IRI iri, Value value, Resource resource2) {
                if (MicrodataExtractor.MICRODATA_ITEM.equals(iri)) {
                    return;
                }
                actual.add(resource, iri, value, new Resource[0]);
            }

            public void writeNamespace(String str, String str2) {
            }

            public void close() {
            }
        });
        return actual;
    }

    /**
     * Extracts the test's expected-result document with an {@code rdf-turtle} runner; returns an
     * empty model when the test declares no result.
     */
    private static TreeModel extractOnlineTestResult(IRI result) throws Exception {
        final TreeModel expected = new TreeModel();
        if (result == null) {
            return expected;
        }
        createRunner("rdf-turtle").extract(result.stringValue(), new TripleWriterHandler() {
            public void writeTriple(Resource resource, IRI iri, Value value, Resource resource2) {
                Value object = value;
                // One IRI object in the expected data uses http:; it is rewritten to the https: form.
                if ((object instanceof IRI) && object.stringValue().equals("http://w3c.github.io/author/jd_salinger.html")) {
                    object = RDFUtils.iri("https://w3c.github.io/author/jd_salinger.html");
                }
                expected.add(resource, iri, object, new Resource[0]);
            }

            public void writeNamespace(String str, String str2) {
            }

            public void close() {
            }
        });
        return expected;
    }

    /**
     * Builds the failure report for one test: a header line, for positive tests the expected
     * statements without a loose counterpart, the count of expected statements that do have one,
     * and for positive tests the actual statements without a loose counterpart.
     */
    private static String describeOnlineTestFailure(String title, IRI action, IRI result, boolean positive, TreeModel expected, TreeModel actual) {
        StringBuilder report = new StringBuilder("\n" + title + "\n");
        report.append(action).append(positive ? " ==> " : " =/=> ").append(result).append("\n");

        // Blank nodes are printed as _:0, _:1, ... in order of first appearance.
        Map<Value, String> bnodeLabels = new HashMap<>();
        int common = 0;
        for (Statement statement : expected) {
            if (hasLooseCounterpart(statement, actual)) {
                common++;
            } else if (positive) {
                appendLabelledStatement(report, "EXPECT: ", statement, bnodeLabels);
            }
        }
        report.append("...").append(common).append(" statements in common...\n");
        for (Statement statement : actual) {
            if (positive && !hasLooseCounterpart(statement, expected)) {
                appendLabelledStatement(report, "ACTUAL: ", statement, bnodeLabels);
            }
        }
        return report.toString();
    }

    /**
     * Tells whether {@code candidates} holds a statement with the same predicate whose subject and
     * object match those of {@code wanted}, where any blank node matches any other blank node.
     */
    private static boolean hasLooseCounterpart(Statement wanted, TreeModel candidates) {
        final Resource subject = wanted.getSubject();
        final Value object = wanted.getObject();
        return candidates.stream().anyMatch(candidate ->
                wanted.getPredicate().equals(candidate.getPredicate())
                        && (subject instanceof BNode ? candidate.getSubject() instanceof BNode : subject.equals(candidate.getSubject()))
                        && (object instanceof BNode ? candidate.getObject() instanceof BNode : object.equals(candidate.getObject())));
    }

    /** Appends {@code prefix subject predicate object\n}, substituting stable labels for blank nodes. */
    private static void appendLabelledStatement(StringBuilder report, String prefix, Statement statement, Map<Value, String> bnodeLabels) {
        report.append(prefix)
                .append(labelOnlineTestValue(statement.getSubject(), bnodeLabels))
                .append(" ").append(statement.getPredicate()).append(" ")
                .append(labelOnlineTestValue(statement.getObject(), bnodeLabels))
                .append("\n");
    }

    /** Returns the value's string form, or its sequential {@code _:N} label when it is a blank node. */
    private static String labelOnlineTestValue(Value value, Map<Value, String> bnodeLabels) {
        if (value instanceof BNode) {
            // The map size at insertion time is the next unused sequence number.
            return bnodeLabels.computeIfAbsent(value, key -> "_:" + bnodeLabels.size());
        }
        return String.valueOf(value);
    }

    /**
     * Extracts {@code /microdata/microdata-basic.html} and checks the statement counts:
     * 40 for the all-null pattern and 4 with subject {@code urn:isbn:0-330-34032-8}.
     */
    @Test
    public void testMicrodataBasic() {
        assertExtract("/microdata/microdata-basic.html");
        assertModelNotEmpty();

        final int expectedTotal = 40;
        assertStatementsSize(null, null, null, expectedTotal);

        final IRI book = RDFUtils.iri("urn:isbn:0-330-34032-8");
        final int expectedForBook = 4;
        assertStatementsSize(book, null, null, expectedForBook);
    }

    /**
     * Extracts {@code /microdata/microdata-missing-scheme.html} and checks that some subject is
     * typed as {@code http://schema.org/Answer}.
     */
    @Test
    public void testMicrodataMissingScheme() {
        assertExtract("/microdata/microdata-missing-scheme.html");
        assertModelNotEmpty();

        final Resource anySubject = null;
        final Value answerType = RDFUtils.iri("http://schema.org/Answer");
        assertContains(anySubject, RDF.TYPE, answerType);
    }

    /**
     * Extracts {@code microdata-richsnippet.html}, verifies it against
     * {@code microdata-richsnippet-expected.nquads}, then logs the output of
     * {@code dumpHumanReadableTriples()} at debug level.
     */
    @Test
    public void testMicrodataGoogleRichSnippet() throws RDFHandlerException, RepositoryException, IOException, RDFParseException {
        final String inputPage = "microdata-richsnippet.html";
        final String expectedQuads = "microdata-richsnippet-expected.nquads";
        extractAndVerifyAgainstNQuads(inputPage, expectedQuads);

        final String triples = dumpHumanReadableTriples();
        logger.debug(triples);
    }

    /**
     * Extracts {@code 5.2.1-non-normative-example-1.html}, verifies it against
     * {@code 5.2.1-non-normative-example-1-expected.nquads}, then logs the output of
     * {@code dumpHumanReadableTriples()} at debug level.
     */
    @Test
    public void testExample5221() throws RDFHandlerException, RepositoryException, IOException, RDFParseException {
        final String inputPage = "5.2.1-non-normative-example-1.html";
        final String expectedQuads = "5.2.1-non-normative-example-1-expected.nquads";
        extractAndVerifyAgainstNQuads(inputPage, expectedQuads);

        final String triples = dumpHumanReadableTriples();
        logger.debug(triples);
    }

    /**
     * Extracts {@code 5.2.1-non-normative-example-2.html}, verifies it against
     * {@code 5.2.1-non-normative-example-2-expected.nquads}, then logs the output of
     * {@code dumpHumanReadableTriples()} at debug level.
     */
    @Test
    public void testExample5222() throws RDFHandlerException, RepositoryException, IOException, RDFParseException {
        final String inputPage = "5.2.1-non-normative-example-2.html";
        final String expectedQuads = "5.2.1-non-normative-example-2-expected.nquads";
        extractAndVerifyAgainstNQuads(inputPage, expectedQuads);

        final String triples = dumpHumanReadableTriples();
        logger.debug(triples);
    }

    /**
     * Extracts {@code schemaorg-example-1.html}, verifies it against
     * {@code schemaorg-example-1-expected.nquads}, then logs the output of
     * {@code dumpHumanReadableTriples()} at debug level.
     */
    @Test
    public void testExampleSchemaOrg1() throws RDFHandlerException, RepositoryException, IOException, RDFParseException {
        final String inputPage = "schemaorg-example-1.html";
        final String expectedQuads = "schemaorg-example-1-expected.nquads";
        extractAndVerifyAgainstNQuads(inputPage, expectedQuads);

        final String triples = dumpHumanReadableTriples();
        logger.debug(triples);
    }

    /**
     * Extracts {@code schemaorg-example-2.html}, verifies it against
     * {@code schemaorg-example-2-expected.nquads}, then logs the output of
     * {@code dumpHumanReadableTriples()} at debug level.
     */
    @Test
    public void testExampleSchemaOrg2() throws RDFHandlerException, RepositoryException, IOException, RDFParseException {
        final String inputPage = "schemaorg-example-2.html";
        final String expectedQuads = "schemaorg-example-2-expected.nquads";
        extractAndVerifyAgainstNQuads(inputPage, expectedQuads);

        final String triples = dumpHumanReadableTriples();
        logger.debug(triples);
    }

    /**
     * Verifies {@code microdata-nested-url-resolving.html} against its expected N-Quads while
     * {@code baseIRI} is temporarily set to
     * {@code https://ruben.verborgh.org/tmp/schemaorg-test.html}; the previous value of
     * {@code baseIRI} is restored afterwards even if verification fails.
     */
    @Test
    public void testMicrodataNestedUrlResolving() throws IOException {
        final IRI previousBase = baseIRI;
        try {
            final IRI temporaryBase = RDFUtils.iri("https://ruben.verborgh.org/tmp/schemaorg-test.html");
            baseIRI = temporaryBase;
            extractAndVerifyAgainstNQuads(
                    "microdata-nested-url-resolving.html",
                    "microdata-nested-url-resolving-expected.nquads");
        } finally {
            baseIRI = previousBase;
        }
    }

    /**
     * Extracts {@code /microdata/tel-test.html} and checks that the
     * {@code http://schema.org/telephone} predicate appears with the
     * {@code tel:(909)%20484-2020} IRI.
     */
    @Test
    public void testTel() {
        assertExtract("/microdata/tel-test.html");
        assertModelNotEmpty();

        final IRI telephone = RDFUtils.iri("http://schema.org/telephone");
        final Resource number = RDFUtils.iri("tel:(909)%20484-2020");
        assertContains(telephone, number);
    }

    /**
     * Extracts {@code microdata-bad-types.html} and verifies the result against
     * {@code microdata-bad-types-expected.nquads}.
     */
    @Test
    public void testBadTypes() throws IOException {
        final String inputPage = "microdata-bad-types.html";
        final String expectedQuads = "microdata-bad-types-expected.nquads";
        extractAndVerifyAgainstNQuads(inputPage, expectedQuads);
    }

    /**
     * Extracts {@code microdata-bad-properties.html} with the boolean flag set to {@code false},
     * verifies the result against {@code microdata-bad-properties-expected.nquads}, and then
     * asserts that an ERROR-level issue matching the invalid-property-name pattern was reported.
     */
    @Test
    public void testBadPropertyNames() throws IOException {
        final String inputPage = "microdata-bad-properties.html";
        final String expectedQuads = "microdata-bad-properties-expected.nquads";
        extractAndVerifyAgainstNQuads(inputPage, expectedQuads, false);

        final String issuePattern = ".*invalid property name ''.*\"path\" : \"/HTML\\[1\\]/BODY\\[1\\]/DIV\\[1\\]/DIV\\[2\\]/DIV\\[1\\]\".*";
        assertIssue(IssueReport.IssueLevel.ERROR, issuePattern);
    }

    /**
     * Convenience overload that delegates to the three-argument variant with the flag set to
     * {@code true}.
     *
     * @param str  name of the HTML resource
     * @param str2 name of the expected N-Quads resource
     */
    private void extractAndVerifyAgainstNQuads(String str, String str2) throws RepositoryException, RDFHandlerException, IOException, RDFParseException {
        final boolean defaultFlag = true;
        extractAndVerifyAgainstNQuads(str, str2, defaultFlag);
    }

    /**
     * Extracts {@code /microdata/<str>} and compares the result with the statements parsed from
     * {@code /microdata/<str2>}.
     *
     * <p>Checks, in order: the model is not empty; the statement counts agree; every expected
     * statement is contained (blank-node subjects and objects are passed as {@code null}); and the
     * expected and exported models are isomorphic once contexts are dropped.
     *
     * @param str  name of the HTML resource under {@code /microdata/}
     * @param str2 name of the expected N-Quads resource under {@code /microdata/}
     * @param z    flag forwarded unchanged to {@code assertExtract(String, boolean)}
     */
    private void extractAndVerifyAgainstNQuads(String str, String str2, boolean z) throws RepositoryException, RDFHandlerException, IOException, RDFParseException {
        final String resourceDir = "/microdata/";
        assertExtract(resourceDir + str, z);
        assertModelNotEmpty();
        logger.debug(dumpModelToNQuads());

        final List<Statement> expectedStatements = loadResultStatement(resourceDir + str2);
        Assert.assertEquals(expectedStatements.size(), getStatementsSize(null, null, null));

        // Single pass: check containment and collect the context-free expected model.
        final TreeModel expectedModel = new TreeModel();
        for (Statement expected : expectedStatements) {
            final Resource subject = expected.getSubject();
            final Value object = expected.getObject();
            assertContains(subject instanceof BNode ? null : subject, expected.getPredicate(), object instanceof BNode ? null : object);
            expectedModel.add(subject, expected.getPredicate(), object, new Resource[0]);
        }

        final TreeModel actualModel = new TreeModel();
        final RDFHandler collector = new RDFHandler() {
            public void startRDF() throws RDFHandlerException {
            }

            public void endRDF() throws RDFHandlerException {
            }

            public void handleNamespace(String prefix, String uri) throws RDFHandlerException {
            }

            public void handleStatement(Statement exported) throws RDFHandlerException {
                actualModel.add(exported.getSubject(), exported.getPredicate(), exported.getObject(), new Resource[0]);
            }

            public void handleComment(String comment) throws RDFHandlerException {
            }
        };
        this.conn.export(collector, new Resource[0]);

        final boolean sameGraph = Models.isomorphic(expectedModel, actualModel);
        Assert.assertTrue("Models are not isomorphic", sameGraph);
    }

    /**
     * Parses the given classpath resource as N-Quads and returns its statements.
     *
     * <p>The resource is first copied to a temporary file via {@code copyResourceToTempFile} and
     * read as UTF-8; {@code baseIRI} is used as the parser's base IRI. The reader is closed when
     * parsing finishes, whether it succeeds or throws.
     *
     * @param str path of the N-Quads resource
     * @return the parsed statements, in the order the parser reported them
     * @throws IOException         if the file cannot be opened or read
     * @throws RDFParseException   if the parser rejects the content
     * @throws RDFHandlerException if the collecting handler rejects a callback
     */
    private List<Statement> loadResultStatement(String str) throws RDFHandlerException, IOException, RDFParseException {
        RDFParser parser = Rio.createParser(RDFFormat.NQUADS);
        TestRDFHandler collector = new TestRDFHandler();
        parser.setRDFHandler(collector);
        // try-with-resources so the file handle is released even when parsing throws
        try (FileReader reader = new FileReader(copyResourceToTempFile(str), StandardCharsets.UTF_8)) {
            parser.parse(reader, baseIRI.stringValue());
        }
        return collector.getStatements();
    }
}
