package org.apache.any23.extractor;

import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Locale;
import org.apache.any23.AbstractAny23TestBase;
import org.apache.any23.configuration.DefaultConfiguration;
import org.apache.any23.configuration.ModifiableConfiguration;
import org.apache.any23.extractor.html.HTMLFixture;
import org.apache.any23.mime.TikaMIMETypeDetector;
import org.apache.any23.mime.purifier.WhiteSpacesPurifier;
import org.apache.any23.vocab.ICAL;
import org.apache.any23.vocab.Review;
import org.apache.any23.vocab.SINDICE;
import org.apache.any23.vocab.VCard;
import org.apache.any23.writer.CompositeTripleHandler;
import org.apache.any23.writer.RDFXMLWriter;
import org.apache.any23.writer.RepositoryWriter;
import org.apache.any23.writer.TripleHandlerException;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.eclipse.rdf4j.repository.RepositoryConnection;
import org.eclipse.rdf4j.repository.RepositoryException;
import org.eclipse.rdf4j.repository.RepositoryResult;
import org.eclipse.rdf4j.repository.sail.SailRepository;
import org.eclipse.rdf4j.sail.Sail;
import org.eclipse.rdf4j.sail.SailException;
import org.eclipse.rdf4j.sail.memory.MemoryStore;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/any23/extractor/SingleDocumentExtractionTest.class */
/**
 * Test case for {@link SingleDocumentExtraction}.
 *
 * <p>Every test runs an extraction over an HTML fixture, writes the produced
 * triples both to an in-memory RDF4J repository and to an RDF/XML buffer, and
 * then checks the repository content with the {@code assertTriple*} helpers.
 */
public class SingleDocumentExtractionTest extends AbstractAny23TestBase {
    private static final SINDICE vSINDICE = SINDICE.getInstance();
    private static final ICAL vICAL = ICAL.getInstance();
    private static final Review vREVIEW = Review.getInstance();
    private static final VCard vVCARD = VCard.getInstance();
    private static final Logger logger = LoggerFactory.getLogger(SingleDocumentExtractionTest.class);

    /** Names of the SINDICE vocabulary properties checked by the tests. */
    private static final String DOMAIN = "domain";
    private static final String NESTING = "nesting";
    private static final String NESTING_ORIGINAL = "nesting_original";
    private static final String NESTING_STRUCTURED = "nesting_structured";

    /** Document IRI handed to the fixture opener, and the domain literal expected from it. */
    private static final String DOCUMENT_IRI = "http://nested.test.com";
    private static final String EXPECTED_DOMAIN = "nested.test.com";

    private SingleDocumentExtraction singleDocumentExtraction;
    private ExtractorGroup extractorGroup;
    private Sail store;
    private RepositoryConnection conn;
    RepositoryWriter repositoryWriter;
    ByteArrayOutputStream baos;
    RDFXMLWriter rdfxmlWriter;

    /**
     * Prepares the extractor group and opens a connection to a fresh in-memory store.
     *
     * @throws Exception if the parent set-up or the store initialisation fails.
     */
    @Override // org.apache.any23.AbstractAny23TestBase
    @Before
    public void setUp() throws Exception {
        super.setUp();
        this.extractorGroup = ExtractorRegistryImpl.getInstance().getExtractorGroup();
        this.store = new MemoryStore();
        this.store.init();
        this.conn = new SailRepository(this.store).getConnection();
    }

    /**
     * Closes the writers, logs the RDF/XML output and releases the repository resources.
     *
     * <p>Each resource is null-checked and released in its own {@code finally} step so that a
     * failure in set-up, or in closing one resource, neither raises a
     * {@link NullPointerException} nor prevents the remaining resources from being released.
     */
    @After
    public void tearDown() throws SailException, RepositoryException, TripleHandlerException {
        try {
            closeWritersAndLogOutput();
        } finally {
            this.singleDocumentExtraction = null;
            this.extractorGroup = null;
            try {
                if (this.conn != null) {
                    this.conn.close();
                }
            } finally {
                this.conn = null;
                if (this.store != null) {
                    this.store.shutDown();
                    this.store = null;
                }
            }
        }
    }

    /** Closes both triple writers (when they were created) and logs the buffered RDF/XML. */
    private void closeWritersAndLogOutput() throws TripleHandlerException {
        try {
            if (this.rdfxmlWriter != null) {
                this.rdfxmlWriter.close();
            }
        } finally {
            if (this.repositoryWriter != null) {
                this.repositoryWriter.close();
            }
        }
        // Decoding the buffer is only worth doing when the message is actually emitted.
        if (this.baos != null && logger.isDebugEnabled()) {
            logger.debug(this.baos.toString(StandardCharsets.UTF_8));
        }
    }

    /** Checks that exactly one domain triple is produced for the microformat-domains fixture. */
    @Test
    public void testMicroformatDomains() throws IOException, ExtractionException, RepositoryException {
        this.singleDocumentExtraction = getInstance("/microformats/microformat-domains.html");
        this.singleDocumentExtraction.run();
        logStorageContent();
        assertTripleCount(vSINDICE.getProperty(DOMAIN), EXPECTED_DOMAIN, 1);
    }

    /** Checks the domain and nesting triples produced for the nested-microformats-a1 fixture. */
    @Test
    public void testNestedMicroformats() throws IOException, ExtractionException, RepositoryException {
        this.singleDocumentExtraction = getInstance("/microformats/nested-microformats-a1.html");
        this.singleDocumentExtraction.run();
        logStorageContent();
        assertTripleCount(vSINDICE.getProperty(DOMAIN), EXPECTED_DOMAIN, 2);
        assertTriple(vSINDICE.getProperty(NESTING), (Value) null);
        assertTriple(vSINDICE.getProperty(NESTING_ORIGINAL), vICAL.summary);
        assertTriple(vSINDICE.getProperty(NESTING_STRUCTURED), (Value) null);
    }

    /** Checks that the nested-microformats-a3 fixture produces no nesting triples. */
    @Test
    public void testNestedVCardAdr() throws IOException, ExtractionException, RepositoryException {
        this.singleDocumentExtraction = getInstance("/microformats/nested-microformats-a3.html");
        this.singleDocumentExtraction.run();
        logStorageContent();
        assertTripleCount(vSINDICE.getProperty(NESTING_ORIGINAL), (Value) null, 0);
        assertTripleCount(vSINDICE.getProperty(NESTING_STRUCTURED), (Value) null, 0);
    }

    /** Checks the domain and nesting triples produced for the nested-microformats-a2 fixture. */
    @Test
    public void testNestedMicroformatsInduced() throws IOException, ExtractionException, RepositoryException {
        this.singleDocumentExtraction = getInstance("/microformats/nested-microformats-a2.html");
        this.singleDocumentExtraction.run();
        logStorageContent();
        assertTripleCount(vSINDICE.getProperty(DOMAIN), EXPECTED_DOMAIN, 2);
        assertTriple(vSINDICE.getProperty(NESTING), (Value) null);
        assertTriple(vSINDICE.getProperty(NESTING_ORIGINAL), vICAL.summary);
        assertTriple(vSINDICE.getProperty(NESTING_STRUCTURED), (Value) null);
    }

    /** Checks the domain, nesting and vCard url triples for the nested-microformats-managed fixture. */
    @Test
    public void testNestedMicroformatsManaged() throws IOException, ExtractionException, RepositoryException {
        this.singleDocumentExtraction = getInstance("/microformats/nested-microformats-managed.html");
        this.singleDocumentExtraction.run();
        logStorageContent();
        assertTripleCount(vSINDICE.getProperty(DOMAIN), EXPECTED_DOMAIN, 3);
        assertTripleCount(vSINDICE.getProperty(NESTING), (Value) null, 1);
        assertTripleCount(vSINDICE.getProperty(NESTING_ORIGINAL), vREVIEW.hasReview, 1);
        assertTripleCount(vVCARD.url, (Value) null, 1);
        // The structured nesting triple must point at the object of the single hasReview statement.
        final Value reviewObject = getTripleObject(null, vREVIEW.hasReview);
        assertTripleCount(vSINDICE.getProperty(NESTING_STRUCTURED), reviewObject, 1);
    }

    /**
     * Builds a {@link SingleDocumentExtraction} over the given classpath resource.
     *
     * <p>As a side effect this (re)creates {@link #baos}, {@link #rdfxmlWriter} and
     * {@link #repositoryWriter}, which receive the extracted triples.
     *
     * @param str path of the HTML fixture resource.
     * @return the configured extraction, not yet run.
     * @throws IOException if the fixture cannot be copied or opened.
     */
    private SingleDocumentExtraction getInstance(String str) throws FileNotFoundException, IOException {
        this.baos = new ByteArrayOutputStream();
        this.rdfxmlWriter = new RDFXMLWriter(this.baos);
        this.repositoryWriter = new RepositoryWriter(this.conn);

        final CompositeTripleHandler output = new CompositeTripleHandler();
        output.addChild(this.rdfxmlWriter);
        output.addChild(this.repositoryWriter);

        final ModifiableConfiguration configuration = DefaultConfiguration.copy();
        configuration.setProperty("any23.extraction.metadata.domain.per.entity", "on");

        final SingleDocumentExtraction extraction = new SingleDocumentExtraction(
                configuration,
                new HTMLFixture(copyResourceToTempFile(str)).getOpener(DOCUMENT_IRI),
                this.extractorGroup,
                output);
        extraction.setMIMETypeDetector(new TikaMIMETypeDetector(new WhiteSpacesPurifier()));
        return extraction;
    }

    /** Logs every statement currently held by the repository at debug level. */
    private void logStorageContent() throws RepositoryException {
        try (RepositoryResult<Statement> statements = this.conn.getStatements(null, null, null, false)) {
            while (statements.hasNext()) {
                logger.debug("{}", statements.next());
            }
        }
    }

    /**
     * Asserts that the repository holds exactly {@code i} statements matching the pattern
     * {@code (* iri value)}.
     *
     * @param iri predicate to match.
     * @param value object to match; {@code null} is passed through to the repository query.
     * @param i expected number of matching statements.
     */
    private void assertTripleCount(IRI iri, Value value, int i) throws RepositoryException {
        int found = 0;
        try (RepositoryResult<Statement> statements = this.conn.getStatements(null, iri, value, false)) {
            while (statements.hasNext()) {
                statements.next();
                found++;
            }
        }
        Assert.assertEquals(
                String.format(Locale.ROOT, "Cannot find triple (* %s %s) %d times", iri, value, i), i, found);
    }

    /** Same as {@link #assertTripleCount(IRI, Value, int)} with {@code str} converted to a literal. */
    private void assertTripleCount(IRI iri, String str, int i) throws RepositoryException {
        assertTripleCount(iri, SimpleValueFactory.getInstance().createLiteral(str), i);
    }

    /** Asserts that exactly one statement matches the pattern {@code (* iri value)}. */
    private void assertTriple(IRI iri, Value value) throws RepositoryException {
        assertTripleCount(iri, value, 1);
    }

    /** Same as {@link #assertTriple(IRI, Value)} with {@code str} converted to a literal. */
    private void assertTriple(IRI iri, String str) throws RepositoryException {
        assertTriple(iri, SimpleValueFactory.getInstance().createLiteral(str));
    }

    /**
     * Returns the object of the only statement matching {@code (resource iri *)}.
     *
     * <p>Fails the test when there is no such statement or more than one.
     *
     * @param resource subject to match; {@code null} is passed through to the repository query.
     * @param iri predicate to match.
     * @return the object of the single matching statement.
     */
    private Value getTripleObject(Resource resource, IRI iri) throws RepositoryException {
        // try-with-resources releases the iteration even when one of the assertions fails.
        try (RepositoryResult<Statement> statements = this.conn.getStatements(resource, iri, null, false)) {
            Assert.assertTrue(statements.hasNext());
            final Value object = statements.next().getObject();
            Assert.assertFalse("Expected just one result.", statements.hasNext());
            return object;
        }
    }
}
