package org.apache.tika.parser.warc;

import java.util.HashSet;
import java.util.List;
import org.apache.tika.TikaTest;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.sax.BasicContentHandlerFactory;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

/* loaded from: input_file:org/apache/tika/parser/warc/WARCParserTest.class */
/**
 * Tests for WARC parsing, driven through the recursive-metadata helpers
 * inherited from {@link TikaTest}.
 */
public class WARCParserTest extends TikaTest {
    /** Metadata key under which the detected media type is looked up. */
    private static final String CONTENT_TYPE = "Content-Type";

    /**
     * Parses {@code cc.warc.gz} and checks that exactly two metadata entries
     * come back: the first typed as gzipped WARC, the second an HTML record
     * carrying the expected extracted text and WARC header values.
     *
     * @throws Exception if the test file cannot be parsed
     */
    @Test
    public void testBasic() throws Exception {
        List<Metadata> metadataList = getRecursiveMetadata("cc.warc.gz");
        Assertions.assertEquals(2, metadataList.size());

        Metadata container = metadataList.get(0);
        Assertions.assertEquals("application/warc+gz", container.get(CONTENT_TYPE));

        Metadata record = metadataList.get(1);
        assertContains("text/html", record.get(CONTENT_TYPE));
        assertContains("Common Crawl on Twitter", record.get(TikaCoreProperties.TIKA_CONTENT));
        Assertions.assertEquals("<urn:uuid:c3f02271-44d2-4159-9cdb-3e3efeb16ba0>", record.get("warc:WARC-Warcinfo-ID"));
        Assertions.assertEquals("http://commoncrawl.org/", record.get("warc:WARC-Target-URI"));
    }

    /**
     * Parses the same multi-record WARC in plain and gzipped form and checks
     * that both produce equal metadata lists, apart from the two fields that
     * are explicitly excluded from the comparison. The content type of the
     * first entry is then asserted separately for each variant.
     *
     * @throws Exception if either test file cannot be parsed
     */
    @Test
    public void testMultipleRecords() throws Exception {
        List<Metadata> plainMetadataList = getRecursiveMetadata("testWARC_multiple.warc", BasicContentHandlerFactory.HANDLER_TYPE.TEXT);
        List<Metadata> gzMetadataList = getRecursiveMetadata("testWARC_multiple.warc.gz", BasicContentHandlerFactory.HANDLER_TYPE.TEXT);

        // Fields left out of the list comparison; the content type is
        // asserted per variant below instead.
        HashSet<String> fieldsToIgnore = new HashSet<>();
        fieldsToIgnore.add("X-TIKA:parse_time_millis");
        fieldsToIgnore.add(CONTENT_TYPE);
        assertMetadataListEquals(plainMetadataList, gzMetadataList, fieldsToIgnore);

        Assertions.assertEquals("application/warc", plainMetadataList.get(0).get(CONTENT_TYPE));
        Assertions.assertEquals("application/warc+gz", gzMetadataList.get(0).get(CONTENT_TYPE));
    }
}
