package org.apache.tika.server;

import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import javax.ws.rs.ProcessingException;
import javax.ws.rs.core.Response;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.cxf.attachment.AttachmentUtil;
import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.cxf.jaxrs.ext.multipart.Attachment;
import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
import org.apache.tika.parser.ocr.TesseractOCRConfig;
import org.apache.tika.parser.ocr.TesseractOCRParser;
import org.apache.tika.server.resource.TikaResource;
import org.junit.Assert;
import org.junit.Test;

/* loaded from: input_file:org/apache/tika/server/TikaResourceTest.class */
/**
 * HTTP-level tests for {@link TikaResource}.
 *
 * <p>Every test issues requests with a CXF {@link WebClient} against
 * {@code http://localhost:9998/tika} (or a sub-path of it) and checks the status code,
 * headers or extracted text of the response. The resource and its exception mapper are
 * registered through {@link #setUpResources} and {@link #setUpProviders}; starting the
 * server itself is presumably done by {@code CXFTestBase} (not visible from this file).
 */
public class TikaResourceTest extends CXFTestBase {
    public static final String TEST_DOC = "test.doc";
    public static final String TEST_PASSWORD_PROTECTED = "password.xls";
    private static final String TEST_RECURSIVE_DOC = "test_recursive_embedded.docx";
    private static final String TEST_OOM = "mock/fake_oom.xml";
    private static final String TEST_OCR_PDF = "testOCR.pdf";
    private static final String TEST_HTML = "testHTML.html";
    private static final String STREAM_CLOSED_FAULT = "java.io.IOException: Stream Closed";
    private static final String END_POINT = "http://localhost:9998";
    private static final String TIKA_PATH = "/tika";
    private static final String TIKA_URL = END_POINT + TIKA_PATH;
    /** HTTP 422, the status asserted for password-protected documents. */
    private static final int UNPROCESSEABLE = 422;

    private static final String TEXT_PLAIN = "text/plain";
    private static final String TEXT_HTML = "text/html";
    private static final String TEXT_XML = "text/xml";
    private static final String MS_WORD = "application/msword";
    private static final String MS_EXCEL = "application/vnd.ms-excel";
    private static final String PDF = "application/pdf";
    private static final String MULTIPART = "multipart/form-data";
    private static final String CONTENT_ENCODING = "Content-Encoding";

    /**
     * Registers {@link TikaResource} as a singleton resource on the server factory.
     *
     * @param jAXRSServerFactoryBean the factory of the server under test
     */
    @Override
    protected void setUpResources(JAXRSServerFactoryBean jAXRSServerFactoryBean) {
        jAXRSServerFactoryBean.setResourceClasses(TikaResource.class);
        jAXRSServerFactoryBean.setResourceProvider(
                TikaResource.class, new SingletonResourceProvider(new TikaResource()));
    }

    /**
     * Registers a {@code TikaServerParseExceptionMapper} (constructed with {@code false})
     * as the only provider.
     *
     * @param jAXRSServerFactoryBean the factory of the server under test
     */
    @Override
    protected void setUpProviders(JAXRSServerFactoryBean jAXRSServerFactoryBean) {
        ArrayList<Object> providers = new ArrayList<>();
        providers.add(new TikaServerParseExceptionMapper(false));
        jAXRSServerFactoryBean.setProviders(providers);
    }

    /**
     * Opens a test fixture from the system class path.
     *
     * <p>Fails the test immediately when the fixture is missing, instead of silently
     * sending a {@code null} request body to the server.
     *
     * @param name class-path relative resource name
     * @return an open stream over the resource, never {@code null}
     */
    private static InputStream resource(String name) {
        InputStream stream = ClassLoader.getSystemResourceAsStream(name);
        Assert.assertNotNull("Test resource not found on the system class path: " + name, stream);
        return stream;
    }

    /** Creates a client for the main {@code /tika} URL that sends PDF and accepts plain text. */
    private static WebClient pdfToTextClient() {
        return WebClient.create(TIKA_URL).type(PDF).accept(TEXT_PLAIN);
    }

    /** Reads the entity of {@code response} into a string. */
    private String text(Response response) throws Exception {
        return getStringFromInputStream((InputStream) response.getEntity());
    }

    /** PUTs the two-text-box PDF with {@code client} and collapses whitespace runs in the result. */
    private String twoTextBoxesText(WebClient client) throws Exception {
        return text(client.put(resource("testPDFTwoTextBoxes.pdf"))).replaceAll("[\r\n ]+", " ").trim();
    }

    @Test
    public void testHelloWorld() throws Exception {
        Response response = WebClient.create(TIKA_URL).type(TEXT_PLAIN).accept(TEXT_PLAIN).get();
        Assert.assertEquals(TikaResource.GREETING, text(response));
    }

    @Test
    public void testSimpleWord() throws Exception {
        Response response = WebClient.create(TIKA_URL).type(MS_WORD).accept(TEXT_PLAIN).put(resource(TEST_DOC));
        Assert.assertTrue(text(response).contains("test"));
    }

    @Test
    public void testWordGzipIn() throws Exception {
        Response response = WebClient.create(TIKA_URL)
                .type(MS_WORD)
                .accept(TEXT_PLAIN)
                .encoding("gzip")
                .put(gzip(resource(TEST_DOC)));
        Assert.assertTrue(text(response).contains("test"));
    }

    @Test
    public void testLongGzipOut() throws Exception {
        Response response = WebClient.create(TIKA_URL)
                .accept(TEXT_PLAIN)
                .acceptEncoding("gzip")
                .put(resource(TEST_RECURSIVE_DOC));
        Assert.assertTrue(response.getHeaders().containsKey(CONTENT_ENCODING));
        Assert.assertEquals("gzip", response.getHeaderString(CONTENT_ENCODING));
        // The body is gzip-compressed, so it has to be inflated before it can be inspected.
        InputStream inflated = new GzipCompressorInputStream((InputStream) response.getEntity());
        Assert.assertTrue(getStringFromInputStream(inflated).contains("Course of human"));
    }

    @Test
    public void testShortGzipOut() throws Exception {
        Response response = WebClient.create(TIKA_URL)
                .accept(TEXT_PLAIN)
                .acceptEncoding("gzip")
                .put(resource(TEST_DOC));
        // For this document the response is expected to come back without a Content-Encoding header.
        Assert.assertFalse(response.getHeaders().containsKey(CONTENT_ENCODING));
        Assert.assertTrue(text(response).contains("test"));
    }

    @Test
    public void testTextMain() throws Exception {
        Response response = WebClient.create(TIKA_URL + "/main").accept(TEXT_PLAIN).put(resource(TEST_HTML));
        String body = text(response);
        Assert.assertTrue(body.contains("Title : Test Indexation Html"));
        Assert.assertFalse(body.contains("Indexation du fichier"));
    }

    @Test
    public void testTextMainMultipart() throws Exception {
        Attachment attachment = new Attachment("myhtml", TEXT_HTML, resource(TEST_HTML));
        Response response = WebClient.create(TIKA_URL + "/form/main")
                .type(MULTIPART)
                .accept(TEXT_PLAIN)
                .post(attachment);
        String body = text(response);
        Assert.assertTrue(body.contains("Title : Test Indexation Html"));
        Assert.assertFalse(body.contains("Indexation du fichier"));
    }

    @Test
    public void testApplicationWadl() throws Exception {
        Response response = WebClient.create(TIKA_URL + "?_wadl").accept(TEXT_PLAIN).get();
        Assert.assertTrue(text(response).startsWith("<application"));
    }

    @Test
    public void testPasswordXLS() throws Exception {
        Response response = WebClient.create(TIKA_URL)
                .type(MS_EXCEL)
                .accept(TEXT_PLAIN)
                .put(resource(TEST_PASSWORD_PROTECTED));
        Assert.assertEquals(UNPROCESSEABLE, response.getStatus());
    }

    @Test
    public void testSimpleWordHTML() throws Exception {
        Response response = WebClient.create(TIKA_URL).type(MS_WORD).accept(TEXT_HTML).put(resource(TEST_DOC));
        String body = text(response);
        Assert.assertTrue(body.contains("test"));
        assertContains("<meta name=\"X-TIKA:digest:MD5\" content=\"f8be45c34e8919eedba48cc8d207fbf0\"/>", body);
        assertContains("<meta name=\"X-TIKA:digest:SHA1\" content=\"N4EBCE7EGTIGZWETEJ6WD3W4KN32TLPG\"/>", body);
    }

    @Test
    public void testPasswordXLSHTML() throws Exception {
        Response response = WebClient.create(TIKA_URL)
                .type(MS_EXCEL)
                .accept(TEXT_HTML)
                .put(resource(TEST_PASSWORD_PROTECTED));
        Assert.assertEquals(UNPROCESSEABLE, response.getStatus());
    }

    @Test
    public void testSimpleWordXML() throws Exception {
        Response response = WebClient.create(TIKA_URL).type(MS_WORD).accept(TEXT_XML).put(resource(TEST_DOC));
        Assert.assertTrue(text(response).contains("test"));
    }

    @Test
    public void testPasswordXLSXML() throws Exception {
        Response response = WebClient.create(TIKA_URL)
                .type(MS_EXCEL)
                .accept(TEXT_XML)
                .put(resource(TEST_PASSWORD_PROTECTED));
        Assert.assertEquals(UNPROCESSEABLE, response.getStatus());
    }

    @Test
    public void testSimpleWordMultipartXML() throws Exception {
        Attachment attachment = new Attachment("myworddoc", MS_WORD, resource(TEST_DOC));
        Response response = WebClient.create(TIKA_URL + "/form")
                .type(MULTIPART)
                .accept(TEXT_XML)
                .post(attachment);
        String body = text(response);
        Assert.assertTrue(body.contains("test"));
        assertContains("<meta name=\"X-TIKA:digest:MD5\" content=\"f8be45c34e8919eedba48cc8d207fbf0\"/>", body);
    }

    /**
     * Smoke test: passes as long as {@code AttachmentUtil.getCommandMap()} returns without
     * throwing. Going by the test name this guards the JAXB/activation dependencies.
     */
    @Test
    public void testJAXBAndActivationDependency() {
        AttachmentUtil.getCommandMap();
    }

    @Test
    public void testEmbedded() throws Exception {
        Response plain = WebClient.create(TIKA_URL).accept(TEXT_PLAIN).put(resource(TEST_RECURSIVE_DOC));
        Assert.assertTrue(text(plain).contains("Course of human events"));

        Response xml = WebClient.create(TIKA_URL).accept(TEXT_XML).put(resource(TEST_RECURSIVE_DOC));
        String xmlBody = text(xml);
        Assert.assertTrue(xmlBody.contains("Course of human events"));
        assertContains("<meta name=\"X-TIKA:digest:MD5\" content=\"59f626e09a8c16ab6dbc2800c685f772\"/>", xmlBody);
    }

    @Test
    public void testWMFInRTF() throws Exception {
        Response response = WebClient.create(TIKA_URL)
                .type("application/rtf")
                .accept(TEXT_PLAIN)
                .put(resource("testRTF_npeFromWMFInTikaServer.rtf"));
        Assert.assertTrue(text(response).contains("Example text"));
    }

    /** Only exercised when Tesseract is available; otherwise the test passes vacuously. */
    @Test
    public void testOCRLanguageConfig() throws Exception {
        if (!new TesseractOCRParser().hasTesseract(new TesseractOCRConfig())) {
            return;
        }
        Response response = WebClient.create(TIKA_URL)
                .accept(TEXT_PLAIN)
                .header("X-Tika-PDFOcrStrategy", "ocr_only")
                .header("X-Tika-OCRLanguage", "eng+fra")
                .header("X-Tika-OCRMinFileSizeToOcr", "10")
                .header("X-Tika-OCRMaxFileSizeToOcr", "1000000000")
                .put(resource(TEST_OCR_PDF));
        assertContains("Happy New Year 2003!", text(response));
    }

    /** Only exercised when Tesseract is available; otherwise the test passes vacuously. */
    @Test
    public void testPDFOCRConfig() throws Exception {
        if (!new TesseractOCRParser().hasTesseract(new TesseractOCRConfig())) {
            return;
        }
        Response noOcr = pdfToTextClient().header("X-Tika-PDFOcrStrategy", "no_ocr").put(resource(TEST_OCR_PDF));
        Assert.assertEquals("", text(noOcr).trim());

        Response ocrOnly = pdfToTextClient().header("X-Tika-PDFOcrStrategy", "ocr_only").put(resource(TEST_OCR_PDF));
        assertContains("Happy New Year 2003!", text(ocrOnly));

        Response invalid = pdfToTextClient()
                .header("X-Tika-PDFOcrStrategy", "non-sense-value")
                .put(resource(TEST_OCR_PDF));
        Assert.assertEquals(400, invalid.getStatus());
    }

    @Test
    public void testPDFConfig() throws Exception {
        String withoutHeader = "Left column line 1 Right column line 1 Left colu mn line 2 Right column line 2";
        String unsorted = "Left column line 1 Left column line 2 Right column line 1 Right column line 2";

        Assert.assertEquals(withoutHeader, twoTextBoxesText(pdfToTextClient()));
        Assert.assertEquals(unsorted, twoTextBoxesText(pdfToTextClient().header("X-Tika-PDFsortByPosition", "false")));
        // Same header-less request as the first one; presumably verifies that the
        // per-request header above did not stick to the server configuration.
        Assert.assertEquals(withoutHeader, twoTextBoxesText(pdfToTextClient()));
    }

    @Test
    public void testExtractTextAcceptPlainText() throws Exception {
        Attachment attachment = new Attachment(
                "my-docx-file",
                "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                resource("2pic.docx"));
        Response response = WebClient.create(TIKA_URL + "/form")
                .type(MULTIPART)
                .accept(TEXT_PLAIN)
                .post(attachment);
        String body = text(response);
        Assert.assertTrue(body.contains("P1040893.JPG"));
        assertNotFound(STREAM_CLOSED_FAULT, body);
    }

    @Test
    public void testDataIntegrityCheck() throws Exception {
        try {
            // NOTE(review): the trailing characters of this path look like an encoding
            // casualty (possibly an unprintable character in the upstream source);
            // confirm against the upstream test before relying on the exact value.
            Response rejected = pdfToTextClient()
                    .header("X-Tika-OCRtesseractPath", "C://tmp//hello.bat��")
                    .put(resource(TEST_OCR_PDF));
            Assert.assertEquals(400, rejected.getStatus());
        } catch (ProcessingException ignored) {
            // Deliberately tolerated: the request failing on the client side is accepted
            // as an alternative to a 400 response.
            // NOTE(review): the root cause is not visible from this file; confirm.
        }
        Response accepted = pdfToTextClient()
                .header("X-Tika-OCRtesseractPath", "bogus path")
                .put(resource(TEST_OCR_PDF));
        Assert.assertEquals(200, accepted.getStatus());
    }

    @Test
    public void testTrustedMethodPrevention() {
        Response response = pdfToTextClient()
                .header("X-Tika-OCRtrustedPageSeparator", " ")
                .put(resource(TEST_OCR_PDF));
        Assert.assertEquals(400, response.getStatus());
    }

    @Test
    public void testFloatInHeader() {
        Response response = pdfToTextClient()
                .header("X-Tika-PDFaverageCharTolerance", "2.0")
                .put(resource(TEST_OCR_PDF));
        Assert.assertEquals(200, response.getStatus());
    }

    @Test
    public void testOOMInLegacyMode() throws Exception {
        try {
            WebClient.create(TIKA_URL).accept(TEXT_PLAIN).put(resource(TEST_OOM));
        } catch (Exception ignored) {
            // Whatever happens to the first request is irrelevant; the assertion below
            // checks that a following request is still served.
        }
        Response response = WebClient.create(TIKA_URL).accept(TEXT_PLAIN).put(resource(TEST_RECURSIVE_DOC));
        assertContains("plundered our seas", text(response));
    }

    @Test
    public void testUnicodePasswordProtectedSpaces() throws Exception {
        String encodedPassword = new Base64().encodeAsString("    ".getBytes(StandardCharsets.UTF_8));
        Response response = WebClient.create(TIKA_URL)
                .accept(TEXT_PLAIN)
                .header("Password_Base64_UTF-8", encodedPassword)
                .put(resource("testPassword4Spaces.pdf"));
        assertContains("Just some text.", text(response));
    }

    @Test
    public void testUnicodePasswordProtectedUnicode() throws Exception {
        // NOTE(review): this literal contains U+FFFD replacement characters, which suggests
        // some of the original password characters were lost in an encoding round-trip;
        // confirm against the upstream test before relying on the exact value.
        String password = "  ! < > \" \\ € œ ¤ 1⁄4 1⁄2 �� �� ��  ";
        String encodedPassword = new Base64().encodeAsString(password.getBytes(StandardCharsets.UTF_8));
        Response response = WebClient.create(TIKA_URL)
                .accept(TEXT_PLAIN)
                .header("Password_Base64_UTF-8", encodedPassword)
                .put(resource("testUnicodePassword.pdf"));
        assertContains("Just some text.", text(response));
    }
}
