/*
 * Decompiled with CFR 0.152.
 */
package org.apache.tika.server.standard;

import jakarta.ws.rs.core.Response;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.io.StringWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileVisitResult;
import java.nio.file.FileVisitor;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.nio.file.attribute.BasicFileAttributes;
import java.nio.file.attribute.FileAttribute;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.io.FileUtils;
import org.apache.cxf.jaxrs.JAXRSServerFactoryBean;
import org.apache.cxf.jaxrs.client.WebClient;
import org.apache.cxf.jaxrs.lifecycle.ResourceProvider;
import org.apache.cxf.jaxrs.lifecycle.SingletonResourceProvider;
import org.apache.tika.exception.TikaConfigException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.pdf.PDFParserConfig;
import org.apache.tika.pipes.FetchEmitTuple;
import org.apache.tika.pipes.HandlerConfig;
import org.apache.tika.pipes.emitter.EmitKey;
import org.apache.tika.pipes.extractor.EmbeddedDocumentBytesConfig;
import org.apache.tika.pipes.fetcher.FetchKey;
import org.apache.tika.pipes.fetcher.FetcherManager;
import org.apache.tika.sax.BasicContentHandlerFactory;
import org.apache.tika.serialization.JsonMetadataList;
import org.apache.tika.serialization.pipes.JsonFetchEmitTuple;
import org.apache.tika.server.core.CXFTestBase;
import org.apache.tika.server.core.FetcherStreamFactory;
import org.apache.tika.server.core.InputStreamFactory;
import org.apache.tika.server.core.TikaServerParseExceptionMapper;
import org.apache.tika.server.core.resource.PipesResource;
import org.apache.tika.server.core.writer.JSONObjWriter;
import org.apache.tika.utils.ProcessUtils;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;

/**
 * Integration tests for the {@code /pipes} endpoint.
 *
 * <p>Each test serializes a {@link FetchEmitTuple} to JSON, POSTs it to
 * {@code http://localhost:9998/pipes}, and then inspects the files that the
 * configured file-system emitters wrote underneath a JUnit-managed temp directory.
 *
 * <p>The tika-config written in {@link #setUpBeforeClass()} declares one fetcher
 * ({@code fsf}, rooted at the copied test documents) and two emitters: {@code fse}
 * for extracted metadata JSON and {@code bytes} for embedded-document bytes.
 */
public class TikaPipesTest extends CXFTestBase {
    private static final String PIPES_PATH = "/pipes";
    /** Full URL the tests POST to; same value as the previously inlined literal. */
    private static final String PIPES_URL = "http://localhost:9998" + PIPES_PATH;
    private static final String TEST_DOCUMENTS_DIR = "/test-documents/";
    private static final String TEST_RECURSIVE_DOC = "test_recursive_embedded.docx";
    private static final String TEST_TWO_BOXES_PDF = "testPDFTwoTextBoxes.pdf";
    /** Suffix of the metadata file read back from the {@code fse} output directory. */
    private static final String JSON_SUFFIX = ".json";

    @TempDir
    private static Path TMP_WORKING_DIR;
    private static Path TMP_OUTPUT_DIR;
    private static Path TMP_BYTES_DIR;
    private static Path TIKA_PIPES_LOG4j2_PATH;
    private static Path TIKA_CONFIG_PATH;
    private static String TIKA_CONFIG_XML;
    // NOTE(review): never assigned anywhere in this class, so getInputStreamFactory()
    // hands null to FetcherStreamFactory -- confirm that is intended/harmless.
    private static FetcherManager FETCHER_MANAGER;

    /**
     * Lays out the working directory (input documents, output directory, log4j2 config)
     * and writes the tika-config XML that wires the fetcher, emitters and pipes settings.
     *
     * @throws Exception if a directory or file cannot be created, or a resource is missing
     */
    @BeforeAll
    public static void setUpBeforeClass() throws Exception {
        Path inputDir = TMP_WORKING_DIR.resolve("input");
        TMP_OUTPUT_DIR = TMP_WORKING_DIR.resolve("output");
        TMP_BYTES_DIR = TMP_WORKING_DIR.resolve("bytes");
        Files.createDirectories(inputDir);
        Files.createDirectories(TMP_OUTPUT_DIR);

        copyResource(TEST_DOCUMENTS_DIR + TEST_RECURSIVE_DOC, inputDir.resolve(TEST_RECURSIVE_DOC));
        copyResource(TEST_DOCUMENTS_DIR + TEST_TWO_BOXES_PDF, inputDir.resolve(TEST_TWO_BOXES_PDF));

        TIKA_CONFIG_PATH = Files.createTempFile(TMP_WORKING_DIR, "tika-pipes-", ".xml");
        TIKA_PIPES_LOG4j2_PATH = Files.createTempFile(TMP_WORKING_DIR, "log4j2-", ".xml");
        copyResource("/log4j2.xml", TIKA_PIPES_LOG4j2_PATH);

        // The concatenation below yields exactly the same single-line XML as before;
        // it is only split across source lines for readability.
        TIKA_CONFIG_XML = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
                + "<properties>"
                + "<fetchers>"
                + "<fetcher class=\"org.apache.tika.pipes.fetcher.fs.FileSystemFetcher\">"
                + "<params><name>fsf</name><basePath>" + inputDir.toAbsolutePath() + "</basePath></params>"
                + "</fetcher>"
                + "</fetchers>"
                + "<emitters>"
                + "<emitter class=\"org.apache.tika.pipes.emitter.fs.FileSystemEmitter\">"
                + "<params><name>fse</name><basePath>" + TMP_OUTPUT_DIR.toAbsolutePath() + "</basePath></params>"
                + "</emitter>"
                + "<emitter class=\"org.apache.tika.pipes.emitter.fs.FileSystemEmitter\">"
                + "<params><name>bytes</name><basePath>" + TMP_BYTES_DIR.toAbsolutePath() + "</basePath></params>"
                + "</emitter>"
                + "</emitters>"
                + "<pipes><params>"
                + "<tikaConfig>" + ProcessUtils.escapeCommandLine(TIKA_CONFIG_PATH.toAbsolutePath().toString()) + "</tikaConfig>"
                + "<numClients>10</numClients>"
                + "<forkedJvmArgs>"
                + "<arg>-Xmx256m</arg>"
                + "<arg>-Dlog4j.configurationFile=file:" + ProcessUtils.escapeCommandLine(TIKA_PIPES_LOG4j2_PATH.toAbsolutePath().toString()) + "</arg>"
                + "</forkedJvmArgs>"
                + "</params></pipes>"
                + "</properties>";
        Files.write(TIKA_CONFIG_PATH, TIKA_CONFIG_XML.getBytes(StandardCharsets.UTF_8));
    }

    /**
     * Copies a classpath resource to {@code target}, replacing any existing file.
     * The resource stream is closed afterwards, and a missing resource fails with a
     * descriptive assertion message instead of a bare NullPointerException.
     *
     * @param resource absolute classpath location of the resource
     * @param target file to write
     * @throws IOException if the copy fails
     */
    private static void copyResource(String resource, Path target) throws IOException {
        try (InputStream in = TikaPipesTest.class.getResourceAsStream(resource)) {
            Assertions.assertNotNull(in, "Missing test resource on classpath: " + resource);
            Files.copy(in, target, StandardCopyOption.REPLACE_EXISTING);
        }
    }

    /**
     * Removes everything emitted by a previous test so each test only sees its own output.
     * Both emitter targets are cleared: the metadata output directory and the
     * embedded-bytes directory (whose contents {@link #testBytes()} compares exactly).
     *
     * @throws Exception if a directory cannot be deleted
     */
    @BeforeEach
    public void setUpEachTest() throws Exception {
        FileUtils.deleteDirectory(TMP_OUTPUT_DIR.toFile());
        FileUtils.deleteDirectory(TMP_BYTES_DIR.toFile());
        Assertions.assertFalse(Files.isDirectory(TMP_OUTPUT_DIR));
        Assertions.assertFalse(Files.isDirectory(TMP_BYTES_DIR));
    }

    /**
     * Registers a single {@link PipesResource}, built from the generated tika-config,
     * as the only resource provider on the given server factory.
     *
     * @param sf server factory to configure
     */
    protected void setUpResources(JAXRSServerFactoryBean sf) {
        // Declared as List<ResourceProvider> because that is the element type
        // setResourceProviders expects; a List<SingletonResourceProvider> is not assignable.
        List<ResourceProvider> rCoreProviders = new ArrayList<>();
        try {
            rCoreProviders.add(new SingletonResourceProvider(new PipesResource(TIKA_CONFIG_PATH)));
        } catch (IOException | TikaConfigException e) {
            throw new RuntimeException("Could not create PipesResource from " + TIKA_CONFIG_PATH, e);
        }
        sf.setResourceProviders(rCoreProviders);
    }

    /**
     * Installs the parse-exception mapper and the JSON object writer on the server factory.
     *
     * @param sf server factory to configure
     */
    protected void setUpProviders(JAXRSServerFactoryBean sf) {
        List<Object> providers = new ArrayList<>();
        providers.add(new TikaServerParseExceptionMapper(true));
        providers.add(new JSONObjWriter());
        sf.setProviders(providers);
    }

    /**
     * @return a fresh stream over the UTF-8 bytes of the generated tika-config XML
     */
    protected InputStream getTikaConfigInputStream() {
        return new ByteArrayInputStream(TIKA_CONFIG_XML.getBytes(StandardCharsets.UTF_8));
    }

    /**
     * @param is ignored by this implementation
     * @return a {@link FetcherStreamFactory} wrapping {@code FETCHER_MANAGER}
     */
    protected InputStreamFactory getInputStreamFactory(InputStream is) {
        return new FetcherStreamFactory(FETCHER_MANAGER);
    }

    /** Default handling: the recursive docx is emitted as 12 metadata objects. */
    @Test
    public void testBasic() throws Exception {
        FetchEmitTuple t = new FetchEmitTuple("myId", new FetchKey("fsf", TEST_RECURSIVE_DOC), new EmitKey("fse", ""));
        postAndAssertOk(toJson(t));

        List<Metadata> metadataList = readEmittedMetadata(TEST_RECURSIVE_DOC + JSON_SUFFIX);
        Assertions.assertEquals(12, metadataList.size());
        assertContains("When in the Course", metadataList.get(6).get(TikaCoreProperties.TIKA_CONTENT));
    }

    /** CONCATENATE parse mode: all content ends up in a single metadata object. */
    @Test
    public void testConcatenated() throws Exception {
        ParseContext parseContext = new ParseContext();
        HandlerConfig handlerConfig = new HandlerConfig(BasicContentHandlerFactory.HANDLER_TYPE.TEXT, HandlerConfig.PARSE_MODE.CONCATENATE, -1, -1, true);
        parseContext.set(HandlerConfig.class, handlerConfig);
        FetchEmitTuple t = new FetchEmitTuple("myId", new FetchKey("fsf", TEST_RECURSIVE_DOC), new EmitKey("fse", ""), new Metadata(), parseContext, FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT);

        String json = toJson(t);
        assertRoundTrips(t, json);
        postAndAssertOk(json);

        List<Metadata> metadataList = readEmittedMetadata(TEST_RECURSIVE_DOC + JSON_SUFFIX);
        Assertions.assertEquals(1, metadataList.size());
        assertContains("When in the Course", metadataList.get(0).get(TikaCoreProperties.TIKA_CONTENT));
    }

    /** A {@link PDFParserConfig} carried in the parse context (sort-by-position) affects extraction. */
    @Test
    public void testPDFConfig() throws Exception {
        Metadata metadata = new Metadata();
        ParseContext parseContext = new ParseContext();
        PDFParserConfig pdfParserConfig = new PDFParserConfig();
        pdfParserConfig.setSortByPosition(true);
        parseContext.set(PDFParserConfig.class, pdfParserConfig);
        FetchEmitTuple t = new FetchEmitTuple("myId", new FetchKey("fsf", TEST_TWO_BOXES_PDF), new EmitKey("fse", ""), metadata, parseContext);
        postAndAssertOk(toJson(t));

        List<Metadata> metadataList = readEmittedMetadata(TEST_TWO_BOXES_PDF + JSON_SUFFIX);
        String content = metadataList.get(0).get(TikaCoreProperties.TIKA_CONTENT);
        // Collapse all whitespace runs so the comparison does not depend on line breaks.
        content = content.replaceAll("\\s+", " ");
        assertContains("Left column line 1 Right column line 1 Left colu mn line 2 Right column line 2", content);
    }

    /**
     * Embedded-document bytes are routed to the {@code bytes} emitter; the resulting
     * file names and sizes must match {@link #loadExpected()} exactly.
     */
    @Test
    public void testBytes() throws Exception {
        EmbeddedDocumentBytesConfig config = new EmbeddedDocumentBytesConfig(true);
        config.setEmitter("bytes");
        config.setIncludeOriginal(true);
        config.setEmbeddedIdPrefix("-");
        config.setZeroPadName(10);
        config.setSuffixStrategy(EmbeddedDocumentBytesConfig.SUFFIX_STRATEGY.EXISTING);
        ParseContext parseContext = new ParseContext();
        parseContext.set(HandlerConfig.class, HandlerConfig.DEFAULT_HANDLER_CONFIG);
        parseContext.set(EmbeddedDocumentBytesConfig.class, config);
        FetchEmitTuple t = new FetchEmitTuple("myId", new FetchKey("fsf", TEST_RECURSIVE_DOC), new EmitKey("fse", TEST_RECURSIVE_DOC), new Metadata(), parseContext, FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT);

        String json = toJson(t);
        assertRoundTrips(t, json);
        postAndAssertOk(json);

        List<Metadata> metadataList = readEmittedMetadata(TEST_RECURSIVE_DOC + JSON_SUFFIX);
        Assertions.assertEquals(12, metadataList.size());
        assertContains("When in the Course", metadataList.get(6).get(TikaCoreProperties.TIKA_CONTENT));

        Map<String, Long> expected = loadExpected();
        Map<String, Long> byteFileNames = getFileNames(TMP_BYTES_DIR);
        Assertions.assertEquals(expected, byteFileNames);
    }

    /**
     * Serializes a tuple to the JSON string that is sent as the request body.
     *
     * @param tuple tuple to serialize
     * @return JSON produced by {@link JsonFetchEmitTuple}
     * @throws Exception if serialization fails
     */
    private static String toJson(FetchEmitTuple tuple) throws Exception {
        StringWriter writer = new StringWriter();
        JsonFetchEmitTuple.toJson(tuple, writer);
        return writer.toString();
    }

    /**
     * Asserts that deserializing {@code json} gives back a tuple equal to {@code tuple}.
     *
     * @param tuple the tuple that was serialized
     * @param json its JSON form
     * @throws Exception if deserialization fails
     */
    private static void assertRoundTrips(FetchEmitTuple tuple, String json) throws Exception {
        FetchEmitTuple deserialized = JsonFetchEmitTuple.fromJson(new StringReader(json));
        Assertions.assertEquals(tuple, deserialized);
    }

    /**
     * POSTs {@code json} to the pipes endpoint and asserts an HTTP 200 status.
     * The response is closed once the status has been checked.
     *
     * @param json request body
     */
    private static void postAndAssertOk(String json) {
        try (Response response = WebClient.create(PIPES_URL).accept("application/json").post(json)) {
            Assertions.assertEquals(200, response.getStatus());
        }
    }

    /**
     * Reads a metadata list from a file in the {@code fse} output directory.
     *
     * @param emittedFileName file name relative to the output directory
     * @return the deserialized metadata list
     * @throws Exception if the file cannot be read or parsed
     */
    private static List<Metadata> readEmittedMetadata(String emittedFileName) throws Exception {
        try (BufferedReader reader = Files.newBufferedReader(TMP_OUTPUT_DIR.resolve(emittedFileName))) {
            return JsonMetadataList.fromJson(reader);
        }
    }

    /**
     * @return expected embedded-bytes files for the recursive docx: file name mapped to size in bytes
     */
    private static Map<String, Long> loadExpected() {
        Map<String, Long> m = new HashMap<>();
        m.put("test_recursive_embedded.docx-0000000009.txt", 8151L);
        m.put("test_recursive_embedded.docx-0000000007.txt", 8L);
        m.put("test_recursive_embedded.docx-0000000006.txt", 8L);
        m.put("test_recursive_embedded.docx-0000000002.zip", 4827L);
        m.put("test_recursive_embedded.docx-0000000001.emf", 4992L);
        m.put("test_recursive_embedded.docx-0000000008.zip", 4048L);
        m.put("test_recursive_embedded.docx-0000000004.txt", 8L);
        m.put("test_recursive_embedded.docx-0000000000.docx", 27082L);
        m.put("test_recursive_embedded.docx-0000000003.txt", 8L);
        m.put("test_recursive_embedded.docx-0000000011.txt", 7L);
        m.put("test_recursive_embedded.docx-0000000005.zip", 4492L);
        m.put("test_recursive_embedded.docx-0000000010.zip", 163L);
        return m;
    }

    /**
     * Recursively collects every file below {@code p}.
     *
     * @param p root directory to walk (previously ignored in favour of {@code TMP_BYTES_DIR})
     * @return map of simple file name to size in bytes; files in different
     *         sub-directories that share a name overwrite each other
     * @throws Exception if the tree cannot be walked or a file cannot be read
     */
    private static Map<String, Long> getFileNames(Path p) throws Exception {
        final Map<String, Long> ret = new HashMap<>();
        Files.walkFileTree(p, new FileVisitor<Path>() {

            @Override
            public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) throws IOException {
                return FileVisitResult.CONTINUE;
            }

            @Override
            public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
                ret.put(file.getFileName().toString(), Files.size(file));
                return FileVisitResult.CONTINUE;
            }

            @Override
            public FileVisitResult visitFileFailed(Path file, IOException exc) throws IOException {
                // Surface the real I/O problem instead of silently producing a partial map.
                throw exc;
            }

            @Override
            public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException {
                if (exc != null) {
                    throw exc;
                }
                return FileVisitResult.CONTINUE;
            }
        });
        return ret;
    }
}

