/*
 * Decompiled with CFR 0.152.
 */
package org.apache.tika.parser.csv;

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import org.apache.commons.io.ByteOrderMark;
import org.apache.tika.TikaTest;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.csv.TextAndCSVParser;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;

public class TextAndCSVParserTest
extends TikaTest {
    private static byte[] CSV_UTF8 = "the,quick,brown\tfox\njumped \tover,the\tlazy,\tdog\nand then,ran,down\tthe\tstreet".getBytes(StandardCharsets.UTF_8);
    private static byte[] CSV_UTF_16LE = "the,quick,brown\tfox\njumped \tover,the\tlazy,\tdog\nand then,ran,down\tthe\tstreet".getBytes(StandardCharsets.UTF_16LE);
    private static byte[] TSV_UTF8 = "the\tquick\tbrown,fox\njumped ,over\tthe,lazy\t,dog\nand then\tran\tdown,the,street".getBytes(StandardCharsets.UTF_8);
    private static byte[] TSV_UTF_16LE = "the\tquick\tbrown,fox\njumped ,over\tthe,lazy\t,dog\nand then\tran\tdown,the,street".getBytes(StandardCharsets.UTF_16LE);
    private static String EXPECTED_TSV = "<table><tr> <td>the</td> <td>quick</td> <td>brown,fox</td></tr>\n<tr> <td>jumped ,over</td> <td>the,lazy</td> <td>,dog</td></tr>\n<tr> <td>and then</td> <td>ran</td> <td>down,the,street</td></tr>\n</table>".replaceAll("[\r\n\t ]+", " ");
    private static String EXPECTED_CSV = EXPECTED_TSV.replaceAll(",+", " ");
    private static Parser PARSER;

    @BeforeAll
    public static void setUp() throws Exception {
        try (InputStream is = Thread.currentThread().getContextClassLoader().getResourceAsStream("org/apache/tika/parser/csv/tika-config.xml");){
            PARSER = new AutoDetectParser(new TikaConfig(is));
        }
    }

    private static void assertMediaTypeEquals(String csv, String charset, String delimiter, String mediaTypeString) {
        if (mediaTypeString == null) {
            Assertions.fail((String)"media type string must not be null");
        }
        MediaType expected = TextAndCSVParserTest.mediaType(csv, charset, delimiter);
        MediaType observed = MediaType.parse((String)mediaTypeString);
        Assertions.assertEquals((Object)expected, (Object)observed);
    }

    private static MediaType mediaType(String csv, String charset, String delimiter) {
        HashMap<String, String> attrs = new HashMap<String, String>();
        attrs.put("charset", charset);
        attrs.put("delimiter", delimiter);
        return new MediaType(MediaType.text((String)csv), attrs);
    }

    private static byte[] concat(byte[] bytesA, byte[] bytesB) {
        byte[] ret = new byte[bytesA.length + bytesB.length];
        System.arraycopy(bytesA, 0, ret, 0, bytesA.length);
        System.arraycopy(bytesB, 0, ret, bytesA.length, bytesB.length);
        return ret;
    }

    @Test
    public void testCSV_UTF8() throws Exception {
        Metadata metadata = new Metadata();
        metadata.set("resourceName", "test.csv");
        TikaTest.XMLResult xmlResult = this.getXML(new ByteArrayInputStream(CSV_UTF8), PARSER, metadata);
        Assertions.assertEquals((Object)"comma", (Object)xmlResult.metadata.get(TextAndCSVParser.DELIMITER_PROPERTY));
        TextAndCSVParserTest.assertMediaTypeEquals("csv", "ISO-8859-1", "comma", xmlResult.metadata.get("Content-Type"));
        this.assertContainsIgnoreWhiteSpaceDiffs(EXPECTED_CSV, xmlResult.xml);
    }

    @Test
    public void testCSV_UTF8_TypeOverride() throws Exception {
        Metadata metadata = new Metadata();
        metadata.set(TikaCoreProperties.CONTENT_TYPE_USER_OVERRIDE, "text/csv; charset=UTF-8");
        TikaTest.XMLResult xmlResult = this.getXML(new ByteArrayInputStream(CSV_UTF8), PARSER, metadata);
        Assertions.assertEquals((Object)"comma", (Object)xmlResult.metadata.get(TextAndCSVParser.DELIMITER_PROPERTY));
        TextAndCSVParserTest.assertMediaTypeEquals("csv", "UTF-8", "comma", xmlResult.metadata.get("Content-Type"));
        this.assertContainsIgnoreWhiteSpaceDiffs(EXPECTED_CSV, xmlResult.xml);
    }

    @Test
    public void testCSV_UTF8_Type() throws Exception {
        Metadata metadata = new Metadata();
        metadata.set("Content-Type", "text/csv");
        TikaTest.XMLResult xmlResult = this.getXML(new ByteArrayInputStream(CSV_UTF8), PARSER, metadata);
        Assertions.assertEquals((Object)"comma", (Object)xmlResult.metadata.get(TextAndCSVParser.DELIMITER_PROPERTY));
        TextAndCSVParserTest.assertMediaTypeEquals("csv", "ISO-8859-1", "comma", xmlResult.metadata.get("Content-Type"));
        this.assertContainsIgnoreWhiteSpaceDiffs(EXPECTED_CSV, xmlResult.xml);
    }

    @Test
    public void testCSV_UTF16LE() throws Exception {
        Metadata metadata = new Metadata();
        metadata.set("resourceName", "test.csv");
        TikaTest.XMLResult xmlResult = this.getXML(new ByteArrayInputStream(CSV_UTF_16LE), PARSER, metadata);
        Assertions.assertEquals((Object)"comma", (Object)xmlResult.metadata.get(TextAndCSVParser.DELIMITER_PROPERTY));
        TextAndCSVParserTest.assertMediaTypeEquals("csv", "UTF-16LE", "comma", xmlResult.metadata.get("Content-Type"));
        this.assertContainsIgnoreWhiteSpaceDiffs(EXPECTED_CSV, xmlResult.xml);
    }

    @Test
    public void testCSV_UTF16LE_BOM() throws Exception {
        Metadata metadata = new Metadata();
        metadata.set("resourceName", "test.csv");
        TikaTest.XMLResult xmlResult = this.getXML(new ByteArrayInputStream(TextAndCSVParserTest.concat(ByteOrderMark.UTF_16LE.getBytes(), CSV_UTF_16LE)), PARSER, metadata);
        Assertions.assertEquals((Object)"comma", (Object)xmlResult.metadata.get(TextAndCSVParser.DELIMITER_PROPERTY));
        TextAndCSVParserTest.assertMediaTypeEquals("csv", "UTF-16LE", "comma", xmlResult.metadata.get("Content-Type"));
        this.assertContainsIgnoreWhiteSpaceDiffs(EXPECTED_CSV, xmlResult.xml);
    }

    @Test
    public void testTSV_UTF8() throws Exception {
        Metadata metadata = new Metadata();
        metadata.set("resourceName", "test.csv");
        TikaTest.XMLResult xmlResult = this.getXML(new ByteArrayInputStream(TSV_UTF8), PARSER, metadata);
        Assertions.assertEquals((Object)"tab", (Object)xmlResult.metadata.get(TextAndCSVParser.DELIMITER_PROPERTY));
        TextAndCSVParserTest.assertMediaTypeEquals("tsv", "ISO-8859-1", "tab", xmlResult.metadata.get("Content-Type"));
        this.assertContainsIgnoreWhiteSpaceDiffs(EXPECTED_TSV, xmlResult.xml);
    }

    @Test
    public void testTSV_UTF16LE() throws Exception {
        Metadata metadata = new Metadata();
        metadata.set("resourceName", "test.csv");
        TikaTest.XMLResult xmlResult = this.getXML(new ByteArrayInputStream(TSV_UTF_16LE), PARSER, metadata);
        Assertions.assertEquals((Object)"tab", (Object)xmlResult.metadata.get(TextAndCSVParser.DELIMITER_PROPERTY));
        TextAndCSVParserTest.assertMediaTypeEquals("tsv", "UTF-16LE", "tab", xmlResult.metadata.get("Content-Type"));
        this.assertContainsIgnoreWhiteSpaceDiffs(EXPECTED_TSV, xmlResult.xml);
    }

    @Test
    public void testBadCsv() throws Exception {
        byte[] csv = "the,quick\nbrown,\"la\"zy\"\nbrown,\"dog\n".getBytes(StandardCharsets.UTF_8);
        Metadata metadata = new Metadata();
        metadata.set("resourceName", "test.csv");
        TikaTest.XMLResult xmlResult = this.getXML(new ByteArrayInputStream(csv), PARSER, metadata);
        Assertions.assertNull((Object)xmlResult.metadata.get(TextAndCSVParser.DELIMITER_PROPERTY));
        Assertions.assertEquals((Object)"text/plain; charset=ISO-8859-1", (Object)xmlResult.metadata.get("Content-Type"));
        TextAndCSVParserTest.assertContains((String)"the,quick", (String)xmlResult.xml);
    }

    @Test
    public void testNonCSV() throws Exception {
        byte[] bytes = "testcsv\ntestcsv testcsv;;; testcsv".getBytes(StandardCharsets.UTF_8);
        Metadata metadata = new Metadata();
        metadata.set("resourceName", "test.csv");
        TikaTest.XMLResult xmlResult = this.getXML(new ByteArrayInputStream(bytes), PARSER, metadata);
        TextAndCSVParserTest.assertContains((String)"text/plain", (String)xmlResult.metadata.get("Content-Type"));
        metadata.set("resourceName", "test.txt");
        xmlResult = this.getXML(new ByteArrayInputStream(bytes), PARSER, metadata);
        TextAndCSVParserTest.assertContains((String)"text/plain", (String)xmlResult.metadata.get("Content-Type"));
    }

    @Test
    public void testLong() throws Exception {
        StringBuilder sb = new StringBuilder();
        for (int rows = 0; rows < 1000; ++rows) {
            for (int cols = 0; cols < 10; ++cols) {
                sb.append("2").append(",");
            }
            sb.append("\n");
        }
        Metadata metadata = new Metadata();
        TikaTest.XMLResult xmlResult = this.getXML(new ByteArrayInputStream(sb.toString().getBytes(StandardCharsets.UTF_8)), PARSER, metadata);
        TextAndCSVParserTest.assertMediaTypeEquals("csv", "ISO-8859-1", "comma", xmlResult.metadata.get("Content-Type"));
    }

    @Test
    public void testSubclassingMimeTypesRemain() throws Exception {
        TikaTest.XMLResult r = this.getXML("testVCalendar.vcs");
        Assertions.assertEquals((Object)"text/x-vcalendar; charset=ISO-8859-1", (Object)r.metadata.get("Content-Type"));
    }

    private void assertContainsIgnoreWhiteSpaceDiffs(String expected, String xml) {
        TextAndCSVParserTest.assertContains((String)expected, (String)xml.replaceAll("[\r\n\t ]", " "));
    }
}

