/*
 * Decompiled with CFR 0.152.
 */
package org.apache.tika.parser.txt;

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.StringWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import org.apache.tika.TikaTest;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.txt.TXTParser;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.WriteOutContentHandler;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.xml.sax.ContentHandler;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Tests for {@link TXTParser}.
 *
 * <p>Each test feeds a small byte sequence (or a bundled test document) to the
 * parser and checks the metadata the parser reports afterwards, and in some
 * cases the extracted text as well.
 */
public class TXTParserTest extends TikaTest {
    /** Metadata key under which the media type (including charset) is read back. */
    private static final String CONTENT_TYPE = "Content-Type";

    /** Metadata key under which the charset name is read back. */
    private static final String CONTENT_ENCODING = "Content-Encoding";

    /** Metadata key checked to confirm no language is reported. */
    private static final String CONTENT_LANGUAGE = "Content-Language";

    private final Parser parser = new TXTParser();

    /**
     * Parses an English sentence encoded as ISO-8859-1 and checks the reported
     * content type, the absence of any language metadata, and that the text
     * reaches the content handler.
     */
    @Test
    public void testEnglishText() throws Exception {
        String text = "Hello, World! This is simple UTF-8 text content written in English to test autodetection of both the character encoding and the language of the input stream.";
        Metadata metadata = new Metadata();
        StringWriter writer = new StringWriter();

        parseBytes(text.getBytes(StandardCharsets.ISO_8859_1), new WriteOutContentHandler(writer), metadata);

        String content = writer.toString();
        Assertions.assertEquals("text/plain; charset=ISO-8859-1", metadata.get(CONTENT_TYPE));
        // The parser is expected not to populate any language information.
        Assertions.assertNull(metadata.get(CONTENT_LANGUAGE));
        Assertions.assertNull(metadata.get(TikaCoreProperties.LANGUAGE));
        TikaTest.assertContains("Hello", content);
        TikaTest.assertContains("World", content);
        TikaTest.assertContains("autodetection", content);
        TikaTest.assertContains("stream", content);
    }

    /**
     * Parses UTF-8 encoded text containing non-ASCII characters and checks that
     * UTF-8 is reported and the text is extracted unchanged.
     */
    @Test
    public void testUTF8Text() throws Exception {
        String text = "I\u00f1t\u00ebrn\u00e2ti\u00f4n\u00e0liz\u00e6ti\u00f8n";
        BodyContentHandler handler = new BodyContentHandler();
        Metadata metadata = new Metadata();

        parseBytes(text.getBytes(StandardCharsets.UTF_8), handler, metadata);

        Assertions.assertEquals("text/plain; charset=UTF-8", metadata.get(CONTENT_TYPE));
        Assertions.assertEquals("UTF-8", metadata.get(CONTENT_ENCODING));
        TikaTest.assertContains(text, handler.toString());
    }

    /**
     * Parses zero bytes of input; the expected result is a UTF-8 content type
     * and a body consisting of a single newline.
     */
    @Test
    public void testEmptyText() throws Exception {
        BodyContentHandler handler = new BodyContentHandler();
        Metadata metadata = new Metadata();

        parseBytes(new byte[0], handler, metadata);

        Assertions.assertEquals("text/plain; charset=UTF-8", metadata.get(CONTENT_TYPE));
        Assertions.assertEquals("\n", handler.toString());
    }

    /**
     * Checks which Latin charset is reported for three inputs that are all
     * encoded as ISO-8859-15: one with a CRLF line ending, one with a bare LF,
     * and one containing the euro sign.
     */
    @Test
    public void testLatinDetectionHeuristics() throws Exception {
        String windows = "test\r\n";
        String unix = "test\n";
        String euro = "test \u20ac\n";

        // CRLF line ending -> windows-1252 is expected.
        Metadata metadata = new Metadata();
        parseBytes(windows.getBytes("ISO-8859-15"), new DefaultHandler(), metadata);
        Assertions.assertEquals("text/plain; charset=windows-1252", metadata.get(CONTENT_TYPE));

        // Bare LF line ending -> ISO-8859-1 is expected.
        metadata = new Metadata();
        parseBytes(unix.getBytes("ISO-8859-15"), new DefaultHandler(), metadata);
        Assertions.assertEquals("text/plain; charset=ISO-8859-1", metadata.get(CONTENT_TYPE));

        // Euro sign present -> ISO-8859-15 is expected.
        metadata = new Metadata();
        parseBytes(euro.getBytes("ISO-8859-15"), new DefaultHandler(), metadata);
        Assertions.assertEquals("text/plain; charset=ISO-8859-15", metadata.get(CONTENT_TYPE));
    }

    /**
     * Checks that a leading byte order mark does not show up in the extracted
     * text for UTF-8, UTF-16 big-endian and UTF-16 little-endian input.
     */
    @Test
    public void testDropByteOrderMark() throws Exception {
        assertExtractText("UTF-8 BOM", "test",
                new byte[] {(byte) 0xEF, (byte) 0xBB, (byte) 0xBF, 't', 'e', 's', 't'});
        assertExtractText("UTF-16 BE BOM", "test",
                new byte[] {(byte) 0xFE, (byte) 0xFF, 0, 't', 0, 'e', 0, 's', 0, 't'});
        assertExtractText("UTF-16 LE BOM", "test",
                new byte[] {(byte) 0xFF, (byte) 0xFE, 't', 0, 'e', 0, 's', 0, 't', 0});
    }

    /**
     * Parses the same ISO-8859-1 bytes twice with one {@link Metadata} instance:
     * first with no hint, then after a {@code text/plain} content type naming
     * ISO-8859-15 has been set, and checks the reported charset each time.
     */
    @Test
    public void testUseIncomingCharsetAsHint() throws Exception {
        String text = "the name is \u00e1ndre";
        byte[] data = text.getBytes(StandardCharsets.ISO_8859_1);
        Metadata metadata = new Metadata();

        parseBytes(data, new BodyContentHandler(), metadata);
        Assertions.assertEquals("text/plain; charset=ISO-8859-1", metadata.get(CONTENT_TYPE));
        Assertions.assertEquals("ISO-8859-1", metadata.get(CONTENT_ENCODING));

        // Same metadata object, now carrying an explicit charset.
        metadata.set(CONTENT_TYPE, "text/plain; charset=ISO-8859-15");
        parseBytes(data, new BodyContentHandler(), metadata);
        Assertions.assertEquals("text/plain; charset=ISO-8859-15", metadata.get(CONTENT_TYPE));
        Assertions.assertEquals("ISO-8859-15", metadata.get(CONTENT_ENCODING));
    }

    /**
     * Like {@link #testUseIncomingCharsetAsHint()}, but the second parse uses a
     * fresh {@link Metadata} whose incoming content type is {@code text/html};
     * both the media type and the charset are expected to be retained.
     */
    @Test
    public void testUsingCharsetInContentTypeHeader() throws Exception {
        String text = "the name is \u00e1ndre";
        byte[] data = text.getBytes(StandardCharsets.ISO_8859_1);
        Metadata metadata = new Metadata();

        parseBytes(data, new BodyContentHandler(), metadata);
        Assertions.assertEquals("text/plain; charset=ISO-8859-1", metadata.get(CONTENT_TYPE));
        Assertions.assertEquals("ISO-8859-1", metadata.get(CONTENT_ENCODING));

        metadata = new Metadata();
        metadata.set(CONTENT_TYPE, "text/html; charset=ISO-8859-15");
        parseBytes(data, new BodyContentHandler(), metadata);
        Assertions.assertEquals("text/html; charset=ISO-8859-15", metadata.get(CONTENT_TYPE));
        Assertions.assertEquals("ISO-8859-15", metadata.get(CONTENT_ENCODING));
    }

    /**
     * Parses {@code input} and asserts that the extracted body text equals
     * {@code expected}.
     *
     * @param msg      failure message passed to the assertion
     * @param expected exact text the handler is expected to contain
     * @param input    raw bytes handed to the parser
     */
    private void assertExtractText(String msg, String expected, byte[] input) throws Exception {
        // Ignorable whitespace events are discarded so they are not added to
        // the text that is compared against the expected value.
        BodyContentHandler handler = new BodyContentHandler() {
            @Override
            public void ignorableWhitespace(char[] ch, int off, int len) {
            }
        };
        Metadata metadata = new Metadata();

        parseBytes(input, handler, metadata);

        Assertions.assertEquals(expected, handler.toString(), msg);
    }

    /**
     * Checks that a language value set on the metadata before parsing is still
     * present afterwards.
     */
    @Test
    public void testRetainIncomingLanguage() throws Exception {
        String text = "Simple Content";
        Metadata metadata = new Metadata();
        metadata.set(TikaCoreProperties.LANGUAGE, "en");

        parseBytes(text.getBytes(StandardCharsets.UTF_8), new BodyContentHandler(), metadata);

        Assertions.assertEquals("en", metadata.get(TikaCoreProperties.LANGUAGE));
    }

    /** Parses the bundled CP866 sample document and expects IBM866 to be reported. */
    @Test
    public void testCP866() throws Exception {
        Metadata metadata = new Metadata();
        StringWriter writer = new StringWriter();

        try (InputStream stream = getResourceAsStream("/test-documents/russian.cp866.txt")) {
            parser.parse(stream, new WriteOutContentHandler(writer), metadata, new ParseContext());
        }

        Assertions.assertEquals("text/plain; charset=IBM866", metadata.get(CONTENT_TYPE));
    }

    /**
     * Parses the bundled CP500 sample document and expects IBM500 to be
     * reported, then parses a short ISO-8859-1 encoded HTML snippet and expects
     * ISO-8859-1 rather than IBM500.
     */
    @Test
    public void testEBCDIC_CP500() throws Exception {
        Metadata metadata = new Metadata();
        StringWriter writer = new StringWriter();

        try (InputStream stream = getResourceAsStream("/test-documents/english.cp500.txt")) {
            parser.parse(stream, new WriteOutContentHandler(writer), metadata, new ParseContext());
        }
        Assertions.assertEquals("text/plain; charset=IBM500", metadata.get(CONTENT_TYPE));

        // Counter-check with plain markup in a Latin encoding.
        metadata = new Metadata();
        writer = new StringWriter();
        parseBytes("<html><body>hello world</body></html>".getBytes(StandardCharsets.ISO_8859_1),
                new WriteOutContentHandler(writer), metadata);
        Assertions.assertEquals("text/plain; charset=ISO-8859-1", metadata.get(CONTENT_TYPE));
    }

    /**
     * Parses a very short ASCII-only snippet: with no hint ISO-8859-1 is
     * expected; once the metadata carries an incoming content type that names
     * UTF-8, that content type is expected to be kept as-is.
     */
    @Test
    public void testCharsetDetectionWithShortSnipet() throws Exception {
        String text = "Hello, World!";
        byte[] data = text.getBytes(StandardCharsets.UTF_8);
        Metadata metadata = new Metadata();

        parseBytes(data, new BodyContentHandler(), metadata);
        Assertions.assertEquals("text/plain; charset=ISO-8859-1", metadata.get(CONTENT_TYPE));

        // Same metadata object, now carrying an explicit incoming content type.
        metadata.set(CONTENT_TYPE, "application/binary; charset=UTF-8");
        parseBytes(data, new BodyContentHandler(), metadata);
        Assertions.assertEquals("application/binary; charset=UTF-8", metadata.get(CONTENT_TYPE));
    }

    /**
     * Runs the vCalendar sample through {@code getXML} and checks that the
     * reported content type is {@code text/x-vcalendar} with a charset
     * parameter, not plain {@code text/plain}.
     */
    @Test
    public void testSubclassingMimeTypesRemain() throws Exception {
        TikaTest.XMLResult r = getXML("testVCalendar.vcs");
        Assertions.assertEquals("text/x-vcalendar; charset=ISO-8859-1", r.metadata.get(CONTENT_TYPE));
    }

    /**
     * Runs the parser under test over an in-memory byte array with a fresh
     * {@link ParseContext}.
     *
     * @param input    raw bytes to parse
     * @param handler  receiver of the SAX events produced by the parser
     * @param metadata metadata passed to, and updated by, the parser
     */
    private void parseBytes(byte[] input, ContentHandler handler, Metadata metadata) throws Exception {
        parser.parse(new ByteArrayInputStream(input), handler, metadata, new ParseContext());
    }
}

