package org.apache.tika.parser.txt;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import org.apache.tika.TikaTest;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

/* loaded from: input_file:org/apache/tika/parser/txt/CharsetDetectorTest.class */
/**
 * Tests for {@link CharsetDetector}, run against sample documents from the test resources and
 * against byte arrays built in memory.
 *
 * <p>Every stream opened by a test is managed with try-with-resources so that it is closed
 * exactly once and a failure while closing never hides the primary test failure.
 */
public class CharsetDetectorTest extends TikaTest {

    /** Arabic sample word that is repeated to build inputs of different lengths. */
    private static final String ARABIC_SAMPLE = "الحاسوب";

    /**
     * Enables the detector's input filter, runs detection on an HTML document and asserts that
     * the candidate with the highest confidence is UTF-8.
     *
     * @throws IOException if the test document cannot be read
     */
    @Test
    public void testTagDropper() throws IOException {
        try (InputStream in = getResourceAsStream("/test-documents/resume.html")) {
            CharsetDetector detector = new CharsetDetector();
            detector.enableInputFilter(true);
            detector.setText(in);

            // Pick the candidate with the strictly highest confidence; on ties the earliest wins.
            CharsetMatch best = null;
            for (CharsetMatch candidate : detector.detectAll()) {
                if (best == null || best.getConfidence() < candidate.getConfidence()) {
                    best = candidate;
                }
            }

            Assertions.assertNotNull(best);
            Assertions.assertEquals("UTF-8", best.getName());
        }
    }

    /**
     * Asserts that {@code getReader} yields a ready reader when the declared charset is
     * {@code null} and when it is the empty string.
     *
     * @throws IOException if the test document cannot be read
     */
    @Test
    public void testEmptyOrNullDeclaredCharset() throws IOException {
        try (InputStream in = getResourceAsStream("/test-documents/resume.html")) {
            CharsetDetector detector = new CharsetDetector();
            // Both calls deliberately share the same stream, as in the original test.
            Assertions.assertTrue(detector.getReader(in, (String) null).ready());
            Assertions.assertTrue(detector.getReader(in, "").ready());
        }
    }

    /**
     * Asserts that a windows-1252 sample text file is detected as {@code windows-1252}.
     *
     * @throws Exception if the test document cannot be read
     */
    @Test
    public void testWin125XHeuristics() throws Exception {
        CharsetDetector detector = new CharsetDetector();
        try (InputStream in = getResourceAsStream("/test-documents/testTXT_win-1252.txt")) {
            detector.setText(in);
        }
        // Detection runs after the stream is closed, matching the original order of operations.
        Assertions.assertEquals("windows-1252", detector.detect().getName());
    }

    /**
     * Asserts that {@code setText(byte[])} and {@code setText(InputStream)} lead to the same
     * detected charset (ISO-8859-1) for the same content.
     *
     * @throws Exception if the test document cannot be read
     */
    @Test
    public void testSetTextConsistency() throws Exception {
        byte[] content =
                Files.readAllBytes(getResourceAsFile("/test-documents/multi-language.txt").toPath());

        CharsetDetector fromBytes = new CharsetDetector();
        fromBytes.setText(content);

        CharsetDetector fromStream = new CharsetDetector();
        fromStream.setText(new ByteArrayInputStream(content));

        Assertions.assertEquals("ISO-8859-1", fromBytes.detect().getName());
        Assertions.assertEquals("ISO-8859-1", fromStream.detect().getName());
    }

    /**
     * Asserts that an empty byte array is reported as UTF-8.
     *
     * @throws Exception if detection fails
     */
    @Test
    public void testZeroLength() throws Exception {
        CharsetDetector detector = new CharsetDetector();
        detector.setText(new byte[0]);
        Assertions.assertEquals("UTF-8", detector.detect().getName());
    }

    /**
     * Reuses one detector for a long windows-1256 input followed by a much shorter UTF-8 input and
     * asserts that each detection reports the encoding of the text that was set last.
     *
     * <p>NOTE(review): judging by the test name this presumably guards against a stale input
     * length surviving a second {@code setText} call; confirm against the detector.
     *
     * @throws IOException if one of the named charsets is not supported by the JVM
     */
    @Test
    public void testLengthResetCorrectly() throws IOException {
        CharsetDetector detector = new CharsetDetector();

        detector.setText(repeat(ARABIC_SAMPLE, 5000).getBytes("windows-1256"));
        Assertions.assertEquals("windows-1256", detector.detect().getName());

        detector.setText(repeat(ARABIC_SAMPLE, 5).getBytes("UTF-8"));
        Assertions.assertEquals("UTF-8", detector.detect().getName());
    }

    /**
     * Parses two documents with a parser built from the "ignore charset" test configuration and
     * asserts that the expected text appears in the extracted XML.
     *
     * @throws Exception if the configuration cannot be loaded or parsing fails
     */
    @Test
    public void testIgnoreCharset() throws Exception {
        TikaConfig config;
        // Close the configuration stream once the config is built instead of leaking it.
        try (InputStream configStream =
                getResourceAsStream("/test-configs/tika-config-ignore-charset.xml")) {
            config = new TikaConfig(configStream);
        }
        AutoDetectParser parser = new AutoDetectParser(config);

        Metadata textMetadata = new Metadata();
        textMetadata.set("resourceName", "texty-text.txt");
        assertContains("ACTIVE AGE", getXML("testIgnoreCharset.txt", parser, textMetadata).xml);

        Metadata htmlMetadata = new Metadata();
        htmlMetadata.set("resourceName", "texty-text.txt");
        assertContains("Please check your email",
                getXML("test_ignore_IBM420.html", parser, htmlMetadata).xml);
    }

    /** Returns {@code text} concatenated with itself {@code times} times. */
    private static String repeat(String text, int times) {
        StringBuilder sb = new StringBuilder(text.length() * times);
        for (int i = 0; i < times; i++) {
            sb.append(text);
        }
        return sb.toString();
    }
}
