package org.apache.tika.parser.microsoft.rtf;

import java.io.File;
import java.io.InputStream;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.tika.Tika;
import org.apache.tika.TikaTest;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.extractor.ParserContainerExtractor;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.DublinCore;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Office;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

/**
 * Tests RTF extraction through the {@code getText}, {@code getXML} and related helpers
 * inherited from {@link TikaTest}. The tests cover extracted text, metadata values,
 * inline markup in the XHTML output and embedded resources.
 */
public class RTFParserTest extends TikaTest {

    /**
     * The six Gothic letters U+10332 U+1033F U+10344 U+10339 U+10343 U+1033A, written as
     * UTF-16 surrogate pairs so the literal survives any source-file re-encoding.
     */
    private static final String GOTHIC_TEXT =
            "\uD800\uDF32\uD800\uDF3F\uD800\uDF44\uD800\uDF39\uD800\uDF43\uD800\uDF3A";

    /** Checks the reported content type (exactly one value) and two expected phrases. */
    @Test
    public void testBasicExtraction() throws Exception {
        Metadata metadata = new Metadata();
        String content = getText("testRTF.rtf", metadata);
        Assertions.assertEquals("application/rtf", metadata.get("Content-Type"));
        Assertions.assertEquals(1, metadata.getValues("Content-Type").length);
        assertContains("Test", content);
        assertContains("indexation Word", content);
    }

    /** After stripping all whitespace, the extracted text must be exactly the expected word. */
    @Test
    public void testUmlautSpacesExtraction2() throws Exception {
        String content = getText("testRTFUmlautSpaces2.rtf");
        Assertions.assertEquals("Übersicht", content.replaceAll("\\s+", ""));
    }

    /** Expected characters must be present and the first one must not appear doubled. */
    @Test
    public void testUnicodeUCNControlWordCharacterDoublingExtraction() throws Exception {
        String content = getText("testRTFUnicodeUCNControlWordCharacterDoubling.rtf");
        assertContains("年", content);
        assertContains("念", content);
        assertContains("0 ", content);
        assertContains("abc", content);
        Assertions.assertFalse(content.contains("年年"), "Doubled character 年");
    }

    /** The accented word must be extracted intact. */
    @Test
    public void testHexEscapeInsideWord() throws Exception {
        String content = getText("testRTFHexEscapeInsideWord.rtf");
        assertContains("ESPÍRITO", content);
    }

    /** Lower-case and upper-case Polish sample phrases must both be extracted. */
    @Test
    public void testWindowsCodepage1250() throws Exception {
        String content = getText("testRTFWindowsCodepage1250.rtf");
        assertContains("zażółć gęślą jaźń", content);
        assertContains("ZAŻÓŁĆ GĘŚLĄ JAŹŃ", content);
    }

    /** With whitespace runs collapsed, cell contents must be separated by single spaces. */
    @Test
    public void testTableCellSeparation() throws Exception {
        String content = getText("testRTFTableCellSeparation.rtf").replaceAll("\\s+", " ");
        assertContains("a b c d ä ë ö ü", content);
    }

    /** With whitespace runs collapsed, the two expected words must be space separated. */
    @Test
    public void testTableCellSeparation2() throws Exception {
        String content = getText("testRTFTableCellSeparation2.rtf").replaceAll("\\s+", " ");
        assertContains("Station Fax", content);
    }

    /** Czech phrases must be extracted from the WordPad-named sample. */
    @Test
    public void testWordPadCzechCharactersExtraction() throws Exception {
        String content = getText("testRTFWordPadCzechCharacters.rtf");
        assertContains("Článek týdne", content);
        assertContains("starověké židovské náboženské texty", content);
    }

    /** Czech phrases must be extracted from the Word 2010-named sample. */
    @Test
    public void testWord2010CzechCharactersExtraction() throws Exception {
        String content = getText("testRTFWord2010CzechCharacters.rtf");
        assertContains("Článek týdne", content);
        assertContains("starověké židovské náboženské texty", content);
    }

    /** Japanese body text and title must be present for the ms932 sample. */
    @Test
    public void testMS932Extraction() throws Exception {
        TikaTest.XMLResult result = getXML("testRTF-ms932.rtf");
        assertContains("こんにちは", result.xml);
        Assertions.assertEquals("タイトル", result.metadata.get(TikaCoreProperties.TITLE));
    }

    /** The umlaut word must appear in the extracted text. */
    @Test
    public void testUmlautSpacesExtraction() throws Exception {
        String content = getText("testRTFUmlautSpaces.rtf");
        assertContains("Übersicht", content);
    }

    /** Supplementary-plane (Gothic) characters must be extracted. */
    @Test
    public void testGothic() throws Exception {
        String content = getText("testRTFUnicodeGothic.rtf");
        assertContains(GOTHIC_TEXT, content);
    }

    /** Checks title, creator and comments metadata plus two Japanese text fragments. */
    @Test
    public void testJapaneseText() throws Exception {
        TikaTest.XMLResult result = getXML("testRTFJapanese.rtf");
        Metadata metadata = result.metadata;
        Assertions.assertEquals("ゾルゲと尾崎、淡々と最期\u3000", metadata.get(TikaCoreProperties.TITLE));
        Assertions.assertEquals("VMazel", metadata.get(TikaCoreProperties.CREATOR));
        Assertions.assertEquals("StarWriter", metadata.get(TikaCoreProperties.COMMENTS));
        assertContains("（ＧＨＱ）", result.xml);
        assertContains("東京都三鷹市", result.xml);
    }

    /**
     * Checks the length of the parsed string without a custom limit, with a limit set on
     * the facade, and with a per-call limit.
     */
    @Test
    public void testMaxLength() throws Exception {
        File document = getResourceAsFile("/test-documents/testRTFJapanese.rtf");
        Metadata metadata = new Metadata();
        Tika tika = new Tika();

        // NOTE(review): the streams are not closed here because Tika#parseToString is
        // documented to close the stream it is handed - confirm for the Tika version in use.
        String unlimited = tika.parseToString(TikaInputStream.get(document, metadata), metadata);
        Assertions.assertTrue(unlimited.length() > 500);

        tika.setMaxStringLength(200);
        String facadeLimited = tika.parseToString(TikaInputStream.get(document, metadata), metadata);
        Assertions.assertTrue(facadeLimited.length() <= 200);

        String callLimited = tika.parseToString(TikaInputStream.get(document, metadata), metadata, 100);
        Assertions.assertTrue(callLimited.length() <= 100);
    }

    /** Literal curly braces must survive in the extracted text. */
    @Test
    public void testTextWithCurlyBraces() throws Exception {
        String content = getText("testRTFWithCurlyBraces.rtf");
        assertContains("{ some text inside curly brackets }", content);
    }

    /** Dashes, special hyphens, the non-breaking space and typographic quotes must be extracted. */
    @Test
    public void testControls() throws Exception {
        String content = getText("testRTFControls.rtf");
        assertContains("Thiswordhasanem—dash", content);
        assertContains("Thiswordhasanen–dash", content);
        assertContains("Thiswordhasanon‑breakinghyphen", content);
        // Escaped so the non-breaking space cannot be silently turned into a plain space.
        assertContains("Thiswordhasanonbreaking\u00a0space", content);
        assertContains("Thiswordhasanoptional\u00adhyphen", content);
        assertContains("‘Single quoted text’", content);
        assertContains("“Double quoted text”", content);
        assertContains("“Double quoted text again”", content);
    }

    /** Expects U+FFFD replacement characters at the positions named by the sample text. */
    @Test
    public void testInvalidUnicode() throws Exception {
        String content = getText("testRTFInvalidUnicode.rtf");
        assertContains("Unpaired hi \ufffd here", content);
        assertContains("Unpaired lo \ufffd here", content);
        assertContains("Mismatched pair \ufffd\ufffd here", content);
    }

    /** Broad check of one sample: notes, header/footer, styles, tables, lists and metadata. */
    @Test
    public void testVarious() throws Exception {
        Metadata metadata = new Metadata();
        String content = getText("testRTFVarious.rtf", metadata);
        String collapsed = content.replaceAll("\\s+", " ");

        assertContains("Footnote appears here", content);
        assertContains("This is a footnote.", content);
        assertContains("This is the header text.", content);
        assertContains("This is the footer text.", content);
        assertContains("Here is a text box", content);
        assertContains("Bold", content);
        assertContains("italic", content);
        assertContains("underline", content);
        assertContains("superscript", content);
        assertContains("subscript", content);
        assertContains("Here is a citation:", content);
        assertContains("Figure 1 This is a caption for Figure 1", content);
        assertContains("(Kramer)", content);
        assertContains("Row 1 Col 1 Row 1 Col 2 Row 1 Col 3 Row 2 Col 1 Row 2 Col 2 Row 2 Col 3", collapsed);
        assertContains("Row 1 column 1 Row 2 column 1 Row 1 column 2 Row 2 column 2", collapsed);
        assertContains("This is a hyperlink", content);

        assertContains("Here is a list:", content);
        for (int bullet = 1; bullet <= 3; bullet++) {
            assertContains("Bullet " + bullet, content);
        }
        assertContains("Here is a numbered list:", content);
        for (int bullet = 1; bullet <= 3; bullet++) {
            assertContains("Number bullet " + bullet, content);
        }
        for (int row = 1; row <= 2; row++) {
            for (int col = 1; col <= 3; col++) {
                assertContains("Row " + row + " Col " + col, content);
            }
        }

        assertContains("Keyword1 Keyword2", content);
        assertContains("Keyword1 Keyword2", Arrays.asList(metadata.getValues(Office.KEYWORDS)));
        assertContains("Subject is here", content);
        Assertions.assertEquals("Subject is here", metadata.get(DublinCore.SUBJECT));

        assertContains("Suddenly some Japanese text:", content);
        assertContains("（ＧＨＱ）", content);
        assertContains("ゾルゲと尾崎、淡々と最期", content);
        assertContains("And then some Gothic text:", content);
        assertContains(GOTHIC_TEXT, content);
    }

    /** Bold and italic runs must be wrapped in {@code <b>} and {@code <i>} elements. */
    @Test
    public void testVariousStyle() throws Exception {
        String xml = getXML("testRTFVarious.rtf").xml;
        assertContains("<b>Bold</b>", xml);
        assertContains("<i>italic</i>", xml);
    }

    /** Checks element nesting where bold and italic runs start and stop independently. */
    @Test
    public void testBoldItalic() throws Exception {
        String xml = getXML("testRTFBoldItalic.rtf").xml;
        assertContains("<b>bold</b>", xml);
        assertContains("<b>bold </b><b><i>italic</i></b>", xml);
        assertContains("<b><i>italic </i></b><b>bold</b>", xml);
        assertContains("<i>italic</i>", xml);
        assertContains("<b>bold then </b><b><i>italic then</i></b><i> not bold</i>", xml);
        assertContains("<i>italic then </i><b><i>bold then</i></b><b> not italic</b>", xml);
    }

    /** The hyperlink must become an anchor and no tab-only paragraph may be emitted. */
    @Test
    public void testHyperlink() throws Exception {
        String xml = getXML("testRTFHyperlink.rtf").xml;
        assertContains("our most <a href=\"http://r.office.microsoft.com/r/rlidwelcomeFAQ?clid=1033\">frequently asked questions</a>", xml);
        assertNotContained("<p>\t\t</p>", xml);
    }

    /** A hyperlink nested inside bold/italic styling must keep both the anchor and the styles. */
    @Test
    public void testHyperLinkAndStyles() throws Exception {
        String xml = getXML("testRTFHyperlinkAndStyles.rtf").xml;
        assertContains("<b><i>DIP</i>: <a href=\"..\\\\..\\\\SAUCES\\\\Dips\\\\Dip, Caesar.doc\">Dip, Caesar.doc</a></b>", xml);
    }

    /** The sentence must be emitted as a single paragraph. */
    @Test
    public void testIgnoredControlWord() throws Exception {
        String xml = getXML("testRTFIgnoredControlWord.rtf").xml;
        assertContains("<p>The quick brown fox jumps over the lazy dog</p>", xml);
    }

    /** The Cyrillic phrase must be extracted correctly. */
    @Test
    public void testFontAfterBufferedText() throws Exception {
        String xml = getXML("testFontAfterBufferedText.rtf").xml;
        assertContains("Уважаемый клиент!", xml);
    }

    /** Ordered and unordered list markup must be produced for the Microsoft Word sample. */
    @Test
    public void testListMicrosoftWord() throws Exception {
        String xml = getXML("testRTFListMicrosoftWord.rtf").xml;
        assertContains("<ol>\t<li>one</li>", xml);
        assertContains("</ol>", xml);
        assertContains("<ul>\t<li>first</li>", xml);
        assertContains("</ul>", xml);
    }

    /** With the ignore-list-markup configuration, no list elements may be emitted. */
    @Test
    public void testTurningOffList() throws Exception {
        try (InputStream configStream = getResourceAsStream(
                "/org/apache/tika/parser/microsoft/rtf/ignoreListMarkup-tika-config.xml")) {
            Assertions.assertNotNull(configStream);
            AutoDetectParser parser = new AutoDetectParser(new TikaConfig(configStream));
            String xml = getXML("testRTFListMicrosoftWord.rtf", parser).xml;
            assertNotContained("<ol>", xml);
            assertNotContained("<ul>", xml);
            assertNotContained("<li>", xml);
        }
    }

    /** Ordered and unordered list markup must be produced for the LibreOffice sample. */
    @Test
    public void testListLibreOffice() throws Exception {
        String xml = getXML("testRTFListLibreOffice.rtf").xml;
        assertContains("<ol>\t<li>one</li>", xml);
        assertContains("</ol>", xml);
        assertContains("<ul>\t<li>first</li>", xml);
        assertContains("</ul>", xml);
    }

    /** Exactly one embedded resource of ten bytes, with known values at two offsets, is expected. */
    @Test
    public void testBinControlWord() throws Exception {
        TikaTest.ByteCopyingHandler handler = new TikaTest.ByteCopyingHandler();
        try (TikaInputStream stream =
                     TikaInputStream.get(getResourceAsStream("/test-documents/testBinControlWord.rtf"))) {
            ParserContainerExtractor extractor = new ParserContainerExtractor();
            Assertions.assertTrue(extractor.isSupported(stream));
            // The extractor is passed as its own recursion extractor.
            extractor.extract(stream, extractor, handler);
        }

        Assertions.assertEquals(1, handler.bytes.size());
        byte[] embedded = (byte[]) handler.bytes.get(0);
        Assertions.assertEquals(10, embedded.length);
        Assertions.assertEquals(125, embedded[4]);
        Assertions.assertEquals(-1, embedded[9]);
    }

    /** Page, word and character counts plus the creation-date prefix must match. */
    @Test
    public void testMetaDataCounts() throws Exception {
        Metadata metadata = getXML("testRTFWord2010CzechCharacters.rtf").metadata;
        Assertions.assertEquals("1", metadata.get(Office.PAGE_COUNT));
        Assertions.assertEquals("70", metadata.get(Office.WORD_COUNT));
        Assertions.assertEquals("401", metadata.get(Office.CHARACTER_COUNT));

        String created = metadata.get(TikaCoreProperties.CREATED);
        // Fail with an assertion rather than a NullPointerException when the date is missing.
        Assertions.assertNotNull(created);
        Assertions.assertTrue(created.startsWith("2010-10-"));
    }

    /** Body text must be extracted from the list-override sample. */
    @Test
    public void testListOverride() throws Exception {
        String content = getText("testRTFListOverride.rtf");
        assertContains("Body", content);
    }

    /** Text must still be extracted from the corrupt list-override sample. */
    @Test
    public void testCorruptListOverride() throws Exception {
        String content = getText("testRTFCorruptListOverride.rtf");
        assertContains("apple", content);
    }

    /** Runs of newlines must produce the expected sequence of empty paragraphs. */
    @Test
    public void testMultipleNewlines() throws Exception {
        String xml = getXML("testRTFNewlines.rtf").xml.replaceAll("[\r\n]+", " ");
        assertContains("<body><p>one</p> <p /> <p>two</p> <p /> <p /> <p>three</p> <p /> <p /> <p /> <p>four</p>", xml);
    }

    /**
     * Extracts the same document twice: with EMF and WMF images skipped no file names are
     * recorded; with nothing skipped two are.
     */
    @Test
    public void testEmbeddedLinkedDocument() throws Exception {
        Set<MediaType> skipTypes = new HashSet<>();
        skipTypes.add(MediaType.parse("image/emf"));
        skipTypes.add(MediaType.parse("image/wmf"));

        TikaTest.TrackingHandler skippingHandler = new TikaTest.TrackingHandler(skipTypes);
        try (TikaInputStream stream =
                     TikaInputStream.get(getResourceAsStream("/test-documents/testRTFEmbeddedLink.rtf"))) {
            ParserContainerExtractor extractor = new ParserContainerExtractor();
            Assertions.assertTrue(extractor.isSupported(stream));
            extractor.extract(stream, extractor, skippingHandler);
        }
        Assertions.assertEquals(0, skippingHandler.filenames.size());

        TikaTest.TrackingHandler trackingHandler = new TikaTest.TrackingHandler();
        try (TikaInputStream stream =
                     TikaInputStream.get(getResourceAsStream("/test-documents/testRTFEmbeddedLink.rtf"))) {
            ParserContainerExtractor extractor = new ParserContainerExtractor();
            Assertions.assertTrue(extractor.isSupported(stream));
            extractor.extract(stream, extractor, trackingHandler);
        }
        Assertions.assertEquals(2, trackingHandler.filenames.size());
    }

    /**
     * With the RTF-specific configuration, the single metadata entry must record a
     * {@code TikaMemoryLimitException} for the embedded stream.
     */
    @Test
    public void testConfig() throws Exception {
        try (InputStream configStream =
                     getResourceAsStream("/org/apache/tika/parser/microsoft/rtf/tika-config.xml")) {
            Assertions.assertNotNull(configStream);
            AutoDetectParser parser = new AutoDetectParser(new TikaConfig(configStream));
            List<Metadata> metadataList = getRecursiveMetadata("testBinControlWord.rtf", parser);
            Assertions.assertEquals(1, metadataList.size());
            assertContains("TikaMemoryLimitException",
                    metadataList.get(0).get(TikaCoreProperties.TIKA_META_EXCEPTION_EMBEDDED_STREAM));
        }
    }

    /** The first name must be bold, and the second must not start a bold run. */
    @Test
    public void testBoldPlain() throws Exception {
        String xml = getXML("testRTFBoldPlain.rtf").xml;
        assertContains("<b>Hank</b>", xml);
        assertNotContained("<b>Anna Smith", xml);
    }

    /** Annotation author and comment text must be space separated from surrounding text. */
    @Test
    public void testSpacingInAnnotations() throws Exception {
        String xml = getXML("testRTF_annotation_spacing.rtf").xml;
        assertContains("supercali ATB Allison, Timothy B.  This is a comment fragilistic", xml);
    }

    /** Regression check for the TIKA-1713 sample: expected phrase is extracted. */
    @Test
    public void testTIKA1713() throws Exception {
        String xml = getXML("testRTFTIKA_1713.rtf").xml;
        assertContains("For discussion", xml);
    }

    /** Regression check for the TIKA-2150 sample: the two words are tab separated. */
    @Test
    public void testTIKA2150() throws Exception {
        String xml = getXML("testRTFTIKA_2150.rtf").xml;
        assertContains("TO\tFROM", xml);
    }

    /** Regression check for the TIKA-2500 sample: expected word is extracted. */
    @Test
    public void testTIKA2500() throws Exception {
        String xml = getXML("testRTFTIKA_2500.rtf").xml;
        assertContains("Level1", xml);
    }

    /** Regression check for the TIKA-2883 sample: expected sentence is extracted. */
    @Test
    public void testTIKA2883() throws Exception {
        String xml = getXML("testRTFTIKA_2883.rtf").xml;
        assertContains("This message has been archived.", xml);
    }

    /** Regression check for the TIKA-2899 sample: expected phrase is extracted. */
    @Test
    public void testTIKA2899() throws Exception {
        String xml = getXML("testRTFTIKA_2899.rtf").xml;
        assertContains("this Agreement on today", xml);
    }
}
