/*
 * Decompiled with CFR 0.152.
 */
package opennlp.tools.tokenize;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import opennlp.tools.formats.ResourceAsStreamFactory;
import opennlp.tools.tokenize.TokenSampleStream;
import opennlp.tools.tokenize.TokenizerFactory;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;
import opennlp.tools.tokenize.TokenizerTestUtil;
import opennlp.tools.util.InputStreamFactory;
import opennlp.tools.util.InsufficientTrainingDataException;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.TrainingParameters;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

/**
 * Tests for {@link TokenizerME}.
 *
 * <p>The tests tokenize short strings with models obtained from
 * {@link TokenizerTestUtil}, check the handling of line-break characters when
 * {@code setKeepNewLines(true)} is active, and check that training on the
 * {@code token-insufficient.train} resource is rejected.
 */
public class TokenizerMETest {

    /**
     * Tokenizes {@code input} once and asserts that the produced tokens equal
     * {@code expected}, element by element.
     *
     * <p>Array equality already covers the token count, so no separate length
     * assertion is needed.
     *
     * @param tokenizer the tokenizer under test
     * @param input the text to tokenize
     * @param expected the tokens expected, in order
     */
    private static void assertTokenization(TokenizerME tokenizer, String input, String... expected) {
        String[] actual = tokenizer.tokenize(input);
        Assertions.assertArrayEquals(expected, actual, () -> "Unexpected tokens for input: " + input);
    }

    /**
     * With the simple maxent model, {@code "test,"} is split into the word and
     * the trailing comma.
     *
     * @throws IOException if the model cannot be created
     */
    @Test
    void testTokenizerSimpleModel() throws IOException {
        TokenizerME tokenizer = new TokenizerME(TokenizerTestUtil.createSimpleMaxentTokenModel());

        assertTokenization(tokenizer, "test,", "test", ",");
    }

    /**
     * With the maxent model, a full sentence is split into nine tokens; the
     * contraction {@code it's} becomes {@code it} and {@code 's}, and the final
     * exclamation mark is a token of its own.
     *
     * @throws IOException if the model cannot be created
     */
    @Test
    void testTokenizer() throws IOException {
        TokenizerME tokenizer = new TokenizerME(TokenizerTestUtil.createMaxentTokenModel());

        assertTokenization(tokenizer, "Sounds like it's not properly thought through!",
                "Sounds", "like", "it", "'s", "not", "properly", "thought", "through", "!");
    }

    /**
     * Training on the {@code token-insufficient.train} resource with 100
     * iterations and a cutoff of 5 must fail with
     * {@link InsufficientTrainingDataException}.
     */
    @Test
    void testInsufficientData() {
        Assertions.assertThrows(InsufficientTrainingDataException.class, () -> {
            InputStreamFactory trainDataIn = new ResourceAsStreamFactory(
                    TokenizerModel.class, "/opennlp/tools/tokenize/token-insufficient.train");

            TrainingParameters mlParams = new TrainingParameters();
            mlParams.put("Iterations", 100);
            mlParams.put("Cutoff", 5);

            // try-with-resources closes the sample stream even though train(...)
            // is expected to throw; the training exception remains the primary
            // exception seen by assertThrows.
            try (TokenSampleStream samples = new TokenSampleStream(
                    new PlainTextByLineStream(trainDataIn, StandardCharsets.UTF_8))) {
                TokenizerME.train(samples, TokenizerFactory.create(null, "eng", null, true, null), mlParams);
            }
        });
    }

    /**
     * With {@code setKeepNewLines(true)}, every {@code \n} in the input is
     * returned as its own token, while the blanks between words are not.
     *
     * @throws IOException if the model cannot be created
     */
    @Test
    void testNewLineAwareTokenization() throws IOException {
        TokenizerME tokenizer = new TokenizerME(TokenizerTestUtil.createMaxentTokenModel());
        tokenizer.setKeepNewLines(true);

        assertTokenization(tokenizer, "a\n", "a", "\n");
        assertTokenization(tokenizer, "a\nb", "a", "\n", "b");
        assertTokenization(tokenizer, "a\n\n b", "a", "\n", "\n", "b");
        assertTokenization(tokenizer, "a\n\n b\n\n c", "a", "\n", "\n", "b", "\n", "\n", "c");
    }

    /**
     * With {@code setKeepNewLines(true)}, a Windows line break {@code \r\n} is
     * returned as two separate tokens, {@code \r} followed by {@code \n}.
     *
     * @throws IOException if the model cannot be created
     */
    @Test
    void testTokenizationOfStringWithWindowsNewLineTokens() throws IOException {
        TokenizerME tokenizer = new TokenizerME(TokenizerTestUtil.createMaxentTokenModel());
        tokenizer.setKeepNewLines(true);

        assertTokenization(tokenizer, "a\r\n", "a", "\r", "\n");
        assertTokenization(tokenizer, "a\r\nb", "a", "\r", "\n", "b");
        assertTokenization(tokenizer, "a\r\n\r\n b", "a", "\r", "\n", "\r", "\n", "b");
        assertTokenization(tokenizer, "a\r\n\r\n b\r\n\r\n c",
                "a", "\r", "\n", "\r", "\n", "b", "\r", "\n", "\r", "\n", "c");
    }
}

