package opennlp.tools.tokenize;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.regex.Pattern;
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.formats.ResourceAsStreamFactory;
import opennlp.tools.util.InsufficientTrainingDataException;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.TrainingParameters;
import org.junit.Assert;
import org.junit.Test;

/* loaded from: input_file:opennlp/tools/tokenize/TokenizerMETest.class */
/**
 * Tests for {@link TokenizerME}: tokenization with the models supplied by
 * {@link TokenizerTestUtil}, and the failure mode of training when the
 * sample data is insufficient.
 */
public class TokenizerMETest {

    /**
     * Tokenizes {@code "test,"} with the model returned by
     * {@link TokenizerTestUtil#createSimpleMaxentTokenModel()} and expects the
     * trailing comma to be split off as its own token.
     */
    @Test
    public void testTokenizerSimpleModel() throws IOException {
        TokenizerME tokenizer = new TokenizerME(TokenizerTestUtil.createSimpleMaxentTokenModel());

        String[] tokens = tokenizer.tokenize("test,");

        // Checks both the token count and every token in a single assertion.
        Assert.assertArrayEquals(new String[] {"test", ","}, tokens);
    }

    /**
     * Tokenizes a full sentence with the model returned by
     * {@link TokenizerTestUtil#createMaxentTokenModel()} and expects the
     * clitic {@code 's} and the final exclamation mark to be separate tokens.
     */
    @Test
    public void testTokenizer() throws IOException {
        TokenizerME tokenizer = new TokenizerME(TokenizerTestUtil.createMaxentTokenModel());

        String[] tokens = tokenizer.tokenize("Sounds like it's not properly thought through!");

        String[] expected = {
            "Sounds", "like", "it", "'s", "not", "properly", "thought", "through", "!"
        };
        Assert.assertArrayEquals(expected, tokens);
    }

    /**
     * Trains on the {@code token-insufficient.train} resource and expects
     * {@link InsufficientTrainingDataException} to be thrown by
     * {@link TokenizerME#train}.
     */
    @Test(expected = InsufficientTrainingDataException.class)
    public void testInsufficientData() throws IOException {
        ResourceAsStreamFactory trainingData = new ResourceAsStreamFactory(
                TokenizerModel.class, "/opennlp/tools/tokenize/token-insufficient.train");

        TrainingParameters trainingParameters = new TrainingParameters();
        trainingParameters.put("Iterations", 100);
        trainingParameters.put("Cutoff", 5);

        // try-with-resources: training is expected to throw, so the sample
        // stream must be released on the exceptional path as well.
        try (TokenSampleStream samples = new TokenSampleStream(
                new PlainTextByLineStream(trainingData, StandardCharsets.UTF_8))) {
            TokenizerME.train(
                    samples,
                    TokenizerFactory.create((String) null, "eng", (Dictionary) null, true, (Pattern) null),
                    trainingParameters);
        }
    }
}
