package opennlp.tools.tokenize;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.regex.Pattern;
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.formats.ResourceAsStreamFactory;
import opennlp.tools.tokenize.DummyTokenizerFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.TrainingParameters;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

/* loaded from: input_file:opennlp/tools/tokenize/TokenizerFactoryTest.class */
/**
 * Tests for {@link TokenizerFactory}.
 *
 * <p>Each round-trip test trains a {@link TokenizerModel} with a given factory, checks the
 * factory exposed by the trained model, then serializes the model to a byte array, loads it
 * back and repeats the same checks on the reloaded model and its factory.
 */
public class TokenizerFactoryTest {
    /** Language code used by every test in this class. */
    private static final String LANG = "spa";

    /** Pattern the tests expect from the factory when no pattern is passed explicitly. */
    private static final String DEFAULT_ALPHANUMERIC_PATTERN = "^[A-Za-z0-9]+$";

    /** Explicit pattern passed to the factory by the "custom pattern" tests. */
    private static final String CUSTOM_ALPHANUMERIC_PATTERN = "^[0-9A-Za-z]+$";

    /** Class-path location of the abbreviation dictionary used as a fixture. */
    private static final String ABB_DICTIONARY_RESOURCE = "opennlp/tools/sentdetect/abb.xml";

    /**
     * Opens the tokenizer training data shipped as a class-path resource.
     *
     * @return a stream of token samples read line by line as UTF-8
     * @throws IOException if the training resource cannot be read
     */
    private static ObjectStream<TokenSample> createSampleStream() throws IOException {
        return new TokenSampleStream(new PlainTextByLineStream(new ResourceAsStreamFactory(TokenizerFactoryTest.class, "/opennlp/tools/tokenize/token.train"), StandardCharsets.UTF_8));
    }

    /**
     * Trains a tokenizer model on the sample stream using the default training parameters.
     *
     * @param tokenizerFactory the factory to train with
     * @return the trained model
     * @throws IOException if reading the training data fails
     */
    private static TokenizerModel train(TokenizerFactory tokenizerFactory) throws IOException {
        return TokenizerME.train(createSampleStream(), tokenizerFactory, TrainingParameters.defaultParams());
    }

    /**
     * Loads the abbreviation dictionary fixture from the class path.
     *
     * @return the parsed dictionary
     * @throws IOException if the dictionary cannot be read
     */
    private static Dictionary loadAbbDictionary() throws IOException {
        // Close the resource stream once the dictionary has been built from it.
        try (InputStream in = TokenizerFactoryTest.class.getClassLoader().getResourceAsStream(ABB_DICTIONARY_RESOURCE)) {
            // Fail with a readable message instead of an obscure error inside Dictionary.
            Assertions.assertNotNull(in, "Missing test resource: " + ABB_DICTIONARY_RESOURCE);
            return new Dictionary(in);
        }
    }

    /**
     * Serializes the given model into memory and loads it back as a new instance.
     *
     * @param model the model to round-trip
     * @return a model built from the serialized bytes of {@code model}
     * @throws IOException if serialization or loading fails
     */
    private static TokenizerModel serializeAndReload(TokenizerModel model) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        model.serialize(out);
        return new TokenizerModel(new ByteArrayInputStream(out.toByteArray()));
    }

    /** Default factory with an abbreviation dictionary and no explicit pattern. */
    @Test
    void testDefault() throws IOException {
        TokenizerModel model = train(new TokenizerFactory(LANG, loadAbbDictionary(), false, (Pattern) null));
        TokenizerFactory factory = model.getFactory();
        Assertions.assertNotNull(factory.getAbbreviationDictionary());
        Assertions.assertTrue(factory.getContextGenerator() instanceof DefaultTokenContextGenerator);
        Assertions.assertEquals(DEFAULT_ALPHANUMERIC_PATTERN, factory.getAlphaNumericPattern().pattern());
        Assertions.assertEquals(LANG, factory.getLanguageCode());
        Assertions.assertEquals(LANG, model.getLanguage());
        Assertions.assertFalse(factory.isUseAlphaNumericOptimization());

        TokenizerModel reloaded = serializeAndReload(model);
        TokenizerFactory reloadedFactory = reloaded.getFactory();
        Assertions.assertNotNull(reloadedFactory.getAbbreviationDictionary());
        Assertions.assertTrue(reloadedFactory.getContextGenerator() instanceof DefaultTokenContextGenerator);
        Assertions.assertEquals(DEFAULT_ALPHANUMERIC_PATTERN, reloadedFactory.getAlphaNumericPattern().pattern());
        Assertions.assertEquals(LANG, reloadedFactory.getLanguageCode());
        // Check the reloaded model, not the original, so the round trip is actually verified.
        Assertions.assertEquals(LANG, reloaded.getLanguage());
        Assertions.assertFalse(reloadedFactory.isUseAlphaNumericOptimization());
    }

    /** Default factory without an abbreviation dictionary. */
    @Test
    void testNullDict() throws IOException {
        TokenizerModel model = train(new TokenizerFactory(LANG, (Dictionary) null, false, (Pattern) null));
        TokenizerFactory factory = model.getFactory();
        Assertions.assertNull(factory.getAbbreviationDictionary());
        Assertions.assertTrue(factory.getContextGenerator() instanceof DefaultTokenContextGenerator);
        Assertions.assertEquals(DEFAULT_ALPHANUMERIC_PATTERN, factory.getAlphaNumericPattern().pattern());
        Assertions.assertEquals(LANG, factory.getLanguageCode());
        Assertions.assertEquals(LANG, model.getLanguage());
        Assertions.assertFalse(factory.isUseAlphaNumericOptimization());

        TokenizerModel reloaded = serializeAndReload(model);
        TokenizerFactory reloadedFactory = reloaded.getFactory();
        Assertions.assertNull(reloadedFactory.getAbbreviationDictionary());
        Assertions.assertTrue(reloadedFactory.getContextGenerator() instanceof DefaultTokenContextGenerator);
        Assertions.assertEquals(DEFAULT_ALPHANUMERIC_PATTERN, reloadedFactory.getAlphaNumericPattern().pattern());
        Assertions.assertEquals(LANG, reloadedFactory.getLanguageCode());
        // Check the reloaded model, not the original, so the round trip is actually verified.
        Assertions.assertEquals(LANG, reloaded.getLanguage());
        Assertions.assertFalse(reloadedFactory.isUseAlphaNumericOptimization());
    }

    /** Default factory with an explicit pattern and the alphanumeric optimization enabled. */
    @Test
    void testCustomPatternAndAlphaOpt() throws IOException {
        TokenizerModel model = train(new TokenizerFactory(LANG, (Dictionary) null, true, Pattern.compile(CUSTOM_ALPHANUMERIC_PATTERN)));
        TokenizerFactory factory = model.getFactory();
        Assertions.assertNull(factory.getAbbreviationDictionary());
        Assertions.assertTrue(factory.getContextGenerator() instanceof DefaultTokenContextGenerator);
        Assertions.assertEquals(CUSTOM_ALPHANUMERIC_PATTERN, factory.getAlphaNumericPattern().pattern());
        Assertions.assertEquals(LANG, factory.getLanguageCode());
        Assertions.assertEquals(LANG, model.getLanguage());
        Assertions.assertTrue(factory.isUseAlphaNumericOptimization());

        TokenizerModel reloaded = serializeAndReload(model);
        TokenizerFactory reloadedFactory = reloaded.getFactory();
        Assertions.assertNull(reloadedFactory.getAbbreviationDictionary());
        Assertions.assertTrue(reloadedFactory.getContextGenerator() instanceof DefaultTokenContextGenerator);
        Assertions.assertEquals(CUSTOM_ALPHANUMERIC_PATTERN, reloadedFactory.getAlphaNumericPattern().pattern());
        Assertions.assertEquals(LANG, reloadedFactory.getLanguageCode());
        // Check the reloaded model, not the original, so the round trip is actually verified.
        Assertions.assertEquals(LANG, reloaded.getLanguage());
        Assertions.assertTrue(reloadedFactory.isUseAlphaNumericOptimization());
    }

    /** Custom factory subclass: its own dictionary and context generator types must survive the round trip. */
    @Test
    void testDummyFactory() throws IOException {
        TokenizerModel model = train(new DummyTokenizerFactory(LANG, loadAbbDictionary(), true, Pattern.compile(CUSTOM_ALPHANUMERIC_PATTERN)));
        TokenizerFactory factory = model.getFactory();
        Assertions.assertTrue(factory.getAbbreviationDictionary() instanceof DummyTokenizerFactory.DummyDictionary);
        Assertions.assertTrue(factory.getContextGenerator() instanceof DummyTokenizerFactory.DummyContextGenerator);
        Assertions.assertEquals(CUSTOM_ALPHANUMERIC_PATTERN, factory.getAlphaNumericPattern().pattern());
        Assertions.assertEquals(LANG, factory.getLanguageCode());
        Assertions.assertEquals(LANG, model.getLanguage());
        Assertions.assertTrue(factory.isUseAlphaNumericOptimization());

        TokenizerModel reloaded = serializeAndReload(model);
        TokenizerFactory reloadedFactory = reloaded.getFactory();
        Assertions.assertTrue(reloadedFactory.getAbbreviationDictionary() instanceof DummyTokenizerFactory.DummyDictionary);
        Assertions.assertTrue(reloadedFactory.getContextGenerator() instanceof DummyTokenizerFactory.DummyContextGenerator);
        Assertions.assertEquals(CUSTOM_ALPHANUMERIC_PATTERN, reloadedFactory.getAlphaNumericPattern().pattern());
        Assertions.assertEquals(LANG, reloadedFactory.getLanguageCode());
        // Check the reloaded model, not the original, so the round trip is actually verified.
        Assertions.assertEquals(LANG, reloaded.getLanguage());
        Assertions.assertTrue(reloadedFactory.isUseAlphaNumericOptimization());
    }

    /** Creating the custom factory by class name through {@code TokenizerFactory.create}. */
    @Test
    void testCreateDummyFactory() throws IOException {
        TokenizerFactory created = TokenizerFactory.create(DummyTokenizerFactory.class.getCanonicalName(), LANG, loadAbbDictionary(), true, Pattern.compile(CUSTOM_ALPHANUMERIC_PATTERN));
        Assertions.assertTrue(created.getAbbreviationDictionary() instanceof DummyTokenizerFactory.DummyDictionary);
        Assertions.assertTrue(created.getContextGenerator() instanceof DummyTokenizerFactory.DummyContextGenerator);
        Assertions.assertEquals(CUSTOM_ALPHANUMERIC_PATTERN, created.getAlphaNumericPattern().pattern());
        Assertions.assertEquals(LANG, created.getLanguageCode());
        Assertions.assertTrue(created.isUseAlphaNumericOptimization());
    }
}
