/*
 * Decompiled with CFR 0.152.
 */
package opennlp.tools.tokenize;

import java.util.HashSet;
import java.util.Set;
import opennlp.tools.tokenize.WordpieceTokenizer;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

/**
 * Tests for {@link WordpieceTokenizer} using a small, fixed, lower-case vocabulary.
 *
 * <p>In both tests the expected token sequence starts with {@code [CLS]} and ends with
 * {@code [SEP]}, and every input piece that is not contained in the vocabulary is expected
 * to be reported as {@code [UNK]}.
 */
public class WordpieceTokenizerTest {

    /**
     * Tokenizes an all-lower-case sentence without punctuation. The word "very" is the only
     * one missing from the vocabulary, so it is the only {@code [UNK]} in the expected output.
     */
    @Test
    void testSentence() {
        final WordpieceTokenizer tokenizer = new WordpieceTokenizer(getVocabulary());

        final String[] tokens = tokenizer.tokenize("the quick brown fox jumps over the very lazy dog");

        final String[] expected = {"[CLS]", "the", "quick", "brown", "fox", "jumps", "over", "the", "[UNK]", "lazy", "dog", "[SEP]"};
        Assertions.assertArrayEquals(expected, tokens);
    }

    /**
     * Tokenizes a sentence with a capitalized first word and a trailing period. The expected
     * output has {@code [UNK]} for "The" (the vocabulary only holds the lower-case "the"),
     * for "very", and for the period, which is expected as its own token after "dog".
     */
    @Test
    void testSentenceWithPunctuation() {
        final WordpieceTokenizer tokenizer = new WordpieceTokenizer(getVocabulary());

        final String[] tokens = tokenizer.tokenize("The quick brown fox jumps over the very lazy dog.");

        final String[] expected = {"[CLS]", "[UNK]", "quick", "brown", "fox", "jumps", "over", "the", "[UNK]", "lazy", "dog", "[UNK]", "[SEP]"};
        Assertions.assertArrayEquals(expected, tokens);
    }

    /**
     * Builds the vocabulary shared by the tests.
     *
     * @return a new mutable set holding the lower-case words of the test sentence,
     *         deliberately without "very"
     */
    private Set<String> getVocabulary() {
        final Set<String> vocabulary = new HashSet<>();
        vocabulary.add("the");
        vocabulary.add("quick");
        vocabulary.add("brown");
        vocabulary.add("fox");
        vocabulary.add("jumps");
        vocabulary.add("over");
        vocabulary.add("lazy");
        vocabulary.add("dog");
        return vocabulary;
    }
}

