package opennlp.tools.languagemodel;

import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Iterator;
import opennlp.tools.ngram.NGramGenerator;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

/* loaded from: input_file:opennlp/tools/languagemodel/NgramLanguageModelTest.class */
/**
 * Unit tests for {@link NGramLanguageModel}: probability calculation and next-token
 * prediction on models that are built in memory, loaded from a serialized n-gram
 * dictionary, or populated from a plain-text sentence file.
 */
public class NgramLanguageModelTest {

    /**
     * Asserts that the given value lies in the closed interval {@code [0, 1]}.
     *
     * @param probability the value returned by the model under test
     */
    private static void assertIsProbability(double probability) {
        Assertions.assertTrue(probability >= 0.0d && probability <= 1.0d, "a probability measure should be between 0 and 1 [was " + probability + "]");
    }

    /**
     * A model to which nothing was added must report a probability of exactly 0
     * for any input.
     */
    @Test
    public void testEmptyVocabularyProbability() {
        NGramLanguageModel model = new NGramLanguageModel();
        Assertions.assertEquals(0.0d, model.calculateProbability(new String[]{""}), 0.0d, "probability with an empty vocabulary is always 0");
        Assertions.assertEquals(0.0d, model.calculateProbability(new String[]{"1", "2", "3"}), 0.0d, "probability with an empty vocabulary is always 0");
    }

    /**
     * Fills a default model with a randomly generated vocabulary and checks that the
     * probability of a random sentence is a valid probability value.
     */
    @Test
    public void testRandomVocabularyAndSentence() {
        NGramLanguageModel model = new NGramLanguageModel();
        for (String[] entry : LanguageModelTestUtils.generateRandomVocabulary(10)) {
            model.add(entry);
        }
        assertIsProbability(model.calculateProbability(LanguageModelTestUtils.generateRandomSentence()));
    }

    /**
     * Builds a model constructed with {@code 4} from three short sentences and checks
     * both the probability range and the predicted continuation of "I saw".
     */
    @Test
    public void testNgramModel() {
        NGramLanguageModel model = new NGramLanguageModel(4);
        model.add(new String[]{"I", "saw", "the", "fox"});
        model.add(new String[]{"the", "red", "house"});
        model.add(new String[]{"I", "saw", "something", "nice"});

        assertIsProbability(model.calculateProbability(new String[]{"I", "saw", "the", "red", "house"}));

        String[] predicted = model.predictNextTokens(new String[]{"I", "saw"});
        Assertions.assertNotNull(predicted);
        Assertions.assertArrayEquals(new String[]{"the", "fox"}, predicted);
    }

    /**
     * Checks concrete probabilities on the three-sentence "I am Sam" corpus for a
     * model constructed with {@code 2}, using a tolerance of {@code 0.001}.
     */
    @Test
    public void testBigramProbability() {
        NGramLanguageModel model = new NGramLanguageModel(2);
        model.add(new String[]{"<s>", "I", "am", "Sam", "</s>"});
        model.add(new String[]{"<s>", "Sam", "I", "am", "</s>"});
        model.add(new String[]{"<s>", "I", "do", "not", "like", "green", "eggs", "and", "ham", "</s>"});

        Assertions.assertEquals(0.666d, model.calculateProbability(new String[]{"<s>", "I"}), 0.001d);
        Assertions.assertEquals(0.5d, model.calculateProbability(new String[]{"Sam", "</s>"}), 0.001d);
        Assertions.assertEquals(0.333d, model.calculateProbability(new String[]{"<s>", "Sam"}), 0.001d);
        Assertions.assertEquals(0.5d, model.calculateProbability(new String[]{"am", "Sam"}), 0.001d);
        Assertions.assertEquals(0.666d, model.calculateProbability(new String[]{"I", "am"}), 0.001d);
        Assertions.assertEquals(0.333d, model.calculateProbability(new String[]{"I", "do"}), 0.001d);
        Assertions.assertEquals(0.333d, model.calculateProbability(new String[]{"I", "am", "Sam"}), 0.001d);
    }

    /**
     * Builds a model constructed with {@code 3} and checks the probability range and
     * that "something" is predicted after "I saw".
     */
    @Test
    public void testTrigram() {
        NGramLanguageModel model = new NGramLanguageModel(3);
        model.add(new String[]{"I", "see", "the", "fox"});
        model.add(new String[]{"the", "red", "house"});
        model.add(new String[]{"I", "saw", "something", "nice"});

        assertIsProbability(model.calculateProbability(new String[]{"I", "saw", "the", "red", "house"}));

        String[] predicted = model.predictNextTokens(new String[]{"I", "saw"});
        Assertions.assertNotNull(predicted);
        Assertions.assertArrayEquals(new String[]{"something"}, predicted);
    }

    /**
     * Same scenario as {@link #testTrigram()} but with a model constructed with
     * {@code 2}; the expected prediction after "I saw" is still "something".
     */
    @Test
    public void testBigram() {
        NGramLanguageModel model = new NGramLanguageModel(2);
        model.add(new String[]{"I", "see", "the", "fox"});
        model.add(new String[]{"the", "red", "house"});
        model.add(new String[]{"I", "saw", "something", "nice"});

        assertIsProbability(model.calculateProbability(new String[]{"I", "saw", "the", "red", "house"}));

        String[] predicted = model.predictNextTokens(new String[]{"I", "saw"});
        Assertions.assertNotNull(predicted);
        Assertions.assertArrayEquals(new String[]{"something"}, predicted);
    }

    /**
     * Loads a model from the serialized n-gram resource on the classpath and checks
     * the probability range and the predicted token after "the brown fox".
     *
     * @throws Exception if the resource cannot be read or the model cannot be built
     */
    @Test
    public void testSerializedNGramLanguageModel() throws Exception {
        // The test owns the stream it opens, so close it here instead of relying on the
        // constructor to do so; closing an already-closed InputStream is harmless.
        try (InputStream modelStream = getClass().getResourceAsStream("/opennlp/tools/ngram/ngram-model.xml")) {
            // Fail with a clear message when the fixture is missing from the classpath.
            Assertions.assertNotNull(modelStream, "test resource /opennlp/tools/ngram/ngram-model.xml not found");
            NGramLanguageModel model = new NGramLanguageModel(modelStream, 3);

            assertIsProbability(model.calculateProbability(new String[]{"The", "brown", "fox", "jumped"}));

            String[] predicted = model.predictNextTokens(new String[]{"the", "brown", "fox"});
            Assertions.assertNotNull(predicted);
            Assertions.assertArrayEquals(new String[]{"jumped"}, predicted);
        }
    }

    /**
     * Populates a model from the n-grams (up to length 3) of every line of the
     * sentences resource, then checks that "models" is the predicted continuation of
     * "neural network language" and that it is more probable than "model".
     *
     * @throws Exception if the resource cannot be read
     */
    @Test
    public void testTrigramLanguageModelCreationFromText() throws Exception {
        NGramLanguageModel model = new NGramLanguageModel(3);
        try (InputStream sentences = getClass().getResourceAsStream("/opennlp/tools/languagemodel/sentences.txt")) {
            // Fail with a clear message when the fixture is missing from the classpath.
            Assertions.assertNotNull(sentences, "test resource /opennlp/tools/languagemodel/sentences.txt not found");
            for (String line : IOUtils.readLines(sentences, StandardCharsets.UTF_8)) {
                for (String ngram : NGramGenerator.generate(Arrays.asList(line.split(" ")), 3, " ")) {
                    String[] tokens = ngram.split(" ");
                    // String.split can yield an empty array (e.g. for a separator-only
                    // string); such n-grams are skipped.
                    if (tokens.length > 0) {
                        model.add(tokens);
                    }
                }
            }

            String[] predicted = model.predictNextTokens(new String[]{"neural", "network", "language"});
            Assertions.assertNotNull(predicted);
            Assertions.assertArrayEquals(new String[]{"models"}, predicted);

            double pluralProbability = model.calculateProbability(new String[]{"neural", "network", "language", "models"});
            double singularProbability = model.calculateProbability(new String[]{"neural", "network", "language", "model"});
            Assertions.assertTrue(pluralProbability > singularProbability);
        }
    }
}
