package org.apache.tika.eval;

import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.tika.eval.tokens.AlphaIdeographFilterFactory;
import org.apache.tika.eval.tokens.AnalyzerManager;
import org.junit.Assert;
import org.junit.Test;

/* loaded from: input_file:org/apache/tika/eval/AnalyzerManagerTest.class */
/**
 * Tests for the analyzers handed out by {@link AnalyzerManager}.
 *
 * <p>Every {@link TokenStream} is consumed following the Lucene workflow
 * ({@code reset()}, {@code incrementToken()} until exhausted, {@code end()},
 * {@code close()}); try-with-resources guarantees the stream is closed even
 * when an assertion fails part-way through.
 */
public class AnalyzerManagerTest {

    /** Field name passed to {@code tokenStream(...)} in every test. */
    private static final String FIELD = "f";

    /** Argument given to {@link AnalyzerManager#newInstance} in the small-input tests. */
    private static final int DEFAULT_LIMIT = 100000;

    /**
     * Argument given to {@link AnalyzerManager#newInstance} in
     * {@link #testTokenCountFilter()}; that test expects exactly this many
     * tokens to be emitted.
     */
    private static final int TOKEN_LIMIT = 1000000;

    /** How many tokens beyond {@link #TOKEN_LIMIT} are fed to the analyzer. */
    private static final int TOKENS_OVER_LIMIT = 1000;

    /**
     * Runs mixed-case text through the general analyzer and checks that the
     * lowercase forms "the", "and" and "dog" are among the emitted tokens.
     *
     * @throws Exception if the analyzer cannot be created or the stream fails
     */
    @Test
    public void testGeneral() throws Exception {
        Set<String> seen = new HashSet<>();
        try (TokenStream ts = AnalyzerManager.newInstance(DEFAULT_LIMIT)
                .getGeneralAnalyzer()
                .tokenStream(FIELD, "tHe quick aaaa aaa anD dirty dog")) {
            ts.reset();
            CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
            while (ts.incrementToken()) {
                seen.add(termAtt.toString());
            }
            ts.end();
        }
        Assert.assertTrue(seen.contains("the"));
        Assert.assertTrue(seen.contains("and"));
        Assert.assertTrue(seen.contains("dog"));
    }

    /**
     * Runs text containing a number through the common-tokens analyzer and
     * checks that "dirty" is emitted, "the" is not, and that no token which
     * {@link AlphaIdeographFilterFactory#isAlphabetic} accepts contains the
     * digit "5".
     *
     * @throws Exception if the analyzer cannot be created or the stream fails
     */
    @Test
    public void testCommon() throws Exception {
        Set<String> seen = new HashSet<>();
        try (TokenStream ts = AnalyzerManager.newInstance(DEFAULT_LIMIT)
                .getCommonTokensAnalyzer()
                .tokenStream(FIELD, "the 5,000.12 and dirty dog")) {
            ts.reset();
            CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
            while (ts.incrementToken()) {
                String term = termAtt.toString();
                if (AlphaIdeographFilterFactory.isAlphabetic(term.toCharArray())
                        && term.contains("5")) {
                    Assert.fail("Shouldn't have found a numeric");
                }
                seen.add(term);
            }
            ts.end();
        }
        Assert.assertTrue(seen.contains("dirty"));
        Assert.assertFalse(seen.contains("the"));
    }

    /**
     * Feeds {@code TOKEN_LIMIT + TOKENS_OVER_LIMIT} copies of "the" to the
     * general analyzer and checks that exactly {@link #TOKEN_LIMIT} tokens
     * come out.
     *
     * @throws Exception if the analyzer cannot be created or the stream fails
     */
    @Test
    public void testTokenCountFilter() throws Exception {
        AnalyzerManager analyzerManager = AnalyzerManager.newInstance(TOKEN_LIMIT);

        int inputTokens = TOKEN_LIMIT + TOKENS_OVER_LIMIT;
        String word = "the ";
        // Presize to avoid repeated buffer growth while building ~4 MB of text.
        StringBuilder text = new StringBuilder(inputTokens * word.length());
        for (int i = 0; i < inputTokens; i++) {
            text.append(word);
        }

        int tokenCount = 0;
        try (TokenStream ts =
                analyzerManager.getGeneralAnalyzer().tokenStream(FIELD, text.toString())) {
            ts.reset();
            while (ts.incrementToken()) {
                tokenCount++;
            }
            // The stream was previously left un-ended and unclosed here.
            ts.end();
        }
        Assert.assertEquals((long) TOKEN_LIMIT, (long) tokenCount);
    }
}
