package org.apache.tika.eval.core.textstats;

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.commons.codec.binary.Base32;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.tika.eval.core.langid.LanguageIDWrapper;
import org.apache.tika.eval.core.tokens.CommonTokenResult;
import org.apache.tika.language.detect.LanguageResult;
import org.junit.Assert;
import org.junit.Test;

/* loaded from: input_file:org/apache/tika/eval/core/textstats/TextStatsTest.class */
/**
 * Tests for {@link CompositeTextStatsCalculator}: runs a set of text-statistics calculators over
 * a sample string and checks the per-calculator results stored in the returned map, which is
 * keyed by calculator class.
 */
public class TextStatsTest {

    /**
     * Runs the signature, content-length, entropy, common-token and SHA-256 calculators over a
     * mixed-case English sentence containing punctuation noise and checks each reported value.
     *
     * @throws Exception if any calculator fails
     */
    // The calculator list is intentionally raw: the element type expected by
    // CompositeTextStatsCalculator is declared outside this file.
    @SuppressWarnings({"rawtypes", "unchecked"})
    @Test
    public void testBasic() throws Exception {
        String text = "The quick brown fox &&^&%@! ; ; ; ;;; ;;; 8675309 jumped over tHe lazy wombat";

        List calculators = new ArrayList();
        calculators.add(new TextProfileSignature());
        calculators.add(new ContentLengthCalculator());
        calculators.add(new TokenEntropy());
        calculators.add(new CommonTokens());
        calculators.add(new TextSha256Signature());

        Map<?, ?> stats = new CompositeTextStatsCalculator(calculators).calculate(text);

        CommonTokenResult commonTokens = (CommonTokenResult) stats.get(CommonTokens.class);
        Assert.assertEquals("eng", commonTokens.getLangCode());
        Assert.assertEquals(9L, commonTokens.getAlphabeticTokens());
        Assert.assertEquals(8L, commonTokens.getCommonTokens());
        Assert.assertEquals(7L, commonTokens.getUniqueCommonTokens());
        Assert.assertEquals(8L, commonTokens.getUniqueAlphabeticTokens());
        Assert.assertEquals(0.11d, commonTokens.getOOV(), 0.02d);

        Assert.assertEquals(77L, ((Integer) stats.get(ContentLengthCalculator.class)).intValue());
        Assert.assertEquals(3.12d, ((Double) stats.get(TokenEntropy.class)).doubleValue(), 0.01d);

        // Language-id results are published under LanguageIDWrapper.class even though no
        // LanguageIDWrapper was added to the calculator list above.
        List<?> languageResults = (List<?>) stats.get(LanguageIDWrapper.class);
        Assert.assertEquals("eng", ((LanguageResult) languageResults.get(0)).getLanguage());
        Assert.assertEquals(0.02d, ((LanguageResult) languageResults.get(1)).getRawScore(), 0.01d);

        Assert.assertEquals("XF3W27O7IWOJVVNQ4HLKYYPCPPX3L2M72YSEMZ3WADL4VTXVITIA====", (String) stats.get(TextProfileSignature.class));

        // The expected SHA-256 signature is the Base32-encoded digest of the lower-cased text
        // with the punctuation-only tokens removed.
        String normalizedText = "the quick brown fox 8675309 jumped over the lazy wombat";
        String expectedSha256 = new Base32().encodeAsString(DigestUtils.sha256(normalizedText.getBytes(StandardCharsets.UTF_8)));
        Assert.assertEquals(expectedSha256, stats.get(TextSha256Signature.class));
    }

    /**
     * Checks language identification and the text-profile signature for a short CJK string, then
     * checks that a {@link TextProfileSignature} with a minimum token length of 3 yields the same
     * signature for that string as a default one yields for empty text.
     *
     * @throws Exception if any calculator fails
     */
    // The calculator list is intentionally raw: the element type expected by
    // CompositeTextStatsCalculator is declared outside this file.
    @SuppressWarnings({"rawtypes", "unchecked"})
    @Test
    public void testCJK() throws Exception {
        String text = "普林斯顿大学";

        List calculators = new ArrayList();
        calculators.add(new TextProfileSignature());
        calculators.add(new CommonTokens());

        Map<?, ?> stats = new CompositeTextStatsCalculator(calculators).calculate(text);

        List<?> languageResults = (List<?>) stats.get(LanguageIDWrapper.class);
        Assert.assertEquals("cmn", ((LanguageResult) languageResults.get(0)).getLanguage());
        Assert.assertEquals(0.009d, ((LanguageResult) languageResults.get(1)).getRawScore(), 0.01d);

        Assert.assertEquals("XKXLY6FNIGK2KGEF6HOSKSVGYDLLOFIAGO73RLMJ22PZVXBTXFFA====", (String) stats.get(TextProfileSignature.class));

        // Baseline: signature of empty text with the default configuration.
        calculators.clear();
        calculators.add(new TextProfileSignature());
        String emptyTextSignature = (String) new CompositeTextStatsCalculator(calculators).calculate("").get(TextProfileSignature.class);

        // With a minimum token length of 3 the CJK text must produce that same signature
        // (presumably because every token of this text is shorter than 3 -- confirm against
        // the analyzer configuration).
        calculators.clear();
        TextProfileSignature minLengthThreeSignature = new TextProfileSignature();
        minLengthThreeSignature.setMinTokenLength(3);
        calculators.add(minLengthThreeSignature);
        Assert.assertEquals(emptyTextSignature, (String) new CompositeTextStatsCalculator(calculators).calculate(text).get(TextProfileSignature.class));
    }
}
