package org.apache.asterix.fuzzyjoin.tests;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import junit.framework.Assert;
import org.apache.asterix.fuzzyjoin.tokenizer.DelimitedUTF8StringBinaryTokenizer;
import org.apache.asterix.fuzzyjoin.tokenizer.HashedUTF8WordTokenFactory;
import org.apache.asterix.fuzzyjoin.tokenizer.UTF8WordTokenFactory;
import org.junit.Before;
import org.junit.Test;

/* loaded from: input_file:org/apache/asterix/fuzzyjoin/tests/WordTokenizerTest.class */
/**
 * Checks the tokens produced by {@link DelimitedUTF8StringBinaryTokenizer} for a fixed sentence
 * against hand-written expectations, once per token factory / constructor-flag combination.
 *
 * <p>Every test serializes each produced token and compares the serialized form with the
 * expected value at the same position, then verifies that the number of produced tokens matches
 * the number of expected tokens.
 */
public class WordTokenizerTest {
    /** Seed and multiplier of {@link #tokenHash(String, int)}; equals 0x9E3779B9 as a signed int. */
    private static final int HASH_CONSTANT = -1640531527;

    /** Lower-cased words of {@link #text} in order of appearance, duplicates included. */
    private static final String[] EXPECTED_WORDS = { "hello", "world", "i", "would", "like", "to", "inform", "you",
            "of", "the", "importance", "of", "foo", "bar", "yes", "foo", "bar", "jürgen" };

    /** {@link #text} encoded with {@link DataOutputStream#writeUTF(String)}; filled by {@link #init()}. */
    private byte[] inputBuffer;
    private final String text = "Hello World, I would like to inform you of the importance of Foo Bar. Yes, Foo Bar. Jürgen.";
    private final ArrayList<String> expectedUTF8Tokens = new ArrayList<>();
    /** Hash of every expected token computed with a fixed count of 1. */
    private final ArrayList<Integer> expectedHashedUTF8Tokens = new ArrayList<>();
    /** Hash of every expected token computed with its running occurrence count (1 for the first, 2 for the second, ...). */
    private final ArrayList<Integer> expectedCountedHashedUTF8Tokens = new ArrayList<>();

    /**
     * Builds the tokenizer input buffer and the three lists of expected values.
     *
     * @throws IOException if encoding the text into the in-memory buffer fails
     */
    @Before
    public void init() throws IOException {
        ByteArrayOutputStream encodedText = new ByteArrayOutputStream();
        new DataOutputStream(encodedText).writeUTF(this.text);
        this.inputBuffer = encodedText.toByteArray();

        for (String word : EXPECTED_WORDS) {
            this.expectedUTF8Tokens.add(word);
        }

        for (String token : this.expectedUTF8Tokens) {
            this.expectedHashedUTF8Tokens.add(tokenHash(token, 1));
        }

        // Running number of occurrences seen so far for each token.
        HashMap<String, Integer> occurrences = new HashMap<>();
        for (String token : this.expectedUTF8Tokens) {
            Integer seenSoFar = occurrences.get(token);
            int count = (seenSoFar == null) ? 1 : seenSoFar + 1;
            // Store the updated count on every occurrence; otherwise a third occurrence
            // of a token would be hashed with count 2 again.
            occurrences.put(token, count);
            this.expectedCountedHashedUTF8Tokens.add(tokenHash(token, count));
        }
    }

    /**
     * Hashed tokens with the tokenizer's first constructor flag set to {@code false}; the
     * serialized int of each token must equal the hash that includes the token's occurrence
     * count. NOTE(review): the flag presumably means "ignore token count" - confirm against
     * the tokenizer's constructor.
     *
     * @throws IOException if serializing or reading back a token fails
     */
    @Test
    public void testWordTokenizerWithCountedHashedUTF8Tokens() throws IOException {
        DelimitedUTF8StringBinaryTokenizer tokenizer =
                new DelimitedUTF8StringBinaryTokenizer(false, false, new HashedUTF8WordTokenFactory());
        tokenizer.reset(this.inputBuffer, 0, this.inputBuffer.length);
        int tokenIndex = 0;
        while (tokenizer.hasNext()) {
            tokenizer.next();
            Integer actualHash = Integer.valueOf(readSerializedToken(tokenizer).readInt());
            Assert.assertEquals(this.expectedCountedHashedUTF8Tokens.get(tokenIndex), actualHash);
            tokenIndex++;
        }
        // Guard against a tokenizer that stops early (or yields nothing at all).
        Assert.assertEquals(this.expectedCountedHashedUTF8Tokens.size(), tokenIndex);
    }

    /**
     * Hashed tokens with the tokenizer's first constructor flag set to {@code true}; the
     * serialized int of each token must equal the hash computed with a count of 1.
     *
     * @throws IOException if serializing or reading back a token fails
     */
    @Test
    public void testWordTokenizerWithHashedUTF8Tokens() throws IOException {
        DelimitedUTF8StringBinaryTokenizer tokenizer =
                new DelimitedUTF8StringBinaryTokenizer(true, false, new HashedUTF8WordTokenFactory());
        tokenizer.reset(this.inputBuffer, 0, this.inputBuffer.length);
        int tokenIndex = 0;
        while (tokenizer.hasNext()) {
            tokenizer.next();
            Integer actualHash = Integer.valueOf(readSerializedToken(tokenizer).readInt());
            Assert.assertEquals(this.expectedHashedUTF8Tokens.get(tokenIndex), actualHash);
            tokenIndex++;
        }
        // Guard against a tokenizer that stops early (or yields nothing at all).
        Assert.assertEquals(this.expectedHashedUTF8Tokens.size(), tokenIndex);
    }

    /**
     * Plain UTF-8 tokens; the serialized form of each token, read back with
     * {@link DataInputStream#readUTF()}, must equal the expected lower-cased word.
     *
     * @throws IOException if serializing or reading back a token fails
     */
    @Test
    public void testWordTokenizerWithUTF8Tokens() throws IOException {
        DelimitedUTF8StringBinaryTokenizer tokenizer =
                new DelimitedUTF8StringBinaryTokenizer(true, false, new UTF8WordTokenFactory());
        tokenizer.reset(this.inputBuffer, 0, this.inputBuffer.length);
        int tokenIndex = 0;
        while (tokenizer.hasNext()) {
            tokenizer.next();
            String actualToken = readSerializedToken(tokenizer).readUTF();
            Assert.assertEquals(this.expectedUTF8Tokens.get(tokenIndex), actualToken);
            tokenIndex++;
        }
        // Guard against a tokenizer that stops early (or yields nothing at all).
        Assert.assertEquals(this.expectedUTF8Tokens.size(), tokenIndex);
    }

    /**
     * Computes the expected hash of a token: starting from {@code HASH_CONSTANT}, each character
     * is XOR-ed into the running value, which is then multiplied by {@code HASH_CONSTANT}
     * (int arithmetic, so overflow wraps); finally the count is added.
     *
     * @param str the token text
     * @param i the count to add to the character hash
     * @return the hash of {@code str} plus {@code i}
     */
    public int tokenHash(String str, int i) {
        int hash = HASH_CONSTANT;
        for (int pos = 0; pos < str.length(); pos++) {
            hash = (hash ^ str.charAt(pos)) * HASH_CONSTANT;
        }
        return hash + i;
    }

    /**
     * Serializes the tokenizer's current token into memory and returns a stream over the bytes.
     *
     * @param tokenizer tokenizer positioned on the token to serialize
     * @return a stream positioned at the start of the serialized token
     * @throws IOException if serializing the token fails
     */
    private DataInputStream readSerializedToken(DelimitedUTF8StringBinaryTokenizer tokenizer) throws IOException {
        ByteArrayOutputStream serialized = new ByteArrayOutputStream();
        tokenizer.getToken().serializeToken(new DataOutputStream(serialized));
        return new DataInputStream(new ByteArrayInputStream(serialized.toByteArray()));
    }
}
