package org.apache.asterix.fuzzyjoin.tokenizer;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import org.apache.asterix.fuzzyjoin.FuzzyJoinConfig;

/* loaded from: input_file:org/apache/asterix/fuzzyjoin/tokenizer/WordTokenizer.class */
/**
 * Splits a string into words and tags each word with its occurrence number.
 *
 * <p>The input is split with {@link String#split(String)} using the configured word
 * separator, so the separator is interpreted as a regular expression. Every non-empty
 * word is emitted as {@code word + tokenSeparator + k}, where {@code k} is the 1-based
 * count of how many times that exact word has been seen so far in the same input.
 * Repeated words therefore yield distinct tokens (for example {@code a_1}, {@code a_2}).
 */
public class WordTokenizer implements Tokenizer {
    private static final long serialVersionUID = 1L;

    /** Regular expression handed to {@link String#split(String)} to separate words. */
    private final String wordSeparator;

    /** Character placed between a word and its occurrence number in each token. */
    private final char tokenSeparator;

    /**
     * Small manual demo: tokenizes a fixed sample string and prints the input followed
     * by the resulting token list.
     *
     * @param strArr command-line arguments (unused)
     */
    public static void main(String[] strArr) {
        final String sample = "hadoop_rocks_in_java";
        final List<String> tokens = new WordTokenizer("_", '_').tokenize(sample);
        System.out.println(sample + FuzzyJoinConfig.RECORD_SEPARATOR_REGEX + tokens);
    }

    /**
     * Creates a tokenizer that uses {@code FuzzyJoinConfig.RIDPAIRS_SEPARATOR_REGEX} as the
     * word separator and {@code '_'} as the token separator.
     */
    public WordTokenizer() {
        this(FuzzyJoinConfig.RIDPAIRS_SEPARATOR_REGEX, '_');
    }

    /**
     * Creates a tokenizer with explicit separators.
     *
     * @param str regular expression used to split the input into words
     * @param c character inserted between each word and its occurrence number
     */
    public WordTokenizer(String str, char c) {
        this.wordSeparator = str;
        this.tokenSeparator = c;
    }

    /**
     * Tokenizes the given string.
     *
     * @param str the text to tokenize; must not be {@code null}
     * @return the tokens in input order, one per non-empty word, each of the form
     *         {@code word + tokenSeparator + occurrenceNumber}
     */
    @Override // org.apache.asterix.fuzzyjoin.tokenizer.Tokenizer
    public List<String> tokenize(String str) {
        final List<String> tokens = new ArrayList<>();
        // Number of times each word has been seen so far in this input.
        final HashMap<String, Integer> occurrences = new HashMap<>();
        for (String word : str.split(this.wordSeparator)) {
            // split() can yield empty strings (e.g. leading or adjacent separators); skip them.
            if (word.isEmpty()) {
                continue;
            }
            final Integer seen = occurrences.get(word);
            final int count = (seen == null) ? 1 : seen + 1;
            occurrences.put(word, count);
            tokens.add(word + this.tokenSeparator + count);
        }
        return tokens;
    }
}
