package org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers;

import org.apache.hyracks.data.std.primitive.UTF8StringPointable;

/* loaded from: input_file:org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizer.class */
/**
 * Tokenizer that splits the UTF-8 bytes held by the superclass into tokens, where a token is a
 * maximal run of consecutive non-separator characters (see {@link #isSeparator(char)}).
 *
 * <p>Unless {@code ignoreTokenCount} is set, every emitted token also carries an occurrence
 * number: 1 plus the number of previously emitted tokens that are equal to it ignoring case.
 */
public class DelimitedUTF8StringBinaryTokenizer extends AbstractUTF8StringBinaryTokenizer {

    /**
     * Creates a tokenizer. All three arguments are forwarded unchanged, in order, to the
     * superclass constructor.
     */
    public DelimitedUTF8StringBinaryTokenizer(boolean z, boolean z2, ITokenFactory iTokenFactory) {
        super(z, z2, iTokenFactory);
    }

    /**
     * Skips any separator characters at the current position and reports whether input remains.
     *
     * @return {@code true} if {@code index} is still before {@code length} after skipping
     */
    @Override
    public boolean hasNext() {
        while (this.index < this.length && isSeparator(UTF8StringPointable.charAt(this.data, this.index))) {
            this.index += UTF8StringPointable.charSize(this.data, this.index);
        }
        return this.index < this.length;
    }

    /**
     * Tells whether {@code c} delimits tokens.
     *
     * @param c the character to classify
     * @return {@code false} for letters, digits and characters whose general category is
     *         {@link Character#OTHER_LETTER} or {@link Character#OTHER_NUMBER}; {@code true} otherwise
     */
    private boolean isSeparator(char c) {
        if (Character.isLetterOrDigit(c)) {
            return false;
        }
        int category = Character.getType(c);
        return category != Character.OTHER_LETTER && category != Character.OTHER_NUMBER;
    }

    /**
     * Consumes characters from the current position up to the next separator (or the end of the
     * input) and resets {@code token} to describe that span.
     *
     * <p>When {@code ignoreTokenCount} is false and the token is non-empty, the token is compared
     * (ignoring case) against every token recorded so far to determine its occurrence number,
     * and is then recorded itself.
     */
    @Override
    public void next() {
        this.tokenLength = 0;
        final int tokenStart = this.index;
        while (this.index < this.length && !isSeparator(UTF8StringPointable.charAt(this.data, this.index))) {
            this.index += UTF8StringPointable.charSize(this.data, this.index);
            this.tokenLength++;
        }

        int occurrence = 1;
        if (this.tokenLength > 0 && !this.ignoreTokenCount) {
            for (int seen = 0; seen < this.tokensStart.length(); seen++) {
                // Only tokens with the same number of characters can be equal.
                if (this.tokenLength == this.tokensLength.get(seen)
                        && equalsIgnoreCase(tokenStart, this.tokensStart.get(seen), this.tokenLength)) {
                    occurrence++;
                }
            }
            // Remember this token so later duplicates can be detected.
            this.tokensStart.add(tokenStart);
            this.tokensLength.add(this.tokenLength);
        }

        this.token.reset(this.data, tokenStart, this.index, this.tokenLength, occurrence);
    }

    /**
     * Compares two character runs inside {@code data}, ignoring case.
     *
     * <p>Each run is walked with its own byte position: two characters that are equal ignoring
     * case may still have different encoded sizes, so a shared byte offset could land in the
     * middle of a multi-byte character of the other run.
     *
     * @param firstStart  byte position of the first run
     * @param secondStart byte position of the second run
     * @param charCount   number of characters (not bytes) to compare
     * @return {@code true} if all {@code charCount} character pairs match ignoring case
     */
    private boolean equalsIgnoreCase(int firstStart, int secondStart, int charCount) {
        int firstPos = firstStart;
        int secondPos = secondStart;
        for (int compared = 0; compared < charCount; compared++) {
            char first = Character.toLowerCase(UTF8StringPointable.charAt(this.data, firstPos));
            char second = Character.toLowerCase(UTF8StringPointable.charAt(this.data, secondPos));
            if (first != second) {
                return false;
            }
            firstPos += UTF8StringPointable.charSize(this.data, firstPos);
            secondPos += UTF8StringPointable.charSize(this.data, secondPos);
        }
        return true;
    }

    /**
     * Counts the tokens between {@code originalIndex} and {@code length} by counting transitions
     * from a separator (or the start) to a non-separator character. The scan advances
     * {@code originalIndex} to {@code length}.
     *
     * <p>The result is cached: once computed, later calls return the stored value instead of
     * rescanning from the already-exhausted {@code originalIndex}, which would yield 0.
     *
     * @return the number of tokens
     */
    @Override
    public short getTokensCount() {
        if (!this.tokenCountCalculated) {
            this.tokenCount = (short) 0;
            boolean previousWasSeparator = true;
            while (this.originalIndex < this.length) {
                if (isSeparator(UTF8StringPointable.charAt(this.data, this.originalIndex))) {
                    previousWasSeparator = true;
                } else if (previousWasSeparator) {
                    this.tokenCount = (short) (this.tokenCount + 1);
                    previousWasSeparator = false;
                }
                this.originalIndex += UTF8StringPointable.charSize(this.data, this.originalIndex);
            }
            // NOTE(review): assumes the superclass clears tokenCountCalculated (and rewinds
            // originalIndex) whenever new input is supplied - confirm against its reset logic.
            this.tokenCountCalculated = true;
        }
        return this.tokenCount;
    }
}
