package org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers;

import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.TokenizerInfo;
import org.apache.hyracks.util.string.UTF8StringUtil;

/* loaded from: input_file:org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramUTF8StringBinaryTokenizer.class */
/**
 * Tokenizer that walks a UTF-8 encoded string and emits one gram of {@code gramLength} characters per
 * call to {@link #next()}.
 *
 * <p>When {@code usePrePost} is set, the gram sequence additionally contains grams that are reported to the
 * token (via {@link INGramToken#setNumPrePostChars(int, int)}) as carrying leading or trailing padding
 * characters, so a string of {@code n} characters yields {@code n + gramLength - 1} grams instead of
 * {@code n - gramLength + 1}.
 *
 * <p>Instances are stateful: {@link #reset(byte[], int, int)} must be called before iterating with
 * {@link #hasNext()} / {@link #next()}.
 */
public class NGramUTF8StringBinaryTokenizer extends AbstractUTF8StringBinaryTokenizer {
    /** Number of characters per gram. */
    private int gramLength;
    /** Whether grams with leading/trailing padding are produced. */
    private boolean usePrePost;
    /** Index of the next gram to be produced; 0 right after a reset. */
    private int gramNum;
    /** Number of grams the current string yields; never negative. */
    private int totalGrams;
    /** The inherited {@code token}, viewed through its n-gram interface. */
    private final INGramToken concreteToken;

    /**
     * Creates a tokenizer.
     *
     * @param gramLength number of characters per gram
     * @param usePrePost whether to also produce grams with leading/trailing padding
     * @param ignoreTokenCount forwarded as the first argument of the superclass constructor; expected to
     *        populate the inherited field of the same name (confirm against the superclass)
     * @param sourceHasTypeTag forwarded as the second argument of the superclass constructor; expected to
     *        populate the inherited field of the same name (confirm against the superclass)
     * @param tokenFactory forwarded to the superclass; the token it leads to must implement
     *        {@link INGramToken}, otherwise the cast below fails with {@link ClassCastException}
     */
    public NGramUTF8StringBinaryTokenizer(int gramLength, boolean usePrePost, boolean ignoreTokenCount,
            boolean sourceHasTypeTag, ITokenFactory tokenFactory) {
        super(ignoreTokenCount, sourceHasTypeTag, tokenFactory);
        this.gramLength = gramLength;
        this.usePrePost = usePrePost;
        this.concreteToken = (INGramToken) this.token;
    }

    /**
     * Declared by {@code IBinaryTokenizer}.
     *
     * @return {@code true} while fewer grams have been produced than the current string yields
     */
    @Override
    public boolean hasNext() {
        return this.gramNum < this.totalGrams;
    }

    /**
     * Declared by {@code IBinaryTokenizer}. Positions the token on the next gram and advances the tokenizer.
     *
     * <p>Callers are expected to check {@link #hasNext()} first; this method performs no bounds check
     * of its own.
     */
    @Override
    public void next() {
        final int currentTokenStart = this.byteIndex;

        int numPreChars = 0;
        int numPostChars = 0;
        if (this.usePrePost) {
            // The first (gramLength - 1) grams carry a shrinking number of leading padding characters.
            numPreChars = Math.max(this.gramLength - this.gramNum - 1, 0);
            // The last (gramLength - 1) grams carry a growing number of trailing padding characters.
            if (this.gramNum > this.totalGrams - this.gramLength) {
                numPostChars = this.gramLength - this.totalGrams + this.gramNum;
            }
        }
        this.gramNum++;
        this.concreteToken.setNumPrePostChars(numPreChars, numPostChars);

        // While leading padding remains, the gram still starts at the first real character,
        // so the byte cursor only moves once the padding is exhausted.
        if (numPreChars == 0) {
            this.byteIndex += UTF8StringUtil.charSize(this.sentenceBytes, this.byteIndex);
        }

        // The occurrence count starts at 1 (this gram itself). Padded grams are excluded from
        // duplicate detection and always keep a count of 1.
        int tokenCount = 1;
        if (!this.ignoreTokenCount && numPreChars == 0 && numPostChars == 0) {
            tokenCount += countEarlierOccurrences(currentTokenStart);
        }

        this.token.reset(this.sentenceBytes, currentTokenStart, this.sentenceEndOffset, this.gramLength,
                tokenCount);
    }

    /**
     * Counts the character positions before {@code currentTokenStart} at which a gram begins that equals,
     * ignoring case, the gram beginning at {@code currentTokenStart}.
     *
     * @param currentTokenStart byte offset of the gram being emitted
     * @return number of earlier start positions holding an equal gram
     */
    private int countEarlierOccurrences(int currentTokenStart) {
        int pos = this.sentenceStartOffset;
        if (this.sourceHasTypeTag) {
            pos++; // step over the one-byte type tag
        }
        // Step over the length header to reach the first character.
        final int utfLength = UTF8StringUtil.getUTFLength(this.sentenceBytes, pos);
        pos += UTF8StringUtil.getNumBytesToStoreLength(utfLength);

        int matches = 0;
        while (pos < currentTokenStart) {
            if (gramsEqualIgnoreCase(currentTokenStart, pos)) {
                matches++;
            }
            pos += UTF8StringUtil.charSize(this.sentenceBytes, pos);
        }
        return matches;
    }

    /**
     * Compares two grams of {@code gramLength} characters character by character after lower-casing.
     *
     * <p>Each gram is walked with its own byte cursor: characters that are equal after
     * {@link Character#toLowerCase(char)} need not have the same encoded width (for example U+212A
     * KELVIN SIGN and {@code 'k'}), so a single shared byte offset would misalign one of the grams.
     *
     * @param firstStart byte offset of the first gram
     * @param secondStart byte offset of the second gram
     * @return {@code true} if all {@code gramLength} characters match ignoring case
     */
    private boolean gramsEqualIgnoreCase(int firstStart, int secondStart) {
        int firstPos = firstStart;
        int secondPos = secondStart;
        for (int k = 0; k < this.gramLength; k++) {
            if (Character.toLowerCase(UTF8StringUtil.charAt(this.sentenceBytes, firstPos)) != Character
                    .toLowerCase(UTF8StringUtil.charAt(this.sentenceBytes, secondPos))) {
                return false;
            }
            firstPos += UTF8StringUtil.charSize(this.sentenceBytes, firstPos);
            secondPos += UTF8StringUtil.charSize(this.sentenceBytes, secondPos);
        }
        return true;
    }

    /**
     * Declared by {@code AbstractUTF8StringBinaryTokenizer} and {@code IBinaryTokenizer}. Points the
     * tokenizer at a new string and recomputes how many grams it yields.
     *
     * @param sentenceData buffer holding the string
     * @param start forwarded to the superclass {@code reset}
     * @param length forwarded to the superclass {@code reset}
     */
    @Override
    public void reset(byte[] sentenceData, int start, int length) {
        super.reset(sentenceData, start, length);
        this.gramNum = 0;

        // Count characters (not bytes) between the cursor set by the superclass and the end of the string.
        int numChars = 0;
        final int end = this.byteIndex + this.sentenceUtf8Length;
        for (int pos = this.byteIndex; pos < end; pos += UTF8StringUtil.charSize(sentenceData, pos)) {
            numChars++;
        }

        if (this.usePrePost) {
            this.totalGrams = numChars + this.gramLength - 1;
        } else {
            // A string shorter than one gram yields no grams; clamp so the count never goes negative.
            this.totalGrams = Math.max(numChars - this.gramLength + 1, 0);
        }
    }

    /**
     * Changes the gram length. The gram count of the current string is not recomputed until the next
     * {@link #reset(byte[], int, int)}.
     *
     * @param i new number of characters per gram
     */
    public void setGramlength(int i) {
        this.gramLength = i;
    }

    /**
     * Switches padded grams on or off. The gram count of the current string is not recomputed until the
     * next {@link #reset(byte[], int, int)}.
     *
     * @param z {@code true} to produce grams with leading/trailing padding
     */
    public void setPrePost(boolean z) {
        this.usePrePost = z;
    }

    /**
     * Declared by {@code IBinaryTokenizer}.
     *
     * @return the number of grams of the current string, narrowed to {@code short}; counts above
     *         {@link Short#MAX_VALUE} do not survive the narrowing
     */
    @Override
    public short getTokensCount() {
        return (short) this.totalGrams;
    }

    /**
     * Declared by {@code IBinaryTokenizer}.
     *
     * @return always {@link TokenizerInfo.TokenizerType#STRING}
     */
    @Override
    public TokenizerInfo.TokenizerType getTokenizerType() {
        return TokenizerInfo.TokenizerType.STRING;
    }
}
