package org.apache.jackrabbit.oak.plugins.index.lucene.util.fv;

import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.util.Version;

/* loaded from: input_file:oak-lucene-1.22.13.jar:org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/LSHAnalyzer.class */
/**
 * Analyzer that wires an {@code FVTokenizer} through a fixed chain of token
 * filters and finishes with a {@code MinHashFilter}.
 *
 * <p>The chain built by {@link #createComponents(String, Reader)} is, in order:
 * {@code FVTokenizer} -> {@code TruncateTokenFilter} -> {@code FeaturePositionTokenFilter}
 * -> {@code ShingleFilter} -> {@code MinHashFilter}.
 */
public class LSHAnalyzer extends Analyzer {
    /** Shingle size used for both the minimum and the maximum by the no-arg constructor. */
    private static final int DEFAULT_SHINGLE_SIZE = 5;

    private final int min;
    private final int max;
    private final int hashCount;
    private final int bucketCount;
    private final int hashSetSize;

    /**
     * Stores the configuration that is later handed to the shingle and min-hash filters.
     *
     * @param min         minimum shingle size passed to the {@code ShingleFilter}
     * @param max         maximum shingle size passed to the {@code ShingleFilter}
     * @param hashCount   second constructor argument of the {@code MinHashFilter}
     * @param bucketCount third constructor argument of the {@code MinHashFilter}; also decides
     *                    the filter's final boolean argument
     * @param hashSetSize fourth constructor argument of the {@code MinHashFilter}
     */
    private LSHAnalyzer(int min, int max, int hashCount, int bucketCount, int hashSetSize) {
        this.min = min;
        this.max = max;
        this.hashCount = hashCount;
        this.bucketCount = bucketCount;
        this.hashSetSize = hashSetSize;
    }

    /**
     * Creates an analyzer with the default settings: shingles of exactly
     * {@link #DEFAULT_SHINGLE_SIZE} tokens, a hash count of 1, 512 buckets and a hash set size of 1.
     */
    public LSHAnalyzer() {
        this(DEFAULT_SHINGLE_SIZE, DEFAULT_SHINGLE_SIZE, 1, 512, 1);
    }

    /**
     * Builds the tokenizer and filter chain for a field.
     *
     * <p>NOTE(review): the decompiler reported that this method's access modifier was changed
     * from {@code protected}; it is kept {@code public} here so existing callers are unaffected.
     *
     * @param str    the field name; not used by this implementation
     * @param reader the source of the text to tokenize
     * @return components whose source is the tokenizer and whose sink is the min-hash filter
     */
    @Override // org.apache.lucene.analysis.Analyzer
    public Analyzer.TokenStreamComponents createComponents(String str, Reader reader) {
        FVTokenizer source = new FVTokenizer(Version.LUCENE_47, reader);

        // Stage 1 and 2: truncate (argument 3), then apply the feature-position filter.
        TruncateTokenFilter truncated = new TruncateTokenFilter(source, 3);
        FeaturePositionTokenFilter positioned = new FeaturePositionTokenFilter(truncated);

        // Stage 3: emit only shingles (no unigrams, not even as a fallback), space-separated.
        ShingleFilter shingles = new ShingleFilter(positioned, this.min, this.max);
        shingles.setTokenSeparator(" ");
        shingles.setOutputUnigrams(false);
        shingles.setOutputUnigramsIfNoShingles(false);

        // Stage 4: the last flag is true only when more than one bucket is configured.
        boolean moreThanOneBucket = this.bucketCount > 1;
        MinHashFilter minHash = new MinHashFilter(
                shingles, this.hashCount, this.bucketCount, this.hashSetSize, moreThanOneBucket);

        return new Analyzer.TokenStreamComponents(source, minHash);
    }
}
