package org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.modules;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.stream.NodeData;
import org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.stream.NodeProperty;
import org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.utils.BloomFilter;
import org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.utils.HyperLogLog;
import org.apache.jackrabbit.oak.index.indexer.document.flatfile.pipelined.PipelinedMergeSortTask;

/* loaded from: input_file:org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/modules/DistinctBinarySize.class */
/**
 * Statistics collector that estimates the number and total size of the
 * distinct binaries referenced by the nodes passed to {@link #add(NodeData)}.
 *
 * <p>Two structures are used to de-duplicate binary references:
 * <ul>
 *   <li>a {@link HashSet} holding the ids of the largest binaries seen so far
 *       (exact de-duplication), and</li>
 *   <li>a {@link BloomFilter} plus a {@link HyperLogLog} for all remaining
 *       binaries (approximate de-duplication).</li>
 * </ul>
 * When the set grows to twice its configured entry limit, the size threshold
 * for "large" is raised and the entries below it are moved to the Bloom filter.
 *
 * <p>The collected numbers are written to the internal storage by {@link #end()}.
 */
public class DistinctBinarySize implements StatsCollector {

    /** False-positive probability used to size the Bloom filter. */
    private static final double BLOOM_FILTER_FPP = 0.01d;

    /** Prefix of binary property values that are inspected at all. */
    private static final String BLOB_ID_PREFIX = ":blobId:";

    /**
     * Blob ids starting with this prefix are not counted.
     * NOTE(review): presumably these are embedded (inlined) binaries - confirm.
     */
    private static final String SKIPPED_ID_PREFIX = "0x";

    /** The memory budgets are converted using decimal megabytes. */
    private static final long BYTES_PER_MB = 1_000_000;

    /** Number of bytes budgeted per entry of the large binaries set. */
    private static final long BYTES_PER_LARGE_BINARY_ENTRY = 64;

    private final Storage storage = new Storage();
    private final HashSet<BinaryId> largeBinaries = new HashSet<>();
    // NOTE(review): this constant belongs to an unrelated class and seems to be
    // used only for its numeric value - confirm the intended HyperLogLog size.
    private final HyperLogLog hll = new HyperLogLog(PipelinedMergeSortTask.DEFAULT_OAK_INDEXER_PIPELINED_EXTERNAL_MERGE_READ_BUFFER_SIZE, 0);
    private final BloomFilter bloomFilter;
    private final long largeBinariesMB;
    private final long bloomFilterMB;

    /** Binaries at least this long are tracked in {@link #largeBinaries}. */
    private long largeBinarySizeThreshold;

    /**
     * Entry budget of the large binaries set. The set is truncated once it
     * holds twice this many entries. A value of zero or less disables the set.
     */
    private final int largeBinariesCountMax;

    /** Number of ids that were newly added to the Bloom filter. */
    private long bloomFilterMinCount;

    /** Sum of the lengths of the ids that were newly added to the Bloom filter. */
    private long bloomFilterMinSize;

    /** Sum of the lengths of the ids the Bloom filter reported as possibly known. */
    private long bloomFilterIgnoredSize;

    /** Number of binary references seen, duplicates included. */
    private long referenceCount;

    /** Sum of the lengths of all binary references seen, duplicates included. */
    private long referenceSize;

    /**
     * Creates a collector with the given memory budgets.
     *
     * @param largeBinariesMB memory budget, in MB, for the set of large binaries;
     *                        zero or less disables the set
     * @param bloomFilterMB   memory budget, in MB, for the Bloom filter
     */
    public DistinctBinarySize(long largeBinariesMB, long bloomFilterMB) {
        this.largeBinariesMB = largeBinariesMB;
        this.bloomFilterMB = bloomFilterMB;
        long maxEntries = (largeBinariesMB * BYTES_PER_MB) / BYTES_PER_LARGE_BINARY_ENTRY;
        // Clamp before narrowing to int: the limit is later doubled, and an
        // unchecked cast / "* 2" would wrap around for very large budgets.
        this.largeBinariesCountMax = (int) Math.max(Integer.MIN_VALUE / 2,
                Math.min(Integer.MAX_VALUE / 2, maxEntries));
        long bloomFilterBits = bloomFilterMB * BYTES_PER_MB * 8;
        this.bloomFilter = BloomFilter.construct(
                BloomFilter.calculateN(bloomFilterBits, BLOOM_FILTER_FPP), BLOOM_FILTER_FPP);
    }

    /**
     * Records all binary references of the given node.
     *
     * <p>Only values of {@code BINARY} properties that start with
     * {@code ":blobId:"} are considered, and of those only the ones whose id
     * does not start with {@code "0x"}.
     *
     * @param nodeData the node to inspect
     */
    @Override
    public void add(NodeData nodeData) {
        // First collect all ids, so that the counters are only touched once
        // every id of this node has been parsed.
        List<BinaryId> ids = new ArrayList<>();
        for (NodeProperty property : nodeData.getProperties()) {
            if (property.getType() != NodeProperty.ValueType.BINARY) {
                continue;
            }
            for (String value : property.getValues()) {
                if (!value.startsWith(BLOB_ID_PREFIX)) {
                    continue;
                }
                String blobId = value.substring(BLOB_ID_PREFIX.length());
                if (!blobId.startsWith(SKIPPED_ID_PREFIX)) {
                    ids.add(new BinaryId(blobId));
                }
            }
        }
        this.referenceCount += ids.size();
        for (BinaryId id : ids) {
            this.referenceSize += id.getLength();
            boolean trackExactly = this.largeBinariesCountMax > 0
                    && id.getLength() >= this.largeBinarySizeThreshold;
            if (trackExactly) {
                this.largeBinaries.add(id);
                truncateLargeBinariesSet();
            } else {
                addToBloomFilter(id);
            }
        }
    }

    /**
     * Registers the id with the HyperLogLog and the Bloom filter. If the Bloom
     * filter reports the id as possibly known, only its length is added to the
     * "ignored" size; otherwise the id is added to the filter and counted.
     */
    private void addToBloomFilter(BinaryId binaryId) {
        long hash = binaryId.getLongHash();
        this.hll.add(hash);
        if (this.bloomFilter.mayContain(hash)) {
            this.bloomFilterIgnoredSize += binaryId.getLength();
            return;
        }
        this.bloomFilter.add(hash);
        this.bloomFilterMinCount++;
        this.bloomFilterMinSize += binaryId.getLength();
    }

    /**
     * Shrinks the large binaries set once it holds at least twice the
     * configured number of entries: the size threshold is raised to the length
     * found at index {@code largeBinariesCountMax} of the sorted lengths, and
     * all entries shorter than that are moved to the Bloom filter.
     */
    private void truncateLargeBinariesSet() {
        // The constructor clamps largeBinariesCountMax, so "* 2" cannot overflow.
        if (this.largeBinaries.size() < this.largeBinariesCountMax * 2) {
            return;
        }
        long[] lengths = new long[this.largeBinaries.size()];
        int index = 0;
        for (BinaryId id : this.largeBinaries) {
            lengths[index++] = id.getLength();
        }
        Arrays.sort(lengths);
        this.largeBinarySizeThreshold = lengths[this.largeBinariesCountMax];
        // An explicit iterator is needed, as entries are removed while iterating.
        for (Iterator<BinaryId> it = this.largeBinaries.iterator(); it.hasNext();) {
            BinaryId id = it.next();
            if (id.getLength() < this.largeBinarySizeThreshold) {
                addToBloomFilter(id);
                it.remove();
            }
        }
    }

    /**
     * Writes the configuration and the collected statistics to the storage.
     */
    @Override
    public void end() {
        this.storage.add("config Bloom filter memory MB", this.bloomFilterMB);
        this.storage.add("config large binaries set memory MB", this.largeBinariesMB);
        this.storage.add("large binaries count", this.largeBinaries.size());
        this.storage.add("large binaries count max", this.largeBinariesCountMax * 2);
        this.storage.add("large binaries size threshold", this.largeBinarySizeThreshold);
        long largeBinariesSize = 0;
        for (BinaryId id : this.largeBinaries) {
            largeBinariesSize += id.getLength();
        }
        this.storage.add("large binaries size", largeBinariesSize);
        this.storage.add("small binaries min count", this.bloomFilterMinCount);
        this.storage.add("small binaries min size", this.bloomFilterMinSize);
        long hllCount = this.hll.estimate();
        long smallBinariesCount = this.bloomFilter.getEstimatedEntryCount();
        if (smallBinariesCount == Long.MAX_VALUE) {
            // Long.MAX_VALUE is treated as "no usable estimate from the Bloom
            // filter"; fall back to the HyperLogLog estimate.
            smallBinariesCount = hllCount;
        }
        // Entries the Bloom filter reported as possibly known were not counted
        // in bloomFilterMinSize. The computed false-positive probability is
        // used as the fraction of that ignored size to add back.
        long smallBinariesSize = (long) (this.bloomFilterMinSize
                + (BloomFilter.calculateFpp(smallBinariesCount, this.bloomFilter.getBitCount(), this.bloomFilter.getK())
                        * this.bloomFilterIgnoredSize));
        this.storage.add("small binaries count", smallBinariesCount);
        this.storage.add("small binaries HLL count", hllCount);
        this.storage.add("small binaries size", smallBinariesSize);
        this.storage.add("total distinct count", this.largeBinaries.size() + smallBinariesCount);
        this.storage.add("total distinct size", largeBinariesSize + smallBinariesSize);
        this.storage.add("total reference count", this.referenceCount);
        this.storage.add("total reference size", this.referenceSize);
    }

    /**
     * Returns the records built from the storage by
     * {@code BinarySizeHistogram.getRecordsWithSizeAndCount}.
     */
    @Override
    public List<String> getRecords() {
        return BinarySizeHistogram.getRecordsWithSizeAndCount(this.storage);
    }

    /**
     * Returns the collector name, followed by one line per record, followed by
     * the string form of the storage.
     */
    @Override
    public String toString() {
        String records = getRecords().stream()
                .map(record -> record + "\n")
                .collect(Collectors.joining());
        return "DistinctBinarySize\n" + records + this.storage;
    }
}
