package org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.modules;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.stream.Collectors;
import org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.stream.NodeData;
import org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.stream.NodeProperty;
import org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.utils.Hash;
import org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.utils.HyperLogLog3Linear64;
import org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.utils.TopKValues;

/* loaded from: input_file:org/apache/jackrabbit/oak/index/indexer/document/flatfile/analysis/modules/PropertyStats.class */
/**
 * A {@link StatsCollector} that gathers per-property statistics from the
 * nodes passed to {@link #add(NodeData)}: number of occurrences, number of
 * values, an estimated number of distinct values, value sizes and, for very
 * frequent properties, the most frequent values.
 *
 * <p>Not every node is inspected: after a node has been processed, the next
 * {@code skip} nodes are ignored (see {@link #setSkip(int)}).
 */
public class PropertyStats implements StatsCollector {
    /**
     * Per-value data (distinct-value sketch and sizes) is only collected once
     * a property has been seen more than this many times; it is also the
     * minimum occurrence count for a property to be listed by
     * {@link #getRecords()}.
     */
    private static final long MIN_PROPERTY_COUNT = 500;

    /** Top-k values are collected once a property has been seen this many times. */
    private static final long MIN_TOP_K = 10000;

    /** Capacity passed to each {@link TopKValues} instance. */
    private static final int TOP_K = 8;

    /**
     * Pruning is triggered when the statistics map holds twice this many
     * entries; the occurrence count at this rank (ascending) is the cut-off.
     */
    private static final long MAX_SIZE = 100000;

    private final boolean indexedPropertiesOnly;

    /**
     * Index definitions per property name. Always initialized so that
     * {@link #setIndexedProperties(Map)} can safely add to it.
     */
    private final Map<String, List<IndexedProperty>> indexedPropertyMap = new HashMap<>();

    private final long seed;

    /** Number of upcoming nodes that {@link #add(NodeData)} will still ignore. */
    private int skipRemaining;

    private final Set<String> indexedProperties = new HashSet<>();

    /** Statistics by key, sorted by key so that records are emitted in key order. */
    private final TreeMap<String, Stats> statsMap = new TreeMap<>();

    /** Number of nodes ignored after each processed node. */
    private int skip = 1;

    /** Mutable accumulator for the statistics of one key. */
    public static class Stats {
        String name;
        /** Number of property occurrences. */
        long count;
        /** Total number of values over all occurrences. */
        long values;
        /** State of the {@link HyperLogLog3Linear64} distinct-value sketch. */
        long hll;
        /** Sum of the lengths of the values seen after the warm-up phase. */
        long size;
        /** Largest value length seen after the warm-up phase. */
        long maxSize;
        /** Most frequent values; {@code null} until {@code MIN_TOP_K} occurrences were seen. */
        TopKValues topValues;

        public Stats(String name) {
            this.name = name;
        }
    }

    /**
     * Creates a new collector.
     *
     * @param indexedPropertiesOnly if {@code true}, only properties whose name
     *        was registered as indexed are collected
     * @param seed the seed passed to {@link Hash#hash64} when hashing values
     */
    public PropertyStats(boolean indexedPropertiesOnly, long seed) {
        this.indexedPropertiesOnly = indexedPropertiesOnly;
        this.seed = seed;
    }

    /**
     * Sets how many nodes are ignored after each processed node.
     *
     * @param skip the number of nodes to ignore; {@code 0} processes every node
     */
    public void setSkip(int skip) {
        this.skip = skip;
    }

    /**
     * Registers additional indexed property names.
     *
     * @param set the property names to add
     */
    public void setIndexedPropertiesSet(Set<String> set) {
        indexedProperties.addAll(set);
    }

    /**
     * Registers index definitions per property name. The keys are also added
     * to the set of indexed property names.
     *
     * @param map index definitions keyed by property name
     */
    public void setIndexedProperties(Map<String, ArrayList<IndexedProperty>> map) {
        // The target map is created with the instance; previously it was never
        // initialized, so this call always failed with a NullPointerException.
        indexedPropertyMap.putAll(map);
        indexedProperties.addAll(map.keySet());
    }

    /**
     * Processes one node, unless it falls into the current skip window.
     *
     * <p>In "indexed properties only" mode a property is recorded only if its
     * name is registered as indexed. Otherwise every property is recorded under
     * its name and, for each registered index definition that matches the node,
     * additionally under {@code name + " " + definition}.
     *
     * @param node the node whose properties are inspected
     */
    @Override // org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.modules.StatsCollector
    public void add(NodeData node) {
        if (skipRemaining > 0) {
            skipRemaining--;
            return;
        }
        skipRemaining = skip;
        for (NodeProperty property : node.getProperties()) {
            String name = property.getName();
            if (indexedPropertiesOnly) {
                if (indexedProperties.contains(name)) {
                    add(name, property);
                }
                continue;
            }
            List<IndexedProperty> definitions = indexedPropertyMap.get(name);
            if (definitions != null) {
                for (IndexedProperty definition : definitions) {
                    if (definition.matches(name, node)) {
                        add(name + " " + definition.toString(), property);
                    }
                }
            }
            add(name, property);
        }
    }

    /**
     * Updates the statistics stored under {@code key} with one property
     * occurrence, then prunes the statistics map if it has grown too large.
     */
    private void add(String key, NodeProperty property) {
        Stats stats = statsMap.computeIfAbsent(key, Stats::new);
        String[] values = property.getValues();
        stats.count++;
        stats.values += values.length;
        // NOTE(review): the sketch and the sizes are only fed after the warm-up
        // phase, while getRecords() divides size by ALL values and already
        // reports properties at exactly MIN_PROPERTY_COUNT occurrences.
        // Looks intentional (sampling) - confirm before changing.
        if (stats.count > MIN_PROPERTY_COUNT) {
            for (String value : values) {
                stats.hll = HyperLogLog3Linear64.add(stats.hll, Hash.hash64(value.hashCode(), seed));
                stats.size += value.length();
                stats.maxSize = Math.max(stats.maxSize, value.length());
            }
        }
        if (stats.count >= MIN_TOP_K) {
            if (stats.topValues == null) {
                stats.topValues = new TopKValues(TOP_K);
            }
            for (String value : values) {
                stats.topValues.add(value);
            }
        }
        removeRareEntries();
    }

    /**
     * Bounds memory usage: once the map holds {@code 2 * MAX_SIZE} entries,
     * removes every entry whose occurrence count is not larger than the count
     * found at rank {@code MAX_SIZE} of the ascending-sorted counts.
     */
    private void removeRareEntries() {
        if (statsMap.size() < 2 * MAX_SIZE) {
            return;
        }
        long[] sortedCounts = statsMap.values().stream()
                .mapToLong(s -> s.count)
                .sorted()
                .toArray();
        long threshold = sortedCounts[(int) MAX_SIZE];
        statsMap.values().removeIf(s -> s.count <= threshold);
    }

    /**
     * Returns one text record per property that was seen at least
     * {@code MIN_PROPERTY_COUNT} times, in key order. Each record has the form
     * {@code <name> weight <w> count <c> distinct <d> avgSize <a> maxSize <m>},
     * optionally followed by {@code values <v>} (if the number of values differs
     * from the number of occurrences) and {@code top <topValues>} (if top values
     * were collected).
     *
     * @return the records; a new, mutable list
     */
    @Override // org.apache.jackrabbit.oak.index.indexer.document.flatfile.analysis.modules.StatsCollector
    public List<String> getRecords() {
        List<String> records = new ArrayList<>();
        for (Stats stats : statsMap.values()) {
            if (stats.count < MIN_PROPERTY_COUNT) {
                continue;
            }
            long distinct = HyperLogLog3Linear64.estimate(stats.hll);
            long weight = distinct;
            TopKValues top = stats.topValues;
            if (top != null && !top.isNotSkewed()) {
                // Skewed distribution: limit the weight by the total count
                // divided by the average of the two largest counts.
                long topTwoAverage = Math.max(1L, (top.getTopCount() + top.getSecondCount()) / 2);
                weight = Math.min(distinct, top.getCount() / topTwoAverage);
            }
            StringBuilder sb = new StringBuilder();
            sb.append(stats.name);
            sb.append(" weight ").append(roundWeight(weight));
            sb.append(" count ").append(stats.count);
            sb.append(" distinct ").append(distinct);
            sb.append(" avgSize ").append(stats.size / Math.max(1L, stats.values));
            sb.append(" maxSize ").append(stats.maxSize);
            if (stats.count != stats.values) {
                sb.append(" values ").append(stats.values);
            }
            if (top != null) {
                sb.append(" top ").append(top.toString());
            }
            records.add(sb.toString());
        }
        return records;
    }

    /**
     * Coarsens a weight: caps it at 10000, rounds down to a multiple of 100
     * from 500 on, to a multiple of 10 from 40 on, and maps 0 to 1. Other
     * values are returned unchanged.
     */
    private static long roundWeight(long weight) {
        if (weight >= 10000) {
            return 10000;
        }
        if (weight >= 500) {
            return (weight / 100) * 100;
        }
        if (weight >= 40) {
            return (weight / 10) * 10;
        }
        return weight == 0 ? 1 : weight;
    }

    /**
     * Returns the header line {@code PropertyStats} followed by all records,
     * each terminated by a newline.
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder("PropertyStats\n");
        for (String record : getRecords()) {
            sb.append(record).append('\n');
        }
        return sb.toString();
    }
}
