package org.apache.kylin.engine.spark.builder;

import java.io.IOException;
import org.apache.kylin.engine.spark.metadata.ColumnDesc;
import org.apache.kylin.engine.spark.metadata.SegmentInfo;
import org.apache.spark.dict.NGlobalDictBuilderAssist;
import org.apache.spark.dict.NGlobalDictMetaInfo;
import org.apache.spark.dict.NGlobalDictionary;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/kylin-spark-engine-4.0.0.jar:org/apache/kylin/engine/spark/builder/DictionaryBuilderHelper.class */
/**
 * Static helper that works out the bucket size to use when building the global dictionary
 * of a column, and triggers a resize of an existing dictionary when that size changes.
 */
public class DictionaryBuilderHelper {
    protected static final Logger logger = LoggerFactory.getLogger(DictionaryBuilderHelper.class);

    /**
     * Calculates the bucket size for the global dictionary of {@code columnDesc}.
     *
     * <p>When {@code NGlobalDictionary.isFirst()} is true, the result is the larger of the current
     * (or default) bucket size and {@code dataset.count() / (int) (threshold * initLoadFactor)}.
     * Otherwise the result is the largest of: the current bucket size, a size derived from the
     * dataset row count, a size derived from the recorded dict count, and twice the current size
     * if any recorded per-bucket count exceeds {@code threshold * bucketOverheadFactor}. In that
     * second case, if the result differs from the current size,
     * {@link NGlobalDictBuilderAssist#resize} is invoked before returning.
     *
     * @param segmentInfo supplies the project name and the Kylin configuration values used here
     * @param columnDesc  the column whose global dictionary is being sized
     * @param dataset     the rows to be encoded; {@code count()} is called on it exactly once
     * @return the bucket size to use for this build
     * @throws IOException         declared by the signature; presumably raised while reading
     *                             dictionary metadata or resizing (not visible here, confirm)
     * @throws ArithmeticException if a computed size does not fit in an {@code int}, or if a
     *                             configured divisor evaluates to zero
     */
    public static int calculateBucketSize(SegmentInfo segmentInfo, ColumnDesc columnDesc, Dataset<Row> dataset) throws IOException {
        NGlobalDictionary globalDict = new NGlobalDictionary(segmentInfo.project(), columnDesc.tableAliasName(),
                columnDesc.columnName(), segmentInfo.kylinconf().getHdfsWorkingDirectory());
        int currentBucketSize = globalDict.getBucketSizeOrDefault(segmentInfo.kylinconf().getGlobalDictV2MinHashPartitions());
        int bucketThreshold = segmentInfo.kylinconf().getGlobalDictV2ThresholdBucketSize();

        // Both branches need these; the row count is the expensive part, so evaluate it once.
        long rowCount = dataset.count();
        double initLoadFactor = segmentInfo.kylinconf().getGlobalDictV2InitLoadFactor();
        // Number of entries a bucket is sized for when the load factor is applied.
        int loadedBucketCapacity = (int) (bucketThreshold * initLoadFactor);

        if (globalDict.isFirst()) {
            int initialBucketSize = Math.max(Math.toIntExact(rowCount / loadedBucketCapacity), currentBucketSize);
            // Log the size actually chosen, which may be larger than the current/default one.
            logger.info("Building a global dictionary column first for  {} , the size of the bucket is set to {}", columnDesc.columnName(), initialBucketSize);
            return initialBucketSize;
        }

        NGlobalDictMetaInfo metaInfo = globalDict.getMetaInfo();
        long[] bucketCount = metaInfo.getBucketCount();
        double bucketOverheadFactor = segmentInfo.kylinconf().getGlobalDictV2BucketOverheadFactor();

        // Candidate 1: size needed for the incoming rows. Switch to the load-factor based
        // capacity only when the plain estimate already exceeds the recorded bucket size.
        int sizeForNewRows = Math.toIntExact(rowCount / bucketThreshold);
        if (sizeForNewRows > metaInfo.getBucketSize()) {
            sizeForNewRows = Math.toIntExact(rowCount / loadedBucketCapacity);
        }

        // Candidate 2: size needed for the entries already recorded in the dictionary.
        // Widen to long before multiplying: threshold * bucketSize can exceed Integer.MAX_VALUE,
        // and a wrapped (negative) product would make this check pass spuriously.
        int sizeForExistingDict = 0;
        if (metaInfo.getDictCount() >= (long) bucketThreshold * metaInfo.getBucketSize()) {
            sizeForExistingDict = Math.toIntExact(metaInfo.getDictCount() / loadedBucketCapacity);
        }

        // Candidate 3: double the current size as soon as one bucket exceeds the overhead limit.
        int sizeForOverloadedBucket = 0;
        double overloadLimit = bucketThreshold * bucketOverheadFactor;
        for (long entriesInBucket : bucketCount) {
            if (entriesInBucket > overloadLimit) {
                sizeForOverloadedBucket = currentBucketSize * 2;
                break;
            }
        }

        int resizedBucketSize = Math.max(Math.max(sizeForNewRows, sizeForExistingDict),
                Math.max(sizeForOverloadedBucket, currentBucketSize));
        if (resizedBucketSize != currentBucketSize) {
            logger.info("Start building a global dictionary column for {}, need resize from {} to {} ", columnDesc.columnName(), currentBucketSize, resizedBucketSize);
            NGlobalDictBuilderAssist.resize(columnDesc, segmentInfo, resizedBucketSize, dataset.sparkSession());
            logger.info("End building a global dictionary column for {}, need resize from {} to {} ", columnDesc.columnName(), currentBucketSize, resizedBucketSize);
        }
        return resizedBucketSize;
    }
}
