package org.apache.kylin.engine.spark.builder;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.kylin.guava30.shaded.common.base.Preconditions;
import org.apache.kylin.guava30.shaded.common.collect.Lists;
import org.apache.kylin.guava30.shaded.common.collect.Sets;
import org.apache.kylin.guava30.shaded.common.collect.UnmodifiableIterator;
import org.apache.kylin.metadata.cube.model.IndexEntity;
import org.apache.kylin.metadata.cube.model.LayoutEntity;
import org.apache.kylin.metadata.cube.model.NDataLayout;
import org.apache.kylin.metadata.cube.model.NDataSegment;
import org.apache.kylin.metadata.model.MeasureDesc;
import org.apache.kylin.metadata.model.NDataModel;
import org.apache.kylin.metadata.model.TblColRef;
import org.apache.spark.dict.NGlobalDictBuilderAssist;
import org.apache.spark.dict.NGlobalDictMetaInfo;
import org.apache.spark.dict.NGlobalDictionaryV2;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/kylin/engine/spark/builder/DictionaryBuilderHelper.class */
/**
 * Static helpers for global dictionary building: choosing the bucket count of a
 * column's global dictionary and working out which columns need a global dictionary.
 */
public class DictionaryBuilderHelper {
    protected static final Logger logger = LoggerFactory.getLogger(DictionaryBuilderHelper.class);

    /**
     * Chooses the bucket count for the global dictionary of {@code tblColRef} and, when an
     * already existing dictionary has to change its bucket count, triggers the resize.
     *
     * <p>For a dictionary reported as first-time ({@code isFirst()}), the result is the larger of
     * the current/default bucket size and {@code rowCount / (threshold * initLoadFactor)}.
     * Otherwise the result is the largest of: the current bucket size, a size derived from the
     * incoming row count, a size derived from the existing dictionary entry count, and twice the
     * current size when any single bucket exceeds {@code threshold * bucketOverheadFactor}.
     *
     * @param nDataSegment segment supplying the project name and the configuration values
     * @param tblColRef    column whose global dictionary is being sized
     * @param dataset      dataset whose {@code count()} is used as the number of incoming values
     * @return the bucket count to use; never smaller than the current/default bucket size
     * @throws IOException         declared by the dictionary calls made here
     * @throws ArithmeticException if a computed size does not fit in an {@code int}, or if a
     *                             configured divisor evaluates to zero
     */
    public static int calculateBucketSize(NDataSegment nDataSegment, TblColRef tblColRef, Dataset<Row> dataset) throws IOException {
        NGlobalDictionaryV2 globalDict = new NGlobalDictionaryV2(nDataSegment.getProject(), tblColRef.getTable(),
                tblColRef.getName(), nDataSegment.getConfig().getHdfsWorkingDirectory());
        int currentBucketSize = globalDict
                .getBucketSizeOrDefault(nDataSegment.getConfig().getGlobalDictV2MinHashPartitions());
        int bucketThreshold = nDataSegment.getConfig().getGlobalDictV2ThresholdBucketSize();

        if (globalDict.isFirst()) {
            long rowCount = dataset.count();
            double loadFactor = nDataSegment.getConfig().getGlobalDictV2InitLoadFactor();
            int initialSize = Math.max(Math.toIntExact(rowCount / (int) (bucketThreshold * loadFactor)),
                    currentBucketSize);
            // Log the size that is actually returned, not the default it was compared against.
            logger.info("Building a global dictionary column first for  {} , the size of the bucket is set to {}",
                    tblColRef.getName(), initialSize);
            return initialSize;
        }

        long rowCount = dataset.count();
        NGlobalDictMetaInfo metaInfo = globalDict.getMetaInfo();
        long[] bucketCounts = metaInfo.getBucketCount();
        double loadFactor = nDataSegment.getConfig().getGlobalDictV2InitLoadFactor();
        double overheadFactor = nDataSegment.getConfig().getGlobalDictV2BucketOverheadFactor();

        // Size needed for the incoming rows; once it exceeds the recorded bucket size it is
        // recomputed with the load factor applied to the threshold.
        int sizeForIncoming = Math.toIntExact(rowCount / bucketThreshold);
        if (sizeForIncoming > metaInfo.getBucketSize()) {
            sizeForIncoming = Math.toIntExact(rowCount / (int) (bucketThreshold * loadFactor));
        }

        // Size needed for the entries already stored. The product is widened to long so that a
        // large threshold * bucket size cannot wrap around and force a spurious resize.
        int sizeForExisting = 0;
        if (metaInfo.getDictCount() >= (long) bucketThreshold * metaInfo.getBucketSize()) {
            sizeForExisting = Math.toIntExact(metaInfo.getDictCount() / (int) (bucketThreshold * loadFactor));
        }

        // A single bucket above the overhead limit is enough to ask for twice the current size.
        int sizeForOverload = 0;
        double overloadLimit = bucketThreshold * overheadFactor;
        for (long entriesInBucket : bucketCounts) {
            if (entriesInBucket > overloadLimit) {
                sizeForOverload = currentBucketSize * 2;
                break;
            }
        }

        int targetSize = Math.max(Math.max(sizeForIncoming, sizeForExisting),
                Math.max(sizeForOverload, currentBucketSize));
        if (targetSize != currentBucketSize) {
            logger.info("Start building a global dictionary column for {}, need resize from {} to {} ",
                    tblColRef.getName(), currentBucketSize, targetSize);
            NGlobalDictBuilderAssist.resize(tblColRef, nDataSegment, targetSize, dataset.sparkSession());
            logger.info("End building a global dictionary column for {}, need resize from {} to {} ",
                    tblColRef.getName(), currentBucketSize, targetSize);
        }
        return targetSize;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Collects the columns that need a global dictionary across the given layouts.
     *
     * <p>Every effective measure of each layout's index is checked with
     * {@link #needGlobalDict(MeasureDesc)}; for those that qualify, the column reference of the
     * measure function's first parameter is added to the result.
     *
     * @param list layouts to inspect
     * @return a new mutable set of the collected columns, empty when nothing qualifies
     */
    public static Set<TblColRef> findNeedDictCols(List<LayoutEntity> list) {
        Set<TblColRef> dictCols = Sets.newHashSet();
        for (LayoutEntity layout : list) {
            for (NDataModel.Measure measure : layout.getIndex().getEffectiveMeasures().values()) {
                if (needGlobalDict(measure) == null) {
                    continue;
                }
                // NOTE(review): the column is read from the first function parameter rather than
                // from the value needGlobalDict returned; presumably the same column - confirm.
                dictCols.add(measure.getFunction().getParameters().get(0).getColRef());
            }
        }
        return dictCols;
    }

    /**
     * Returns the global-dictionary columns required by the layouts of the given indexes, minus
     * those already required by the segment's effective layouts.
     *
     * @param nDataSegment segment whose effective layouts (when it has segment details) define
     *                     the columns to exclude
     * @param collection   indexes whose layouts are about to be built
     * @return a new mutable set of columns needed by {@code collection} but not by the segment's
     *         effective layouts
     */
    public static Set<TblColRef> extractTreeRelatedGlobalDictToBuild(NDataSegment nDataSegment, Collection<IndexEntity> collection) {
        List<LayoutEntity> layoutsToBuild = Lists.newArrayList();
        for (IndexEntity index : collection) {
            layoutsToBuild.addAll(index.getLayouts());
        }

        List<LayoutEntity> existingLayouts = Lists.newArrayList();
        if (nDataSegment.getSegDetails() != null) {
            for (NDataLayout dataLayout : nDataSegment.getSegDetails().getEffectiveLayouts()) {
                existingLayouts.add(dataLayout.getLayout());
            }
        }

        Set<TblColRef> existingDictCols = findNeedDictCols(existingLayouts);
        Set<TblColRef> dictColsToBuild = findNeedDictCols(layoutsToBuild);
        dictColsToBuild.removeIf(existingDictCols::contains);
        return dictColsToBuild;
    }

    /**
     * Returns the global-dictionary columns required by all layouts of the given indexes.
     *
     * @param nDataSegment not used by this implementation; kept so the signature stays unchanged
     * @param collection   indexes whose layouts are inspected
     * @return a new mutable set of columns that need a global dictionary
     */
    public static Set<TblColRef> extractTreeRelatedGlobalDicts(NDataSegment nDataSegment, Collection<IndexEntity> collection) {
        List<LayoutEntity> allLayouts = collection.stream()
                .flatMap(index -> index.getLayouts().stream())
                .collect(Collectors.toList());
        return findNeedDictCols(allLayouts);
    }

    /**
     * Tells whether a measure needs a global dictionary and, if so, for which column.
     *
     * <p>A measure qualifies when the name of its function's return data type equals
     * {@code "bitmap"}, ignoring case.
     *
     * @param measureDesc measure to inspect
     * @return the first column reference of the measure's function, or {@code null} when the
     *         measure does not return a bitmap
     * @throws IllegalArgumentException if a bitmap measure has no column references
     */
    public static TblColRef needGlobalDict(MeasureDesc measureDesc) {
        String returnTypeName = measureDesc.getFunction().getReturnDataType().getName();
        if (!returnTypeName.equalsIgnoreCase("bitmap")) {
            return null;
        }
        List<TblColRef> colRefs = measureDesc.getFunction().getColRefs();
        Preconditions.checkArgument(!colRefs.isEmpty());
        return colRefs.get(0);
    }
}
