package org.apache.kylin.engine.spark.model.planner;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hasher;
import com.google.common.hash.Hashing;
import java.io.IOException;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import lombok.Generated;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.util.ByteArray;
import org.apache.kylin.common.util.Bytes;
import org.apache.kylin.engine.spark.model.SegmentFlatTableDesc;
import org.apache.kylin.measure.hllc.HLLCounter;
import org.apache.kylin.measure.hllc.RegisterType;
import org.apache.kylin.metadata.cube.model.IndexPlan;
import org.apache.kylin.metadata.cube.model.LayoutEntity;
import org.apache.kylin.metadata.cube.model.RuleBasedIndex;
import org.apache.kylin.metadata.cube.planner.CostBasePlannerUtils;
import org.apache.kylin.metadata.datatype.DataType;
import org.apache.kylin.metadata.model.MeasureDesc;
import org.apache.kylin.metadata.model.NDataModel;
import org.apache.kylin.metadata.model.TblColRef;
import org.apache.spark.Partitioner;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFlatMapFunction;
import org.apache.spark.sql.Row;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;

/* loaded from: input_file:org/apache/kylin/engine/spark/model/planner/FlatTableToCostUtils.class */
public class FlatTableToCostUtils {

    @Generated
    private static final Logger log = LoggerFactory.getLogger(FlatTableToCostUtils.class);

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:org/apache/kylin/engine/spark/model/planner/FlatTableToCostUtils$CuboidStatCalculator.class */
    /**
     * Accumulates one {@link HLLCounter} per cuboid from flat-table rows.
     *
     * <p>For every row, each row-key column value is hashed, and for every cuboid the hashes
     * of the columns listed in that cuboid's entry of {@code cuboidsBitSet} are combined and
     * added to the cuboid's counter. Two combination strategies exist:
     * <ul>
     *   <li>new algorithm: murmur3_128 per column, combined by summing {@code long} values;</li>
     *   <li>old algorithm: murmur3_32 per column, combined by re-hashing the column hash bytes.</li>
     * </ul>
     *
     * <p>Instances keep mutable scratch state and mutate the supplied counters.
     */
    public static class CuboidStatCalculator {
        private final int nRowKey;
        private final int[] rowkeyColIndex;
        private final BigInteger[] cuboidIds;
        private final Integer[][] cuboidsBitSet;
        private HLLCounter[] cuboidsHLL;
        private final boolean isNewAlgorithm;
        private final HashFunction hf;
        // Scratch buffer reused across rows; only allocated for the new algorithm.
        private long[] rowHashCodesLong;

        /**
         * @param iArr           for each row key, the index of its value inside a row array
         * @param bigIntegerArr  cuboid ids, returned unchanged by {@link #getCuboidIds()}
         * @param numArr         for each cuboid, the row-key positions that take part in it
         * @param z              {@code true} to use the new (128-bit, sum-based) algorithm
         * @param hLLCounterArr  one counter per cuboid, index-aligned with {@code numArr}
         */
        public CuboidStatCalculator(int[] iArr, BigInteger[] bigIntegerArr, Integer[][] numArr, boolean z, HLLCounter[] hLLCounterArr) {
            this.nRowKey = iArr.length;
            this.rowkeyColIndex = iArr;
            this.cuboidIds = bigIntegerArr;
            this.cuboidsBitSet = numArr;
            this.isNewAlgorithm = z;
            if (this.isNewAlgorithm) {
                this.rowHashCodesLong = new long[this.nRowKey];
                this.hf = Hashing.murmur3_128();
            } else {
                this.hf = Hashing.murmur3_32();
            }
            this.cuboidsHLL = hLLCounterArr;
        }

        /**
         * Adds one row to every cuboid counter. The row array is copied before use.
         *
         * @param strArr the row's cell values; {@code null} cells are allowed
         */
        public void putRow(String[] strArr) {
            String[] rowCopy = Arrays.copyOf(strArr, strArr.length);
            if (this.isNewAlgorithm) {
                putRowKeyToHLLNew(rowCopy);
            } else {
                putRowKeyToHLLOld(rowCopy);
            }
        }

        /** Old algorithm: hash each column to bytes, then re-hash the selected columns' bytes per cuboid. */
        private void putRowKeyToHLLOld(String[] strArr) {
            // One hash (as raw bytes) per row-key column, hence an array of byte arrays.
            byte[][] columnHashes = new byte[this.nRowKey][];
            for (int i = 0; i < this.nRowKey; i++) {
                Hasher columnHasher = this.hf.newHasher();
                String value = strArr[this.rowkeyColIndex[i]];
                if (value != null) {
                    columnHashes[i] = columnHasher.putString(value).hash().asBytes();
                } else {
                    // A null cell is hashed as the int 0.
                    columnHashes[i] = columnHasher.putInt(0).hash().asBytes();
                }
            }
            for (int cuboid = 0; cuboid < this.cuboidsBitSet.length; cuboid++) {
                Hasher cuboidHasher = this.hf.newHasher();
                for (int columnPos : this.cuboidsBitSet[cuboid]) {
                    cuboidHasher.putBytes(columnHashes[columnPos]);
                }
                this.cuboidsHLL[cuboid].add(cuboidHasher.hash().asBytes());
            }
        }

        /** New algorithm: hash each column to a long, then sum the selected columns' longs per cuboid. */
        private void putRowKeyToHLLNew(String[] strArr) {
            for (int i = 0; i < this.nRowKey; i++) {
                Hasher columnHasher = this.hf.newHasher();
                String value = strArr[this.rowkeyColIndex[i]];
                if (value == null) {
                    // A null cell is hashed as the string "0".
                    value = "0";
                }
                // The column position is added to the hash, so the same value in different
                // columns contributes a different number to the per-cuboid sum.
                this.rowHashCodesLong[i] = Bytes.toLong(columnHasher.putString(value).hash().asBytes()) + i;
            }
            for (int cuboid = 0; cuboid < this.cuboidsBitSet.length; cuboid++) {
                long combined = 0;
                for (int columnPos : this.cuboidsBitSet[cuboid]) {
                    combined += this.rowHashCodesLong[columnPos];
                }
                this.cuboidsHLL[cuboid].addHashDirectly(combined);
            }
        }

        /** @return the counters passed to the constructor (not copied) */
        public HLLCounter[] getHLLCounters() {
            return this.cuboidsHLL;
        }

        /** @return the cuboid ids passed to the constructor (not copied) */
        public BigInteger[] getCuboidIds() {
            return this.cuboidIds;
        }
    }

    /* loaded from: input_file:org/apache/kylin/engine/spark/model/planner/FlatTableToCostUtils$FlatOutputFunction.class */
    private static class FlatOutputFunction implements PairFlatMapFunction<Iterator<String[]>, BigInteger, byte[]> {
        private transient CuboidStatCalculator cuboidStatCalculator;
        private final int hllPrecision;
        private final int rowKeyCount;
        private final BigInteger[] cuboidIds;
        private final int[] rowkeyColumnIndexes;
        private volatile transient boolean initialized = false;
        private final int samplingPercent = 100;

        public FlatOutputFunction(int i, int i2, BigInteger[] bigIntegerArr, int[] iArr) {
            this.hllPrecision = i;
            this.rowKeyCount = i2;
            this.cuboidIds = bigIntegerArr;
            this.rowkeyColumnIndexes = iArr;
        }

        /* JADX WARN: Multi-variable type inference failed */
        /* JADX WARN: Type inference failed for: r0v2, types: [java.lang.Integer[], java.lang.Integer[][]] */
        private Integer[][] getCuboidBitSet(BigInteger[] bigIntegerArr, int i) {
            ?? r0 = new Integer[bigIntegerArr.length];
            for (int i2 = 0; i2 < bigIntegerArr.length; i2++) {
                BigInteger bigInteger = bigIntegerArr[i2];
                r0[i2] = new Integer[bigInteger.bitCount()];
                int i3 = 0;
                for (int i4 = 0; i4 < i; i4++) {
                    if (BigInteger.ZERO.setBit((i - 1) - i4).and(bigInteger).compareTo(BigInteger.ZERO) > 0) {
                        r0[i2][i3] = Integer.valueOf(i4);
                        i3++;
                    }
                }
            }
            return r0;
        }

        private HLLCounter[] getInitCuboidsHLL(int i, int i2) {
            HLLCounter[] hLLCounterArr = new HLLCounter[i];
            for (int i3 = 0; i3 < i; i3++) {
                hLLCounterArr[i3] = new HLLCounter(i2, RegisterType.DENSE);
            }
            return hLLCounterArr;
        }

        private void init() {
            this.cuboidStatCalculator = new CuboidStatCalculator(this.rowkeyColumnIndexes, this.cuboidIds, getCuboidBitSet(this.cuboidIds, this.rowKeyCount), true, getInitCuboidsHLL(this.cuboidIds.length, this.hllPrecision));
            this.initialized = true;
        }

        public Iterator<Tuple2<BigInteger, byte[]>> call(Iterator<String[]> it) throws Exception {
            if (!this.initialized) {
                synchronized (this) {
                    if (!this.initialized) {
                        init();
                    }
                }
            }
            int i = 0;
            while (it.hasNext()) {
                String[] next = it.next();
                if (i % 100 < 100) {
                    this.cuboidStatCalculator.putRow(next);
                }
                i++;
            }
            ArrayList newArrayList = Lists.newArrayList();
            ByteBuffer allocate = ByteBuffer.allocate(1048576);
            BigInteger[] cuboidIds = this.cuboidStatCalculator.getCuboidIds();
            HLLCounter[] hLLCounters = this.cuboidStatCalculator.getHLLCounters();
            for (int i2 = 0; i2 < cuboidIds.length; i2++) {
                BigInteger bigInteger = cuboidIds[i2];
                HLLCounter hLLCounter = hLLCounters[i2];
                allocate.clear();
                hLLCounter.writeRegisters(allocate);
                byte[] bArr = new byte[allocate.position()];
                System.arraycopy(allocate.array(), 0, bArr, 0, allocate.position());
                newArrayList.add(new Tuple2(bigInteger, bArr));
            }
            return newArrayList.iterator();
        }
    }

    /**
     * Builds a fresh layout whose column order is the dimension list of the given
     * rule-based index. No other layout property is set.
     *
     * @param ruleBasedIndex the index supplying the dimensions
     * @return the newly created layout
     */
    private static LayoutEntity createMockRuleBaseLayout(RuleBasedIndex ruleBasedIndex) {
        final LayoutEntity mockLayout = new LayoutEntity();
        mockLayout.setColOrder(ruleBasedIndex.getDimensions());
        return mockLayout;
    }

    /**
     * Computes one merged {@link HLLCounter} per cuboid of the rule-based index from the
     * flat-table rows.
     *
     * <p>Steps: (1) every row is converted to a {@code String[]}; (2) each partition builds
     * per-cuboid counters via {@link FlatOutputFunction}; (3) the serialized counters are merged
     * per cuboid with {@code reduceByKey}; (4) the merged counters are collected into a map.
     *
     * @param javaRDD              flat-table rows
     * @param kylinConfig          supplies the HLL precision and the reducer sizing settings
     * @param ruleBasedIndex       supplies the cuboid layouts and the row-key/column mappings
     * @param segmentFlatTableDesc supplies the flat-table column ids
     * @return cuboid id to merged counter
     * @throws IOException if reading the collected registers fails
     */
    public static Map<BigInteger, HLLCounter> generateCost(JavaRDD<Row> javaRDD, KylinConfig kylinConfig, RuleBasedIndex ruleBasedIndex, SegmentFlatTableDesc segmentFlatTableDesc) throws IOException {
        // Step 1: stringify every cell; null cells stay null.
        JavaRDD<String[]> stringRows = javaRDD.map(new Function<Row, String[]>() {
            @Override
            public String[] call(Row row) throws Exception {
                String[] cells = new String[row.length()];
                for (int i = 0; i < cells.length; i++) {
                    Object cell = row.get(i);
                    cells[i] = cell == null ? null : cell.toString();
                }
                return cells;
            }
        });

        // Step 2: cuboids = layouts generated by the rule index plus one mock layout
        // built from the index's full dimension list.
        int rowKeyCount = ruleBasedIndex.countOfIncludeDimension();
        Set<LayoutEntity> layouts = ruleBasedIndex.genCuboidLayouts();
        layouts.add(createMockRuleBaseLayout(ruleBasedIndex));
        BigInteger[] cuboidIds = getCuboIdsFromLayouts(Lists.newArrayList(layouts), rowKeyCount, ruleBasedIndex.getColumnIdToRowKeyId());
        int[] rowkeyColumnIndexes = getRowkeyColumnIndexes(ruleBasedIndex, segmentFlatTableDesc);
        final int hllPrecision = kylinConfig.getStatsHLLPrecision();
        // Arrays.toString prints the actual indexes rather than the array's identity.
        log.info("The row key count is {}, and the index/column map is {}", rowKeyCount, Arrays.toString(rowkeyColumnIndexes));
        JavaPairRDD<BigInteger, byte[]> partitionStats = stringRows.mapPartitionsToPair(new FlatOutputFunction(hllPrecision, rowKeyCount, cuboidIds, rowkeyColumnIndexes));

        // Step 3: merge the per-partition counters of each cuboid.
        final int reducerNum = getCuboidHLLCounterReducerNum(cuboidIds.length, kylinConfig);
        log.info("Get the partition count for the HLL reducer: {}", reducerNum);
        JavaPairRDD<BigInteger, byte[]> mergedStats = partitionStats.reduceByKey(new Partitioner() {
            private final int num = reducerNum;
            private final BigInteger bigIntegerMod = BigInteger.valueOf(reducerNum);

            @Override
            public int numPartitions() {
                return this.num;
            }

            @Override
            public int getPartition(Object obj) {
                // BigInteger.mod is never negative, so the result is a valid partition index.
                return ((BigInteger) obj).mod(this.bigIntegerMod).intValue();
            }
        }, new Function2<byte[], byte[], byte[]>() {
            private final int precision = hllPrecision;

            @Override
            public byte[] call(byte[] bArr, byte[] bArr2) throws Exception {
                HLLCounter left = new HLLCounter(this.precision);
                left.readRegisters(ByteBuffer.wrap(bArr, 0, bArr.length));
                HLLCounter right = new HLLCounter(this.precision);
                right.readRegisters(ByteBuffer.wrap(bArr2, 0, bArr2.length));
                left.merge(right);
                ByteBuffer buffer = ByteBuffer.allocate(1048576);
                left.writeRegisters(buffer);
                return Arrays.copyOf(buffer.array(), buffer.position());
            }
        });

        // Step 4: deserialize the merged registers on the driver.
        Map<BigInteger, HLLCounter> cuboidHLLMap = Maps.newHashMap();
        for (Tuple2<BigInteger, byte[]> pair : mergedStats.collect()) {
            HLLCounter counter = new HLLCounter(hllPrecision);
            counter.readRegisters(new ByteArray(pair._2).asBuffer());
            cuboidHLLMap.put(pair._1, counter);
        }
        if (log.isDebugEnabled()) {
            logMapperAndCuboidStatistics(cuboidHLLMap, 100);
        }
        return cuboidHLLMap;
    }

    /**
     * Derives the reducer count for merging cuboid counters: the ceiling of
     * {@code i / getJobPerReducerHLLCuboidNumber()}, capped at
     * {@code getJobHLLMaxReducerNumber()} and never less than 1.
     *
     * @param i           number of cuboids
     * @param kylinConfig source of the two sizing settings
     * @return the reducer count, at least 1
     */
    private static int getCuboidHLLCounterReducerNum(int i, KylinConfig kylinConfig) {
        final int wanted = (i - 1) / kylinConfig.getJobPerReducerHLLCuboidNumber() + 1;
        final int cap = kylinConfig.getJobHLLMaxReducerNumber();
        final int capped = Math.min(wanted, cap);
        return Math.max(capped, 1);
    }

    /**
     * Converts each layout's dimension ids to a cuboid id and returns the distinct ids.
     *
     * @param list layouts to convert
     * @param i    dimension count passed through to the conversion
     * @param map  column-id mapping passed through to the conversion
     * @return the distinct cuboid ids, in the iteration order of a {@link HashSet}
     */
    private static BigInteger[] getCuboIdsFromLayouts(List<LayoutEntity> list, int i, Map<Integer, Integer> map) {
        final Set<BigInteger> distinctIds = new HashSet<>();
        for (LayoutEntity layout : list) {
            distinctIds.add(CostBasePlannerUtils.convertDimensionsToCuboId(layout.getDimsIds(), i, map));
        }
        return distinctIds.toArray(new BigInteger[0]);
    }

    /**
     * For every row-key id in {@code [0, countOfIncludeDimension())}, finds the position of
     * its column inside the flat table's column-id list.
     *
     * @param ruleBasedIndex       supplies the row-key count and the row-key-to-column mapping
     * @param segmentFlatTableDesc supplies the flat-table column ids
     * @return array indexed by row-key id holding the column's position in the flat table
     * @throws RuntimeException if a row-key id has no column mapping, or the mapped column
     *                          is not among the flat-table column ids
     */
    private static int[] getRowkeyColumnIndexes(RuleBasedIndex ruleBasedIndex, SegmentFlatTableDesc segmentFlatTableDesc) {
        final int rowKeyCount = ruleBasedIndex.countOfIncludeDimension();
        final List<Integer> flatTableColumnIds = segmentFlatTableDesc.getColumnIds();
        final int[] positions = new int[rowKeyCount];
        final Map<Integer, Integer> rowKeyToColumn = ruleBasedIndex.getRowKeyIdToColumnId();
        for (int rowKeyId = 0; rowKeyId < rowKeyCount; rowKeyId++) {
            if (!rowKeyToColumn.containsKey(rowKeyId)) {
                throw new RuntimeException("Can't find the column id from the rowkey id");
            }
            final int columnId = rowKeyToColumn.get(rowKeyId);
            final int position = flatTableColumnIds.indexOf(columnId);
            if (position < 0) {
                throw new RuntimeException(String.format("Can't find the column id %d, column ids %s", columnId, flatTableColumnIds.toString()));
            }
            positions[rowKeyId] = position;
        }
        return positions;
    }

    /**
     * Writes the cuboid count, the sampling percentage and each cuboid's estimated row
     * count to the debug log, with cuboids in ascending id order.
     *
     * @param map cuboid id to its counter
     * @param i   sampling percentage to report
     */
    private static void logMapperAndCuboidStatistics(Map<BigInteger, HLLCounter> map, int i) {
        // Skip the sort and the per-cuboid estimates when nothing would be logged.
        if (!log.isDebugEnabled()) {
            return;
        }
        log.debug("Total cuboid number: \t{}", map.size());
        log.debug("Sampling percentage: \t{}", i);
        log.debug("The following statistics are collected based on sampling data.");
        List<BigInteger> sortedCuboidIds = Lists.newArrayList(map.keySet());
        Collections.sort(sortedCuboidIds);
        for (BigInteger cuboidId : sortedCuboidIds) {
            log.debug("Cuboid {} row count is: \t {}", cuboidId, map.get(cuboidId).getCountEstimate());
        }
    }

    /**
     * Replaces every counter with its count estimate.
     *
     * @param map cuboid id to its counter
     * @return a new map from cuboid id to {@code getCountEstimate()} of its counter
     */
    public static Map<BigInteger, Long> getCuboidRowCountMapFromSampling(Map<BigInteger, HLLCounter> map) {
        final Map<BigInteger, Long> rowCounts = new HashMap<>();
        final Iterator<Map.Entry<BigInteger, HLLCounter>> entries = map.entrySet().iterator();
        while (entries.hasNext()) {
            final Map.Entry<BigInteger, HLLCounter> entry = entries.next();
            final BigInteger cuboidId = entry.getKey();
            final long estimate = entry.getValue().getCountEstimate();
            rowCounts.put(cuboidId, estimate);
        }
        return rowCounts;
    }

    /**
     * Uses each cuboid's row count directly as its size: every {@code Long} count is
     * converted to a {@code Double} with the same numeric value.
     *
     * <p>Only {@code map} is read; the remaining parameters are currently unused.
     *
     * @param map                  cuboid id to row count
     * @param j                    unused
     * @param ruleBasedIndex       unused
     * @param kylinConfig          unused
     * @param segmentFlatTableDesc unused
     * @return a new map from cuboid id to its row count as a double
     */
    private static Map<BigInteger, Double> getCuboidSizeMapFromSamplingByCount(Map<BigInteger, Long> map, long j, RuleBasedIndex ruleBasedIndex, KylinConfig kylinConfig, SegmentFlatTableDesc segmentFlatTableDesc) {
        Map<BigInteger, Double> sizeMap = Maps.newHashMap();
        for (Map.Entry<BigInteger, Long> entry : map.entrySet()) {
            sizeMap.put(entry.getKey(), entry.getValue().doubleValue());
        }
        return sizeMap;
    }

    /**
     * Estimates the size of every cuboid by delegating, with all arguments unchanged, to
     * {@code getCuboidSizeMapFromSamplingByCount}.
     *
     * @return cuboid id to estimated size, as produced by the delegate
     */
    public static Map<BigInteger, Double> getCuboidSizeMapFromSampling(Map<BigInteger, Long> map, long j, RuleBasedIndex ruleBasedIndex, KylinConfig kylinConfig, SegmentFlatTableDesc segmentFlatTableDesc) {
        final Map<BigInteger, Double> sizeByCuboid = getCuboidSizeMapFromSamplingByCount(map, j, ruleBasedIndex, kylinConfig, segmentFlatTableDesc);
        return sizeByCuboid;
    }

    /**
     * Returns one size entry per id in {@code [0, countOfIncludeDimension())}. Every entry
     * is currently 0; the method only verifies that each id is among the flat-table column
     * ids and that a column exists at that position.
     *
     * @param indexPlan            supplies the rule-based index and thereby the id count
     * @param segmentFlatTableDesc supplies the flat-table column ids and columns
     * @return a list of zeros, one per id
     * @throws RuntimeException if an id is not among the flat-table column ids
     */
    private static List<Integer> getRowkeyColumnSize(IndexPlan indexPlan, SegmentFlatTableDesc segmentFlatTableDesc) {
        final int rowKeyCount = indexPlan.getRuleBasedIndex().countOfIncludeDimension();
        final List<Integer> flatColumnIds = segmentFlatTableDesc.getColumnIds();
        final List<TblColRef> flatColumns = segmentFlatTableDesc.getColumns();
        final List<Integer> columnSizes = Lists.newArrayList();
        for (int id = 0; id < rowKeyCount; id++) {
            final int position = flatColumnIds.indexOf(id);
            if (position >= 0) {
                // The looked-up column is not used yet; the call is kept so an
                // out-of-range position still fails here.
                flatColumns.get(position);
                columnSizes.add(0);
            } else {
                throw new RuntimeException(String.format("Can't find the column id %d, column ids %s", id, flatColumnIds.toString()));
            }
        }
        return columnSizes;
    }

    /**
     * Estimates a cuboid's storage size as (bytes per row x row count x ratio), summed over
     * four measure categories and divided by 1024 * 1024.
     *
     * <p>The per-row key length starts at a fixed 8 bytes and adds {@code list.get(p)} for
     * every bit of {@code j} that is set, scanning from the highest set bit of {@code j3}
     * downwards ({@code p} = 0 for that highest bit). Measure bytes are accumulated
     * separately for COUNT_DISTINCT, PERCENTILE_APPROX, TOP_N and all other expressions;
     * the latter share a bucket with the key length.
     *
     * @param set         measures stored in the cuboid
     * @param j           bit mask tested against the scanning mask; presumably the cuboid id (confirm)
     * @param j2          row count of the cuboid; when 0 no measure is counted and the result is 0 for finite ratios
     * @param j3          value whose highest set bit defines the scan range; presumably the base cuboid id (confirm)
     * @param j4          numerator of the PERCENTILE_APPROX per-row estimate; presumably the base cuboid row count (confirm)
     * @param list        key length per bit position, indexed from the highest bit of {@code j3}
     * @param j5          numerator of the COUNT_DISTINCT / TOP_N per-row estimate; presumably the source row count (confirm)
     * @param kylinConfig supplies the three size ratios
     * @return the estimated size
     */
    private static double estimateCuboidStorageSize(Set<NDataModel.Measure> set, long j, long j2, long j3, long j4, List<Integer> list, long j5, KylinConfig kylinConfig) {
        int rowkeyLength = 8;
        long mask = Long.highestOneBit(j3);
        long bitLength = Long.SIZE - Long.numberOfLeadingZeros(j3);
        for (int pos = 0; pos < bitLength; pos++) {
            if ((mask & j) > 0) {
                rowkeyLength += list.get(pos);
            }
            mask >>= 1;
        }

        int normalSpace = rowkeyLength;
        int countDistinctSpace = 0;
        double percentileSpace = 0.0d;
        int topNSpace = 0;
        for (MeasureDesc measure : set) {
            if (j2 == 0) {
                // Also guards the divisions by j2 below.
                break;
            }
            DataType returnType = measure.getFunction().getReturnDataType();
            String expression = measure.getFunction().getExpression();
            if (expression.equals("COUNT_DISTINCT")) {
                long perRow = j5 / j2;
                long estimatedCount = perRow == 0 ? 1L : perRow;
                countDistinctSpace += returnType.getStorageBytesEstimate(estimatedCount);
            } else if (expression.equals("PERCENTILE_APPROX")) {
                percentileSpace += returnType.getStorageBytesEstimate((j4 * 1.0d) / j2);
            } else if (expression.equals("TOP_N")) {
                long perRow = j5 / j2;
                long estimatedCount = perRow == 0 ? 1L : perRow;
                topNSpace += returnType.getStorageBytesEstimate(estimatedCount);
            } else {
                normalSpace += returnType.getStorageBytesEstimate();
            }
        }

        double normalBytes = 1.0d * normalSpace * j2 * kylinConfig.getJobCuboidSizeRatio();
        double countDistinctBytes = 1.0d * countDistinctSpace * j2 * kylinConfig.getJobCuboidSizeCountDistinctRatio();
        double percentileBytes = 1.0d * percentileSpace * j2;
        double topNBytes = 1.0d * topNSpace * j2 * kylinConfig.getJobCuboidSizeTopNRatio();
        return (normalBytes + countDistinctBytes + percentileBytes + topNBytes) / 1048576.0d;
    }
}
