package org.apache.kylin.engine.mr.steps;

import com.google.common.collect.Lists;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hasher;
import com.google.common.hash.Hashing;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.kylin.common.KylinVersion;
import org.apache.kylin.common.util.ByteArray;
import org.apache.kylin.common.util.Bytes;
import org.apache.kylin.common.util.StringUtil;
import org.apache.kylin.cube.cuboid.CuboidScheduler;
import org.apache.kylin.engine.mr.common.BatchConstants;
import org.apache.kylin.measure.hllc.HLLCounter;
import org.apache.kylin.measure.hllc.RegisterType;
import org.apache.kylin.metadata.model.TblColRef;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.class */
/**
 * Mapper that scans flat-table rows and emits, per row:
 * <ul>
 *   <li>one key per non-null value of every column in {@code factDictCols}, prefixed with a
 *       single routing byte derived from the column's reducer id;</li>
 *   <li>when statistics collection is enabled, the partition column value (prefixed with
 *       {@link #MARK_FOR_PARTITION_COL}).</li>
 * </ul>
 * When statistics collection is enabled it also feeds a sample of rows into one
 * {@link HLLCounter} per cuboid and writes those counters (prefixed with {@link #MARK_FOR_HLL})
 * in {@link #doCleanup}.
 *
 * <p>Fields such as {@code cubeDesc}, {@code baseCuboidId}, {@code outputKey} and
 * {@code factDictCols} are inherited from {@link FactDistinctColumnsMapperBase}.
 */
public class FactDistinctColumnsMapper<KEYIN> extends FactDistinctColumnsMapperBase<KEYIN, Object> {

    /** Leading key byte that tags a partition-column value record. */
    public static final byte MARK_FOR_PARTITION_COL = -2;

    /** Leading key byte that tags a per-cuboid HLL counter record. */
    public static final byte MARK_FOR_HLL = -1;

    private static final Logger logger = LoggerFactory.getLogger(FactDistinctColumnsMapper.class);
    private static final Text EMPTY_TEXT = new Text();

    /** Initial capacity of the reusable key buffer; it grows on demand. */
    private static final int INITIAL_KEY_BUFFER_SIZE = 4096;

    /** Capacity of the scratch buffer the HLL registers are serialized into during cleanup. */
    private static final int HLL_BUFFER_SIZE = 1048576;

    /** Number of leading rows for which the emitted dictionary values are logged. */
    private static final int SAMPLE_LOG_ROWS = 10;

    /** Number of row key columns of the cube; only set when statistics are collected. */
    protected int nRowKey;
    protected boolean collectStatistics = false;
    protected CuboidScheduler cuboidScheduler = null;

    /** Ids of all cuboids reachable from the base cuboid, in visiting order. */
    private Long[] cuboidIds;
    /** For each entry of {@link #cuboidIds}: the row key column positions that cuboid contains. */
    private Integer[][] allCuboidsBitSet = null;
    /** For each entry of {@link #cuboidIds}: the counter fed with sampled rows. */
    private HLLCounter[] allCuboidsHLL = null;

    /** A row is sampled when {@code rowCount % 100} is below this value. */
    private int samplingPercentage;
    private boolean isUsePutRowKeyToHllNewAlgorithm;
    private HashFunction hf = null;
    /** Per-column hashes of the current row (new algorithm). */
    private long[] rowHashCodesLong = null;
    /** Per-column hashes of the current row (old algorithm). */
    private ByteArray[] row_hashcodes = null;

    private int partitionColumnIndex = -1;
    private boolean needFetchPartitionCol = true;

    /**
     * Rows processed so far. Kept as {@code long}: an {@code int} counter would wrap to negative
     * values after 2^31 rows, which would re-enable the sample logging for every row and make
     * the {@code % 100} sampling test always succeed.
     */
    private long rowCount = 0;

    private ByteBuffer tmpbuf;
    private SelfDefineSortableKey sortableKey = new SelfDefineSortableKey();

    /** Hadoop counter group used to report the approximate size of the raw input. */
    public enum RawDataCounter {
        BYTES
    }

    /**
     * Initializes the key buffer and, when {@code BatchConstants.CFG_STATISTICS_ENABLED} is true
     * in the job configuration, all state needed for cuboid sampling: the cuboid list, one HLL
     * counter per cuboid, the partition column index and the hash function matching the cube's
     * metadata version.
     *
     * @param context the mapper context supplying the job configuration
     * @throws IOException if the base class setup fails
     */
    @Override // org.apache.kylin.engine.mr.steps.FactDistinctColumnsMapperBase
    public void setup(Mapper<KEYIN, Object, SelfDefineSortableKey, Text>.Context context) throws IOException {
        super.setup(context);
        tmpbuf = ByteBuffer.allocate(INITIAL_KEY_BUFFER_SIZE);
        collectStatistics = Boolean.parseBoolean(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_ENABLED));
        if (!collectStatistics) {
            return;
        }

        samplingPercentage = Integer.parseInt(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT));
        cuboidScheduler = cubeDesc.getCuboidScheduler();
        nRowKey = cubeDesc.getRowkey().getRowKeyColumns().length;

        List<Long> cuboidIdList = Lists.newArrayList();
        List<Integer[]> bitSetList = Lists.newArrayList();
        addCuboidBitSet(baseCuboidId, bitSetList, cuboidIdList);
        // The target must be a two-dimensional array: a plain Integer[] cannot hold
        // Integer[] elements and toArray would fail with ArrayStoreException.
        allCuboidsBitSet = bitSetList.toArray(new Integer[bitSetList.size()][]);
        cuboidIds = cuboidIdList.toArray(new Long[cuboidIdList.size()]);

        int hllPrecision = cubeDesc.getConfig().getCubeStatsHLLPrecision();
        allCuboidsHLL = new HLLCounter[cuboidIds.length];
        for (int i = 0; i < allCuboidsHLL.length; i++) {
            allCuboidsHLL[i] = new HLLCounter(hllPrecision, RegisterType.DENSE);
        }

        TblColRef partitionColRef = cubeDesc.getModel().getPartitionDesc().getPartitionDateColumnRef();
        if (partitionColRef != null) {
            partitionColumnIndex = intermediateTableDesc.getColumnIndex(partitionColRef);
        }
        needFetchPartitionCol = partitionColumnIndex >= 0;

        if (KylinVersion.isBefore200(cubeDesc.getVersion())) {
            isUsePutRowKeyToHllNewAlgorithm = false;
            row_hashcodes = new ByteArray[nRowKey];
            for (int i = 0; i < nRowKey; i++) {
                row_hashcodes[i] = new ByteArray();
            }
            hf = Hashing.murmur3_32();
            logger.info("Found KylinVersion : {}. Use old algorithm for cuboid sampling.", cubeDesc.getVersion());
        } else {
            isUsePutRowKeyToHllNewAlgorithm = true;
            rowHashCodesLong = new long[nRowKey];
            hf = Hashing.murmur3_128();
            logger.info("Found KylinVersion : {}. Use new algorithm for cuboid sampling. About the details of the new algorithm, please refer to KYLIN-2518", cubeDesc.getVersion());
        }
    }

    /**
     * Records {@code j} and the row key column positions it contains, then recurses into every
     * cuboid returned by {@code cuboidScheduler.getSpanningCuboid(j)}.
     *
     * <p>Column position 0 corresponds to the highest set bit of the base cuboid id; each
     * following position corresponds to the next lower bit.
     *
     * @param j     id of the cuboid to record
     * @param list  receives, per visited cuboid, the positions of its row key columns
     * @param list2 receives the visited cuboid ids, index-aligned with {@code list}
     */
    private void addCuboidBitSet(long j, List<Integer[]> list, List<Long> list2) {
        list2.add(Long.valueOf(j));

        Integer[] columnPositions = new Integer[Long.bitCount(j)];
        long mask = Long.highestOneBit(baseCuboidId);
        int filled = 0;
        for (int position = 0; position < nRowKey; position++) {
            // "!= 0" and the unsigned shift keep the test correct even if bit 63 is in use.
            if ((mask & j) != 0) {
                columnPositions[filled++] = Integer.valueOf(position);
            }
            mask >>>= 1;
        }
        list.add(columnPositions);

        for (Long childId : cuboidScheduler.getSpanningCuboid(j)) {
            addCuboidBitSet(childId.longValue(), list, list2);
        }
    }

    /**
     * Processes every row parsed from one mapper input record: updates the raw-bytes counter,
     * emits the dictionary column values and, when statistics are enabled, samples the row into
     * the cuboid HLL counters and emits the partition column value.
     *
     * @param keyin   the input key (unused)
     * @param obj     the raw input record, parsed by {@code flatTableInputFormat}
     * @param context the mapper context to write to
     */
    @Override // org.apache.kylin.engine.mr.KylinMapper
    public void doMap(KEYIN keyin, Object obj, Mapper<KEYIN, Object, SelfDefineSortableKey, Text>.Context context) throws IOException, InterruptedException {
        for (String[] row : flatTableInputFormat.parseMapperInput(obj)) {
            context.getCounter(RawDataCounter.BYTES).increment(countSizeInBytes(row));

            emitDictionaryColumnValues(row, context);

            if (collectStatistics) {
                if (rowCount % 100 < samplingPercentage) {
                    if (isUsePutRowKeyToHllNewAlgorithm) {
                        putRowKeyToHLLNew(row);
                    } else {
                        putRowKeyToHLLOld(row);
                    }
                }
                emitPartitionColumnValue(row, context);
            }
            rowCount++;
        }
    }

    /**
     * Writes one key per non-null value of each column in {@code factDictCols}. The key is a
     * routing byte (the byte at index 3 of {@code Bytes.toBytes(reducerId)}) followed by the
     * value bytes. Columns whose {@code uhcIndex} entry is non-zero are spread over
     * {@code uhcReducerCount} reducers by the value's hash code.
     */
    private void emitDictionaryColumnValues(String[] row, Mapper<KEYIN, Object, SelfDefineSortableKey, Text>.Context context) throws IOException, InterruptedException {
        for (int i = 0; i < factDictCols.size(); i++) {
            String fieldValue = row[dictionaryColumnIndex[i]];
            if (fieldValue == null) {
                continue;
            }

            int reducerIndex = columnIndexToReducerBeginId.get(Integer.valueOf(i)).intValue();
            if (uhcIndex[i] != 0) {
                // Masking with MAX_VALUE clears the sign bit so the modulo is never negative.
                reducerIndex += (fieldValue.hashCode() & Integer.MAX_VALUE) % uhcReducerCount;
            }

            fillOutputKey(Bytes.toBytes(reducerIndex)[3], Bytes.toBytes(fieldValue));
            sortableKey.init(outputKey, factDictCols.get(i).getType());
            context.write(sortableKey, EMPTY_TEXT);

            if (rowCount < SAMPLE_LOG_ROWS) {
                logger.info("Sample output: " + factDictCols.get(i) + " '" + fieldValue + "' => reducer " + reducerIndex);
            }
        }
    }

    /** Writes the row's partition column value, if a partition column exists and the value is non-null. */
    private void emitPartitionColumnValue(String[] row, Mapper<KEYIN, Object, SelfDefineSortableKey, Text>.Context context) throws IOException, InterruptedException {
        if (!needFetchPartitionCol) {
            return;
        }
        String partitionValue = row[partitionColumnIndex];
        if (partitionValue == null) {
            return;
        }
        fillOutputKey(MARK_FOR_PARTITION_COL, Bytes.toBytes(partitionValue));
        sortableKey.init(outputKey, (byte) 0);
        context.write(sortableKey, EMPTY_TEXT);
    }

    /**
     * Loads {@code outputKey} with {@code marker} followed by {@code payload}, growing the
     * reusable buffer first when the record would not fit.
     */
    private void fillOutputKey(byte marker, byte[] payload) {
        tmpbuf.clear();
        int required = payload.length + 1;
        if (required >= tmpbuf.capacity()) {
            tmpbuf = ByteBuffer.allocate(countNewSize(tmpbuf.capacity(), required));
        }
        tmpbuf.put(marker);
        tmpbuf.put(payload);
        outputKey.set(tmpbuf.array(), 0, tmpbuf.position());
    }

    /**
     * Estimates the size of a row: the UTF-8 length of each field (1 for a null field) plus one
     * extra byte per field.
     */
    private long countSizeInBytes(String[] strArr) {
        long total = 0;
        for (String field : strArr) {
            total += (field == null ? 1 : StringUtil.utf8Length(field)) + 1;
        }
        return total;
    }

    /**
     * Old sampling algorithm: hashes each row key column separately, then for every cuboid
     * hashes the concatenation of its columns' hashes and adds the result to the cuboid's
     * counter. A null column is hashed as the int 0.
     */
    private void putRowKeyToHLLOld(String[] strArr) {
        int[] rowKeyColumnIndexes = intermediateTableDesc.getRowKeyColumnIndexes();
        for (int i = 0; i < nRowKey; i++) {
            Hasher columnHasher = hf.newHasher();
            String fieldValue = strArr[rowKeyColumnIndexes[i]];
            if (fieldValue != null) {
                row_hashcodes[i].set(columnHasher.putString(fieldValue).hash().asBytes());
            } else {
                row_hashcodes[i].set(columnHasher.putInt(0).hash().asBytes());
            }
        }

        for (int c = 0; c < allCuboidsBitSet.length; c++) {
            Hasher cuboidHasher = hf.newHasher();
            for (Integer position : allCuboidsBitSet[c]) {
                cuboidHasher.putBytes(row_hashcodes[position.intValue()].array());
            }
            allCuboidsHLL[c].add(cuboidHasher.hash().asBytes());
        }
    }

    /**
     * New sampling algorithm: computes one long hash per row key column (offset by the column
     * position), then for every cuboid adds the sum of its columns' hashes to the cuboid's
     * counter. A null column is hashed as the string {@code "0"}.
     */
    private void putRowKeyToHLLNew(String[] strArr) {
        int[] rowKeyColumnIndexes = intermediateTableDesc.getRowKeyColumnIndexes();
        for (int i = 0; i < nRowKey; i++) {
            Hasher columnHasher = hf.newHasher();
            String fieldValue = strArr[rowKeyColumnIndexes[i]];
            if (fieldValue == null) {
                fieldValue = "0";
            }
            // Adding the column position makes equal values in different columns hash differently.
            rowHashCodesLong[i] = Bytes.toLong(columnHasher.putString(fieldValue).hash().asBytes()) + i;
        }

        for (int c = 0; c < allCuboidsBitSet.length; c++) {
            long combined = 0;
            for (Integer position : allCuboidsBitSet[c]) {
                combined += rowHashCodesLong[position.intValue()];
            }
            allCuboidsHLL[c].addHashDirectly(combined);
        }
    }

    /**
     * When statistics are enabled, writes one record per cuboid: the key is
     * {@link #MARK_FOR_HLL} followed by the cuboid id, the value is the serialized registers of
     * that cuboid's HLL counter.
     */
    @Override // org.apache.kylin.engine.mr.KylinMapper
    protected void doCleanup(Mapper<KEYIN, Object, SelfDefineSortableKey, Text>.Context context) throws IOException, InterruptedException {
        if (!collectStatistics) {
            return;
        }
        ByteBuffer hllBuf = ByteBuffer.allocate(HLL_BUFFER_SIZE);
        for (int i = 0; i < cuboidIds.length; i++) {
            tmpbuf.clear();
            tmpbuf.put(MARK_FOR_HLL);
            tmpbuf.putLong(cuboidIds[i].longValue());
            outputKey.set(tmpbuf.array(), 0, tmpbuf.position());

            hllBuf.clear();
            allCuboidsHLL[i].writeRegisters(hllBuf);
            outputValue.set(hllBuf.array(), 0, hllBuf.position());

            sortableKey.init(outputKey, (byte) 0);
            context.write(sortableKey, outputValue);
        }
    }

    /**
     * Doubles {@code i} (at least once) until the result is no smaller than {@code i2}.
     *
     * @param i  current buffer capacity
     * @param i2 required capacity
     * @return the new capacity
     * @throws IllegalStateException if the doubled capacity does not fit in an {@code int}
     */
    private int countNewSize(int i, int i2) {
        // long arithmetic: doubling in int would overflow and never terminate.
        long newSize = Math.max(1, i);
        do {
            newSize *= 2;
        } while (newSize < i2);
        if (newSize > Integer.MAX_VALUE) {
            throw new IllegalStateException("Required key buffer size is too large: " + i2);
        }
        return (int) newSize;
    }
}
