package org.apache.kylin.engine.mr.steps;

import com.esri.core.geometry.WktParser;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hasher;
import com.google.common.hash.Hashing;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.kylin.common.KylinVersion;
import org.apache.kylin.common.util.Bytes;
import org.apache.kylin.common.util.MemoryBudgetController;
import org.apache.kylin.common.util.StringUtil;
import org.apache.kylin.cube.DimensionRangeInfo;
import org.apache.kylin.cube.cuboid.CuboidUtil;
import org.apache.kylin.engine.mr.common.BatchConstants;
import org.apache.kylin.engine.mr.common.StatisticsDecisionUtil;
import org.apache.kylin.measure.hllc.HLLCounter;
import org.apache.kylin.measure.hllc.RegisterType;
import org.apache.kylin.metadata.datatype.DataType;
import org.apache.kylin.metadata.model.TblColRef;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.class */
public class FactDistinctColumnsMapper<KEYIN> extends FactDistinctColumnsMapperBase<KEYIN, Object> {
    // Number of rowkey columns; set in doSetup from cubeDesc.getRowkey().getRowKeyColumns().length.
    protected int nRowKey;
    // Cuboid ids whose statistics are collected: everything returned by the cuboid scheduler,
    // plus the mandatory cuboids when StatisticsDecisionUtil.isAbleToOptimizeCubingPlan is true.
    private Long[] cuboidIds;
    // Parsed from BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT; a row is sampled when
    // rowCount % 100 < samplingPercentage.
    private int samplingPercentage;
    // Scratch buffer used to assemble output keys. Starts at 4096 bytes and is replaced by a
    // larger buffer in writeFieldValue when a value does not fit.
    private ByteBuffer tmpbuf;
    // Per-column value sets used to avoid emitting the same dictionary-column value twice.
    private DictColDeduper dictColDeduper;
    // Worker calculators created and started in doSetup; each handles a slice of cuboidIds.
    private CuboidStatCalculator[] cuboidStatCalculators;
    private static final Logger logger = LoggerFactory.getLogger((Class<?>) FactDistinctColumnsMapper.class);
    // Empty value written alongside every column-value key.
    private static final Text EMPTY_TEXT = new Text();
    // Result of CuboidUtil.getCuboidBitSet(cuboidIds, nRowKey); each row is indexed in parallel
    // with cuboidIds and is used as a list of rowkey column positions by the calculators.
    private Integer[][] allCuboidsBitSet = (Integer[][]) null;
    // One HLL counter per entry of cuboidIds, created in doSetup.
    private HLLCounter[] allCuboidsHLL = null;
    // Number of rows processed so far by doMap; drives sampling, the periodic memory check and
    // the "first few rows" sample logging in writeFieldValue.
    // NOTE(review): this is an int, so it wraps after Integer.MAX_VALUE rows.
    private int rowCount = 0;
    // Column index -> running min/max of the values seen, maintained for non-dictionary columns.
    private Map<Integer, DimensionRangeInfo> dimensionRangeInfoMap = Maps.newHashMap();
    // Reused output key wrapper; re-initialised before every context.write.
    private SelfDefineSortableKey sortableKey = new SelfDefineSortableKey();

    /* loaded from: input_file:org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper$CuboidStatCalculator.class */
    public static class CuboidStatCalculator implements Runnable {
        private final int id;
        private final int nRowKey;
        private final int[] rowkeyColIndex;
        private final Long[] cuboidIds;
        private final Integer[][] cuboidsBitSet;
        private volatile HLLCounter[] cuboidsHLL;
        private final boolean isNewAlgorithm;
        private final HashFunction hf;
        private long[] rowHashCodesLong;
        private BlockingQueue<String[]> queue = new LinkedBlockingQueue(WktParser.WktToken.attribute_m);
        private Thread workThread;
        private volatile boolean stop;

        public CuboidStatCalculator(int i, int[] iArr, Long[] lArr, Integer[][] numArr, boolean z, HLLCounter[] hLLCounterArr) {
            this.cuboidsHLL = null;
            this.id = i;
            this.nRowKey = iArr.length;
            this.rowkeyColIndex = iArr;
            this.cuboidIds = lArr;
            this.cuboidsBitSet = numArr;
            this.isNewAlgorithm = z;
            if (this.isNewAlgorithm) {
                this.rowHashCodesLong = new long[this.nRowKey];
                this.hf = Hashing.murmur3_128();
            } else {
                this.hf = Hashing.murmur3_32();
            }
            this.cuboidsHLL = hLLCounterArr;
            this.workThread = new Thread(this);
        }

        public void start() {
            FactDistinctColumnsMapper.logger.info("cuboid stats calculator:" + this.id + " started, handle cuboids number:" + this.cuboidIds.length);
            this.workThread.start();
        }

        public void putRow(String[] strArr) {
            try {
                this.queue.put((String[]) Arrays.copyOf(strArr, strArr.length));
            } catch (InterruptedException e) {
                FactDistinctColumnsMapper.logger.error("interrupt", (Throwable) e);
            }
        }

        public void waitForCompletion() {
            this.stop = true;
            try {
                this.workThread.join();
            } catch (InterruptedException e) {
                FactDistinctColumnsMapper.logger.error("interrupt", (Throwable) e);
            }
        }

        /* JADX WARN: Multi-variable type inference failed */
        private void putRowKeyToHLLOld(String[] strArr) {
            byte[] bArr = new byte[this.nRowKey];
            for (int i = 0; i < this.nRowKey; i++) {
                Hasher newHasher = this.hf.newHasher();
                String str = strArr[this.rowkeyColIndex[i]];
                if (str != null) {
                    bArr[i] = newHasher.putString(str).hash().asBytes();
                } else {
                    bArr[i] = newHasher.putInt(0).hash().asBytes();
                }
            }
            int length = this.cuboidsBitSet.length;
            for (int i2 = 0; i2 < length; i2++) {
                Hasher newHasher2 = this.hf.newHasher();
                for (int i3 = 0; i3 < this.cuboidsBitSet[i2].length; i3++) {
                    newHasher2.putBytes(bArr[this.cuboidsBitSet[i2][i3].intValue()]);
                }
                this.cuboidsHLL[i2].add(newHasher2.hash().asBytes());
            }
        }

        private void putRowKeyToHLLNew(String[] strArr) {
            for (int i = 0; i < this.nRowKey; i++) {
                Hasher newHasher = this.hf.newHasher();
                String str = strArr[this.rowkeyColIndex[i]];
                if (str == null) {
                    str = "0";
                }
                this.rowHashCodesLong[i] = Bytes.toLong(newHasher.putString(str).hash().asBytes()) + i;
            }
            int length = this.cuboidsBitSet.length;
            for (int i2 = 0; i2 < length; i2++) {
                long j = 0;
                for (int i3 = 0; i3 < this.cuboidsBitSet[i2].length; i3++) {
                    j += this.rowHashCodesLong[this.cuboidsBitSet[i2][i3].intValue()];
                }
                this.cuboidsHLL[i2].addHashDirectly(j);
            }
        }

        public HLLCounter[] getHLLCounters() {
            return this.cuboidsHLL;
        }

        public Long[] getCuboidIds() {
            return this.cuboidIds;
        }

        @Override // java.lang.Runnable
        public void run() {
            while (true) {
                String[] poll = this.queue.poll();
                if (poll == null && this.stop) {
                    FactDistinctColumnsMapper.logger.info("cuboid stats calculator:" + this.id + " completed.");
                    return;
                } else if (poll == null) {
                    Thread.yield();
                } else if (this.isNewAlgorithm) {
                    putRowKeyToHLLNew(poll);
                } else {
                    putRowKeyToHLLOld(poll);
                }
            }
        }
    }

    /* loaded from: input_file:org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper$DictColDeduper.class */
    /**
     * Remembers, per dictionary column, the values already seen so that the mapper can skip
     * emitting duplicates. The remembered values are dropped when available memory runs low.
     */
    public static class DictColDeduper {
        // NOTE(review): computed in the constructor but never read inside this class, so the
        // enable threshold currently has no effect here - confirm whether that is intended.
        final boolean enabled;
        /** Value sets are cleared when available memory (MB) falls below this threshold. */
        final int resetThresholdMB;
        /** Column index -> values seen so far; only registered dictionary columns have an entry. */
        final Map<Integer, Set<String>> colValueSets;

        /** Uses an enable threshold of 200 MB and a reset threshold of 100 MB. */
        public DictColDeduper() {
            this(200, 100);
        }

        /**
         * @param enableThresholdMB {@link #enabled} is set when the available memory reported by
         *                          {@code MemoryBudgetController.getSystemAvailMB()} is at least this
         * @param resetThresholdMB  threshold used by {@link #resetIfShortOfMem()}
         */
        public DictColDeduper(int enableThresholdMB, int resetThresholdMB) {
            this.colValueSets = Maps.newHashMap();
            this.enabled = MemoryBudgetController.getSystemAvailMB() >= enableThresholdMB;
            this.resetThresholdMB = resetThresholdMB;
        }

        /** Registers column {@code i} as a dictionary column with an empty value set. */
        public void setIsDictCol(int i) {
            this.colValueSets.put(Integer.valueOf(i), new HashSet<String>());
        }

        /** @return whether column {@code i} was registered through {@link #setIsDictCol(int)} */
        public boolean isDictCol(int i) {
            return this.colValueSets.containsKey(Integer.valueOf(i));
        }

        /**
         * Records {@code str} for column {@code i}.
         *
         * @return {@code true} if the value was not yet remembered for that column
         * @throws NullPointerException if column {@code i} was never registered
         */
        public boolean add(int i, String str) {
            return this.colValueSets.get(Integer.valueOf(i)).add(str);
        }

        /** @return the live value set of column {@code i}, or {@code null} if not registered */
        public Set<String> getValueSet(int i) {
            return this.colValueSets.get(Integer.valueOf(i));
        }

        /** Empties every value set when available memory is below {@link #resetThresholdMB}. */
        public void resetIfShortOfMem() {
            if (MemoryBudgetController.getSystemAvailMB() < this.resetThresholdMB) {
                for (Set<String> values : this.colValueSets.values()) {
                    values.clear();
                }
            }
        }
    }

    /* loaded from: input_file:org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper$RawDataCounter.class */
    /** Counters reported by this mapper through {@code context.getCounter(...)}. */
    public enum RawDataCounter {
        /** Incremented in doMap by the per-row size returned from countSizeInBytes. */
        BYTES
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /* JADX WARN: Multi-variable type inference failed */
    /**
     * Prepares the mapper: reads the sampling percentage, builds the list of cuboids to collect
     * statistics for (with one dense HLL counter each), starts the statistics calculators and
     * registers the dictionary columns with the deduper.
     *
     * @param context the mapper context, used for the job configuration
     * @throws IOException propagated from the super class setup
     */
    @Override // org.apache.kylin.engine.mr.steps.FactDistinctColumnsMapperBase, org.apache.kylin.engine.mr.KylinMapper
    public void doSetup(Mapper<KEYIN, Object, SelfDefineSortableKey, Text>.Context context) throws IOException {
        super.doSetup(context);
        this.tmpbuf = ByteBuffer.allocate(4096);
        this.samplingPercentage = Integer.parseInt(context.getConfiguration().get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT));
        this.nRowKey = this.cubeDesc.getRowkey().getRowKeyColumns().length;

        // Cuboids to sample: everything scheduled, plus the mandatory ones when the cubing
        // plan can be optimized.
        Set<Long> cuboidIdSet = Sets.newHashSet(this.cubeSeg.getCuboidScheduler().getAllCuboidIds());
        if (StatisticsDecisionUtil.isAbleToOptimizeCubingPlan(this.cubeSeg)) {
            cuboidIdSet.addAll(this.cubeSeg.getCubeDesc().getMandatoryCuboids());
        }
        this.cuboidIds = cuboidIdSet.toArray(new Long[cuboidIdSet.size()]);
        this.allCuboidsBitSet = CuboidUtil.getCuboidBitSet(this.cuboidIds, this.nRowKey);
        this.allCuboidsHLL = new HLLCounter[this.cuboidIds.length];
        for (int c = 0; c < this.cuboidIds.length; c++) {
            this.allCuboidsHLL[c] = new HLLCounter(this.cubeDesc.getConfig().getCubeStatsHLLPrecision(), RegisterType.DENSE);
        }

        final boolean useNewAlgorithm = !KylinVersion.isBefore200(this.cubeDesc.getVersion());
        if (useNewAlgorithm) {
            logger.info("Found KylinVersion : {}. Use new algorithm for cuboid sampling. About the details of the new algorithm, please refer to KYLIN-2518", this.cubeDesc.getVersion());
        } else {
            logger.info("Found KylinVersion : {}. Use old algorithm for cuboid sampling.", this.cubeDesc.getVersion());
        }

        // Guard against a non-positive calculator count, which would otherwise cause a division
        // by zero or a negative array size below.
        final int calculatorNum = Math.max(1, getStatsThreadNum(this.cuboidIds.length));
        final int cuboidsPerCalculator = Math.max(1, this.cuboidIds.length / calculatorNum);
        final CuboidStatCalculator[] calculators = new CuboidStatCalculator[calculatorNum];
        int created = 0;
        for (int t = 0; t < calculatorNum; t++) {
            final int from = t * cuboidsPerCalculator;
            if (from >= this.cuboidIds.length) {
                break;
            }
            // The last calculator also takes the remainder of the division.
            final int to = (t == calculatorNum - 1) ? this.cuboidIds.length : from + cuboidsPerCalculator;
            CuboidStatCalculator calculator = new CuboidStatCalculator(t, this.intermediateTableDesc.getRowKeyColumnIndexes(), Arrays.copyOfRange(this.cuboidIds, from, to), Arrays.copyOfRange(this.allCuboidsBitSet, from, to), useNewAlgorithm, Arrays.copyOfRange(this.allCuboidsHLL, from, to));
            calculators[t] = calculator;
            calculator.start();
            created++;
        }
        // Never leave null slots behind: later loops dereference every element of this array.
        this.cuboidStatCalculators = (created == calculatorNum) ? calculators : Arrays.copyOf(calculators, created);

        this.dictColDeduper = new DictColDeduper();
        Set<TblColRef> dictColumns = this.cubeDesc.getAllColumnsNeedDictionaryBuilt();
        for (int col = 0; col < this.allCols.size(); col++) {
            if (dictColumns.contains(this.allCols.get(col))) {
                this.dictColDeduper.setIsDictCol(col);
            }
        }
    }

    /**
     * Decides how many statistics calculators to run for the given number of cuboids: one per
     * configured batch of cuboids, capped by the configured maximum, and never fewer than one.
     *
     * @param cuboidNum number of cuboids whose statistics are collected
     * @return the number of calculators to create, always at least 1
     */
    private int getStatsThreadNum(int cuboidNum) {
        int cuboidsPerCalculator = this.cubeDesc.getConfig().getCuboidNumberPerStatsCalculator();
        if (cuboidsPerCalculator <= 0) {
            logger.warn("config from getCuboidNumberPerStatsCalculator() " + cuboidsPerCalculator + " is should larger than 0");
            logger.info("Will use single thread for cuboid statistics calculation");
            return 1;
        }
        int maxCalculatorNum = this.cubeDesc.getConfig().getCuboidStatsCalculatorMaxNumber();
        // Ceiling of cuboidNum / cuboidsPerCalculator for cuboidNum >= 1.
        int calculatorNum = ((cuboidNum - 1) / cuboidsPerCalculator) + 1;
        if (calculatorNum > maxCalculatorNum) {
            calculatorNum = maxCalculatorNum;
        }
        // A non-positive configured maximum (or cuboidNum == 0) must not yield zero or a
        // negative count: the caller divides by this value and sizes an array with it.
        return Math.max(1, calculatorNum);
    }

    /**
     * Handles one input record: for every parsed row, counts its raw size, emits the new values
     * of dictionary columns, tracks min/max of the remaining columns, and samples the row for
     * the cuboid statistics according to the sampling percentage.
     */
    @Override // org.apache.kylin.engine.mr.KylinMapper
    public void doMap(KEYIN keyin, Object obj, Mapper<KEYIN, Object, SelfDefineSortableKey, Text>.Context context) throws IOException, InterruptedException {
        for (String[] row : this.flatTableInputFormat.parseMapperInput(obj)) {
            context.getCounter(RawDataCounter.BYTES).increment(countSizeInBytes(row));

            for (int col = 0; col < this.allCols.size(); col++) {
                final String value = row[this.columnIndex[col]];
                if (value == null) {
                    // null values are neither emitted nor included in the min/max range
                    continue;
                }
                final DataType type = this.allCols.get(col).getType();

                if (this.dictColDeduper.isDictCol(col)) {
                    // Dictionary column: emit the value only the first time it is seen.
                    if (this.dictColDeduper.add(col, value)) {
                        writeFieldValue(context, type, Integer.valueOf(col), value);
                    }
                    continue;
                }

                // Any other column: widen the running [min, max] range.
                final DimensionRangeInfo range = this.dimensionRangeInfoMap.get(Integer.valueOf(col));
                if (range != null) {
                    range.setMax(type.getOrder().max(range.getMax(), value));
                    range.setMin(type.getOrder().min(range.getMin(), value));
                } else {
                    this.dimensionRangeInfoMap.put(Integer.valueOf(col), new DimensionRangeInfo(value, value));
                }
            }

            final int positionInHundred = this.rowCount % 100;
            if (positionInHundred < this.samplingPercentage) {
                putRowKeyToHLL(row);
            }
            if (positionInHundred == 0) {
                // Check memory pressure once every 100 rows.
                this.dictColDeduper.resetIfShortOfMem();
            }
            this.rowCount++;
        }
    }

    /** Hands the sampled row to every statistics calculator. */
    private void putRowKeyToHLL(String[] strArr) {
        final CuboidStatCalculator[] calculators = this.cuboidStatCalculators;
        for (int idx = 0; idx < calculators.length; idx++) {
            calculators[idx].putRow(strArr);
        }
    }

    /**
     * Estimates the raw size of a row: the UTF-8 length of every field (1 for a null field)
     * plus one extra byte per field (presumably for a delimiter).
     *
     * @param strArr the fields of one row
     * @return the estimated size in bytes
     */
    private long countSizeInBytes(String[] strArr) {
        // Accumulate in a long so that the sum cannot wrap before it is returned as a long.
        long size = 0L;
        for (String field : strArr) {
            size += (field == null) ? 1 : StringUtil.utf8Length(field);
            size++;
        }
        return size;
    }

    /**
     * Finishes the mapper: waits for all statistics calculators, writes one record per cuboid
     * carrying its HLL registers, then writes the min and max of every tracked column range.
     */
    @Override // org.apache.kylin.engine.mr.KylinMapper
    protected void doCleanup(Mapper<KEYIN, Object, SelfDefineSortableKey, Text>.Context context) throws IOException, InterruptedException {
        final ByteBuffer registerBuf = ByteBuffer.allocate(1048576);
        final CuboidStatCalculator[] calculators = this.cuboidStatCalculators;

        // Every worker has to be done before any counter is serialized.
        for (int c = 0; c < calculators.length; c++) {
            calculators[c].waitForCompletion();
        }

        for (int c = 0; c < calculators.length; c++) {
            final Long[] ids = calculators[c].getCuboidIds();
            final HLLCounter[] counters = calculators[c].getHLLCounters();
            int pos = 0;
            while (pos < ids.length) {
                final HLLCounter counter = counters[pos];

                // Key: a single marker byte (-1) followed by the 8-byte cuboid id.
                this.tmpbuf.clear();
                this.tmpbuf.put((byte) -1);
                this.tmpbuf.putLong(ids[pos].longValue());
                this.outputKey.set(this.tmpbuf.array(), 0, this.tmpbuf.position());

                // Value: the counter's registers.
                registerBuf.clear();
                counter.writeRegisters(registerBuf);
                this.outputValue.set(registerBuf.array(), 0, registerBuf.position());

                this.sortableKey.init(this.outputKey, (byte) 0);
                context.write(this.sortableKey, this.outputValue);
                pos++;
            }
        }

        for (Map.Entry<Integer, DimensionRangeInfo> entry : this.dimensionRangeInfoMap.entrySet()) {
            final Integer colIndex = entry.getKey();
            final DimensionRangeInfo range = entry.getValue();
            final DataType type = this.allCols.get(colIndex.intValue()).getType();
            writeFieldValue(context, type, colIndex, range.getMin());
            writeFieldValue(context, type, colIndex, range.getMax());
        }
    }

    /**
     * Computes a new buffer capacity by doubling {@code oldSize} at least once and then until
     * it is no smaller than {@code dataSize}.
     *
     * <p>The doubling is done in {@code long} arithmetic and the result is capped at
     * {@link Integer#MAX_VALUE}, so the loop always terminates even for very large targets;
     * a non-positive {@code oldSize} is treated as 1.
     *
     * @param oldSize  current capacity
     * @param dataSize minimum capacity required
     * @return a capacity that is at least {@code dataSize}
     */
    private int countNewSize(int oldSize, int dataSize) {
        long newSize = Math.max(1L, (long) oldSize) * 2L;
        while (newSize < dataSize) {
            newSize *= 2L;
        }
        return (int) Math.min(newSize, (long) Integer.MAX_VALUE);
    }

    /**
     * Emits one column value. The output key is a single byte derived from the reducer id
     * chosen for the column/value, followed by the bytes of the value; the output value is
     * empty. While fewer than 10 rows have been processed, the emitted value is also logged.
     *
     * @param context  mapper context to write to
     * @param dataType type of the column, passed to the sortable key
     * @param num      index of the column
     * @param str      value to emit
     */
    private void writeFieldValue(Mapper<KEYIN, Object, SelfDefineSortableKey, Text>.Context context, DataType dataType, Integer num, String str) throws IOException, InterruptedException {
        final int reducerId = this.reducerMapping.getReducerIdForCol(num.intValue(), str);
        this.tmpbuf.clear();

        final byte[] valueBytes = Bytes.toBytes(str);
        final int required = valueBytes.length + 1;
        final int capacity = this.tmpbuf.capacity();
        if (capacity <= required) {
            // Too small for this value: swap in a larger scratch buffer.
            this.tmpbuf = ByteBuffer.allocate(countNewSize(capacity, required));
        }

        // Only the byte at index 3 of the encoded reducer id goes into the key
        // (presumably the least-significant byte - confirm against Bytes.toBytes(int)).
        final byte reducerByte = Bytes.toBytes(reducerId)[3];
        this.tmpbuf.put(reducerByte);
        this.tmpbuf.put(valueBytes);

        this.outputKey.set(this.tmpbuf.array(), 0, this.tmpbuf.position());
        this.sortableKey.init(this.outputKey, dataType);
        context.write(this.sortableKey, EMPTY_TEXT);

        if (this.rowCount >= 10) {
            return;
        }
        logger.info("Sample output: " + this.allCols.get(num.intValue()) + " '" + str + "' => reducer " + reducerId);
    }
}
