package org.apache.kylin.engine.spark;

import com.google.common.collect.Lists;
import java.io.Serializable;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.KylinVersion;
import org.apache.kylin.common.util.AbstractApplication;
import org.apache.kylin.common.util.Bytes;
import org.apache.kylin.common.util.HadoopUtil;
import org.apache.kylin.common.util.OptionsHelper;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.cube.cuboid.CuboidModeEnum;
import org.apache.kylin.cube.cuboid.CuboidUtil;
import org.apache.kylin.cube.kv.RowKeyDecoder;
import org.apache.kylin.cube.model.CubeDesc;
import org.apache.kylin.engine.mr.common.AbstractHadoopJob;
import org.apache.kylin.engine.mr.common.BatchConstants;
import org.apache.kylin.engine.mr.common.CubeStatsWriter;
import org.apache.kylin.engine.mr.common.SerializableConfiguration;
import org.apache.kylin.engine.spark.SparkFunction;
import org.apache.kylin.measure.hllc.HLLCounter;
import org.apache.kylin.shaded.com.google.common.base.Preconditions;
import org.apache.kylin.shaded.com.google.common.collect.Maps;
import org.apache.kylin.shaded.com.google.common.hash.HashFunction;
import org.apache.kylin.shaded.com.google.common.hash.Hasher;
import org.apache.kylin.shaded.com.google.common.hash.Hashing;
import org.apache.spark.SparkConf;
import org.apache.spark.TaskContext;
import org.apache.spark.api.java.JavaSparkContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.supercsv.cellprocessor.constraint.DMinMax;
import scala.Tuple2;

/* loaded from: input_file:WEB-INF/lib/kylin-engine-spark-3.1.3.jar:org/apache/kylin/engine/spark/SparkCalculateStatsFromBaseCuboidJob.class */
/**
 * Spark application that samples base-cuboid rows from a sequence file and writes per-cuboid
 * HyperLogLog statistics.
 *
 * <p>The job runs in two stages:
 * <ol>
 *   <li>every input partition decodes its row keys, feeds a sample of the base-cuboid rows into one
 *       {@link HLLCounter} per cuboid and emits the serialized counters keyed by cuboid id;</li>
 *   <li>the counters are grouped by cuboid id, merged, and written through
 *       {@link CubeStatsWriter#writePartialCuboidStatistics}.</li>
 * </ol>
 *
 * <p>The function objects passed to Spark are anonymous inner classes, so they carry a reference to
 * this (serializable) job instance and operate on its fields.
 */
public class SparkCalculateStatsFromBaseCuboidJob extends AbstractApplication implements Serializable {
    protected static final Logger logger = LoggerFactory.getLogger(SparkCalculateStatsFromBaseCuboidJob.class);

    public static final Option OPTION_CUBE_NAME = requiredOption(BatchConstants.ARG_CUBE_NAME, null);
    public static final Option OPTION_SEGMENT_ID = requiredOption("segmentId", null);
    public static final Option OPTION_INPUT_PATH = requiredOption(BatchConstants.ARG_INPUT, null);
    public static final Option OPTION_OUTPUT_PATH = requiredOption(BatchConstants.ARG_OUTPUT, null);
    public static final Option OPTION_META_URL = requiredOption(BatchConstants.ARG_META_URL, "HDFS metadata url");
    public static final Option OPTION_JOB_MODE = requiredOption(BatchConstants.ARG_CUBOID_MODE, null);
    public static final Option OPTION_SAMPLING_PERCENT = requiredOption(BatchConstants.ARG_STATS_SAMPLING_PERCENT, null);

    /** Size of the scratch buffer used to serialize one HLL counter (1 MiB). */
    private static final int HLL_BUFFER_SIZE = 1024 * 1024;
    /** Length of the sampling cycle; {@link #samplingPercent} rows out of each cycle are sampled. */
    private static final int SAMPLING_CYCLE = 100;

    private int samplingPercent;
    // About the details of the new algorithm, please refer to KYLIN-2518.
    private boolean isUsePutRowKeyToHllNewAlgorithm;
    private Long[] cuboidIds;
    protected int nRowKey;
    protected long baseCuboidId;
    RowKeyDecoder rowKeyDecoder;
    protected Map<Long, HLLCounter> cuboidHLLMap = Maps.newHashMap();
    private int rowCount = 0;
    private long[] rowHashCodesLong = null;
    private HLLCounter[] allCuboidsHLL = null;
    private Integer[][] allCuboidsBitSet = null;
    private HashFunction hf = null;
    private final Options options = new Options();

    /** Registers every command line option this job understands. */
    public SparkCalculateStatsFromBaseCuboidJob() {
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_SEGMENT_ID);
        options.addOption(OPTION_META_URL);
        options.addOption(OPTION_JOB_MODE);
        options.addOption(OPTION_SAMPLING_PERCENT);
    }

    /**
     * Builds a required, single-argument option whose argument name equals the option name.
     *
     * @param name        option name, also used as the argument name
     * @param description optional description; skipped when {@code null}
     * @return the newly created option
     */
    private static Option requiredOption(String name, String description) {
        // OptionBuilder accumulates its settings in static state until create() is called.
        OptionBuilder.withArgName(name);
        OptionBuilder.hasArg();
        OptionBuilder.isRequired(true);
        if (description != null) {
            OptionBuilder.withDescription(description);
        }
        return OptionBuilder.create(name);
    }

    @Override
    protected Options getOptions() {
        return options;
    }

    /**
     * Runs the statistics job.
     *
     * <p>The output path is deleted first; then the sampling state is initialised from the cube
     * metadata and the Spark pipeline is executed. The Spark context is closed when the method
     * returns, whether normally or with an exception.
     *
     * @param optionsHelper parsed command line options
     * @throws Exception if option parsing, metadata loading or the Spark job fails
     */
    @Override
    protected void execute(OptionsHelper optionsHelper) throws Exception {
        final String cubeName = optionsHelper.getOptionValue(OPTION_CUBE_NAME);
        final String segmentId = optionsHelper.getOptionValue(OPTION_SEGMENT_ID);
        final String inputPath = optionsHelper.getOptionValue(OPTION_INPUT_PATH);
        final String outputPath = optionsHelper.getOptionValue(OPTION_OUTPUT_PATH);
        final String metaUrl = optionsHelper.getOptionValue(OPTION_META_URL);
        final String cuboidMode = optionsHelper.getOptionValue(OPTION_JOB_MODE);
        this.samplingPercent = Integer.parseInt(optionsHelper.getOptionValue(OPTION_SAMPLING_PERCENT));

        SparkConf conf = SparkUtil.setKryoSerializerInConf();
        conf.setAppName("Kylin_Calculate_Statics_From_BaseCuboid_Data_" + cubeName + "_With_Spark");
        // NOTE(review): presumably disabled so that speculative task attempts cannot write the same
        // partial statistics twice - confirm.
        conf.set("spark.speculation", "false");
        KylinSparkJobListener jobListener = new KylinSparkJobListener();

        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            sc.sc().addSparkListener(jobListener);
            HadoopUtil.deletePath(sc.hadoopConfiguration(), new Path(outputPath));

            final SerializableConfiguration sConf = new SerializableConfiguration(sc.hadoopConfiguration());
            KylinConfig config = AbstractHadoopJob.loadKylinConfigFromHdfs(sConf, metaUrl);
            final int hllPrecision = config.getCubeStatsHLLPrecision();

            // The thread-local config is only needed while the cube metadata is read on the driver.
            try (KylinConfig.SetAndUnsetThreadLocalConfig ignored = KylinConfig.setAndUnsetThreadLocalConfig(config)) {
                prepareSamplingState(config, cubeName, cuboidMode, hllPrecision);
            }

            sc.sequenceFile(inputPath, Text.class, Text.class)
                    .mapPartitionsToPair(new SparkFunction.PairFlatMapFunctionBase<Iterator<Tuple2<Text, Text>>, Text, Text>() {
                        @Override
                        protected void doInit() {
                            KylinConfig taskConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(sConf, metaUrl);
                            CubeSegment segment = CubeManager.getInstance(taskConfig).getCube(cubeName)
                                    .getSegmentById(segmentId);
                            SparkCalculateStatsFromBaseCuboidJob.this.rowKeyDecoder = new RowKeyDecoder(segment);
                        }

                        @Override
                        public Iterator<Tuple2<Text, Text>> doCall(Iterator<Tuple2<Text, Text>> rows) throws Exception {
                            while (rows.hasNext()) {
                                sampleBaseCuboidRow(rows.next()._1().getBytes());
                            }
                            return serializeCuboidCounters().iterator();
                        }
                    })
                    .groupByKey()
                    .foreach(new SparkFunction.VoidFunctionBase<Tuple2<Text, Iterable<Text>>>() {
                        @Override
                        protected void doInit() {
                            // The returned handle is deliberately not closed: the config stays bound
                            // to the thread for the calls that follow.
                            KylinConfig.setAndUnsetThreadLocalConfig(
                                    AbstractHadoopJob.loadKylinConfigFromHdfs(sConf, metaUrl));
                        }

                        @Override
                        public void doCall(Tuple2<Text, Iterable<Text>> grouped) throws Exception {
                            mergeAndWriteCuboidStats(grouped._1(), grouped._2(), hllPrecision, sConf, outputPath);
                        }
                    });
        }
    }

    /**
     * Reads the cube metadata and initialises the fields used for sampling: base cuboid id, row key
     * column count, cuboid ids with their bit sets, one empty HLL counter per cuboid and the hash
     * function matching the cube's version.
     *
     * @param config       Kylin configuration loaded from HDFS
     * @param cubeName     name of the cube to sample
     * @param cuboidMode   cuboid mode used to select the cuboids
     * @param hllPrecision precision of the created HLL counters
     */
    private void prepareSamplingState(KylinConfig config, String cubeName, String cuboidMode, int hllPrecision) {
        CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
        CubeDesc cubeDesc = cube.getDescriptor();
        this.baseCuboidId = cube.getCuboidScheduler().getBaseCuboidId();
        this.nRowKey = cubeDesc.getRowkey().getRowKeyColumns().length;

        Set<Long> cuboids = cube.getCuboidsByMode(cuboidMode);
        if (cuboids.isEmpty()) {
            // Fall back to the current cuboids minus the recommended ones.
            cuboids = cube.getCuboidsByMode(CuboidModeEnum.CURRENT);
            cuboids.removeAll(cube.getCuboidsRecommend());
        }
        this.cuboidIds = cuboids.toArray(new Long[cuboids.size()]);
        this.allCuboidsBitSet = CuboidUtil.getCuboidBitSet(this.cuboidIds, this.nRowKey);

        this.allCuboidsHLL = new HLLCounter[this.cuboidIds.length];
        for (int i = 0; i < this.cuboidIds.length; i++) {
            this.allCuboidsHLL[i] = new HLLCounter(hllPrecision);
        }

        if (KylinVersion.isBefore200(cubeDesc.getVersion())) {
            this.isUsePutRowKeyToHllNewAlgorithm = false;
            this.hf = Hashing.murmur3_32();
            logger.info("Found KylinVersion : {}. Use old algorithm for cuboid sampling.", cubeDesc.getVersion());
        } else {
            this.isUsePutRowKeyToHllNewAlgorithm = true;
            this.rowHashCodesLong = new long[this.nRowKey];
            this.hf = Hashing.murmur3_128();
            logger.info("Found KylinVersion : {}. Use new algorithm for cuboid sampling. About the details of the new algorithm, please refer to KYLIN-2518", cubeDesc.getVersion());
        }
    }

    /**
     * Decodes one row key and, if it belongs to the base cuboid, feeds it to the HLL counters when
     * it falls inside the sampled part of the current cycle.
     *
     * <p>{@link #rowCount} cycles through {@code 0..SAMPLING_CYCLE-1}; a row is sampled while the
     * counter is below {@link #samplingPercent}.
     *
     * @param rowKeyBytes encoded row key
     * @throws Exception if the row key cannot be decoded
     */
    private void sampleBaseCuboidRow(byte[] rowKeyBytes) throws Exception {
        if (rowKeyDecoder.decode(rowKeyBytes) != baseCuboidId) {
            return;
        }
        List<String> values = rowKeyDecoder.getValues();
        if (rowCount < samplingPercent) {
            Preconditions.checkArgument(nRowKey == values.size());
            String[] row = values.toArray(new String[values.size()]);
            if (isUsePutRowKeyToHllNewAlgorithm) {
                putRowKeyToHLLNew(row);
            } else {
                putRowKeyToHLLOld(row);
            }
        }
        if (++rowCount == SAMPLING_CYCLE) {
            rowCount = 0;
        }
    }

    /**
     * Serializes every cuboid's HLL counter into a (cuboid id, registers) pair.
     *
     * @return one pair per cuboid, in the order of {@link #cuboidIds}
     * @throws Exception if a counter cannot be written to the buffer
     */
    private List<Tuple2<Text, Text>> serializeCuboidCounters() throws Exception {
        ByteBuffer buffer = ByteBuffer.allocate(HLL_BUFFER_SIZE);
        List<Tuple2<Text, Text>> result = new ArrayList<>(cuboidIds.length);
        for (int i = 0; i < cuboidIds.length; i++) {
            Text key = new Text();
            key.set(Bytes.toBytes(cuboidIds[i].longValue()));

            buffer.clear();
            allCuboidsHLL[i].writeRegisters(buffer);
            Text value = new Text();
            // Copy only the bytes written for this counter; the buffer is reused for the next one.
            value.set(buffer.array(), 0, buffer.position());

            logger.info("Cuboid id to be processed1: {}, serialized register size: {} bytes", cuboidIds[i],
                    buffer.position());
            result.add(new Tuple2<>(key, value));
        }
        logger.info("result size: {}", result.size());
        if (logger.isDebugEnabled()) {
            for (Tuple2<Text, Text> entry : result) {
                logger.debug("result key: {}", entry._1());
                logger.debug("result values: {}", entry._2());
            }
        }
        return result;
    }

    /**
     * Merges all serialized counters of one cuboid into {@link #cuboidHLLMap} and writes the partial
     * statistics for the current partition.
     *
     * <p>{@link #cuboidHLLMap} is never cleared here, so the written map and the grand total also
     * include cuboids merged by earlier calls on the same job instance.
     *
     * @param key                serialized cuboid id
     * @param serializedCounters serialized HLL registers emitted for that cuboid
     * @param hllPrecision       precision used to deserialize the counters
     * @param sConf              Hadoop configuration holder
     * @param outputPath         directory the statistics are written to
     * @throws Exception if a counter cannot be read or the statistics cannot be written
     */
    private void mergeAndWriteCuboidStats(Text key, Iterable<Text> serializedCounters, int hllPrecision,
            SerializableConfiguration sConf, String outputPath) throws Exception {
        long cuboidId = Bytes.toLong(key.getBytes());
        logger.info("Cuboid id to be processed: {}", cuboidId);

        int baseCuboidCounterCount = 0;
        long totalRowsBeforeMerge = 0;
        for (Text serialized : serializedCounters) {
            HLLCounter hll = new HLLCounter(hllPrecision);
            hll.readRegisters(ByteBuffer.wrap(serialized.getBytes(), 0, serialized.getLength()));
            if (cuboidId == baseCuboidId) {
                baseCuboidCounterCount++;
            }
            totalRowsBeforeMerge += hll.getCountEstimate();

            HLLCounter merged = cuboidHLLMap.get(cuboidId);
            if (merged != null) {
                merged.merge(hll);
            } else {
                cuboidHLLMap.put(cuboidId, hll);
            }
        }

        long grandTotal = 0;
        for (HLLCounter hll : cuboidHLLMap.values()) {
            grandTotal += hll.getCountEstimate();
        }
        // Divide as double: a long division would truncate the ratio to a whole number.
        double mapperOverlapRatio = grandTotal == 0 ? 0.0 : (double) totalRowsBeforeMerge / grandTotal;

        logger.info("writer cuboIdstatic to {}", outputPath);
        CubeStatsWriter.writePartialCuboidStatistics(sConf.get(), new Path(outputPath), cuboidHLLMap,
                samplingPercent, baseCuboidCounterCount, mapperOverlapRatio, TaskContext.getPartitionId());
    }

    /**
     * Old sampling algorithm: hashes every column value, then for each cuboid hashes the
     * concatenation of the hashes of its columns and adds the resulting bytes to the cuboid's counter.
     *
     * @param row decoded row key values, one per row key column; {@code null} entries are hashed as
     *            the int {@code 0}
     */
    private void putRowKeyToHLLOld(String[] row) {
        // One hash (a byte array) per row key column.
        byte[][] columnHashes = new byte[nRowKey][];
        for (int i = 0; i < nRowKey; i++) {
            Hasher hasher = hf.newHasher();
            String column = row[i];
            if (column != null) {
                columnHashes[i] = hasher.putUnencodedChars(column).hash().asBytes();
            } else {
                columnHashes[i] = hasher.putInt(0).hash().asBytes();
            }
        }

        for (int i = 0; i < cuboidIds.length; i++) {
            Hasher hasher = hf.newHasher();
            for (Integer columnIndex : allCuboidsBitSet[i]) {
                hasher.putBytes(columnHashes[columnIndex.intValue()]);
            }
            allCuboidsHLL[i].add(hasher.hash().asBytes());
        }
    }

    /**
     * New sampling algorithm (see KYLIN-2518): computes one long hash per column, offset by the
     * column index, and adds the sum of a cuboid's column hashes directly to the cuboid's counter.
     *
     * @param row decoded row key values, one per row key column; {@code null} entries are hashed as
     *            the string {@code "0"}
     */
    private void putRowKeyToHLLNew(String[] row) {
        for (int i = 0; i < nRowKey; i++) {
            String column = row[i] == null ? "0" : row[i];
            byte[] hash = hf.newHasher().putUnencodedChars(column).hash().asBytes();
            // Adding the column index keeps equal values in different columns from hashing alike.
            rowHashCodesLong[i] = Bytes.toLong(hash) + i;
        }

        for (int i = 0; i < allCuboidsBitSet.length; i++) {
            long cuboidHash = 0;
            for (Integer columnIndex : allCuboidsBitSet[i]) {
                cuboidHash += rowHashCodesLong[columnIndex.intValue()];
            }
            allCuboidsHLL[i].addHashDirectly(cuboidHash);
        }
    }
}
