package org.apache.kylin.engine.spark;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.util.Bytes;
import org.apache.kylin.common.util.Dictionary;
import org.apache.kylin.common.util.HadoopUtil;
import org.apache.kylin.common.util.StringUtil;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.dict.ShrunkenDictionary;
import org.apache.kylin.engine.EngineFactory;
import org.apache.kylin.engine.mr.IMROutput2;
import org.apache.kylin.engine.mr.common.BatchConstants;
import org.apache.kylin.engine.mr.common.CubeStatsReader;
import org.apache.kylin.engine.spark.ISparkInput;
import org.apache.kylin.engine.spark.ISparkOutput;
import org.apache.kylin.metadata.model.TblColRef;
import org.apache.kylin.shaded.com.google.common.collect.Lists;
import org.apache.kylin.source.SourceManager;
import org.apache.kylin.storage.StorageFactory;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.hive.HiveUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/kylin/engine/spark/SparkUtil.class */
/**
 * Static helpers shared by the Spark cubing steps: lookup of engine input/output adapters,
 * input-path resolution for sequence files, partition-count estimation, Hadoop configuration
 * tweaks, flat-table readers, and loading of per-split shrunken dictionaries.
 */
public class SparkUtil {
    private static final Logger logger = LoggerFactory.getLogger(SparkUtil.class);

    /**
     * Returns the batch cubing input side obtained from the segment's {@link ISparkInput}
     * source adapter, using the joined flat table descriptor supplied by {@link EngineFactory}.
     *
     * @param cubeSegment segment being built
     * @return the adapter's batch cubing input side, cast to the Spark-specific interface
     */
    public static ISparkInput.ISparkBatchCubingInputSide getBatchCubingInputSide(CubeSegment cubeSegment) {
        ISparkInput sparkInput = (ISparkInput) SourceManager.createEngineAdapter(cubeSegment, ISparkInput.class);
        return (ISparkInput.ISparkBatchCubingInputSide) sparkInput
                .getBatchCubingInputSide(EngineFactory.getJoinedFlatTableDesc(cubeSegment));
    }

    /**
     * Returns the batch cubing output side obtained from the segment's {@link ISparkOutput}
     * storage adapter.
     *
     * @param cubeSegment segment being built
     * @return the adapter's batch cubing output side
     */
    public static ISparkOutput.ISparkBatchCubingOutputSide getBatchCubingOutputSide(CubeSegment cubeSegment) {
        ISparkOutput sparkOutput = (ISparkOutput) StorageFactory.createEngineAdapter(cubeSegment, ISparkOutput.class);
        return sparkOutput.getBatchCubingOutputSide(cubeSegment);
    }

    /**
     * Returns the batch merge output side obtained from the segment's {@link ISparkOutput}
     * storage adapter.
     *
     * @param cubeSegment segment being merged
     * @return the adapter's batch merge output side
     */
    public static ISparkOutput.ISparkBatchMergeOutputSide getBatchMergeOutputSide2(CubeSegment cubeSegment) {
        ISparkOutput sparkOutput = (ISparkOutput) StorageFactory.createEngineAdapter(cubeSegment, ISparkOutput.class);
        return sparkOutput.getBatchMergeOutputSide(cubeSegment);
    }

    /**
     * Returns the batch merge input side obtained from the segment's {@link ISparkInput}
     * source adapter.
     *
     * @param cubeSegment segment being merged
     * @return the adapter's batch merge input side, cast to the Spark-specific interface
     */
    public static ISparkInput.ISparkBatchMergeInputSide getBatchMergeInputSide(CubeSegment cubeSegment) {
        ISparkInput sparkInput = (ISparkInput) SourceManager.createEngineAdapter(cubeSegment, ISparkInput.class);
        return (ISparkInput.ISparkBatchMergeInputSide) sparkInput.getBatchMergeInputSide(cubeSegment);
    }

    /**
     * Returns the batch optimize output side obtained from the segment's {@link IMROutput2}
     * storage adapter.
     *
     * @param cubeSegment segment being optimized
     * @return the adapter's batch optimize output side
     */
    public static IMROutput2.IMRBatchOptimizeOutputSide2 getBatchOptimizeOutputSide2(CubeSegment cubeSegment) {
        IMROutput2 mrOutput = (IMROutput2) StorageFactory.createEngineAdapter(cubeSegment, IMROutput2.class);
        return mrOutput.getBatchOptimizeOutputSide(cubeSegment);
    }

    /**
     * Opens the sequence files under {@code str} as a pair RDD.
     *
     * <p>If {@code str} has direct child directories whose names do not start with {@code "_"},
     * those directories are read together as a comma-separated path list; otherwise {@code str}
     * itself is read.
     *
     * @param str              input path
     * @param fileSystem       file system used to list the children of {@code str}
     * @param javaSparkContext Spark context used to open the sequence files
     * @param cls              key class of the sequence files
     * @param cls2             value class of the sequence files
     * @return pair RDD over the selected location(s)
     * @throws IOException if listing {@code str} fails
     */
    // Raw types are part of the published signature; the unchecked call is confined to this method.
    @SuppressWarnings({ "rawtypes", "unchecked" })
    public static JavaPairRDD parseInputPath(String str, FileSystem fileSystem, JavaSparkContext javaSparkContext, Class cls, Class cls2) throws IOException {
        Path inputPath = new Path(str);
        ArrayList<String> subDirs = Lists.newArrayList();
        for (FileStatus status : fileSystem.listStatus(inputPath)) {
            // Children whose names start with "_" are skipped.
            if (status.isDirectory() && !status.getPath().getName().startsWith("_")) {
                subDirs.add(status.getPath().toString());
            }
        }
        String location = subDirs.isEmpty() ? inputPath.toString() : StringUtil.join(subDirs, ",");
        return javaSparkContext.sequenceFile(location, cls, cls2);
    }

    /**
     * Estimates the partition count for one layer: the layer size estimate divided by the
     * configured partition cut size, truncated to {@code int} and clamped to the configured
     * minimum and maximum partition counts.
     *
     * @param i               layer index passed to {@link CubeStatsReader#estimateLayerSize}
     * @param cubeStatsReader statistics reader providing the layer size estimate
     * @param kylinConfig     configuration providing the cut size and the min/max bounds
     * @return the clamped partition count
     */
    public static int estimateLayerPartitionNum(int i, CubeStatsReader cubeStatsReader, KylinConfig kylinConfig) {
        int rawPartitions = (int) (cubeStatsReader.estimateLayerSize(i) / kylinConfig.getSparkRDDPartitionCutMB());
        int atLeastMin = Math.max(kylinConfig.getSparkMinPartition(), rawPartitions);
        return Math.min(kylinConfig.getSparkMaxPartition(), atLeastMin);
    }

    /**
     * Estimates the partition count for the whole cube: the sum of all values in the cuboid size
     * map divided by the configured partition cut size, truncated to {@code int} and clamped to
     * the configured minimum and maximum partition counts.
     *
     * @param cubeStatsReader statistics reader providing the cuboid size map
     * @param kylinConfig     configuration providing the cut size and the min/max bounds
     * @return the clamped partition count
     */
    public static int estimateTotalPartitionNum(CubeStatsReader cubeStatsReader, KylinConfig kylinConfig) {
        double totalSize = 0.0d;
        for (double cuboidSize : cubeStatsReader.getCuboidSizeMap().values()) {
            totalSize += cuboidSize;
        }
        int rawPartitions = (int) (totalSize / kylinConfig.getSparkRDDPartitionCutMB());
        int atLeastMin = Math.max(kylinConfig.getSparkMinPartition(), rawPartitions);
        return Math.min(kylinConfig.getSparkMaxPartition(), atLeastMin);
    }

    /**
     * Configures {@code job} to emit {@link Text} keys and values through
     * {@link SequenceFileOutputFormat}.
     *
     * @param job         job to configure
     * @param cubeSegment not used by this method
     * @param str         not used by this method
     * @throws Exception declared by the signature; kept for caller compatibility
     */
    public static void setHadoopConfForCuboid(Job job, CubeSegment cubeSegment, String str) throws Exception {
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
    }

    /**
     * Sets the DFS replication (from {@link KylinConfig#getCuboidDfsReplication()}) and enables
     * block-compressed output with {@code DefaultCodec} on the Spark context's Hadoop
     * configuration.
     *
     * @param sparkContext context whose Hadoop configuration is modified in place
     * @param kylinConfig  configuration providing the replication value
     * @throws Exception declared by the signature; kept for caller compatibility
     */
    public static void modifySparkHadoopConfiguration(SparkContext sparkContext, KylinConfig kylinConfig) throws Exception {
        Configuration hadoopConf = sparkContext.hadoopConfiguration();
        hadoopConf.set("dfs.replication", kylinConfig.getCuboidDfsReplication());
        hadoopConf.set("mapreduce.output.fileoutputformat.compress", "true");
        hadoopConf.set("mapreduce.output.fileoutputformat.compress.type", "BLOCK");
        hadoopConf.set("mapreduce.output.fileoutputformat.compress.codec", "org.apache.hadoop.io.compress.DefaultCodec");
    }

    /**
     * Reads input records as string arrays, either from sequence files or from a Hive table.
     *
     * <p>The sequence-file reader is used only when {@code z} is true and
     * {@link HadoopUtil#isSequenceDir} accepts {@code str}; otherwise the table named
     * {@code str2} is read through a Hive-enabled {@link SparkSession}.
     *
     * @param z                whether the sequence-file path may be used
     * @param javaSparkContext Spark context
     * @param str              path checked and read when the sequence-file route is taken
     * @param str2             table name read when the Hive route is taken
     * @return RDD with one string array per record
     * @throws IOException if the sequence-directory check fails
     */
    public static JavaRDD<String[]> hiveRecordInputRDD(boolean z, JavaSparkContext javaSparkContext, String str, String str2) throws IOException {
        if (z && HadoopUtil.isSequenceDir(javaSparkContext.hadoopConfiguration(), new Path(str))) {
            return getSequenceFormatHiveInput(javaSparkContext, str);
        }
        return getOtherFormatHiveInput(javaSparkContext, str2);
    }

    /**
     * Reads the sequence files at {@code str} and splits each {@link Text} value on
     * {@link BatchConstants#SEQUENCE_FILE_DEFAULT_DELIMITER}.
     */
    private static JavaRDD<String[]> getSequenceFormatHiveInput(JavaSparkContext javaSparkContext, String str) {
        return javaSparkContext.sequenceFile(str, BytesWritable.class, Text.class).values().map(new Function<Text, String[]>() {
            @Override
            public String[] call(Text text) throws Exception {
                // Decode only the first getLength() bytes of the array returned by getBytes().
                String line = Bytes.toString(text.getBytes(), 0, text.getLength());
                // Limit -1 makes split() keep trailing empty fields.
                return line.split(BatchConstants.SEQUENCE_FILE_DEFAULT_DELIMITER, -1);
            }
        });
    }

    /**
     * Reads the table named {@code str} through a Hive-enabled {@link SparkSession} and converts
     * every row to a string array; {@code null} cells stay {@code null}.
     */
    private static JavaRDD<String[]> getOtherFormatHiveInput(JavaSparkContext javaSparkContext, String str) {
        SparkSession sparkSession = SparkSession.builder()
                .sparkContext(HiveUtils.withHiveExternalCatalog(javaSparkContext.sc()))
                .config(javaSparkContext.getConf())
                .enableHiveSupport()
                .getOrCreate();
        return sparkSession.table(str).javaRDD().map(new Function<Row, String[]>() {
            @Override
            public String[] call(Row row) throws Exception {
                int width = row.size();
                String[] fields = new String[width];
                for (int col = 0; col < width; col++) {
                    Object cell = row.get(col);
                    fields[col] = (cell == null) ? null : cell.toString();
                }
                return fields;
            }
        });
    }

    /**
     * Builds the segment's dictionary map and, when {@link BatchConstants#ARG_SHRUNKEN_DICT_PATH}
     * is set in {@code configuration}, replaces entries with shrunken dictionaries read from
     * {@code <shrunken dict path>/<column identity>/<str>}.
     *
     * <p>Only the columns returned by {@code getAllGlobalDictColumnsNeedBuilt()} are considered.
     * A column whose file does not exist is logged at WARN level and its entry is left as built.
     * The map returned by {@code cubeSegment.buildDictionaryMap()} is modified and returned.
     *
     * @param cubeSegment   segment whose dictionaries are loaded
     * @param str           file name (split identifier) looked up under each column directory
     * @param configuration Hadoop configuration providing the shrunken dictionary path and the
     *                      file system
     * @return the dictionary map, with shrunken dictionaries substituted where available
     * @throws IOException if the file system cannot be accessed or a dictionary cannot be read
     */
    public static Map<TblColRef, Dictionary<String>> getDictionaryMap(CubeSegment cubeSegment, String str, Configuration configuration) throws IOException {
        Map<TblColRef, Dictionary<String>> dictionaryMap = cubeSegment.buildDictionaryMap();
        String shrunkenDictDir = configuration.get(BatchConstants.ARG_SHRUNKEN_DICT_PATH);
        if (shrunkenDictDir == null) {
            return dictionaryMap;
        }

        FileSystem fileSystem = FileSystem.get(configuration);
        ShrunkenDictionary.StringValueSerializer valueSerializer = new ShrunkenDictionary.StringValueSerializer();
        for (TblColRef column : cubeSegment.getCubeDesc().getAllGlobalDictColumnsNeedBuilt()) {
            Path dictFile = new Path(new Path(shrunkenDictDir, column.getIdentity()), str);
            if (!fileSystem.exists(dictFile)) {
                logger.warn("Shrunken dictionary for column {} in split {} does not exist!!!", column.getIdentity(), str);
                continue;
            }
            // try-with-resources closes the stream exactly once and keeps a read failure as the
            // primary exception, attaching any close() failure as suppressed.
            try (FSDataInputStream input = fileSystem.open(dictFile)) {
                ShrunkenDictionary<String> shrunkenDictionary = new ShrunkenDictionary<>(valueSerializer);
                shrunkenDictionary.readFields(input);
                logger.info("Read Shrunken dictionary from {} success", dictFile);
                dictionaryMap.put(column, shrunkenDictionary);
            }
        }
        return dictionaryMap;
    }
}
