package org.apache.kylin.engine.spark.builder;

import com.esotericsoftware.kryo.Kryo;
import com.esotericsoftware.kryo.io.Output;
import it.unimi.dsi.fastutil.objects.Object2LongOpenHashMap;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.kylin.engine.spark.job.NSparkCubingUtil;
import org.apache.kylin.engine.spark.metadata.ColumnDesc;
import org.apache.kylin.engine.spark.metadata.SegmentInfo;
import org.apache.spark.dict.NGlobalDictionary;
import org.apache.spark.internal.Logging;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.KylinFunctions$;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.StringType$;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.utils.SparkVersionUtils$;
import org.slf4j.Logger;
import scala.Function0;
import scala.Predef$;
import scala.collection.IterableLike;
import scala.collection.JavaConverters$;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.TraversableLike;
import scala.collection.immutable.List;
import scala.collection.immutable.List$;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayOps;
import scala.reflect.ClassTag$;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.IntRef;
import scala.runtime.ObjectRef;

/* compiled from: CubeTableEncoder.scala */
/* loaded from: input_file:WEB-INF/lib/kylin-spark-engine-4.0.2.jar:org/apache/kylin/engine/spark/builder/CubeTableEncoder$.class */
/**
 * Singleton module class (decompiled from the Scala object in {@code CubeTableEncoder.scala}).
 *
 * <p>It mixes in Spark's {@link Logging} trait and exposes {@link #encodeTable}, which appends one
 * dictionary-encoded column (built with {@code KylinFunctions.dict_encode}) to a dataset for every
 * requested column.
 */
public final class CubeTableEncoder$ implements Logging {
    /** The single instance; assigned by the private constructor during static initialization. */
    public static CubeTableEncoder$ MODULE$;
    /** Backing field for the logger slot required by the {@link Logging} trait. */
    private transient Logger org$apache$spark$internal$Logging$$log_;

    static {
        new CubeTableEncoder$();
    }

    // ---------------------------------------------------------------------------------------
    // Logging trait forwarders: each method simply delegates to the matching static helper
    // generated on the Logging interface.
    // ---------------------------------------------------------------------------------------

    public String logName() {
        return Logging.logName$(this);
    }

    public Logger log() {
        return Logging.log$(this);
    }

    public void logInfo(Function0<String> function0) {
        Logging.logInfo$(this, function0);
    }

    public void logDebug(Function0<String> function0) {
        Logging.logDebug$(this, function0);
    }

    public void logTrace(Function0<String> function0) {
        Logging.logTrace$(this, function0);
    }

    public void logWarning(Function0<String> function0) {
        Logging.logWarning$(this, function0);
    }

    public void logError(Function0<String> function0) {
        Logging.logError$(this, function0);
    }

    public void logInfo(Function0<String> function0, Throwable th) {
        Logging.logInfo$(this, function0, th);
    }

    public void logDebug(Function0<String> function0, Throwable th) {
        Logging.logDebug$(this, function0, th);
    }

    public void logTrace(Function0<String> function0, Throwable th) {
        Logging.logTrace$(this, function0, th);
    }

    public void logWarning(Function0<String> function0, Throwable th) {
        Logging.logWarning$(this, function0, th);
    }

    public void logError(Function0<String> function0, Throwable th) {
        Logging.logError$(this, function0, th);
    }

    public boolean isTraceEnabled() {
        return Logging.isTraceEnabled$(this);
    }

    public void initializeLogIfNecessary(boolean z) {
        Logging.initializeLogIfNecessary$(this, z);
    }

    public boolean initializeLogIfNecessary(boolean z, boolean z2) {
        return Logging.initializeLogIfNecessary$(this, z, z2);
    }

    public boolean initializeLogIfNecessary$default$2() {
        return Logging.initializeLogIfNecessary$default$2$(this);
    }

    public void initializeForcefully(boolean z, boolean z2) {
        Logging.initializeForcefully$(this, z, z2);
    }

    public Logger org$apache$spark$internal$Logging$$log_() {
        return this.org$apache$spark$internal$Logging$$log_;
    }

    public void org$apache$spark$internal$Logging$$log__$eq(Logger logger) {
        this.org$apache$spark$internal$Logging$$log_ = logger;
    }

    /**
     * Appends a dictionary-encoded column to {@code dataset} for every column in {@code set}.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>When {@code isLessThanSparkVersion("2.4", true)} holds, require
     *       {@code spark.sql.adaptive.enabled} to be false.</li>
     *   <li>Count the rows of {@code dataset} and divide by the configured
     *       global-dictionary threshold bucket size.</li>
     *   <li>Encode each column via {@link #$anonfun$encodeTable$2}, which also tracks the largest
     *       per-column partition count.</li>
     *   <li>If {@code set} is non-empty and row-key repartitioning is enabled, repartition the
     *       result by those row-key columns that exist in the encoded schema.</li>
     * </ol>
     *
     * @param dataset     source rows to encode
     * @param segmentInfo segment metadata supplying the project name, row-key columns and Kylin config
     * @param set         columns to dictionary-encode
     * @param str         path component placed under the job tmp dir for skewed-data files
     *                    (NOTE(review): looks like a job id; confirm against the caller)
     * @return the dataset with the encoded columns appended
     * @throws AssertionError if adaptive execution is enabled on a Spark version covered by the check
     */
    /* JADX WARN: Type inference failed for: r1v19, types: [org.apache.spark.sql.Dataset, T] */
    public Dataset<Row> encodeTable(Dataset<Row> dataset, SegmentInfo segmentInfo, Set<ColumnDesc> set, String str) {
        if (SparkVersionUtils$.MODULE$.isLessThanSparkVersion("2.4", true)) {
            // The decompiler emitted a call to a synthetic "m9292assert" because Scala's
            // Predef.assert cannot be named in Java source; this is the equivalent explicit check
            // (Predef.assert throws AssertionError("assertion failed: " + message)).
            boolean adaptiveEnabled = new StringOps(Predef$.MODULE$.augmentString(dataset.sparkSession().conf().get("spark.sql.adaptive.enabled", "false"))).toBoolean();
            if (adaptiveEnabled) {
                throw new AssertionError("assertion failed: " + "Parameter 'spark.sql.adaptive.enabled' must be false when encode tables.");
            }
        }
        StructType schema = dataset.schema();
        // Holder for the progressively encoded dataset; mutated by the per-column lambda below.
        ObjectRef create = ObjectRef.create(dataset);
        dataset.sparkSession().sparkContext().setJobDescription("Encode count source data.");
        long count = dataset.count() / segmentInfo.kylinconf().getGlobalDictV2ThresholdBucketSize();
        // Holds the largest partition count computed across all encoded columns.
        IntRef create2 = IntRef.create(0);
        ((IterableLike) JavaConverters$.MODULE$.asScalaSetConverter(set).asScala()).foreach(columnDesc -> {
            $anonfun$encodeTable$2(segmentInfo, count, create2, schema, create, dataset, str, columnDesc);
            return BoxedUnit.UNIT;
        });
        dataset.sparkSession().sparkContext().setJobDescription((String) null);
        if (!set.isEmpty() && segmentInfo.kylinconf().rePartitionEncodedDatasetWithRowKey()) {
            // Names of every column present after encoding.
            Seq seq = (Seq) ((Dataset) create.elem).schema().map(structField -> {
                return structField.name();
            }, Seq$.MODULE$.canBuildFrom());
            // Row-key columns (dot-converted) that actually exist in the encoded dataset.
            List list = (List) ((List) ((TraversableLike) segmentInfo.allRowKeyCols().map(columnDesc2 -> {
                return NSparkCubingUtil.convertFromDot(columnDesc2.identity());
            }, List$.MODULE$.canBuildFrom())).filter(obj -> {
                return BoxesRunTime.boxToBoolean(seq.contains(obj));
            })).map(str2 -> {
                return functions$.MODULE$.col(str2);
            }, List$.MODULE$.canBuildFrom());
            // An explicitly configured positive partition count overrides the computed maximum.
            if (segmentInfo.kylinconf().getRepartitionNumAfterEncode() > 0) {
                create2.elem = segmentInfo.kylinconf().getRepartitionNumAfterEncode();
            }
            logInfo(() -> {
                return new StringBuilder(61).append("repartition encoded dataset to ").append(create2.elem).append(" partitions to avoid data skew").toString();
            });
            create.elem = ((Dataset) create.elem).repartition(create2.elem, Predef$.MODULE$.wrapRefArray((Object[]) list.toArray(ClassTag$.MODULE$.apply(Column.class))));
        }
        return (Dataset) create.elem;
    }

    /**
     * Stores one collected row in the skew map: column 0 (string) is the key, column 1 (long)
     * is the value.
     *
     * @return the value returned by {@code Object2LongOpenHashMap.put}
     */
    public static final /* synthetic */ long $anonfun$encodeTable$4(Object2LongOpenHashMap object2LongOpenHashMap, Row row) {
        return object2LongOpenHashMap.put((Object2LongOpenHashMap) row.getString(0), row.getLong(1));
    }

    /**
     * Encodes a single column and stores the resulting dataset back into {@code objectRef}.
     *
     * <p>The partition count used is {@code ((j / bucketSize) + 1) * bucketSize}, i.e. the smallest
     * multiple of the dictionary bucket size that is strictly greater than {@code j}; the largest
     * such value seen so far is kept in {@code intRef}.
     *
     * <p>When skew detection is enabled, a sample of the column taken from the original
     * {@code dataset} is grouped by value; values whose sample count exceeds
     * {@code sampleSize * skewPercentageThreshHold} are collected together with their encoded
     * value, written with Kryo to a file under the job tmp dir, and the dataset is repartitioned
     * on a {@code scatter_skew_data} column before encoding. Otherwise the dataset is
     * repartitioned on the string-cast column itself.
     *
     * @param segmentInfo segment metadata and Kylin config
     * @param j           source row count divided by the threshold bucket size (see caller)
     * @param intRef      running maximum of the per-column partition count
     * @param structType  schema of the original dataset
     * @param objectRef   holder of the dataset encoded so far; updated in place
     * @param dataset     the original, un-encoded dataset (used for sampling)
     * @param str         path component for the skewed-data file location
     * @param columnDesc  the column to encode
     */
    /* JADX WARN: Type inference failed for: r1v26, types: [org.apache.spark.sql.Dataset, T] */
    /* JADX WARN: Type inference failed for: r1v61, types: [org.apache.spark.sql.Dataset, T] */
    public static final /* synthetic */ void $anonfun$encodeTable$2(SegmentInfo segmentInfo, long j, IntRef intRef, StructType structType, ObjectRef objectRef, Dataset dataset, String str, ColumnDesc columnDesc) {
        int bucketSizeOrDefault = new NGlobalDictionary(segmentInfo.project(), columnDesc.tableAliasName(), columnDesc.columnName(), segmentInfo.kylinconf().getHdfsWorkingDirectory()).getBucketSizeOrDefault(segmentInfo.kylinconf().getGlobalDictV2MinHashPartitions());
        // NOTE(review): the long product is narrowed to int without a range check.
        int i = (int) (((j / bucketSizeOrDefault) + 1) * bucketSizeOrDefault);
        if (i > intRef.elem) {
            intRef.elem = i;
        }
        String convertFromDot = NSparkCubingUtil.convertFromDot(columnDesc.identity());
        int fieldIndex = structType.fieldIndex(convertFromDot);
        // Dictionary identifier passed to dict_encode: project, table alias, column and working dir.
        String mkString = new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(new String[]{segmentInfo.project(), columnDesc.tableAliasName(), columnDesc.columnName(), segmentInfo.kylinconf().getHdfsWorkingDirectory()})).mkString("_0_DOT_0_");
        // Name of the new encoded column: original field name plus the encode suffix.
        String concat = structType.apply(fieldIndex).name().concat(CubeBuilderHelper$.MODULE$.ENCODE_SUFFIX());
        Column as = KylinFunctions$.MODULE$.dict_encode(functions$.MODULE$.col(convertFromDot).cast(StringType$.MODULE$), functions$.MODULE$.lit(mkString), functions$.MODULE$.lit(BoxesRunTime.boxToInteger(bucketSizeOrDefault)).cast(StringType$.MODULE$)).as(concat);
        // All columns of the dataset encoded so far; the new encoded column is appended to these.
        Seq seq = (Seq) ((Dataset) objectRef.elem).schema().map(structField -> {
            return functions$.MODULE$.col(structField.name());
        }, Seq$.MODULE$.canBuildFrom());
        // Set to true once the skew-aware path has already produced the encoded dataset.
        boolean z = false;
        if (segmentInfo.kylinconf().detectDataSkewInDictEncodingEnabled()) {
            Column cast = functions$.MODULE$.col(convertFromDot).cast(StringType$.MODULE$);
            Dataset cache = dataset.select(Predef$.MODULE$.wrapRefArray(new Column[]{cast})).sample(segmentInfo.kylinconf().sampleRateInEncodingSkewDetection()).cache();
            long count = cache.count();
            Path path = new Path(new StringBuilder(14).append(segmentInfo.kylinconf().getJobTmpDir(segmentInfo.project())).append("/").append(str).append("/skewed_data/").append(columnDesc.identity()).toString());
            Object2LongOpenHashMap object2LongOpenHashMap = new Object2LongOpenHashMap();
            // Collect (value, encoded value) for every value whose sample frequency exceeds the threshold.
            new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[]) cache.groupBy(convertFromDot, Predef$.MODULE$.wrapRefArray(new String[0])).agg(functions$.MODULE$.count(functions$.MODULE$.lit(BoxesRunTime.boxToInteger(1))).alias("count_value"), Predef$.MODULE$.wrapRefArray(new Column[0])).filter(functions$.MODULE$.col("count_value").$greater(BoxesRunTime.boxToDouble(count * segmentInfo.kylinconf().skewPercentageThreshHold()))).repartition(i, Predef$.MODULE$.wrapRefArray(new Column[]{cast})).select((Seq) scala.collection.mutable.Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Column[]{cast, as}))).collect())).foreach(row -> {
                return BoxesRunTime.boxToLong($anonfun$encodeTable$4(object2LongOpenHashMap, row));
            });
            cache.unpersist();
            if (object2LongOpenHashMap.size() > 0) {
                z = true;
                Kryo kryo = new Kryo();
                FileSystem fileSystem = path.getFileSystem(new Configuration());
                // Remove any previous output at this location before writing.
                if (fileSystem.exists(path)) {
                    fileSystem.delete(path, true);
                }
                // try-with-resources guarantees the underlying stream is closed even if
                // serialization fails part-way through.
                try (Output output = new Output(fileSystem.create(path))) {
                    kryo.writeClassAndObject(output, object2LongOpenHashMap);
                }
                Column alias = KylinFunctions$.MODULE$.scatter_skew_data(cast, functions$.MODULE$.lit(path.toString())).alias(new StringBuilder(18).append("scatter_skew_data_").append(columnDesc.columnName()).toString());
                // Same dict_encode call as above, but the dictionary identifier also carries the skew-file path.
                as = KylinFunctions$.MODULE$.dict_encode(functions$.MODULE$.col(convertFromDot).cast(StringType$.MODULE$), functions$.MODULE$.lit(new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(new String[]{segmentInfo.project(), columnDesc.tableAliasName(), columnDesc.columnName(), segmentInfo.kylinconf().getHdfsWorkingDirectory(), path.toString()})).mkString("_0_DOT_0_")), functions$.MODULE$.lit(BoxesRunTime.boxToInteger(bucketSizeOrDefault)).cast(StringType$.MODULE$)).alias(concat);
                // Add the scatter column, repartition on it, then project back to original columns + encoded column.
                objectRef.elem = ((Dataset) objectRef.elem).select((Seq) seq.$plus$plus(scala.collection.mutable.Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Column[]{alias})), Seq$.MODULE$.canBuildFrom())).repartition(i, Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.col(new StringBuilder(18).append("scatter_skew_data_").append(columnDesc.columnName()).toString())})).select((Seq) seq.$plus$plus(scala.collection.mutable.Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Column[]{as})), Seq$.MODULE$.canBuildFrom()));
            }
        }
        if (z) {
            return;
        }
        // Default path: repartition on the string-cast column and append the encoded column.
        objectRef.elem = ((Dataset) objectRef.elem).repartition(i, Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.col(convertFromDot).cast(StringType$.MODULE$)})).select((Seq) seq.$plus$plus(scala.collection.mutable.Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new Column[]{as})), Seq$.MODULE$.canBuildFrom()));
    }

    /** Registers this instance as {@link #MODULE$} and runs the {@link Logging} trait initializer. */
    private CubeTableEncoder$() {
        MODULE$ = this;
        Logging.$init$(this);
    }
}
