package org.apache.hudi;

import java.util.List;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.client.model.HoodieInternalRow;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.engine.TaskContextSupplier;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordPayload;
import org.apache.hudi.common.util.ReflectionUtils;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.index.SparkHoodieIndexFactory;
import org.apache.hudi.keygen.BuiltinKeyGenerator;
import org.apache.hudi.keygen.SparkKeyGeneratorInterface;
import org.apache.hudi.table.BulkInsertPartitioner;
import org.apache.hudi.table.HoodieTable;
import org.apache.hudi.table.action.commit.BulkInsertDataInternalWriterHelper;
import org.apache.spark.internal.Logging;
import org.apache.spark.rdd.RDD;
import org.apache.spark.rdd.RDD$;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.HoodieUnsafeRowUtils;
import org.apache.spark.sql.HoodieUnsafeRowUtils$;
import org.apache.spark.sql.HoodieUnsafeUtils$;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.catalyst.expressions.Alias;
import org.apache.spark.sql.catalyst.expressions.Alias$;
import org.apache.spark.sql.catalyst.expressions.Literal;
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan;
import org.apache.spark.sql.catalyst.plans.logical.Project;
import org.apache.spark.sql.types.StringType$;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructField$;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.sql.types.StructType$;
import org.apache.spark.unsafe.types.UTF8String;
import org.slf4j.Logger;
import scala.Function0;
import scala.Predef$;
import scala.Tuple2;
import scala.collection.IterableLike;
import scala.collection.JavaConverters$;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.TraversableLike;
import scala.collection.immutable.Set;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayOps;
import scala.collection.mutable.Buffer$;
import scala.math.Ordering$String$;
import scala.reflect.ClassTag$;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;

/* compiled from: HoodieDatasetBulkInsertHelper.scala */
/* loaded from: input_file:org/apache/hudi/HoodieDatasetBulkInsertHelper$.class */
/**
 * Singleton holder (Scala {@code object} compiled to a {@code $} class) with helpers that prepare a
 * {@code Dataset<Row>} for a row-based bulk insert and that write the prepared rows out.
 *
 * <p>The single instance is published through {@link #MODULE$} by the private constructor, which is
 * triggered from the static initializer. All {@code log*} / {@code initialize*} methods simply forward
 * to the static helpers of Spark's {@link Logging} trait.
 */
public final class HoodieDatasetBulkInsertHelper$ implements Logging {
    /** The singleton instance; assigned once from the private constructor. */
    public static HoodieDatasetBulkInsertHelper$ MODULE$;
    /** Backing field for the logger managed by the {@link Logging} trait. */
    private transient Logger org$apache$spark$internal$Logging$$log_;

    static {
        // Instantiating the class assigns MODULE$ as a side effect of the constructor.
        new HoodieDatasetBulkInsertHelper$();
    }

    // ---------------------------------------------------------------------------------------------
    // Logging trait forwarders
    // ---------------------------------------------------------------------------------------------

    public String logName() {
        return Logging.logName$(this);
    }

    public Logger log() {
        return Logging.log$(this);
    }

    public void logInfo(Function0<String> function0) {
        Logging.logInfo$(this, function0);
    }

    public void logDebug(Function0<String> function0) {
        Logging.logDebug$(this, function0);
    }

    public void logTrace(Function0<String> function0) {
        Logging.logTrace$(this, function0);
    }

    public void logWarning(Function0<String> function0) {
        Logging.logWarning$(this, function0);
    }

    public void logError(Function0<String> function0) {
        Logging.logError$(this, function0);
    }

    public void logInfo(Function0<String> function0, Throwable th) {
        Logging.logInfo$(this, function0, th);
    }

    public void logDebug(Function0<String> function0, Throwable th) {
        Logging.logDebug$(this, function0, th);
    }

    public void logTrace(Function0<String> function0, Throwable th) {
        Logging.logTrace$(this, function0, th);
    }

    public void logWarning(Function0<String> function0, Throwable th) {
        Logging.logWarning$(this, function0, th);
    }

    public void logError(Function0<String> function0, Throwable th) {
        Logging.logError$(this, function0, th);
    }

    public boolean isTraceEnabled() {
        return Logging.isTraceEnabled$(this);
    }

    public void initializeLogIfNecessary(boolean z) {
        Logging.initializeLogIfNecessary$(this, z);
    }

    public boolean initializeLogIfNecessary(boolean z, boolean z2) {
        return Logging.initializeLogIfNecessary$(this, z, z2);
    }

    public boolean initializeLogIfNecessary$default$2() {
        return Logging.initializeLogIfNecessary$default$2$(this);
    }

    public void initializeForcefully(boolean z, boolean z2) {
        Logging.initializeForcefully$(this, z, z2);
    }

    public Logger org$apache$spark$internal$Logging$$log_() {
        return this.org$apache$spark$internal$Logging$$log_;
    }

    public void org$apache$spark$internal$Logging$$log__$eq(Logger logger) {
        this.org$apache$spark$internal$Logging$$log_ = logger;
    }

    // ---------------------------------------------------------------------------------------------
    // Bulk-insert helpers
    // ---------------------------------------------------------------------------------------------

    /**
     * Prepends the five Hudi meta columns (commit time, commit seqno, record key, partition path,
     * file name; all string typed) to {@code dataset} and repartitions the result.
     *
     * <ul>
     *   <li>When {@code hoodieWriteConfig.populateMetaFields()} is true, a key generator is
     *       instantiated reflectively inside every partition and each row is wrapped into a
     *       {@link HoodieInternalRow} carrying the generated record key and partition path; the
     *       commit time, commit seqno and file name slots are filled with empty strings. If
     *       {@code shouldCombineBeforeInsert()} is also true the rows are de-duplicated first
     *       (see {@code dedupeRows}).</li>
     *   <li>Otherwise the meta columns are added as empty-string literals by projecting on top of
     *       the dataset's logical plan; no de-duplication happens on this path.</li>
     * </ul>
     *
     * @param dataset               input rows
     * @param hoodieWriteConfig     write configuration (key generator class, parallelism, flags)
     * @param bulkInsertPartitioner partitioner used for the final {@code repartitionRecords} call
     * @param z                     when true, partition-path columns are dropped before repartitioning
     * @return the repartitioned dataset with meta columns in front of the original columns
     */
    public Dataset<Row> prepareForBulkInsert(Dataset<Row> dataset, HoodieWriteConfig hoodieWriteConfig, BulkInsertPartitioner<Dataset<Row>> bulkInsertPartitioner, boolean z) {
        Dataset<Row> prependedDataset;
        boolean populateMetaFields = hoodieWriteConfig.populateMetaFields();
        StructType schema = dataset.schema();
        Seq metaFields = Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new StructField[]{new StructField(HoodieRecord.COMMIT_TIME_METADATA_FIELD, StringType$.MODULE$, StructField$.MODULE$.apply$default$3(), StructField$.MODULE$.apply$default$4()), new StructField(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, StringType$.MODULE$, StructField$.MODULE$.apply$default$3(), StructField$.MODULE$.apply$default$4()), new StructField(HoodieRecord.RECORD_KEY_METADATA_FIELD, StringType$.MODULE$, StructField$.MODULE$.apply$default$3(), StructField$.MODULE$.apply$default$4()), new StructField(HoodieRecord.PARTITION_PATH_METADATA_FIELD, StringType$.MODULE$, StructField$.MODULE$.apply$default$3(), StructField$.MODULE$.apply$default$4()), new StructField(HoodieRecord.FILENAME_METADATA_FIELD, StringType$.MODULE$, StructField$.MODULE$.apply$default$3(), StructField$.MODULE$.apply$default$4())}));
        // Target schema: meta fields first, followed by the original fields.
        StructType updatedSchema = StructType$.MODULE$.apply((Seq) metaFields.$plus$plus(new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(schema.fields())), Seq$.MODULE$.canBuildFrom()));
        if (populateMetaFields) {
            String keyGeneratorClassName = hoodieWriteConfig.getStringOrThrow(HoodieWriteConfig.KEYGENERATOR_CLASS_NAME, "Key-generator class name is required");
            RDD<InternalRow> prependedRdd = dataset.queryExecution().toRdd().mapPartitions(iterator -> {
                // The key generator is created once per partition (inside the task), not per row.
                SparkKeyGeneratorInterface keyGenerator = (SparkKeyGeneratorInterface) ReflectionUtils.loadClass(keyGeneratorClassName, new TypedProperties(hoodieWriteConfig.getProps()));
                return iterator.map(internalRow -> {
                    return new HoodieInternalRow(UTF8String.EMPTY_UTF8, UTF8String.EMPTY_UTF8, keyGenerator.getRecordKey(internalRow, schema), keyGenerator.getPartitionPath(internalRow, schema), UTF8String.EMPTY_UTF8, internalRow, false);
                });
            }, dataset.queryExecution().toRdd().mapPartitions$default$2(), ClassTag$.MODULE$.apply(InternalRow.class));
            prependedDataset = HoodieUnsafeUtils$.MODULE$.createDataFrameFromRDD(dataset.sparkSession(), hoodieWriteConfig.shouldCombineBeforeInsert() ? dedupeRows(prependedRdd, updatedSchema, hoodieWriteConfig.getPreCombineField(), SparkHoodieIndexFactory.isGlobalIndex(hoodieWriteConfig)) : prependedRdd, updatedSchema);
        } else {
            LogicalPlan logical = dataset.queryExecution().logical();
            // Meta columns become empty-string literals aliased to the meta field names.
            prependedDataset = HoodieUnsafeUtils$.MODULE$.createDataFrameFrom(dataset.sparkSession(), new Project((Seq) ((Seq) metaFields.map(structField -> {
                Literal literal = new Literal(UTF8String.EMPTY_UTF8, StringType$.MODULE$);
                String name = structField.name();
                return new Alias(literal, name, Alias$.MODULE$.apply$default$3(literal, name), Alias$.MODULE$.apply$default$4(literal, name), Alias$.MODULE$.apply$default$5(literal, name), Alias$.MODULE$.apply$default$6(literal, name));
            }, Seq$.MODULE$.canBuildFrom())).$plus$plus(logical.output(), Seq$.MODULE$.canBuildFrom()), logical));
        }
        return bulkInsertPartitioner.repartitionRecords(z ? dropPartitionColumns(prependedDataset, hoodieWriteConfig) : prependedDataset, hoodieWriteConfig.getBulkInsertShuffleParallelism());
    }

    /**
     * Writes {@code dataset} through a {@link BulkInsertDataInternalWriterHelper} created per Spark
     * partition and returns the resulting write statuses.
     *
     * <p>The dataset is first repartitioned via {@code bulkInsertPartitioner.repartitionRecords(dataset, i)}.
     * Inside every partition each row is passed to the writer, the writer is closed, and its internal
     * write statuses are converted with {@code toWriteStatus()}. The statuses of all partitions are
     * collected and then re-distributed through {@code hoodieTable.getContext().parallelize(...)}.
     *
     * <p>The writer is closed exactly once per partition, whether or not writing succeeded.
     *
     * @param dataset               rows to write; its schema is handed to the writer
     * @param str                   forwarded as the third writer argument (presumably the instant time - confirm)
     * @param hoodieTable           table supplying the engine context and task context supplier
     * @param hoodieWriteConfig     write configuration
     * @param bulkInsertPartitioner partitioner used to repartition and to report whether partition records are sorted
     * @param i                     parallelism passed to {@code repartitionRecords}
     * @param z                     forwarded as the last writer argument (meaning not visible here - confirm against the writer)
     * @return write statuses of all partitions
     */
    public HoodieData<WriteStatus> bulkInsert(Dataset<Row> dataset, String str, HoodieTable<? extends HoodieRecordPayload<? extends HoodieRecordPayload<?>>, ?, ?, ?> hoodieTable, HoodieWriteConfig hoodieWriteConfig, BulkInsertPartitioner<Dataset<Row>> bulkInsertPartitioner, int i, boolean z) {
        Dataset<Row> repartitionRecords = bulkInsertPartitioner.repartitionRecords(dataset, i);
        boolean arePartitionRecordsSorted = bulkInsertPartitioner.arePartitionRecordsSorted();
        StructType schema = dataset.schema();
        return hoodieTable.getContext().parallelize((List) JavaConverters$.MODULE$.seqAsJavaListConverter(new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((WriteStatus[]) repartitionRecords.queryExecution().toRdd().mapPartitions(iterator -> {
            TaskContextSupplier taskContextSupplier = hoodieTable.getTaskContextSupplier();
            BulkInsertDataInternalWriterHelper writer = new BulkInsertDataInternalWriterHelper(hoodieTable, hoodieWriteConfig, str, Predef$.MODULE$.Integer2int(taskContextSupplier.getPartitionIdSupplier().get()), Predef$.MODULE$.Integer2int(taskContextSupplier.getStageIdSupplier().get()), Predef$.MODULE$.Long2long(taskContextSupplier.getAttemptIdSupplier().get()), schema, hoodieWriteConfig.populateMetaFields(), arePartitionRecordsSorted, z);
            try {
                iterator.foreach(internalRow -> {
                    writer.write(internalRow);
                    return BoxedUnit.UNIT;
                });
            } finally {
                // Single close on both the success and the failure path; a failure inside close()
                // or in the status conversion below must not trigger a second close().
                writer.close();
            }
            return ((IterableLike) ((TraversableLike) JavaConverters$.MODULE$.asScalaBufferConverter(writer.getWriteStatuses()).asScala()).map(hoodieInternalWriteStatus -> {
                return hoodieInternalWriteStatus.toWriteStatus();
            }, Buffer$.MODULE$.canBuildFrom())).iterator();
        }, repartitionRecords.queryExecution().toRdd().mapPartitions$default$2(), ClassTag$.MODULE$.apply(WriteStatus.class)).collect())).toList()).asJava());
    }

    /**
     * De-duplicates rows that already carry the meta columns.
     *
     * <p>Rows are keyed by record key alone when {@code z} is true, otherwise by
     * {@code "<partitionPath>:<recordKey>"}. For every key the row whose value at the {@code str}
     * field path compares greater-or-equal is kept (ties keep the left operand of the reduce).
     * Each row is copied before being shuffled.
     *
     * @param rdd        rows laid out according to {@code structType}
     * @param structType schema containing the record-key and partition-path meta fields
     * @param str        (possibly nested) field used for ordering; the caller passes the pre-combine field
     * @param z          whether to key by record key only; the caller passes the global-index flag
     * @return one row per key
     */
    private RDD<InternalRow> dedupeRows(RDD<InternalRow> rdd, StructType structType, String str, boolean z) {
        int recordKeyIndex = structType.fieldIndex(HoodieRecord.RECORD_KEY_METADATA_FIELD);
        int partitionPathIndex = structType.fieldIndex(HoodieRecord.PARTITION_PATH_METADATA_FIELD);
        HoodieUnsafeRowUtils.NestedFieldPath orderingFieldPath = HoodieUnsafeRowUtils$.MODULE$.composeNestedFieldPath(structType, str);
        return RDD$.MODULE$.rddToPairRDDFunctions(RDD$.MODULE$.rddToPairRDDFunctions(rdd.map(internalRow -> {
            String key;
            if (z) {
                key = internalRow.getString(recordKeyIndex);
            } else {
                String partitionPath = internalRow.getString(partitionPathIndex);
                key = partitionPath + ":" + internalRow.getString(recordKeyIndex);
            }
            // Copy the row so the retained instance is independent of the one handed to this function.
            return new Tuple2(key, internalRow.copy());
        }, ClassTag$.MODULE$.apply(Tuple2.class)), ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(InternalRow.class), Ordering$String$.MODULE$).reduceByKey((internalRow2, internalRow3) -> {
            return ((Comparable) HoodieUnsafeRowUtils$.MODULE$.getNestedInternalRowValue(internalRow2, orderingFieldPath)).compareTo((Comparable) HoodieUnsafeRowUtils$.MODULE$.getNestedInternalRowValue(internalRow3, orderingFieldPath)) >= 0 ? internalRow2 : internalRow3;
        }), ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(InternalRow.class), Ordering$String$.MODULE$).values();
    }

    /**
     * Drops the partition-path columns reported by the configured key generator from {@code dataset}.
     * Field names containing a {@code '.'} are treated as nested: they are not dropped and a warning
     * listing them is logged instead.
     */
    private Dataset<Row> dropPartitionColumns(Dataset<Row> dataset, HoodieWriteConfig hoodieWriteConfig) {
        Set partitionPathFields = getPartitionPathFields(hoodieWriteConfig).toSet();
        Set nestedPartitionPathFields = (Set) partitionPathFields.filter(str -> {
            return BoxesRunTime.boxToBoolean($anonfun$dropPartitionColumns$1(str));
        });
        if (nestedPartitionPathFields.nonEmpty()) {
            logWarning(() -> {
                return "Can not drop nested partition path fields: " + nestedPartitionPathFields;
            });
        }
        return dataset.drop(partitionPathFields.$minus$minus(nestedPartitionPathFields).toSeq());
    }

    /**
     * Instantiates the configured key generator reflectively and returns its partition-path fields.
     * The key generator class name is mandatory; the loaded class is cast to {@link BuiltinKeyGenerator}.
     */
    private Seq<String> getPartitionPathFields(HoodieWriteConfig hoodieWriteConfig) {
        // Same lookup (and same failure message) as prepareForBulkInsert, so a missing class name is
        // reported explicitly instead of reaching the reflective loader as null.
        String keyGeneratorClassName = hoodieWriteConfig.getStringOrThrow(HoodieWriteConfig.KEYGENERATOR_CLASS_NAME, "Key-generator class name is required");
        BuiltinKeyGenerator keyGenerator = (BuiltinKeyGenerator) ReflectionUtils.loadClass(keyGeneratorClassName, new TypedProperties(hoodieWriteConfig.getProps()));
        return (Seq) JavaConverters$.MODULE$.asScalaBufferConverter(keyGenerator.getPartitionPathFields()).asScala();
    }

    /** Predicate used by {@code dropPartitionColumns}: true when the field name contains a {@code '.'}. */
    public static final /* synthetic */ boolean $anonfun$dropPartitionColumns$1(String str) {
        return new StringOps(Predef$.MODULE$.augmentString(str)).contains(BoxesRunTime.boxToCharacter('.'));
    }

    private HoodieDatasetBulkInsertHelper$() {
        MODULE$ = this;
        Logging.$init$(this);
    }
}
