package datafu.spark;

import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.datafu.types.SparkOverwriteUDAFs$;
import org.apache.spark.sql.expressions.Window$;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.LongType$;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.sql.types.StructType$;
import scala.Array$;
import scala.MatchError;
import scala.None$;
import scala.Option;
import scala.Predef$;
import scala.StringContext;
import scala.Tuple2;
import scala.collection.GenTraversableOnce;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.SeqLike;
import scala.collection.TraversableLike;
import scala.collection.TraversableOnce;
import scala.collection.immutable.$colon;
import scala.collection.immutable.IndexedSeq;
import scala.collection.immutable.IndexedSeq$;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.Set;
import scala.collection.mutable.ArrayOps;
import scala.reflect.ClassTag$;
import scala.reflect.api.Mirror;
import scala.reflect.api.TypeCreator;
import scala.reflect.api.Types;
import scala.reflect.api.Universe;
import scala.reflect.runtime.package$;
import scala.runtime.BoxesRunTime;
import scala.runtime.RichInt$;
import scala.runtime.RichLong;

/* compiled from: SparkDFUtils.scala */
/* loaded from: input_file:datafu/spark/SparkDFUtils$.class */
public final class SparkDFUtils$ {
    /**
     * The single shared instance of this class. It is assigned by the private constructor,
     * which the static initializer below invokes once at class-load time.
     */
    public static SparkDFUtils$ MODULE$;

    static {
        // The reference is deliberately discarded: the constructor itself stores
        // the new instance in MODULE$.
        new SparkDFUtils$();
    }

    /**
     * Keeps one row per group: the first row according to the supplied ordering.
     *
     * <p>This is {@code dedupTopN} with a limit of exactly one row per group.
     *
     * @param dataset the rows to deduplicate
     * @param column  the column whose distinct values define the groups
     * @param seq     the ordering applied inside each group
     * @return the input with at most one row per distinct value of {@code column}
     */
    public Dataset<Row> dedupWithOrder(Dataset<Row> dataset, Column column, Seq<Column> seq) {
        final int rowsKeptPerGroup = 1;
        return this.dedupTopN(dataset, rowsKeptPerGroup, column, seq);
    }

    /**
     * Keeps the first {@code i} rows of every group.
     *
     * <p>Rows are numbered with {@code row_number()} over a window partitioned by
     * {@code column} and ordered by {@code seq}; rows whose number exceeds {@code i}
     * are filtered out and the helper column is dropped again.
     *
     * <p>NOTE(review): the helper column is always called {@code "rn"}; an input column
     * with that name would presumably be overwritten and then dropped - confirm with callers.
     *
     * @param dataset the rows to deduplicate
     * @param i       maximum number of rows to keep per group
     * @param column  the column whose distinct values define the groups
     * @param seq     the ordering applied inside each group
     * @return the filtered rows, without the {@code "rn"} helper column
     */
    public Dataset<Row> dedupTopN(Dataset<Row> dataset, int i, Column column, Seq<Column> seq) {
        // 1-based position of each row inside its group.
        Column positionInGroup = functions$.MODULE$.row_number().over(
                Window$.MODULE$.partitionBy(Predef$.MODULE$.wrapRefArray(new Column[]{column})).orderBy(seq));
        Dataset<Row> numbered = dataset.withColumn("rn", positionInGroup);

        Column withinLimit = functions$.MODULE$.col("rn").$less$eq(BoxesRunTime.boxToInteger(i));
        Dataset<Row> kept = numbered.where(withinLimit);
        return kept.drop("rn");
    }

    /**
     * Deduplicates {@code dataset} with a group-by aggregation: for every group defined by
     * {@code seq} a single row is kept, selected by the struct built from {@code seq2}.
     *
     * <p>Steps, as implemented below:
     * <ol>
     *   <li>Optionally narrow the input columns using {@code seq4}/{@code z2} and add a
     *       {@code "sort_by_column"} column holding {@code struct(seq2)}.</li>
     *   <li>Group by {@code seq} and aggregate {@code struct(sort_by_column, *)} keyed by
     *       {@code sort_by_column} with {@code SparkOverwriteUDAFs.maxValueByKey} (when
     *       {@code z} is true) or {@code minValueByKey} (when false); the extra aggregate
     *       columns in {@code seq3} are collected into a second struct.</li>
     *   <li>Expand both structs and drop the helper columns {@code "lit_placeholder_col"}
     *       and {@code "sort_by_column"}.</li>
     *   <li>Re-create the DataFrame from the result's RDD with a schema made of the input
     *       schema's fields followed by the result fields whose names the input does not
     *       have, restricted by the same {@code seq4}/{@code z2} rule.</li>
     * </ol>
     *
     * @param dataset the rows to deduplicate
     * @param seq     grouping columns
     * @param seq2    columns packed into the sort key that selects the surviving row
     * @param z       {@code true} to use {@code maxValueByKey}, {@code false} for {@code minValueByKey}
     * @param seq3    additional aggregate expressions appended to every output row
     * @param seq4    column names used as a filter; an empty sequence disables filtering
     * @param z2      when {@code seq4} is non-empty: {@code true} keeps only the columns named
     *                in {@code seq4}, {@code false} keeps every column except those
     * @return one row per group, carrying the schema described in step 4
     */
    public Dataset<Row> dedupWithCombiner(Dataset<Row> dataset, Seq<Column> seq, Seq<Column> seq2, boolean z, Seq<Column> seq3, Seq<String> seq4, boolean z2) {
        Nil$ nil$ = Nil$.MODULE$;
        // Steps 1-3. The literal 1 aliased "lit_placeholder_col" is prepended to seq3 before
        // building the "h2" struct - presumably so the struct is never empty when seq3 is
        // (TODO confirm) - and is dropped again right after the structs are expanded.
        Dataset drop = ((seq4 != null ? !seq4.equals(nil$) : nil$ != null) ? z2 ? dataset.withColumn("sort_by_column", functions$.MODULE$.struct(seq2)).select("sort_by_column", seq4) : dataset.select(Predef$.MODULE$.wrapRefArray((Object[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(dataset.columns())).filter(str -> {
            return BoxesRunTime.boxToBoolean($anonfun$dedupWithCombiner$1(seq4, str));
        }))).map(str2 -> {
            return new Column(str2);
        }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Column.class))))).withColumn("sort_by_column", functions$.MODULE$.struct(seq2)) : dataset.withColumn("sort_by_column", functions$.MODULE$.struct(seq2))).groupBy(seq).agg(((Column) (z ? (column, column2) -> {
            return SparkOverwriteUDAFs$.MODULE$.maxValueByKey(column, column2);
        } : (column3, column4) -> {
            return SparkOverwriteUDAFs$.MODULE$.minValueByKey(column3, column4);
        }).apply(functions$.MODULE$.expr("sort_by_column"), functions$.MODULE$.expr("struct(sort_by_column, *)"))).as("h1"), Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.struct((Seq) seq3.$plus$colon(functions$.MODULE$.lit(BoxesRunTime.boxToInteger(1)).as("lit_placeholder_col"), Seq$.MODULE$.canBuildFrom())).as("h2")})).selectExpr(Predef$.MODULE$.wrapRefArray(new String[]{"h1.*", "h2.*"})).drop("lit_placeholder_col").drop("sort_by_column");
        // Step 4. The schema is rebuilt from the input's own StructFields instead of reusing
        // the aggregated result's schema - presumably to restore the original field
        // nullability/metadata (TODO confirm).
        return drop.sparkSession().createDataFrame(drop.rdd(), StructType$.MODULE$.apply(((TraversableOnce) ((TraversableLike) dataset.schema().$plus$plus((GenTraversableOnce) drop.schema().filter(structField -> {
            return BoxesRunTime.boxToBoolean($anonfun$dedupWithCombiner$5(dataset, structField));
        }), Seq$.MODULE$.canBuildFrom())).filter(structField2 -> {
            return BoxesRunTime.boxToBoolean($anonfun$dedupWithCombiner$7(seq4, z2, structField2));
        })).toList()));
    }

    /** Default for the 4th parameter ({@code z}) of {@code dedupWithCombiner}: {@code true}. */
    public boolean dedupWithCombiner$default$4() {
        return true;
    }

    /** Default for the 5th parameter ({@code seq3}) of {@code dedupWithCombiner}: the empty list. */
    public Seq<Column> dedupWithCombiner$default$5() {
        return Nil$.MODULE$;
    }

    /** Default for the 6th parameter ({@code seq4}) of {@code dedupWithCombiner}: the empty list. */
    public Seq<String> dedupWithCombiner$default$6() {
        return Nil$.MODULE$;
    }

    /** Default for the 7th parameter ({@code z2}) of {@code dedupWithCombiner}: {@code true}. */
    public boolean dedupWithCombiner$default$7() {
        return true;
    }

    /**
     * Replaces the struct column {@code str} by its fields, promoted to top-level columns.
     *
     * <p>The result selects {@code "*"} plus {@code `str`.`field`} for every field of the
     * struct, then drops {@code str}. A struct field whose name equals an existing
     * top-level column name is skipped, so the existing column is the one that remains.
     *
     * <p>Fails the {@code Predef.assert} check with the message
     * {@code "Column <str> must be of type Struct"} when the column is not a {@code StructType}.
     *
     * @param dataset the rows containing the struct column
     * @param str     name of the struct column to flatten
     * @return the input without {@code str} and with the struct's non-clashing fields appended
     */
    public Dataset<Row> flatten(Dataset<Row> dataset, String str) {
        Predef$.MODULE$.assert(dataset.schema().apply(str).dataType() instanceof StructType, () -> {
            return new StringBuilder(30).append("Column ").append(str).append(" must be of type Struct").toString();
        });
        // Names of all current top-level columns (this includes str itself).
        Set set = new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(dataset.schema().fields())).map(structField -> {
            return structField.name();
        }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class))))).toSet();
        // "*" is prepended to the back-tick quoted `str`.`field` expressions of the
        // struct fields that do not clash with a top-level column name.
        return dataset.selectExpr(Predef$.MODULE$.wrapRefArray((Object[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((String[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(dataset.schema().apply(str).dataType().fields())).filter(structField2 -> {
            return BoxesRunTime.boxToBoolean($anonfun$flatten$3(set, structField2));
        }))).map(structField3 -> {
            return new StringBuilder(5).append("`").append(str).append("`.`").append(structField3.name()).append("`").toString();
        }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class))))).$plus$colon("*", ClassTag$.MODULE$.apply(String.class)))).drop(str);
    }

    /**
     * Renames the columns of {@code dataset} positionally: the n-th existing column is
     * selected under the n-th name in {@code seq}.
     *
     * <p>The pairing is a {@code zip} of the current column names with {@code seq}.
     * NOTE(review): a zip normally stops at the shorter side, so a length mismatch would
     * silently drop columns (or ignore surplus names) - confirm callers always pass one
     * name per column.
     *
     * @param dataset the rows whose columns are renamed
     * @param seq     the new column names, in column order
     * @return a projection of {@code dataset} with the aliased columns
     * @throws MatchError if a zipped pair is null or contains a null name
     */
    public Dataset<Row> changeSchema(Dataset<Row> dataset, Seq<String> seq) {
        return dataset.select(Predef$.MODULE$.wrapRefArray((Object[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(dataset.columns())).zip(seq, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class))))).map(tuple2 -> {
            if (tuple2 != null) {
                // _1 is the existing column name, _2 the replacement name.
                String str = (String) tuple2._1();
                String str2 = (String) tuple2._2();
                if (str != null && str2 != null) {
                    return functions$.MODULE$.col(str).as(str2);
                }
            }
            throw new MatchError(tuple2);
        }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Column.class)))));
    }

    /**
     * Joins {@code dataset} with {@code dataset2} while spreading every join key over
     * {@code i} shards, so that a heavily repeated key is not processed by a single task.
     *
     * <p>Every left row is tagged with a random shard number in {@code 1..i}
     * (column {@code "randLeft"}); the right side is cross-joined with the numbers
     * {@code 1..i} (column {@code "shard"}), i.e. replicated {@code i} times. The join
     * condition is {@code column AND randLeft = shard}, and both helper columns are
     * dropped from the result.
     *
     * <p>The shard number is computed as {@code floor(rand() * i) + 1}. Since
     * {@code rand()} is drawn from {@code [0.0, 1.0)}, this is always inside {@code 1..i}.
     * The earlier {@code ceil(rand() * i)} produced shard 0 whenever {@code rand()}
     * returned exactly 0.0; shard 0 matches no replicated right row, so the left row
     * silently lost its matches.
     *
     * <p>Values of {@code i} below 1 produce no shard numbers at all, which leaves the
     * replicated right side empty.
     *
     * @param dataset  left side; each row is assigned one random shard
     * @param dataset2 right side; replicated once per shard
     * @param column   the join condition between the two sides
     * @param i        number of shards
     * @param str      Spark join type, e.g. {@code "inner"}
     * @return the joined rows without the {@code "randLeft"} and {@code "shard"} columns
     */
    public Dataset<Row> joinSkewed(Dataset<Row> dataset, Dataset<Row> dataset2, Column column, int i, String str) {
        SparkSession sparkSession = dataset.sparkSession();
        Column randLeft = sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"randLeft"}))).$(Nil$.MODULE$);
        Column shard = sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"shard"}))).$(Nil$.MODULE$);

        // floor(x) + 1 maps [0, i) onto 1..i; ceil(x) would map x == 0.0 to the
        // non-existent shard 0.
        Column leftShard = functions$.MODULE$.floor(functions$.MODULE$.rand().$times(BoxesRunTime.boxToInteger(i))).$plus(BoxesRunTime.boxToInteger(1));
        Dataset<Row> saltedLeft = dataset.withColumn("randLeft", leftShard);

        // One copy of the right side per shard number 1..i.
        Dataset<Row> replicatedRight = dataset2.crossJoin(sparkSession.implicits().localSeqToDatasetHolder(RichInt$.MODULE$.to$extension0(Predef$.MODULE$.intWrapper(1), i), sparkSession.implicits().newIntEncoder()).toDF(Predef$.MODULE$.wrapRefArray(new String[]{"shard"})));

        return saltedLeft.join(replicatedRight, column.and(randLeft.$eq$eq$eq(shard)), str).drop(randLeft).drop(shard);
    }

    /** Default for the 4th parameter ({@code i}, the number of shards) of {@code joinSkewed}: 10. */
    public int joinSkewed$default$4() {
        return 10;
    }

    /** Default for the 5th parameter ({@code str}, the join type) of {@code joinSkewed}: {@code "inner"}. */
    public String joinSkewed$default$5() {
        return "inner";
    }

    /**
     * Joins {@code dataset} with {@code dataset2} on the column named {@code str},
     * handling the most frequent keys of {@code dataset2} separately from the rest.
     *
     * <p>The {@code i} most frequent values of {@code str} in {@code dataset2} (optionally
     * only those occurring at least {@code option} times) are collected as the "skewed
     * keys". Rows of {@code dataset} are flagged according to whether their key is one of
     * them. The result is the union of:
     * <ul>
     *   <li>unflagged rows of {@code dataset} joined with the rows of {@code dataset2}
     *       whose key is not skewed, and</li>
     *   <li>flagged rows of {@code dataset} (marked for broadcast) joined with the rows of
     *       {@code dataset2} whose key is skewed.</li>
     * </ul>
     *
     * <p>The skewed-key DataFrame is cached and is not unpersisted by this method.
     *
     * @param dataset  the side whose skewed-key rows are broadcast
     * @param dataset2 the side whose key frequencies are counted
     * @param str      name of the join column, present on both sides
     * @param i        maximum number of keys treated as skewed
     * @param option   optional minimum occurrence count for a key to be considered
     * @param str2     Spark join type used for both partial joins
     * @return the union of both partial joins, without the helper columns
     */
    public Dataset<Row> broadcastJoinSkewed(Dataset<Row> dataset, Dataset<Row> dataset2, String str, int i, Option<Object> option, String str2) {
        SparkSession session = dataset.sparkSession();

        // Occurrences of every join key on the skewed side.
        Dataset keyCounts = dataset2.groupBy(str, Predef$.MODULE$.wrapRefArray(new String[0])).count();
        Dataset candidateKeys = (Dataset) option.map(obj -> {
            return $anonfun$broadcastJoinSkewed$1(keyCounts, session, BoxesRunTime.unboxToLong(obj));
        }).getOrElse(() -> {
            return keyCounts;
        });
        Dataset skewedKeys = candidateKeys.sort(Predef$.MODULE$.wrapRefArray(new Column[]{session.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"count"}))).$(Nil$.MODULE$).desc()})).limit(i).drop("count").withColumnRenamed(str, "skew_join_key").cache();

        // skew_join_key = <join column>; the same predicate is used for all three lookups.
        Column matchesSkewedKey = session.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"skew_join_key"}))).$(Nil$.MODULE$).$eq$eq$eq(functions$.MODULE$.col(str));

        // Left side with a boolean flag telling whether the row's key is a skewed one.
        Dataset flaggedLeft = dataset.join(functions$.MODULE$.broadcast(skewedKeys), matchesSkewedKey, "left").withColumn("is_skewed_record", functions$.MODULE$.col("skew_join_key").isNotNull()).drop("skew_join_key");

        // Part 1: everything that is not skewed, joined regularly.
        Dataset regularLeft = flaggedLeft.filter("not is_skewed_record");
        Dataset regularRight = dataset2.join(functions$.MODULE$.broadcast(skewedKeys), matchesSkewedKey, "left").where("skew_join_key is null").drop("skew_join_key");
        Dataset regularJoined = regularLeft.join(regularRight, new $colon.colon(str, Nil$.MODULE$), str2);

        // Part 2: rows with skewed keys; the flagged left rows are marked for broadcast.
        Dataset skewedLeft = functions$.MODULE$.broadcast(flaggedLeft.filter("is_skewed_record"));
        Dataset skewedRight = dataset2.join(functions$.MODULE$.broadcast(skewedKeys), matchesSkewedKey).drop("skew_join_key");
        Dataset skewedJoined = skewedLeft.join(skewedRight, new $colon.colon(str, Nil$.MODULE$), str2);

        return regularJoined.union(skewedJoined).drop(Predef$.MODULE$.wrapRefArray(new String[]{"is_skewed_record", "skew_join_key"}));
    }

    /** Default for the 5th parameter ({@code option}) of {@code broadcastJoinSkewed}: {@code None}, i.e. no minimum count. */
    public Option<Object> broadcastJoinSkewed$default$5() {
        return None$.MODULE$;
    }

    /** Default for the 6th parameter ({@code str2}, the join type) of {@code broadcastJoinSkewed}: {@code "inner"}. */
    public String broadcastJoinSkewed$default$6() {
        return "inner";
    }

    /**
     * Joins every row of {@code dataset} with the rows of {@code dataset2} whose
     * {@code [str2, str3]} range contains the row's {@code str} value.
     *
     * <p>The matching itself is done by {@code joinWithRangeInternal}; this method only
     * strips the helper columns that the internal join leaves behind.
     *
     * @param dataset  rows carrying a single value
     * @param str      name of the single-value column in {@code dataset}
     * @param dataset2 rows carrying a range
     * @param str2     name of the range-start column in {@code dataset2}
     * @param str3     name of the range-end column in {@code dataset2}
     * @param j        divisor used to bucket values and ranges for the equi-join
     * @return the matched rows without the helper columns
     */
    public Dataset<Row> joinWithRange(Dataset<Row> dataset, String str, Dataset<Row> dataset2, String str2, String str3, long j) {
        String[] helperColumns = {"range_start", "range_end", "decreased_range_single", "single", "decreased_single", "range_size"};
        Dataset<Row> matched = joinWithRangeInternal(dataset, str, dataset2, str2, str3, j);
        return matched.drop(Predef$.MODULE$.wrapRefArray(helperColumns));
    }

    /**
     * Range join implemented as an equi-join on bucket numbers.
     *
     * <p>Left side ({@code dataset}): adds {@code "single"} (the {@code str} column cast to
     * long, floored) and {@code "decreased_single"} (that long divided by {@code j}, floored).
     *
     * <p>Right side ({@code dataset2}): adds {@code "range_start"} / {@code "range_end"}
     * ({@code str2} / {@code str3} cast to long) and {@code "decreased_range_single"}, one
     * row per value returned by the UDF for {@code (range_start / j, range_end / j)}; the
     * UDF enumerates every long between its two arguments, inclusive.
     *
     * <p>The sides are joined with {@code "left_outer"} on
     * {@code decreased_single = decreased_range_single}; {@code "range_size"} is added as
     * {@code range_end - range_start + 1}, and only rows satisfying
     * {@code range_start <= single <= range_end} are kept.
     *
     * <p>NOTE(review): the final filter compares against the right-side columns, so left
     * rows without any matching range presumably do not survive despite the outer join -
     * confirm this is intended.
     *
     * @param dataset  rows carrying a single value
     * @param str      name of the single-value column in {@code dataset}
     * @param dataset2 rows carrying a range
     * @param str2     name of the range-start column in {@code dataset2}
     * @param str3     name of the range-end column in {@code dataset2}
     * @param j        bucket width (divisor) used for the equi-join
     * @return the matched rows, still carrying all helper columns
     */
    private Dataset<Row> joinWithRangeInternal(Dataset<Row> dataset, String str, Dataset<Row> dataset2, String str2, String str3, long j) {
        return dataset.withColumn("single", functions$.MODULE$.floor(functions$.MODULE$.col(str).cast(LongType$.MODULE$))).withColumn("decreased_single", functions$.MODULE$.floor(functions$.MODULE$.col(str).cast(LongType$.MODULE$).$div(functions$.MODULE$.lit(BoxesRunTime.boxToLong(j))))).join(dataset2.withColumn("range_start", functions$.MODULE$.col(str2).cast(LongType$.MODULE$)).withColumn("range_end", functions$.MODULE$.col(str3).cast(LongType$.MODULE$)).withColumn("decreased_range_single", functions$.MODULE$.explode(functions$.MODULE$.udf((obj, obj2) -> {
            return $anonfun$joinWithRangeInternal$1(BoxesRunTime.unboxToLong(obj), BoxesRunTime.unboxToLong(obj2));
        }, package$.MODULE$.universe().TypeTag().apply(package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: datafu.spark.SparkDFUtils$$typecreator1$1
            // Describes the UDF's return type, scala.Array[scala.Long], to Scala reflection.
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe = mirror.universe();
                return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticClass("scala.Array"), new $colon.colon(mirror.staticClass("scala.Long").asType().toTypeConstructor(), Nil$.MODULE$));
            }
        }), package$.MODULE$.universe().TypeTag().Long(), package$.MODULE$.universe().TypeTag().Long()).apply(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.col("range_start").$div(functions$.MODULE$.lit(BoxesRunTime.boxToLong(j))), functions$.MODULE$.col("range_end").$div(functions$.MODULE$.lit(BoxesRunTime.boxToLong(j)))})))), functions$.MODULE$.col("decreased_single").$eq$eq$eq(functions$.MODULE$.col("decreased_range_single")), "left_outer").withColumn("range_size", functions$.MODULE$.expr("(range_end - range_start + 1)")).filter("single>=range_start and single<=range_end");
    }

    /**
     * Performs the same range join as {@code joinWithRange}, but keeps a single matching
     * range per value of the {@code str} column.
     *
     * <p>The survivor is chosen by {@code dedupWithCombiner}, grouped by {@code str}:
     * <ul>
     *   <li>{@code z == true}: sort key {@code struct(range_size, range_start)}, selected
     *       with {@code desc = false};</li>
     *   <li>{@code z == false}: sort key {@code struct(-range_size, range_start)}, selected
     *       with {@code desc = true}.</li>
     * </ul>
     *
     * <p>NOTE(review): minimum of {@code range_size} and maximum of {@code -range_size}
     * both appear to favour the smallest range, differing only in the tie-break on
     * {@code range_start} - confirm this matches the intended meaning of {@code z}.
     *
     * @param dataset  rows carrying a single value
     * @param str      name of the single-value column in {@code dataset}
     * @param dataset2 rows carrying a range
     * @param str2     name of the range-start column in {@code dataset2}
     * @param str3     name of the range-end column in {@code dataset2}
     * @param j        divisor used to bucket values and ranges for the equi-join
     * @param z        selects which of the two sort keys above is used
     * @return one matched row per {@code str} value, without the helper columns
     */
    public Dataset<Row> joinWithRangeAndDedup(Dataset<Row> dataset, String str, Dataset<Row> dataset2, String str2, String str3, long j, boolean z) {
        Dataset<Row> matched = joinWithRangeInternal(dataset, str, dataset2, str2, str3, j);

        Dataset<Row> deduped;
        if (z) {
            deduped = dedupWithCombiner(matched, DataFrameOps$.MODULE$.columnToColumns(functions$.MODULE$.col(str)), DataFrameOps$.MODULE$.columnToColumns(functions$.MODULE$.struct("range_size", Predef$.MODULE$.wrapRefArray(new String[]{"range_start"}))), false, dedupWithCombiner$default$5(), dedupWithCombiner$default$6(), dedupWithCombiner$default$7());
        } else {
            deduped = dedupWithCombiner(matched, DataFrameOps$.MODULE$.columnToColumns(functions$.MODULE$.col(str)), DataFrameOps$.MODULE$.columnToColumns(functions$.MODULE$.struct(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.expr("-range_size"), functions$.MODULE$.col("range_start")}))), true, dedupWithCombiner$default$5(), dedupWithCombiner$default$6(), dedupWithCombiner$default$7());
        }

        String[] helperColumns = {"range_start", "range_end", "decreased_range_single", "single", "decreased_single", "range_size"};
        return deduped.drop(Predef$.MODULE$.wrapRefArray(helperColumns));
    }

    /**
     * Spreads the elements of an array column over individual columns.
     *
     * <p>The largest array size in {@code dataset} is determined first (this runs a Spark
     * job via {@code collect()}). The result then contains every original column plus one
     * column per array position, named {@code str + index} ({@code str0}, {@code str1}, ...)
     * and holding {@code column.getItem(index)}.
     *
     * <p>When the maximum size is null - as it is for an aggregate over an empty
     * {@code dataset} - no columns are added and the original columns are returned
     * unchanged. Previously {@code Row.getInt} was called on that null cell and threw.
     *
     * @param dataset the rows containing the array column
     * @param column  the array column to spread out
     * @param str     prefix for the generated column names
     * @return {@code dataset} with one extra column per array position
     */
    public Dataset<Row> explodeArray(Dataset<Row> dataset, Column column, String str) {
        // A global aggregate always yields exactly one row, so index 0 is safe.
        Row maxSizeRow = ((Row[]) dataset.agg(functions$.MODULE$.max(functions$.MODULE$.size(column)), Predef$.MODULE$.wrapRefArray(new Column[0])).collect())[0];
        // max(...) over zero rows is null; treat that as "no positions to extract".
        int maxSize = maxSizeRow.isNullAt(0) ? 0 : maxSizeRow.getInt(0);
        return dataset.select((Seq) ((IndexedSeq) RichInt$.MODULE$.until$extension0(Predef$.MODULE$.intWrapper(0), maxSize).map(obj -> {
            return $anonfun$explodeArray$1(column, str, BoxesRunTime.unboxToInt(obj));
        }, IndexedSeq$.MODULE$.canBuildFrom())).$plus$colon(functions$.MODULE$.col("*"), IndexedSeq$.MODULE$.canBuildFrom()));
    }

    /**
     * Keeps at most {@code i} rows for every distinct value of {@code column}.
     *
     * <p>Rows are grouped by {@code column}; each group's rows (as {@code struct(*)}) are
     * gathered by {@code SparkOverwriteUDAFs.collectLimitedList} with limit {@code i} into
     * a column named {@code "list"}, which is then exploded again. The output therefore
     * has two columns: the grouping column and the exploded struct holding the full row.
     *
     * <p>NOTE(review): which rows of a group are retained depends on
     * {@code collectLimitedList}; presumably an arbitrary subset - confirm.
     *
     * @param dataset the rows to thin out
     * @param column  the column whose distinct values define the groups
     * @param i       maximum number of rows kept per group
     * @return the grouping column together with one struct per retained row
     */
    public Dataset<Row> dedupRandomN(Dataset<Row> dataset, Column column, int i) {
        Column limitedRows = SparkOverwriteUDAFs$.MODULE$.collectLimitedList(functions$.MODULE$.expr("struct(*)"), i).as("list");
        Dataset<Row> collectedPerGroup = dataset.groupBy(Predef$.MODULE$.wrapRefArray(new Column[]{column})).agg(limitedRows, Predef$.MODULE$.wrapRefArray(new Column[0]));
        Column[] outputColumns = new Column[]{column, functions$.MODULE$.expr("explode(list)")};
        return collectedPerGroup.select(Predef$.MODULE$.wrapRefArray(outputColumns));
    }

    /**
     * Lambda body used by {@code dedupWithCombiner} when filtering column names.
     *
     * @return {@code true} when {@code str} is not an element of {@code seq}
     */
    public static final /* synthetic */ boolean $anonfun$dedupWithCombiner$1(Seq seq, String str) {
        boolean listed = seq.contains(str);
        return !listed;
    }

    /**
     * Lambda body used by {@code dedupWithCombiner} when merging schemas.
     *
     * @return {@code true} when {@code dataset}'s schema has no field named like {@code structField}
     */
    public static final /* synthetic */ boolean $anonfun$dedupWithCombiner$5(Dataset dataset, StructField structField) {
        // Names of all fields in the dataset's schema.
        SeqLike knownFieldNames = (SeqLike) dataset.schema().map(field -> {
            return field.name();
        }, Seq$.MODULE$.canBuildFrom());
        boolean alreadyPresent = knownFieldNames.contains(structField.name());
        return !alreadyPresent;
    }

    /**
     * Lambda body used by {@code dedupWithCombiner} to apply the column filter to a schema field.
     *
     * @param seq         the filter's column names; an empty list means "no filter"
     * @param z           {@code true} if the filter lists columns to keep, {@code false} if
     *                    it lists columns to remove
     * @param structField the schema field being tested
     * @return {@code true} when there is no filter, or when the field's membership in
     *         {@code seq} equals {@code z}
     */
    public static final /* synthetic */ boolean $anonfun$dedupWithCombiner$7(Seq seq, boolean z, StructField structField) {
        // A null seq counts as "filter present", exactly like the generated comparison did.
        boolean filterPresent = seq == null || !seq.equals(Nil$.MODULE$);
        if (!filterPresent) {
            return true;
        }
        boolean listed = seq.contains(structField.name());
        // keep-mode retains listed fields; remove-mode retains unlisted ones.
        return z == listed;
    }

    /**
     * Lambda body used by {@code flatten} to skip struct fields that clash with existing columns.
     *
     * @return {@code true} when {@code set} does not contain the field's name
     */
    public static final /* synthetic */ boolean $anonfun$flatten$3(Set set, StructField structField) {
        boolean nameTaken = set.contains(structField.name());
        return !nameTaken;
    }

    /**
     * Lambda body used by {@code broadcastJoinSkewed}: keeps the rows of {@code dataset}
     * whose {@code "count"} column is at least {@code j}.
     */
    public static final /* synthetic */ Dataset $anonfun$broadcastJoinSkewed$1(Dataset dataset, SparkSession sparkSession, long j) {
        Column countColumn = sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"count"}))).$(Nil$.MODULE$);
        Column atLeastThreshold = countColumn.$greater$eq(BoxesRunTime.boxToLong(j));
        return dataset.filter(atLeastThreshold);
    }

    /**
     * Body of the UDF used by {@code joinWithRangeInternal}: materialises the inclusive
     * Scala range {@code j to j2} as an array.
     */
    public static final /* synthetic */ long[] $anonfun$joinWithRangeInternal$1(long j, long j2) {
        RichLong rangeStart = new RichLong(Predef$.MODULE$.longWrapper(j));
        Object inclusiveValues = rangeStart.to(BoxesRunTime.boxToLong(j2)).toArray(ClassTag$.MODULE$.Long());
        return (long[]) inclusiveValues;
    }

    /**
     * Lambda body used by {@code explodeArray}: the {@code i}-th element of {@code column},
     * aliased as {@code str} immediately followed by {@code i}.
     */
    public static final /* synthetic */ Column $anonfun$explodeArray$1(Column column, String str, int i) {
        String indexedAlias = str + i;
        Column element = column.getItem(BoxesRunTime.boxToInteger(i));
        return element.alias(indexedAlias);
    }

    /**
     * Private constructor: publishes the instance being constructed through the static
     * {@code MODULE$} field. It is invoked from the class's static initializer.
     */
    private SparkDFUtils$() {
        MODULE$ = this;
    }
}
