package com.datastax.data.prepare.spark.dataset;

import com.datastax.data.prepare.util.Consts;
import com.datastax.insight.annonation.InsightComponent;
import com.datastax.insight.annonation.InsightComponentArg;
import com.datastax.insight.spec.Operator;
import java.util.Map;
import org.apache.parquet.Strings;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.MapFunction;
import org.apache.spark.api.java.function.MapPartitionsFunction;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Row;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;

/* loaded from: input_file:com/datastax/data/prepare/spark/dataset/DataSetTransformation.class */
/**
 * Static helpers that wrap common Spark {@link Dataset} transformations (join, split, union,
 * sort, select, filter, drop, ...).
 *
 * <p>Methods carrying {@code @InsightComponent} are annotated with display metadata (names,
 * default values, selectable items); how that metadata is consumed is not visible in this file.
 * Column lists passed as a single string are split on {@code Consts.DELIMITER}; the annotation
 * descriptions describe the separator as a semicolon.
 *
 * <p>The annotated methods are lenient about missing input: when a dataset or a required
 * argument is absent they log a message and return the (possibly {@code null}) input dataset
 * instead of throwing. The {@code protected} helpers are plain delegations and perform no
 * validation.
 */
public class DataSetTransformation implements Operator {
    private static final Logger logger = LoggerFactory.getLogger(DataSetTransformation.class);

    /**
     * Joins two datasets on an arbitrary join expression.
     *
     * @param dataset  left dataset
     * @param dataset2 right dataset
     * @param column   join condition
     * @param str      join type passed straight to Spark
     * @return the joined rows
     */
    protected static <T> Dataset<Row> join(Dataset<T> dataset, Dataset<T> dataset2, Column column, String str) {
        return dataset.join(dataset2, column, str);
    }

    /**
     * Joins two datasets and keeps both sides as typed tuple elements.
     *
     * @param dataset  left dataset
     * @param dataset2 right dataset
     * @param column   join condition
     * @param str      join type passed straight to Spark
     * @return a dataset of (left, right) pairs
     */
    protected static <T, U> Dataset<Tuple2<T, U>> joinWith(Dataset<T> dataset, Dataset<U> dataset2, Column column, String str) {
        return dataset.joinWith(dataset2, column, str);
    }

    /**
     * Equi-joins two datasets on one column from each side.
     *
     * @param dataset  left dataset
     * @param dataset2 right dataset
     * @param str      name of the key column in the left dataset
     * @param str2     name of the key column in the right dataset
     * @param str3     join type; {@code null} or empty means {@code "inner"}
     * @return the joined rows; when either dataset is {@code null} the left dataset is returned
     *         as a DataFrame ({@code null} if the left dataset itself is {@code null})
     */
    @InsightComponent(name = "Join", description = "Join", type = "com.datastax.insight.dataprprocess.join", icon = "arrows", order = 50010501)
    public static <T> Dataset<Row> join(
            @InsightComponentArg(externalInput = true, name = "左数据集", description = "left dataset") Dataset<T> dataset,
            @InsightComponentArg(externalInput = true, name = "右数据集", description = "right dataset") Dataset<T> dataset2,
            @InsightComponentArg(name = "左列名", description = "keyLeft") String str,
            @InsightComponentArg(name = "右列名", description = "keyRight") String str2,
            @InsightComponentArg(name = "连接方法", description = "join方法", defaultValue = "inner", items = "inner;outer;left_outer;right_outer;left_semi") String str3) {
        if (dataset == null || dataset2 == null) {
            logger.info("left或者right数据集为空，返回left数据集");
            // toDF() on a null left dataset would throw, so hand the null back unchanged.
            return dataset == null ? null : dataset.toDF();
        }
        String joinType = (str3 == null || str3.isEmpty()) ? "inner" : str3;
        Column condition = dataset.col(str).equalTo(dataset2.col(str2));
        return dataset.join(dataset2, condition, joinType);
    }

    /**
     * Equi-joins two datasets on several column pairs; the i-th left column is matched with the
     * i-th right column and all equalities are AND-ed together.
     *
     * @param dataset  left dataset
     * @param dataset2 right dataset
     * @param str      left key column names, separated by {@code Consts.DELIMITER}
     * @param str2     right key column names, separated by {@code Consts.DELIMITER}
     * @param str3     join type; {@code null} or empty means {@code "inner"}
     * @return the joined rows; when either dataset is {@code null} the left dataset is returned
     *         as a DataFrame ({@code null} if the left dataset itself is {@code null})
     * @throws IllegalArgumentException if a column list is {@code null}, if no left column is
     *         given, or if there are fewer right columns than left columns
     */
    @InsightComponent(name = "JoinMutil", description = "JoinMutil", type = "com.datastax.insight.dataprprocess.join", icon = "arrows")
    public static <T> Dataset<Row> joinMutil(
            @InsightComponentArg(externalInput = true, name = "左数据集", description = "left dataset") Dataset<T> dataset,
            @InsightComponentArg(externalInput = true, name = "右数据集", description = "right dataset") Dataset<T> dataset2,
            @InsightComponentArg(name = "左各列名", description = "左各列名，以分号隔开") String str,
            @InsightComponentArg(name = "右各列名", description = "右各列名，以分号隔开") String str2,
            @InsightComponentArg(name = "连接方法", description = "join方法", defaultValue = "inner", items = "inner;outer;left_outer;right_outer;left_semi") String str3) {
        if (dataset == null || dataset2 == null) {
            logger.info("left或者right数据集为空，返回left数据集");
            // toDF() on a null left dataset would throw, so hand the null back unchanged.
            return dataset == null ? null : dataset.toDF();
        }
        if (str == null || str2 == null) {
            throw new IllegalArgumentException("joinMutil: left and right column lists must not be null");
        }
        String joinType = (str3 == null || str3.isEmpty()) ? "inner" : str3;
        String[] leftNames = str.split(Consts.DELIMITER);
        String[] rightNames = str2.split(Consts.DELIMITER);
        if (leftNames.length == 0) {
            throw new IllegalArgumentException("joinMutil: no join columns given");
        }
        if (rightNames.length < leftNames.length) {
            throw new IllegalArgumentException("joinMutil: " + leftNames.length
                    + " left column(s) but only " + rightNames.length + " right column(s)");
        }
        if (rightNames.length > leftNames.length) {
            // Surplus right columns have always been ignored; keep that, but make it visible.
            logger.warn("joinMutil: ignoring {} surplus right column(s)", rightNames.length - leftNames.length);
        }
        Column condition = dataset.col(leftNames[0]).equalTo(dataset2.col(rightNames[0]));
        for (int i = 1; i < leftNames.length; i++) {
            condition = condition.and(dataset.col(leftNames[i]).equalTo(dataset2.col(rightNames[i])));
        }
        return dataset.join(dataset2, condition, joinType);
    }

    /**
     * Randomly splits a dataset using weights given as a delimited string.
     *
     * <p>Each token is trimmed and must look like a non-negative decimal number
     * ({@code digits[.digits]}); any other token is logged and contributes a weight of 0.
     *
     * @param dataset dataset to split
     * @param str     weights separated by {@code Consts.DELIMITER}
     * @return the splits produced by {@code randomSplit}; a one-element array holding the input
     *         when the dataset is {@code null} or the weight string is null/empty
     */
    @SuppressWarnings({"unchecked", "rawtypes"}) // generic array creation is not expressible in Java
    @InsightComponent(name = "Split", description = "Split", type = "com.datastax.insight.dataprprocess.split1", icon = "arrows", order = 500501)
    public static <T> Dataset<T>[] split(
            @InsightComponentArg(externalInput = true, name = "数据集", description = "输入的DataSet") Dataset<T> dataset,
            @InsightComponentArg(name = "数据切分权重", description = "分割比例") String str) {
        if (dataset == null || Strings.isNullOrEmpty(str)) {
            logger.info("数据集为空或者weights为空");
            return new Dataset[]{dataset};
        }
        String[] tokens = str.split(Consts.DELIMITER);
        double[] weights = new double[tokens.length];
        for (int i = 0; i < tokens.length; i++) {
            String token = tokens[i].trim();
            if (token.matches("\\d+\\.?\\d*")) {
                weights[i] = Double.parseDouble(token);
            } else {
                // Keep the slot (weight 0) so the result keeps one split per token.
                logger.warn("split: weight '{}' at position {} is not a non-negative decimal number; using 0", tokens[i], i);
            }
        }
        return dataset.randomSplit(weights);
    }

    /**
     * Randomly splits a dataset using the given weights.
     *
     * @param dataset dataset to split
     * @param dArr    split weights
     * @return the splits produced by {@code randomSplit}; a one-element array holding the input
     *         when the dataset is {@code null} or the weights are null/empty
     */
    @SuppressWarnings({"unchecked", "rawtypes"}) // generic array creation is not expressible in Java
    @InsightComponent(name = "Split", description = "Split", type = "com.datastax.insight.dataprprocess.split", icon = "arrows", order = 500501)
    public static <T> Dataset<T>[] split(
            @InsightComponentArg(externalInput = true, name = "数据集", description = "输入的DataSet") Dataset<T> dataset,
            @InsightComponentArg(name = "数据切分权重", description = "分割比例") double[] dArr) {
        if (dataset != null && dArr != null && dArr.length != 0) {
            return dataset.randomSplit(dArr);
        }
        logger.info("数据集为空或者weights为空");
        return new Dataset[]{dataset};
    }

    /**
     * Unions two datasets when they have the same number of columns.
     *
     * @param dataset  left dataset
     * @param dataset2 right dataset
     * @return the union, or the left dataset unchanged when either input is {@code null} or the
     *         column counts differ
     */
    @InsightComponent(name = "Union", description = "Union", type = "com.datastax.insight.dataprprocess.union", icon = "arrows", order = 50010502)
    public static <T> Dataset<T> union(
            @InsightComponentArg(externalInput = true, name = "左数据集", description = "left dataset") Dataset<T> dataset,
            @InsightComponentArg(externalInput = true, name = "右数据集", description = "right dataset") Dataset<T> dataset2) {
        if (dataset == null || dataset2 == null) {
            logger.info("数据集为空,返回left数据集");
            return dataset;
        }
        if (dataset.schema().fieldNames().length == dataset2.schema().fieldNames().length) {
            return dataset.union(dataset2);
        }
        logger.info("left和right数据集的列数不等，不能进行union操作，返回left数据集");
        return dataset;
    }

    /**
     * Re-types a dataset with the given encoder.
     *
     * @param dataset source dataset
     * @param encoder encoder for the target element type
     * @return the dataset viewed as {@code U}
     */
    protected static <T, U> Dataset<U> as(Dataset<T> dataset, Encoder<U> encoder) {
        return dataset.as(encoder);
    }

    /**
     * Gives the dataset an alias.
     *
     * @param dataset dataset to alias (not null-checked)
     * @param str     alias name
     * @return the aliased dataset
     */
    @InsightComponent(name = "Alias", description = "Alias", type = "com.datastax.insight.dataprprocess.alias", icon = "arrows", order = 50010802)
    public static <T> Dataset<T> alias(
            @InsightComponentArg(externalInput = true, name = "数据集", description = "操作数据") Dataset<T> dataset,
            @InsightComponentArg(name = "别名", description = "alias") String str) {
        return dataset.as(str);
    }

    /**
     * Sorts a dataset by one or more columns, nulls last.
     *
     * @param dataset dataset to sort
     * @param str     sort operation, compared against {@code Consts.SORT},
     *                {@code Consts.SORT_WIRHINPARTITIONS} and {@code Consts.ORDERBY};
     *                {@code null} or empty means {@code Consts.SORT}
     * @param str2    column names separated by {@code Consts.DELIMITER}
     * @param str3    descending when equal to {@code Consts.DESC}, ascending otherwise
     * @return the sorted dataset, or the input unchanged when the dataset is {@code null}, no
     *         column is given, or the operation is not recognised
     */
    @InsightComponent(name = "排序", description = "Sort", type = "com.datastax.insight.dataprprocess.sort", icon = "arrows", order = 50010601)
    public static <T> Dataset<T> sort(
            @InsightComponentArg(externalInput = true, name = "数据集", description = "操作数据") Dataset<T> dataset,
            @InsightComponentArg(name = "类型", description = "排序类型", defaultValue = "sort", items = "sort;sortWithinPartitions;orderBy") String str,
            @InsightComponentArg(name = "列名", description = "排序列，用分号隔开") String str2,
            @InsightComponentArg(name = "排列类型", description = "设置列从大到小或者从小到大排列", defaultValue = "ASC", items = "DESC;ASC") String str3) {
        if (dataset == null) {
            logger.info("数据集为空");
            return dataset;
        }
        // "".split(...) yields one empty token, so an empty list must be caught before splitting.
        String[] names = (str2 == null || str2.isEmpty()) ? new String[0] : str2.split(Consts.DELIMITER);
        if (names.length == 0) {
            logger.info("选择的列名为空，返回原数据集");
            return dataset;
        }
        String operation = (str == null || str.isEmpty()) ? Consts.SORT : str;
        boolean descending = Consts.DESC.equals(str3);
        Column[] order = new Column[names.length];
        for (int i = 0; i < names.length; i++) {
            Column col = dataset.col(names[i]);
            order[i] = descending ? col.desc_nulls_last() : col.asc_nulls_last();
        }
        if (Consts.SORT.equals(operation)) {
            return dataset.sort(order);
        }
        if (Consts.SORT_WIRHINPARTITIONS.equals(operation)) {
            return dataset.sortWithinPartitions(order);
        }
        if (Consts.ORDERBY.equals(operation)) {
            return dataset.orderBy(order);
        }
        logger.info("type不在可选值内，返回原数据集");
        return dataset;
    }

    /**
     * Projects a dataset onto the given columns or SQL expressions.
     *
     * @param dataset dataset to project
     * @param str     selection mode, compared against {@code Consts.COLUMN} and
     *                {@code Consts.EXPRESSION}; {@code null} or empty means {@code Consts.COLUMN}
     * @param str2    column names or expressions separated by {@code Consts.DELIMITER}; the
     *                tokens are passed through the project helper {@code util.c.a} (presumably
     *                a clean-up step — TODO confirm)
     * @return the projection; the input as a DataFrame when nothing is selected or the mode is
     *         not recognised; {@code null} when the dataset is {@code null}
     */
    @InsightComponent(name = "Select", description = "Select", type = "com.datastax.insight.dataprprocess.select", icon = "arrows", order = 50010102)
    public static <T> Dataset<Row> select(
            @InsightComponentArg(externalInput = true, name = "数据集", description = "操作数据") Dataset<T> dataset,
            @InsightComponentArg(name = "类型", description = "选择的类型", defaultValue = "column", items = "column;expression") String str,
            @InsightComponentArg(name = "列名", description = "选择列，以分号隔开") String str2) {
        if (dataset == null) {
            logger.info("选择的列名为空或者数据集为空，返回原数据集");
            return null;
        }
        String[] a = str2 == null ? new String[0] : com.datastax.data.prepare.util.c.a(str2.split(Consts.DELIMITER));
        if (a.length == 0) {
            logger.info("选择的列名为空或者数据集为空，返回原数据集");
            return dataset.toDF();
        }
        String mode = (str == null || str.isEmpty()) ? Consts.COLUMN : str;
        if (Consts.EXPRESSION.equals(mode)) {
            return dataset.selectExpr(a);
        }
        if (!Consts.COLUMN.equals(mode)) {
            logger.info("type不在可选值内，返回原数据集");
            return dataset.toDF();
        }
        // select(String, String...) takes the first column separately from the rest.
        String[] rest = new String[a.length - 1];
        System.arraycopy(a, 1, rest, 0, rest.length);
        return dataset.select(a[0], rest);
    }

    /**
     * Filters a dataset with a SQL condition expression.
     *
     * @param dataset dataset to filter
     * @param str     condition expression
     * @return the filtered dataset, or the input unchanged when the dataset is {@code null} or
     *         the expression is null/empty
     */
    @InsightComponent(name = "Filter", description = "Filter", type = "com.datastax.insight.dataprprocess.filter", icon = "arrows", order = 50010201)
    public static <T> Dataset<T> filter(
            @InsightComponentArg(externalInput = true, name = "数据集", description = "操作数据") Dataset<T> dataset,
            @InsightComponentArg(name = "表达式", description = "表达式") String str) {
        if (dataset != null && str != null && str.length() != 0) {
            return dataset.filter(str);
        }
        logger.info("数据集为空或者表达式为空，返回原数据集");
        return dataset;
    }

    /**
     * Filters a dataset with a SQL condition expression via {@code Dataset.where}.
     *
     * @param dataset dataset to filter
     * @param str     condition expression
     * @return the filtered dataset
     */
    protected static <T> Dataset<T> where(Dataset<T> dataset, String str) {
        return dataset.where(str);
    }

    /**
     * Aggregates the whole dataset using the given map, passed unchanged to {@code Dataset.agg}.
     *
     * @param dataset dataset to aggregate
     * @param map     aggregation specification
     * @return the aggregated rows
     */
    protected static <T> Dataset<Row> agg(Dataset<T> dataset, Map<String, String> map) {
        return dataset.agg(map);
    }

    /**
     * Limits a dataset to its first {@code i} rows.
     *
     * @param dataset dataset to limit
     * @param i       row count; a negative value is replaced by 20
     * @return the limited dataset, or {@code null} when the dataset is {@code null}
     */
    @InsightComponent(name = "Limit", description = "Limit", type = "com.datastax.insight.dataprprocess.limit", icon = "arrows", order = 50010101)
    public static <T> Dataset<T> limit(
            @InsightComponentArg(externalInput = true, name = "数据集", description = "操作数据") Dataset<T> dataset,
            @InsightComponentArg(name = "行数", description = "limit n") int i) {
        if (dataset == null) {
            logger.info("数据集为空");
            return dataset;
        }
        if (i < 0) {
            logger.info("n小于0, 默认为20");
            i = 20;
        }
        return dataset.limit(i);
    }

    /**
     * Returns the rows present in both datasets.
     *
     * @param dataset  first dataset
     * @param dataset2 second dataset
     * @return the intersection; when either input is {@code null} the first dataset is returned
     *         unchanged (the log text says "null", which is only accurate when the first
     *         dataset is the missing one)
     */
    @InsightComponent(name = "交集", description = "Intersect", type = "com.datastax.insight.dataprprocess.intersect", icon = "arrows", order = 50010505)
    public static <T> Dataset<T> intersect(
            @InsightComponentArg(externalInput = true, name = "数据集1", description = "操作数据1") Dataset<T> dataset,
            @InsightComponentArg(externalInput = true, name = "数据集2", description = "操作数据2") Dataset<T> dataset2) {
        if (dataset != null && dataset2 != null) {
            return dataset.intersect(dataset2);
        }
        logger.info("数据集为空,返回null");
        return dataset;
    }

    /**
     * Returns the rows of the first dataset that are not in the second.
     *
     * @param dataset  first dataset
     * @param dataset2 second dataset
     * @return the difference; when either input is {@code null} the first dataset is returned
     *         unchanged (the log text says "null", which is only accurate when the first
     *         dataset is the missing one)
     */
    @InsightComponent(name = "差集", description = "Except", type = "com.datastax.insight.dataprprocess.expect", icon = "arrows", order = 50010202)
    public static <T> Dataset<T> except(
            @InsightComponentArg(externalInput = true, name = "数据集1", description = "操作数据1") Dataset<T> dataset,
            @InsightComponentArg(externalInput = true, name = "数据集2", description = "操作数据2") Dataset<T> dataset2) {
        if (dataset != null && dataset2 != null) {
            return dataset.except(dataset2);
        }
        logger.info("数据集为空,返回null");
        return dataset;
    }

    /**
     * Renames one column.
     *
     * @param dataset source dataset
     * @param str     existing column name
     * @param str2    new column name
     * @return the dataset with the column renamed
     */
    protected static <T> Dataset<Row> withColumnRenamed(Dataset<T> dataset, String str, String str2) {
        return dataset.withColumnRenamed(str, str2);
    }

    /**
     * Drops columns, or drops duplicate rows.
     *
     * @param dataset dataset to operate on
     * @param str     operation, compared against {@code Consts.DROP} and
     *                {@code Consts.DROP_DUPLICATES}; {@code null} or empty means
     *                {@code Consts.DROP}
     * @param str2    column names separated by {@code Consts.DELIMITER}; may be {@code null}.
     *                For the drop operation these are the columns to remove; for the
     *                duplicates operation they are the columns compared (all columns when none
     *                is given)
     * @return the resulting DataFrame; the input as a DataFrame when there is nothing to drop
     *         or the operation is not recognised; {@code null} when the dataset is {@code null}
     */
    @InsightComponent(name = "Drop", description = "Drop", type = "com.datastax.insight.dataprprocess.drop", icon = "arrows", order = 50010104)
    public static <T> Dataset<Row> drop(
            @InsightComponentArg(externalInput = true, name = "数据集", description = "dataset") Dataset<T> dataset,
            @InsightComponentArg(name = "类型", description = "drop 的类型", defaultValue = "drop", items = "drop;drop duplicates") String str,
            @InsightComponentArg(name = "列名", description = "丢弃的列名，用分号隔开") String str2) {
        if (dataset == null) {
            logger.info("数据集为空");
            return null;
        }
        // A missing column list is legitimate (e.g. drop duplicates over all columns).
        String[] a = str2 == null ? new String[0] : com.datastax.data.prepare.util.c.a(str2.split(Consts.DELIMITER));
        String operation = (str == null || str.isEmpty()) ? Consts.DROP : str;
        if (Consts.DROP.equals(operation)) {
            if (a.length != 0) {
                return dataset.drop(a);
            }
            logger.info("列名为空, 返回原数据集");
            return dataset.toDF();
        }
        if (Consts.DROP_DUPLICATES.equals(operation)) {
            return a.length == 0 ? dataset.dropDuplicates().toDF() : dataset.dropDuplicates(a).toDF();
        }
        logger.info("type不再可选项内, 返回原数据集");
        return dataset.toDF();
    }

    /**
     * Applies a function to every element.
     *
     * @param dataset     source dataset (not null-checked)
     * @param mapFunction element transformation
     * @param encoder     encoder for the result type
     * @return the mapped dataset
     */
    @InsightComponent(name = "Map", description = "Map", type = "com.datastax.insight.dataprprocess.map", icon = "arrows", order = 50010803)
    public static <T, U> Dataset<U> map(
            @InsightComponentArg(name = "数据集", description = "操作数据") Dataset<T> dataset,
            @InsightComponentArg(name = "函数") MapFunction<T, U> mapFunction,
            @InsightComponentArg(name = "编码器") Encoder<U> encoder) {
        return dataset.map(mapFunction, encoder);
    }

    /**
     * Applies a function to each partition.
     *
     * @param dataset               source dataset
     * @param mapPartitionsFunction partition transformation
     * @param encoder               encoder for the result type
     * @return the transformed dataset
     */
    protected static <T, U> Dataset<U> mapPartitions(Dataset<T> dataset, MapPartitionsFunction<T, U> mapPartitionsFunction, Encoder<U> encoder) {
        return dataset.mapPartitions(mapPartitionsFunction, encoder);
    }

    /**
     * Applies a one-to-many function to every element and flattens the results.
     *
     * @param dataset         source dataset
     * @param flatMapFunction element transformation
     * @param encoder         encoder for the result type
     * @return the flattened dataset
     */
    protected static <T, U> Dataset<U> flatMap(Dataset<T> dataset, FlatMapFunction<T, U> flatMapFunction, Encoder<U> encoder) {
        return dataset.flatMap(flatMapFunction, encoder);
    }

    /**
     * Repartitions the dataset into {@code i} partitions.
     *
     * @param dataset source dataset (not null-checked)
     * @param i       target partition count, passed to Spark unchecked
     * @return the repartitioned dataset
     */
    @InsightComponent(name = "Repartition", description = "Repartition", type = "com.datastax.insight.dataprprocess.repartition", icon = "arrows", order = 50010504)
    public static <T> Dataset<T> repartition(
            @InsightComponentArg(externalInput = true, name = "data", description = "操作数据") Dataset<T> dataset,
            @InsightComponentArg(name = "n", description = "n") int i) {
        return dataset.repartition(i);
    }

    /**
     * Coalesces the dataset into {@code i} partitions.
     *
     * @param dataset source dataset (not null-checked)
     * @param i       target partition count, passed to Spark unchecked
     * @return the coalesced dataset
     */
    @InsightComponent(name = "Coalesce", description = "Coalesce", type = "com.datastax.insight.dataprprocess.coalesce", icon = "arrows", order = 50010503)
    public static <T> Dataset<T> coalesce(
            @InsightComponentArg(externalInput = true, name = "data", description = "操作数据") Dataset<T> dataset,
            @InsightComponentArg(name = "n", description = "n") int i) {
        return dataset.coalesce(i);
    }

    /**
     * Converts the dataset to a {@link JavaRDD}.
     *
     * @param dataset source dataset (not null-checked)
     * @return the dataset's content as a JavaRDD
     */
    @InsightComponent(name = "toJavaRDD", description = "toJavaRDD", type = "com.datastax.insight.dataprprocess.toJavaRDD", icon = "arrows", order = 500204)
    public static <T> JavaRDD<T> toJavaRDD(
            @InsightComponentArg(externalInput = true, name = "data", description = "操作数据") Dataset<T> dataset) {
        return dataset.toJavaRDD();
    }

    /**
     * Converts the dataset to a dataset of JSON strings.
     *
     * @param dataset source dataset (not null-checked)
     * @return the result of {@code Dataset.toJSON()}
     */
    @InsightComponent(name = "toJson", description = "toJson", type = "com.datastax.insight.dataprprocess.toJson", icon = "arrows", order = 500203)
    public static <T> Dataset<String> toJSON(
            @InsightComponentArg(externalInput = true, name = "data", description = "操作数据") Dataset<T> dataset) {
        return dataset.toJSON();
    }

    /**
     * Converts the dataset to an untyped DataFrame.
     *
     * @param dataset source dataset
     * @return the dataset as rows
     */
    protected static <T> Dataset<Row> toDF(Dataset<T> dataset) {
        return dataset.toDF();
    }

    /**
     * Converts the dataset to a DataFrame with the given column names.
     *
     * @param dataset source dataset
     * @param strArr  new column names
     * @return the renamed DataFrame
     */
    protected static <T> Dataset<Row> toDF(Dataset<T> dataset, String[] strArr) {
        return dataset.toDF(strArr);
    }

    /**
     * Converts the dataset to a DataFrame, renaming its columns.
     *
     * @param dataset source dataset
     * @param str     new column names separated by {@code Consts.DELIMITER}; when null or empty
     *                the columns are named {@code _c0}, {@code _c1}, ...
     * @return the renamed DataFrame, or {@code null} when the dataset is {@code null}
     */
    @InsightComponent(name = "toDF", description = "toDF", type = "com.datastax.insight.dataprprocess.toDF", icon = "arrows", order = 500201)
    public static <T> Dataset<Row> toDF(
            @InsightComponentArg(externalInput = true, name = "数据集") Dataset<T> dataset,
            @InsightComponentArg(name = "列名") String str) {
        if (dataset == null) {
            logger.info("数据集为空");
            return null;
        }
        String[] names;
        if (str == null || str.isEmpty()) {
            names = new String[dataset.columns().length];
            for (int i = 0; i < names.length; i++) {
                names[i] = "_c" + i;
            }
        } else {
            names = com.datastax.data.prepare.util.c.a(str.split(Consts.DELIMITER));
        }
        return dataset.toDF(names);
    }

    /**
     * Casts the listed columns to the given type, replacing each column in place.
     *
     * @param dataset source dataset
     * @param str     column names separated by {@code Consts.DELIMITER}
     * @param str2    target type name understood by {@code Column.cast}
     * @return the DataFrame with the columns cast; the input as a DataFrame when the list
     *         yields no names
     */
    protected static <T> Dataset<Row> cast(Dataset<T> dataset, String str, String str2) {
        Dataset<Row> result = dataset.toDF();
        for (String name : str.split(Consts.DELIMITER)) {
            // Take the column from the running result, not from the original dataset.
            result = result.withColumn(name, result.col(name).cast(str2));
        }
        return result;
    }

    /**
     * Prints the first rows of the dataset to standard output between two fixed marker lines.
     *
     * @param dataset dataset to print; nothing is printed when {@code null}
     * @param i       number of rows; a negative value is replaced by 10
     * @param i2      truncation width passed to {@code Dataset.show}; a negative value is
     *                replaced by 20
     */
    @InsightComponent(name = "Show", description = "Show", type = "com.datastax.insight.dataprprocess.show")
    public static <T> void show(
            @InsightComponentArg(name = "数据集", description = "数据集") Dataset<T> dataset,
            @InsightComponentArg(name = "行数", description = "显示的行数", defaultValue = "10") int i,
            @InsightComponentArg(name = "字符数", description = "显示的字符数", defaultValue = "20") int i2) {
        if (dataset == null) {
            logger.info("数据集为空");
            return;
        }
        int rows = i < 0 ? 10 : i;
        int truncate = i2 < 0 ? 20 : i2;
        // The marker lines are emitted verbatim around the table output.
        System.out.println("===DataExa-Insight User Output Started===");
        dataset.show(rows, truncate);
        System.out.println("===DataExa-Insight User Output Ended===");
    }
}
