package defpackage;

import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import java.util.ArrayList;
import java.util.List;
import java.util.ResourceBundle;
import java.util.stream.Collectors;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.DataSourceReadOptions;
import org.apache.hudi.DataSourceWriteOptions;
import org.apache.hudi.HoodieDataSourceHelpers;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
import org.apache.hudi.common.testutils.RawTripTestPayload;
import org.apache.hudi.common.testutils.Transformations;
import org.apache.hudi.config.HoodieCompactionConfig;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.hive.MultiPartKeysValueExtractor;
import org.apache.hudi.hive.NonPartitionedExtractor;
import org.apache.hudi.keygen.NonpartitionedKeyGenerator;
import org.apache.hudi.keygen.SimpleKeyGenerator;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.DataFrameWriter;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

/* loaded from: input_file:HoodieJavaApp.class */
/**
 * Command-line sample that writes a small Hudi table with Spark and reads it back.
 *
 * <p>{@link #run()} performs three writes against {@code --table-path} (an insert of generated
 * records, a write of generated updates, and a delete of a subset of keys), then runs a snapshot
 * query and, for {@code COPY_ON_WRITE} tables, an incremental query. Hive sync options are added
 * to every write when {@code --hive-sync} is set.
 */
public class HoodieJavaApp {

    @Parameter(names = {"--table-path", "-p"}, description = "path for Hoodie sample table")
    private String tablePath = "file:///tmp/hoodie/sample-table";

    @Parameter(names = {"--table-name", "-n"}, description = "table name for Hoodie sample table")
    private String tableName = "hoodie_test";

    @Parameter(names = {"--table-type", "-t"}, description = "One of COPY_ON_WRITE or MERGE_ON_READ")
    private String tableType = HoodieTableType.COPY_ON_WRITE.name();

    @Parameter(names = {"--hive-sync", "-hv"}, description = "Enable syncing to hive")
    private Boolean enableHiveSync = false;

    @Parameter(names = {"--hive-db", "-hd"}, description = "hive database")
    private String hiveDB = "default";

    @Parameter(names = {"--hive-table", "-ht"}, description = "hive table")
    private String hiveTable = "hoodie_sample_test";

    @Parameter(names = {"--hive-user", "-hu"}, description = "hive username")
    private String hiveUser = "hive";

    @Parameter(names = {"--hive-password", "-hp"}, description = "hive password")
    private String hivePass = "hive";

    @Parameter(names = {"--hive-url", "-hl"}, description = "hive JDBC URL")
    private String hiveJdbcUrl = "jdbc:hive2://localhost:10000";

    @Parameter(names = {"--non-partitioned", "-np"}, description = "Use non-partitioned Table")
    private Boolean nonPartitionedTable = false;

    @Parameter(names = {"--use-multi-partition-keys", "-mp"}, description = "Use Multiple Partition Keys")
    private Boolean useMultiPartitionKeys = false;

    @Parameter(names = {"--help", "-h"}, help = true)
    public Boolean help = false;

    private static final Logger LOG = LogManager.getLogger(HoodieJavaApp.class);

    /** Spark data source format name used for every Hudi read and write in this class. */
    private static final String HUDI_FORMAT = "org.apache.hudi";

    /** Value used for all shuffle-parallelism options and for the number of RDD slices. */
    private static final int PARALLELISM = 2;

    /**
     * Parses the command line into a new {@link HoodieJavaApp} and runs it.
     *
     * <p>When the help flag is set, usage is printed and the JVM exits with status 1 without
     * running the sample.
     *
     * @param args command-line arguments bound to the {@link Parameter}-annotated fields
     * @throws Exception anything propagated from {@link #run()}
     */
    public static void main(String[] args) throws Exception {
        HoodieJavaApp app = new HoodieJavaApp();
        JCommander commander = new JCommander(app, (ResourceBundle) null, args);
        if (app.help) {
            commander.usage();
            System.exit(1);
        }
        app.run();
    }

    /**
     * Runs the sample end to end: deletes {@code tablePath}, writes three commits, then queries.
     *
     * @throws Exception anything thrown by the Spark, Hadoop or Hudi calls made here
     */
    public void run() throws Exception {
        SparkSession spark = SparkSession.builder()
            .appName("Hoodie Spark APP")
            .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .master("local[1]")
            .getOrCreate();
        JavaSparkContext jssc = new JavaSparkContext(spark.sparkContext());
        spark.sparkContext().setLogLevel("WARN");

        // Resolve the file system from the table path itself rather than taking the
        // configured default, so a --table-path on another scheme is handled by the
        // file system that actually owns it.
        Path basePath = new Path(tablePath);
        FileSystem fs = basePath.getFileSystem(jssc.hadoopConfiguration());

        HoodieTestDataGenerator dataGen = nonPartitionedTable
            ? new HoodieTestDataGenerator(new String[]{""})
            : new HoodieTestDataGenerator();

        // Start from an empty location; the first write below also uses SaveMode.Overwrite.
        fs.delete(basePath, true);

        // Commit 1: insert 100 generated records.
        List<HoodieRecord> recordsSoFar = new ArrayList<>(dataGen.generateInserts("001", 100));
        DataFrameWriter<Row> insertWriter =
            newHudiWriter(spark, jssc, RawTripTestPayload.recordsToStrings(recordsSoFar), "timestamp")
                .option(DataSourceWriteOptions.OPERATION().key(), DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL())
                .mode(SaveMode.Overwrite);
        updateHiveSyncConfig(insertWriter).save(tablePath);
        String firstCommitTime = HoodieDataSourceHelpers.latestCommit(fs, tablePath);
        LOG.info("First commit at instant time :" + firstCommitTime);

        // Commit 2: write 100 generated updates. No operation option is set on this writer.
        List<HoodieRecord> updates = dataGen.generateUpdates("002", 100);
        recordsSoFar.addAll(updates);
        DataFrameWriter<Row> updateWriter =
            newHudiWriter(spark, jssc, RawTripTestPayload.recordsToStrings(updates), "timestamp")
                .option(HoodieCompactionConfig.INLINE_COMPACT_NUM_DELTA_COMMITS.key(), "1")
                .mode(SaveMode.Append);
        updateHiveSyncConfig(updateWriter).save(tablePath);
        String secondCommitTime = HoodieDataSourceHelpers.latestCommit(fs, tablePath);
        LOG.info("Second commit at instant time :" + secondCommitTime);

        // Commit 3: delete the keys returned by randomSelectAsHoodieKeys(recordsSoFar, 20).
        // Each key is sent as a JSON row carrying only the record key and partition path.
        List<String> deleteRows = Transformations.randomSelectAsHoodieKeys(recordsSoFar, 20).stream()
            .map(key -> "{\"_row_key\":\"" + key.getRecordKey() + "\",\"partition\":\"" + key.getPartitionPath() + "\"}")
            .collect(Collectors.toList());
        // The delete rows have no "timestamp" column, so "_row_key" is passed as precombine field.
        DataFrameWriter<Row> deleteWriter =
            newHudiWriter(spark, jssc, deleteRows, "_row_key")
                .option("hoodie.delete.shuffle.parallelism", String.valueOf(PARALLELISM))
                .option(DataSourceWriteOptions.OPERATION().key(), "delete")
                .option(HoodieCompactionConfig.INLINE_COMPACT_NUM_DELTA_COMMITS.key(), "1")
                .mode(SaveMode.Append);
        updateHiveSyncConfig(deleteWriter).save(tablePath);
        LOG.info("Third commit at instant time :" + HoodieDataSourceHelpers.latestCommit(fs, tablePath));

        // Snapshot query over the table files, exposed to SQL as "hoodie_ro".
        String snapshotGlob = tablePath + (nonPartitionedTable ? "/*" : "/*/*/*/*");
        Dataset<Row> snapshot = spark.read().format(HUDI_FORMAT).load(snapshotGlob);
        snapshot.createOrReplaceTempView("hoodie_ro");
        spark.sql("describe hoodie_ro").show();
        spark.sql("select fare.amount, begin_lon, begin_lat, timestamp from hoodie_ro where fare.amount > 2.0").show();

        // Incremental query starting from the first commit; only run for COPY_ON_WRITE tables.
        if (tableType.equals(HoodieTableType.COPY_ON_WRITE.name())) {
            Dataset<Row> incremental = spark.read().format(HUDI_FORMAT)
                .option(DataSourceReadOptions.QUERY_TYPE().key(), DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL())
                .option(DataSourceReadOptions.BEGIN_INSTANTTIME().key(), firstCommitTime)
                .load(tablePath);
            LOG.info("You will only see records from : " + secondCommitTime);
            incremental.groupBy(incremental.col("_hoodie_commit_time")).count().show();
        }
    }

    /**
     * Builds a Hudi {@link DataFrameWriter} over the given JSON rows with the options that all
     * three writes in {@link #run()} share. Callers add the operation, compaction settings and
     * save mode that differ per commit.
     *
     * @param spark session used to parse the JSON rows into a data frame
     * @param jssc context used to parallelize the rows
     * @param jsonRecords one JSON document per record
     * @param precombineField value for the precombine-field option
     * @return a writer with the shared options applied and no save mode set yet
     */
    private DataFrameWriter<Row> newHudiWriter(SparkSession spark, JavaSparkContext jssc,
                                               List<String> jsonRecords, String precombineField) {
        String keyGeneratorClass = nonPartitionedTable
            ? NonpartitionedKeyGenerator.class.getCanonicalName()
            : SimpleKeyGenerator.class.getCanonicalName();
        String parallelism = String.valueOf(PARALLELISM);

        return spark.read().json(jssc.parallelize(jsonRecords, PARALLELISM))
            .write()
            .format(HUDI_FORMAT)
            .option("hoodie.insert.shuffle.parallelism", parallelism)
            .option("hoodie.upsert.shuffle.parallelism", parallelism)
            .option(DataSourceWriteOptions.TABLE_TYPE().key(), tableType)
            .option(DataSourceWriteOptions.RECORDKEY_FIELD().key(), "_row_key")
            .option(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), "partition")
            .option(DataSourceWriteOptions.PRECOMBINE_FIELD().key(), precombineField)
            .option(HoodieWriteConfig.TBL_NAME.key(), tableName)
            .option(DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME().key(), keyGeneratorClass)
            .option(DataSourceWriteOptions.ASYNC_COMPACT_ENABLE().key(), "false")
            .option(DataSourceWriteOptions.ASYNC_CLUSTERING_ENABLE().key(), "true");
    }

    /**
     * Adds the Hive sync options to {@code writer} when Hive sync is enabled.
     *
     * <p>The partition-related options depend on the table layout flags: a non-partitioned table
     * gets {@link NonPartitionedExtractor} and an empty partition-path field; multi-partition
     * keys get {@code year,month,day} with {@link MultiPartKeysValueExtractor}; otherwise the
     * single Hive partition field {@code dateStr} is used.
     *
     * @param writer the writer to configure
     * @return the configured writer, or {@code writer} as passed in when Hive sync is disabled
     */
    private DataFrameWriter<Row> updateHiveSyncConfig(DataFrameWriter<Row> writer) {
        if (!enableHiveSync) {
            return writer;
        }

        LOG.info("Enabling Hive sync to " + hiveJdbcUrl);
        DataFrameWriter<Row> synced = writer
            .option(DataSourceWriteOptions.HIVE_TABLE().key(), hiveTable)
            .option(DataSourceWriteOptions.HIVE_DATABASE().key(), hiveDB)
            .option(DataSourceWriteOptions.HIVE_URL().key(), hiveJdbcUrl)
            .option(DataSourceWriteOptions.HIVE_USER().key(), hiveUser)
            .option(DataSourceWriteOptions.HIVE_PASS().key(), hivePass)
            .option(DataSourceWriteOptions.HIVE_SYNC_ENABLED().key(), "true");

        if (nonPartitionedTable) {
            // This replaces the "partition" value set earlier for the partition-path field.
            return synced
                .option(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS().key(), NonPartitionedExtractor.class.getCanonicalName())
                .option(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), "");
        }
        if (useMultiPartitionKeys) {
            return synced
                .option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS().key(), "year,month,day")
                .option(DataSourceWriteOptions.HIVE_PARTITION_EXTRACTOR_CLASS().key(), MultiPartKeysValueExtractor.class.getCanonicalName());
        }
        return synced.option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS().key(), "dateStr");
    }
}
