package org.apache.hudi.integ.testsuite;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hive.com.beust.jcommander.Parameters;
import org.apache.hudi.DataSourceUtils;
import org.apache.hudi.com.beust.jcommander.JCommander;
import org.apache.hudi.com.beust.jcommander.Parameter;
import org.apache.hudi.common.config.SerializableConfiguration;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.util.ReflectionUtils;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.integ.testsuite.configuration.DFSDeltaConfig;
import org.apache.hudi.integ.testsuite.dag.DagUtils;
import org.apache.hudi.integ.testsuite.dag.WorkflowDag;
import org.apache.hudi.integ.testsuite.dag.WorkflowDagGenerator;
import org.apache.hudi.integ.testsuite.dag.scheduler.DagScheduler;
import org.apache.hudi.integ.testsuite.generator.DeltaGenerator;
import org.apache.hudi.integ.testsuite.reader.DeltaInputType;
import org.apache.hudi.integ.testsuite.writer.DeltaOutputMode;
import org.apache.hudi.keygen.BuiltinKeyGenerator;
import org.apache.hudi.utilities.UtilHelpers;
import org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer;
import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/hudi/integ/testsuite/HoodieTestSuiteJob.class */
/**
 * Command-line driver for the Hudi integration test suite.
 *
 * <p>The job reads its properties through {@link UtilHelpers#readConfig}, builds a
 * {@link WorkflowDag} (either from a YAML file or from a {@link WorkflowDagGenerator}
 * implementation loaded by class name) and hands it to a {@link DagScheduler} together with a
 * {@link HoodieTestSuiteWriter} and a {@link DeltaGenerator}.
 */
public class HoodieTestSuiteJob {
    private static final Logger log = LoggerFactory.getLogger(HoodieTestSuiteJob.class);

    /** Parsed command-line configuration for this run. */
    private final HoodieTestSuiteConfig cfg;
    /** Properties loaded from {@code cfg.propsFilePath}, combined with {@code cfg.configs} by {@code UtilHelpers.readConfig}. */
    TypedProperties props;
    private transient SchemaProvider schemaProvider;
    /** File system resolved from {@code cfg.inputBasePath}; also used to read the props file and probe the target path. */
    private transient FileSystem fs;
    private transient JavaSparkContext jsc;
    private transient SparkSession sparkSession;
    private transient HiveConf hiveConf;
    private BuiltinKeyGenerator keyGenerator;

    /* loaded from: input_file:org/apache/hudi/integ/testsuite/HoodieTestSuiteJob$HoodieTestSuiteConfig.class */
    /**
     * Command-line options of the test suite, on top of the options inherited from
     * {@link HoodieDeltaStreamer.Config}.
     *
     * <p>NOTE(review): several options below are marked {@code required = true} although they also
     * carry a default value; confirm whether they are really meant to be mandatory.
     */
    public static class HoodieTestSuiteConfig extends HoodieDeltaStreamer.Config {

        @Parameter(names = {"--input-base-path"}, description = "base path for input data(Will be created if did not exist first time around. If exists, more data will be added to that path)", required = true)
        public String inputBasePath;

        /** Optional YAML file describing the DAG; when {@code null} the generator class below is used instead. */
        @Parameter(names = {"--workload-yaml-path"}, description = "Workflow Dag yaml path to generate the workload")
        public String workloadYamlPath;

        /** Class name of the {@link WorkflowDagGenerator} used when no YAML path is given. */
        @Parameter(names = {"--workload-generator-classname"}, description = "WorkflowDag of operations to generate the workload", required = true)
        public String workloadDagGenerator = WorkflowDagGenerator.class.getName();

        /** Name of a {@link DeltaOutputMode} constant; resolved with {@code DeltaOutputMode.valueOf}. */
        @Parameter(names = {"--delta-output-type"}, description = "Subclass of org.apache.hudi.testsuite.workload.DeltaOutputMode to readAvro data.")
        public String outputTypeName = DeltaOutputMode.DFS.name();

        /**
         * Name of a {@link DeltaInputType} constant; resolved with {@code DeltaInputType.valueOf}.
         *
         * <p>NOTE(review): the help text mentions DeltaOutputMode, which does not match how the
         * value is consumed. Left unchanged here because it is user-facing output.
         */
        @Parameter(names = {"--delta-input-format"}, description = "Subclass of org.apache.hudi.testsuite.workload.DeltaOutputMode to read avro data.")
        public String inputFormatName = DeltaInputType.AVRO.name();

        /** File size limit handed to {@link DFSDeltaConfig}; the default is 120 MiB (120 * 1024 * 1024). */
        @Parameter(names = {"--input-file-size"}, description = "The min/max size of the input files to generate", required = true)
        public Long limitFileSize = 125829120L;

        @Parameter(names = {"--use-deltastreamer"}, description = "Choose whether to use HoodieDeltaStreamer to perform ingestion. If set to false, HoodieWriteClient will be used")
        public Boolean useDeltaStreamer = false;
    }

    /**
     * Wires up the job: Spark session, file system, properties, schema provider, Hive configuration
     * and key generator. If nothing exists yet at {@code targetBasePath}, the table is initialised
     * there via {@link HoodieTableMetaClient#initTableType}.
     *
     * @param hoodieTestSuiteConfig parsed command-line configuration
     * @param javaSparkContext      Spark context to run on; it is stopped by {@link #runTestSuite()}
     * @throws IOException declared for the file system and table initialisation calls made here
     */
    public HoodieTestSuiteJob(HoodieTestSuiteConfig hoodieTestSuiteConfig, JavaSparkContext javaSparkContext) throws IOException {
        this.cfg = hoodieTestSuiteConfig;
        this.jsc = javaSparkContext;
        this.sparkSession = SparkSession.builder().config(javaSparkContext.getConf()).getOrCreate();
        this.fs = FSUtils.getFs(hoodieTestSuiteConfig.inputBasePath, javaSparkContext.hadoopConfiguration());
        this.props = UtilHelpers.readConfig(this.fs, new Path(hoodieTestSuiteConfig.propsFilePath), hoodieTestSuiteConfig.configs).getConfig();
        log.info("Creating workload generator with configs : {}", this.props);
        this.schemaProvider = UtilHelpers.createSchemaProvider(hoodieTestSuiteConfig.schemaProviderClassName, this.props, javaSparkContext);
        this.hiveConf = getDefaultHiveConf(javaSparkContext.hadoopConfiguration());
        this.keyGenerator = (BuiltinKeyGenerator) DataSourceUtils.createKeyGenerator(this.props);
        // Only initialise the table when the target path does not exist yet.
        if (!this.fs.exists(new Path(hoodieTestSuiteConfig.targetBasePath))) {
            // NOTE(review): "archived" is presumably the archive log folder name - confirm against initTableType.
            HoodieTableMetaClient.initTableType(javaSparkContext.hadoopConfiguration(), hoodieTestSuiteConfig.targetBasePath, HoodieTableType.valueOf(hoodieTestSuiteConfig.tableType), hoodieTestSuiteConfig.targetTableName, "archived");
        }
    }

    /**
     * Creates a {@link HiveConf} and adds the given Hadoop configuration to it as a resource.
     *
     * @param configuration Hadoop configuration to add
     * @return the new Hive configuration
     */
    private static HiveConf getDefaultHiveConf(Configuration configuration) {
        HiveConf hiveConf = new HiveConf();
        hiveConf.addResource(configuration);
        return hiveConf;
    }

    /**
     * Parses the command line, builds the Spark context and runs the test suite.
     *
     * <p>Prints the usage text and exits with status 1 when no arguments are given or when help is
     * requested.
     *
     * @param strArr command-line arguments
     * @throws Exception if argument parsing, job construction or the run itself fails
     */
    public static void main(String[] strArr) throws Exception {
        HoodieTestSuiteConfig hoodieTestSuiteConfig = new HoodieTestSuiteConfig();
        // Build the commander without parsing first: parsing an empty argument list fails on the
        // required options before the usage text could ever be printed.
        JCommander jCommander = new JCommander(hoodieTestSuiteConfig);
        if (strArr.length == 0) {
            jCommander.usage();
            System.exit(1);
        }
        jCommander.parse(strArr);
        if (hoodieTestSuiteConfig.help) {
            jCommander.usage();
            System.exit(1);
        }
        String appName = "workload-generator-" + hoodieTestSuiteConfig.outputTypeName + "-" + hoodieTestSuiteConfig.inputFormatName;
        JavaSparkContext sparkContext = UtilHelpers.buildSparkContext(appName, hoodieTestSuiteConfig.sparkMaster);
        new HoodieTestSuiteJob(hoodieTestSuiteConfig, sparkContext).runTestSuite();
    }

    /**
     * Builds the workflow DAG, schedules it, and stops the Spark context afterwards.
     *
     * <p>The DAG comes from {@code cfg.workloadYamlPath} when that is set, otherwise from the
     * {@link WorkflowDagGenerator} named by {@code cfg.workloadDagGenerator}. The Spark context is
     * stopped exactly once, whether or not the run succeeds.
     *
     * @throws HoodieException wrapping any exception raised while building or scheduling the DAG
     */
    public void runTestSuite() {
        try {
            WorkflowDag workflowDag = this.cfg.workloadYamlPath == null
                    ? ((WorkflowDagGenerator) ReflectionUtils.loadClass(this.cfg.workloadDagGenerator)).build()
                    : DagUtils.convertYamlPathToDag(this.fs, this.cfg.workloadYamlPath);
            log.info("Workflow Dag => {}", DagUtils.convertDagToYaml(workflowDag));
            long startTimeMillis = System.currentTimeMillis();
            String schema = this.schemaProvider.getSourceSchema().toString();
            HoodieTestSuiteWriter writer = new HoodieTestSuiteWriter(this.jsc, this.props, this.cfg, schema);
            DFSDeltaConfig deltaConfig = new DFSDeltaConfig(
                    DeltaOutputMode.valueOf(this.cfg.outputTypeName),
                    DeltaInputType.valueOf(this.cfg.inputFormatName),
                    new SerializableConfiguration(this.jsc.hadoopConfiguration()),
                    this.cfg.inputBasePath,
                    this.cfg.targetBasePath,
                    schema,
                    this.cfg.limitFileSize);
            DeltaGenerator deltaGenerator = new DeltaGenerator(deltaConfig, this.jsc, this.sparkSession, schema, this.keyGenerator);
            new DagScheduler(workflowDag, writer, deltaGenerator).schedule();
            log.info("Finished scheduling all tasks, Time taken {}", System.currentTimeMillis() - startTimeMillis);
        } catch (Exception e) {
            log.error("Failed to run Test Suite ", e);
            throw new HoodieException("Failed to run Test Suite ", e);
        } finally {
            // Single shutdown point, so a failure inside stop() is not reported as a test-suite
            // failure and stop() is never invoked twice.
            this.jsc.stop();
        }
    }
}
