package org.apache.hudi.client;

import java.io.IOException;
import java.lang.invoke.SerializedLambda;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Random;
import java.util.Spliterators;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.StreamSupport;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hudi.DataSourceWriteOptions;
import org.apache.hudi.avro.model.HoodieFileStatus;
import org.apache.hudi.client.bootstrap.BootstrapMode;
import org.apache.hudi.client.bootstrap.FullRecordBootstrapDataProvider;
import org.apache.hudi.client.bootstrap.selector.BootstrapModeSelector;
import org.apache.hudi.client.bootstrap.selector.FullRecordBootstrapModeSelector;
import org.apache.hudi.client.bootstrap.selector.MetadataOnlyBootstrapModeSelector;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.common.bootstrap.FileStatusUtils;
import org.apache.hudi.common.bootstrap.index.BootstrapIndex;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
import org.apache.hudi.common.testutils.HoodieTestUtils;
import org.apache.hudi.common.testutils.RawTripTestPayload;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.ParquetReaderIterator;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieBootstrapConfig;
import org.apache.hudi.config.HoodieCompactionConfig;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.hadoop.HoodieParquetInputFormat;
import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat;
import org.apache.hudi.index.HoodieIndex;
import org.apache.hudi.keygen.NonpartitionedKeyGenerator;
import org.apache.hudi.keygen.SimpleKeyGenerator;
import org.apache.hudi.table.action.bootstrap.BootstrapUtils;
import org.apache.hudi.testutils.HoodieClientTestBase;
import org.apache.hudi.testutils.HoodieMergeOnReadTestUtils;
import org.apache.parquet.avro.AvroParquetReader;
import org.apache.parquet.avro.AvroReadSupport;
import org.apache.parquet.avro.AvroSchemaConverter;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions;
import org.apache.spark.sql.types.DataTypes;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;

/* loaded from: input_file:org/apache/hudi/client/TestBootstrap.class */
public class TestBootstrap extends HoodieClientTestBase {
    public static final String TRIP_HIVE_COLUMN_TYPES = "bigint,string,string,string,double,double,double,double,struct<amount:double,currency:string>,array<struct<amount:double,currency:string>>,boolean";

    @TempDir
    public Path tmpFolder;
    protected String bootstrapBasePath = null;
    private HoodieParquetInputFormat roInputFormat;
    private JobConf roJobConf;
    private HoodieParquetRealtimeInputFormat rtInputFormat;
    private JobConf rtJobConf;
    private SparkSession spark;

    /**
     * Bootstrap flavour exercised by a test case; {@code testBootstrapCommon} switches on it to pick
     * the bootstrap mode selector class and the expected commit instants.
     */
    private enum EffectiveMode {
        // Uses FullRecordBootstrapModeSelector.
        FULL_BOOTSTRAP_MODE,
        // Uses MetadataOnlyBootstrapModeSelector.
        METADATA_BOOTSTRAP_MODE,
        // Handled by the switch's default branch, which uses TestRandomBootstapModeSelector.
        MIXED_BOOTSTRAP_MODE
    }

    /**
     * Full-record bootstrap input provider used by the tests in this class.
     *
     * <p>It reads the parquet schema from the footer of one source file, converts it to Avro and
     * delegates to {@link TestBootstrap#generateInputBatch} to build the input records.
     */
    public static class TestFullBootstrapDataProvider extends FullRecordBootstrapDataProvider<JavaRDD<HoodieRecord>> {
        public TestFullBootstrapDataProvider(TypedProperties typedProperties, HoodieSparkEngineContext hoodieSparkEngineContext) {
            super(typedProperties, hoodieSparkEngineContext);
        }

        /**
         * Builds the records to bootstrap from the given source files.
         *
         * @param str  not used by this implementation
         * @param str2 not used by this implementation
         * @param list (partition path, files) pairs describing the source data
         * @return an RDD with one record per row found in the listed files
         * @throws java.util.NoSuchElementException if {@code list} contains no file at all
         * @throws HoodieIOException if the footer of the sampled file cannot be read
         */
        public JavaRDD<HoodieRecord> generateInputRecords(String str, String str2, List<Pair<String, List<HoodieFileStatus>>> list) {
            HoodieFileStatus sampleFile = list.stream()
                    .flatMap(partition -> partition.getValue().stream())
                    .findAny()
                    .orElseThrow(() -> new java.util.NoSuchElementException("No source files available to derive the bootstrap schema from"));
            String samplePath = FileStatusUtils.toPath(sampleFile.getPath()).toString();
            JavaSparkContext sparkContext = HoodieSparkEngineContext.getSparkContext(this.context);
            // The footer reader holds an open input stream, so make sure it is released.
            try (ParquetFileReader footerReader = ParquetFileReader.open(sparkContext.hadoopConfiguration(), new org.apache.hadoop.fs.Path(samplePath))) {
                Schema avroSchema = new AvroSchemaConverter().convert(footerReader.getFooter().getFileMetaData().getSchema());
                return TestBootstrap.generateInputBatch(sparkContext, list, avroSchema);
            } catch (IOException e) {
                throw new HoodieIOException(e.getMessage(), e);
            }
        }

        /* renamed from: generateInputRecords, reason: collision with other method in class */
        // Decompiler rendering of the bridge method; kept so the set of declared members is unchanged.
        public /* bridge */ /* synthetic */ Object m5generateInputRecords(String str, String str2, List list) {
            return generateInputRecords(str, str2, (List<Pair<String, List<HoodieFileStatus>>>) list);
        }
    }

    /**
     * Mode selector that alternates between {@link BootstrapMode#METADATA_ONLY} and
     * {@link BootstrapMode#FULL_RECORD}, starting from a randomly chosen one.
     */
    public static class TestRandomBootstapModeSelector extends BootstrapModeSelector {
        // 0 -> next partition is METADATA_ONLY, 1 -> next partition is FULL_RECORD.
        private int currIdx;

        public TestRandomBootstapModeSelector(HoodieWriteConfig hoodieWriteConfig) {
            super(hoodieWriteConfig);
            this.currIdx = new Random().nextInt(2);
        }

        /**
         * Assigns a bootstrap mode to every partition, flipping the mode after each one.
         *
         * @param list (partition path, files) pairs to assign modes to
         * @return partition paths grouped by the mode they were assigned; a mode that received no
         *     partition has no entry
         */
        public Map<BootstrapMode, List<String>> select(List<Pair<String, List<HoodieFileStatus>>> list) {
            List<Pair<BootstrapMode, String>> assignments = new ArrayList<>(list.size());
            for (Pair<String, List<HoodieFileStatus>> partition : list) {
                BootstrapMode mode = this.currIdx == 0 ? BootstrapMode.METADATA_ONLY : BootstrapMode.FULL_RECORD;
                this.currIdx = (this.currIdx + 1) % 2;
                assignments.add(Pair.of(mode, partition.getKey()));
            }
            return assignments.stream()
                    .collect(Collectors.groupingBy(Pair::getKey, Collectors.mapping(Pair::getValue, Collectors.toList())));
        }
    }

    /**
     * Points the bootstrap source at {@code <tmpFolder>/data} and initialises the shared test
     * fixtures (paths, Spark contexts, data generator, meta client, input formats).
     */
    @BeforeEach
    public void setUp() throws Exception {
        final String tmpRoot = tmpFolder.toAbsolutePath().toString();
        bootstrapBasePath = tmpRoot + "/data";

        initPath();
        initSparkContexts();
        initTestDataGenerator();
        initMetaClient();
        reloadInputFormats();
    }

    /**
     * Releases the fixtures created in {@code setUp}.
     *
     * <p>The cleanups are chained with {@code finally} so that a failure in an earlier step does
     * not prevent the later ones from running.
     *
     * @throws IOException if one of the cleanup steps fails
     */
    @AfterEach
    public void tearDown() throws IOException {
        try {
            cleanupSparkContexts();
        } finally {
            try {
                cleanupClients();
            } finally {
                cleanupTestDataGenerator();
            }
        }
    }

    /**
     * Recreates the read-optimized and realtime input formats, each with a fresh {@link JobConf}
     * copied from the Spark context's Hadoop configuration.
     */
    private void reloadInputFormats() {
        HoodieParquetInputFormat readOptimized = new HoodieParquetInputFormat();
        JobConf readOptimizedConf = new JobConf(jsc.hadoopConfiguration());
        readOptimized.setConf(readOptimizedConf);
        roInputFormat = readOptimized;
        roJobConf = readOptimizedConf;

        HoodieParquetRealtimeInputFormat realtime = new HoodieParquetRealtimeInputFormat();
        JobConf realtimeConf = new JobConf(jsc.hadoopConfiguration());
        realtime.setConf(realtimeConf);
        rtInputFormat = realtime;
        rtJobConf = realtimeConf;
    }

    /**
     * Writes a fresh raw-trip parquet dataset to {@code str} and returns the Avro schema derived
     * from the footer of one of the written files.
     *
     * @param j    timestamp value stored in every generated record
     * @param i    upper bound (exclusive) of the generated record indices; indices start at 0
     * @param list partition values; when null or empty the dataset is written unpartitioned
     * @param str  target directory, overwritten if it exists
     * @return the Avro schema converted from the parquet schema of a written file
     * @throws java.util.NoSuchElementException if no written file can be found under {@code str}
     */
    public Schema generateNewDataSetAndReturnSchema(long j, int i, List<String> list, String str) throws Exception {
        boolean partitioned = list != null && !list.isEmpty();
        Dataset<Row> rawTrips = generateTestRawTripDataset(j, 0, i, list, this.jsc, this.sqlContext);
        rawTrips.printSchema();
        if (partitioned) {
            rawTrips.write().partitionBy("datestr").format("parquet").mode(SaveMode.Overwrite).save(str);
        } else {
            rawTrips.write().format("parquet").mode(SaveMode.Overwrite).save(str);
        }

        // Any written file will do: they all share the schema of the dataset just saved.
        HoodieFileStatus sampleFile = BootstrapUtils.getAllLeafFoldersWithFiles(this.metaClient, this.metaClient.getFs(), str, this.context).stream()
                .flatMap(partition -> partition.getValue().stream())
                .findAny()
                .orElseThrow(() -> new java.util.NoSuchElementException("No parquet files found under " + str));
        String samplePath = FileStatusUtils.toPath(sampleFile.getPath()).toString();

        // Close the footer reader once the schema has been converted.
        try (ParquetFileReader footerReader = ParquetFileReader.open(this.metaClient.getHadoopConf(), new org.apache.hadoop.fs.Path(samplePath))) {
            return new AvroSchemaConverter().convert(footerReader.getFooter().getFileMetaData().getSchema());
        }
    }

    /** Metadata-only bootstrap on a copy-on-write table using the non-partitioned key generator. */
    @Test
    public void testMetadataBootstrapUnpartitionedCOW() throws Exception {
        final boolean partitioned = false;
        final boolean mergeOnRead = false;
        testBootstrapCommon(partitioned, mergeOnRead, EffectiveMode.METADATA_BOOTSTRAP_MODE);
    }

    /** Metadata-only bootstrap on a copy-on-write table using the simple key generator. */
    @Test
    public void testMetadataBootstrapWithUpdatesCOW() throws Exception {
        final boolean partitioned = true;
        final boolean mergeOnRead = false;
        testBootstrapCommon(partitioned, mergeOnRead, EffectiveMode.METADATA_BOOTSTRAP_MODE);
    }

    /**
     * Shared scenario behind every test in this class: bootstrap, roll the bootstrap back,
     * bootstrap again, upsert a second dataset and, for merge-on-read tables, compact.
     *
     * @param z             true to use {@link SimpleKeyGenerator}, false for
     *                      {@link NonpartitionedKeyGenerator}
     * @param z2            true for a MERGE_ON_READ table, false for COPY_ON_WRITE
     * @param effectiveMode which bootstrap mode selector to configure
     */
    private void testBootstrapCommon(boolean z, boolean z2, EffectiveMode effectiveMode) throws Exception {
        final boolean partitioned = z;
        final boolean deltaCommit = z2;
        final int totalRecords = 100;

        HoodieTableType tableType = deltaCommit ? HoodieTableType.MERGE_ON_READ : HoodieTableType.COPY_ON_WRITE;
        metaClient = HoodieTestUtils.init(basePath, tableType, bootstrapBasePath);

        String keyGeneratorClass = partitioned
                ? SimpleKeyGenerator.class.getCanonicalName()
                : NonpartitionedKeyGenerator.class.getCanonicalName();

        // Per-mode expectations.
        String modeSelectorClass;
        String bootstrapInstant;
        boolean checkNumRawFiles;
        boolean expectIndexInUse;
        int instantsAfterBootstrap;
        List<String> bootstrapInstants;
        switch (effectiveMode) {
            case FULL_BOOTSTRAP_MODE:
                modeSelectorClass = FullRecordBootstrapModeSelector.class.getCanonicalName();
                bootstrapInstant = "00000000000002";
                bootstrapInstants = Arrays.asList(bootstrapInstant);
                instantsAfterBootstrap = 1;
                checkNumRawFiles = false;
                expectIndexInUse = false;
                break;
            case METADATA_BOOTSTRAP_MODE:
                modeSelectorClass = MetadataOnlyBootstrapModeSelector.class.getCanonicalName();
                bootstrapInstant = "00000000000001";
                bootstrapInstants = Arrays.asList(bootstrapInstant);
                instantsAfterBootstrap = 1;
                checkNumRawFiles = true;
                expectIndexInUse = true;
                break;
            default:
                modeSelectorClass = TestRandomBootstapModeSelector.class.getName();
                bootstrapInstant = "00000000000002";
                bootstrapInstants = Arrays.asList("00000000000001", "00000000000002");
                instantsAfterBootstrap = 2;
                checkNumRawFiles = false;
                expectIndexInUse = true;
                break;
        }

        // Source dataset to bootstrap from.
        List<String> partitions = Arrays.asList("2020/04/01", "2020/04/02", "2020/04/03");
        long timestamp = Instant.now().toEpochMilli();
        Schema schema = generateNewDataSetAndReturnSchema(timestamp, totalRecords, partitions, bootstrapBasePath);
        String schemaJson = schema.toString();

        HoodieCompactionConfig compactionConfig = HoodieCompactionConfig.newBuilder()
                .withMaxNumDeltaCommitsBeforeCompaction(1)
                .build();
        HoodieBootstrapConfig bootstrapConfig = HoodieBootstrapConfig.newBuilder()
                .withBootstrapBasePath(bootstrapBasePath)
                .withBootstrapKeyGenClass(keyGeneratorClass)
                .withFullBootstrapInputProvider(TestFullBootstrapDataProvider.class.getName())
                .withBootstrapParallelism(3)
                .withBootstrapModeSelector(modeSelectorClass)
                .build();
        HoodieWriteConfig writeConfig = getConfigBuilder(schemaJson)
                .withAutoCommit(true)
                .withSchema(schemaJson)
                .withCompactionConfig(compactionConfig)
                .withBootstrapConfig(bootstrapConfig)
                .build();

        // First bootstrap.
        SparkRDDWriteClient firstClient = new SparkRDDWriteClient(context, writeConfig);
        firstClient.bootstrap(Option.empty());
        checkBootstrapResults(totalRecords, schema, bootstrapInstant, checkNumRawFiles, instantsAfterBootstrap,
                instantsAfterBootstrap, timestamp, timestamp, deltaCommit, bootstrapInstants);

        // Drop the completed instant file, then roll the bootstrap back.
        String commitAction = deltaCommit ? "deltacommit" : "commit";
        HoodieInstant completedBootstrap = new HoodieInstant(HoodieInstant.State.COMPLETED, commitAction, bootstrapInstant);
        FSUtils.deleteInstantFile(metaClient.getFs(), metaClient.getMetaPath(), completedBootstrap);
        firstClient.rollBackInflightBootstrap();
        metaClient.reloadActiveTimeline();
        Assertions.assertEquals(0, metaClient.getCommitsTimeline().countInstants());
        long filesLeftInTable = BootstrapUtils.getAllLeafFoldersWithFiles(metaClient, metaClient.getFs(), basePath, context).stream()
                .mapToLong(folder -> folder.getValue().size())
                .sum();
        Assertions.assertEquals(0L, filesLeftInTable);
        Assertions.assertFalse(BootstrapIndex.getBootstrapIndex(metaClient).useIndex());

        // Second bootstrap with a new client.
        SparkRDDWriteClient secondClient = new SparkRDDWriteClient(context, writeConfig);
        secondClient.bootstrap(Option.empty());
        metaClient.reloadActiveTimeline();
        BootstrapIndex index = BootstrapIndex.getBootstrapIndex(metaClient);
        if (!expectIndexInUse) {
            Assertions.assertFalse(index.useIndex());
        } else {
            Assertions.assertTrue(index.useIndex());
        }
        checkBootstrapResults(totalRecords, schema, bootstrapInstant, checkNumRawFiles, instantsAfterBootstrap,
                instantsAfterBootstrap, timestamp, timestamp, deltaCommit, bootstrapInstants);

        // Upsert a second dataset that carries a newer timestamp.
        long updateTimestamp = Instant.now().toEpochMilli();
        String updatePath = tmpFolder.toAbsolutePath().toString() + "/data2";
        generateNewDataSetAndReturnSchema(updateTimestamp, totalRecords, partitions, updatePath);
        JavaRDD<HoodieRecord> updateBatch = generateInputBatch(jsc,
                BootstrapUtils.getAllLeafFoldersWithFiles(metaClient, metaClient.getFs(), updatePath, context), schema);
        String updateInstant = secondClient.startCommit();
        secondClient.upsert(updateBatch, updateInstant);
        long expectedReadOptimizedTimestamp = deltaCommit ? timestamp : updateTimestamp;
        checkBootstrapResults(totalRecords, schema, updateInstant, false, instantsAfterBootstrap + 1,
                updateTimestamp, expectedReadOptimizedTimestamp, deltaCommit);

        if (deltaCommit) {
            Option<String> scheduled = secondClient.scheduleCompaction(Option.empty());
            Assertions.assertTrue(scheduled.isPresent());
            String compactionInstant = scheduled.get();
            secondClient.compact(compactionInstant);
            checkBootstrapResults(totalRecords, schema, compactionInstant, checkNumRawFiles, instantsAfterBootstrap + 2, 2,
                    updateTimestamp, updateTimestamp, !deltaCommit, Arrays.asList(compactionInstant));
        }
    }

    /** Metadata-only bootstrap on a merge-on-read table using the simple key generator. */
    @Test
    public void testMetadataBootstrapWithUpdatesMOR() throws Exception {
        final boolean partitioned = true;
        final boolean mergeOnRead = true;
        testBootstrapCommon(partitioned, mergeOnRead, EffectiveMode.METADATA_BOOTSTRAP_MODE);
    }

    /** Full-record bootstrap on a copy-on-write table using the simple key generator. */
    @Test
    public void testFullBootstrapOnlyCOW() throws Exception {
        final boolean partitioned = true;
        final boolean mergeOnRead = false;
        testBootstrapCommon(partitioned, mergeOnRead, EffectiveMode.FULL_BOOTSTRAP_MODE);
    }

    /** Full-record bootstrap on a merge-on-read table using the simple key generator. */
    @Test
    public void testFullBootstrapWithUpdatesMOR() throws Exception {
        final boolean partitioned = true;
        final boolean mergeOnRead = true;
        testBootstrapCommon(partitioned, mergeOnRead, EffectiveMode.FULL_BOOTSTRAP_MODE);
    }

    /** Mixed metadata/full-record bootstrap on a copy-on-write table. */
    @Test
    public void testMetaAndFullBootstrapCOW() throws Exception {
        final boolean partitioned = true;
        final boolean mergeOnRead = false;
        testBootstrapCommon(partitioned, mergeOnRead, EffectiveMode.MIXED_BOOTSTRAP_MODE);
    }

    /** Mixed metadata/full-record bootstrap on a merge-on-read table. */
    @Test
    public void testMetadataAndFullBootstrapWithUpdatesMOR() throws Exception {
        final boolean partitioned = true;
        final boolean mergeOnRead = true;
        testBootstrapCommon(partitioned, mergeOnRead, EffectiveMode.MIXED_BOOTSTRAP_MODE);
    }

    /**
     * Convenience overload: reuses {@code i2} as the file-count multiplier and accepts only
     * {@code str} as a valid commit time, then delegates to the ten-argument variant.
     */
    private void checkBootstrapResults(int i, Schema schema, String str, boolean z, int i2, long j, long j2, boolean z2) throws Exception {
        final List<String> acceptedCommitTimes = Arrays.asList(str);
        final int fileCountMultiplier = i2;
        checkBootstrapResults(i, schema, str, z, i2, fileCountMultiplier, j, j2, z2, acceptedCommitTimes);
    }

    /**
     * Verifies the bootstrapped table through the timeline, Spark SQL and both Hudi input formats.
     *
     * @param i      expected number of records in every view
     * @param schema Avro schema handed to the input-format readers
     * @param str    expected timestamp of the latest completed commit
     * @param z      when true, also compare the number of distinct {@code _hoodie_file_name} values
     *               with the number of source files times {@code i3}
     * @param i2     expected number of completed commits
     * @param i3     multiplier applied to the source file count (only used when {@code z} is true)
     * @param j      timestamp every record must carry in the realtime view
     * @param j2     timestamp every record must carry in the read-optimized view
     * @param z2     when true, the Spark SQL comparisons against the source data are skipped
     * @param list   commit times a bootstrapped row may carry in {@code _hoodie_commit_time}
     */
    private void checkBootstrapResults(int i, Schema schema, String str, boolean z, int i2, int i3, long j, long j2, boolean z2, List<String> list) throws Exception {
        this.metaClient.reloadActiveTimeline();
        Assertions.assertEquals(i2, this.metaClient.getCommitsTimeline().filterCompletedInstants().countInstants());
        Assertions.assertEquals(str, this.metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants().lastInstant().get().getTimestamp());

        Dataset<Row> bootstrapped = this.sqlContext.read().format("parquet").load(this.basePath);
        Dataset<Row> original = this.sqlContext.read().format("parquet").load(this.bootstrapBasePath);
        // createOrReplaceTempView is the non-deprecated equivalent of registerTempTable.
        bootstrapped.createOrReplaceTempView("bootstrapped");
        original.createOrReplaceTempView("original");

        if (z) {
            long sourceFileCount = BootstrapUtils.getAllLeafFoldersWithFiles(this.metaClient, this.metaClient.getFs(), this.bootstrapBasePath, this.context).stream()
                    .mapToLong(folder -> folder.getValue().size())
                    .sum();
            Assertions.assertEquals(sourceFileCount * i3, this.sqlContext.sql("select distinct _hoodie_file_name from bootstrapped").count());
        }

        if (!z2) {
            String quotedCommitTimes = list.stream().map(commitTime -> "\"" + commitTime + "\"").collect(Collectors.joining(", "));
            Assertions.assertEquals(i, this.sqlContext.sql("select * from bootstrapped where _hoodie_commit_time IN (" + quotedCommitTimes + ")").count());
            Assertions.assertEquals(0L, this.sqlContext.sql("select a._row_key from original a where a._row_key not in (select _hoodie_record_key from bootstrapped)").count());
            Assertions.assertEquals(0L, this.sqlContext.sql("select a._hoodie_record_key from bootstrapped a where a._hoodie_record_key not in (select _row_key from original)").count());
        }

        // Full rows: keys, timestamps and uniqueness in both views.
        assertFullRecords(readWithInputFormat(schema, false, false, new ArrayList<>()), i, j2, "Record :");
        assertFullRecords(readWithInputFormat(schema, true, false, new ArrayList<>()), i, j, "Realtime Record :");

        // Projection on the Hudi meta columns only.
        assertDistinctValues(readWithInputFormat(schema, false, true, HoodieRecord.HOODIE_META_COLUMNS), "_hoodie_record_key", i);
        assertDistinctValues(readWithInputFormat(schema, true, true, HoodieRecord.HOODIE_META_COLUMNS), "_hoodie_record_key", i);

        // Projection on a single data column.
        assertDistinctValues(readWithInputFormat(schema, false, true, Arrays.asList("_row_key")), "_row_key", i);
        assertDistinctValues(readWithInputFormat(schema, true, true, Arrays.asList("_row_key")), "_row_key", i);
    }

    /** Resets the input formats and reads every partition of the table through the chosen view. */
    private List<GenericRecord> readWithInputFormat(Schema schema, boolean realtime, boolean projectCols, List<String> projectedCols) throws Exception {
        reloadInputFormats();
        List<String> partitionPaths = FSUtils.getAllPartitionPaths(this.context, this.basePath, false, false, false).stream()
                .map(partition -> this.basePath + "/" + partition)
                .collect(Collectors.toList());
        // Must be picked after reloadInputFormats(), which replaces both job confs.
        JobConf jobConf = realtime ? this.rtJobConf : this.roJobConf;
        return HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(this.jsc.hadoopConfiguration(), partitionPaths, this.basePath, jobConf, realtime, schema, TRIP_HIVE_COLUMN_TYPES, projectCols, projectedCols);
    }

    /** Checks count, record key vs. row key, timestamp and key uniqueness of fully read records. */
    private void assertFullRecords(List<GenericRecord> records, int expectedCount, long expectedTimestamp, String label) {
        Assertions.assertEquals(expectedCount, records.size());
        HashSet<String> seenKeys = new HashSet<>();
        for (GenericRecord record : records) {
            String recordKey = record.get("_hoodie_record_key").toString();
            Assertions.assertEquals(record.get("_row_key").toString(), recordKey, label + record);
            Assertions.assertEquals(expectedTimestamp, ((LongWritable) record.get("timestamp")).get(), 0.1d, label + record);
            Assertions.assertTrue(seenKeys.add(recordKey));
        }
        Assertions.assertEquals(expectedCount, seenKeys.size());
    }

    /** Checks that {@code field} holds exactly {@code expectedCount} values, all distinct. */
    private void assertDistinctValues(List<GenericRecord> records, String field, int expectedCount) {
        Assertions.assertEquals(expectedCount, records.size());
        HashSet<String> seenValues = new HashSet<>();
        for (GenericRecord record : records) {
            Assertions.assertTrue(seenValues.add(record.get(field).toString()));
        }
        Assertions.assertEquals(expectedCount, seenValues.size());
    }

    /**
     * Reads every row of the given parquet files and wraps it in a {@link HoodieRecord} keyed by
     * its {@code _row_key} value and the partition path the file was listed under.
     *
     * <p>Each file is read eagerly inside try-with-resources so that its reader is closed before
     * the next file is opened.
     *
     * @param javaSparkContext Spark context providing the Hadoop configuration and the resulting RDD
     * @param list             (partition path, files) pairs to read
     * @param schema           Avro read schema applied to every file
     * @return an RDD holding one record per row, in listing order
     * @throws HoodieIOException if a file cannot be read or a payload cannot be built
     */
    /* JADX INFO: Access modifiers changed from: private */
    public static JavaRDD<HoodieRecord> generateInputBatch(JavaSparkContext javaSparkContext, List<Pair<String, List<HoodieFileStatus>>> list, Schema schema) {
        // Writer schema handed to each payload (string reproduced exactly as found in this file).
        final String payloadSchema = "{\"type\": \"record\",\"name\": \"triprec\",\"fields\": [ {\"name\": \"timestamp\",\"type\": \"long\"},{\"name\": \"_row_key\", \"type\": \"string\"},{\"name\": \"rider\", \"type\": \"string\"},{\"name\": \"driver\", \"type\": \"string\"},{\"name\": \"begin_lat\", \"type\": \"double\"},{\"name\": \"begin_lon\", \"type\": \"double\"},{\"name\": \"end_lat\", \"type\": \"double\"},{\"name\": \"end_lon\", \"type\": \"double\"},{\"name\": \"distance_in_meters\", \"type\": \"int\"},{\"name\": \"seconds_since_epoch\", \"type\": \"long\"},{\"name\": \"weight\", \"type\": \"float\"},{\"name\": \"nation\", \"type\": \"bytes\"},{\"name\":\"current_date\",\"type\": {\"type\": \"int\", \"logicalType\": \"date\"}},{\"name\":\"current_ts\",\"type\": {\"type\": \"long\"}},{\"name\":\"height\",\"type\":{\"type\":\"fixed\",\"name\":\"abc\",\"size\":5,\"logicalType\":\"decimal\",\"precision\":10,\"scale\":6}},{\"name\": \"city_to_state\", \"type\": {\"type\": \"map\", \"values\": \"string\"}},{\"name\": \"fare\",\"type\": {\"type\":\"record\", \"name\":\"fare\",\"fields\": [{\"name\": \"amount\",\"type\": \"double\"},{\"name\": \"currency\", \"type\": \"string\"}]}},{\"name\": \"tip_history\", \"default\": null, \"type\": {\"type\": \"array\", \"items\": {\"type\": \"record\", \"default\": null, \"name\": \"tip_history\", \"fields\": [{\"name\": \"amount\", \"type\": \"double\"}, {\"name\": \"currency\", \"type\": \"string\"}]}}},{\"name\": \"_hoodie_is_deleted\", \"type\": \"boolean\", \"default\": false} ]}";

        List<HoodieRecord> records = new ArrayList<>();
        for (Pair<String, List<HoodieFileStatus>> partition : list) {
            String partitionPath = partition.getKey();
            for (HoodieFileStatus fileStatus : partition.getValue()) {
                org.apache.hadoop.fs.Path filePath = FileStatusUtils.toPath(fileStatus.getPath());
                Configuration hadoopConf = javaSparkContext.hadoopConfiguration();
                AvroReadSupport.setAvroReadSchema(hadoopConf, schema);
                try (org.apache.parquet.hadoop.ParquetReader<GenericRecord> reader =
                        AvroParquetReader.<GenericRecord>builder(filePath).withConf(hadoopConf).build()) {
                    Iterator<GenericRecord> rows = new ParquetReaderIterator<>(reader);
                    while (rows.hasNext()) {
                        GenericRecord row = rows.next();
                        String recordKey = row.get("_row_key").toString();
                        records.add(new HoodieRecord(new HoodieKey(recordKey, partitionPath),
                                new RawTripTestPayload(row.toString(), recordKey, partitionPath, payloadSchema)));
                    }
                } catch (IOException e) {
                    throw new HoodieIOException(e.getMessage(), e);
                }
            }
        }
        return javaSparkContext.parallelize(records);
    }

    /**
     * Returns a write-config builder for the given schema using a BLOOM index, external schema
     * transformation enabled, {@code _row_key} as record key field and {@code datestr} as
     * partition path field.
     */
    public HoodieWriteConfig.Builder getConfigBuilder(String str) {
        TypedProperties keyFieldProps = new TypedProperties();
        keyFieldProps.setProperty(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY(), "_row_key");
        keyFieldProps.setProperty(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY(), "datestr");

        return getConfigBuilder(str, HoodieIndex.IndexType.BLOOM)
                .withExternalSchemaTrasformation(true)
                .withProps(keyFieldProps);
    }

    /**
     * Generates raw trip rows with keys {@code trip_<n>} for {@code n} in {@code [i, i2)}.
     *
     * <p>When {@code list} is non-empty, a {@code datestr} column is added whose value is the
     * URL-encoded entry of {@code list} selected by the row index modulo the list size. This
     * registers a UDF named {@code partgen} on {@code sQLContext}.
     *
     * @param j                timestamp stored in every generated record
     * @param i                first row index (inclusive)
     * @param i2               last row index (exclusive)
     * @param list             partition values; null or empty produces an unpartitioned dataset
     * @param javaSparkContext Spark context used to parallelize the generated JSON rows
     * @param sQLContext       SQL context used to parse the rows and register the UDF
     * @return the generated dataset with a fixed column selection
     */
    public static Dataset<Row> generateTestRawTripDataset(long j, int i, int i2, List<String> list, JavaSparkContext javaSparkContext, SQLContext sQLContext) {
        boolean partitioned = list != null && !list.isEmpty();

        List<String> jsonRows = IntStream.range(i, i2)
                .mapToObj(index -> {
                    String suffix = "" + index;
                    return HoodieTestDataGenerator.generateGenericRecord("trip_" + suffix, "rider_" + suffix, "driver_" + suffix, j, false, false).toString();
                })
                .collect(Collectors.toList());

        if (partitioned) {
            // The explicit UDF1<String, String> target type gives the lambda parameter its String type.
            // floorMod keeps the index inside the list even for a negative row index.
            sQLContext.udf().register("partgen",
                    (org.apache.spark.sql.api.java.UDF1<String, String>) rowKey -> URLEncoder.encode(
                            list.get(Math.floorMod(Integer.parseInt(rowKey.split("_")[1]), list.size())),
                            StandardCharsets.UTF_8.toString()),
                    DataTypes.StringType);
        }

        Dataset<Row> rawTrips = sQLContext.read().json(javaSparkContext.parallelize(jsonRows));
        if (!partitioned) {
            return rawTrips.select("timestamp", "_row_key", "rider", "driver", "begin_lat", "begin_lon", "end_lat", "end_lon", "fare", "tip_history", "_hoodie_is_deleted");
        }
        return rawTrips
                .withColumn("datestr", functions.callUDF("partgen", new Column("_row_key")))
                .select("timestamp", "_row_key", "rider", "driver", "begin_lat", "begin_lon", "end_lat", "end_lon", "fare", "tip_history", "_hoodie_is_deleted", "datestr");
    }

    /**
     * Rebuilds the serializable lambda declared in {@code generateTestRawTripDataset} from its
     * serialized form.
     *
     * <p>NOTE(review): the method is marked synthetic and looks like decompiler output of
     * compiler-generated code; it is presumably not meant to be maintained by hand. Confirm
     * whether it should be dropped from the source.
     *
     * @param serializedLambda serialized description of the lambda to rebuild
     * @return the rebuilt lambda capturing the serialized partition list
     * @throws IllegalArgumentException if the description does not match the one known lambda
     */
    private static /* synthetic */ Object $deserializeLambda$(SerializedLambda serializedLambda) {
        String implMethodName = serializedLambda.getImplMethodName();
        // NOTE(review): a boolean assigned -1 and then switched on is a decompilation artifact
        // (presumably an int selector in the bytecode) and is not valid Java as written.
        boolean z = -1;
        // First pass: match the implementation method name by hash code, then by equality.
        switch (implMethodName.hashCode()) {
            case -20286129:
                if (implMethodName.equals("lambda$generateTestRawTripDataset$534cd30a$1")) {
                    z = false;
                    break;
                }
                break;
        }
        // Second pass: check the remaining fields of the description before rebuilding the lambda.
        switch (z) {
            case false:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/sql/api/java/UDF1") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("org/apache/hudi/client/TestBootstrap") && serializedLambda.getImplMethodSignature().equals("(Ljava/util/List;Ljava/lang/String;)Ljava/lang/String;")) {
                    // The only captured argument is the list of partition values.
                    List list = (List) serializedLambda.getCapturedArg(0);
                    // Same body as the "partgen" UDF registered in generateTestRawTripDataset.
                    return str -> {
                        return URLEncoder.encode((String) list.get(Integer.parseInt(str.split("_")[1]) % list.size()), StandardCharsets.UTF_8.toString());
                    };
                }
                break;
        }
        throw new IllegalArgumentException("Invalid lambda deserialization");
    }
}
