package org.apache.hudi.utilities.sources;

import java.io.IOException;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.hudi.client.SparkRDDWriteClient;
import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.model.HoodieAvroPayload;
import org.apache.hudi.common.model.HoodieAvroRecord;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion;
import org.apache.hudi.common.testutils.SchemaTestUtil;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieArchivalConfig;
import org.apache.hudi.config.HoodieCleanConfig;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.testutils.SparkClientFunctionalTestHarness;
import org.apache.hudi.utilities.schema.FilebasedSchemaProvider;
import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.hudi.utilities.sources.helpers.IncrSourceHelper;
import org.apache.hudi.utilities.sources.helpers.gcs.FileDataFetcher;
import org.apache.hudi.utilities.sources.helpers.gcs.FilePathsFetcher;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SparkSession;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import org.mockito.ArgumentMatchers;
import org.mockito.Mock;
import org.mockito.Mockito;
import org.mockito.MockitoAnnotations;

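/**
 * Functional tests for {@link GcsEventsHoodieIncrSource}: GCS object-metadata records are written to a
 * Hudi table under a temporary directory, then read back incrementally with mocked
 * {@link FilePathsFetcher} and {@link FileDataFetcher} collaborators.
 */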
public class TestGcsEventsHoodieIncrSource extends SparkClientFunctionalTestHarness {

    /** Temporary directory for the test; {@link #basePath()} derives the table location from it. */
    @TempDir
    protected Path tempDir;

    /** Mock (initialized in {@link #setUp()}) used to stub and verify GCS file-path lookups. */
    @Mock
    FilePathsFetcher filePathsFetcher;

    /** Mock (initialized in {@link #setUp()}) used to stub and verify data-file reads. */
    @Mock
    FileDataFetcher fileDataFetcher;
    // NOTE(review): not assigned anywhere in this class, so readAndAssert hands it to the source as-is
    // (null unless a subclass sets it) — confirm that is intended.
    protected FilebasedSchemaProvider schemaProvider;
    // Meta client for the table at basePath(); assigned in setUp() and read by getWriteConfig().
    private HoodieTableMetaClient metaClient;
    private static final Logger LOG = LogManager.getLogger(TestGcsEventsHoodieIncrSource.class);

    /**
     * Simple bean with an {@code id} and a {@code text} value. The tests turn a list of these into a
     * DataFrame that stands in for the contents of the fetched data files.
     */
    public static class GcsDataRecord {
        public String id;
        public String text;

        /**
         * @param id   value exposed through {@link #getId()}
         * @param text value exposed through {@link #getText()}
         */
        public GcsDataRecord(String id, String text) {
            this.id = id;
            this.text = text;
        }

        /** @return the id given at construction (or later written to the public field) */
        public String getId() {
            return id;
        }

        /** @return the text given at construction (or later written to the public field) */
        public String getText() {
            return text;
        }
    }

    /**
     * {@link SchemaProvider} whose source schema is the GCS metadata Avro schema loaded from a
     * classpath resource when the provider is constructed.
     */
    public static class MetadataSchemaProvider extends SchemaProvider {
        /** Classpath location of the Avro schema used for the metadata records. */
        private static final String SCHEMA_RESOURCE = "/delta-streamer-config/gcs-metadata.avsc";

        private final Schema schema;

        /** Creates the provider with empty properties and loads the schema once. */
        public MetadataSchemaProvider() {
            super(new TypedProperties());
            schema = SchemaTestUtil.getSchemaFromResource(TestGcsEventsHoodieIncrSource.class, SCHEMA_RESOURCE, true);
        }

        /** @return the schema loaded in the constructor */
        public Schema getSourceSchema() {
            return schema;
        }
    }

    /**
     * Prepares each test: initializes the {@code @Mock} fields and obtains a meta client for the
     * table located at {@link #basePath()}.
     *
     * @throws IOException declared for the meta client lookup
     */
    @BeforeEach
    public void setUp() throws IOException {
        // The two steps are independent of each other.
        MockitoAnnotations.initMocks(this);
        metaClient = getHoodieMetaClient(hadoopConf(), basePath());
    }

    /**
     * @return the absolute location of the temporary directory, rendered as a URI string
     */
    public String basePath() {
        Path absoluteDir = tempDir.toAbsolutePath();
        return absoluteDir.toUri().toString();
    }

    /**
     * Writes metadata at commit "1" and reads with checkpoint "1": no rows are expected, the
     * checkpoint stays at the written commit, and neither fetcher is invoked.
     */
    @Test
    public void shouldNotFindNewDataIfCommitTimeOfWriteAndReadAreEqual() throws IOException {
        Pair<String, List<HoodieRecord>> written = writeGcsMetadataRecords("1");
        String expectedCheckpoint = written.getKey();

        readAndAssert(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT, Option.of("1"), 0, expectedCheckpoint);

        // Neither collaborator may have been touched.
        Mockito.verify(filePathsFetcher, Mockito.never())
            .getGcsFilePaths((JavaSparkContext) Mockito.any(), (Dataset) Mockito.any(), ArgumentMatchers.anyBoolean());
        Mockito.verify(fileDataFetcher, Mockito.never())
            .fetchFileData((SparkSession) Mockito.any(), (List) Mockito.any(), (TypedProperties) Mockito.any());
    }

    /**
     * Writes metadata at commit "2" and reads with checkpoint "1": the four stubbed data rows are
     * expected, the checkpoint advances to the written commit, and each fetcher is invoked once.
     */
    @Test
    public void shouldFetchDataIfCommitTimeForReadsLessThanForWrites() throws IOException {
        Pair<String, List<HoodieRecord>> written = writeGcsMetadataRecords("2");

        List<String> filePaths = Arrays.asList("data-file-1.json", "data-file-2.json");
        List<GcsDataRecord> fileContents = Arrays.asList(
            new GcsDataRecord("1", "Hello 1"),
            new GcsDataRecord("2", "Hello 2"),
            new GcsDataRecord("3", "Hello 3"),
            new GcsDataRecord("4", "Hello 4"));

        // Stub path discovery, then the read of exactly those paths.
        Mockito.when(filePathsFetcher.getGcsFilePaths((JavaSparkContext) Mockito.any(), (Dataset) Mockito.any(), ArgumentMatchers.anyBoolean()))
            .thenReturn(filePaths);
        Mockito.when(fileDataFetcher.fetchFileData((SparkSession) Mockito.any(), ArgumentMatchers.eq(filePaths), (TypedProperties) Mockito.any()))
            .thenReturn(Option.of(spark().createDataFrame(fileContents, GcsDataRecord.class)));

        readAndAssert(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT, Option.of("1"), 4, written.getKey());

        Mockito.verify(filePathsFetcher, Mockito.times(1))
            .getGcsFilePaths((JavaSparkContext) Mockito.any(), (Dataset) Mockito.any(), ArgumentMatchers.anyBoolean());
        Mockito.verify(fileDataFetcher, Mockito.times(1))
            .fetchFileData((SparkSession) Mockito.any(), ArgumentMatchers.eq(filePaths), (TypedProperties) Mockito.any());
    }

    /**
     * Builds a {@link GcsEventsHoodieIncrSource} over the test table with the mocked fetchers, fetches
     * one batch (source limit 100) and checks its row count and returned checkpoint.
     *
     * @param missingCheckpointStrategy strategy written into the source properties
     * @param checkpointToRead          checkpoint passed to {@code fetchNextBatch}
     * @param expectedCount             expected number of rows; 0 means the batch must be absent
     * @param expectedCheckpoint        checkpoint the source is expected to return
     */
    private void readAndAssert(IncrSourceHelper.MissingCheckpointStrategy missingCheckpointStrategy, Option<String> checkpointToRead, int expectedCount, String expectedCheckpoint) {
        GcsEventsHoodieIncrSource source = new GcsEventsHoodieIncrSource(
            setProps(missingCheckpointStrategy), jsc(), spark(), this.schemaProvider, this.filePathsFetcher, this.fileDataFetcher);
        Pair batch = source.fetchNextBatch(checkpointToRead, 100L);
        Option<?> rows = (Option<?>) batch.getLeft();
        String nextCheckpoint = (String) batch.getRight();

        Assertions.assertNotNull(nextCheckpoint);
        if (expectedCount == 0) {
            Assertions.assertFalse(rows.isPresent());
        } else {
            // JUnit 5 takes (expected, actual); keeping that order makes failure messages correct.
            Assertions.assertEquals(expectedCount, ((Dataset<?>) rows.get()).count());
        }
        Assertions.assertEquals(expectedCheckpoint, nextCheckpoint);
    }

    /**
     * Creates a Hudi record describing one GCS object, using the schema from
     * {@link MetadataSchemaProvider}. The record key is {@code id:<bucket>/<objectName>/<generation>}
     * and the bucket doubles as the partition path.
     *
     * @param commitTime numeric string stored (parsed as a long) in the {@code timestamp} field
     * @param objectName value of the {@code name} field
     * @param bucket     value of the {@code bucket} and {@code partition_path} fields
     * @param generation value of the {@code generation} field
     * @return an Avro-backed record keyed by the object id and bucket
     */
    private HoodieRecord getGcsMetadataRecord(String commitTime, String objectName, String bucket, String generation) {
        Schema metadataSchema = new MetadataSchemaProvider().getSourceSchema();
        LOG.info("schema: " + metadataSchema);

        String objectId = "id:" + bucket + "/" + objectName + "/" + generation;
        String mediaLink = String.format("https://storage.googleapis.com/download/storage/v1/b/%s/o/%s?generation=%s&alt=media", bucket, objectName, generation);
        String selfLink = String.format("https://www.googleapis.com/storage/v1/b/%s/o/%s", bucket, objectName);

        GenericData.Record fields = new GenericData.Record(metadataSchema);

        // Key, partition and ordering fields.
        fields.put("_row_key", objectId);
        fields.put("partition_path", bucket);
        fields.put("timestamp", Long.parseLong(commitTime));

        // Object metadata; everything not derived from the arguments is a fixed sample value.
        fields.put("bucket", bucket);
        fields.put("contentLanguage", "en");
        fields.put("contentType", "application/octet-stream");
        fields.put("crc32c", "oRB3Aw==");
        fields.put("etag", "CP7EwYCu6/kCEAE=");
        fields.put("generation", generation);
        fields.put("id", objectId);
        fields.put("kind", "storage#object");
        fields.put("md5Hash", "McsS8FkcDSrB3cGfb18ysA==");
        fields.put("mediaLink", mediaLink);
        fields.put("metageneration", "1");
        fields.put("name", objectName);
        fields.put("selfLink", selfLink);
        fields.put("size", "370");
        fields.put("storageClass", "STANDARD");
        fields.put("timeCreated", "2022-08-29T05:52:55.869Z");
        fields.put("timeStorageClassUpdated", "2022-08-29T05:52:55.869Z");
        fields.put("updated", "2022-08-29T05:52:55.869Z");

        HoodieKey key = new HoodieKey(objectId, bucket);
        return new HoodieAvroRecord<>(key, new HoodieAvroPayload(Option.of(fields)));
    }

    /**
     * Assembles the write config for the test table: the base builder from
     * {@link #getConfigBuilder(String, HoodieTableMetaClient)} plus archival, clean and metadata settings.
     *
     * @return the built write config
     */
    private HoodieWriteConfig getWriteConfig() {
        HoodieWriteConfig.Builder builder = getConfigBuilder(basePath(), metaClient);

        HoodieArchivalConfig archival = HoodieArchivalConfig.newBuilder()
            .archiveCommitsWith(2, 3)
            .build();
        HoodieCleanConfig clean = HoodieCleanConfig.newBuilder()
            .retainCommits(1)
            .build();
        HoodieMetadataConfig metadata = HoodieMetadataConfig.newBuilder()
            .withMaxNumDeltaCommitsBeforeCompaction(1)
            .build();

        return builder
            .withArchivalConfig(archival)
            .withCleanConfig(clean)
            .withMetadataConfig(metadata)
            .build();
    }

    /**
     * Upserts four GCS metadata records (data-file-1..4.json in bucket-1, generation 1) into the test
     * table at the given commit time and asserts that the write reported no errors.
     *
     * @param commitTime instant time used both to start the commit and as each record's timestamp
     * @return the commit time paired with the records that were written
     * @throws IOException declared for the write client creation
     */
    private Pair<String, List<HoodieRecord>> writeGcsMetadataRecords(String commitTime) throws IOException {
        List<HoodieRecord> metadataRecords = Arrays.asList(
            getGcsMetadataRecord(commitTime, "data-file-1.json", "bucket-1", "1"),
            getGcsMetadataRecord(commitTime, "data-file-2.json", "bucket-1", "1"),
            getGcsMetadataRecord(commitTime, "data-file-3.json", "bucket-1", "1"),
            getGcsMetadataRecord(commitTime, "data-file-4.json", "bucket-1", "1"));

        // Close the client when done so it does not leak resources across tests.
        try (SparkRDDWriteClient writeClient = getHoodieWriteClient(getWriteConfig())) {
            writeClient.startCommitWithTime(commitTime);
            // collect() runs inside the try block so the write is materialized before the client closes.
            org.apache.hudi.testutils.Assertions.assertNoWriteErrors(
                writeClient.upsert(jsc().parallelize(metadataRecords, 1), commitTime).collect());
        }
        return Pair.of(commitTime, metadataRecords);
    }

    /**
     * Builds the source properties: the incremental-source path (the test table), the
     * missing-checkpoint strategy, and the data file format ("json").
     *
     * @param missingCheckpointStrategy strategy whose enum name is stored in the properties
     * @return the properties wrapped as {@link TypedProperties}
     */
    private TypedProperties setProps(IncrSourceHelper.MissingCheckpointStrategy missingCheckpointStrategy) {
        Properties sourceProps = new Properties();
        sourceProps.setProperty("hoodie.deltastreamer.source.gcsincr.datafile.format", "json");
        sourceProps.setProperty("hoodie.deltastreamer.source.hoodieincr.missing.checkpoint.strategy", missingCheckpointStrategy.name());
        sourceProps.setProperty("hoodie.deltastreamer.source.hoodieincr.path", basePath());
        return new TypedProperties(sourceProps);
    }

    /**
     * Starts a write-config builder for the given table: path, the metadata schema from
     * {@link MetadataSchemaProvider}, parallelism of 2 throughout, the current timeline layout
     * version, and the table name read from the meta client.
     *
     * @param tablePath             base path of the table
     * @param hoodieTableMetaClient meta client supplying the table name
     * @return the partially configured builder
     */
    private HoodieWriteConfig.Builder getConfigBuilder(String tablePath, HoodieTableMetaClient hoodieTableMetaClient) {
        String schemaJson = new MetadataSchemaProvider().getSourceSchema().toString();
        String tableName = hoodieTableMetaClient.getTableConfig().getTableName();
        int layoutVersion = TimelineLayoutVersion.CURR_VERSION.intValue();

        return HoodieWriteConfig.newBuilder()
            .withPath(tablePath)
            .withSchema(schemaJson)
            .withParallelism(2, 2)
            .withBulkInsertParallelism(2)
            .withFinalizeWriteParallelism(2)
            .withDeleteParallelism(2)
            .withTimelineLayoutVersion(layoutVersion)
            .forTable(tableName);
    }
}
