package org.apache.hudi.utilities.sources.helpers;

import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.testutils.HoodieSparkClientTestHarness;
import org.apache.hudi.utilities.schema.FilebasedSchemaProvider;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.RowFactory;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

/* loaded from: input_file:org/apache/hudi/utilities/sources/helpers/TestCloudObjectsSelectorCommon.class */
/**
 * Tests for {@link CloudObjectsSelectorCommon#loadAsDataset}, exercised with the Spark session
 * supplied by the test harness and JSON fixtures under
 * {@code src/test/resources/data/partitioned}.
 */
public class TestCloudObjectsSelectorCommon extends HoodieSparkClientTestHarness {
    /** File format argument passed to every {@code loadAsDataset} call in this class. */
    private static final String JSON_FORMAT = "json";

    /** Root of the partitioned JSON fixtures (path is relative to the module directory). */
    private static final String PARTITIONED_DATA_ROOT = "src/test/resources/data/partitioned/";
    private static final String US_CA_FILE = PARTITIONED_DATA_ROOT + "country=US/state=CA/data.json";
    private static final String US_TX_FILE = PARTITIONED_DATA_ROOT + "country=US/state=TX/data.json";
    private static final String IND_TS_FILE = PARTITIONED_DATA_ROOT + "country=IND/state=TS/data.json";

    /** Classpath location of the Avro schema used by the schema-provider tests. */
    private static final String SCHEMA_RESOURCE = "schema/sample_data_schema.avsc";

    private static final String PARTITION_FIELDS_FROM_PATH_KEY =
            "hoodie.streamer.source.cloud.data.partition.fields.from.path";
    private static final String PARTITION_MAX_SIZE_KEY =
            "hoodie.streamer.source.cloud.data.partition.max.size";
    private static final String SCHEMA_FILE_KEY = "hoodie.streamer.schemaprovider.source.schema.file";
    private static final String SCHEMA_PROVIDER_CLASS_KEY = "hoodie.streamer.schema.provider.class.name";

    // NOTE(review): these two keys use the "hoodie.deltastreamer." prefix while every other test uses
    // "hoodie.streamer."; presumably they exercise a legacy alias of the same settings. They are kept
    // verbatim -- confirm against the config definitions before changing them.
    private static final String LEGACY_COMMA_SEPARATED_PATH_FORMAT_KEY =
            "hoodie.deltastreamer.source.cloud.data.reader.comma.separated.path.format";
    private static final String LEGACY_PARTITION_FIELDS_FROM_PATH_KEY =
            "hoodie.deltastreamer.source.cloud.data.partition.fields.from.path";

    @BeforeEach
    void setUp() {
        initSparkContexts();
    }

    @AfterEach
    public void teardown() throws Exception {
        cleanupResources();
    }

    /** An empty metadata list must produce an empty {@code Option}. */
    @Test
    public void emptyMetadataReturnsEmptyOption() {
        CloudObjectsSelectorCommon selector = new CloudObjectsSelectorCommon(new TypedProperties());

        Assertions.assertFalse(
                selector.loadAsDataset(this.sparkSession, Collections.emptyList(), JSON_FORMAT, Option.empty(), 1)
                        .isPresent());
    }

    /** A single fixture file is read into exactly one row holding its data column. */
    @Test
    public void filesFromMetadataRead() {
        List<CloudObjectMetadata> input = singleUsCaObject();
        CloudObjectsSelectorCommon selector = new CloudObjectsSelectorCommon(new TypedProperties());

        Dataset<?> rows = requireDataset(
                selector.loadAsDataset(this.sparkSession, input, JSON_FORMAT, Option.empty(), 1));

        Assertions.assertEquals(1L, rows.count());
        Assertions.assertEquals(Collections.singletonList(RowFactory.create("some data")), rows.collectAsList());
    }

    /** Partition fields configured as {@code country,state} are appended to the row from the file path. */
    @Test
    public void partitionValueAddedToRow() {
        List<CloudObjectMetadata> input = singleUsCaObject();
        TypedProperties props = new TypedProperties();
        props.put(PARTITION_FIELDS_FROM_PATH_KEY, "country,state");
        CloudObjectsSelectorCommon selector = new CloudObjectsSelectorCommon(props);

        Dataset<?> rows = requireDataset(
                selector.loadAsDataset(this.sparkSession, input, JSON_FORMAT, Option.empty(), 1));

        Assertions.assertEquals(1L, rows.count());
        Assertions.assertEquals(
                Collections.singletonList(RowFactory.create("some data", "US", "CA")), rows.collectAsList());
    }

    /** Same expectation as {@link #partitionValueAddedToRow()}, but with a file-based schema provider supplied. */
    @Test
    public void loadDatasetWithSchema() {
        TypedProperties props = schemaAwareProperties();
        List<CloudObjectMetadata> input = singleUsCaObject();
        CloudObjectsSelectorCommon selector = new CloudObjectsSelectorCommon(props);

        Dataset<?> rows = requireDataset(selector.loadAsDataset(
                this.sparkSession, input, JSON_FORMAT, Option.of(new FilebasedSchemaProvider(props, this.jsc)), 1));

        Assertions.assertEquals(1L, rows.count());
        Assertions.assertEquals(
                Collections.singletonList(RowFactory.create("some data", "US", "CA")), rows.collectAsList());
    }

    /**
     * Loads three fixture files with the partition max size property set to {@code "1"} and
     * {@code 30} as the last {@code loadAsDataset} argument, then compares the rows as a set so the
     * assertion does not depend on row order.
     */
    @Test
    public void loadDatasetWithSchemaAndRepartition() {
        TypedProperties props = schemaAwareProperties();
        props.put(PARTITION_MAX_SIZE_KEY, "1");
        List<CloudObjectMetadata> input = Arrays.asList(
                new CloudObjectMetadata(US_CA_FILE, 1000L),
                new CloudObjectMetadata(US_TX_FILE, 1000L),
                new CloudObjectMetadata(IND_TS_FILE, 1000L));
        CloudObjectsSelectorCommon selector = new CloudObjectsSelectorCommon(props);

        Dataset<?> rows = requireDataset(selector.loadAsDataset(
                this.sparkSession, input, JSON_FORMAT, Option.of(new FilebasedSchemaProvider(props, this.jsc)), 30));

        Assertions.assertEquals(
                new HashSet<Object>(Arrays.asList(
                        RowFactory.create("some data", "US", "CA"),
                        RowFactory.create("some data", "US", "TX"),
                        RowFactory.create("some data", "IND", "TS"))),
                new HashSet<Object>(rows.collectAsList()));
    }

    /** A configured partition field that does not occur in the file path yields a {@code null} column value. */
    @Test
    public void partitionKeyNotPresentInPath() {
        List<CloudObjectMetadata> input = singleUsCaObject();
        TypedProperties props = new TypedProperties();
        props.put(LEGACY_COMMA_SEPARATED_PATH_FORMAT_KEY, "false");
        props.put(LEGACY_PARTITION_FIELDS_FROM_PATH_KEY, "unknown");
        CloudObjectsSelectorCommon selector = new CloudObjectsSelectorCommon(props);

        Dataset<?> rows = requireDataset(
                selector.loadAsDataset(this.sparkSession, input, JSON_FORMAT, Option.empty(), 1));

        Assertions.assertEquals(1L, rows.count());
        Assertions.assertEquals(Collections.singletonList(RowFactory.create("some data", null)), rows.collectAsList());
    }

    /** Returns metadata for the single {@code country=US/state=CA} fixture file with a size of 1. */
    private static List<CloudObjectMetadata> singleUsCaObject() {
        return Collections.singletonList(new CloudObjectMetadata(US_CA_FILE, 1L));
    }

    /**
     * Builds the properties shared by the schema-provider tests: the schema file path, the
     * schema provider class name, and {@code country,state} as partition fields.
     *
     * @throws NullPointerException if the schema resource is not on the classpath; the message
     *     names the missing resource instead of surfacing a bare NPE from {@code getPath()}
     */
    private static TypedProperties schemaAwareProperties() {
        String schemaPath = Objects.requireNonNull(
                TestCloudObjectsSelectorCommon.class.getClassLoader().getResource(SCHEMA_RESOURCE),
                "Test resource not found on classpath: " + SCHEMA_RESOURCE).getPath();
        TypedProperties props = new TypedProperties();
        props.put(SCHEMA_FILE_KEY, schemaPath);
        props.put(SCHEMA_PROVIDER_CLASS_KEY, FilebasedSchemaProvider.class.getName());
        props.put(PARTITION_FIELDS_FROM_PATH_KEY, "country,state");
        return props;
    }

    /**
     * Asserts that {@code result} holds a dataset and returns it.
     *
     * <p>Wildcards are used so callers need no casts and no raw types.
     */
    private static Dataset<?> requireDataset(Option<? extends Dataset<?>> result) {
        Assertions.assertTrue(result.isPresent());
        return result.get();
    }
}
