package org.apache.hudi.functional;

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.stream.Collectors;
import org.apache.hudi.avro.model.HoodieClusteringGroup;
import org.apache.hudi.avro.model.HoodieClusteringPlan;
import org.apache.hudi.client.WriteStatus;
import org.apache.hudi.common.config.HoodieStorageConfig;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
import org.apache.hudi.common.table.view.FileSystemViewStorageType;
import org.apache.hudi.common.testutils.HoodieTestDataGenerator;
import org.apache.hudi.common.testutils.HoodieTestUtils;
import org.apache.hudi.common.util.ClusteringUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.config.HoodieClusteringConfig;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
import org.apache.hudi.table.action.cluster.ClusteringPlanPartitionFilterMode;
import org.apache.hudi.testutils.HoodieSparkClientTestHarness;
import org.apache.hudi.testutils.MetadataMergeWriteStatus;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.sql.Row;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

/* loaded from: input_file:org/apache/hudi/functional/TestSparkSortAndSizeClustering.class */
/**
 * Functional test that writes records into a COPY_ON_WRITE table, schedules and executes
 * clustering, and verifies the shape of the clustering plan, the number of files written by
 * clustering and the number of rows readable afterwards.
 *
 * <p>The scenario is run twice: once with {@code hoodie.datasource.write.row.writer.enable}
 * set to {@code false} and once with it set to {@code true}.
 */
public class TestSparkSortAndSizeClustering extends HoodieSparkClientTestHarness {
    /** Avro schema (JSON) handed to the write config builder in {@link #getConfigBuilder()}. */
    private static final String TABLE_SCHEMA = "{\"type\": \"record\",\"name\": \"triprec\",\"fields\": [ {\"name\": \"timestamp\",\"type\": \"long\"},{\"name\": \"_row_key\", \"type\": \"string\"},{\"name\": \"partition_path\", \"type\": [\"null\", \"string\"], \"default\": null },{\"name\": \"trip_type\", \"type\": {\"type\": \"enum\", \"name\": \"TripType\", \"symbols\": [\"UNKNOWN\", \"UBERX\", \"BLACK\"], \"default\": \"UNKNOWN\"}},{\"name\": \"rider\", \"type\": \"string\"},{\"name\": \"driver\", \"type\": \"string\"},{\"name\": \"begin_lat\", \"type\": \"double\"},{\"name\": \"begin_lon\", \"type\": \"double\"},{\"name\": \"end_lat\", \"type\": \"double\"},{\"name\": \"end_lon\", \"type\": \"double\"},{\"name\": \"distance_in_meters\", \"type\": \"int\"},{\"name\": \"seconds_since_epoch\", \"type\": \"long\"},{\"name\": \"weight\", \"type\": \"float\"},{\"name\": \"nation\", \"type\": \"bytes\"},{\"name\":\"current_date\",\"type\": {\"type\": \"int\", \"logicalType\": \"date\"}},{\"name\":\"current_ts\",\"type\": {\"type\": \"long\"}},{\"name\":\"height\",\"type\":{\"type\":\"fixed\",\"name\":\"abc\",\"size\":5,\"logicalType\":\"decimal\",\"precision\":10,\"scale\":6}},{\"name\": \"city_to_state\", \"type\": {\"type\": \"map\", \"values\": \"string\"}},{\"name\": \"fare\",\"type\": {\"type\":\"record\", \"name\":\"fare\",\"fields\": [{\"name\": \"amount\",\"type\": \"double\"},{\"name\": \"currency\", \"type\": \"string\"}]}},{\"name\": \"tip_history\", \"default\": [], \"type\": {\"type\": \"array\", \"default\": [], \"items\": {\"type\": \"record\", \"default\": null, \"name\": \"tip_history\", \"fields\": [{\"name\": \"amount\", \"type\": \"double\"}, {\"name\": \"currency\", \"type\": \"string\"}]}}},{\"name\": \"_hoodie_is_deleted\", \"type\": \"boolean\", \"default\": false} ]}";

    /** Write config built by {@link #setup(int, Map)} and adjusted by the test scenario. */
    private HoodieWriteConfig config;

    /** Generator for the insert records written by {@code writeData}; created with argument 0. */
    private final HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator(0);

    /**
     * Prepares the test environment without any extra table properties.
     *
     * @param maxFileSize value passed to {@code parquetMaxFileSize} of the storage config
     * @throws IOException propagated from the underlying initialization
     */
    public void setup(int maxFileSize) throws IOException {
        setup(maxFileSize, Collections.emptyMap());
    }

    /**
     * Prepares the test environment: initializes the path, Spark contexts, test data generator
     * and file system, creates a COPY_ON_WRITE table and builds the write config and write client.
     *
     * @param maxFileSize value passed to {@code parquetMaxFileSize} of the storage config
     * @param options     extra properties merged into the key generator properties; the record
     *                    key field is set to {@code _row_key} afterwards and therefore wins
     * @throws IOException propagated from the underlying initialization
     */
    public void setup(int maxFileSize, Map<String, String> options) throws IOException {
        initPath();
        initSparkContexts();
        initTestDataGenerator();
        initFileSystem();

        Properties props = getPropertiesForKeyGen(true);
        props.putAll(options);
        props.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), "_row_key");

        metaClient = HoodieTestUtils.init(hadoopConf, basePath, HoodieTableType.COPY_ON_WRITE, props);
        config = getConfigBuilder()
            .withProps(props)
            .withAutoCommit(false)
            .withStorageConfig(HoodieStorageConfig.newBuilder()
                .parquetMaxFileSize(maxFileSize)
                .build())
            .withClusteringConfig(HoodieClusteringConfig.newBuilder()
                .withClusteringPlanPartitionFilterMode(ClusteringPlanPartitionFilterMode.RECENT_DAYS)
                .build())
            .build();
        writeClient = getHoodieWriteClient(config);
    }

    /** Releases the resources held by the test harness after each test. */
    @AfterEach
    public void tearDown() throws IOException {
        cleanupResources();
    }

    /** Runs the write-then-cluster scenario with the row writer disabled. */
    @Test
    public void testClusteringWithRDD() throws IOException {
        writeAndClustering(false);
    }

    /** Runs the write-then-cluster scenario with the row writer enabled. */
    @Test
    public void testClusteringWithRow() throws IOException {
        writeAndClustering(true);
    }

    /**
     * Writes 1000 records, schedules clustering, and asserts that the plan has one input group
     * producing two output file groups, that executing the plan writes two files, and that all
     * 1000 records can be read back.
     *
     * @param isRow value written to {@code hoodie.datasource.write.row.writer.enable}
     * @throws IOException propagated from {@link #setup(int)}
     */
    public void writeAndClustering(boolean isRow) throws IOException {
        setup(102400);
        config.setValue("hoodie.datasource.write.row.writer.enable", String.valueOf(isRow));
        config.setValue("hoodie.metadata.enable", "false");
        config.setValue("hoodie.clustering.plan.strategy.daybased.lookback.partitions", "1");
        config.setValue("hoodie.clustering.plan.strategy.target.file.max.bytes", String.valueOf(1024 * 1024));
        config.setValue("hoodie.clustering.plan.strategy.max.bytes.per.group", String.valueOf(2 * 1024 * 1024));

        int numRecords = 1000;
        String instantTime = writeClient.createNewInstantTime();
        writeData(instantTime, numRecords, true);

        String clusteringTime = (String) writeClient.scheduleClustering(Option.empty()).get();
        HoodieClusteringPlan plan = (HoodieClusteringPlan) ClusteringUtils
            .getClusteringPlan(metaClient, HoodieTimeline.getReplaceCommitRequestedInstant(clusteringTime))
            .map(pair -> pair.getRight())
            .get();
        Assertions.assertEquals(1, plan.getInputGroups().size(), "Clustering plan will contain 1 input group");

        HoodieClusteringGroup firstGroup = (HoodieClusteringGroup) plan.getInputGroups().get(0);
        Assertions.assertEquals(2, firstGroup.getNumOutputFileGroups(), "Clustering plan will generate 2 output groups");

        List<?> writeStats = (List<?>) writeClient.cluster(clusteringTime, true).getWriteStats().get();
        Assertions.assertEquals(2, writeStats.size(), "Clustering should write 2 files");

        List<Row> rows = readRecords();
        Assertions.assertEquals(numRecords, rows.size());
    }

    /**
     * Generates insert records, writes them at the given instant and optionally commits them.
     *
     * @param instantTime instant used for record generation, for starting the commit and for the insert
     * @param numRecords  number of insert records to generate
     * @param doCommit    when {@code true}, commits the write stats and asserts the commit succeeded
     * @return the collected write statuses of the insert
     */
    private List<WriteStatus> writeData(String instantTime, int numRecords, boolean doCommit) {
        // Kept as a raw JavaRDD: the record element type is not named anywhere in this file.
        JavaRDD insertRecords = jsc.parallelize(dataGen.generateInserts(instantTime, Integer.valueOf(numRecords)));
        metaClient = HoodieTableMetaClient.reload(metaClient);

        writeClient.startCommitWithTime(instantTime);
        List<WriteStatus> statuses = writeClient.insert(insertRecords, instantTime).collect();
        org.apache.hudi.testutils.Assertions.assertNoWriteErrors(statuses);

        if (doCommit) {
            boolean committed = writeClient.commitStats(
                instantTime,
                context.parallelize(statuses, 1),
                statuses.stream().map(WriteStatus::getStat).collect(Collectors.toList()),
                Option.empty(),
                metaClient.getCommitActionType());
            Assertions.assertTrue(committed);
        }

        // Reload so that callers observe the timeline state after the write.
        metaClient = HoodieTableMetaClient.reload(metaClient);
        return statuses;
    }

    /**
     * Loads the table through the {@code hudi} data source, registers it as a temp view and
     * returns every row of that view.
     */
    private List<Row> readRecords() {
        sparkSession.read()
            .format("hudi")
            .load(basePath + "/*/*/*/*")
            .createOrReplaceTempView("clutering_table");
        return sparkSession.sqlContext()
            .sql("select * from clutering_table")
            .collectAsList();
    }

    /**
     * Creates the base write config builder for the table at {@code basePath}: fixed schema,
     * parallelism of 2, {@link MetadataMergeWriteStatus} as write status class, table name
     * {@code clustering-table}, embedded timeline server enabled and an EMBEDDED_KV_STORE
     * file system view.
     *
     * @return a builder that callers can customize further before calling {@code build()}
     */
    public HoodieWriteConfig.Builder getConfigBuilder() {
        return HoodieWriteConfig.newBuilder()
            .withPath(basePath)
            .withSchema(TABLE_SCHEMA)
            .withParallelism(2, 2)
            .withWriteStatusClass(MetadataMergeWriteStatus.class)
            .forTable("clustering-table")
            .withEmbeddedTimelineServerEnabled(true)
            .withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder()
                .withStorageType(FileSystemViewStorageType.EMBEDDED_KV_STORE)
                .build());
    }
}
