package org.apache.hudi.utilities;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.attribute.FileAttribute;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import jodd.io.FileUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.DataSourceWriteOptions;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.common.config.HoodieTimeGeneratorConfig;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.model.HoodieColumnRangeMetadata;
import org.apache.hudi.common.model.HoodieFileGroup;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.timeline.TimeGenerator;
import org.apache.hudi.common.table.timeline.TimeGenerators;
import org.apache.hudi.common.table.timeline.TimelineUtils;
import org.apache.hudi.common.table.view.FileSystemViewStorageType;
import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
import org.apache.hudi.common.testutils.HoodieTestUtils;
import org.apache.hudi.common.testutils.RawTripTestPayload;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieCompactionConfig;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.exception.HoodieValidationException;
import org.apache.hudi.hadoop.fs.HadoopFSUtils;
import org.apache.hudi.storage.HoodieStorage;
import org.apache.hudi.storage.StoragePath;
import org.apache.hudi.storage.StoragePathInfo;
import org.apache.hudi.storage.hadoop.HoodieHadoopStorage;
import org.apache.hudi.testutils.HoodieSparkClientTestBase;
import org.apache.hudi.utilities.HoodieMetadataTableValidator;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import org.junit.jupiter.params.provider.ValueSource;
import org.mockito.Mockito;

/* loaded from: input_file:org/apache/hudi/utilities/TestHoodieMetadataTableValidator.class */
public class TestHoodieMetadataTableValidator extends HoodieSparkClientTestBase {

    /* loaded from: input_file:org/apache/hudi/utilities/TestHoodieMetadataTableValidator$MockHoodieMetadataTableValidator.class */
    /**
     * Test double for {@link HoodieMetadataTableValidator}: its three lookup methods ignore their
     * arguments and hand back whatever the test injected through the setters.
     */
    class MockHoodieMetadataTableValidator extends HoodieMetadataTableValidator {
        // Canned answers; each stays null until the matching setter is called.
        private Option<String> partitionCreationTime;
        private List<String> fsPartitionsToReturn;
        private List<String> metadataPartitionsToReturn;

        public MockHoodieMetadataTableValidator(JavaSparkContext jsc, HoodieMetadataTableValidator.Config cfg) {
            super(jsc, cfg);
        }

        /** Returns the injected file-system partition list. */
        List<String> getPartitionsFromFileSystem(HoodieEngineContext engineContext, StoragePath basePath, HoodieStorage storage, HoodieTimeline completedTimeline) {
            return fsPartitionsToReturn;
        }

        /** Returns the injected metadata-table partition list. */
        List<String> getPartitionsFromMDT(HoodieEngineContext engineContext, StoragePath basePath, HoodieStorage storage) {
            return metadataPartitionsToReturn;
        }

        /** Returns the injected partition creation time. */
        Option<String> getPartitionCreationInstant(HoodieStorage storage, StoragePath basePath, String partition) {
            return partitionCreationTime;
        }

        void setFsPartitionsToReturn(List<String> partitions) {
            fsPartitionsToReturn = partitions;
        }

        void setMetadataPartitionsToReturn(List<String> partitions) {
            metadataPartitionsToReturn = partitions;
        }

        void setPartitionCreationTime(Option<String> creationTime) {
            partitionCreationTime = creationTime;
        }
    }

    /* loaded from: input_file:org/apache/hudi/utilities/TestHoodieMetadataTableValidator$MockHoodieMetadataTableValidatorForRli.class */
    /**
     * Test double for {@link HoodieMetadataTableValidator} that relocates a file immediately before
     * delegating to the parent's record-location lookup.
     *
     * <p>The two paths must be set through {@link #setDestFilePath(String)} and
     * {@link #setOriginalFilePath(String)} before the lookup is triggered.
     */
    static class MockHoodieMetadataTableValidatorForRli extends HoodieMetadataTableValidator {
        private String destFilePath;
        private String originalFilePath;

        public MockHoodieMetadataTableValidatorForRli(JavaSparkContext javaSparkContext, HoodieMetadataTableValidator.Config config) {
            super(javaSparkContext, config);
        }

        /**
         * Calls {@code FileUtil.move(destFilePath, originalFilePath)} and then delegates to the
         * parent implementation with the unchanged arguments.
         *
         * @throws HoodieException if an {@link IOException} is raised inside the try block; the
         *     original exception is attached as the cause
         */
        JavaPairRDD<String, Pair<String, String>> getRecordLocationsFromRLI(HoodieSparkEngineContext hoodieSparkEngineContext, String str, String str2) {
            try {
                FileUtil.move(this.destFilePath, this.originalFilePath);
                return super.getRecordLocationsFromRLI(hoodieSparkEngineContext, str, str2);
            } catch (IOException e) {
                // Attach the IOException so the real reason for the failure is visible in the test report.
                throw new HoodieException("Move should not have failed", e);
            }
        }

        /** Sets the path passed as the first argument to {@code FileUtil.move}. */
        public void setDestFilePath(String str) {
            this.destFilePath = str;
        }

        /** Sets the path passed as the second argument to {@code FileUtil.move}. */
        public void setOriginalFilePath(String str) {
            this.originalFilePath = str;
        }
    }

    /**
     * Argument source for {@code testAdditionalFilesInMetadata}: every value of
     * {@code -1, 1, 3, 4, 5} is paired once with {@code true} and once with {@code false}.
     *
     * <p>The stream is built from typed varargs rather than {@code Object[]} casts so that the
     * element type stays {@link Arguments} and matches the declared return type.
     */
    private static Stream<Arguments> lastNFileSlicesTestArgs() {
        return Stream.of(-1, 1, 3, 4, 5)
            .flatMap(lastN -> Stream.of(Arguments.of(lastN, true), Arguments.of(lastN, false)));
    }

    /**
     * Argument source for {@code testMetadataTableValidation}. Each entry is
     * (first view storage type, second view storage type, boolean flag); the first entry leaves
     * both storage types {@code null}.
     *
     * <p>The stream is built from typed varargs rather than {@code Object[]} casts so that the
     * element type stays {@link Arguments} and matches the declared return type.
     */
    private static Stream<Arguments> viewStorageArgs() {
        String memory = FileSystemViewStorageType.MEMORY.name();
        String spillableDisk = FileSystemViewStorageType.SPILLABLE_DISK.name();
        return Stream.of(
            Arguments.of(null, null, false),
            Arguments.of(memory, memory, true),
            Arguments.of(spillableDisk, spillableDisk, false),
            Arguments.of(memory, spillableDisk, true));
    }

    /**
     * Feeds two range entries for {@code col1} and two for {@code col2} (all for the same file)
     * into {@code aggregateColumnStats} and checks that exactly one merged entry per column comes
     * back with the expected min/max values and summed counters.
     */
    @Test
    public void testAggregateColumnStats() {
        final String filePath = "path/to/file1";
        // Raw collection on purpose: the element type parameter expected by the validator is not visible here.
        List perFileStats = new ArrayList();
        perFileStats.add(HoodieColumnRangeMetadata.create(filePath, "col1", 1, 5, 0L, 10L, 100L, 200L));
        perFileStats.add(HoodieColumnRangeMetadata.create(filePath, "col1", 1, 10, 5L, 10L, 100L, 200L));
        perFileStats.add(HoodieColumnRangeMetadata.create(filePath, "col2", 3, 8, 1L, 15L, 120L, 250L));
        perFileStats.add(HoodieColumnRangeMetadata.create(filePath, "col2", 5, 9, 4L, 5L, 80L, 150L));

        TreeSet aggregated = HoodieMetadataTableValidator.aggregateColumnStats(filePath, perFileStats);
        Assertions.assertEquals(2, aggregated.size());

        int col1Hits = 0;
        int col2Hits = 0;
        for (Object element : aggregated) {
            HoodieColumnRangeMetadata merged = (HoodieColumnRangeMetadata) element;
            switch (merged.getColumnName()) {
                case "col1":
                    Assertions.assertEquals(1, merged.getMinValue());
                    Assertions.assertEquals(10, merged.getMaxValue());
                    col1Hits++;
                    break;
                case "col2":
                    Assertions.assertEquals(3, merged.getMinValue());
                    Assertions.assertEquals(9, merged.getMaxValue());
                    col2Hits++;
                    break;
                default:
                    break;
            }
            // Both columns were chosen so that their summed counters coincide.
            Assertions.assertEquals(5L, merged.getNullCount());
            Assertions.assertEquals(20L, merged.getValueCount());
            Assertions.assertEquals(200L, merged.getTotalSize());
            Assertions.assertEquals(400L, merged.getTotalUncompressedSize());
        }
        Assertions.assertEquals(1, col1Hits);
        Assertions.assertEquals(1, col2Hits);
    }

    /**
     * Writes a bulk insert followed by an upsert into a MERGE_ON_READ table with the record index
     * enabled, optionally duplicates one log file under a name ending in {@code "000"}, and expects
     * the validator to report no failures.
     *
     * @param str  value for {@code viewStorageTypeForFSListing}; only applied when both types are non-null
     * @param str2 value for {@code viewStorageTypeForMetadata}; only applied when both types are non-null
     * @param z    when {@code true}, the first log file found under the base path is copied to a
     *             sibling whose name is cut after the last {@code '-'} and suffixed with {@code "000"}
     */
    @ParameterizedTest
    @MethodSource({"viewStorageArgs"})
    public void testMetadataTableValidation(String str, String str2, boolean z) throws Exception {
        HashMap<String, String> writeOptions = new HashMap<>();
        writeOptions.put(DataSourceWriteOptions.TABLE_NAME().key(), "test_table");
        writeOptions.put(DataSourceWriteOptions.TABLE_TYPE().key(), "MERGE_ON_READ");
        writeOptions.put(DataSourceWriteOptions.RECORDKEY_FIELD().key(), "_row_key");
        writeOptions.put(DataSourceWriteOptions.PRECOMBINE_FIELD().key(), "timestamp");
        writeOptions.put(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), "partition_path");

        // First commit: bulk insert that (re)creates the table.
        makeInsertDf("000", 5)
            .cache()
            .write()
            .format("hudi")
            .options(writeOptions)
            .option(DataSourceWriteOptions.OPERATION().key(), WriteOperationType.BULK_INSERT.value())
            .option(HoodieMetadataConfig.RECORD_INDEX_ENABLE_PROP.key(), "true")
            .option(HoodieMetadataConfig.RECORD_INDEX_MIN_FILE_GROUP_COUNT_PROP.key(), "1")
            .option(HoodieMetadataConfig.RECORD_INDEX_MAX_FILE_GROUP_COUNT_PROP.key(), "1")
            .mode(SaveMode.Overwrite)
            .save(this.basePath);

        // Second commit: upsert appended to the same table.
        makeUpdateDf("001", 5)
            .cache()
            .write()
            .format("hudi")
            .options(writeOptions)
            .option(DataSourceWriteOptions.OPERATION().key(), WriteOperationType.UPSERT.value())
            .option(HoodieMetadataConfig.RECORD_INDEX_ENABLE_PROP.key(), "true")
            .option(HoodieMetadataConfig.RECORD_INDEX_MIN_FILE_GROUP_COUNT_PROP.key(), "1")
            .option(HoodieMetadataConfig.RECORD_INDEX_MAX_FILE_GROUP_COUNT_PROP.key(), "1")
            .mode(SaveMode.Append)
            .save(this.basePath);

        if (z) {
            // Only the first log file encountered is duplicated.
            for (StoragePathInfo entry : this.storage.listFiles(new StoragePath(this.basePath))) {
                StoragePath candidate = entry.getPath();
                if (!FSUtils.isLogFile(candidate)) {
                    continue;
                }
                String sourceName = candidate.toString();
                String copyName = sourceName.substring(0, sourceName.lastIndexOf("-") + 1) + "000";
                HadoopFSUtils.getFs(candidate, new Configuration(false))
                    .copyFromLocalFile(new Path(sourceName), new Path(copyName));
                break;
            }
        }

        HoodieMetadataTableValidator.Config validatorConfig = new HoodieMetadataTableValidator.Config();
        validatorConfig.basePath = "file:" + this.basePath;
        validatorConfig.validateLatestFileSlices = true;
        validatorConfig.validateAllFileGroups = true;
        boolean viewTypesGiven = str != null && str2 != null;
        if (viewTypesGiven) {
            validatorConfig.viewStorageTypeForFSListing = str;
            validatorConfig.viewStorageTypeForMetadata = str2;
        }

        HoodieMetadataTableValidator validator = new HoodieMetadataTableValidator(this.jsc, validatorConfig);
        Assertions.assertTrue(validator.run());
        Assertions.assertFalse(validator.hasValidationFailure());
        Assertions.assertTrue(validator.getThrowables().isEmpty());
    }

    /**
     * Copies the metadata directory aside after the first commit, performs a second commit, then
     * replaces the metadata directory with the earlier copy. With {@code ignoreFailed} enabled the
     * validator is expected to return {@code false}, flag a validation failure and record at
     * least one throwable.
     */
    @Test
    void missingLogFileFailsValidation() throws Exception {
        FileSystem fs = HadoopFSUtils.getFs(this.tempDir.toString(), new Configuration(false));

        HashMap<String, String> writeOptions = new HashMap<>();
        writeOptions.put(DataSourceWriteOptions.TABLE_NAME().key(), "test_table");
        writeOptions.put("hoodie.table.name", "test_table");
        writeOptions.put(DataSourceWriteOptions.TABLE_TYPE().key(), "MERGE_ON_READ");
        writeOptions.put(DataSourceWriteOptions.RECORDKEY_FIELD().key(), "_row_key");
        writeOptions.put(DataSourceWriteOptions.PRECOMBINE_FIELD().key(), "timestamp");
        writeOptions.put(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), "partition_path");

        makeInsertDf("000", 5)
            .cache()
            .write()
            .format("hudi")
            .options(writeOptions)
            .option(DataSourceWriteOptions.OPERATION().key(), WriteOperationType.BULK_INSERT.value())
            .option(HoodieMetadataConfig.RECORD_INDEX_ENABLE_PROP.key(), "true")
            .option(HoodieMetadataConfig.RECORD_INDEX_MIN_FILE_GROUP_COUNT_PROP.key(), "1")
            .option(HoodieMetadataConfig.RECORD_INDEX_MAX_FILE_GROUP_COUNT_PROP.key(), "1")
            .mode(SaveMode.Overwrite)
            .save(this.basePath);

        // Keep a copy of the metadata directory as it looks after the first commit only.
        Path metadataDir = new Path(this.basePath + "/.hoodie/metadata");
        Path backupDir = new Path(this.tempDir.resolve("backup").toString());
        org.apache.hadoop.fs.FileUtil.copy(fs, metadataDir, fs, backupDir, false, fs.getConf());

        makeUpdateDf("001", 5)
            .cache()
            .write()
            .format("hudi")
            .options(writeOptions)
            .option(DataSourceWriteOptions.OPERATION().key(), WriteOperationType.UPSERT.value())
            .option(HoodieMetadataConfig.RECORD_INDEX_ENABLE_PROP.key(), "true")
            .option(HoodieMetadataConfig.RECORD_INDEX_MIN_FILE_GROUP_COUNT_PROP.key(), "1")
            .option(HoodieMetadataConfig.RECORD_INDEX_MAX_FILE_GROUP_COUNT_PROP.key(), "1")
            .mode(SaveMode.Append)
            .save(this.basePath);

        // Swap the current metadata directory for the copy taken before the second commit.
        fs.delete(metadataDir, true);
        org.apache.hadoop.fs.FileUtil.copy(fs, backupDir, fs, metadataDir, true, fs.getConf());

        HoodieMetadataTableValidator.Config validatorConfig = new HoodieMetadataTableValidator.Config();
        validatorConfig.basePath = "file:" + this.basePath;
        validatorConfig.validateLatestFileSlices = true;
        validatorConfig.validateAllFileGroups = true;
        validatorConfig.ignoreFailed = true;

        HoodieMetadataTableValidator validator = new HoodieMetadataTableValidator(this.jsc, validatorConfig);
        Assertions.assertFalse(validator.run());
        Assertions.assertTrue(validator.hasValidationFailure());
        Assertions.assertFalse(validator.getThrowables().isEmpty());
    }

    /**
     * Creates a table through SQL, appends three single-row datasets, builds an index on
     * {@code not_record_key_col} and runs the secondary-index validation; then rewrites
     * {@code row1} with a different {@code not_record_key_col} value and validates again.
     */
    @Test
    public void testSecondaryIndexValidation() throws IOException {
        this.storage.deleteDirectory(this.metaClient.getBasePath());

        String createTableSql = "create table tbl (ts bigint, record_key_col string, not_record_key_col string, partition_key_col string ) using hudi options (primaryKey = 'record_key_col', type = 'mor', hoodie.metadata.enable = 'true', hoodie.metadata.record.index.enable = 'true', hoodie.datasource.write.recordkey.field = 'record_key_col', hoodie.enable.data.skipping = 'true', hoodie.datasource.write.precombine.field = 'ts', hoodie.datasource.write.payload.class = 'org.apache.hudi.common.model.OverwriteWithLatestAvroPayload') partitioned by(partition_key_col) location '" + this.basePath + "'";
        this.sparkSession.sql(createTableSql);

        Dataset<Row> firstRow = getRowDataset(1, "row1", "abc", "p1");
        firstRow.write().format("hudi").mode(SaveMode.Append).save(this.basePath);
        Dataset<Row> secondRow = getRowDataset(2, "row2", "ghi", "p2");
        secondRow.write().format("hudi").mode(SaveMode.Append).save(this.basePath);
        Dataset<Row> thirdRow = getRowDataset(3, "row3", "def", "p2");
        thirdRow.write().format("hudi").mode(SaveMode.Append).save(this.basePath);

        this.sparkSession.sql("create index idx_not_record_key_col on tbl (not_record_key_col)");
        validateSecondaryIndex();

        // Same record key as the first row, but with a new value in the indexed column.
        Dataset<Row> rewrittenFirstRow = getRowDataset(1, "row1", "cde", "p1");
        rewrittenFirstRow
            .write()
            .format("hudi")
            .option("hoodie.metadata.enable", "true")
            .option("hoodie.metadata.record.index.enable", "true")
            .mode(SaveMode.Append)
            .save(this.basePath);
        validateSecondaryIndex();
    }

    /**
     * Writes three rows whose {@code not_record_key_col} values are {@code abc}, {@code cde} and
     * {@code def}, then checks that looking up each of those values through
     * {@code getFSSecondaryKeyToRecordKeys} yields exactly the record key of the row that holds it
     * ({@code row1}, {@code row2}, {@code row3} respectively).
     */
    @Test
    public void testGetFSSecondaryKeyToRecordKeys() throws IOException {
        this.storage.deleteDirectory(this.metaClient.getBasePath());

        String createTableSql = "create table tbl (ts bigint, record_key_col string, not_record_key_col string, partition_key_col string ) using hudi options (primaryKey = 'record_key_col', type = 'mor', hoodie.metadata.enable = 'true', hoodie.metadata.record.index.enable = 'true', hoodie.datasource.write.recordkey.field = 'record_key_col', hoodie.enable.data.skipping = 'true', hoodie.datasource.write.precombine.field = 'ts') partitioned by(partition_key_col) location '" + this.basePath + "'";
        this.sparkSession.sql(createTableSql);

        getRowDataset(1, "row1", "abc", "p1").write().format("hudi").mode(SaveMode.Append).save(this.basePath);
        getRowDataset(2, "row2", "cde", "p2").write().format("hudi").mode(SaveMode.Append).save(this.basePath);
        getRowDataset(3, "row3", "def", "p2").write().format("hudi").mode(SaveMode.Append).save(this.basePath);

        HoodieMetadataTableValidator.Config validatorConfig = new HoodieMetadataTableValidator.Config();
        validatorConfig.basePath = "file:" + this.basePath;
        validatorConfig.validateLatestFileSlices = true;
        validatorConfig.validateAllFileGroups = true;
        validatorConfig.ignoreFailed = true;
        HoodieMetadataTableValidator validator = new HoodieMetadataTableValidator(this.jsc, validatorConfig);

        this.metaClient = HoodieTableMetaClient.reload(this.metaClient);

        String[] secondaryKeys = {"abc", "cde", "def"};
        for (int position = 0; position < secondaryKeys.length; position++) {
            String secondaryKey = secondaryKeys[position];
            HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(this.jsc, this.sqlContext);
            String latestInstantTime = ((HoodieInstant) this.metaClient.getActiveTimeline().lastInstant().get()).requestedTime();
            Object recordKeys = validator
                .getFSSecondaryKeyToRecordKeys(engineContext, this.basePath, latestInstantTime, "not_record_key_col", Collections.singletonList(secondaryKey))
                .get(secondaryKey);
            // Row numbering is 1-based: the n-th secondary key belongs to "row<n>".
            Assertions.assertEquals(Collections.singleton("row" + (position + 1)), recordKeys);
        }
    }

    /**
     * Builds a single-row DataFrame from the given values using a fixed four-column schema:
     * {@code ts} (integer), {@code record_key_col}, {@code not_record_key_col} and
     * {@code partition_key_col} (all strings, all nullable).
     *
     * @param objArr the column values of the single row, in schema order
     * @return a DataFrame holding exactly that row
     */
    private Dataset<Row> getRowDataset(Object... objArr) {
        List<Row> singleRow = Collections.singletonList(RowFactory.create(objArr));
        StructType schema = new StructType()
            .add(new StructField("ts", DataTypes.IntegerType, true, Metadata.empty()))
            .add(new StructField("record_key_col", DataTypes.StringType, true, Metadata.empty()))
            .add(new StructField("not_record_key_col", DataTypes.StringType, true, Metadata.empty()))
            .add(new StructField("partition_key_col", DataTypes.StringType, true, Metadata.empty()));
        return this.sparkSession.createDataFrame(singleRow, schema);
    }

    /**
     * Runs the column-stats validation after a bulk insert and again after an upsert, with the
     * column-stats metadata index option set to {@code "true"} on both writes.
     *
     * @param str the table type to write with ({@code MERGE_ON_READ} or {@code COPY_ON_WRITE})
     */
    @ParameterizedTest
    @ValueSource(strings = {"MERGE_ON_READ", "COPY_ON_WRITE"})
    public void testColumnStatsValidation(String str) {
        HashMap<String, String> writeOptions = new HashMap<>();
        writeOptions.put(DataSourceWriteOptions.TABLE_NAME().key(), "test_table");
        writeOptions.put("hoodie.table.name", "test_table");
        writeOptions.put(DataSourceWriteOptions.TABLE_TYPE().key(), str);
        writeOptions.put(DataSourceWriteOptions.RECORDKEY_FIELD().key(), "_row_key");
        writeOptions.put(DataSourceWriteOptions.PRECOMBINE_FIELD().key(), "timestamp");
        writeOptions.put(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), "partition_path");
        writeOptions.put(HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key(), "true");

        makeInsertDf("000", 5)
            .write()
            .format("hudi")
            .options(writeOptions)
            .option(DataSourceWriteOptions.OPERATION().key(), WriteOperationType.BULK_INSERT.value())
            .mode(SaveMode.Overwrite)
            .save(this.basePath);
        validateColumnStats();

        makeUpdateDf("001", 5)
            .write()
            .format("hudi")
            .options(writeOptions)
            .option(DataSourceWriteOptions.OPERATION().key(), WriteOperationType.UPSERT.value())
            .mode(SaveMode.Append)
            .save(this.basePath);
        validateColumnStats();
    }

    /**
     * Runs the partition-stats validation after a bulk insert and again after an upsert, with both
     * the column-stats and the partition-stats metadata index options set to {@code "true"}.
     *
     * @param str the table type to write with ({@code MERGE_ON_READ} or {@code COPY_ON_WRITE})
     */
    @ParameterizedTest
    @ValueSource(strings = {"MERGE_ON_READ", "COPY_ON_WRITE"})
    public void testPartitionStatsValidation(String str) {
        HashMap<String, String> writeOptions = new HashMap<>();
        writeOptions.put(DataSourceWriteOptions.TABLE_NAME().key(), "test_table");
        writeOptions.put("hoodie.table.name", "test_table");
        writeOptions.put(DataSourceWriteOptions.TABLE_TYPE().key(), str);
        writeOptions.put(DataSourceWriteOptions.RECORDKEY_FIELD().key(), "_row_key");
        writeOptions.put(DataSourceWriteOptions.PRECOMBINE_FIELD().key(), "timestamp");
        writeOptions.put(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), "partition_path");
        writeOptions.put(HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key(), "true");

        makeInsertDf("000", 5)
            .write()
            .format("hudi")
            .options(writeOptions)
            .option(DataSourceWriteOptions.OPERATION().key(), WriteOperationType.BULK_INSERT.value())
            .option(HoodieMetadataConfig.ENABLE_METADATA_INDEX_PARTITION_STATS.key(), "true")
            .mode(SaveMode.Overwrite)
            .save(this.basePath);
        validatePartitionStats();

        makeUpdateDf("001", 5)
            .write()
            .format("hudi")
            .options(writeOptions)
            .option(DataSourceWriteOptions.OPERATION().key(), WriteOperationType.UPSERT.value())
            .option(HoodieMetadataConfig.ENABLE_METADATA_INDEX_PARTITION_STATS.key(), "true")
            .mode(SaveMode.Append)
            .save(this.basePath);
        validatePartitionStats();
    }

    /**
     * Runs the validator against {@code basePath} with {@code validateAllColumnStats} enabled and
     * the latest-file-slice / all-file-group checks disabled, and asserts a clean result:
     * {@code run()} returns true, no validation failure is flagged and no throwable is recorded.
     */
    private void validateColumnStats() {
        HoodieMetadataTableValidator.Config columnStatsConfig = new HoodieMetadataTableValidator.Config();
        columnStatsConfig.basePath = this.basePath;
        columnStatsConfig.validateAllColumnStats = true;
        columnStatsConfig.validateAllFileGroups = false;
        columnStatsConfig.validateLatestFileSlices = false;

        HoodieMetadataTableValidator validator = new HoodieMetadataTableValidator(this.jsc, columnStatsConfig);
        boolean succeeded = validator.run();
        Assertions.assertTrue(succeeded);
        Assertions.assertFalse(validator.hasValidationFailure());
        Assertions.assertTrue(validator.getThrowables().isEmpty());
    }

    /**
     * Runs the validator against {@code basePath} with {@code validatePartitionStats} enabled and
     * the latest-file-slice / all-file-group checks disabled, and asserts a clean result:
     * {@code run()} returns true, no validation failure is flagged and no throwable is recorded.
     */
    private void validatePartitionStats() {
        HoodieMetadataTableValidator.Config partitionStatsConfig = new HoodieMetadataTableValidator.Config();
        partitionStatsConfig.basePath = this.basePath;
        partitionStatsConfig.validatePartitionStats = true;
        partitionStatsConfig.validateAllFileGroups = false;
        partitionStatsConfig.validateLatestFileSlices = false;

        HoodieMetadataTableValidator validator = new HoodieMetadataTableValidator(this.jsc, partitionStatsConfig);
        boolean succeeded = validator.run();
        Assertions.assertTrue(succeeded);
        Assertions.assertFalse(validator.hasValidationFailure());
        Assertions.assertTrue(validator.getThrowables().isEmpty());
    }

    /**
     * Runs the validator against {@code basePath} with {@code validateSecondaryIndex} enabled and
     * the latest-file-slice / all-file-group checks disabled, and asserts a clean result:
     * {@code run()} returns true, no validation failure is flagged and no throwable is recorded.
     */
    private void validateSecondaryIndex() {
        HoodieMetadataTableValidator.Config secondaryIndexConfig = new HoodieMetadataTableValidator.Config();
        secondaryIndexConfig.basePath = this.basePath;
        secondaryIndexConfig.validateSecondaryIndex = true;
        secondaryIndexConfig.validateAllFileGroups = false;
        secondaryIndexConfig.validateLatestFileSlices = false;

        HoodieMetadataTableValidator validator = new HoodieMetadataTableValidator(this.jsc, secondaryIndexConfig);
        boolean succeeded = validator.run();
        Assertions.assertTrue(succeeded);
        Assertions.assertFalse(validator.hasValidationFailure());
        Assertions.assertTrue(validator.getThrowables().isEmpty());
    }

    /**
     * Exercises {@code validatePartitions} with a mocked validator that reports three partitions
     * from the metadata table but only two from the file system.
     *
     * <ul>
     *   <li>{@code z == true}: the injected partition creation time is generated before the last
     *       completed instant, and a {@link HoodieValidationException} is expected.</li>
     *   <li>{@code z == false}: the injected partition creation time is generated after the last
     *       completed instant, and the call is expected to return the three-partition list.</li>
     * </ul>
     *
     * @param z selects which of the two orderings above is set up
     */
    @ParameterizedTest
    @ValueSource(booleans = {true, false})
    public void testAdditionalPartitionsinMDT(boolean z) throws IOException, InterruptedException {
        HashMap<String, String> writeOptions = new HashMap<>();
        writeOptions.put(DataSourceWriteOptions.TABLE_NAME().key(), "test_table");
        writeOptions.put(DataSourceWriteOptions.TABLE_TYPE().key(), "MERGE_ON_READ");
        writeOptions.put(DataSourceWriteOptions.RECORDKEY_FIELD().key(), "_row_key");
        writeOptions.put(DataSourceWriteOptions.PRECOMBINE_FIELD().key(), "timestamp");
        writeOptions.put(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), "partition_path");
        makeInsertDf("000", 5)
            .cache()
            .write()
            .format("hudi")
            .options(writeOptions)
            .option(DataSourceWriteOptions.OPERATION().key(), WriteOperationType.BULK_INSERT.value())
            .mode(SaveMode.Overwrite)
            .save(this.basePath);

        HoodieMetadataTableValidator.Config validatorConfig = new HoodieMetadataTableValidator.Config();
        validatorConfig.basePath = this.basePath;
        validatorConfig.validateLatestFileSlices = true;
        validatorConfig.validateAllFileGroups = true;
        MockHoodieMetadataTableValidator validator = new MockHoodieMetadataTableValidator(this.jsc, validatorConfig);
        HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(this.jsc);

        // Meta client and storage are mocks; storage claims every partition directory exists.
        HoodieTableMetaClient mockedMetaClient = Mockito.mock(HoodieTableMetaClient.class);
        HoodieStorage mockedStorage = Mockito.mock(HoodieHadoopStorage.class);
        Mockito.when(mockedMetaClient.getStorage()).thenReturn(mockedStorage);
        Mockito.when(mockedStorage.exists(new StoragePath(this.basePath + "/PARTITION1"))).thenReturn(true);
        Mockito.when(mockedStorage.exists(new StoragePath(this.basePath + "/PARTITION2"))).thenReturn(true);
        Mockito.when(mockedStorage.exists(new StoragePath(this.basePath + "/PARTITION3"))).thenReturn(true);

        // The metadata table reports one partition more than the file system.
        List<String> mdtPartitions = Arrays.asList("PARTITION1", "PARTITION2", "PARTITION3");
        validator.setMetadataPartitionsToReturn(mdtPartitions);
        validator.setFsPartitionsToReturn(Arrays.asList("PARTITION1", "PARTITION2"));

        HoodieTimeline commitsTimeline = Mockito.mock(HoodieTimeline.class);
        HoodieTimeline completedTimeline = Mockito.mock(HoodieTimeline.class);
        Mockito.when(mockedMetaClient.getCommitsTimeline()).thenReturn(commitsTimeline);
        Mockito.when(commitsTimeline.filterCompletedInstants()).thenReturn(completedTimeline);

        TimeGenerator timeGenerator = TimeGenerators.getTimeGenerator(
            HoodieTimeGeneratorConfig.defaultConfig(this.basePath),
            HadoopFSUtils.getStorageConf(this.jsc.hadoopConfiguration()));
        StoragePath tableBasePath = new StoragePath(this.basePath);

        if (z) {
            // Partition creation time first, last completed commit second.
            String partitionCreatedAt = TimelineUtils.generateInstantTime(true, timeGenerator);
            Thread.sleep(100L);
            String lastCommitTime = TimelineUtils.generateInstantTime(true, timeGenerator);
            Mockito.when(completedTimeline.lastInstant()).thenReturn(
                Option.of(HoodieTestUtils.INSTANT_GENERATOR.createNewInstant(HoodieInstant.State.COMPLETED, "commit", lastCommitTime)));
            validator.setPartitionCreationTime(Option.of(partitionCreatedAt));
            Assertions.assertThrows(
                HoodieValidationException.class,
                () -> validator.validatePartitions(engineContext, tableBasePath, mockedMetaClient));
        } else {
            // Last completed commit first, partition creation time second.
            String lastCommitTime = TimelineUtils.generateInstantTime(true, timeGenerator);
            Mockito.when(completedTimeline.lastInstant()).thenReturn(
                Option.of(HoodieTestUtils.INSTANT_GENERATOR.createNewInstant(HoodieInstant.State.COMPLETED, "commit", lastCommitTime)));
            Thread.sleep(100L);
            String partitionCreatedAt = TimelineUtils.generateInstantTime(true, timeGenerator);
            validator.setPartitionCreationTime(Option.of(partitionCreatedAt));
            Assertions.assertEquals(mdtPartitions, validator.validatePartitions(engineContext, tableBasePath, mockedMetaClient));
        }
    }

    /**
     * Checks how the validator reacts when log files are removed from the table directory after
     * they were written.
     *
     * <p>Steps, in order: write one overwrite plus six appends; run a baseline validation; move a
     * log file of a latest file slice out of the table and expect a failure; move it back and
     * re-run; delete a log file of the last listed file slice of the first file group and expect a
     * failure; finally repeat with {@code validateLastNFileSlices} optionally set.
     *
     * @param num value for {@code validateLastNFileSlices}; {@code -1} means "leave unset"
     * @param z   value for {@code ignoreFailed}; when true failures are read from the validator's
     *            recorded throwables, when false {@code run()} is expected to throw
     */
    @MethodSource({"lastNFileSlicesTestArgs"})
    @ParameterizedTest
    public void testAdditionalFilesInMetadata(Integer num, boolean z) throws IOException {
        HashMap hashMap = new HashMap();
        hashMap.put(DataSourceWriteOptions.TABLE_NAME().key(), "test_table");
        hashMap.put(DataSourceWriteOptions.TABLE_TYPE().key(), "MERGE_ON_READ");
        hashMap.put(DataSourceWriteOptions.RECORDKEY_FIELD().key(), "_row_key");
        hashMap.put(DataSourceWriteOptions.PRECOMBINE_FIELD().key(), "timestamp");
        // NOTE(review): presumably makes inline compaction kick in every 2 delta commits — confirm against HoodieCompactionConfig.
        hashMap.put(HoodieCompactionConfig.INLINE_COMPACT_NUM_DELTA_COMMITS.key(), "2");
        // The same cached dataset is written seven times in total: one overwrite, then six appends.
        Dataset cache = makeInsertDf("000", 10).cache();
        cache.write().format("hudi").options(hashMap).mode(SaveMode.Overwrite).save(this.basePath);
        for (int i = 0; i < 6; i++) {
            cache.write().format("hudi").options(hashMap).mode(SaveMode.Append).save(this.basePath);
        }
        // Baseline run on the untouched table; its outcome is not asserted.
        HoodieMetadataTableValidator.Config config = new HoodieMetadataTableValidator.Config();
        config.basePath = "file:" + this.basePath;
        config.validateLatestFileSlices = true;
        config.validateAllFileGroups = true;
        config.ignoreFailed = z;
        HoodieMetadataTableValidator hoodieMetadataTableValidator = new HoodieMetadataTableValidator(this.jsc, config);
        HoodieSparkEngineContext hoodieSparkEngineContext = new HoodieSparkEngineContext(this.jsc);
        hoodieMetadataTableValidator.run();
        HoodieTableMetaClient build = HoodieTableMetaClient.builder().setBasePath(this.basePath).setConf(hoodieSparkEngineContext.getStorageConf()).build();
        // Scratch directory that temporarily receives the log file taken out of the table.
        java.nio.file.Path resolve = this.tempDir.resolve("temp_folder");
        Files.createDirectories(resolve, new FileAttribute[0]);
        Path path = new Path(resolve.toAbsolutePath().toString());
        // First log file of the first latest file slice (partition path "") that has at least one log file.
        HoodieLogFile hoodieLogFile = (HoodieLogFile) ((List) ((FileSlice) ((List) new HoodieTableFileSystemView(build, build.getActiveTimeline().filterCompletedAndCompactionInstants(), false).getLatestFileSlices("").filter(fileSlice -> {
            return fileSlice.getLogFiles().count() > 0;
        }).collect(Collectors.toList())).get(0)).getLogFiles().collect(Collectors.toList())).get(0);
        FileSystem fs = HadoopFSUtils.getFs(new Path(hoodieLogFile.getPath().toString()), new Configuration(false));
        // Scenario 1: move that log file out of the table, then validate latest file slices only.
        fs.moveFromLocalFile(new Path(hoodieLogFile.getPath().toString()), path);
        HoodieMetadataTableValidator.Config config2 = new HoodieMetadataTableValidator.Config();
        config2.basePath = "file:" + this.basePath;
        config2.validateLatestFileSlices = true;
        config2.ignoreFailed = z;
        HoodieMetadataTableValidator hoodieMetadataTableValidator2 = new HoodieMetadataTableValidator(this.jsc, config2);
        if (z) {
            // ignoreFailed: run() completes and the failure is read back from the validator.
            hoodieMetadataTableValidator2.run();
            Assertions.assertTrue(hoodieMetadataTableValidator2.hasValidationFailure());
            Assertions.assertTrue(hoodieMetadataTableValidator2.getThrowables().get(0) instanceof HoodieValidationException);
        } else {
            // The bare getClass() call has no effect on the assertion; it looks like a decompiler-emitted null check for the method reference below.
            hoodieMetadataTableValidator2.getClass();
            Assertions.assertThrows(HoodieValidationException.class, hoodieMetadataTableValidator2::run);
        }
        // Put the log file back under the base path and validate once more; this run's outcome is not asserted.
        fs.moveFromLocalFile(new Path(path + "/" + hoodieLogFile.getFileName()), new Path(this.basePath));
        HoodieMetadataTableValidator.Config config3 = new HoodieMetadataTableValidator.Config();
        config3.basePath = "file:" + this.basePath;
        config3.validateLatestFileSlices = true;
        config3.ignoreFailed = z;
        new HoodieMetadataTableValidator(this.jsc, config3).run();
        // Scenario 2: delete the first log file of the LAST entry in the first file group's slice list.
        // NOTE(review): this presumably targets the oldest slice, assuming getAllFileSlices() lists newest first — confirm.
        List list = (List) ((HoodieFileGroup) ((List) new HoodieTableFileSystemView(build, build.getActiveTimeline().filterCompletedAndCompactionInstants(), false).getAllFileGroups("").collect(Collectors.toList())).get(0)).getAllFileSlices().collect(Collectors.toList());
        fs.delete(new Path(((HoodieLogFile) ((List) ((FileSlice) list.get(list.size() - 1)).getLogFiles().collect(Collectors.toList())).get(0)).getPath().toString()));
        // With all file groups validated, the deleted log file must surface as a failure.
        HoodieMetadataTableValidator.Config config4 = new HoodieMetadataTableValidator.Config();
        config4.basePath = "file:" + this.basePath;
        config4.validateLatestFileSlices = true;
        config4.validateAllFileGroups = true;
        config4.ignoreFailed = z;
        HoodieMetadataTableValidator hoodieMetadataTableValidator3 = new HoodieMetadataTableValidator(this.jsc, config4);
        if (z) {
            hoodieMetadataTableValidator3.run();
            Assertions.assertTrue(hoodieMetadataTableValidator3.hasValidationFailure());
            Assertions.assertTrue(hoodieMetadataTableValidator3.getThrowables().get(0) instanceof HoodieValidationException);
        } else {
            hoodieMetadataTableValidator3.getClass();
            Assertions.assertThrows(HoodieValidationException.class, hoodieMetadataTableValidator3::run);
        }
        // Scenario 3: same table state, but optionally restrict validation to the last N file slices.
        HoodieMetadataTableValidator.Config config5 = new HoodieMetadataTableValidator.Config();
        config5.basePath = "file:" + this.basePath;
        config5.validateLatestFileSlices = true;
        config5.validateAllFileGroups = true;
        if (num.intValue() != -1) {
            config5.validateLastNFileSlices = num;
        }
        config5.ignoreFailed = z;
        HoodieMetadataTableValidator hoodieMetadataTableValidator4 = new HoodieMetadataTableValidator(this.jsc, config5);
        if (num.intValue() != -1 && num.intValue() < 4) {
            // For N in 1..3 the test expects no failure to be reported.
            hoodieMetadataTableValidator4.run();
            Assertions.assertFalse(hoodieMetadataTableValidator4.hasValidationFailure());
        } else if (!z) {
            // N unset or N >= 4: a failure is expected, thrown directly because ignoreFailed is off.
            hoodieMetadataTableValidator4.getClass();
            Assertions.assertThrows(HoodieValidationException.class, hoodieMetadataTableValidator4::run);
        } else {
            // N unset or N >= 4 with ignoreFailed on: the failure is recorded instead of thrown.
            hoodieMetadataTableValidator4.run();
            Assertions.assertTrue(hoodieMetadataTableValidator4.hasValidationFailure());
            Assertions.assertTrue(hoodieMetadataTableValidator4.getThrowables().get(0) instanceof HoodieValidationException);
        }
    }

    /**
     * End-to-end check that the validator flags a table whose partition directory was removed
     * from storage after the data had been written.
     *
     * <p>The test writes a MERGE_ON_READ table (one overwrite followed by six appends of the same
     * 100 generated inserts), confirms that a validator run reports no failure, then deletes the
     * directory {@code 2015/03/16} under the base path directly through the file system and runs
     * the validator again, expecting a {@link HoodieValidationException}.
     *
     * @param z value assigned to {@code Config.ignoreFailed}; when {@code true} the second run is
     *          expected to return normally and expose the failure through
     *          {@code hasValidationFailure()} / {@code getThrowables()}, when {@code false} the
     *          second run is expected to throw
     * @throws IOException if the partition directory cannot be deleted
     */
    @ValueSource(booleans = {true, false})
    @ParameterizedTest
    public void testAdditionalPartitionsinMdtEndToEnd(boolean z) throws IOException {
        HashMap<String, String> writeOptions = new HashMap<>();
        writeOptions.put(DataSourceWriteOptions.TABLE_NAME().key(), "test_table");
        writeOptions.put("hoodie.table.name", "test_table");
        writeOptions.put(DataSourceWriteOptions.TABLE_TYPE().key(), "MERGE_ON_READ");
        writeOptions.put(DataSourceWriteOptions.RECORDKEY_FIELD().key(), "_row_key");
        writeOptions.put(DataSourceWriteOptions.PRECOMBINE_FIELD().key(), "timestamp");
        writeOptions.put(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), "partition_path");
        writeOptions.put(HoodieCompactionConfig.INLINE_COMPACT_NUM_DELTA_COMMITS.key(), "2");

        // The same cached batch is written seven times: once as overwrite, six times as append.
        Dataset<Row> inserts = makeInsertDf("000", 100).cache();
        inserts.write().format("hudi").options(writeOptions).mode(SaveMode.Overwrite).save(this.basePath);
        for (int i = 0; i < 6; i++) {
            inserts.write().format("hudi").options(writeOptions).mode(SaveMode.Append).save(this.basePath);
        }

        // Baseline: the untouched table must validate cleanly.
        HoodieMetadataTableValidator.Config baselineConfig = new HoodieMetadataTableValidator.Config();
        baselineConfig.basePath = "file:" + this.basePath;
        baselineConfig.validateLatestFileSlices = true;
        baselineConfig.validateAllFileGroups = true;
        baselineConfig.ignoreFailed = z;
        HoodieMetadataTableValidator baselineValidator = new HoodieMetadataTableValidator(this.jsc, baselineConfig);
        baselineValidator.run();
        Assertions.assertFalse(baselineValidator.hasValidationFailure());

        // Remove one partition directory behind the table's back. The recursive flag is passed
        // explicitly (the single-argument overload is deprecated) and the result is asserted so
        // that a silently failed delete cannot be mistaken for a validator problem further down.
        FileSystem fs = HadoopFSUtils.getFs(this.basePath, new Configuration(false));
        Path deletedPartition = new Path(this.basePath + "/2015/03/16");
        Assertions.assertTrue(fs.delete(deletedPartition, true),
            "Failed to delete partition directory " + deletedPartition);

        HoodieMetadataTableValidator.Config brokenConfig = new HoodieMetadataTableValidator.Config();
        brokenConfig.basePath = "file:" + this.basePath;
        brokenConfig.validateLatestFileSlices = true;
        brokenConfig.ignoreFailed = z;
        HoodieMetadataTableValidator brokenValidator = new HoodieMetadataTableValidator(this.jsc, brokenConfig);
        if (!z) {
            // Failures are not ignored: the run itself must throw.
            Assertions.assertThrows(HoodieValidationException.class, brokenValidator::run);
        } else {
            // Failures are ignored: the run returns and the failure is recorded on the validator.
            brokenValidator.run();
            Assertions.assertTrue(brokenValidator.hasValidationFailure());
            Assertions.assertTrue(brokenValidator.getThrowables().get(0) instanceof HoodieValidationException);
        }
    }

    /**
     * Verifies that record-index content validation fails once one base file has been
     * overwritten with the contents of another.
     *
     * <p>A COPY_ON_WRITE table with the record index enabled is populated through seven
     * bulk inserts (50 records, then 100 to 600 in steps of 100). After a clean validator run,
     * the first latest base file is copied over the second one and a validator with
     * {@code validateRecordIndexContent} enabled is expected to report a
     * {@link HoodieValidationException}.
     *
     * @param z value assigned to {@code Config.ignoreFailed}; {@code true} means the failure is
     *          expected to be recorded on the validator, {@code false} means {@code run()} is
     *          expected to throw
     * @throws IOException if copying the base file fails
     */
    @ParameterizedTest
    @ValueSource(booleans = {true, false})
    public void testRecordIndexMismatch(boolean z) throws IOException {
        HashMap<String, String> writerOpts = new HashMap<>();
        writerOpts.put(DataSourceWriteOptions.TABLE_NAME().key(), "test_table");
        writerOpts.put("hoodie.table.name", "test_table");
        writerOpts.put(DataSourceWriteOptions.TABLE_TYPE().key(), "COPY_ON_WRITE");
        writerOpts.put(DataSourceWriteOptions.RECORDKEY_FIELD().key(), "_row_key");
        writerOpts.put(DataSourceWriteOptions.PRECOMBINE_FIELD().key(), "timestamp");
        writerOpts.put(DataSourceWriteOptions.OPERATION().key(), "bulk_insert");
        writerOpts.put(HoodieMetadataConfig.RECORD_INDEX_ENABLE_PROP.key(), "true");

        makeInsertDf("000", 50).cache()
            .write().format("hudi").options(writerOpts).mode(SaveMode.Overwrite).save(this.basePath);
        for (int batchSize = 100; batchSize <= 600; batchSize += 100) {
            makeInsertDf("000", batchSize)
                .write().format("hudi").options(writerOpts).mode(SaveMode.Append).save(this.basePath);
        }

        // First pass: nothing has been tampered with, so no failure is expected.
        HoodieMetadataTableValidator.Config cleanConfig = new HoodieMetadataTableValidator.Config();
        cleanConfig.basePath = "file:" + this.basePath;
        cleanConfig.validateLatestFileSlices = true;
        cleanConfig.ignoreFailed = z;
        HoodieMetadataTableValidator cleanValidator = new HoodieMetadataTableValidator(this.jsc, cleanConfig);
        HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
            .setBasePath(this.basePath)
            .setConf(new HoodieSparkEngineContext(this.jsc).getStorageConf())
            .build();
        cleanValidator.run();
        Assertions.assertFalse(cleanValidator.hasValidationFailure());

        // Overwrite the second latest base file with the first one.
        HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(
            metaClient, metaClient.getActiveTimeline().filterCompletedAndCompactionInstants(), false);
        List<HoodieBaseFile> latestBaseFiles = fsView.getLatestBaseFiles("").collect(Collectors.toList());
        Path copySource = new Path(latestBaseFiles.get(0).getStoragePath().toString());
        Path copyTarget = new Path(latestBaseFiles.get(1).getStoragePath().toString());
        HadoopFSUtils.getFs(this.basePath, new Configuration(false)).copyFromLocalFile(copySource, copyTarget);

        // Second pass: record-index content validation is switched on and must fail.
        HoodieMetadataTableValidator.Config tamperedConfig = new HoodieMetadataTableValidator.Config();
        tamperedConfig.basePath = "file:" + this.basePath;
        tamperedConfig.validateLatestFileSlices = true;
        tamperedConfig.validateRecordIndexContent = true;
        tamperedConfig.ignoreFailed = z;
        HoodieMetadataTableValidator tamperedValidator = new HoodieMetadataTableValidator(this.jsc, tamperedConfig);
        if (z) {
            tamperedValidator.run();
            Assertions.assertTrue(tamperedValidator.hasValidationFailure());
            Assertions.assertTrue(tamperedValidator.getThrowables().get(0) instanceof HoodieValidationException);
        } else {
            Assertions.assertThrows(HoodieValidationException.class, tamperedValidator::run);
        }
    }

    /**
     * Checks that the validator does not report a failure when the last completed instant file
     * is temporarily missing from the timeline directory.
     *
     * <p>Three commits are written to a MERGE_ON_READ table with the record index enabled
     * (bulk insert, upsert, bulk insert). The file of the last completed instant is then moved
     * out of {@code .hoodie/timeline} into a temp location, and a
     * {@code MockHoodieMetadataTableValidatorForRli} is given both paths before it is run.
     * NOTE(review): the mock presumably moves the file back during validation - confirm against
     * the mock's implementation.
     *
     * @throws IOException if moving the instant file fails
     */
    @Test
    public void testRliValidationFalsePositiveCase() throws IOException {
        HashMap<String, String> tableOptions = new HashMap<>();
        tableOptions.put(DataSourceWriteOptions.TABLE_NAME().key(), "test_table");
        tableOptions.put(DataSourceWriteOptions.TABLE_TYPE().key(), "MERGE_ON_READ");
        tableOptions.put(DataSourceWriteOptions.RECORDKEY_FIELD().key(), "_row_key");
        tableOptions.put(DataSourceWriteOptions.PRECOMBINE_FIELD().key(), "timestamp");
        tableOptions.put(DataSourceWriteOptions.PARTITIONPATH_FIELD().key(), "partition_path");

        // Record-index settings shared by all three writes.
        HashMap<String, String> recordIndexOptions = new HashMap<>();
        recordIndexOptions.put(HoodieMetadataConfig.RECORD_INDEX_ENABLE_PROP.key(), "true");
        recordIndexOptions.put(HoodieMetadataConfig.RECORD_INDEX_MIN_FILE_GROUP_COUNT_PROP.key(), "1");
        recordIndexOptions.put(HoodieMetadataConfig.RECORD_INDEX_MAX_FILE_GROUP_COUNT_PROP.key(), "1");
        String operationKey = DataSourceWriteOptions.OPERATION().key();

        makeInsertDf("000", 5).cache().write().format("hudi")
            .options(tableOptions)
            .options(recordIndexOptions)
            .option(operationKey, WriteOperationType.BULK_INSERT.value())
            .mode(SaveMode.Overwrite)
            .save(this.basePath);
        makeUpdateDf("001", 5).cache().write().format("hudi")
            .options(tableOptions)
            .options(recordIndexOptions)
            .option(operationKey, WriteOperationType.UPSERT.value())
            .mode(SaveMode.Append)
            .save(this.basePath);
        makeInsertDf("002", 5).cache().write().format("hudi")
            .options(tableOptions)
            .options(recordIndexOptions)
            .option(operationKey, WriteOperationType.BULK_INSERT.value())
            .mode(SaveMode.Append)
            .save(this.basePath);

        HoodieMetadataTableValidator.Config validatorConfig = new HoodieMetadataTableValidator.Config();
        validatorConfig.basePath = "file://" + this.basePath;
        validatorConfig.validateLatestFileSlices = true;
        validatorConfig.validateAllFileGroups = true;

        // Locate the file of the last completed instant and move it out of the timeline directory.
        HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
            .setBasePath(this.basePath)
            .setConf(HadoopFSUtils.getStorageConfWithCopy(this.jsc.hadoopConfiguration()))
            .build();
        HoodieInstant lastCompleted = metaClient.getActiveTimeline().filterCompletedInstants().lastInstant().get();
        String instantFileName = HoodieTestUtils.INSTANT_FILE_NAME_GENERATOR.getFileName(lastCompleted);
        String timelinePath = this.basePath + "/.hoodie/timeline/" + instantFileName;
        String parkedPath = getTempLocation() + "/" + instantFileName;
        FileUtil.move(timelinePath, parkedPath);

        MockHoodieMetadataTableValidatorForRli rliValidator =
            new MockHoodieMetadataTableValidatorForRli(this.jsc, validatorConfig);
        rliValidator.setOriginalFilePath(timelinePath);
        rliValidator.setDestFilePath(parkedPath);

        // The run must succeed and leave no recorded failures behind.
        Assertions.assertTrue(rliValidator.run());
        Assertions.assertFalse(rliValidator.hasValidationFailure());
        Assertions.assertTrue(rliValidator.getThrowables().isEmpty());
    }

    /**
     * Ensures the {@code temp_location} directory exists under the test's temp dir.
     *
     * @return absolute path of the {@code temp_location} directory
     * @throws HoodieIOException if the directory cannot be created
     */
    private String getTempLocation() {
        java.nio.file.Path tempLocation = this.tempDir.resolve("temp_location");
        try {
            // No-op when the directory is already present.
            Files.createDirectories(tempLocation);
        } catch (IOException ioe) {
            throw new HoodieIOException(ioe.getMessage(), ioe);
        }
        return tempLocation.toAbsolutePath().toString();
    }

    /**
     * Builds a DataFrame from freshly generated insert records.
     *
     * <p>Each record produced by {@code dataGen.generateInserts} is rendered to a string via
     * {@code RawTripTestPayload.recordToString}; the strings are parallelized and read back
     * through Spark's JSON reader.
     *
     * @param str first argument forwarded to {@code dataGen.generateInserts}
     * @param num second argument forwarded to {@code dataGen.generateInserts}
     * @return DataFrame parsed from the generated records
     */
    protected Dataset<Row> makeInsertDf(String str, Integer num) {
        List<String> jsonRows = this.dataGen.generateInserts(str, num).stream()
            .map(rec -> (String) RawTripTestPayload.recordToString(rec).get())
            .collect(Collectors.toList());
        return this.sparkSession.read().json(this.jsc.parallelize(jsonRows));
    }

    /**
     * Builds a DataFrame from generated update records.
     *
     * <p>Each record produced by {@code dataGen.generateUpdates} is rendered to a string via
     * {@code RawTripTestPayload.recordToString}; the strings are parallelized and read back
     * through Spark's JSON reader.
     *
     * @param str first argument forwarded to {@code dataGen.generateUpdates}
     * @param num second argument forwarded to {@code dataGen.generateUpdates}
     * @return DataFrame parsed from the generated records
     * @throws RuntimeException wrapping any {@link IOException} raised while building the frame
     */
    protected Dataset<Row> makeUpdateDf(String str, Integer num) {
        try {
            List<String> jsonRows = this.dataGen.generateUpdates(str, num).stream()
                .map(rec -> (String) RawTripTestPayload.recordToString(rec).get())
                .collect(Collectors.toList());
            return this.sparkSession.read().json(this.jsc.parallelize(jsonRows));
        } catch (IOException ioe) {
            throw new RuntimeException(ioe);
        }
    }
}
