package org.apache.hudi.utilities;

import java.io.IOException;
import java.io.Serializable;
import java.lang.invoke.SerializedLambda;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import jline.internal.Log;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.async.HoodieAsyncService;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.com.beust.jcommander.JCommander;
import org.apache.hudi.com.beust.jcommander.Parameter;
import org.apache.hudi.common.bloom.BloomFilter;
import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.model.HoodieBaseFile;
import org.apache.hudi.common.model.HoodieColumnRangeMetadata;
import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.model.HoodieFileGroup;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.HoodiePartitionMetadata;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.TableSchemaResolver;
import org.apache.hudi.common.table.log.HoodieLogFormat;
import org.apache.hudi.common.table.log.block.HoodieLogBlock;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.table.timeline.HoodieTimeline;
import org.apache.hudi.common.table.view.FileSystemViewManager;
import org.apache.hudi.common.table.view.FileSystemViewStorageConfig;
import org.apache.hudi.common.table.view.HoodieTableFileSystemView;
import org.apache.hudi.common.util.CleanerUtils;
import org.apache.hudi.common.util.FileIOUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.ParquetUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.exception.HoodieValidationException;
import org.apache.hudi.exception.TableNotFoundException;
import org.apache.hudi.io.storage.HoodieFileReaderFactory;
import org.apache.hudi.metadata.HoodieTableMetadata;
import org.apache.hudi.metadata.HoodieTableMetadataUtil;
import org.apache.hudi.utilities.util.BloomFilterData;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.parquet.avro.AvroSchemaConverter;
import org.apache.parquet.schema.MessageType;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

/* loaded from: input_file:org/apache/hudi/utilities/HoodieMetadataTableValidator.class */
public class HoodieMetadataTableValidator implements Serializable {
    /** Logger shared by this validator and its nested classes. */
    private static final Logger LOG = LogManager.getLogger(HoodieMetadataTableValidator.class);
    /** Spark context used for the Hadoop configuration and engine context; transient, so excluded from Java serialization. */
    private transient JavaSparkContext jsc;
    /** Command-line configuration; not assigned by the meta-client-only constructor. */
    private Config cfg;
    /** Properties built from the {@code --hoodie-conf} entries or read from the {@code --props} file. */
    private TypedProperties props;
    /** Meta client of the data table under validation. */
    private HoodieTableMetaClient metaClient;
    /** Continuous-mode validation service; the (jsc, config) constructor fills it only when {@code cfg.continuous} is set. */
    protected transient Option<AsyncMetadataTableValidateService> asyncMetadataTableValidateService;
    /** Comma-separated names of the enabled validation tasks, used in log messages. */
    private final String taskLabels;

    /* loaded from: input_file:org/apache/hudi/utilities/HoodieMetadataTableValidator$AsyncMetadataTableValidateService.class */
    public class AsyncMetadataTableValidateService extends HoodieAsyncService {
        private final transient ExecutorService executor = Executors.newSingleThreadExecutor();

        public AsyncMetadataTableValidateService() {
        }

        protected Pair<CompletableFuture, ExecutorService> startService() {
            return Pair.of(CompletableFuture.supplyAsync(() -> {
                while (true) {
                    try {
                        long currentTimeMillis = System.currentTimeMillis();
                        HoodieMetadataTableValidator.this.doMetadataTableValidation();
                        long intValue = (HoodieMetadataTableValidator.this.cfg.minValidateIntervalSeconds.intValue() * 1000) - (System.currentTimeMillis() - currentTimeMillis);
                        if (intValue > 0) {
                            HoodieMetadataTableValidator.LOG.info("Last validate ran less than min validate interval: " + HoodieMetadataTableValidator.this.cfg.minValidateIntervalSeconds + " s, sleep: " + intValue + " ms.");
                            Thread.sleep(intValue);
                        }
                    } catch (InterruptedException e) {
                    } catch (HoodieValidationException e2) {
                        HoodieMetadataTableValidator.LOG.error("Shutting down AsyncMetadataTableValidateService due to HoodieValidationException", e2);
                        if (!HoodieMetadataTableValidator.this.cfg.ignoreFailed) {
                            throw e2;
                        }
                    }
                }
            }, this.executor), this.executor);
        }
    }

    /* loaded from: input_file:org/apache/hudi/utilities/HoodieMetadataTableValidator$Config.class */
    /**
     * Command-line options of the metadata table validator, bound by JCommander.
     *
     * <p>{@link #equals(Object)} and {@link #hashCode()} cover every option except {@code help}.
     */
    public static class Config implements Serializable {

        @Parameter(names = {"--base-path", "-sp"}, description = "Base path for the table", required = true)
        public String basePath = null;

        @Parameter(names = {"--continuous"}, description = "Running MetadataTableValidator in continuous. Can use --min-validate-interval-seconds to control validation frequency", required = false)
        public boolean continuous = false;

        @Parameter(names = {"--skip-data-files-for-cleaning"}, description = "Skip to compare the data files which are under deletion by cleaner", required = false)
        public boolean skipDataFilesForCleaning = false;

        @Parameter(names = {"--validate-latest-file-slices"}, description = "Validate latest file slices for all partitions.", required = false)
        public boolean validateLatestFileSlices = false;

        @Parameter(names = {"--validate-latest-base-files"}, description = "Validate latest base files for all partitions.", required = false)
        public boolean validateLatestBaseFiles = false;

        @Parameter(names = {"--validate-all-file-groups"}, description = "Validate all file groups, and all file slices within file groups.", required = false)
        public boolean validateAllFileGroups = false;

        @Parameter(names = {"--validate-all-column-stats"}, description = "Validate column stats for all columns in the schema", required = false)
        public boolean validateAllColumnStats = false;

        @Parameter(names = {"--validate-bloom-filters"}, description = "Validate bloom filters of base files", required = false)
        public boolean validateBloomFilters = false;

        @Parameter(names = {"--min-validate-interval-seconds"}, description = "the min validate interval of each validate when set --continuous, default is 10 minutes.")
        public Integer minValidateIntervalSeconds = 600;

        @Parameter(names = {"--parallelism", "-pl"}, description = "Parallelism for valuation", required = false)
        public int parallelism = 200;

        @Parameter(names = {"--ignore-failed", "-ig"}, description = "Ignore metadata validate failure and continue.", required = false)
        public boolean ignoreFailed = false;

        @Parameter(names = {"--spark-master", "-ms"}, description = "Spark master", required = false)
        public String sparkMaster = null;

        @Parameter(names = {"--spark-memory", "-sm"}, description = "spark memory to use", required = false)
        public String sparkMemory = "1g";

        @Parameter(names = {"--assume-date-partitioning"}, description = "Should HoodieWriteClient assume the data is partitioned by dates, i.e three levels from base path.This is a stop-gap to support tables created by versions < 0.3.1. Will be removed eventually", required = false)
        public Boolean assumeDatePartitioning = false;

        @Parameter(names = {"--props"}, description = "path to properties file on localfs or dfs, with configurations for hoodie client")
        public String propsFilePath = null;

        @Parameter(names = {"--hoodie-conf"}, description = "Any configuration that can be set in the properties file (using the CLI parameter \"--props\") can also be passed command line using this parameter. This can be repeated", splitter = IdentitySplitter.class)
        public List<String> configs = new ArrayList<>();

        @Parameter(names = {"--help", "-h"}, help = true)
        public Boolean help = false;

        /** Renders every option except {@code help}, one per line, labelled with its CLI flag. */
        @Override
        public String toString() {
            return "MetadataTableValidatorConfig {\n   --base-path " + this.basePath
                + ", \n   --validate-latest-file-slices " + this.validateLatestFileSlices
                + ", \n   --validate-latest-base-files " + this.validateLatestBaseFiles
                + ", \n   --validate-all-file-groups " + this.validateAllFileGroups
                + ", \n   --validate-all-column-stats " + this.validateAllColumnStats
                + ", \n   --validate-bloom-filters " + this.validateBloomFilters
                + ", \n   --continuous " + this.continuous
                + ", \n   --skip-data-files-for-cleaning " + this.skipDataFilesForCleaning
                + ", \n   --ignore-failed " + this.ignoreFailed
                + ", \n   --min-validate-interval-seconds " + this.minValidateIntervalSeconds
                + ", \n   --parallelism " + this.parallelism
                + ", \n   --spark-master " + this.sparkMaster
                + ", \n   --spark-memory " + this.sparkMemory
                // Label matches the real flag name (it used to read "--assumeDatePartitioning-memory").
                + ", \n   --assume-date-partitioning " + this.assumeDatePartitioning
                + ", \n   --props " + this.propsFilePath
                + ", \n   --hoodie-conf " + this.configs
                + "\n}";
        }

        /**
         * Compares all options except {@code help}. Null-safe for every reference-typed option,
         * including {@code basePath}, whose default value is {@code null}.
         */
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (obj == null || getClass() != obj.getClass()) {
                return false;
            }
            Config other = (Config) obj;
            return Objects.equals(this.basePath, other.basePath)
                && this.continuous == other.continuous
                && this.skipDataFilesForCleaning == other.skipDataFilesForCleaning
                && this.validateLatestFileSlices == other.validateLatestFileSlices
                && this.validateLatestBaseFiles == other.validateLatestBaseFiles
                && this.validateAllFileGroups == other.validateAllFileGroups
                && this.validateAllColumnStats == other.validateAllColumnStats
                && this.validateBloomFilters == other.validateBloomFilters
                && Objects.equals(this.minValidateIntervalSeconds, other.minValidateIntervalSeconds)
                && this.parallelism == other.parallelism
                && this.ignoreFailed == other.ignoreFailed
                && Objects.equals(this.sparkMaster, other.sparkMaster)
                && Objects.equals(this.sparkMemory, other.sparkMemory)
                && Objects.equals(this.assumeDatePartitioning, other.assumeDatePartitioning)
                && Objects.equals(this.propsFilePath, other.propsFilePath)
                && Objects.equals(this.configs, other.configs);
        }

        /**
         * Hashes exactly the fields compared by {@link #equals(Object)}. {@code help} is left out so
         * that two equal configs always produce the same hash code.
         */
        @Override
        public int hashCode() {
            return Objects.hash(this.basePath, this.continuous, this.skipDataFilesForCleaning,
                this.validateLatestFileSlices, this.validateLatestBaseFiles, this.validateAllFileGroups,
                this.validateAllColumnStats, this.validateBloomFilters, this.minValidateIntervalSeconds,
                this.parallelism, this.ignoreFailed, this.sparkMaster, this.sparkMemory,
                this.assumeDatePartitioning, this.propsFilePath, this.configs);
        }
    }

    /* loaded from: input_file:org/apache/hudi/utilities/HoodieMetadataTableValidator$FileSliceComparator.class */
    /**
     * Orders file slices by the plain concatenation of partition path, file id and base instant
     * time, compared as strings.
     */
    public static class FileSliceComparator implements Comparator<FileSlice>, Serializable {
        @Override
        public int compare(FileSlice fileSlice, FileSlice fileSlice2) {
            String leftKey = sortKey(fileSlice);
            String rightKey = sortKey(fileSlice2);
            return leftKey.compareTo(rightKey);
        }

        /** Builds the string that stands in for a slice during comparison. */
        private static String sortKey(FileSlice slice) {
            return slice.getPartitionPath() + slice.getFileId() + slice.getBaseInstantTime();
        }
    }

    /* loaded from: input_file:org/apache/hudi/utilities/HoodieMetadataTableValidator$HoodieBaseFileComparator.class */
    /** Orders base files by the natural ordering of the value returned by {@code getPath()}. */
    public static class HoodieBaseFileComparator implements Comparator<HoodieBaseFile>, Serializable {
        /** Key-based comparator; private and static, so it is not part of the serialized form. */
        private static final Comparator<HoodieBaseFile> BY_PATH = Comparator.comparing(HoodieBaseFile::getPath);

        @Override
        public int compare(HoodieBaseFile hoodieBaseFile, HoodieBaseFile hoodieBaseFile2) {
            return BY_PATH.compare(hoodieBaseFile, hoodieBaseFile2);
        }
    }

    /* loaded from: input_file:org/apache/hudi/utilities/HoodieMetadataTableValidator$HoodieColumnRangeMetadataComparator.class */
    /** Orders column range metadata by comparing their {@code toString()} renderings. */
    public static class HoodieColumnRangeMetadataComparator implements Comparator<HoodieColumnRangeMetadata<Comparable>>, Serializable {
        @Override
        public int compare(HoodieColumnRangeMetadata<Comparable> hoodieColumnRangeMetadata, HoodieColumnRangeMetadata<Comparable> hoodieColumnRangeMetadata2) {
            String leftText = hoodieColumnRangeMetadata.toString();
            String rightText = hoodieColumnRangeMetadata2.toString();
            return leftText.compareTo(rightText);
        }
    }

    /* loaded from: input_file:org/apache/hudi/utilities/HoodieMetadataTableValidator$HoodieFileGroupComparator.class */
    /** Orders file groups by the natural ordering of their file group id. */
    public static class HoodieFileGroupComparator implements Comparator<HoodieFileGroup>, Serializable {
        /** Key-based comparator; private and static, so it is not part of the serialized form. */
        private static final Comparator<HoodieFileGroup> BY_FILE_GROUP_ID = Comparator.comparing(HoodieFileGroup::getFileGroupId);

        @Override
        public int compare(HoodieFileGroup hoodieFileGroup, HoodieFileGroup hoodieFileGroup2) {
            return BY_FILE_GROUP_ID.compare(hoodieFileGroup, hoodieFileGroup2);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/apache/hudi/utilities/HoodieMetadataTableValidator$HoodieMetadataValidationContext.class */
    /**
     * Bundles a file system view and a table metadata reader built from the same metadata config,
     * with the metadata table either enabled or disabled. The validator creates one instance of
     * each kind and compares the sorted results they return.
     */
    public static class HoodieMetadataValidationContext implements Serializable {
        private HoodieTableMetaClient metaClient;
        private HoodieTableFileSystemView fileSystemView;
        private HoodieTableMetadata tableMetadata;
        private boolean enableMetadataTable;
        // Stays null when the table has no completed commit (see the constructor).
        private List<String> allColumnNameList;

        /**
         * @param hoodieEngineContext engine context handed to the view and metadata factories
         * @param config              validator options; only {@code assumeDatePartitioning} is read here
         * @param hoodieTableMetaClient meta client of the data table
         * @param z                   whether the metadata table (and its bloom filter / column stats
         *                            indexes) is enabled for this context
         */
        public HoodieMetadataValidationContext(HoodieEngineContext hoodieEngineContext, Config config, HoodieTableMetaClient hoodieTableMetaClient, boolean z) {
            this.metaClient = hoodieTableMetaClient;
            this.enableMetadataTable = z;
            HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder()
                .enable(z)
                .withMetadataIndexBloomFilter(z)
                .withMetadataIndexColumnStats(z)
                .withAssumeDatePartitioning(config.assumeDatePartitioning.booleanValue())
                .build();
            this.fileSystemView = FileSystemViewManager.createInMemoryFileSystemView(hoodieEngineContext, hoodieTableMetaClient, metadataConfig);
            this.tableMetadata = HoodieTableMetadata.create(hoodieEngineContext, metadataConfig, hoodieTableMetaClient.getBasePath(), FileSystemViewStorageConfig.SPILLABLE_DIR.defaultValue());
            // The column names are resolved only once the table has at least one completed commit.
            if (hoodieTableMetaClient.getCommitsTimeline().filterCompletedInstants().countInstants() > 0) {
                this.allColumnNameList = getAllColumnNames();
            }
        }

        public HoodieTableMetaClient getMetaClient() {
            return this.metaClient;
        }

        /** Returns the latest base files of partition {@code str}, sorted with {@link HoodieBaseFileComparator}. */
        public List<HoodieBaseFile> getSortedLatestBaseFileList(String str) {
            return this.fileSystemView.getLatestBaseFiles(str).sorted(new HoodieBaseFileComparator()).collect(Collectors.toList());
        }

        /** Returns the latest file slices of partition {@code str}, sorted with {@link FileSliceComparator}. */
        public List<FileSlice> getSortedLatestFileSliceList(String str) {
            return this.fileSystemView.getLatestFileSlices(str).sorted(new FileSliceComparator()).collect(Collectors.toList());
        }

        /** Returns all file groups of partition {@code str}, sorted with {@link HoodieFileGroupComparator}. */
        public List<HoodieFileGroup> getSortedAllFileGroupList(String str) {
            return this.fileSystemView.getAllFileGroups(str).sorted(new HoodieFileGroupComparator()).collect(Collectors.toList());
        }

        /**
         * Returns column range metadata for the given files of a partition, sorted with
         * {@link HoodieColumnRangeMetadataComparator}.
         *
         * @param str  partition path
         * @param list base file names within that partition
         * @return ranges read from the metadata table when it is enabled for this context,
         *         otherwise ranges read from each file via {@code ParquetUtils}
         */
        public List<HoodieColumnRangeMetadata<Comparable>> getSortedColumnStatsList(String str, List<String> list) {
            HoodieMetadataTableValidator.LOG.info("All column names for getting column stats: " + this.allColumnNameList);
            if (!this.enableMetadataTable) {
                return (List) list.stream().flatMap(fileName -> {
                    return new ParquetUtils().readRangeFromParquetMetadata(this.metaClient.getHadoopConf(), new Path(FSUtils.getPartitionPath(this.metaClient.getBasePath(), str), fileName), this.allColumnNameList).stream();
                }).sorted(new HoodieColumnRangeMetadataComparator()).collect(Collectors.toList());
            }
            List partitionFileNamePairs = (List) list.stream().map(fileName -> {
                return Pair.of(str, fileName);
            }).collect(Collectors.toList());
            return (List) this.allColumnNameList.stream().flatMap(columnName -> {
                return ((List) this.tableMetadata.getColumnStats(partitionFileNamePairs, columnName).values().stream().map(HoodieTableMetadataUtil::convertColumnStatsRecordToColumnRangeMetadata).collect(Collectors.toList())).stream();
            }).sorted(new HoodieColumnRangeMetadataComparator()).collect(Collectors.toList());
        }

        /**
         * Returns the bloom filters of the given files of a partition, in the natural order of
         * {@code BloomFilterData}.
         *
         * @param str  partition path
         * @param list base file names within that partition
         * @return filters fetched from the metadata table when it is enabled for this context,
         *         otherwise filters read from the files; unreadable files are left out
         */
        public List<BloomFilterData> getSortedBloomFilterList(String str, List<String> list) {
            if (!this.enableMetadataTable) {
                return (List) list.stream().map(fileName -> {
                    return readBloomFilterFromFile(str, fileName);
                }).filter((v0) -> {
                    return v0.isPresent();
                }).map((v0) -> {
                    return v0.get();
                }).sorted().collect(Collectors.toList());
            }
            return (List) this.tableMetadata.getBloomFilters((List) list.stream().map(fileName -> {
                return Pair.of(str, fileName);
            }).collect(Collectors.toList())).entrySet().stream().map(entry -> {
                return BloomFilterData.builder().setPartitionPath((String) ((Pair) entry.getKey()).getKey()).setFilename((String) ((Pair) entry.getKey()).getValue()).setBloomFilter(ByteBuffer.wrap(((BloomFilter) entry.getValue()).serializeToString().getBytes())).build();
            }).sorted().collect(Collectors.toList());
        }

        /**
         * Resolves the table's Avro schema and returns its field names.
         *
         * @throws HoodieException wrapping whatever the schema resolution raised
         */
        private List<String> getAllColumnNames() {
            try {
                return new TableSchemaResolver(this.metaClient).getTableAvroSchema().getFields().stream().map(field -> {
                    return field.name();
                }).collect(Collectors.toList());
            } catch (Exception e) {
                // Keep the cause so the root failure is not lost.
                throw new HoodieException("Failed to get all column names for " + this.metaClient.getBasePath(), e);
            }
        }

        /**
         * Reads the bloom filter stored in one base file.
         *
         * @param str  partition path
         * @param str2 file name within the partition
         * @return the filter data, or empty when the file has no bloom filter or its reader could
         *         not be created (both cases are logged as errors)
         */
        private Option<BloomFilterData> readBloomFilterFromFile(String str, String str2) {
            Path path = new Path(FSUtils.getPartitionPath(this.metaClient.getBasePath(), str), str2);
            try {
                // NOTE(review): the file reader is never closed here — confirm whether it should be.
                BloomFilter bloomFilter = HoodieFileReaderFactory.getFileReader(this.metaClient.getHadoopConf(), path).readBloomFilter();
                if (bloomFilter != null) {
                    return Option.of(BloomFilterData.builder().setPartitionPath(str).setFilename(str2).setBloomFilter(ByteBuffer.wrap(bloomFilter.serializeToString().getBytes())).build());
                }
                // Log through the class logger rather than jline's console logger.
                HoodieMetadataTableValidator.LOG.error("Failed to read bloom filter for " + path);
                return Option.empty();
            } catch (IOException e) {
                HoodieMetadataTableValidator.LOG.error("Failed to get file reader for " + path + " " + e.getMessage(), e);
                return Option.empty();
            }
        }
    }

    /**
     * Creates a validator bound only to a meta client: no Spark context, no configuration, no
     * task labels and no continuous-mode service.
     *
     * @param hoodieTableMetaClient meta client of the data table
     */
    public HoodieMetadataTableValidator(HoodieTableMetaClient hoodieTableMetaClient) {
        this.metaClient = hoodieTableMetaClient;
        // Never leave the Option itself null; code that checks isPresent() would otherwise NPE.
        this.asyncMetadataTableValidateService = Option.empty();
        this.taskLabels = "";
    }

    /**
     * Creates a fully configured validator.
     *
     * @param javaSparkContext Spark context supplying the Hadoop configuration
     * @param config           parsed command-line options
     */
    public HoodieMetadataTableValidator(JavaSparkContext javaSparkContext, Config config) {
        this.jsc = javaSparkContext;
        this.cfg = config;

        // Properties come from the --props file when one is given, otherwise from --hoodie-conf alone.
        if (config.propsFilePath != null) {
            this.props = readConfigFromFileSystem(javaSparkContext, config);
        } else {
            this.props = UtilHelpers.buildProperties(config.configs);
        }

        this.metaClient = HoodieTableMetaClient.builder()
            .setConf(javaSparkContext.hadoopConfiguration())
            .setBasePath(config.basePath)
            .setLoadActiveTimelineOnLoad(true)
            .build();

        // The async service exists only in continuous mode.
        if (config.continuous) {
            this.asyncMetadataTableValidateService = Option.of(new AsyncMetadataTableValidateService());
        } else {
            this.asyncMetadataTableValidateService = Option.empty();
        }

        this.taskLabels = generateValidationTaskLabels();
    }

    /**
     * Builds the comma-separated list of enabled validation tasks in a fixed order: base files,
     * file slices, file groups, column stats, bloom filters.
     *
     * @return the joined labels, or an empty string when no validation is enabled
     */
    private String generateValidationTaskLabels() {
        Config options = this.cfg;
        List<String> enabledTasks = new ArrayList<>();
        if (options.validateLatestBaseFiles) {
            enabledTasks.add("validate-latest-base-files");
        }
        if (options.validateLatestFileSlices) {
            enabledTasks.add("validate-latest-file-slices");
        }
        if (options.validateAllFileGroups) {
            enabledTasks.add("validate-all-file-groups");
        }
        if (options.validateAllColumnStats) {
            enabledTasks.add("validate-all-column-stats");
        }
        if (options.validateBloomFilters) {
            enabledTasks.add("validate-bloom-filters");
        }
        return String.join(",", enabledTasks);
    }

    /**
     * Loads the properties file named by {@code config.propsFilePath} through
     * {@code UtilHelpers.readConfig}, passing the {@code --hoodie-conf} entries along.
     *
     * @param javaSparkContext Spark context supplying the Hadoop configuration
     * @param config           parsed command-line options
     * @return the resulting properties
     */
    private TypedProperties readConfigFromFileSystem(JavaSparkContext javaSparkContext, Config config) {
        Path propsFile = new Path(config.propsFilePath);
        return UtilHelpers
            .readConfig(javaSparkContext.hadoopConfiguration(), propsFile, config.configs)
            .getProps(true);
    }

    /**
     * Command-line entry point: parses the options, starts a Spark context, runs the validator and
     * always stops the Spark context afterwards.
     *
     * <p>A failure inside {@link #run()} is logged and not rethrown. A failure while constructing
     * the validator still propagates to the caller, but the Spark context is now stopped first.
     *
     * @param strArr raw command-line arguments; usage is printed and the JVM exits with status 1
     *               when they are empty or contain {@code --help}
     */
    public static void main(String[] strArr) {
        Config config = new Config();
        JCommander jCommander = new JCommander(config, null, strArr);
        if (config.help.booleanValue() || strArr.length == 0) {
            jCommander.usage();
            System.exit(1);
        }
        SparkConf sparkConf = UtilHelpers.buildSparkConf("Hoodie-Metadata-Table-Validator", config.sparkMaster);
        sparkConf.set("spark.executor.memory", config.sparkMemory);
        JavaSparkContext javaSparkContext = new JavaSparkContext(sparkConf);
        try {
            // Constructed inside the try so a constructor failure cannot leave the Spark context running.
            HoodieMetadataTableValidator validator = new HoodieMetadataTableValidator(javaSparkContext, config);
            try {
                validator.run();
            } catch (Throwable th) {
                LOG.error("Fail to do hoodie metadata table validation for " + config, th);
            }
        } finally {
            javaSparkContext.stop();
        }
    }

    /**
     * Runs the validation once or continuously, depending on {@code cfg.continuous}, and shuts the
     * async service down afterwards if one exists.
     *
     * @throws HoodieException wrapping any exception raised by the validation
     */
    public void run() {
        try {
            LOG.info(this.cfg);
            if (!this.cfg.continuous) {
                LOG.info(" ****** do hoodie metadata table validation once ******");
                doHoodieMetadataTableValidationOnce();
            } else {
                LOG.info(" ****** do hoodie metadata table validation in CONTINUOUS mode ******");
                doHoodieMetadataTableValidationContinuous();
            }
        } catch (Exception e) {
            throw new HoodieException("Unable to do hoodie metadata table validation in " + this.cfg.basePath, e);
        } finally {
            this.asyncMetadataTableValidateService.ifPresent(service -> service.shutdown(true));
        }
    }

    /**
     * Runs a single validation round. A {@link HoodieValidationException} is always logged and is
     * rethrown unless {@code cfg.ignoreFailed} is set.
     */
    private void doHoodieMetadataTableValidationOnce() {
        try {
            doMetadataTableValidation();
        } catch (HoodieValidationException validationFailure) {
            LOG.error("Metadata table validation failed to HoodieValidationException", validationFailure);
            boolean tolerateFailure = this.cfg.ignoreFailed;
            if (tolerateFailure) {
                return;
            }
            throw validationFailure;
        }
    }

    /**
     * Starts the async validation service, if one was created, and blocks until it shuts down.
     *
     * @throws HoodieException wrapping any exception raised while waiting for shutdown
     */
    private void doHoodieMetadataTableValidationContinuous() {
        if (!this.asyncMetadataTableValidateService.isPresent()) {
            return;
        }
        AsyncMetadataTableValidateService service = this.asyncMetadataTableValidateService.get();
        service.start(null);
        try {
            service.waitForShutdown();
        } catch (Exception e) {
            throw new HoodieException(e.getMessage(), e);
        }
    }

    /**
     * Runs one validation round: reloads the timeline, checks that the metadata table is usable,
     * validates the partition list, then validates every partition's files in parallel and logs
     * the overall outcome.
     *
     * <p>Returns without validating when the metadata table is not available or the partition list
     * is empty.
     *
     * @throws HoodieValidationException when a check fails and {@code cfg.ignoreFailed} is false
     *                                   (the partition list comparison throws regardless of that flag)
     */
    public void doMetadataTableValidation() {
        this.metaClient.reloadActiveTimeline();
        String basePath = this.metaClient.getBasePath();
        if (!checkMetadataTableIsAvailable()) {
            return;
        }

        Set<String> baseFilesForCleaning = this.cfg.skipDataFilesForCleaning
            ? listBaseFileNamesPendingClean()
            : Collections.<String>emptySet();

        HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(this.jsc);
        List<String> allPartitions = validatePartitions(engineContext, basePath);
        if (allPartitions.isEmpty()) {
            LOG.warn("The result of getting all partitions is null or empty, skip current validation.");
            return;
        }

        HoodieMetadataValidationContext metadataTableBasedContext = new HoodieMetadataValidationContext(engineContext, this.cfg, this.metaClient, true);
        HoodieMetadataValidationContext fsBasedContext = new HoodieMetadataValidationContext(engineContext, this.cfg, this.metaClient, false);

        // One task per partition; each task reports whether its partition passed.
        List<Boolean> partitionResults = engineContext.parallelize(allPartitions, allPartitions.size()).map(partitionPath -> {
            try {
                validateFilesInPartition(metadataTableBasedContext, fsBasedContext, partitionPath, baseFilesForCleaning);
                LOG.info(String.format("Metadata table validation succeeded for partition %s (partition %s)", partitionPath, this.taskLabels));
                return true;
            } catch (HoodieValidationException e) {
                LOG.error(String.format("Metadata table validation failed for partition %s due to HoodieValidationException (partition %s)", partitionPath, this.taskLabels), e);
                if (this.cfg.ignoreFailed) {
                    return false;
                }
                throw e;
            }
        }).collectAsList();

        boolean allSucceeded = true;
        for (Boolean partitionSucceeded : partitionResults) {
            allSucceeded &= partitionSucceeded.booleanValue();
        }
        if (allSucceeded) {
            LOG.info(String.format("Metadata table validation succeeded (%s).", this.taskLabels));
        } else {
            LOG.warn(String.format("Metadata table validation failed (%s).", this.taskLabels));
        }
    }

    /**
     * Collects the names of base files listed for deletion in the cleaner plans of all inflight
     * clean instants on the active timeline.
     *
     * @return file names (no directory part) whose extension is a known base file extension
     * @throws HoodieIOException when a cleaner plan cannot be read
     */
    private Set<String> listBaseFileNamesPendingClean() {
        return this.metaClient.getActiveTimeline().getCleanerTimeline().filterInflights().getInstants().flatMap(inflightInstant -> {
            // The cleaner plan is looked up through the REQUESTED form of the inflight instant.
            HoodieInstant requestedInstant = new HoodieInstant(HoodieInstant.State.REQUESTED, inflightInstant.getAction(), inflightInstant.getTimestamp());
            try {
                return CleanerUtils.getCleanerPlan(this.metaClient, requestedInstant).getFilePathsToBeDeletedPerPartition().values().stream().flatMap(cleanFileInfos -> {
                    return cleanFileInfos.stream().map(cleanFileInfo -> {
                        return new Path(cleanFileInfo.getFilePath()).getName();
                    });
                });
            } catch (IOException e) {
                // Keep the cause so the underlying I/O failure is visible.
                throw new HoodieIOException("Error reading cleaner metadata for " + requestedInstant, e);
            }
        }).filter(fileName -> {
            return HoodieFileFormat.BASE_FILE_EXTENSIONS.contains(FSUtils.getFileExtension(fileName));
        }).collect(Collectors.toSet());
    }

    /**
     * Checks whether the metadata table under {@code cfg.basePath} can be validated.
     *
     * @return {@code true} when the metadata table has at least one completed instant;
     *         {@code false} when neither table has a completed instant, when the metadata table
     *         is not found, or when it cannot be read for any other reason
     * @throws HoodieValidationException when the data table has completed instants but the
     *                                   metadata table has none
     */
    private boolean checkMetadataTableIsAvailable() {
        try {
            int completedInMetadataTable = HoodieTableMetaClient.builder()
                .setConf(this.jsc.hadoopConfiguration())
                .setBasePath(new Path(this.cfg.basePath, HoodieTableMetaClient.METADATA_TABLE_FOLDER_PATH).toString())
                .setLoadActiveTimelineOnLoad(true)
                .build()
                .getActiveTimeline().filterCompletedInstants().countInstants();
            if (completedInMetadataTable != 0) {
                return true;
            }
            if (this.metaClient.getActiveTimeline().filterCompletedInstants().countInstants() != 0) {
                throw new HoodieValidationException("There is no completed instant for metadata table.");
            }
            LOG.info("There is no completed instant both in metadata table and corresponding data table.");
            return false;
        } catch (HoodieValidationException e) {
            // Without this clause the generic handler below would swallow the failure raised above.
            throw e;
        } catch (TableNotFoundException e) {
            LOG.warn("Metadata table is not found. Skip current validation.");
            return false;
        } catch (Exception e) {
            LOG.warn("Metadata table is not available to read for now, ", e);
            return false;
        }
    }

    /**
     * Compares two partition listings obtained from {@code FSUtils.getAllPartitionPaths}, called
     * with its boolean flag set to {@code false} and to {@code true} (presumably file-system
     * listing versus metadata-table listing — confirm against FSUtils).
     *
     * <p>Partitions from the first listing are kept only when their recorded creation commit time
     * is present and is contained in, or precedes the start of, the completed timeline.
     *
     * @param hoodieSparkEngineContext engine context used for both listings
     * @param str                      base path of the table
     * @return the sorted second listing when both listings match
     * @throws HoodieValidationException when the two sorted listings differ
     */
    private List<String> validatePartitions(HoodieSparkEngineContext hoodieSparkEngineContext, String str) {
        boolean assumeDatePartitioning = this.cfg.assumeDatePartitioning.booleanValue();
        List<String> listedPartitions = FSUtils.getAllPartitionPaths(hoodieSparkEngineContext, str, false, assumeDatePartitioning);
        HoodieTimeline completedTimeline = this.metaClient.getActiveTimeline().filterCompletedInstants();

        List<String> partitionsFromFs = listedPartitions.stream().parallel().filter(partitionPath -> {
            Option<String> createdCommitTime = new HoodiePartitionMetadata(this.metaClient.getFs(), FSUtils.getPartitionPath(str, partitionPath)).readPartitionCreatedCommitTime();
            return createdCommitTime.isPresent() && completedTimeline.containsOrBeforeTimelineStarts(createdCommitTime.get());
        }).collect(Collectors.toList());

        List<String> partitionsFromMetadata = FSUtils.getAllPartitionPaths(hoodieSparkEngineContext, str, true, this.cfg.assumeDatePartitioning.booleanValue());
        Collections.sort(partitionsFromFs);
        Collections.sort(partitionsFromMetadata);

        // List.equals already fails on a size mismatch, so one comparison is enough.
        if (!partitionsFromFs.equals(partitionsFromMetadata)) {
            String message = "Compare Partitions Failed! AllPartitionPathsFromFS : " + partitionsFromFs + " and allPartitionPathsMeta : " + partitionsFromMetadata;
            LOG.error(message);
            throw new HoodieValidationException(message);
        }
        return partitionsFromMetadata;
    }

    /**
     * Runs every validation enabled in the configuration against one partition, in the order:
     * latest file slices, latest base files, all file groups, column stats, bloom filters.
     *
     * @param hoodieMetadataValidationContext  first context, passed through to each check
     * @param hoodieMetadataValidationContext2 second context, passed through to each check
     * @param str                              partition path
     * @param set                              base file names passed through to each check
     */
    private void validateFilesInPartition(HoodieMetadataValidationContext hoodieMetadataValidationContext, HoodieMetadataValidationContext hoodieMetadataValidationContext2, String str, Set<String> set) {
        final Config checks = this.cfg;
        final HoodieMetadataValidationContext firstContext = hoodieMetadataValidationContext;
        final HoodieMetadataValidationContext secondContext = hoodieMetadataValidationContext2;

        if (checks.validateLatestFileSlices) {
            validateLatestFileSlices(firstContext, secondContext, str, set);
        }

        if (checks.validateLatestBaseFiles) {
            validateLatestBaseFiles(firstContext, secondContext, str, set);
        }

        if (checks.validateAllFileGroups) {
            validateAllFileGroups(firstContext, secondContext, str, set);
        }

        if (checks.validateAllColumnStats) {
            validateAllColumnStats(firstContext, secondContext, str, set);
        }

        if (checks.validateBloomFilters) {
            validateBloomFilters(firstContext, secondContext, str, set);
        }
    }

    /**
     * Compares every file slice of every file group in a partition, as reported
     * by the two validation contexts.
     *
     * <p>Each context's file groups are flattened into their file slices and
     * sorted with a {@code FileSliceComparator}. When {@code set} is non-empty,
     * slices whose base file name is in {@code set} are removed from both lists
     * before the comparison.
     *
     * @param hoodieMetadataValidationContext  context whose slices are logged as "from metadata"
     * @param hoodieMetadataValidationContext2 context whose slices are logged as "from direct listing"; also supplies the meta client
     * @param str                              partition path being validated
     * @param set                              base file names to exclude from the comparison
     */
    private void validateAllFileGroups(HoodieMetadataValidationContext hoodieMetadataValidationContext, HoodieMetadataValidationContext hoodieMetadataValidationContext2, String str, Set<String> set) {
        List<FileSlice> metadataSlices = hoodieMetadataValidationContext.getSortedAllFileGroupList(str).stream()
                .flatMap(fileGroup -> fileGroup.getAllFileSlices())
                .sorted(new FileSliceComparator())
                .collect(Collectors.toList());
        List<FileSlice> listingSlices = hoodieMetadataValidationContext2.getSortedAllFileGroupList(str).stream()
                .flatMap(fileGroup -> fileGroup.getAllFileSlices())
                .sorted(new FileSliceComparator())
                .collect(Collectors.toList());

        // Filtering is only needed when there is something to exclude.
        if (!set.isEmpty()) {
            metadataSlices = filterFileSliceBasedOnInflightCleaning(metadataSlices, set);
            listingSlices = filterFileSliceBasedOnInflightCleaning(listingSlices, set);
        }

        LOG.debug("All file slices from metadata: " + metadataSlices + ". For partitions " + str);
        LOG.debug("All file slices from direct listing: " + listingSlices + ". For partitions " + str);
        validateFileSlices(metadataSlices, listingSlices, str, hoodieMetadataValidationContext2.getMetaClient(), "all file groups");
    }

    /**
     * Compares the sorted latest base files of a partition as reported by the
     * two validation contexts.
     *
     * <p>When {@code set} is non-empty, base files whose name is in {@code set}
     * are dropped from both lists first. The comparison itself is delegated to
     * {@code validate}.
     *
     * @param hoodieMetadataValidationContext  context whose base files are logged as "from metadata"
     * @param hoodieMetadataValidationContext2 context whose base files are logged as "from direct listing"
     * @param str                              partition path being validated
     * @param set                              base file names to exclude from the comparison
     */
    private void validateLatestBaseFiles(HoodieMetadataValidationContext hoodieMetadataValidationContext, HoodieMetadataValidationContext hoodieMetadataValidationContext2, String str, Set<String> set) {
        final boolean excludeListedFiles = !set.isEmpty();

        List<HoodieBaseFile> metadataBaseFiles = hoodieMetadataValidationContext.getSortedLatestBaseFileList(str);
        if (excludeListedFiles) {
            metadataBaseFiles = filterBaseFileBasedOnInflightCleaning(metadataBaseFiles, set);
        }

        List<HoodieBaseFile> listingBaseFiles = hoodieMetadataValidationContext2.getSortedLatestBaseFileList(str);
        if (excludeListedFiles) {
            listingBaseFiles = filterBaseFileBasedOnInflightCleaning(listingBaseFiles, set);
        }

        LOG.debug("Latest base file from metadata: " + metadataBaseFiles + ". For partitions " + str);
        LOG.debug("Latest base file from direct listing: " + listingBaseFiles + ". For partitions " + str);
        validate(metadataBaseFiles, listingBaseFiles, str, "latest base files");
    }

    /**
     * Compares the sorted latest file slices of a partition as reported by the
     * two validation contexts.
     *
     * <p>When {@code set} is non-empty, slices whose base file name is in
     * {@code set} are dropped from both lists first. The comparison itself is
     * delegated to {@code validateFileSlices}.
     *
     * @param hoodieMetadataValidationContext  context whose slices are logged as "from metadata"
     * @param hoodieMetadataValidationContext2 context whose slices are logged as "from direct listing"; also supplies the meta client
     * @param str                              partition path being validated
     * @param set                              base file names to exclude from the comparison
     */
    private void validateLatestFileSlices(HoodieMetadataValidationContext hoodieMetadataValidationContext, HoodieMetadataValidationContext hoodieMetadataValidationContext2, String str, Set<String> set) {
        final boolean excludeListedFiles = !set.isEmpty();

        List<FileSlice> metadataSlices = hoodieMetadataValidationContext.getSortedLatestFileSliceList(str);
        if (excludeListedFiles) {
            metadataSlices = filterFileSliceBasedOnInflightCleaning(metadataSlices, set);
        }

        List<FileSlice> listingSlices = hoodieMetadataValidationContext2.getSortedLatestFileSliceList(str);
        if (excludeListedFiles) {
            listingSlices = filterFileSliceBasedOnInflightCleaning(listingSlices, set);
        }

        LOG.debug("Latest file list from metadata: " + metadataSlices + ". For partition " + str);
        LOG.debug("Latest file list from direct listing: " + listingSlices + ". For partition " + str);
        validateFileSlices(metadataSlices, listingSlices, str, hoodieMetadataValidationContext2.getMetaClient(), "latest file slices");
    }

    /**
     * Returns the file slices that should take part in validation.
     *
     * <p>A slice is kept when it has no base file, or when its base file name is
     * not contained in {@code set}. Input order is preserved.
     *
     * @param list file slices to filter
     * @param set  base file names to exclude
     * @return a new list holding only the retained slices
     */
    private List<FileSlice> filterFileSliceBasedOnInflightCleaning(List<FileSlice> list, Set<String> set) {
        return list.stream()
                .filter(slice -> !slice.getBaseFile().isPresent()
                        || !set.contains(slice.getBaseFile().get().getFileName()))
                .collect(Collectors.toList());
    }

    /**
     * Returns the base files whose file name is not contained in {@code set}.
     * Input order is preserved.
     *
     * @param list base files to filter
     * @param set  base file names to exclude
     * @return a new list holding only the retained base files
     */
    private List<HoodieBaseFile> filterBaseFileBasedOnInflightCleaning(List<HoodieBaseFile> list, Set<String> set) {
        List<HoodieBaseFile> retained = new ArrayList<>();
        for (HoodieBaseFile baseFile : list) {
            if (!set.contains(baseFile.getFileName())) {
                retained.add(baseFile);
            }
        }
        return retained;
    }

    /**
     * Compares the sorted column stats that both validation contexts report for
     * the same set of base files in a partition.
     *
     * <p>The base file names are taken from the second context via
     * {@code getLatestBaseFileNames} and used to query both contexts.
     *
     * @param hoodieMetadataValidationContext  first context queried for column stats
     * @param hoodieMetadataValidationContext2 second context; also the source of the base file names
     * @param str                              partition path being validated
     * @param set                              base file names to exclude when picking files to check
     */
    private void validateAllColumnStats(HoodieMetadataValidationContext hoodieMetadataValidationContext, HoodieMetadataValidationContext hoodieMetadataValidationContext2, String str, Set<String> set) {
        List<String> baseFileNames = getLatestBaseFileNames(hoodieMetadataValidationContext2, str, set);
        validate(
                hoodieMetadataValidationContext.getSortedColumnStatsList(str, baseFileNames),
                hoodieMetadataValidationContext2.getSortedColumnStatsList(str, baseFileNames),
                str,
                "column stats");
    }

    /**
     * Compares the sorted bloom filter entries that both validation contexts
     * report for the same set of base files in a partition.
     *
     * <p>The base file names are taken from the second context via
     * {@code getLatestBaseFileNames} and used to query both contexts.
     *
     * @param hoodieMetadataValidationContext  first context queried for bloom filters
     * @param hoodieMetadataValidationContext2 second context; also the source of the base file names
     * @param str                              partition path being validated
     * @param set                              base file names to exclude when picking files to check
     */
    private void validateBloomFilters(HoodieMetadataValidationContext hoodieMetadataValidationContext, HoodieMetadataValidationContext hoodieMetadataValidationContext2, String str, Set<String> set) {
        List<String> baseFileNames = getLatestBaseFileNames(hoodieMetadataValidationContext2, str, set);
        validate(
                hoodieMetadataValidationContext.getSortedBloomFilterList(str, baseFileNames),
                hoodieMetadataValidationContext2.getSortedBloomFilterList(str, baseFileNames),
                str,
                "bloom filters");
    }

    /**
     * Lists the file names of the latest base files in a partition, in the order
     * the context returns them.
     *
     * <p>When {@code set} is non-empty, base files whose name is in {@code set}
     * are left out.
     *
     * @param hoodieMetadataValidationContext context that supplies the sorted latest base files
     * @param str                             partition path
     * @param set                             base file names to exclude
     * @return file names of the retained base files
     */
    private List<String> getLatestBaseFileNames(HoodieMetadataValidationContext hoodieMetadataValidationContext, String str, Set<String> set) {
        final boolean excludeListedFiles = !set.isEmpty();

        List<HoodieBaseFile> baseFiles = hoodieMetadataValidationContext.getSortedLatestBaseFileList(str);
        if (excludeListedFiles) {
            baseFiles = filterBaseFileBasedOnInflightCleaning(baseFiles, set);
        }

        return baseFiles.stream()
                .map(baseFile -> baseFile.getFileName())
                .collect(Collectors.toList());
    }

    /**
     * Checks that two lists are equal and logs the outcome.
     *
     * @param list  values reported as "from metadata" in the failure message
     * @param list2 values reported as "from file system and base files" in the failure message
     * @param str   partition path, used only in log and error messages
     * @param str2  label of what is being validated, used only in log and error messages
     * @param <T>   element type; equality is decided by {@link List#equals(Object)}
     * @throws HoodieValidationException if the lists differ in size or content
     */
    private <T> void validate(List<T> list, List<T> list2, String str, String str2) {
        boolean identical = list.size() == list2.size() && list.equals(list2);
        if (!identical) {
            String failure = String.format("Validation of %s for partition %s failed.\n%s from metadata: %s\n%s from file system and base files: %s", str2, str, str2, list, str2, list2);
            LOG.error(failure);
            throw new HoodieValidationException(failure);
        }
        LOG.info(String.format("Validation of %s succeeded for partition %s", str2, str));
    }

    /**
     * Checks that two lists of file slices describe the same data, tolerating
     * differences that come only from uncommitted log files.
     *
     * <p>The lists match when they are equal. If they have the same size but are
     * not equal, they are compared pairwise by index. A pair is accepted only if
     * its file group id, base instant time and base file are equal, and
     * {@code areFileSliceCommittedLogFilesMatching} accepts the two slices.
     *
     * <p>Fix: a pair that differs in file group id, base instant time or base
     * file is now flagged as a mismatch. Previously the loop stopped on such a
     * pair without setting the flag, so the method reported success for lists it
     * had already found to be unequal.
     *
     * @param list                  slices reported as "from metadata" in the failure message
     * @param list2                 slices reported as "from file system and base files" in the failure message
     * @param str                   partition path, used only in log and error messages
     * @param hoodieTableMetaClient meta client passed on to the log-file comparison
     * @param str2                  label of what is being validated, used only in log and error messages
     * @throws HoodieValidationException if the two lists do not match
     */
    private void validateFileSlices(List<FileSlice> list, List<FileSlice> list2, String str, HoodieTableMetaClient hoodieTableMetaClient, String str2) {
        boolean mismatch = false;
        if (list.size() != list2.size()) {
            mismatch = true;
        } else if (!list.equals(list2)) {
            for (int i = 0; i < list.size(); i++) {
                FileSlice metadataSlice = list.get(i);
                FileSlice listingSlice = list2.get(i);
                boolean sameIdentity = Objects.equals(metadataSlice.getFileGroupId(), listingSlice.getFileGroupId())
                        && Objects.equals(metadataSlice.getBaseInstantTime(), listingSlice.getBaseInstantTime())
                        && Objects.equals(metadataSlice.getBaseFile(), listingSlice.getBaseFile());
                // The log files are only inspected when the slices agree on identity;
                // an identity difference is a mismatch on its own.
                if (!sameIdentity
                        || !areFileSliceCommittedLogFilesMatching(metadataSlice, listingSlice, hoodieTableMetaClient)) {
                    mismatch = true;
                    break;
                }
                LOG.warn(String.format("There are uncommitted log files in the latest file slices but the committed log files match: %s %s", metadataSlice, listingSlice));
            }
        }

        if (mismatch) {
            String failure = String.format("Validation of %s for partition %s failed.\n%s from metadata: %s\n%s from file system and base files: %s", str2, str, str2, list, str2, list2);
            LOG.error(failure);
            throw new HoodieValidationException(failure);
        }
        LOG.info(String.format("Validation of %s succeeded for partition %s", str2, str));
    }

    /**
     * Decides whether two file slices agree once uncommitted log files are
     * ignored.
     *
     * <p>The log file paths of both slices are collected, and the paths common to
     * both are removed so that only the differing paths remain on each side. The
     * slices match when neither remainder contains a committed log file, as
     * judged by {@code hasCommittedLogFiles} against the commits timeline.
     *
     * @param fileSlice             first slice; named "the first file slice" in the error log
     * @param fileSlice2            second slice; named "the second file slice" in the error log
     * @param hoodieTableMetaClient supplies the file system and the commits timeline
     * @return {@code true} if neither slice has a committed log file the other lacks
     */
    private boolean areFileSliceCommittedLogFilesMatching(FileSlice fileSlice, FileSlice fileSlice2, HoodieTableMetaClient hoodieTableMetaClient) {
        Set<String> onlyInFirst = fileSlice.getLogFiles()
                .map(logFile -> logFile.getPath().toString())
                .collect(Collectors.toSet());
        Set<String> onlyInSecond = fileSlice2.getLogFiles()
                .map(logFile -> logFile.getPath().toString())
                .collect(Collectors.toSet());

        // Remove the shared paths so each set holds only what the other side lacks.
        Set<String> sharedPaths = new HashSet<>(onlyInFirst);
        sharedPaths.retainAll(onlyInSecond);
        onlyInFirst.removeAll(sharedPaths);
        onlyInSecond.removeAll(sharedPaths);

        FileSystem fs = hoodieTableMetaClient.getFs();
        HoodieTimeline commitsTimeline = hoodieTableMetaClient.getCommitsTimeline();

        if (hasCommittedLogFiles(fs, onlyInFirst, commitsTimeline)) {
            LOG.error("The first file slice has committed log files that cause mismatching: " + fileSlice);
            return false;
        }
        if (hasCommittedLogFiles(fs, onlyInSecond, commitsTimeline)) {
            LOG.error("The second file slice has committed log files that cause mismatching: " + fileSlice2);
            return false;
        }
        return true;
    }

    /**
     * Reports whether any of the given log files starts with a block written by
     * a committed instant.
     *
     * <p>For each path, the schema is read from the log file and the first log
     * block is opened. The block's {@code INSTANT_TIME} header counts as
     * committed when the completed timeline contains it (or it precedes the
     * timeline start) and the inflight timeline does not contain it. The scan
     * stops at the first committed file.
     *
     * <p>Files whose schema cannot be read, that contain no block, or that raise
     * an {@link IOException} are logged and skipped. The reader, if one was
     * opened, is always closed quietly.
     *
     * @param fileSystem     file system used to read the log files
     * @param set            log file paths to inspect
     * @param hoodieTimeline timeline from which the completed and inflight views are derived
     * @return {@code true} if at least one log file is committed; {@code false} otherwise, including for an empty set
     */
    private boolean hasCommittedLogFiles(FileSystem fileSystem, Set<String> set, HoodieTimeline hoodieTimeline) {
        if (set.isEmpty()) {
            return false;
        }

        AvroSchemaConverter schemaConverter = new AvroSchemaConverter();
        HoodieTimeline completedTimeline = hoodieTimeline.filterCompletedInstants();
        HoodieTimeline inflightTimeline = hoodieTimeline.filterInflights();

        for (String logFilePath : set) {
            HoodieLogFormat.Reader reader = null;
            try {
                MessageType logSchema = TableSchemaResolver.readSchemaFromLogFile(fileSystem, new Path(logFilePath));
                if (logSchema == null) {
                    LOG.warn(String.format("Cannot read schema from log file %s. Skip the check as it's likely being written by an inflight instant.", logFilePath));
                    continue;
                }

                reader = HoodieLogFormat.newReader(fileSystem, new HoodieLogFile(new Path(logFilePath)), schemaConverter.convert(logSchema));
                if (!reader.hasNext()) {
                    LOG.warn("There is no log block in " + logFilePath);
                    continue;
                }

                // Only the first block's instant time is consulted.
                String instantTime = reader.next().getLogBlockHeader().get(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME);
                if (completedTimeline.containsOrBeforeTimelineStarts(instantTime) && !inflightTimeline.containsInstant(instantTime)) {
                    LOG.warn("Log file is committed: " + logFilePath);
                    return true;
                }
                LOG.warn("Log file is uncommitted: " + logFilePath);
            } catch (IOException e) {
                LOG.warn(String.format("Cannot read log file %s: %s. Skip the check as it's likely being written by an inflight instant.", logFilePath, e.getMessage()), e);
            } finally {
                // Runs on every exit path; reader is still null if it was never opened.
                FileIOUtils.closeQuietly(reader);
            }
        }
        return false;
    }

    /**
     * Synthetic hook that rebuilds a lambda from its {@link SerializedLambda} form.
     *
     * <p>Only one implementation method is recognised,
     * {@code lambda$doMetadataTableValidation$8043eeec$1}. When the serialized
     * descriptor matches it, the captured arguments are read back and a function
     * over a partition path is returned. Any other input ends in an
     * {@link IllegalArgumentException}.
     *
     * <p>NOTE(review): this looks like decompiler output rather than hand-written
     * source. {@code boolean z = -1}, a {@code switch} on that boolean and the use
     * of {@code this} inside a static method are not valid Java as written.
     * Presumably the compiler normally generates this method; confirm before
     * trying to compile or edit it.
     */
    private static /* synthetic */ Object $deserializeLambda$(SerializedLambda serializedLambda) {
        String implMethodName = serializedLambda.getImplMethodName();
        // First switch: map the implementation method name to a case selector (hash code, then an exact name check).
        boolean z = -1;
        switch (implMethodName.hashCode()) {
            case -760292970:
                if (implMethodName.equals("lambda$doMetadataTableValidation$8043eeec$1")) {
                    z = false;
                    break;
                }
                break;
        }
        // Second switch: rebuild the lambda for the selected case.
        switch (z) {
            case false:
                // Every part of the serialized descriptor must match before the lambda is rebuilt.
                if (serializedLambda.getImplMethodKind() == 7 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/hudi/common/function/SerializableFunction") && serializedLambda.getFunctionalInterfaceMethodName().equals("apply") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("org/apache/hudi/utilities/HoodieMetadataTableValidator") && serializedLambda.getImplMethodSignature().equals("(Lorg/apache/hudi/utilities/HoodieMetadataTableValidator$HoodieMetadataValidationContext;Lorg/apache/hudi/utilities/HoodieMetadataTableValidator$HoodieMetadataValidationContext;Ljava/util/Set;Ljava/lang/String;)Ljava/lang/Boolean;")) {
                    // Captured arguments, in order: validator instance, the two validation contexts, the set of file names.
                    // NOTE(review): this local is never read below; the lambda body refers to `this` instead,
                    // which presumably stands for this captured instance. Confirm.
                    HoodieMetadataTableValidator hoodieMetadataTableValidator = (HoodieMetadataTableValidator) serializedLambda.getCapturedArg(0);
                    HoodieMetadataValidationContext hoodieMetadataValidationContext = (HoodieMetadataValidationContext) serializedLambda.getCapturedArg(1);
                    HoodieMetadataValidationContext hoodieMetadataValidationContext2 = (HoodieMetadataValidationContext) serializedLambda.getCapturedArg(2);
                    Set set = (Set) serializedLambda.getCapturedArg(3);
                    // The rebuilt function validates one partition path and returns true on success.
                    return str2 -> {
                        try {
                            validateFilesInPartition(hoodieMetadataValidationContext, hoodieMetadataValidationContext2, str2, set);
                            LOG.info(String.format("Metadata table validation succeeded for partition %s (partition %s)", str2, this.taskLabels));
                            return true;
                        } catch (HoodieValidationException e) {
                            LOG.error(String.format("Metadata table validation failed for partition %s due to HoodieValidationException (partition %s)", str2, this.taskLabels), e);
                            // With ignoreFailed set, a failed partition yields false instead of propagating the exception.
                            if (this.cfg.ignoreFailed) {
                                return false;
                            }
                            throw e;
                        }
                    };
                }
                break;
        }
        // Reached when the name or the descriptor did not match the single known lambda.
        throw new IllegalArgumentException("Invalid lambda deserialization");
    }
}
