package org.apache.hudi.utilities;

import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import com.codahale.metrics.Histogram;
import com.codahale.metrics.Snapshot;
import com.codahale.metrics.UniformReservoir;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.format.DateTimeParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import java.util.ResourceBundle;
import java.util.Set;
import java.util.stream.Collectors;
import javax.annotation.Nullable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.util.Strings;
import org.apache.hudi.cli.HoodieCliSparkConfig;
import org.apache.hudi.client.common.HoodieSparkEngineContext;
import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.engine.HoodieLocalEngineContext;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.view.FileSystemViewManager;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.exception.TableNotFoundException;
import org.apache.hudi.hadoop.fs.HadoopFSUtils;
import org.apache.hudi.metadata.HoodieTableMetadata;
import org.apache.hudi.storage.StorageConfiguration;
import org.apache.hudi.storage.hadoop.HoodieHadoopStorage;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

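/**
 * Command-line utility that logs file-size statistics for the latest base files of one or more Hudi tables.
 *
 * <p>Loaded from: input_file:org/apache/hudi/utilities/TableSizeStats.class
 */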
public class TableSizeStats implements Serializable {
    /** Serialization version identifier for this {@link Serializable} class. */
    private static final long serialVersionUID = 1;
    /** Class-wide SLF4J logger. */
    private static final Logger LOG = LoggerFactory.getLogger((Class<?>) TableSizeStats.class);
    /** Date parser accepting either {@code yyyy/M/d} or {@code yyyy-M-d}; used for CLI dates and partition names. */
    private static final DateTimeFormatter DATE_FORMATTER = new DateTimeFormatterBuilder().appendOptional(DateTimeFormatter.ofPattern("yyyy/M/d")).appendOptional(DateTimeFormatter.ofPattern("yyyy-M-d")).toFormatter();
    /** Unit suffixes used by {@code getFileSizeUnit}; each step up the array corresponds to one division by 1024. */
    private static final String[] FILE_SIZE_UNITS = {"B", "KB", "MB", "GB", "TB"};
    /** Spark context supplied by the caller; declared transient, so it is not serialized with this object. */
    private transient JavaSparkContext jsc;
    /** Parsed command-line options. */
    private Config cfg;
    /** Properties built in the constructor from the {@code --hoodie-conf} values or from the file at {@code --props-path}. */
    private TypedProperties props;

    /* loaded from: input_file:org/apache/hudi/utilities/TableSizeStats$Config.class */
    /**
     * Command-line options for {@link TableSizeStats}, populated by JCommander.
     *
     * <p>Either {@code --base-path} (a single table) or {@code --props-path} (a file listing base
     * paths, one per line) is expected; {@code run()} fails when neither is set.
     */
    public static class Config implements Serializable {

        @Parameter(names = {"--base-path", "-bp"}, description = "Base path for the table", required = false)
        public String basePath = null;

        @Parameter(names = {"--num-days", "-nd"}, description = "Consider files modified within this many days.", required = false)
        public long numDays = 0;

        @Parameter(names = {"--start-date", "-sd"}, description = "Consider files modified on or after this date.", required = false)
        public String startDate = null;

        @Parameter(names = {"--end-date", "-ed"}, description = "Consider files modified before this date.", required = false)
        public String endDate = null;

        @Parameter(names = {"--enable-table-stats", "-fs"}, description = "Show file-level stats.", required = false)
        public boolean tableStats = false;

        @Parameter(names = {"--enable-partition-stats", "-ps"}, description = "Show partition-level stats.", required = false)
        public boolean partitionStats = false;

        @Parameter(names = {"--props-path", "-pp"}, description = "Properties file containing base paths one per line", required = false)
        public String propsFilePath = null;

        @Parameter(names = {"--parallelism", "-pl"}, description = "Parallelism for valuation", required = false)
        public int parallelism = 200;

        @Parameter(names = {"--spark-master", "-ms"}, description = "Spark master", required = false)
        public String sparkMaster = null;

        @Parameter(names = {"--spark-memory", "-sm"}, description = "spark memory to use", required = false)
        public String sparkMemory = "1g";

        @Parameter(names = {"--hoodie-conf"}, description = "Any configuration that can be set in the properties file (using the CLI parameter \"--props\") can also be passed command line using this parameter. This can be repeated", splitter = IdentitySplitter.class)
        public List<String> configs = new ArrayList<>();

        @Parameter(names = {"--help", "-h"}, help = true)
        public Boolean help = false;

        /** Renders every option except {@code --help} in a multi-line, human-readable form. */
        @Override
        public String toString() {
            return "TableSizeStats {\n   --base-path " + this.basePath
                + ", \n   --num-days " + this.numDays
                + ", \n   --start-date " + this.startDate
                + ", \n   --end-date " + this.endDate
                + ", \n   --enable-table-stats " + this.tableStats
                + ", \n   --enable-partition-stats " + this.partitionStats
                + ", \n   --parallelism " + this.parallelism
                + ", \n   --spark-master " + this.sparkMaster
                + ", \n   --spark-memory " + this.sparkMemory
                + ", \n   --props " + this.propsFilePath
                + ", \n   --hoodie-conf " + this.configs + "\n}";
        }

        /**
         * Compares all options except {@code help}.
         *
         * <p>{@code basePath} is compared null-safely: it defaults to {@code null} and stays
         * {@code null} whenever only {@code --props-path} is supplied.
         */
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (obj == null || getClass() != obj.getClass()) {
                return false;
            }
            Config other = (Config) obj;
            return Objects.equals(this.basePath, other.basePath)
                && this.numDays == other.numDays
                && Objects.equals(this.startDate, other.startDate)
                && Objects.equals(this.endDate, other.endDate)
                && this.tableStats == other.tableStats
                && this.partitionStats == other.partitionStats
                && this.parallelism == other.parallelism
                && Objects.equals(this.sparkMaster, other.sparkMaster)
                && Objects.equals(this.sparkMemory, other.sparkMemory)
                && Objects.equals(this.propsFilePath, other.propsFilePath)
                && Objects.equals(this.configs, other.configs);
        }

        /**
         * Hashes exactly the fields that {@link #equals(Object)} compares.
         *
         * <p>{@code help} is deliberately left out: including a field that {@code equals} ignores
         * would let two equal instances produce different hash codes.
         */
        @Override
        public int hashCode() {
            return Objects.hash(this.basePath, this.numDays, this.startDate, this.endDate, this.tableStats,
                this.partitionStats, this.parallelism, this.sparkMaster, this.sparkMemory, this.propsFilePath,
                this.configs);
        }
    }

    /**
     * Creates the tool and resolves its properties.
     *
     * <p>When {@code --props-path} is set the properties are read from that file (with the
     * {@code --hoodie-conf} values passed along); otherwise they are built from the
     * {@code --hoodie-conf} values alone.
     *
     * @param javaSparkContext Spark context used for Hadoop configuration and engine access
     * @param config           parsed command-line options
     */
    public TableSizeStats(JavaSparkContext javaSparkContext, Config config) {
        this.jsc = javaSparkContext;
        this.cfg = config;
        if (config.propsFilePath != null) {
            this.props = readConfigFromFileSystem(javaSparkContext, config);
        } else {
            this.props = UtilHelpers.buildProperties(config.configs);
        }
    }

    /**
     * Loads properties from the file at {@code config.propsFilePath}, passing the
     * {@code --hoodie-conf} values to {@code UtilHelpers.readConfig}.
     *
     * @param javaSparkContext supplies the Hadoop configuration used to reach the file
     * @param config           parsed command-line options
     * @return the properties returned by {@code getProps(true)}
     */
    private TypedProperties readConfigFromFileSystem(JavaSparkContext javaSparkContext, Config config) {
        final Configuration hadoopConf = javaSparkContext.hadoopConfiguration();
        final Path propsFile = new Path(config.propsFilePath);
        return UtilHelpers.readConfig(hadoopConf, propsFile, config.configs).getProps(true);
    }

    /**
     * Command-line entry point.
     *
     * <p>Prints usage and exits with status 1 when {@code --help} is given or no arguments are
     * supplied. Otherwise builds a Spark context, runs the tool, and always stops the context.
     * Failures raised while running are logged here and not rethrown.
     *
     * @param strArr raw command-line arguments
     */
    public static void main(String[] strArr) {
        Config config = new Config();
        JCommander jCommander = new JCommander(config, (ResourceBundle) null, strArr);
        if (config.help.booleanValue() || strArr.length == 0) {
            jCommander.usage();
            System.exit(1);
        }
        SparkConf sparkConf = UtilHelpers.buildSparkConf("Table-Size-Stats", config.sparkMaster);
        sparkConf.set(HoodieCliSparkConfig.CLI_EXECUTOR_MEMORY, config.sparkMemory);
        JavaSparkContext javaSparkContext = new JavaSparkContext(sparkConf);
        try {
            new TableSizeStats(javaSparkContext, config).run();
        } catch (TableNotFoundException e) {
            LOG.warn(String.format("The Hudi data table is not found: [%s].", config.basePath), e);
        } catch (Throwable th) {
            LOG.error("Failed to get table size stats for " + config, th);
        } finally {
            // Single shutdown point: runs exactly once on success and on every failure path,
            // and an exception from stop() itself is no longer swallowed by the catch-all above.
            javaSparkContext.stop();
        }
    }

    /**
     * Logs size statistics for every configured table.
     *
     * <p>When {@code --props-path} is set, each path returned by {@code getFilePaths} is treated as
     * a table base path; otherwise the single {@code --base-path} table is processed.
     *
     * @throws TableNotFoundException propagated unchanged so that callers with a dedicated handler
     *                                (such as {@code main}) can react to it
     * @throws HoodieException        wrapping any other failure, including a missing base path
     */
    public void run() {
        try {
            LOG.info(this.cfg.toString());
            LOG.info(" ****** Fetching table size stats ******");
            LocalDate[] dateInterval = getUserSpecifiedDateInterval(this.cfg);
            if (this.cfg.propsFilePath != null) {
                for (String tablePath : getFilePaths(this.cfg.propsFilePath, this.jsc.hadoopConfiguration())) {
                    logTableStats(tablePath, dateInterval);
                }
            } else {
                if (this.cfg.basePath == null) {
                    throw new HoodieIOException("Base path needs to be set.");
                }
                logTableStats(this.cfg.basePath, dateInterval);
            }
        } catch (TableNotFoundException e) {
            // Wrapping this in a generic HoodieException would hide it from the
            // catch (TableNotFoundException) clause in main().
            // NOTE(review): assumes TableNotFoundException is a HoodieException subtype, so callers
            // that catch HoodieException are unaffected - confirm.
            throw e;
        } catch (Exception e) {
            throw new HoodieException("Unable to do fetch table size stats." + this.cfg.basePath, e);
        }
    }

    /**
     * Collects the sizes of the latest base files of one table and logs the resulting statistics.
     *
     * <p>Per-partition statistics are logged when {@code --enable-partition-stats} is set and the
     * partition name is not blank. Table-level statistics are logged when
     * {@code --enable-table-stats} is set; otherwise only the total size is logged.
     *
     * <p>NOTE(review): sizes are accumulated in a {@code Histogram} backed by
     * {@code UniformReservoir(1000000)}; presumably the reported total is only exact while no more
     * values than that are recorded - confirm against the Dropwizard Metrics documentation.
     *
     * @param str          base path of the table
     * @param localDateArr {@code {start, end}} filter (either element may be {@code null}), or
     *                     {@code null} when no date filter was requested
     * @throws IOException     declared for the underlying metadata lookups
     * @throws HoodieException if a date filter is requested but the first partition name does not
     *                         parse as a date
     */
    private void logTableStats(String str, LocalDate[] localDateArr) throws IOException {
        LOG.warn("Processing table " + str);
        HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder().enable(isMetadataEnabled(str, this.jsc)).build();
        HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(this.jsc);
        StorageConfiguration<Configuration> storageConf = HadoopFSUtils.getStorageConfWithCopy(this.jsc.hadoopConfiguration());
        List<String> allPartitionPaths = HoodieTableMetadata.create(engineContext, new HoodieHadoopStorage(str, storageConf), metadataConfig, str).getAllPartitionPaths();

        // Guard against tables without partitions: get(0) on an empty list would throw
        // IndexOutOfBoundsException before any statistics could be reported.
        if (localDateArr != null && !allPartitionPaths.isEmpty() && getPartitionDate(allPartitionPaths.get(0)) == null) {
            throw new HoodieException("Cannot apply --start-date, --end-date, or --num-days when partition does not contain date. Interval: " + Arrays.toString(localDateArr) + ", Partition Name: " + allPartitionPaths.get(0));
        }

        Histogram tableHistogram = new Histogram(new UniformReservoir(1000000));
        for (String partition : allPartitionPaths) {
            // The partition date is only looked up when a date filter was requested.
            LocalDate partitionDate = localDateArr != null ? getPartitionDate(partition) : null;
            if (!isWithinInterval(partitionDate, localDateArr)) {
                continue;
            }

            List<Long> fileSizes = FileSystemViewManager.createInMemoryFileSystemView(
                    new HoodieLocalEngineContext(storageConf),
                    HoodieTableMetaClient.builder().setBasePath(str).setConf(storageConf.newInstance()).build(),
                    HoodieMetadataConfig.newBuilder().enable(false).build())
                .getLatestBaseFiles(partition)
                .map(baseFile -> baseFile.getFileSize())
                .collect(Collectors.toList());

            Histogram partitionHistogram = (this.cfg.partitionStats && !partition.trim().isEmpty())
                ? new Histogram(new UniformReservoir(1000000))
                : null;
            for (long fileSize : fileSizes) {
                if (partitionHistogram != null) {
                    partitionHistogram.update(fileSize);
                }
                tableHistogram.update(fileSize);
            }
            if (partitionHistogram != null) {
                logStats("Partition stats [name: " + partition + (partitionDate != null ? ", has date: yes" : "") + "]", partitionHistogram);
            }
        }

        if (this.cfg.tableStats) {
            logStats("Table stats [path: " + str + "]", tableHistogram);
        } else {
            LOG.info("Total size: {}", getFileSizeUnit(Arrays.stream(tableHistogram.getSnapshot().getValues()).sum()));
        }
    }

    /**
     * Decides whether a partition passes the date filter: start is inclusive, end is exclusive,
     * and a {@code null} bound is unbounded. A partition without a parsed date, or a missing
     * filter, always passes.
     */
    private static boolean isWithinInterval(@Nullable LocalDate partitionDate, LocalDate[] interval) {
        if (partitionDate == null || interval == null) {
            return true;
        }
        LocalDate start = interval[0];
        LocalDate end = interval[1];
        boolean onOrAfterStart = start == null || !partitionDate.isBefore(start);
        boolean beforeEnd = end == null || partitionDate.isBefore(end);
        return onOrAfterStart && beforeEnd;
    }

    /**
     * Reports whether the table's configured metadata partitions include {@code "files"}.
     *
     * @param str              base path of the table
     * @param javaSparkContext supplies the Hadoop configuration used to open the table
     * @return {@code true} when the metadata partition set is non-empty and contains {@code "files"}
     */
    private static boolean isMetadataEnabled(String str, JavaSparkContext javaSparkContext) {
        HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
            .setBasePath(str)
            .setConf(HadoopFSUtils.getStorageConfWithCopy(javaSparkContext.hadoopConfiguration()))
            .build();
        Set<String> metadataPartitions = metaClient.getTableConfig().getMetadataPartitions();
        if (metadataPartitions.isEmpty()) {
            return false;
        }
        return metadataPartitions.contains("files");
    }

    /**
     * Reads table base paths, one per line, from a UTF-8 text file.
     *
     * <p>Each line is trimmed and blank lines are skipped, so a trailing newline or stray
     * whitespace does not produce an empty table path.
     *
     * @param str           location of the file listing the base paths
     * @param configuration Hadoop configuration used to open the file; a fresh one is created when
     *                      {@code null}
     * @return the non-blank lines of the file, in file order
     * @throws HoodieIOException if the file cannot be opened or read
     */
    private static List<String> getFilePaths(String str, Configuration configuration) {
        List<String> filePaths = new ArrayList<>();
        Configuration hadoopConf = Option.ofNullable(configuration).orElseGet(Configuration::new);
        // try-with-resources closes the reader on every path, including a failing readLine().
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                HadoopFSUtils.getFs(str, hadoopConf).open(new Path(str)), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String tablePath = line.trim();
                if (!tablePath.isEmpty()) {
                    filePaths.add(tablePath);
                }
            }
            return filePaths;
        } catch (IOException e) {
            LOG.error("Error reading in properties from dfs from file." + str);
            throw new HoodieIOException("Cannot read properties from dfs from file " + str, e);
        }
    }

    /**
     * Derives the {@code {start, end}} date filter from the command-line options.
     *
     * <p>{@code --start-date} takes precedence over {@code --num-days}. When only
     * {@code --num-days} is given, the interval is {@code [today - numDays, today)} and any
     * {@code --end-date} is replaced by today's date.
     *
     * @param config parsed command-line options
     * @return a two-element array {@code {start, end}} where either element may be {@code null},
     *         or {@code null} when no date option was supplied
     * @throws HoodieException if a date cannot be parsed, {@code --num-days} is negative, or the
     *                         start date is not before the end date
     */
    private static LocalDate[] getUserSpecifiedDateInterval(Config config) {
        LocalDate endDate = null;
        if (config.endDate != null) {
            try {
                endDate = LocalDate.parse(config.endDate, DATE_FORMATTER);
                LOG.info("Setting ending date to {}. ", endDate);
            } catch (DateTimeParseException e) {
                throw new HoodieException("Unable to parse --end-date. ", e);
            }
        } else {
            LOG.info("End date is not specified: {}.", (Object) null);
        }

        LocalDate startDate = null;
        if (config.startDate != null) {
            try {
                startDate = LocalDate.parse(config.startDate, DATE_FORMATTER);
                LOG.info("Setting starting date to {}.", startDate);
            } catch (DateTimeParseException e) {
                // Mirror the --end-date handling instead of leaking a raw DateTimeParseException.
                throw new HoodieException("Unable to parse --start-date. ", e);
            }
        } else if (config.numDays == 0) {
            LOG.info("Start date not specified: {}.", (Object) null);
        } else {
            if (config.numDays < 0) {
                throw new HoodieException("--num-days must specify a positive value.");
            }
            if (endDate != null) {
                // Make the override visible; previously the user-supplied end date vanished silently.
                LOG.warn("--num-days is counted back from today; ignoring --end-date {}.", endDate);
            }
            endDate = LocalDate.now();
            startDate = endDate.minusDays(config.numDays);
            LOG.info("Setting starting date to {} ({} - {} days). ", startDate, endDate, config.numDays);
        }

        if (startDate != null && endDate != null && !startDate.isBefore(endDate)) {
            throw new HoodieException("Starting date must be before ending date. Start Date: " + startDate + ", End Date: " + endDate);
        }
        if (startDate == null && endDate == null) {
            return null;
        }
        return new LocalDate[]{startDate, endDate};
    }

    /**
     * Tries to read a date out of a partition name.
     *
     * <p>If the name contains {@code Strings.DEFAULT_SEPARATOR} and splits into exactly two pieces
     * around it, the trimmed second piece is parsed; otherwise the whole name is parsed.
     * NOTE(review): the separator is presumably {@code "="} as in {@code key=value} partition
     * names - confirm against the HBase {@code Strings} class.
     *
     * @param str partition name
     * @return the parsed date, or {@code null} (after logging an error) when the name does not
     *         match the supported date formats
     */
    @Nullable
    private static LocalDate getPartitionDate(String str) {
        String candidate = str;
        if (str.contains(Strings.DEFAULT_SEPARATOR)) {
            String[] pieces = str.split(Strings.DEFAULT_SEPARATOR);
            if (pieces.length == 2) {
                candidate = pieces[1].trim();
            }
        }
        LocalDate parsed;
        try {
            parsed = LocalDate.parse(candidate, DATE_FORMATTER);
        } catch (DateTimeParseException e) {
            LOG.error("Partition name {} must conform to date format if --start-date, --end-date, or --num-days are specified. ", str, e);
            parsed = null;
        }
        return parsed;
    }

    /**
     * Formats a byte count with two decimals and the largest fitting unit from
     * {@code FILE_SIZE_UNITS}.
     *
     * <p>Scaling stops at the last unit in the array, so values too large for it are expressed as a
     * bigger number in that unit instead of indexing past the end of the array.
     *
     * @param d size in bytes
     * @return the formatted size, for example {@code "1.50 KB"}
     */
    private static String getFileSizeUnit(double d) {
        final int lastUnitIndex = FILE_SIZE_UNITS.length - 1;
        int unitIndex = 0;
        // Bound by the last valid index: the previous bound (length) allowed one division too many
        // and then FILE_SIZE_UNITS[length] threw ArrayIndexOutOfBoundsException.
        while (d > 1024.0d && unitIndex < lastUnitIndex) {
            d /= 1024.0d;
            unitIndex++;
        }
        return String.format("%.2f %s", d, FILE_SIZE_UNITS[unitIndex]);
    }

    /**
     * Logs a heading followed by the count, total, min, max, mean, median and the
     * P50/P90/P95/P99 values of the histogram's current snapshot, each size formatted by
     * {@code getFileSizeUnit}.
     *
     * @param str       heading line logged before the statistics
     * @param histogram histogram holding the recorded file sizes
     */
    private static void logStats(String str, Histogram histogram) {
        LOG.info(str);
        final Snapshot snap = histogram.getSnapshot();

        long totalBytes = 0L;
        for (long size : snap.getValues()) {
            totalBytes += size;
        }

        LOG.info("Number of files: {}", snap.size());
        LOG.info("Total size: {}", getFileSizeUnit(totalBytes));

        final String smallest = getFileSizeUnit(snap.getMin());
        LOG.info("Minimum file size: {}", smallest);
        final String largest = getFileSizeUnit(snap.getMax());
        LOG.info("Maximum file size: {}", largest);
        final String average = getFileSizeUnit(snap.getMean());
        LOG.info("Average file size: {}", average);
        final String median = getFileSizeUnit(snap.getMedian());
        LOG.info("Median file size: {}", median);

        final String p50 = getFileSizeUnit(snap.getValue(0.5d));
        LOG.info("P50 file size: {}", p50);
        final String p90 = getFileSizeUnit(snap.getValue(0.9d));
        LOG.info("P90 file size: {}", p90);
        final String p95 = getFileSizeUnit(snap.getValue(0.95d));
        LOG.info("P95 file size: {}", p95);
        final String p99 = getFileSizeUnit(snap.getValue(0.99d));
        LOG.info("P99 file size: {}", p99);
    }
}
