package org.apache.hudi.source;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Properties;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.annotation.Nullable;
import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.types.logical.RowType;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.client.common.HoodieFlinkEngineContext;
import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.configuration.FlinkOptions;
import org.apache.hudi.configuration.HadoopConfigurations;
import org.apache.hudi.index.bucket.BucketIdentifier;
import org.apache.hudi.org.apache.hadoop.hbase.util.Strings;
import org.apache.hudi.source.prune.DataPruner;
import org.apache.hudi.source.prune.PartitionPruners;
import org.apache.hudi.source.stats.ColumnStatsIndices;
import org.apache.hudi.storage.StoragePathInfo;
import org.apache.hudi.storage.hadoop.HoodieHadoopStorage;
import org.apache.hudi.util.DataTypeUtils;
import org.apache.hudi.util.StreamerUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Lists the partitions and files of the table stored under a base path, optionally narrowing
 * the result with a partition pruner, a bucket id and a data pruner.
 *
 * <p>The pruned partition paths are computed on first use and cached in a plain (unsynchronized)
 * field; {@link #reset()} clears that cache. Instances are created through {@link #builder()}.
 */
public class FileIndex {
    private static final Logger LOG = LoggerFactory.getLogger(FileIndex.class);

    /** Base path of the table. */
    private final Path path;
    /** Row type handed to the column stats utilities when data skipping is applied. */
    private final RowType rowType;
    /** Whether a table was found at {@link #path} when this index was constructed. */
    private final boolean tableExists;
    private final HoodieMetadataConfig metadataConfig;
    private final Configuration hadoopConf;
    /** Optional partition filter; {@code null} keeps every partition. */
    private final PartitionPruners.PartitionPruner partitionPruner;
    /** Optional file filter based on column stats; {@code null} disables data skipping. */
    private final DataPruner dataPruner;
    /** Bucket id used to filter files by name; negative values disable bucket pruning. */
    private final int dataBucket;
    /** Lazily built cache of the (pruned) partition paths; {@code null} until first requested. */
    private List<String> partitionPaths;

    /**
     * Fluent builder for {@link FileIndex}. {@code path}, {@code conf} and {@code rowType} are
     * mandatory; the pruners are optional and the data bucket defaults to {@code -1}.
     */
    public static class Builder {
        private Path path;
        private org.apache.flink.configuration.Configuration conf;
        private RowType rowType;
        private DataPruner dataPruner;
        private PartitionPruners.PartitionPruner partitionPruner;
        private int dataBucket;

        private Builder() {
            // A negative bucket id means "no bucket pruning" (see FileIndex#pruneByBucket).
            this.dataBucket = -1;
        }

        /** Sets the base path of the table (required). */
        public Builder path(Path path) {
            this.path = path;
            return this;
        }

        /** Sets the Flink configuration the index derives its settings from (required). */
        public Builder conf(org.apache.flink.configuration.Configuration conf) {
            this.conf = conf;
            return this;
        }

        /** Sets the row type of the table (required). */
        public Builder rowType(RowType rowType) {
            this.rowType = rowType;
            return this;
        }

        /** Sets the optional data pruner used for data skipping. */
        public Builder dataPruner(DataPruner dataPruner) {
            this.dataPruner = dataPruner;
            return this;
        }

        /** Sets the optional partition pruner. */
        public Builder partitionPruner(PartitionPruners.PartitionPruner partitionPruner) {
            this.partitionPruner = partitionPruner;
            return this;
        }

        /** Sets the bucket id to keep; a negative value disables bucket pruning. */
        public Builder dataBucket(int dataBucket) {
            this.dataBucket = dataBucket;
            return this;
        }

        /**
         * Creates the file index.
         *
         * @return a new {@link FileIndex}
         * @throws NullPointerException if the path, the configuration or the row type was not set
         */
        public FileIndex build() {
            return new FileIndex(
                Objects.requireNonNull(this.path),
                Objects.requireNonNull(this.conf),
                Objects.requireNonNull(this.rowType),
                this.dataPruner,
                this.partitionPruner,
                this.dataBucket);
        }
    }

    private FileIndex(
            Path path,
            org.apache.flink.configuration.Configuration conf,
            RowType rowType,
            DataPruner dataPruner,
            PartitionPruners.PartitionPruner partitionPruner,
            int dataBucket) {
        this.path = path;
        this.rowType = rowType;
        this.hadoopConf = HadoopConfigurations.getHadoopConf(conf);
        this.tableExists = StreamerUtil.tableExists(path.toString(), this.hadoopConf);
        // Must be assigned before isDataSkippingFeasible(...) runs, which reads this field.
        this.metadataConfig = metadataConfig(conf);
        this.dataPruner = isDataSkippingFeasible(conf.getBoolean(FlinkOptions.READ_DATA_SKIPPING_ENABLED))
            ? dataPruner
            : null;
        this.partitionPruner = partitionPruner;
        this.dataBucket = dataBucket;
    }

    /** Returns a new {@link Builder}. */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * Converts the (pruned) partition paths into ordered key-to-value maps, one per partition.
     *
     * <p>Each partition path is split on {@code "/"}. When {@code hivePartition} is set, every
     * segment is split on {@code Strings.DEFAULT_SEPARATOR} and contributes an entry only if that
     * yields exactly two parts; {@code partitionKeys} is then used solely for the emptiness check.
     * Otherwise the i-th key of {@code partitionKeys} is paired with the i-th path segment.
     *
     * @param partitionKeys  the partition field names; an empty list yields an empty result
     * @param defaultParName sentinel value: a partition value equal to it is reported as {@code null}
     * @param hivePartition  whether path segments carry their own key (key/value segments)
     * @return one insertion-ordered map per partition, or an empty list when there are no keys or
     *         the only partition path is the empty string
     * @throws ArrayIndexOutOfBoundsException in non-hive mode, if a partition path has fewer
     *         segments than there are partition keys
     */
    public List<Map<String, String>> getPartitions(List<String> partitionKeys, String defaultParName, boolean hivePartition) {
        if (partitionKeys.isEmpty()) {
            return Collections.emptyList();
        }
        List<String> paths = getOrBuildPartitionPaths();
        // A single empty partition path is treated as "no partitions".
        if (paths.size() == 1 && paths.get(0).isEmpty()) {
            return Collections.emptyList();
        }
        List<Map<String, String>> partitions = new ArrayList<>();
        for (String partitionPath : paths) {
            String[] segments = partitionPath.split("/");
            // LinkedHashMap keeps the entries in path order.
            Map<String, String> partition = new LinkedHashMap<>();
            if (hivePartition) {
                for (String segment : segments) {
                    // NOTE(review): DEFAULT_SEPARATOR is presumably "=" (key=value segments) and was
                    // likely substituted for a literal by the decompiler -- confirm.
                    String[] keyValue = segment.split(Strings.DEFAULT_SEPARATOR);
                    if (keyValue.length == 2) {
                        partition.put(keyValue[0], defaultParName.equals(keyValue[1]) ? null : keyValue[1]);
                    }
                }
            } else {
                for (int i = 0; i < partitionKeys.size(); i++) {
                    partition.put(partitionKeys.get(i), defaultParName.equals(segments[i]) ? null : segments[i]);
                }
            }
            partitions.add(partition);
        }
        return partitions;
    }

    /**
     * Lists the files of all (pruned) partitions, then applies bucket pruning and data skipping
     * when they are configured.
     *
     * @return the remaining files; an empty list if the table does not exist or nothing is left
     */
    public List<StoragePathInfo> getFilesInPartitions() {
        if (!this.tableExists) {
            return Collections.emptyList();
        }
        String[] fullPartitionPaths = getOrBuildPartitionPaths().stream()
            .map(partitionPath -> fullPartitionPath(this.path, partitionPath))
            .toArray(String[]::new);
        List<StoragePathInfo> allFiles = FSUtils.getFilesInPartitions(
                new HoodieFlinkEngineContext(this.hadoopConf),
                new HoodieHadoopStorage(this.path, this.hadoopConf),
                this.metadataConfig,
                this.path.toString(),
                fullPartitionPaths)
            .values().stream()
            .flatMap(List::stream)
            .collect(Collectors.toList());
        if (allFiles.isEmpty()) {
            return allFiles;
        }

        List<StoragePathInfo> bucketFiles = pruneByBucket(allFiles);
        if (bucketFiles.isEmpty()) {
            // Nothing left to skip: avoid reading column stats and logging a meaningless rate.
            return bucketFiles;
        }

        Set<String> candidateFileNames = candidateFilesInMetadataTable(bucketFiles);
        if (candidateFileNames == null) {
            // Data skipping is disabled or could not be applied: keep every file.
            return bucketFiles;
        }
        List<StoragePathInfo> remainingFiles = bucketFiles.stream().parallel()
            .filter(fileInfo -> candidateFileNames.contains(fileInfo.getPath().getName()))
            .collect(Collectors.toList());
        logPruningMsg(bucketFiles.size(), remainingFiles.size(), "data skipping");
        return remainingFiles;
    }

    /**
     * Keeps only the files whose name contains the bucket id string of {@link #dataBucket};
     * returns the input unchanged when the bucket id is negative.
     */
    private List<StoragePathInfo> pruneByBucket(List<StoragePathInfo> files) {
        if (this.dataBucket < 0) {
            return files;
        }
        String bucketIdStr = BucketIdentifier.bucketIdStr(this.dataBucket);
        // NOTE(review): this is a substring match anywhere in the file name, so a name that
        // contains the same characters outside the bucket id part would also be kept -- confirm
        // that is acceptable.
        List<StoragePathInfo> bucketFiles = files.stream()
            .filter(fileInfo -> fileInfo.getPath().getName().contains(bucketIdStr))
            .collect(Collectors.toList());
        logPruningMsg(files.size(), bucketFiles.size(), "bucket pruning");
        return bucketFiles;
    }

    /** Resolves a partition path against the base path; the empty partition is the base path itself. */
    private static String fullPartitionPath(Path basePath, String partitionPath) {
        return partitionPath.isEmpty() ? basePath.toString() : new Path(basePath, partitionPath).toString();
    }

    /** Drops the cached partition paths so that the next access rebuilds them. */
    @VisibleForTesting
    public void reset() {
        this.partitionPaths = null;
    }

    /**
     * Computes the names of the files that may contain matching rows, based on the column stats
     * index read for the columns referenced by the data pruner.
     *
     * @param files the files to consider
     * @return the candidate file names, or {@code null} when no pruning can be applied: there is
     *         no data pruner, the transposed index reports no columns, or reading the index failed
     */
    @Nullable
    private Set<String> candidateFilesInMetadataTable(List<StoragePathInfo> files) {
        if (this.dataPruner == null) {
            return null;
        }
        try {
            String[] referencedCols = this.dataPruner.getReferencedCols();
            Pair<List<RowData>, String[]> transposedIndex = ColumnStatsIndices.transposeColumnStatsIndex(
                ColumnStatsIndices.readColumnStatsIndex(this.path.toString(), this.metadataConfig, referencedCols),
                referencedCols,
                this.rowType);
            List<RowData> statsRows = transposedIndex.getLeft();
            // Presumably the referenced columns that are present in the index -- TODO confirm.
            String[] queryCols = transposedIndex.getRight();
            if (queryCols.length == 0) {
                return null;
            }
            RowType.RowField[] queryFields = DataTypeUtils.projectRowFields(this.rowType, queryCols);

            // Field 0 of every stats row is the file name the row describes.
            Set<String> indexedFileNames = statsRows.stream().parallel()
                .map(row -> row.getString(0).toString())
                .collect(Collectors.toSet());
            Stream<String> matchingFileNames = statsRows.stream().parallel()
                .filter(row -> this.dataPruner.test(row, queryFields))
                .map(row -> row.getString(0).toString());
            // Files without any stats row cannot be ruled out, so they stay candidates.
            Stream<String> unindexedFileNames = files.stream()
                .map(fileInfo -> fileInfo.getPath().getName())
                .filter(fileName -> !indexedFileNames.contains(fileName));
            return Stream.concat(matchingFileNames, unindexedFileNames).collect(Collectors.toSet());
        } catch (Throwable th) {
            // Best effort: any failure falls back to "no data skipping" instead of failing the read.
            // NOTE(review): Throwable also covers JVM Errors -- confirm this breadth is intended.
            LOG.warn("Read column stats for data skipping error", th);
            return null;
        }
    }

    /**
     * Returns the partition paths of the table after applying the partition pruner, building and
     * caching them on first access. The list is empty when the table does not exist.
     */
    public List<String> getOrBuildPartitionPaths() {
        if (this.partitionPaths != null) {
            return this.partitionPaths;
        }
        List<String> allPartitionPaths = this.tableExists
            ? FSUtils.getAllPartitionPaths(
                new HoodieFlinkEngineContext(this.hadoopConf),
                new HoodieHadoopStorage(this.path, this.hadoopConf),
                this.metadataConfig,
                this.path.toString())
            : Collections.emptyList();
        if (this.partitionPruner == null) {
            this.partitionPaths = allPartitionPaths;
        } else {
            this.partitionPaths = new ArrayList<>(this.partitionPruner.filter(allPartitionPaths));
        }
        return this.partitionPaths;
    }

    /**
     * Builds a {@link HoodieMetadataConfig} whose enable flag mirrors
     * {@link FlinkOptions#METADATA_ENABLED} of the given Flink configuration.
     */
    public static HoodieMetadataConfig metadataConfig(org.apache.flink.configuration.Configuration conf) {
        Properties properties = new Properties();
        // The flag is stored as a Boolean object, not as a String.
        properties.put(HoodieMetadataConfig.ENABLE.key(), conf.getBoolean(FlinkOptions.METADATA_ENABLED));
        return HoodieMetadataConfig.newBuilder().fromProperties(properties).build();
    }

    /**
     * Tells whether data skipping can be used: it must be requested and the metadata table must be
     * enabled. Logs a warning when it is requested but the metadata table is disabled.
     */
    private boolean isDataSkippingFeasible(boolean dataSkippingEnabled) {
        if (!dataSkippingEnabled) {
            return false;
        }
        if (this.metadataConfig.isEnabled()) {
            return true;
        }
        LOG.warn("Data skipping requires Metadata Table to be enabled! Disable the data skipping");
        return false;
    }

    /** Logs how many files a pruning action kept, together with the fraction it skipped. */
    private void logPruningMsg(int numTotalFiles, int numLeftFiles, String action) {
        LOG.info("\n------------------------------------------------------------\n---------- action:        {}\n---------- total files:   {}\n---------- left files:    {}\n---------- skipping rate: {}\n------------------------------------------------------------",
            action, numTotalFiles, numLeftFiles, percentage(numTotalFiles, numLeftFiles));
    }

    /**
     * Returns the fraction of {@code total} that was removed, i.e. {@code (total - left) / total},
     * or {@code 0} when {@code total} is zero (nothing could have been skipped).
     */
    private static double percentage(double total, double left) {
        return total == 0 ? 0 : (total - left) / total;
    }
}
