package org.apache.hudi.source.stats;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.annotation.Nullable;
import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.data.StringData;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.table.types.logical.RowType;
import org.apache.hudi.avro.model.HoodieMetadataRecord;
import org.apache.hudi.client.common.HoodieFlinkEngineContext;
import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.util.ValidationUtils;
import org.apache.hudi.common.util.VisibleForTesting;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.common.util.collection.Tuple3;
import org.apache.hudi.common.util.hash.ColumnIndexID;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.metadata.HoodieMetadataPayload;
import org.apache.hudi.metadata.HoodieTableMetadata;
import org.apache.hudi.metadata.HoodieTableMetadataUtil;
import org.apache.hudi.source.prune.ColumnStatsProbe;
import org.apache.hudi.storage.hadoop.HoodieHadoopStorage;
import org.apache.hudi.util.AvroToRowDataConverters;
import org.apache.hudi.util.DataTypeUtils;
import org.apache.hudi.util.FlinkClientUtil;
import org.apache.hudi.util.RowDataProjection;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/hudi/source/stats/FileStatsIndex.class */
/**
 * {@link ColumnStatsIndex} implementation that reads per-file column statistics from the
 * metadata table partition named by {@link #getIndexPartitionName()} and uses them to compute
 * the set of files that may match a {@link ColumnStatsProbe}.
 *
 * <p>The projected column-stats rows handled by this class are addressed by ordinal; see the
 * {@code ORD_*} constants below. The transposed (per-file) rows produced by
 * {@link #transposeColumnStatsIndex(List, String[])} have the layout
 * {@code [fileName, valueCount, (min, max, nullCount) * N]} where {@code N} is the number of
 * indexed target columns, in sorted column-name order.
 */
public class FileStatsIndex implements ColumnStatsIndex {
    private static final long serialVersionUID = 1;
    private static final Logger LOG = LoggerFactory.getLogger(FileStatsIndex.class);

    // Ordinals inside a projected column-stats row.
    // NOTE(review): the null-count / value-count naming follows how the values are used below
    // (ordinal 4 substitutes ordinal 3 for a missing column) — confirm against ColumnStatsSchemas.
    private static final int ORD_FILE_NAME = 0;
    private static final int ORD_MIN_VAL = 1;
    private static final int ORD_MAX_VAL = 2;
    private static final int ORD_NULL_COUNT = 3;
    private static final int ORD_VAL_COUNT = 4;
    private static final int ORD_COL_NAME = 5;

    // Arity passed to RowData#getRow when extracting the nested column-stats struct.
    private static final int COL_STATS_STRUCT_ARITY = 9;
    // Arity passed to RowData#getRow when extracting the min / max wrapper struct.
    private static final int MIN_MAX_WRAPPER_ARITY = 1;

    // Layout of a transposed row: two leading fields, then three fields per column.
    private static final int TRANSPOSED_HEADER_FIELDS = 2;
    private static final int TRANSPOSED_FIELDS_PER_COLUMN = 3;

    private final RowType rowType;
    private final String basePath;
    private final HoodieMetadataConfig metadataConfig;
    private HoodieTableMetadata metadataTable;

    /**
     * @param str                  base path of the table
     * @param rowType              row type of the table; used to resolve column types by name
     * @param hoodieMetadataConfig metadata config used when opening the metadata table
     */
    public FileStatsIndex(String str, RowType rowType, HoodieMetadataConfig hoodieMetadataConfig) {
        this.basePath = str;
        this.rowType = rowType;
        this.metadataConfig = hoodieMetadataConfig;
    }

    @Override // org.apache.hudi.source.stats.ColumnStatsIndex
    public String getIndexPartitionName() {
        return HoodieTableMetadataUtil.PARTITION_NAME_COLUMN_STATS;
    }

    /**
     * Returns the metadata table handle, creating it on first access.
     *
     * <p>The initialization is not synchronized.
     */
    public HoodieTableMetadata getMetadataTable() {
        if (this.metadataTable == null) {
            this.metadataTable = HoodieTableMetadata.create(
                HoodieFlinkEngineContext.DEFAULT,
                new HoodieHadoopStorage(this.basePath, FlinkClientUtil.getHadoopConf()),
                this.metadataConfig,
                this.basePath);
        }
        return this.metadataTable;
    }

    /**
     * Computes the candidate file names for the given probe.
     *
     * @param columnStatsProbe probe evaluated against the per-file stats; may be {@code null}
     * @param list             all file names; NOTE: this list is modified in place (indexed
     *                         files are removed from it)
     * @return the candidate file names, or {@code null} when the probe is {@code null}, when no
     *     referenced column is indexed, or when reading the index fails (best effort: failures
     *     are logged and swallowed)
     */
    @Override // org.apache.hudi.source.stats.ColumnStatsIndex
    public Set<String> computeCandidateFiles(ColumnStatsProbe columnStatsProbe, List<String> list) {
        if (columnStatsProbe == null) {
            return null;
        }
        try {
            List<RowData> colStatsRows = readColumnStatsIndexByColumns(columnStatsProbe.getReferencedCols());
            return candidatesInMetadataTable(columnStatsProbe, colStatsRows, list);
        } catch (Throwable th) {
            // Deliberate best effort: a failure to read the index only disables pruning.
            LOG.warn("Read {} for data skipping error", getIndexPartitionName(), th);
            return null;
        }
    }

    @Override // org.apache.hudi.source.stats.ColumnStatsIndex
    public Set<String> computeCandidatePartitions(ColumnStatsProbe columnStatsProbe, List<String> list) {
        throw new UnsupportedOperationException("This method is not supported by " + getClass().getSimpleName());
    }

    /**
     * Evaluates the probe against the transposed column-stats rows.
     *
     * @param columnStatsProbe probe to evaluate; {@code null} yields {@code null}
     * @param list             projected column-stats rows (one row per file and column)
     * @param list2            all file names; modified in place: every file that has an index
     *                         entry is removed, and the remainder is added to the result
     * @return names of indexed files accepted by the probe plus all files without index
     *     entries, or {@code null} when none of the referenced columns is indexed
     */
    protected Set<String> candidatesInMetadataTable(@Nullable ColumnStatsProbe columnStatsProbe, List<RowData> list, List<String> list2) {
        if (columnStatsProbe == null) {
            return null;
        }
        Pair<List<RowData>, String[]> transposed = transposeColumnStatsIndex(list, columnStatsProbe.getReferencedCols());
        List<RowData> fileRows = transposed.getLeft();
        String[] indexedColumns = transposed.getRight();
        if (indexedColumns.length == 0) {
            return null;
        }
        RowType.RowField[] queryFields = DataTypeUtils.projectRowFields(this.rowType, indexedColumns);

        Set<String> allIndexedFiles = fileRows.stream().parallel()
            .map(row -> row.getString(ORD_FILE_NAME).toString())
            .collect(Collectors.toSet());
        Set<String> candidateFiles = fileRows.stream().parallel()
            .filter(row -> columnStatsProbe.test(row, queryFields))
            .map(row -> row.getString(ORD_FILE_NAME).toString())
            .collect(Collectors.toSet());

        // Files that have no entry in the index cannot be pruned, so they are always candidates.
        list2.removeAll(allIndexedFiles);
        candidateFiles.addAll(list2);
        return candidateFiles;
    }

    /**
     * Extracts the nested column-stats struct from each metadata row and projects it onto
     * {@code ColumnStatsSchemas.COL_STATS_TARGET_POS}.
     */
    private static List<RowData> projectNestedColStatsColumns(List<RowData> list) {
        int colStatsPos = HoodieMetadataRecord.SCHEMA$.getField(HoodieMetadataPayload.SCHEMA_FIELD_ID_COLUMN_STATS).pos();
        RowDataProjection projection = RowDataProjection.instanceV2(ColumnStatsSchemas.COL_STATS_DATA_TYPE.getLogicalType(), ColumnStatsSchemas.COL_STATS_TARGET_POS);
        return list.stream().parallel()
            .map(row -> projection.project(row.getRow(colStatsPos, COL_STATS_STRUCT_ARITY)))
            .collect(Collectors.toList());
    }

    /**
     * Transposes column-stats rows (one row per file and column) into one row per file.
     *
     * @param list   projected column-stats rows
     * @param strArr names of the columns of interest
     * @return a pair of (rows with layout {@code [fileName, valueCount, (min, max, nullCount)*]},
     *     the sorted names of those requested columns that appear in {@code list})
     */
    @VisibleForTesting
    public Pair<List<RowData>, String[]> transposeColumnStatsIndex(List<RowData> list, String[] strArr) {
        Map<String, LogicalType> columnTypes = this.rowType.getFields().stream()
            .collect(Collectors.toMap(RowType.RowField::getName, RowType.RowField::getType));

        // Only requested columns that actually have stats rows take part in the transposition.
        Set<String> indexedColumns = list.stream()
            .map(row -> row.getString(ORD_COL_NAME).toString())
            .collect(Collectors.toSet());
        TreeSet<String> sortedTargetColumns = Arrays.stream(strArr)
            .sorted()
            .filter(indexedColumns::contains)
            .collect(Collectors.toCollection(TreeSet::new));

        // Shared across the parallel stream below, hence a concurrent map.
        Map<LogicalType, AvroToRowDataConverters.AvroToRowDataConverter> converters = new ConcurrentHashMap<>();

        Map<StringData, List<RowData>> rowsByFile = list.stream().parallel()
            .filter(row -> sortedTargetColumns.contains(row.getString(ORD_COL_NAME).toString()))
            .map(row -> {
                if (row.isNullAt(ORD_MIN_VAL) && row.isNullAt(ORD_MAX_VAL)) {
                    // Nothing to unpack when both bounds are absent.
                    return row;
                }
                LogicalType columnType = columnTypes.get(row.getString(ORD_COL_NAME).toString());
                return unpackMinMaxVal(row, columnType, converters);
            })
            .collect(Collectors.groupingBy(row -> row.getString(ORD_FILE_NAME)));

        return Pair.of(foldRowsByFiles(sortedTargetColumns, rowsByFile), sortedTargetColumns.toArray(new String[0]));
    }

    /**
     * Folds the per-column rows of every file into a single row per file.
     *
     * @param treeSet sorted names of the target columns
     * @param map     per-column stats rows grouped by file name
     */
    private static List<RowData> foldRowsByFiles(TreeSet<String> treeSet, Map<StringData, List<RowData>> map) {
        return map.values().stream().parallel()
            .map(fileRows -> foldFileRows(treeSet, fileRows))
            .collect(Collectors.toList());
    }

    /**
     * Builds the folded row for a single file, aligning its per-column stats with the sorted
     * list of target columns so that all folded rows share the same layout.
     */
    private static RowData foldFileRows(TreeSet<String> sortedColumns, List<RowData> fileRows) {
        // The group comes from groupingBy, so it always holds at least one row.
        RowData firstRow = fileRows.get(0);
        StringData fileName = firstRow.getString(ORD_FILE_NAME);
        long valueCount = firstRow.getLong(ORD_VAL_COUNT);

        Map<String, RowData> rowsByColumn = fileRows.stream()
            .collect(Collectors.toMap(row -> row.getString(ORD_COL_NAME).toString(), row -> row));

        GenericRowData folded = new GenericRowData(TRANSPOSED_HEADER_FIELDS + TRANSPOSED_FIELDS_PER_COLUMN * sortedColumns.size());
        folded.setField(0, fileName);
        folded.setField(1, Long.valueOf(valueCount));

        int offset = TRANSPOSED_HEADER_FIELDS;
        for (String column : sortedColumns) {
            RowData columnRow = rowsByColumn.get(column);
            if (columnRow == null) {
                // No stats for this column in this file: min/max are null and the file's
                // value count is used in the null-count slot.
                folded.setField(offset, null);
                folded.setField(offset + 1, null);
                folded.setField(offset + 2, Long.valueOf(valueCount));
            } else {
                GenericRowData stats = (GenericRowData) columnRow;
                folded.setField(offset, stats.getField(ORD_MIN_VAL));
                folded.setField(offset + 1, stats.getField(ORD_MAX_VAL));
                folded.setField(offset + 2, stats.getField(ORD_NULL_COUNT));
            }
            offset += TRANSPOSED_FIELDS_PER_COLUMN;
        }
        return folded;
    }

    /**
     * Returns a copy of the stats row whose min / max wrapper structs are replaced by the
     * converted values they carry.
     *
     * @throws IllegalStateException presumably raised by {@code ValidationUtils.checkState}
     *     when either wrapper is {@code null} — confirm against ValidationUtils
     */
    private static RowData unpackMinMaxVal(RowData rowData, LogicalType logicalType, Map<LogicalType, AvroToRowDataConverters.AvroToRowDataConverter> map) {
        RowData minWrapper = rowData.getRow(ORD_MIN_VAL, MIN_MAX_WRAPPER_ARITY);
        RowData maxWrapper = rowData.getRow(ORD_MAX_VAL, MIN_MAX_WRAPPER_ARITY);
        ValidationUtils.checkState(minWrapper != null && maxWrapper != null, "Invalid Column Stats record: either both min/max have to be null, or both have to be non-null");
        Object minValue = tryUnpackNonNullVal(minWrapper, logicalType, map);
        Object maxValue = tryUnpackNonNullVal(maxWrapper, logicalType, map);

        GenericRowData unpacked = new GenericRowData(rowData.getArity());
        unpacked.setField(ORD_FILE_NAME, rowData.getString(ORD_FILE_NAME));
        unpacked.setField(ORD_MIN_VAL, minValue);
        unpacked.setField(ORD_MAX_VAL, maxValue);
        unpacked.setField(ORD_NULL_COUNT, Long.valueOf(rowData.getLong(ORD_NULL_COUNT)));
        unpacked.setField(ORD_VAL_COUNT, Long.valueOf(rowData.getLong(ORD_VAL_COUNT)));
        unpacked.setField(ORD_COL_NAME, rowData.getString(ORD_COL_NAME));
        return unpacked;
    }

    /**
     * Converts the first non-null field of the wrapper row, or returns {@code null} when every
     * field is null. The wrapper is expected to be a {@link GenericRowData}.
     */
    private static Object tryUnpackNonNullVal(RowData rowData, LogicalType logicalType, Map<LogicalType, AvroToRowDataConverters.AvroToRowDataConverter> map) {
        GenericRowData wrapper = (GenericRowData) rowData;
        for (int i = 0; i < wrapper.getArity(); i++) {
            Object field = wrapper.getField(i);
            if (field != null) {
                return doUnpack(field, logicalType, map);
            }
        }
        return null;
    }

    /** Converts the value with the converter cached for the given logical type. */
    private static Object doUnpack(Object obj, LogicalType logicalType, Map<LogicalType, AvroToRowDataConverters.AvroToRowDataConverter> map) {
        return map.computeIfAbsent(logicalType, type -> AvroToRowDataConverters.createConverter(type, true)).convert(obj);
    }

    /**
     * Reads the column-stats records of the given columns from the metadata table and projects
     * them into column-stats rows.
     *
     * @param strArr names of the columns to read stats for; must not be empty
     * @throws HoodieException when the payload of a metadata record cannot be read
     */
    @VisibleForTesting
    public List<RowData> readColumnStatsIndexByColumns(String[] strArr) {
        ValidationUtils.checkArgument(strArr.length > 0, "Column stats is only valid when push down filters have referenced columns");

        // Records are looked up by the base64-encoded column index id used as key prefix.
        List<String> keyPrefixes = Arrays.stream(strArr)
            .map(column -> new ColumnIndexID(column).asBase64EncodedString())
            .collect(Collectors.toList());
        HoodieData<HoodieRecord<HoodieMetadataPayload>> records =
            getMetadataTable().getRecordsByKeyPrefixes(keyPrefixes, getIndexPartitionName(), false);

        AvroToRowDataConverters.AvroToRowDataConverter rowConverter = AvroToRowDataConverters.createRowConverter(ColumnStatsSchemas.METADATA_DATA_TYPE.getLogicalType());
        List<RowData> metadataRows = records.collectAsList().stream().parallel()
            .map(metadataRecord -> {
                try {
                    return (RowData) rowConverter.convert(metadataRecord.getData().getInsertValue(null, null).orElse(null));
                } catch (IOException e) {
                    // Keep the cause so the underlying I/O failure stays diagnosable.
                    throw new HoodieException("Exception while getting insert value from metadata payload", e);
                }
            })
            .collect(Collectors.toList());
        return projectNestedColStatsColumns(metadataRows);
    }
}
