package org.apache.hudi.common.util;

import java.io.IOException;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import java.util.Set;
import java.util.UUID;
import java.util.stream.Collectors;
import org.apache.avro.JsonProperties;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.avro.AvroSchemaUtils;
import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.avro.HoodieAvroWriteSupport;
import org.apache.hudi.common.bloom.BloomFilter;
import org.apache.hudi.common.bloom.BloomFilterFactory;
import org.apache.hudi.common.bloom.BloomFilterTypeCode;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.model.HoodieColumnRangeMetadata;
import org.apache.hudi.common.model.HoodieKey;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.testutils.HoodieCommonTestHarness;
import org.apache.hudi.common.testutils.HoodieTestUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.keygen.BaseKeyGenerator;
import org.apache.hudi.storage.StoragePath;
import org.apache.parquet.avro.AvroSchemaConverter;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

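/**
 * Tests for {@link ParquetUtils}: each test writes a small Parquet file under the harness base
 * path and then reads row keys, the bloom filter, Hoodie keys, the row count or column
 * statistics back from it.
 *
 * <p>Decompiler note: loaded from org/apache/hudi/common/util/TestParquetUtils.class.
 */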
public class TestParquetUtils extends HoodieCommonTestHarness {
    private ParquetUtils parquetUtils;

    /* Decompiler note: loaded from org/apache/hudi/common/util/TestParquetUtils$TestBaseKeyGen.class. */
    class TestBaseKeyGen extends BaseKeyGenerator {
        private String recordKeyField;
        private String partitionField;

        public TestBaseKeyGen(String str, String str2) {
            super(new TypedProperties());
            this.recordKeyField = str;
            this.partitionField = str2;
        }

        public String getRecordKey(GenericRecord genericRecord) {
            return genericRecord.get(this.recordKeyField).toString();
        }

        public String getPartitionPath(GenericRecord genericRecord) {
            return genericRecord.get(this.partitionField).toString();
        }

        public List<String> getRecordKeyFieldNames() {
            return Arrays.asList(this.recordKeyField);
        }

        public List<String> getPartitionPathFields() {
            return Arrays.asList(this.partitionField);
        }
    }

    /**
     * Argument source for the parameterized tests: one argument set per bloom filter type code
     * name, {@code SIMPLE} first and {@code DYNAMIC_V0} second.
     */
    public static List<Arguments> bloomFilterTypeCodes() {
        Arguments simple = Arguments.of(BloomFilterTypeCode.SIMPLE.name());
        Arguments dynamicV0 = Arguments.of(BloomFilterTypeCode.DYNAMIC_V0.name());
        return Arrays.asList(simple, dynamicV0);
    }

    /**
     * Runs before every test: calls the harness's {@code initPath()} and then creates a fresh
     * {@link ParquetUtils} instance for the test to use.
     */
    @BeforeEach
    public void setup() {
        initPath();
        parquetUtils = new ParquetUtils();
    }

    /**
     * Writes 1000 random row keys to a Parquet file, then checks that the same keys are read back
     * and that the bloom filter stored in the file reports every written key as possibly present.
     *
     * @param typeCode bloom filter type code name supplied by {@link #bloomFilterTypeCodes()}
     */
    @ParameterizedTest
    @MethodSource("bloomFilterTypeCodes")
    public void testHoodieWriteSupport(String typeCode) throws Exception {
        List<String> rowKeys = new ArrayList<>();
        for (int i = 0; i < 1000; i++) {
            rowKeys.add(UUID.randomUUID().toString());
        }

        String filePath = Paths.get(basePath, "test.parquet").toUri().toString();
        writeParquetFile(typeCode, filePath, rowKeys);

        // Sort both sides so the comparison does not depend on the order keys are returned in.
        List<String> rowKeysInFile = new ArrayList<>(
                parquetUtils.readRowKeys(HoodieTestUtils.getStorage(filePath), new StoragePath(filePath)));
        Collections.sort(rowKeysInFile);
        Collections.sort(rowKeys);
        Assertions.assertEquals(rowKeys, rowKeysInFile, "Did not read back the expected list of keys");

        BloomFilter filterInFile =
                parquetUtils.readBloomFilterFromMetadata(HoodieTestUtils.getStorage(filePath), new StoragePath(filePath));
        for (String rowKey : rowKeys) {
            Assertions.assertTrue(filterInFile.mightContain(rowKey), "key should be found in bloom filter");
        }
    }

    /**
     * Writes 1000 random row keys, picks every 100th key as a filter, and checks that
     * {@code filterRowKeys} returns as many keys as the filter holds, all of them from the filter.
     *
     * @param typeCode bloom filter type code name supplied by {@link #bloomFilterTypeCodes()}
     */
    @ParameterizedTest
    @MethodSource("bloomFilterTypeCodes")
    public void testFilterParquetRowKeys(String typeCode) throws Exception {
        List<String> rowKeys = new ArrayList<>();
        Set<String> filter = new HashSet<>();
        for (int i = 0; i < 1000; i++) {
            String rowKey = UUID.randomUUID().toString();
            rowKeys.add(rowKey);
            if (i % 100 == 0) {
                filter.add(rowKey);
            }
        }

        String filePath = Paths.get(basePath, "test.parquet").toUri().toString();
        writeParquetFile(typeCode, filePath, rowKeys);

        Set<String> filtered =
                parquetUtils.filterRowKeys(HoodieTestUtils.getStorage(filePath), new StoragePath(filePath), filter);
        Assertions.assertEquals(filter.size(), filtered.size(), "Filtered count does not match");
        for (String rowKey : filtered) {
            Assertions.assertTrue(filter.contains(rowKey), "filtered key must be in the given filter");
        }
    }

    /**
     * Writes 1000 random row keys together with the partition path {@code "path1"} using the
     * record-key/partition-path schema, then checks that {@code fetchHoodieKeys} returns one key
     * per written row and that every returned key is one of the expected keys.
     *
     * @param typeCode bloom filter type code name supplied by {@link #bloomFilterTypeCodes()}
     */
    @ParameterizedTest
    @MethodSource("bloomFilterTypeCodes")
    public void testFetchRecordKeyPartitionPathFromParquet(String typeCode) throws Exception {
        final String partitionPath = "path1";
        List<String> rowKeys = new ArrayList<>();
        List<HoodieKey> expected = new ArrayList<>();
        for (int i = 0; i < 1000; i++) {
            String rowKey = UUID.randomUUID().toString();
            rowKeys.add(rowKey);
            expected.add(new HoodieKey(rowKey, partitionPath));
        }

        String filePath = Paths.get(basePath, "test.parquet").toUri().toString();
        Schema schema = HoodieAvroUtils.getRecordKeyPartitionPathSchema();
        writeParquetFile(typeCode, filePath, rowKeys, schema, true, partitionPath);

        List<HoodieKey> fetched =
                parquetUtils.fetchHoodieKeys(HoodieTestUtils.getStorage(filePath), new StoragePath(filePath));
        Assertions.assertEquals(rowKeys.size(), fetched.size(), "Total count does not match");
        for (HoodieKey key : fetched) {
            Assertions.assertTrue(expected.contains(key), "Record key must be in the given filter");
        }
    }

    /**
     * Same check as the record-key/partition-path test, but without the Hoodie metadata fields:
     * the record key is stored in field {@code "abc"} and the partition path in field
     * {@code "def"}, and {@code fetchHoodieKeys} is given a {@link TestBaseKeyGen} that reads
     * those two fields.
     */
    @Test
    public void testFetchRecordKeyPartitionPathVirtualKeysFromParquet() throws Exception {
        final String recordKeyField = "abc";
        final String partitionField = "def";
        final String partitionPath = "path1";

        List<String> rowKeys = new ArrayList<>();
        List<HoodieKey> expected = new ArrayList<>();
        for (int i = 0; i < 1000; i++) {
            String rowKey = UUID.randomUUID().toString();
            rowKeys.add(rowKey);
            expected.add(new HoodieKey(rowKey, partitionPath));
        }

        String filePath = Paths.get(basePath, "test.parquet").toUri().toString();
        Schema schema = getSchemaWithFields(Arrays.asList(recordKeyField, partitionField));
        // useMetaFields = false: keys go into the custom fields rather than the metadata fields.
        writeParquetFile(BloomFilterTypeCode.SIMPLE.name(), filePath, rowKeys, schema, true, partitionPath,
                false, recordKeyField, partitionField);

        List<HoodieKey> fetched = parquetUtils.fetchHoodieKeys(
                HoodieTestUtils.getStorage(filePath),
                new StoragePath(filePath),
                Option.of(new TestBaseKeyGen(recordKeyField, partitionField)));
        Assertions.assertEquals(rowKeys.size(), fetched.size(), "Total count does not match");
        for (HoodieKey key : fetched) {
            Assertions.assertTrue(expected.contains(key), "Record key must be in the given filter");
        }
    }

    /**
     * Writes 123 random row keys to a Parquet file and checks that {@code getRowCount} reports
     * exactly that many rows.
     */
    @Test
    public void testReadCounts() throws Exception {
        final int numRecords = 123;
        String filePath = Paths.get(basePath, "test.parquet").toUri().toString();

        List<String> rowKeys = new ArrayList<>();
        for (int i = 0; i < numRecords; i++) {
            rowKeys.add(UUID.randomUUID().toString());
        }
        writeParquetFile(BloomFilterTypeCode.SIMPLE.name(), filePath, rowKeys);

        Assertions.assertEquals((long) numRecords,
                parquetUtils.getRowCount(HoodieTestUtils.getStorage(filePath), new StoragePath(filePath)));
    }

    /**
     * Writes 1000 records with columns {@code id}, {@code partition} and {@code data}, where
     * {@code data} is null on every third record, and checks the per-column statistics
     * (min, max, null count, value count) read back from the file.
     *
     * <p>The writer is managed by try-with-resources, so it is closed exactly once before the
     * statistics are read, whether or not writing succeeds.
     */
    @Test
    public void testReadColumnStatsFromMetadata() throws Exception {
        final int totalCount = 1000;
        final String fileName = "test.parquet";
        final String partitionPath = "path1";
        final String idField = "id";
        final String partitionField = "partition";
        final String dataField = "data";

        // Each entry is ((row key, data value), nullifyData).
        List<Pair<Pair<String, String>, Boolean>> valueList = new ArrayList<>();
        // Expected min/max use String ordering. "z" sorts after and "0" sorts no later than any
        // generated UUID or decimal string, so they serve as initial sentinels.
        String minKey = "z";
        String maxKey = "0";
        String minValue = "z";
        String maxValue = "0";
        int nullValueCount = 0;
        for (int i = 0; i < totalCount; i++) {
            boolean nullifyData = i % 3 == 0;
            String rowKey = UUID.randomUUID().toString();
            String value = String.valueOf(i);
            valueList.add(Pair.of(Pair.of(rowKey, value), nullifyData));

            if (minKey.compareTo(rowKey) > 0) {
                minKey = rowKey;
            }
            if (maxKey.compareTo(rowKey) < 0) {
                maxKey = rowKey;
            }
            if (nullifyData) {
                // Nulled data values count towards the null count and are excluded from min/max.
                nullValueCount++;
            } else {
                if (minValue.compareTo(value) > 0) {
                    minValue = value;
                }
                if (maxValue.compareTo(value) < 0) {
                    maxValue = value;
                }
            }
        }

        String filePath = new StoragePath(basePath, fileName).toString();
        Schema schema = getSchema(idField, partitionField, dataField);
        HoodieAvroWriteSupport writeSupport = new HoodieAvroWriteSupport(
                new AvroSchemaConverter().convert(schema),
                schema,
                Option.of(BloomFilterFactory.createBloomFilter(1000, 1.0E-4d, 10000, BloomFilterTypeCode.SIMPLE.name())),
                new Properties());

        try (ParquetWriter writer = new ParquetWriter(
                new Path(filePath), writeSupport, CompressionCodecName.GZIP, 120 * 1024 * 1024, 1024 * 1024)) {
            for (Pair<Pair<String, String>, Boolean> entry : valueList) {
                String rowKey = entry.getLeft().getLeft();
                GenericData.Record record = new GenericData.Record(schema);
                record.put(idField, rowKey);
                record.put(partitionField, partitionPath);
                record.put(dataField, entry.getRight() ? null : entry.getLeft().getRight());
                writer.write(record);
                writeSupport.add(rowKey);
            }
        }

        List<String> columnList = new ArrayList<>(Arrays.asList(idField, partitionField, dataField));

        // Sort by column name so the positions below are deterministic: data, id, partition.
        List<HoodieColumnRangeMetadata<Comparable>> columnRangeMetadataList = parquetUtils
                .readColumnStatsFromMetadata(HoodieTestUtils.getStorage(filePath), new StoragePath(filePath), columnList)
                .stream()
                .sorted(Comparator.comparing(HoodieColumnRangeMetadata::getColumnName))
                .collect(Collectors.toList());

        Assertions.assertEquals(3, columnRangeMetadataList.size(), "Should return column stats of 3 columns");
        validateColumnRangeMetadata(columnRangeMetadataList.get(0), fileName, dataField,
                minValue, maxValue, nullValueCount, totalCount);
        validateColumnRangeMetadata(columnRangeMetadataList.get(1), fileName, idField,
                minKey, maxKey, 0L, totalCount);
        validateColumnRangeMetadata(columnRangeMetadataList.get(2), fileName, partitionField,
                partitionPath, partitionPath, 0L, totalCount);
    }

    /**
     * Builds a record schema named {@code HoodieRecord} with three fields, in the given order,
     * each typed via {@code AvroSchemaUtils.createNullableSchema(Schema.Type.STRING)} and
     * defaulting to null.
     *
     * @param recordKeyField name of the first field
     * @param partitionField name of the second field
     * @param dataField name of the third field
     * @return the assembled record schema
     */
    private Schema getSchema(String recordKeyField, String partitionField, String dataField) {
        List<Schema.Field> fields = new ArrayList<>();
        for (String fieldName : Arrays.asList(recordKeyField, partitionField, dataField)) {
            fields.add(new Schema.Field(
                    fieldName, AvroSchemaUtils.createNullableSchema(Schema.Type.STRING), "", JsonProperties.NULL_VALUE));
        }
        Schema recordSchema = Schema.createRecord("HoodieRecord", "", "", false);
        recordSchema.setFields(fields);
        return recordSchema;
    }

    /**
     * Asserts that one column's range metadata matches the expected values. Checks run in this
     * order and stop at the first mismatch: file path, column name, min value, max value,
     * null count, value count.
     *
     * @param hoodieColumnRangeMetadata metadata entry under test
     * @param str expected file path
     * @param str2 expected column name
     * @param str3 expected min value
     * @param str4 expected max value
     * @param j expected null count
     * @param j2 expected value count
     */
    private void validateColumnRangeMetadata(HoodieColumnRangeMetadata hoodieColumnRangeMetadata, String str, String str2, String str3, String str4, long j, long j2) {
        final HoodieColumnRangeMetadata actual = hoodieColumnRangeMetadata;

        // Identity of the entry.
        Assertions.assertEquals(str, actual.getFilePath(), "File path does not match");
        Assertions.assertEquals(str2, actual.getColumnName(), "Column name does not match");

        // Value range.
        Assertions.assertEquals(str3, actual.getMinValue(), "Min value does not match");
        Assertions.assertEquals(str4, actual.getMaxValue(), "Max value does not match");

        // Counts.
        Assertions.assertEquals(j, actual.getNullCount(), "Null count does not match");
        Assertions.assertEquals(j2, actual.getValueCount(), "Value count does not match");
    }

    /**
     * Convenience overload: writes the given row keys using the schema from
     * {@code HoodieAvroUtils.getRecordKeySchema()}, with no partition path field populated.
     *
     * @param str bloom filter type code name
     * @param str2 location of the Parquet file to write
     * @param list row keys to write, one record per key
     */
    private void writeParquetFile(String str, String str2, List<String> list) throws Exception {
        final Schema recordKeySchema = HoodieAvroUtils.getRecordKeySchema();
        final boolean addPartitionPath = false;
        final String unusedPartitionPath = "";
        writeParquetFile(str, str2, list, recordKeySchema, addPartitionPath, unusedPartitionPath);
    }

    /**
     * Convenience overload: writes the given row keys into the Hoodie metadata fields, so no
     * custom record-key or partition field names are passed on.
     *
     * @param str bloom filter type code name
     * @param str2 location of the Parquet file to write
     * @param list row keys to write, one record per key
     * @param schema Avro schema of the records
     * @param z whether to also populate the partition path field
     * @param str3 partition path value written when {@code z} is true
     */
    private void writeParquetFile(String str, String str2, List<String> list, Schema schema, boolean z, String str3) throws Exception {
        final boolean useMetaFields = true;
        final String noCustomField = null;
        writeParquetFile(str, str2, list, schema, z, str3, useMetaFields, noCustomField, noCustomField);
    }

    /**
     * Writes one record per row key to a GZIP-compressed Parquet file and adds each key to the
     * write support, which was created with a bloom filter of the requested type.
     *
     * <p>The writer is managed by try-with-resources so it is closed even when a write fails.
     *
     * @param typeCode bloom filter type code name
     * @param filePath location of the Parquet file to write
     * @param rowKeys row keys to write, one record per key
     * @param schema Avro schema of the records
     * @param addPartitionPathField whether to also populate the partition path field
     * @param partitionPathValue partition path written when {@code addPartitionPathField} is true
     * @param useMetaFields if true, keys go into the {@link HoodieRecord} metadata fields;
     *     otherwise into {@code recordFieldName} / {@code partitionFieldName}
     * @param recordFieldName record-key field name used when {@code useMetaFields} is false
     * @param partitionFieldName partition field name used when {@code useMetaFields} is false
     * @throws Exception if creating, writing or closing the Parquet writer fails
     */
    private void writeParquetFile(String typeCode, String filePath, List<String> rowKeys, Schema schema,
                                  boolean addPartitionPathField, String partitionPathValue,
                                  boolean useMetaFields, String recordFieldName, String partitionFieldName) throws Exception {
        HoodieAvroWriteSupport writeSupport = new HoodieAvroWriteSupport(
                new AvroSchemaConverter().convert(schema),
                schema,
                Option.of(BloomFilterFactory.createBloomFilter(1000, 1.0E-4d, 10000, typeCode)),
                new Properties());

        // The target field names do not change per record, so resolve them once.
        String keyFieldName = useMetaFields ? HoodieRecord.RECORD_KEY_METADATA_FIELD : recordFieldName;
        String partitionPathFieldName = useMetaFields ? HoodieRecord.PARTITION_PATH_METADATA_FIELD : partitionFieldName;

        try (ParquetWriter writer = new ParquetWriter(
                new Path(filePath), writeSupport, CompressionCodecName.GZIP, 120 * 1024 * 1024, 1024 * 1024)) {
            for (String rowKey : rowKeys) {
                GenericData.Record record = new GenericData.Record(schema);
                record.put(keyFieldName, rowKey);
                if (addPartitionPathField) {
                    record.put(partitionPathFieldName, partitionPathValue);
                }
                writer.write(record);
                writeSupport.add(rowKey);
            }
        }
    }

    /**
     * Builds a record schema named {@code HoodieRecordKey} with one field per given name, in the
     * given order. Every field uses {@code HoodieAvroUtils.METADATA_FIELD_SCHEMA} and defaults
     * to null.
     *
     * @param fieldNames names of the fields to create
     * @return the assembled record schema
     */
    private static Schema getSchemaWithFields(List<String> fieldNames) {
        List<Schema.Field> fields = fieldNames.stream()
                .map(name -> new Schema.Field(name, HoodieAvroUtils.METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE))
                .collect(Collectors.toList());
        Schema recordSchema = Schema.createRecord("HoodieRecordKey", "", "", false);
        recordSchema.setFields(fields);
        return recordSchema;
    }
}
