package org.apache.hudi.common.util;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.UUID;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.avro.HoodieAvroWriteSupport;
import org.apache.hudi.common.HoodieCommonTestHarness;
import org.apache.hudi.common.bloom.filter.BloomFilter;
import org.apache.hudi.common.bloom.filter.BloomFilterFactory;
import org.apache.hudi.common.bloom.filter.BloomFilterTypeCode;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieTestUtils;
import org.apache.parquet.avro.AvroSchemaConverter;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;

/**
 * Tests for {@link ParquetUtils}: writes row keys to a parquet file through
 * {@link HoodieAvroWriteSupport} and checks that the keys and the bloom filter can be read back.
 *
 * <p>The suite is parameterized and runs once for every bloom filter type code returned by
 * {@link #data()}.
 */
@RunWith(Parameterized.class)
public class TestParquetUtils extends HoodieCommonTestHarness {
    /** Number of random row keys written by each test. */
    private static final int NUM_ROW_KEYS = 1000;
    /** Every n-th generated key is put into the filter set in {@link #testFilterParquetRowKeys()}. */
    private static final int FILTER_KEY_STRIDE = 100;
    /** Arguments passed to {@code BloomFilterFactory.createBloomFilter} ahead of the type code. */
    private static final int BLOOM_FILTER_NUM_ENTRIES = 1000;
    private static final double BLOOM_FILTER_ERROR_RATE = 0.0001;
    private static final int BLOOM_FILTER_MAX_ENTRIES = 10000;
    /** Block and page sizes handed to the {@link ParquetWriter} constructor. */
    private static final int PARQUET_BLOCK_SIZE = 120 * 1024 * 1024;
    private static final int PARQUET_PAGE_SIZE = 1024 * 1024;

    /** Name of the bloom filter type code used by the current parameterized run. */
    String bloomFilterTypeToTest;

    /**
     * Supplies the constructor arguments for the parameterized runs.
     *
     * @return one single-element argument array per bloom filter type code under test
     */
    @Parameterized.Parameters
    public static Collection<Object[]> data() {
        return Arrays.asList(
                new Object[]{BloomFilterTypeCode.SIMPLE.name()},
                new Object[]{BloomFilterTypeCode.DYNAMIC_V0.name()});
    }

    /**
     * @param str name of the bloom filter type code to exercise in this run
     */
    public TestParquetUtils(String str) {
        this.bloomFilterTypeToTest = str;
    }

    /** Runs the harness' {@code initPath()} before each test (presumably sets up {@code basePath}). */
    @Before
    public void setup() {
        initPath();
    }

    /**
     * Writes random keys to a parquet file, then verifies that the same keys are read back and
     * that the bloom filter read from the parquet metadata reports every written key.
     */
    @Test
    public void testHoodieWriteSupport() throws Exception {
        List<String> rowKeys = generateRowKeys(NUM_ROW_KEYS);
        String filePath = this.basePath + "/test.parquet";
        writeParquetFile(filePath, rowKeys);

        // Compare in sorted order: the keys are read back as a collection with no guaranteed order.
        List<String> rowKeysInFile = new ArrayList<>(
                ParquetUtils.readRowKeysFromParquet(HoodieTestUtils.getDefaultHadoopConf(), new Path(filePath)));
        Collections.sort(rowKeysInFile);
        Collections.sort(rowKeys);
        Assert.assertEquals("Did not read back the expected list of keys", rowKeys, rowKeysInFile);

        BloomFilter filterInFile =
                ParquetUtils.readBloomFilterFromParquetMetadata(HoodieTestUtils.getDefaultHadoopConf(), new Path(filePath));
        for (String rowKey : rowKeys) {
            Assert.assertTrue("key should be found in bloom filter", filterInFile.mightContain(rowKey));
        }
    }

    /**
     * Writes random keys to a parquet file, then verifies that filtering the file by a subset of
     * those keys yields only keys of that subset, and as many of them as the subset holds.
     */
    @Test
    public void testFilterParquetRowKeys() throws Exception {
        List<String> rowKeys = generateRowKeys(NUM_ROW_KEYS);
        Set<String> filter = new HashSet<>();
        for (int i = 0; i < rowKeys.size(); i += FILTER_KEY_STRIDE) {
            filter.add(rowKeys.get(i));
        }

        String filePath = this.basePath + "/test.parquet";
        writeParquetFile(filePath, rowKeys);

        Set<String> filtered =
                ParquetUtils.filterParquetRowKeys(HoodieTestUtils.getDefaultHadoopConf(), new Path(filePath), filter);
        Assert.assertEquals("Filtered count does not match", filter.size(), filtered.size());
        for (String rowKey : filtered) {
            Assert.assertTrue("filtered key must be in the given filter", filter.contains(rowKey));
        }
    }

    /**
     * Creates {@code count} random UUID strings to be used as row keys.
     *
     * @param count number of keys to generate
     * @return a mutable list holding the generated keys in generation order
     */
    private static List<String> generateRowKeys(int count) {
        List<String> rowKeys = new ArrayList<>(count);
        for (int i = 0; i < count; i++) {
            rowKeys.add(UUID.randomUUID().toString());
        }
        return rowKeys;
    }

    /**
     * Writes one record per key to a GZIP-compressed parquet file, adding each key to the bloom
     * filter that is handed to the write support.
     *
     * @param str  path of the parquet file to create
     * @param list row keys to write, one record each
     * @throws Exception if creating, writing or closing the parquet file fails
     */
    private void writeParquetFile(String str, List<String> list) throws Exception {
        Schema schema = HoodieAvroUtils.getRecordKeySchema();
        BloomFilter filter = BloomFilterFactory.createBloomFilter(
                BLOOM_FILTER_NUM_ENTRIES, BLOOM_FILTER_ERROR_RATE, BLOOM_FILTER_MAX_ENTRIES, this.bloomFilterTypeToTest);
        HoodieAvroWriteSupport writeSupport =
                new HoodieAvroWriteSupport(new AvroSchemaConverter().convert(schema), schema, filter);

        // try-with-resources closes the writer even when a write fails, so no file handle leaks.
        // The writer is left raw on purpose: the write support's type parameter is not visible here.
        try (ParquetWriter writer = new ParquetWriter(
                new Path(str), writeSupport, CompressionCodecName.GZIP, PARQUET_BLOCK_SIZE, PARQUET_PAGE_SIZE)) {
            for (String rowKey : list) {
                GenericData.Record rec = new GenericData.Record(schema);
                rec.put(HoodieRecord.RECORD_KEY_METADATA_FIELD, rowKey);
                writer.write(rec);
                filter.add(rowKey);
            }
        }
    }
}
