package org.apache.hudi.utilities.sources.helpers;

import java.io.IOException;
import java.util.stream.Stream;
import org.apache.avro.Schema;
import org.apache.avro.SchemaParseException;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.common.util.FileIOUtils;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.hadoop.fs.HadoopFSUtils;
import org.apache.hudi.testutils.HoodieClientTestUtils;
import org.apache.hudi.utilities.testutils.SanitizationTestUtils;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.StructType;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

/* loaded from: input_file:org/apache/hudi/utilities/sources/helpers/TestSanitizationUtils.class */
/**
 * Tests for {@link SanitizationUtils}: sanitizing the column names of a Spark dataset and
 * parsing Avro schema JSON with sanitization switched on or off.
 *
 * <p>A single {@link SparkSession} / {@link JavaSparkContext} pair is created once for the
 * class and shared by all tests.
 */
public class TestSanitizationUtils {
    private static final String VALID_SCHEMA_FILE =
            "src/test/resources/streamer-config/file_schema_provider_valid.avsc";
    private static final String INVALID_SCHEMA_FILE =
            "src/test/resources/streamer-config/file_schema_provider_invalid.avsc";

    protected static SparkSession spark;
    protected static JavaSparkContext jsc;

    /** Creates the Spark session and Java Spark context shared by every test in this class. */
    @BeforeAll
    public static void start() {
        spark = SparkSession.builder()
                .config(HoodieClientTestUtils.getSparkConfForTest(TestSanitizationUtils.class.getName()))
                .getOrCreate();
        jsc = JavaSparkContext.fromSparkContext(spark.sparkContext());
    }

    /**
     * Closes the shared Spark context and session.
     *
     * <p>Each handle is null-checked so that a failure inside {@link #start()} is not masked by
     * a {@link NullPointerException} raised during teardown.
     */
    @AfterAll
    public static void shutdown() {
        if (jsc != null) {
            jsc.close();
        }
        if (spark != null) {
            spark.close();
        }
    }

    /**
     * Reads a JSON data file, sanitizes its column names, and checks the result against a second
     * JSON data file read with the expected schema.
     *
     * @param inputDataPath    path of the JSON file that is passed through the sanitizer
     * @param expectedDataPath path of the JSON file holding the expected rows
     * @param inputSchema      schema used to read {@code inputDataPath}
     * @param expectedSchema   schema used to read {@code expectedDataPath}
     */
    @ParameterizedTest
    @MethodSource("provideDataFiles")
    public void testSanitizeDataset(String inputDataPath, String expectedDataPath,
                                    StructType inputSchema, StructType expectedSchema) {
        // Raw Dataset is kept on purpose: parameterizing it needs an import this file does not have.
        Dataset expected = spark.read().schema(expectedSchema).format("json").load(expectedDataPath);
        Dataset input = spark.read().schema(inputSchema).format("json").load(inputDataPath);
        Dataset sanitized =
                SanitizationUtils.sanitizeColumnNamesForAvro(input, SanitizationTestUtils.invalidCharMask);

        // Sanitizing must keep the row count, and must yield the expected schema and rows.
        Assertions.assertEquals(input.count(), sanitized.count());
        Assertions.assertEquals(expected.schema(), sanitized.schema());
        Assertions.assertEquals(expected.collectAsList(), sanitized.collectAsList());
    }

    /** Parses {@code schemaJson} with sanitization enabled and the shared test mask. */
    private void testSanitizeSchema(String schemaJson, Schema expectedSchema) {
        testSanitizeSchema(schemaJson, expectedSchema, true);
    }

    /** Parses {@code schemaJson} with the shared test mask and the given sanitization flag. */
    private void testSanitizeSchema(String schemaJson, Schema expectedSchema, boolean sanitizeEnabled) {
        testSanitizeSchema(schemaJson, expectedSchema, sanitizeEnabled, SanitizationTestUtils.invalidCharMask);
    }

    /**
     * Parses {@code schemaJson} through {@link SanitizationUtils#parseAvroSchema} and asserts
     * that the parsed schema equals {@code expectedSchema}.
     *
     * @param schemaJson      Avro schema JSON text to parse
     * @param expectedSchema  schema the parse result must equal
     * @param sanitizeEnabled flag forwarded unchanged to {@code parseAvroSchema}
     * @param charMask        mask string forwarded unchanged to {@code parseAvroSchema}
     */
    private void testSanitizeSchema(String schemaJson, Schema expectedSchema,
                                    boolean sanitizeEnabled, String charMask) {
        Schema parsed = SanitizationUtils.parseAvroSchema(schemaJson, sanitizeEnabled, charMask);
        // JUnit's contract is assertEquals(expected, actual); keeping that order makes failure
        // messages label the two schemas correctly.
        Assertions.assertEquals(expectedSchema, parsed);
    }

    @Test
    public void testGoodAvroSchema() {
        testSanitizeSchema(getJson(VALID_SCHEMA_FILE), SanitizationTestUtils.generateProperFormattedSchema());
    }

    @Test
    public void testBadAvroSchema() {
        testSanitizeSchema(
                getJson(INVALID_SCHEMA_FILE),
                SanitizationTestUtils.generateRenamedSchemaWithDefaultReplacement());
    }

    @Test
    public void testBadAvroSchemaAltCharMask() {
        testSanitizeSchema(
                getJson(INVALID_SCHEMA_FILE),
                SanitizationTestUtils.generateRenamedSchemaWithConfiguredReplacement(),
                true,
                "_");
    }

    /** With sanitization disabled, parsing the invalid schema file must fail. */
    @Test
    public void testBadAvroSchemaDisabledTest() {
        String schemaJson = getJson(INVALID_SCHEMA_FILE);
        Assertions.assertThrows(SchemaParseException.class, () ->
                testSanitizeSchema(
                        schemaJson,
                        SanitizationTestUtils.generateRenamedSchemaWithDefaultReplacement(),
                        false));
    }

    /**
     * Reads the file at {@code path} and returns its contents as a UTF string.
     *
     * <p>This is a helper, not a test: it is private, takes an argument and returns a value, so
     * it must not carry {@code @Test}.
     *
     * @param path location of the file to read, resolved through the Hadoop file system API
     * @return the file contents
     * @throws HoodieIOException if the file cannot be opened or read
     */
    private String getJson(String path) {
        // try-with-resources closes the stream on both the success and the failure path.
        try (FSDataInputStream in =
                     HadoopFSUtils.getFs(path, jsc.hadoopConfiguration(), true).open(new Path(path))) {
            return FileIOUtils.readAsUTFString(in);
        } catch (IOException e) {
            throw new HoodieIOException("can't read schema file", e);
        }
    }

    /** Argument source for {@link #testSanitizeDataset}; delegates to the shared test utilities. */
    private static Stream<Arguments> provideDataFiles() {
        return SanitizationTestUtils.provideDataFiles();
    }
}
