package org.apache.iceberg.spark.source;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.KryoHelpers;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.TestHelpers;
import org.apache.iceberg.hadoop.HadoopTables;
import org.apache.iceberg.hive.HiveTableBaseTest;
import org.apache.iceberg.mapping.MappingUtil;
import org.apache.iceberg.mapping.NameMapping;
import org.apache.iceberg.mapping.NameMappingParser;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
import org.apache.iceberg.spark.SparkSchemaUtil;
import org.apache.iceberg.spark.SparkTableUtil;
import org.apache.iceberg.types.Types;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.catalyst.TableIdentifier;
import org.assertj.core.api.Assertions;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.Assume;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.runners.Enclosed;
import org.junit.rules.TemporaryFolder;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;

@RunWith(Enclosed.class)
/* loaded from: input_file:org/apache/iceberg/spark/source/TestSparkTableUtil.class */
public class TestSparkTableUtil extends HiveTableBaseTest {
    /** Unqualified name of the Hive table that {@link #loadData(FileFormat)} creates. */
    private static final String TABLE_NAME = "hive_table";
    /** {@link #TABLE_NAME} prefixed with the {@code hivedb} database, i.e. {@code hivedb.hive_table}. */
    private static final String QUALIFIED_TABLE_NAME = String.format("%s.%s", "hivedb", TABLE_NAME);
    /** Location of the Hive table, as resolved by {@link HiveTableBaseTest#getTableLocationPath(String)}. */
    private static final Path TABLE_LOCATION_PATH = HiveTableBaseTest.getTableLocationPath(TABLE_NAME);
    /** String form of {@link #TABLE_LOCATION_PATH}, used in the {@code LOCATION} clause of the DDL. */
    private static final String TABLE_LOCATION_STR = TABLE_LOCATION_PATH.toString();
    /** Shared Spark session; assigned in {@link #startSpark()} and reset to {@code null} in {@link #stopSpark()}. */
    private static SparkSession spark = null;

    /* loaded from: input_file:org/apache/iceberg/spark/source/TestSparkTableUtil$GetPartitions.class */
    public static class GetPartitions {

        @Rule
        public TemporaryFolder temp = new TemporaryFolder();
        private final FileFormat format = FileFormat.PARQUET;

        @Test
        public void testPartitionScan() throws Exception {
            TestSparkTableUtil.spark.createDataFrame(Lists.newArrayList(new ThreeColumnRecord[]{new ThreeColumnRecord(1, "ab", "data"), new ThreeColumnRecord(2, "b c", "data"), new ThreeColumnRecord(1, "b c", "data"), new ThreeColumnRecord(2, "ab", "data")}), ThreeColumnRecord.class).write().mode("overwrite").format(this.format.toString()).partitionBy(new String[]{"c1", "c2"}).saveAsTable("external_table");
            TableIdentifier parseTableIdentifier = TestSparkTableUtil.spark.sessionState().sqlParser().parseTableIdentifier("external_table");
            Map of = ImmutableMap.of("c1", "1", "c2", "ab");
            Map of2 = ImmutableMap.of("c1", "2", "c2", "b c");
            Map of3 = ImmutableMap.of("c1", "1", "c2", "b c");
            Map of4 = ImmutableMap.of("c1", "2", "c2", "ab");
            Assert.assertEquals("Wrong partitions fetched for c1=1", Sets.newHashSet(new Map[]{of, of3}), (Set) SparkTableUtil.getPartitions(TestSparkTableUtil.spark, parseTableIdentifier, ImmutableMap.of("c1", "1")).stream().map(sparkPartition -> {
                return sparkPartition.getValues();
            }).collect(Collectors.toSet()));
            Assert.assertEquals("Wrong partitions fetched for c1=2", Sets.newHashSet(new Map[]{of2, of4}), (Set) SparkTableUtil.getPartitions(TestSparkTableUtil.spark, parseTableIdentifier, ImmutableMap.of("c1", "2")).stream().map(sparkPartition2 -> {
                return sparkPartition2.getValues();
            }).collect(Collectors.toSet()));
            Assert.assertEquals("Wrong partitions fetched for c2=ab", Sets.newHashSet(new Map[]{of, of4}), (Set) SparkTableUtil.getPartitions(TestSparkTableUtil.spark, parseTableIdentifier, ImmutableMap.of("c2", "ab")).stream().map(sparkPartition3 -> {
                return sparkPartition3.getValues();
            }).collect(Collectors.toSet()));
            Assert.assertEquals("Wrong partitions fetched for c2=b c", Sets.newHashSet(new Map[]{of2, of3}), (Set) SparkTableUtil.getPartitions(TestSparkTableUtil.spark, parseTableIdentifier, ImmutableMap.of("c2", "b c")).stream().map(sparkPartition4 -> {
                return sparkPartition4.getValues();
            }).collect(Collectors.toSet()));
        }
    }

    /* loaded from: input_file:org/apache/iceberg/spark/source/TestSparkTableUtil$PartitionScan.class */
    /**
     * Exercises the partition listing helpers of {@code SparkTableUtil} against the Hive table
     * that {@code loadData} creates with three rows whose {@code data} values are a, b and c.
     */
    public static class PartitionScan {
        /** Creates and fills the Parquet-backed Hive table before every test. */
        @Before
        public void before() {
            TestSparkTableUtil.loadData(FileFormat.PARQUET);
        }

        /** Drops the Hive table and deletes its location after every test. */
        @After
        public void after() throws IOException {
            TestSparkTableUtil.cleanupData();
        }

        /** Both the list-based and the DataFrame-based listing must report three partitions. */
        @Test
        public void testPartitionScan() {
            int listedPartitions =
                    SparkTableUtil.getPartitions(
                                    TestSparkTableUtil.spark, TestSparkTableUtil.QUALIFIED_TABLE_NAME)
                            .size();
            Assert.assertEquals("There should be 3 partitions", 3L, listedPartitions);

            long partitionRows =
                    SparkTableUtil.partitionDF(
                                    TestSparkTableUtil.spark, TestSparkTableUtil.QUALIFIED_TABLE_NAME)
                            .count();
            Assert.assertEquals("There should be 3 partitions", 3L, partitionRows);
        }

        /** Filtering on {@code data = 'a'} must leave exactly one partition in both variants. */
        @Test
        public void testPartitionScanByFilter() {
            int listedPartitions =
                    SparkTableUtil.getPartitionsByFilter(
                                    TestSparkTableUtil.spark,
                                    TestSparkTableUtil.QUALIFIED_TABLE_NAME,
                                    "data = 'a'")
                            .size();
            Assert.assertEquals("There should be 1 matching partition", 1L, listedPartitions);

            long partitionRows =
                    SparkTableUtil.partitionDFByFilter(
                                    TestSparkTableUtil.spark,
                                    TestSparkTableUtil.QUALIFIED_TABLE_NAME,
                                    "data = 'a'")
                            .count();
            Assert.assertEquals("There should be 1 matching partition", 1L, partitionRows);
        }
    }

    @RunWith(Parameterized.class)
    /* loaded from: input_file:org/apache/iceberg/spark/source/TestSparkTableUtil$TableImport.class */
    public static class TableImport {
        private final FileFormat format;

        @Rule
        public TemporaryFolder temp = new TemporaryFolder();

        @Parameterized.Parameters(name = "format = {0}")
        public static Object[] parameters() {
            return new Object[]{"parquet", "orc"};
        }

        public TableImport(String str) {
            this.format = FileFormat.fromString(str);
        }

        @Before
        public void before() {
            TestSparkTableUtil.loadData(this.format);
        }

        @After
        public void after() throws IOException {
            TestSparkTableUtil.cleanupData();
        }

        @Test
        public void testImportPartitionedTable() throws Exception {
            File newFolder = this.temp.newFolder("partitioned_table");
            TestSparkTableUtil.spark.table(TestSparkTableUtil.QUALIFIED_TABLE_NAME).write().mode("overwrite").partitionBy(new String[]{"data"}).format(this.format.toString()).saveAsTable("test_partitioned_table");
            SparkTableUtil.importSparkTable(TestSparkTableUtil.spark, TestSparkTableUtil.spark.sessionState().sqlParser().parseTableIdentifier("test_partitioned_table"), new HadoopTables(TestSparkTableUtil.spark.sessionState().newHadoopConf()).create(SparkSchemaUtil.schemaForTable(TestSparkTableUtil.spark, TestSparkTableUtil.QUALIFIED_TABLE_NAME), SparkSchemaUtil.specForTable(TestSparkTableUtil.spark, TestSparkTableUtil.QUALIFIED_TABLE_NAME), ImmutableMap.of(), newFolder.getCanonicalPath()), this.temp.newFolder("staging-dir").toString());
            Assert.assertEquals("three values ", 3L, TestSparkTableUtil.spark.read().format("iceberg").load(newFolder.toString()).count());
        }

        @Test
        public void testImportUnpartitionedTable() throws Exception {
            File newFolder = this.temp.newFolder("unpartitioned_table");
            TestSparkTableUtil.spark.table(TestSparkTableUtil.QUALIFIED_TABLE_NAME).write().mode("overwrite").format(this.format.toString()).saveAsTable("test_unpartitioned_table");
            SparkTableUtil.importSparkTable(TestSparkTableUtil.spark, TestSparkTableUtil.spark.sessionState().sqlParser().parseTableIdentifier("test_unpartitioned_table"), new HadoopTables(TestSparkTableUtil.spark.sessionState().newHadoopConf()).create(SparkSchemaUtil.schemaForTable(TestSparkTableUtil.spark, TestSparkTableUtil.QUALIFIED_TABLE_NAME), SparkSchemaUtil.specForTable(TestSparkTableUtil.spark, TestSparkTableUtil.QUALIFIED_TABLE_NAME), ImmutableMap.of(), newFolder.getCanonicalPath()), this.temp.newFolder("staging-dir").toString());
            Assert.assertEquals("three values ", 3L, TestSparkTableUtil.spark.read().format("iceberg").load(newFolder.toString()).count());
        }

        @Test
        public void testImportAsHiveTable() throws Exception {
            TestSparkTableUtil.spark.table(TestSparkTableUtil.QUALIFIED_TABLE_NAME).write().mode("overwrite").format(this.format.toString()).saveAsTable("unpartitioned_table");
            TableIdentifier tableIdentifier = new TableIdentifier("unpartitioned_table");
            org.apache.iceberg.catalog.TableIdentifier of = org.apache.iceberg.catalog.TableIdentifier.of(new String[]{"hivedb", "test_unpartitioned_table_" + this.format});
            File newFolder = this.temp.newFolder("staging-dir");
            SparkTableUtil.importSparkTable(TestSparkTableUtil.spark, tableIdentifier, TestSparkTableUtil.catalog.createTable(of, SparkSchemaUtil.schemaForTable(TestSparkTableUtil.spark, "unpartitioned_table"), SparkSchemaUtil.specForTable(TestSparkTableUtil.spark, "unpartitioned_table")), newFolder.toString());
            Assert.assertEquals("three values ", 3L, TestSparkTableUtil.spark.read().format("iceberg").load(of.toString()).count());
            TestSparkTableUtil.spark.table(TestSparkTableUtil.QUALIFIED_TABLE_NAME).write().mode("overwrite").partitionBy(new String[]{"data"}).format(this.format.toString()).saveAsTable("partitioned_table");
            TableIdentifier tableIdentifier2 = new TableIdentifier("partitioned_table");
            org.apache.iceberg.catalog.TableIdentifier of2 = org.apache.iceberg.catalog.TableIdentifier.of(new String[]{"hivedb", "test_partitioned_table_" + this.format});
            SparkTableUtil.importSparkTable(TestSparkTableUtil.spark, tableIdentifier2, TestSparkTableUtil.catalog.createTable(of2, SparkSchemaUtil.schemaForTable(TestSparkTableUtil.spark, "partitioned_table"), SparkSchemaUtil.specForTable(TestSparkTableUtil.spark, "partitioned_table")), newFolder.toString());
            Assert.assertEquals("three values ", 3L, TestSparkTableUtil.spark.read().format("iceberg").load(of2.toString()).count());
        }

        @Test
        public void testImportWithNameMapping() throws Exception {
            TestSparkTableUtil.spark.table(TestSparkTableUtil.QUALIFIED_TABLE_NAME).write().mode("overwrite").format(this.format.toString()).saveAsTable("original_table");
            Schema schema = new Schema(new Types.NestedField[]{Types.NestedField.optional(1, "data", Types.StringType.get())});
            NameMapping create = MappingUtil.create(schema);
            String str = "target_table_" + this.format;
            TableIdentifier tableIdentifier = new TableIdentifier("original_table");
            org.apache.iceberg.catalog.TableIdentifier of = org.apache.iceberg.catalog.TableIdentifier.of(new String[]{"hivedb", str});
            Table createTable = TestSparkTableUtil.catalog.createTable(of, schema, SparkSchemaUtil.specForTable(TestSparkTableUtil.spark, "original_table"));
            createTable.updateProperties().set("schema.name-mapping.default", NameMappingParser.toJson(create)).commit();
            SparkTableUtil.importSparkTable(TestSparkTableUtil.spark, tableIdentifier, createTable, this.temp.newFolder("staging-dir").toString());
            Assert.assertEquals(Lists.newArrayList(new String[]{"b", "c"}), TestSparkTableUtil.spark.read().format("iceberg").load(of.toString()).select("data", new String[0]).sort("data", new String[0]).filter("data >= 'b'").as(Encoders.STRING()).collectAsList());
        }

        @Test
        public void testImportWithNameMappingForVectorizedParquetReader() throws Exception {
            Assume.assumeTrue("Applies only to parquet format.", FileFormat.PARQUET == this.format);
            TestSparkTableUtil.spark.table(TestSparkTableUtil.QUALIFIED_TABLE_NAME).write().mode("overwrite").format(this.format.toString()).saveAsTable("original_table");
            Schema schema = new Schema(new Types.NestedField[]{Types.NestedField.optional(1, "data", Types.StringType.get())});
            NameMapping create = MappingUtil.create(schema);
            TableIdentifier tableIdentifier = new TableIdentifier("original_table");
            Table createTable = TestSparkTableUtil.catalog.createTable(org.apache.iceberg.catalog.TableIdentifier.of(new String[]{"hivedb", "target_table_for_vectorization"}), schema, SparkSchemaUtil.specForTable(TestSparkTableUtil.spark, "original_table"));
            createTable.updateProperties().set("schema.name-mapping.default", NameMappingParser.toJson(create)).set("read.parquet.vectorization.enabled", "true").commit();
            SparkTableUtil.importSparkTable(TestSparkTableUtil.spark, tableIdentifier, createTable, this.temp.newFolder("staging-dir").toString());
            Assert.assertEquals(Lists.newArrayList(new String[]{"b", "c"}), TestSparkTableUtil.spark.read().format("iceberg").load("hivedb.target_table_for_vectorization").select("data", new String[0]).sort("data", new String[0]).filter("data >= 'b'").as(Encoders.STRING()).collectAsList());
        }

        @Test
        public void testImportPartitionedWithWhitespace() throws Exception {
            ArrayList newArrayList = Lists.newArrayList(new SimpleRecord[]{new SimpleRecord(1, "some key value")});
            File newFolder = this.temp.newFolder("partitioned_table");
            TestSparkTableUtil.spark.createDataFrame(newArrayList, SimpleRecord.class).withColumnRenamed("data", "dAtA sPaced").write().mode("overwrite").partitionBy(new String[]{"dAtA sPaced"}).format(this.format.toString()).saveAsTable("whitespacetable");
            SparkTableUtil.importSparkTable(TestSparkTableUtil.spark, TestSparkTableUtil.spark.sessionState().sqlParser().parseTableIdentifier("whitespacetable"), new HadoopTables(TestSparkTableUtil.spark.sessionState().newHadoopConf()).create(SparkSchemaUtil.schemaForTable(TestSparkTableUtil.spark, "whitespacetable"), SparkSchemaUtil.specForTable(TestSparkTableUtil.spark, "whitespacetable"), ImmutableMap.of(), newFolder.getCanonicalPath()), this.temp.newFolder("staging-dir").toString());
            Assert.assertEquals("Data should match", newArrayList, TestSparkTableUtil.spark.read().format("iceberg").load(newFolder.toString()).withColumnRenamed("dAtA sPaced", "data").as(Encoders.bean(SimpleRecord.class)).collectAsList());
        }

        @Test
        public void testImportUnpartitionedWithWhitespace() throws Exception {
            String str = "whitespacetable_" + this.format;
            ArrayList newArrayList = Lists.newArrayList(new SimpleRecord[]{new SimpleRecord(1, "some key value")});
            File newFolder = this.temp.newFolder("white space location");
            File newFolder2 = this.temp.newFolder("partitioned_table");
            TestSparkTableUtil.spark.createDataFrame(newArrayList, SimpleRecord.class).write().mode("overwrite").format(this.format.toString()).save(newFolder.getPath());
            TestSparkTableUtil.spark.catalog().createExternalTable(str, newFolder.getPath(), this.format.toString());
            SparkTableUtil.importSparkTable(TestSparkTableUtil.spark, TestSparkTableUtil.spark.sessionState().sqlParser().parseTableIdentifier(str), new HadoopTables(TestSparkTableUtil.spark.sessionState().newHadoopConf()).create(SparkSchemaUtil.schemaForTable(TestSparkTableUtil.spark, str), SparkSchemaUtil.specForTable(TestSparkTableUtil.spark, str), ImmutableMap.of(), newFolder2.getCanonicalPath()), this.temp.newFolder("staging-dir").toString());
            Assert.assertEquals("Data should match", newArrayList, TestSparkTableUtil.spark.read().format("iceberg").load(newFolder2.toString()).as(Encoders.bean(SimpleRecord.class)).collectAsList());
        }

        @Test
        public void testSparkPartitionKryoSerialization() throws IOException {
            SparkTableUtil.SparkPartition sparkPartition = new SparkTableUtil.SparkPartition(ImmutableMap.of("id", "2"), "s3://bucket/table/data/id=2", this.format.toString());
            Assertions.assertThat(sparkPartition).isEqualTo((SparkTableUtil.SparkPartition) KryoHelpers.roundTripSerialize(sparkPartition));
        }

        @Test
        public void testSparkPartitionJavaSerialization() throws IOException, ClassNotFoundException {
            SparkTableUtil.SparkPartition sparkPartition = new SparkTableUtil.SparkPartition(ImmutableMap.of("id", "2"), "s3://bucket/table/data/id=2", this.format.toString());
            Assertions.assertThat(sparkPartition).isEqualTo((SparkTableUtil.SparkPartition) TestHelpers.roundTripSerialize(sparkPartition));
        }
    }

    /**
     * Starts the shared local Spark session with Hive support, pointed at the metastore URIs
     * taken from {@code HiveTableBaseTest.hiveConf}, and stores it in {@link #spark}.
     */
    @BeforeClass
    public static void startSpark() {
        SparkSession.Builder builder = SparkSession.builder().master("local[2]").enableHiveSupport();
        String metastoreUris = HiveTableBaseTest.hiveConf.get(HiveConf.ConfVars.METASTOREURIS.varname);
        builder = builder.config("spark.hadoop.hive.metastore.uris", metastoreUris);
        builder = builder.config("hive.exec.dynamic.partition", "true");
        builder = builder.config("hive.exec.dynamic.partition.mode", "nonstrict");
        builder = builder.config("spark.sql.legacy.allowCreatingManagedTableUsingNonemptyLocation", "true");
        spark = builder.getOrCreate();
    }

    /**
     * Clears the shared {@link #spark} reference and stops the session it pointed to.
     *
     * <p>Does nothing when no session was assigned (for example because {@link #startSpark()}
     * threw), so that teardown does not add a {@code NullPointerException} on top of the
     * original failure.
     */
    @AfterClass
    public static void stopSpark() {
        SparkSession currentSpark = spark;
        // Clear the static reference first so a failing stop() cannot leave a stale session behind.
        spark = null;
        if (currentSpark != null) {
            currentSpark.stop();
        }
    }

    /**
     * Creates the Hive table {@link #QUALIFIED_TABLE_NAME}, partitioned by {@code data} and stored
     * at {@link #TABLE_LOCATION_STR}, then appends three rows: (1, a), (2, b) and (3, c).
     *
     * @param fileFormat format placed in the {@code STORED AS} clause of the DDL
     */
    static void loadData(FileFormat fileFormat) {
        SQLContext sqlContext = new SQLContext(spark);
        String createTableDdl =
                String.format(
                        "CREATE TABLE %s (\n"
                                + "    id int COMMENT 'unique id'\n"
                                + ")\n"
                                + "PARTITIONED BY (data string)\n"
                                + "STORED AS %s\n"
                                + "LOCATION '%s'",
                        QUALIFIED_TABLE_NAME,
                        fileFormat,
                        TABLE_LOCATION_STR);
        sqlContext.sql(createTableDdl);

        spark.createDataFrame(
                        Lists.newArrayList(
                                new SimpleRecord(1, "a"),
                                new SimpleRecord(2, "b"),
                                new SimpleRecord(3, "c")),
                        SimpleRecord.class)
                .select("id", "data")
                .orderBy("data")
                .write()
                .mode("append")
                .insertInto(QUALIFIED_TABLE_NAME);
    }

    /**
     * Drops {@link #QUALIFIED_TABLE_NAME} if it exists and recursively deletes
     * {@link #TABLE_LOCATION_PATH} from its file system.
     *
     * @throws IOException if resolving the file system or deleting the location fails
     */
    static void cleanupData() throws IOException {
        SQLContext sqlContext = new SQLContext(spark);
        String dropTableSql = String.format("DROP TABLE IF EXISTS %s", QUALIFIED_TABLE_NAME);
        sqlContext.sql(dropTableSql);

        boolean recursive = true;
        TABLE_LOCATION_PATH.getFileSystem(HiveTableBaseTest.hiveConf).delete(TABLE_LOCATION_PATH, recursive);
    }
}
