package org.apache.iceberg.spark.actions;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.iceberg.BlobMetadata;
import org.apache.iceberg.Files;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.StatisticsFile;
import org.apache.iceberg.Table;
import org.apache.iceberg.actions.ComputeTableStats;
import org.apache.iceberg.data.FileHelpers;
import org.apache.iceberg.data.GenericRecord;
import org.apache.iceberg.data.Record;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.spark.CatalogTestBase;
import org.apache.iceberg.spark.Spark3Util;
import org.apache.iceberg.spark.SparkSchemaUtil;
import org.apache.iceberg.spark.data.RandomData;
import org.apache.iceberg.spark.source.SimpleRecord;
import org.apache.iceberg.types.Types;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.catalyst.analysis.NoSuchTableException;
import org.apache.spark.sql.catalyst.parser.ParseException;
import org.assertj.core.api.Assertions;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.TestTemplate;

/* loaded from: input_file:org/apache/iceberg/spark/actions/TestComputeTableStatsAction.class */
/**
 * Tests for the {@link ComputeTableStats} action obtained through {@link SparkActions}.
 *
 * <p>Each test creates the table named by {@code tableName}, runs the action against it and
 * inspects the resulting {@link StatisticsFile}. The table is dropped again after every test by
 * {@link #removeTable()}.
 */
public class TestComputeTableStatsAction extends CatalogTestBase {
    /**
     * Blob property inspected by these tests. The expected values used below are consistent with a
     * distinct-value count per column (NOTE(review): confirm against the action's documentation).
     */
    private static final String NDV_KEY = "ndv";

    private static final Types.StructType LEAF_STRUCT_TYPE = Types.StructType.of(
            Types.NestedField.optional(1, "leafLongCol", Types.LongType.get()),
            Types.NestedField.optional(2, "leafDoubleCol", Types.DoubleType.get()));

    private static final Types.StructType NESTED_STRUCT_TYPE = Types.StructType.of(
            Types.NestedField.required(3, "leafStructCol", LEAF_STRUCT_TYPE));

    /** Schema whose only top-level column is a nested struct. */
    private static final Schema NESTED_SCHEMA = new Schema(
            Types.NestedField.required(4, "nestedStructCol", NESTED_STRUCT_TYPE));

    /** Schema with one nested struct column and one top-level string column. */
    private static final Schema SCHEMA_WITH_NESTED_COLUMN = new Schema(
            Types.NestedField.required(4, "nestedStructCol", NESTED_STRUCT_TYPE),
            Types.NestedField.required(5, "stringCol", Types.StringType.get()));

    /** Runs the action on a table loaded through the validation catalog rather than via Spark. */
    @TestTemplate
    public void testLoadingTableDirectly() {
        sql("CREATE TABLE %s (id int, data string) USING iceberg", this.tableName);
        sql("INSERT into %s values(1, 'abcd')", this.tableName);

        Table table = this.validationCatalog.loadTable(this.tableIdent);
        ComputeTableStats.Result result = SparkActions.get().computeTableStats(table).execute();

        StatisticsFile statisticsFile = result.statisticsFile();
        Assertions.assertThat(statisticsFile.fileSizeInBytes()).isGreaterThan(0L);
        Assertions.assertThat(statisticsFile.blobMetadata()).hasSize(2);
    }

    /** Computes stats for both columns, named explicitly, and checks the first blob's ndv. */
    @TestTemplate
    public void testComputeTableStatsAction() throws NoSuchTableException, ParseException {
        sql("CREATE TABLE %s (id int, data string) USING iceberg", this.tableName);
        Table table = Spark3Util.loadIcebergTable(spark, this.tableName);

        // Very small split / row-group targets; presumably meant to spread the rows over several
        // splits and row groups (TODO confirm intent).
        table.updateProperties()
                .set("read.split.target-size", "100")
                .set("write.parquet.row-group-size-bytes", "100")
                .commit();

        List<SimpleRecord> records = Lists.newArrayList(
                new SimpleRecord(1, "a"),
                new SimpleRecord(1, "a"),
                new SimpleRecord(2, "b"),
                new SimpleRecord(3, "c"),
                new SimpleRecord(4, "d"));
        spark.createDataset(records, Encoders.bean(SimpleRecord.class))
                .writeTo(this.tableName)
                .append();

        SparkActions actions = SparkActions.get();
        table.refresh();
        ComputeTableStats.Result result =
                actions.computeTableStats(table).columns("id", "data").execute();
        Assertions.assertThat(result).isNotNull();

        StatisticsFile statisticsFile = assertSingleStatisticsFile(table, 2);
        Assertions.assertThat(statisticsFile.blobMetadata().get(0).properties())
                .containsEntry(NDV_KEY, "4");
    }

    /** Without an explicit column list, a blob is expected for each of the two columns. */
    @TestTemplate
    public void testComputeTableStatsActionWithoutExplicitColumns() throws NoSuchTableException, ParseException {
        sql("CREATE TABLE %s (id int, data string) USING iceberg", this.tableName);

        List<SimpleRecord> records = Lists.newArrayList(
                new SimpleRecord(1, "a"),
                new SimpleRecord(2, "b"),
                new SimpleRecord(3, "c"),
                new SimpleRecord(4, "d"));
        spark.createDataset(records, Encoders.bean(SimpleRecord.class))
                .coalesce(1)
                .writeTo(this.tableName)
                .append();

        Table table = Spark3Util.loadIcebergTable(spark, this.tableName);
        ComputeTableStats.Result result = SparkActions.get().computeTableStats(table).execute();
        Assertions.assertThat(result).isNotNull();

        StatisticsFile statisticsFile = assertSingleStatisticsFile(table, 2);
        List<BlobMetadata> blobs = statisticsFile.blobMetadata();
        Assertions.assertThat(blobs.get(0).properties()).containsEntry(NDV_KEY, "4");
        Assertions.assertThat(blobs.get(1).properties()).containsEntry(NDV_KEY, "4");
    }

    /** Requesting a column that is not in the table must fail with an IllegalArgumentException. */
    @TestTemplate
    public void testComputeTableStatsForInvalidColumns() throws NoSuchTableException, ParseException {
        sql("CREATE TABLE %s (id int, data string) USING iceberg", this.tableName);
        sql("INSERT into %s values(1, 'abcd')", this.tableName);

        Table table = Spark3Util.loadIcebergTable(spark, this.tableName);
        SparkActions actions = SparkActions.get();

        Assertions.assertThatThrownBy(() -> actions.computeTableStats(table).columns("id1").execute())
                .isInstanceOf(IllegalArgumentException.class)
                .hasMessageStartingWith("Can't find column id1 in table");
    }

    /** On a table that has never been written to, the result carries no statistics file. */
    @TestTemplate
    public void testComputeTableStatsWithNoSnapshots() throws NoSuchTableException, ParseException {
        sql("CREATE TABLE %s (id int, data string) USING iceberg", this.tableName);

        Table table = Spark3Util.loadIcebergTable(spark, this.tableName);
        ComputeTableStats.Result result =
                SparkActions.get().computeTableStats(table).columns("id").execute();

        Assertions.assertThat(result.statisticsFile()).isNull();
    }

    /** A null value in the analysed column: ndv is expected to be 4 for null, a, b, c, d. */
    @TestTemplate
    public void testComputeTableStatsWithNullValues() throws NoSuchTableException, ParseException {
        sql("CREATE TABLE %s (id int, data string) USING iceberg", this.tableName);

        List<SimpleRecord> records = Lists.newArrayList(
                new SimpleRecord(1, null),
                new SimpleRecord(1, "a"),
                new SimpleRecord(2, "b"),
                new SimpleRecord(3, "c"),
                new SimpleRecord(4, "d"));
        spark.createDataset(records, Encoders.bean(SimpleRecord.class))
                .coalesce(1)
                .writeTo(this.tableName)
                .append();

        Table table = Spark3Util.loadIcebergTable(spark, this.tableName);
        ComputeTableStats.Result result =
                SparkActions.get().computeTableStats(table).columns("data").execute();
        Assertions.assertThat(result).isNotNull();

        StatisticsFile statisticsFile = assertSingleStatisticsFile(table, 1);
        Assertions.assertThat(statisticsFile.blobMetadata().get(0).properties())
                .containsEntry(NDV_KEY, "4");
    }

    /**
     * A column dropped from the current schema can still be analysed for an older snapshot that
     * was written with it, but not for a snapshot written after the drop.
     */
    @TestTemplate
    public void testComputeTableStatsWithSnapshotHavingDifferentSchemas() throws NoSuchTableException, ParseException {
        SparkActions actions = SparkActions.get();
        sql("CREATE TABLE %s (id int, data string) USING iceberg", this.tableName);
        sql("INSERT into %s values(1, 'abcd')", this.tableName);
        long snapshotWithData =
                Spark3Util.loadIcebergTable(spark, this.tableName).currentSnapshot().snapshotId();

        Table table = Spark3Util.loadIcebergTable(spark, this.tableName);
        Assertions.assertThatNoException()
                .isThrownBy(() -> actions.computeTableStats(table).columns("data").execute());

        sql("ALTER TABLE %s DROP COLUMN %s", this.tableName, "data");
        sql("INSERT into %s values(1)", this.tableName);
        table.refresh();
        long snapshotWithoutData =
                Spark3Util.loadIcebergTable(spark, this.tableName).currentSnapshot().snapshotId();

        // The first snapshot was written while "data" still existed.
        Assertions.assertThatNoException()
                .isThrownBy(() -> actions.computeTableStats(table)
                        .snapshot(snapshotWithData)
                        .columns("data")
                        .execute());

        // The second snapshot was written after "data" was dropped.
        Assertions.assertThatThrownBy(() -> actions.computeTableStats(table)
                        .snapshot(snapshotWithoutData)
                        .columns("data")
                        .execute())
                .isInstanceOf(IllegalArgumentException.class)
                .hasMessageStartingWith("Can't find column data in table");
    }

    /** Runs the action without calling {@code snapshot(...)} on a table with a single row. */
    @TestTemplate
    public void testComputeTableStatsWhenSnapshotIdNotSpecified() throws NoSuchTableException, ParseException {
        sql("CREATE TABLE %s (id int, data string) USING iceberg", this.tableName);
        sql("INSERT into %s values(1, 'abcd')", this.tableName);

        Table table = Spark3Util.loadIcebergTable(spark, this.tableName);
        ComputeTableStats.Result result =
                SparkActions.get().computeTableStats(table).columns("data").execute();
        Assertions.assertThat(result).isNotNull();

        StatisticsFile statisticsFile = assertSingleStatisticsFile(table, 1);
        Assertions.assertThat(statisticsFile.blobMetadata().get(0).properties())
                .containsEntry(NDV_KEY, "1");
    }

    /** With a nested struct column plus a string column, exactly one blob is expected. */
    @TestTemplate
    public void testComputeTableStatsWithNestedSchema() throws NoSuchTableException, ParseException, IOException {
        List<Record> records = Lists.newArrayList(createNestedRecord());
        Table createdTable = this.validationCatalog.createTable(
                this.tableIdent, SCHEMA_WITH_NESTED_COLUMN, PartitionSpec.unpartitioned(), ImmutableMap.of());
        createdTable.newAppend()
                .appendFile(FileHelpers.writeDataFile(createdTable, Files.localOutput(this.temp.toFile()), records))
                .commit();

        Table table = Spark3Util.loadIcebergTable(spark, this.tableName);
        SparkActions.get().computeTableStats(table).execute();
        table.refresh();

        assertSingleStatisticsFile(table, 1);
    }

    /** A table whose only column is a nested struct must be rejected by the action. */
    @TestTemplate
    public void testComputeTableStatsWithNoComputableColumns() throws IOException {
        List<Record> records = Lists.newArrayList(createNestedRecord());
        Table table = this.validationCatalog.createTable(
                this.tableIdent, NESTED_SCHEMA, PartitionSpec.unpartitioned(), ImmutableMap.of());
        table.newAppend()
                .appendFile(FileHelpers.writeDataFile(table, Files.localOutput(this.temp.toFile()), records))
                .commit();
        table.refresh();

        SparkActions actions = SparkActions.get();
        Assertions.assertThatThrownBy(() -> actions.computeTableStats(table).execute())
                .isInstanceOf(IllegalArgumentException.class)
                .hasMessage("No columns found to compute stats");
    }

    @TestTemplate
    public void testComputeTableStatsOnByteColumn() throws NoSuchTableException, ParseException {
        testComputeTableStats("byte_col", "TINYINT");
    }

    @TestTemplate
    public void testComputeTableStatsOnShortColumn() throws NoSuchTableException, ParseException {
        testComputeTableStats("short_col", "SMALLINT");
    }

    @TestTemplate
    public void testComputeTableStatsOnIntColumn() throws NoSuchTableException, ParseException {
        testComputeTableStats("int_col", "INT");
    }

    @TestTemplate
    public void testComputeTableStatsOnLongColumn() throws NoSuchTableException, ParseException {
        testComputeTableStats("long_col", "BIGINT");
    }

    @TestTemplate
    public void testComputeTableStatsOnTimestampColumn() throws NoSuchTableException, ParseException {
        testComputeTableStats("timestamp_col", "TIMESTAMP");
    }

    @TestTemplate
    public void testComputeTableStatsOnTimestampNtzColumn() throws NoSuchTableException, ParseException {
        testComputeTableStats("timestamp_col", "TIMESTAMP_NTZ");
    }

    @TestTemplate
    public void testComputeTableStatsOnDateColumn() throws NoSuchTableException, ParseException {
        testComputeTableStats("date_col", "DATE");
    }

    @TestTemplate
    public void testComputeTableStatsOnDecimalColumn() throws NoSuchTableException, ParseException {
        testComputeTableStats("decimal_col", "DECIMAL(20, 2)");
    }

    @TestTemplate
    public void testComputeTableStatsOnBinaryColumn() throws NoSuchTableException, ParseException {
        testComputeTableStats("binary_col", "BINARY");
    }

    /**
     * Shared body of the per-type tests: creates a table with an {@code id} column plus one column
     * of the given SQL type, appends random rows, computes stats for that column only and checks
     * that the single resulting blob carries an {@code ndv} property.
     *
     * @param columnName name of the column to create and analyse
     * @param columnType SQL type used for that column in the {@code CREATE TABLE} statement
     */
    public void testComputeTableStats(String columnName, String columnType) throws NoSuchTableException, ParseException {
        sql("CREATE TABLE %s (id int, %s %s) USING iceberg", this.tableName, columnName, columnType);
        Table table = Spark3Util.loadIcebergTable(spark, this.tableName);
        append(this.tableName, randomDataDF(table.schema()));

        SparkActions actions = SparkActions.get();
        table.refresh();
        ComputeTableStats.Result result =
                actions.computeTableStats(table).columns(columnName).execute();
        Assertions.assertThat(result).isNotNull();

        StatisticsFile statisticsFile = assertSingleStatisticsFile(table, 1);
        Assertions.assertThat(statisticsFile.blobMetadata().get(0).properties()).containsKey(NDV_KEY);
    }

    /**
     * Asserts that the table has exactly one statistics file, that it is non-empty and that it
     * holds the expected number of blobs.
     *
     * @param table table whose statistics files are inspected
     * @param expectedBlobCount number of blob metadata entries the file must contain
     * @return the table's single statistics file, for further assertions
     */
    private static StatisticsFile assertSingleStatisticsFile(Table table, int expectedBlobCount) {
        List<StatisticsFile> statisticsFiles = table.statisticsFiles();
        Assertions.assertThat(statisticsFiles).hasSize(1);

        StatisticsFile statisticsFile = statisticsFiles.get(0);
        Assertions.assertThat(statisticsFile.fileSizeInBytes()).isGreaterThan(0L);
        Assertions.assertThat(statisticsFile.blobMetadata()).hasSize(expectedBlobCount);
        return statisticsFile;
    }

    /**
     * Builds one record shaped like {@link #SCHEMA_WITH_NESTED_COLUMN}: a two-level nested struct
     * holding {@code 0L} and {@code 0.0}, plus the string {@code "data"}.
     */
    private GenericRecord createNestedRecord() {
        GenericRecord leaf = GenericRecord.create(LEAF_STRUCT_TYPE);
        leaf.set(0, 0L);
        leaf.set(1, 0.0d);

        GenericRecord nested = GenericRecord.create(NESTED_STRUCT_TYPE);
        nested.set(0, leaf);

        GenericRecord record = GenericRecord.create(SCHEMA_WITH_NESTED_COLUMN);
        record.set(0, nested);
        record.set(1, "data");
        return record;
    }

    /** Generates 10 random rows (seed 0) for the given schema and wraps them in a DataFrame. */
    private Dataset<Row> randomDataDF(Schema schema) {
        Iterable<InternalRow> rows = RandomData.generateSpark(schema, 10, 0L);
        JavaRDD<InternalRow> rowRdd = sparkContext.parallelize(Lists.newArrayList(rows));
        return spark.internalCreateDataFrame(JavaRDD.toRDD(rowRdd), SparkSchemaUtil.convert(schema), false);
    }

    /** Appends the dataset to the named table as a single partition with fanout writing enabled. */
    private void append(String table, Dataset<Row> dataset) throws NoSuchTableException {
        dataset.coalesce(1).writeTo(table).option("fanout-enabled", "true").append();
    }

    /** Drops the test table after each test so the next one starts from a clean state. */
    @AfterEach
    public void removeTable() {
        sql("DROP TABLE IF EXISTS %s", this.tableName);
    }
}
