package org.apache.iceberg.spark.actions;

import java.util.List;
import org.apache.iceberg.MockFileScanTask;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.SortOrder;
import org.apache.iceberg.Table;
import org.apache.iceberg.actions.SizeBasedDataRewriter;
import org.apache.iceberg.actions.SizeBasedFileRewriter;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
import org.apache.iceberg.spark.TestBase;
import org.apache.iceberg.types.Types;
import org.assertj.core.api.Assertions;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Test;

/**
 * Tests for the Spark bin-pack, sort and z-order data rewriters: file-group planning, constructor
 * validation, the set of reported options, and rejection of invalid option values.
 */
public class TestSparkFileRewriter extends TestBase {
    /** Identifier of the scratch table that every test creates and {@code removeTable()} drops. */
    private static final TableIdentifier TABLE_IDENT = TableIdentifier.of("default", "tbl");

    /** Two required columns: integer {@code id} (field 1) and string {@code dep} (field 2). */
    private static final Schema SCHEMA =
        new Schema(
            Types.NestedField.required(1, "id", Types.IntegerType.get()),
            Types.NestedField.required(2, "dep", Types.StringType.get()));

    /** Identity partitioning on {@code dep}. */
    private static final PartitionSpec SPEC =
        PartitionSpec.builderFor(SCHEMA).identity("dep").build();

    /** Ascending sort on {@code id}. */
    private static final SortOrder SORT_ORDER = SortOrder.builderFor(SCHEMA).asc("id").build();

    /** Drops the table identified by {@code TABLE_IDENT} after each test. */
    @AfterEach
    public void removeTable() {
        catalog.dropTable(TABLE_IDENT);
    }

    /** Runs all shared file-selection checks against a bin-pack rewriter on a newly created table. */
    @Test
    public void testBinPackDataSelectFiles() {
        Table table = catalog.createTable(TABLE_IDENT, SCHEMA);
        SparkBinPackDataRewriter rewriter = new SparkBinPackDataRewriter(spark, table);

        // Each check calls init(...) on the same rewriter instance with its own options.
        checkDataFileSizeFiltering(rewriter);
        checkDataFilesDeleteThreshold(rewriter);
        checkDataFileGroupWithEnoughFiles(rewriter);
        checkDataFileGroupWithEnoughData(rewriter);
        checkDataFileGroupWithTooMuchData(rewriter);
        checkDataFilesWithHighFileScopedDeleteRatio(rewriter);
    }

    /** Runs all shared file-selection checks against a sort rewriter that uses {@code SORT_ORDER}. */
    @Test
    public void testSortDataSelectFiles() {
        Table table = catalog.createTable(TABLE_IDENT, SCHEMA);
        SparkSortDataRewriter rewriter = new SparkSortDataRewriter(spark, table, SORT_ORDER);

        // Each check calls init(...) on the same rewriter instance with its own options.
        checkDataFileSizeFiltering(rewriter);
        checkDataFilesDeleteThreshold(rewriter);
        checkDataFileGroupWithEnoughFiles(rewriter);
        checkDataFileGroupWithEnoughData(rewriter);
        checkDataFileGroupWithTooMuchData(rewriter);
        checkDataFilesWithHighFileScopedDeleteRatio(rewriter);
    }

    /** Runs all shared file-selection checks against a z-order rewriter on column {@code id}. */
    @Test
    public void testZOrderDataSelectFiles() {
        Table table = catalog.createTable(TABLE_IDENT, SCHEMA);
        SparkZOrderDataRewriter rewriter =
            new SparkZOrderDataRewriter(spark, table, ImmutableList.of("id"));

        // Each check calls init(...) on the same rewriter instance with its own options.
        checkDataFileSizeFiltering(rewriter);
        checkDataFilesDeleteThreshold(rewriter);
        checkDataFileGroupWithEnoughFiles(rewriter);
        checkDataFileGroupWithEnoughData(rewriter);
        checkDataFileGroupWithTooMuchData(rewriter);
        checkDataFilesWithHighFileScopedDeleteRatio(rewriter);
    }

    /**
     * Plans file groups for three mock tasks (constructed with 100, 450 and 1000) using min/target/max
     * file sizes of 250/500/750 and a delete-file threshold of {@code Integer.MAX_VALUE}, then
     * expects exactly one group containing two tasks.
     *
     * @param sizeBasedDataRewriter rewriter under test; {@code init} is called on it here
     */
    private void checkDataFileSizeFiltering(SizeBasedDataRewriter sizeBasedDataRewriter) {
        // Left raw: the task element type accepted by planFileGroups is not imported in this file.
        List tasks =
            ImmutableList.of(
                new MockFileScanTask(100L), new MockFileScanTask(450L), new MockFileScanTask(1000L));

        ImmutableMap<String, String> options =
            ImmutableMap.of(
                "min-file-size-bytes", "250",
                "target-file-size-bytes", "500",
                "max-file-size-bytes", "750",
                "delete-file-threshold", String.valueOf(Integer.MAX_VALUE));
        sizeBasedDataRewriter.init(options);

        Iterable<?> groups = sizeBasedDataRewriter.planFileGroups(tasks);
        Assertions.assertThat(groups).as("Must have 1 group").hasSize(1);

        List<?> onlyGroup = (List<?>) Iterables.getOnlyElement(groups);
        Assertions.assertThat(onlyGroup).as("Must rewrite 2 files").hasSize(2);
    }

    /**
     * Plans file groups for two mock tasks created via {@code mockTaskWithDeletes} (with 3 and 1 as
     * the second argument) under a delete-file threshold of 2, then expects exactly one group
     * containing one task.
     *
     * @param sizeBasedDataRewriter rewriter under test; {@code init} is called on it here
     */
    private void checkDataFilesDeleteThreshold(SizeBasedDataRewriter sizeBasedDataRewriter) {
        // Left raw: the task element type accepted by planFileGroups is not imported in this file.
        List tasks =
            ImmutableList.of(
                MockFileScanTask.mockTaskWithDeletes(1000L, 3),
                MockFileScanTask.mockTaskWithDeletes(1000L, 1));

        ImmutableMap<String, String> options =
            ImmutableMap.of(
                "min-file-size-bytes", "1",
                "target-file-size-bytes", "2000",
                "max-file-size-bytes", "5000",
                "delete-file-threshold", "2");
        sizeBasedDataRewriter.init(options);

        Iterable<?> groups = sizeBasedDataRewriter.planFileGroups(tasks);
        Assertions.assertThat(groups).as("Must have 1 group").hasSize(1);

        List<?> onlyGroup = (List<?>) Iterables.getOnlyElement(groups);
        Assertions.assertThat(onlyGroup).as("Must rewrite 1 file").hasSize(1);
    }

    /**
     * Plans file groups for four mock tasks (each constructed with 100) with {@code min-input-files}
     * set to 3 and a minimum file size of 150, then expects exactly one group containing all four
     * tasks.
     *
     * @param sizeBasedDataRewriter rewriter under test; {@code init} is called on it here
     */
    private void checkDataFileGroupWithEnoughFiles(SizeBasedDataRewriter sizeBasedDataRewriter) {
        // Left raw: the task element type accepted by planFileGroups is not imported in this file.
        List tasks =
            ImmutableList.of(
                new MockFileScanTask(100L),
                new MockFileScanTask(100L),
                new MockFileScanTask(100L),
                new MockFileScanTask(100L));

        ImmutableMap<String, String> options =
            ImmutableMap.of(
                "min-input-files", "3",
                "min-file-size-bytes", "150",
                "target-file-size-bytes", "1000",
                "max-file-size-bytes", "5000",
                "delete-file-threshold", String.valueOf(Integer.MAX_VALUE));
        sizeBasedDataRewriter.init(options);

        Iterable<?> groups = sizeBasedDataRewriter.planFileGroups(tasks);
        Assertions.assertThat(groups).as("Must have 1 group").hasSize(1);

        List<?> onlyGroup = (List<?>) Iterables.getOnlyElement(groups);
        Assertions.assertThat(onlyGroup).as("Must rewrite 4 files").hasSize(4);
    }

    /**
     * Plans file groups for three mock tasks (each constructed with 100) with {@code min-input-files}
     * set to 5 and a target file size of 250, then expects exactly one group containing all three
     * tasks.
     *
     * @param sizeBasedDataRewriter rewriter under test; {@code init} is called on it here
     */
    private void checkDataFileGroupWithEnoughData(SizeBasedDataRewriter sizeBasedDataRewriter) {
        // Left raw: the task element type accepted by planFileGroups is not imported in this file.
        List tasks =
            ImmutableList.of(
                new MockFileScanTask(100L), new MockFileScanTask(100L), new MockFileScanTask(100L));

        ImmutableMap<String, String> options =
            ImmutableMap.of(
                "min-input-files", "5",
                "min-file-size-bytes", "200",
                "target-file-size-bytes", "250",
                "max-file-size-bytes", "500",
                "delete-file-threshold", String.valueOf(Integer.MAX_VALUE));
        sizeBasedDataRewriter.init(options);

        Iterable<?> groups = sizeBasedDataRewriter.planFileGroups(tasks);
        Assertions.assertThat(groups).as("Must have 1 group").hasSize(1);

        List<?> onlyGroup = (List<?>) Iterables.getOnlyElement(groups);
        Assertions.assertThat(onlyGroup).as("Must rewrite 3 files").hasSize(3);
    }

    /**
     * Plans file groups for a single mock task constructed with 2000 while the maximum file size is
     * 500 and {@code min-input-files} is 5, then expects exactly one group containing that task.
     *
     * @param sizeBasedDataRewriter rewriter under test; {@code init} is called on it here
     */
    private void checkDataFileGroupWithTooMuchData(SizeBasedDataRewriter sizeBasedDataRewriter) {
        // Left raw: the task element type accepted by planFileGroups is not imported in this file.
        List tasks = ImmutableList.of(new MockFileScanTask(2000L));

        ImmutableMap<String, String> options =
            ImmutableMap.of(
                "min-input-files", "5",
                "min-file-size-bytes", "200",
                "target-file-size-bytes", "250",
                "max-file-size-bytes", "500",
                "delete-file-threshold", String.valueOf(Integer.MAX_VALUE));
        sizeBasedDataRewriter.init(options);

        Iterable<?> groups = sizeBasedDataRewriter.planFileGroups(tasks);
        Assertions.assertThat(groups).as("Must have 1 group").hasSize(1);

        List<?> onlyGroup = (List<?>) Iterables.getOnlyElement(groups);
        Assertions.assertThat(onlyGroup).as("Must rewrite big file").hasSize(1);
    }

    /**
     * Plans file groups for two mock tasks created via {@code mockTaskWithFileScopedDeleteRecords}
     * that differ only in the last argument (30 versus 29), with a minimum file size of 0 and a
     * delete-file threshold of 10, then expects exactly one group containing one task.
     *
     * @param sizeBasedDataRewriter rewriter under test; {@code init} is called on it here
     */
    private void checkDataFilesWithHighFileScopedDeleteRatio(SizeBasedDataRewriter sizeBasedDataRewriter) {
        // Left raw: the task element type accepted by planFileGroups is not imported in this file.
        List tasks =
            ImmutableList.of(
                MockFileScanTask.mockTaskWithFileScopedDeleteRecords(1000L, 100L, 1, 30L),
                MockFileScanTask.mockTaskWithFileScopedDeleteRecords(1000L, 100L, 1, 29L));

        ImmutableMap<String, String> options =
            ImmutableMap.of("min-file-size-bytes", "0", "delete-file-threshold", "10");
        sizeBasedDataRewriter.init(options);

        Iterable<?> groups = sizeBasedDataRewriter.planFileGroups(tasks);
        Assertions.assertThat(groups).as("Must have 1 group").hasSize(1);

        List<?> onlyGroup = (List<?>) Iterables.getOnlyElement(groups);
        Assertions.assertThat(onlyGroup).as("Must rewrite 1 file").hasSize(1);
    }

    /**
     * Verifies that constructing a sort rewriter fails with a descriptive message when no sort order
     * is given, when it is {@code null}, and when it is {@code SortOrder.unsorted()}.
     */
    @Test
    public void testInvalidConstructorUsagesSortData() {
        Table table = catalog.createTable(TABLE_IDENT, SCHEMA);

        // No explicit sort order; the table was created without one.
        Assertions.assertThatThrownBy(() -> new SparkSortDataRewriter(spark, table))
            .hasMessageContaining("Cannot sort data without a valid sort order")
            .hasMessageContaining("is unsorted and no sort order is provided");

        // Explicit null sort order.
        Assertions.assertThatThrownBy(() -> new SparkSortDataRewriter(spark, table, (SortOrder) null))
            .hasMessageContaining("Cannot sort data without a valid sort order")
            .hasMessageContaining("the provided sort order is null or empty");

        // Explicit unsorted sort order.
        Assertions.assertThatThrownBy(
                () -> new SparkSortDataRewriter(spark, table, SortOrder.unsorted()))
            .hasMessageContaining("Cannot sort data without a valid sort order")
            .hasMessageContaining("the provided sort order is null or empty");
    }

    /**
     * Verifies that constructing a z-order rewriter on a table partitioned by identity of
     * {@code dep} fails for a {@code null} column list, an empty column list, and column lists that
     * name only {@code dep} (in either "dep" or "DeP" spelling).
     */
    @Test
    public void testInvalidConstructorUsagesZOrderData() {
        Table table = catalog.createTable(TABLE_IDENT, SCHEMA, SPEC);

        Assertions.assertThatThrownBy(
                () -> new SparkZOrderDataRewriter(spark, table, (List<String>) null))
            .hasMessageContaining("Cannot ZOrder when no columns are specified");

        Assertions.assertThatThrownBy(
                () -> new SparkZOrderDataRewriter(spark, table, ImmutableList.of()))
            .hasMessageContaining("Cannot ZOrder when no columns are specified");

        Assertions.assertThatThrownBy(
                () -> new SparkZOrderDataRewriter(spark, table, ImmutableList.of("dep")))
            .hasMessageContaining("Cannot ZOrder")
            .hasMessageContaining("all columns provided were identity partition columns");

        // Same column name with different letter case must be rejected the same way.
        Assertions.assertThatThrownBy(
                () -> new SparkZOrderDataRewriter(spark, table, ImmutableList.of("DeP")))
            .hasMessageContaining("Cannot ZOrder")
            .hasMessageContaining("all columns provided were identity partition columns");
    }

    /** Verifies the exact set of option keys reported by a bin-pack rewriter. */
    @Test
    public void testBinPackDataValidOptions() {
        Table table = catalog.createTable(TABLE_IDENT, SCHEMA);
        SparkBinPackDataRewriter rewriter = new SparkBinPackDataRewriter(spark, table);

        Assertions.assertThat(rewriter.validOptions())
            .as("Rewriter must report all supported options")
            .isEqualTo(
                ImmutableSet.of(
                    "target-file-size-bytes",
                    "min-file-size-bytes",
                    "max-file-size-bytes",
                    "min-input-files",
                    "rewrite-all",
                    "max-file-group-size-bytes",
                    "delete-file-threshold",
                    "delete-ratio-threshold"));
    }

    /** Verifies the exact set of option keys reported by a sort rewriter. */
    @Test
    public void testSortDataValidOptions() {
        Table table = catalog.createTable(TABLE_IDENT, SCHEMA);
        SparkSortDataRewriter rewriter = new SparkSortDataRewriter(spark, table, SORT_ORDER);

        Assertions.assertThat(rewriter.validOptions())
            .as("Rewriter must report all supported options")
            .isEqualTo(
                ImmutableSet.of(
                    "shuffle-partitions-per-file",
                    "target-file-size-bytes",
                    "min-file-size-bytes",
                    "max-file-size-bytes",
                    "min-input-files",
                    "rewrite-all",
                    "max-file-group-size-bytes",
                    "delete-file-threshold",
                    "delete-ratio-threshold",
                    "compression-factor"));
    }

    /** Verifies the exact set of option keys reported by a z-order rewriter. */
    @Test
    public void testZOrderDataValidOptions() {
        Table table = catalog.createTable(TABLE_IDENT, SCHEMA);
        SparkZOrderDataRewriter rewriter =
            new SparkZOrderDataRewriter(spark, table, ImmutableList.of("id"));

        Assertions.assertThat(rewriter.validOptions())
            .as("Rewriter must report all supported options")
            .isEqualTo(
                ImmutableSet.of(
                    "shuffle-partitions-per-file",
                    "target-file-size-bytes",
                    "min-file-size-bytes",
                    "max-file-size-bytes",
                    "min-input-files",
                    "rewrite-all",
                    "max-file-group-size-bytes",
                    "delete-file-threshold",
                    "delete-ratio-threshold",
                    "compression-factor",
                    "max-output-size",
                    "var-length-contribution"));
    }

    /**
     * Verifies that a bin-pack rewriter rejects the invalid size-based options covered by
     * {@code validateSizeBasedRewriterOptions}, a negative delete-file threshold, and delete-ratio
     * thresholds of -1 and 127.
     */
    @Test
    public void testInvalidValuesForBinPackDataOptions() {
        Table table = catalog.createTable(TABLE_IDENT, SCHEMA);
        SparkBinPackDataRewriter rewriter = new SparkBinPackDataRewriter(spark, table);

        validateSizeBasedRewriterOptions(rewriter);

        ImmutableMap<String, String> negativeDeleteThreshold =
            ImmutableMap.of("delete-file-threshold", "-1");
        Assertions.assertThatThrownBy(() -> rewriter.init(negativeDeleteThreshold))
            .isInstanceOf(IllegalArgumentException.class)
            .hasMessageContaining("'delete-file-threshold' is set to -1 but must be >= 0");

        ImmutableMap<String, String> negativeDeleteRatio =
            ImmutableMap.of("delete-ratio-threshold", "-1");
        Assertions.assertThatThrownBy(() -> rewriter.init(negativeDeleteRatio))
            .isInstanceOf(IllegalArgumentException.class)
            .hasMessageContaining("'delete-ratio-threshold' is set to -1.0 but must be > 0");

        ImmutableMap<String, String> tooLargeDeleteRatio =
            ImmutableMap.of("delete-ratio-threshold", "127");
        Assertions.assertThatThrownBy(() -> rewriter.init(tooLargeDeleteRatio))
            .isInstanceOf(IllegalArgumentException.class)
            .hasMessageContaining("'delete-ratio-threshold' is set to 127.0 but must be <= 1");
    }

    /**
     * Verifies that a sort rewriter rejects the invalid size-based options covered by
     * {@code validateSizeBasedRewriterOptions}, a negative delete-file threshold, a zero compression
     * factor, and delete-ratio thresholds of -1 and 127.
     */
    @Test
    public void testInvalidValuesForSortDataOptions() {
        Table table = catalog.createTable(TABLE_IDENT, SCHEMA);
        SparkSortDataRewriter rewriter = new SparkSortDataRewriter(spark, table, SORT_ORDER);

        validateSizeBasedRewriterOptions(rewriter);

        ImmutableMap<String, String> negativeDeleteThreshold =
            ImmutableMap.of("delete-file-threshold", "-1");
        Assertions.assertThatThrownBy(() -> rewriter.init(negativeDeleteThreshold))
            .isInstanceOf(IllegalArgumentException.class)
            .hasMessageContaining("'delete-file-threshold' is set to -1 but must be >= 0");

        ImmutableMap<String, String> zeroCompressionFactor =
            ImmutableMap.of("compression-factor", "0");
        Assertions.assertThatThrownBy(() -> rewriter.init(zeroCompressionFactor))
            .isInstanceOf(IllegalArgumentException.class)
            .hasMessageContaining("'compression-factor' is set to 0.0 but must be > 0");

        ImmutableMap<String, String> negativeDeleteRatio =
            ImmutableMap.of("delete-ratio-threshold", "-1");
        Assertions.assertThatThrownBy(() -> rewriter.init(negativeDeleteRatio))
            .isInstanceOf(IllegalArgumentException.class)
            .hasMessageContaining("'delete-ratio-threshold' is set to -1.0 but must be > 0");

        ImmutableMap<String, String> tooLargeDeleteRatio =
            ImmutableMap.of("delete-ratio-threshold", "127");
        Assertions.assertThatThrownBy(() -> rewriter.init(tooLargeDeleteRatio))
            .isInstanceOf(IllegalArgumentException.class)
            .hasMessageContaining("'delete-ratio-threshold' is set to 127.0 but must be <= 1");
    }

    /**
     * Verifies that a z-order rewriter rejects the invalid size-based options covered by
     * {@code validateSizeBasedRewriterOptions}, a negative delete-file threshold, a zero compression
     * factor, a zero max output size, a zero variable-length contribution, and delete-ratio
     * thresholds of -1 and 127.
     */
    @Test
    public void testInvalidValuesForZOrderDataOptions() {
        Table table = catalog.createTable(TABLE_IDENT, SCHEMA);
        SparkZOrderDataRewriter rewriter =
            new SparkZOrderDataRewriter(spark, table, ImmutableList.of("id"));

        validateSizeBasedRewriterOptions(rewriter);

        ImmutableMap<String, String> negativeDeleteThreshold =
            ImmutableMap.of("delete-file-threshold", "-1");
        Assertions.assertThatThrownBy(() -> rewriter.init(negativeDeleteThreshold))
            .isInstanceOf(IllegalArgumentException.class)
            .hasMessageContaining("'delete-file-threshold' is set to -1 but must be >= 0");

        ImmutableMap<String, String> zeroCompressionFactor =
            ImmutableMap.of("compression-factor", "0");
        Assertions.assertThatThrownBy(() -> rewriter.init(zeroCompressionFactor))
            .isInstanceOf(IllegalArgumentException.class)
            .hasMessageContaining("'compression-factor' is set to 0.0 but must be > 0");

        ImmutableMap<String, String> zeroMaxOutputSize = ImmutableMap.of("max-output-size", "0");
        Assertions.assertThatThrownBy(() -> rewriter.init(zeroMaxOutputSize))
            .isInstanceOf(IllegalArgumentException.class)
            .hasMessageContaining("Cannot have the interleaved ZOrder value use less than 1 byte")
            .hasMessageContaining("'max-output-size' was set to 0");

        ImmutableMap<String, String> zeroVarLengthContribution =
            ImmutableMap.of("var-length-contribution", "0");
        Assertions.assertThatThrownBy(() -> rewriter.init(zeroVarLengthContribution))
            .isInstanceOf(IllegalArgumentException.class)
            .hasMessageContaining("Cannot use less than 1 byte for variable length types with ZOrder")
            .hasMessageContaining("'var-length-contribution' was set to 0");

        ImmutableMap<String, String> negativeDeleteRatio =
            ImmutableMap.of("delete-ratio-threshold", "-1");
        Assertions.assertThatThrownBy(() -> rewriter.init(negativeDeleteRatio))
            .isInstanceOf(IllegalArgumentException.class)
            .hasMessageContaining("'delete-ratio-threshold' is set to -1.0 but must be > 0");

        ImmutableMap<String, String> tooLargeDeleteRatio =
            ImmutableMap.of("delete-ratio-threshold", "127");
        Assertions.assertThatThrownBy(() -> rewriter.init(tooLargeDeleteRatio))
            .isInstanceOf(IllegalArgumentException.class)
            .hasMessageContaining("'delete-ratio-threshold' is set to 127.0 but must be <= 1");
    }

    /**
     * Asserts that {@code init} rejects each of six invalid size-related option maps with the
     * expected message: zero target size, negative min size, target below min, target above max,
     * zero min input files, and zero max file-group size.
     *
     * @param sizeBasedFileRewriter rewriter whose {@code init} is exercised with the invalid options
     */
    private void validateSizeBasedRewriterOptions(SizeBasedFileRewriter<?, ?> sizeBasedFileRewriter) {
        ImmutableMap<String, String> zeroTargetSize = ImmutableMap.of("target-file-size-bytes", "0");
        Assertions.assertThatThrownBy(() -> sizeBasedFileRewriter.init(zeroTargetSize))
            .hasMessageContaining("'target-file-size-bytes' is set to 0 but must be > 0");

        ImmutableMap<String, String> negativeMinSize = ImmutableMap.of("min-file-size-bytes", "-1");
        Assertions.assertThatThrownBy(() -> sizeBasedFileRewriter.init(negativeMinSize))
            .hasMessageContaining("'min-file-size-bytes' is set to -1 but must be >= 0");

        ImmutableMap<String, String> targetBelowMin =
            ImmutableMap.of("target-file-size-bytes", "3", "min-file-size-bytes", "5");
        Assertions.assertThatThrownBy(() -> sizeBasedFileRewriter.init(targetBelowMin))
            .hasMessageContaining("'target-file-size-bytes' (3) must be > 'min-file-size-bytes' (5)")
            .hasMessageContaining("all new files will be smaller than the min threshold");

        ImmutableMap<String, String> targetAboveMax =
            ImmutableMap.of("target-file-size-bytes", "5", "max-file-size-bytes", "3");
        Assertions.assertThatThrownBy(() -> sizeBasedFileRewriter.init(targetAboveMax))
            .hasMessageContaining("'target-file-size-bytes' (5) must be < 'max-file-size-bytes' (3)")
            .hasMessageContaining("all new files will be larger than the max threshold");

        ImmutableMap<String, String> zeroMinInputFiles = ImmutableMap.of("min-input-files", "0");
        Assertions.assertThatThrownBy(() -> sizeBasedFileRewriter.init(zeroMinInputFiles))
            .hasMessageContaining("'min-input-files' is set to 0 but must be > 0");

        ImmutableMap<String, String> zeroMaxGroupSize =
            ImmutableMap.of("max-file-group-size-bytes", "0");
        Assertions.assertThatThrownBy(() -> sizeBasedFileRewriter.init(zeroMaxGroupSize))
            .hasMessageContaining("'max-file-group-size-bytes' is set to 0 but must be > 0");
    }
}
