package org.apache.iceberg.actions;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.stream.IntStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.FileScanTask;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.hadoop.HadoopTables;
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.io.CloseableIterator;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.apache.iceberg.spark.SparkTestBase;
import org.apache.iceberg.spark.source.ThreeColumnRecord;
import org.apache.iceberg.types.Types;
import org.apache.spark.sql.AnalysisException;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

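/**
 * Abstract test suite for the rewrite-data-files action obtained via
 * {@code Actions.forTable(table).rewriteDataFiles()}. Each test creates a Hadoop table in a fresh
 * temporary folder, appends records through Spark, runs the action, and checks the resulting data
 * file counts and row contents.
 *
 * <p>Source recovered from: org/apache/iceberg/actions/TestRewriteDataFilesAction.class
 */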
public abstract class TestRewriteDataFilesAction extends SparkTestBase {
    /** {@link HadoopTables} instance through which every test table is created. */
    private static final HadoopTables TABLES = new HadoopTables(new Configuration());

    /** Three optional columns: c1 (integer, id 1), c2 (string, id 2), c3 (string, id 3). */
    private static final Schema SCHEMA = new Schema(
            Types.NestedField.optional(1, "c1", Types.IntegerType.get()),
            Types.NestedField.optional(2, "c2", Types.StringType.get()),
            Types.NestedField.optional(3, "c3", Types.StringType.get()));

    /** JUnit rule that supplies the scratch folder used as the table location. */
    @Rule
    public TemporaryFolder temp = new TemporaryFolder();

    /** URI string of the table under test; assigned before each test by setupTableLocation(). */
    private String tableLocation;

    /**
     * Points {@code tableLocation} at a newly created temporary folder before each test.
     *
     * @throws Exception if the temporary folder cannot be created
     */
    @Before
    public void setupTableLocation() throws Exception {
        final String freshFolderUri = temp.newFolder().toURI().toString();
        tableLocation = freshFolderUri;
    }

    /** Running the rewrite action on a table with no snapshot must leave it without a snapshot. */
    @Test
    public void testRewriteDataFilesEmptyTable() {
        final Table table =
                TABLES.create(SCHEMA, PartitionSpec.unpartitioned(), Maps.newHashMap(), tableLocation);
        Assert.assertNull("Table must be empty", table.currentSnapshot());

        Actions.forTable(table)
                .rewriteDataFiles()
                .execute();

        Assert.assertNull("Table must stay empty", table.currentSnapshot());
    }

    /**
     * Appends two batches of two records to an unpartitioned table, then verifies that the rewrite
     * action replaces the 4 planned data files with a single file and that the table rows are
     * unchanged.
     */
    @Test
    public void testRewriteDataFilesUnpartitionedTable() {
        Table table = TABLES.create(SCHEMA, PartitionSpec.unpartitioned(), Maps.newHashMap(), tableLocation);

        List<ThreeColumnRecord> records1 = Lists.newArrayList(
                new ThreeColumnRecord(1, null, "AAAA"),
                new ThreeColumnRecord(1, "BBBBBBBBBB", "BBBB"));
        writeRecords(records1);

        List<ThreeColumnRecord> records2 = Lists.newArrayList(
                new ThreeColumnRecord(2, "CCCCCCCCCC", "CCCC"),
                new ThreeColumnRecord(2, "DDDDDDDDDD", "DDDD"));
        writeRecords(records2);

        table.refresh();

        List<DataFile> dataFilesBefore = Lists.newArrayList(
                CloseableIterable.transform(table.newScan().planFiles(), FileScanTask::file));
        Assert.assertEquals("Should have 4 data files before rewrite", 4, dataFilesBefore.size());

        RewriteDataFilesActionResult result = Actions.forTable(table).rewriteDataFiles().execute();
        Assert.assertEquals("Action should rewrite 4 data files", 4, result.deletedDataFiles().size());
        Assert.assertEquals("Action should add 1 data file", 1, result.addedDataFiles().size());

        table.refresh();

        // This check runs after the rewrite; the message previously said "before rewrite".
        List<DataFile> dataFilesAfter = Lists.newArrayList(
                CloseableIterable.transform(table.newScan().planFiles(), FileScanTask::file));
        Assert.assertEquals("Should have 1 data files after rewrite", 1, dataFilesAfter.size());

        List<ThreeColumnRecord> expectedRecords = Lists.newArrayList();
        expectedRecords.addAll(records1);
        expectedRecords.addAll(records2);

        List<ThreeColumnRecord> actualRecords = spark.read()
                .format("iceberg")
                .load(tableLocation)
                .sort("c1", "c2")
                .as(Encoders.bean(ThreeColumnRecord.class))
                .collectAsList();
        Assert.assertEquals("Rows must match", expectedRecords, actualRecords);
    }

    /**
     * Appends four batches of two records to a table partitioned by identity(c1) and
     * truncate(c2, 2), then verifies that the rewrite action replaces the 8 planned data files
     * with 4 and that the table rows are unchanged.
     */
    @Test
    public void testRewriteDataFilesPartitionedTable() {
        PartitionSpec spec = PartitionSpec.builderFor(SCHEMA)
                .identity("c1")
                .truncate("c2", 2)
                .build();
        Table table = TABLES.create(SCHEMA, spec, Maps.newHashMap(), tableLocation);

        List<ThreeColumnRecord> records1 = Lists.newArrayList(
                new ThreeColumnRecord(1, "AAAAAAAAAA", "AAAA"),
                new ThreeColumnRecord(1, "AAAAAAAAAA", "CCCC"));
        writeRecords(records1);

        List<ThreeColumnRecord> records2 = Lists.newArrayList(
                new ThreeColumnRecord(1, "BBBBBBBBBB", "BBBB"),
                new ThreeColumnRecord(1, "BBBBBBBBBB", "DDDD"));
        writeRecords(records2);

        List<ThreeColumnRecord> records3 = Lists.newArrayList(
                new ThreeColumnRecord(2, "AAAAAAAAAA", "EEEE"),
                new ThreeColumnRecord(2, "AAAAAAAAAA", "GGGG"));
        writeRecords(records3);

        List<ThreeColumnRecord> records4 = Lists.newArrayList(
                new ThreeColumnRecord(2, "BBBBBBBBBB", "FFFF"),
                new ThreeColumnRecord(2, "BBBBBBBBBB", "HHHH"));
        writeRecords(records4);

        table.refresh();

        List<DataFile> dataFilesBefore = Lists.newArrayList(
                CloseableIterable.transform(table.newScan().planFiles(), FileScanTask::file));
        Assert.assertEquals("Should have 8 data files before rewrite", 8, dataFilesBefore.size());

        RewriteDataFilesActionResult result = Actions.forTable(table).rewriteDataFiles().execute();
        Assert.assertEquals("Action should rewrite 8 data files", 8, result.deletedDataFiles().size());
        Assert.assertEquals("Action should add 4 data file", 4, result.addedDataFiles().size());

        table.refresh();

        // This check runs after the rewrite; the message previously said "before rewrite".
        List<DataFile> dataFilesAfter = Lists.newArrayList(
                CloseableIterable.transform(table.newScan().planFiles(), FileScanTask::file));
        Assert.assertEquals("Should have 4 data files after rewrite", 4, dataFilesAfter.size());

        List<ThreeColumnRecord> expectedRecords = Lists.newArrayList();
        expectedRecords.addAll(records1);
        expectedRecords.addAll(records2);
        expectedRecords.addAll(records3);
        expectedRecords.addAll(records4);

        List<ThreeColumnRecord> actualRecords = spark.read()
                .format("iceberg")
                .load(tableLocation)
                .sort("c1", "c2", "c3")
                .as(Encoders.bean(ThreeColumnRecord.class))
                .collectAsList();
        Assert.assertEquals("Rows must match", expectedRecords, actualRecords);
    }

    /**
     * Runs the rewrite action with the filters {@code c1 = 1} and {@code c2 startsWith "AA"} on a
     * table partitioned by identity(c1) and truncate(c2, 2). Verifies that only 2 of the 8 data
     * files are rewritten into 1, leaving 7 files, and that the table rows are unchanged.
     */
    @Test
    public void testRewriteDataFilesWithFilter() {
        PartitionSpec spec = PartitionSpec.builderFor(SCHEMA)
                .identity("c1")
                .truncate("c2", 2)
                .build();
        Table table = TABLES.create(SCHEMA, spec, Maps.newHashMap(), tableLocation);

        List<ThreeColumnRecord> records1 = Lists.newArrayList(
                new ThreeColumnRecord(1, "AAAAAAAAAA", "AAAA"),
                new ThreeColumnRecord(1, "AAAAAAAAAA", "CCCC"));
        writeRecords(records1);

        List<ThreeColumnRecord> records2 = Lists.newArrayList(
                new ThreeColumnRecord(1, "BBBBBBBBBB", "BBBB"),
                new ThreeColumnRecord(1, "BBBBBBBBBB", "DDDD"));
        writeRecords(records2);

        List<ThreeColumnRecord> records3 = Lists.newArrayList(
                new ThreeColumnRecord(2, "AAAAAAAAAA", "EEEE"),
                new ThreeColumnRecord(2, "AAAAAAAAAA", "GGGG"));
        writeRecords(records3);

        List<ThreeColumnRecord> records4 = Lists.newArrayList(
                new ThreeColumnRecord(2, "BBBBBBBBBB", "FFFF"),
                new ThreeColumnRecord(2, "BBBBBBBBBB", "HHHH"));
        writeRecords(records4);

        table.refresh();

        List<DataFile> dataFilesBefore = Lists.newArrayList(
                CloseableIterable.transform(table.newScan().planFiles(), FileScanTask::file));
        Assert.assertEquals("Should have 8 data files before rewrite", 8, dataFilesBefore.size());

        RewriteDataFilesActionResult result = Actions.forTable(table)
                .rewriteDataFiles()
                .filter(Expressions.equal("c1", 1))
                .filter(Expressions.startsWith("c2", "AA"))
                .execute();
        Assert.assertEquals("Action should rewrite 2 data files", 2, result.deletedDataFiles().size());
        Assert.assertEquals("Action should add 1 data file", 1, result.addedDataFiles().size());

        table.refresh();

        // This check runs after the rewrite; the message previously said "before rewrite".
        List<DataFile> dataFilesAfter = Lists.newArrayList(
                CloseableIterable.transform(table.newScan().planFiles(), FileScanTask::file));
        Assert.assertEquals("Should have 7 data files after rewrite", 7, dataFilesAfter.size());

        List<ThreeColumnRecord> expectedRecords = Lists.newArrayList();
        expectedRecords.addAll(records1);
        expectedRecords.addAll(records2);
        expectedRecords.addAll(records3);
        expectedRecords.addAll(records4);

        List<ThreeColumnRecord> actualRecords = spark.read()
                .format("iceberg")
                .load(tableLocation)
                .sort("c1", "c2", "c3")
                .as(Encoders.bean(ThreeColumnRecord.class))
                .collectAsList();
        Assert.assertEquals("Rows must match", expectedRecords, actualRecords);
    }

    /**
     * Writes 100 records to an unpartitioned table created with
     * {@code write.parquet.row-group-size-bytes=100}, plans a scan filtered on {@code c3 = "0"}
     * with residuals ignored, and checks that every planned task reports an always-true residual.
     * Then runs the rewrite action with the same filter and verifies that 2 data files are
     * replaced by 1 and that the table rows are unchanged.
     */
    @Test
    public void testRewriteLargeTableHasResiduals() {
        PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).build();
        HashMap<String, String> options = Maps.newHashMap();
        options.put("write.parquet.row-group-size-bytes", "100");
        Table table = TABLES.create(SCHEMA, spec, options, tableLocation);

        List<ThreeColumnRecord> records = Lists.newArrayList();
        for (int i = 0; i < 100; i++) {
            records.add(new ThreeColumnRecord(i, String.valueOf(i), String.valueOf(i % 4)));
        }
        writeDF(spark.createDataFrame(records, ThreeColumnRecord.class));

        table.refresh();

        // Keep the planned tasks in a local: they are iterated once for the residual check and
        // once more to collect the data files.
        CloseableIterable<FileScanTask> tasks = table.newScan()
                .ignoreResiduals()
                .filter(Expressions.equal("c3", "0"))
                .planFiles();
        for (FileScanTask task : tasks) {
            Assert.assertEquals("Residuals must be ignored", Expressions.alwaysTrue(), task.residual());
        }

        List<DataFile> dataFiles = Lists.newArrayList(CloseableIterable.transform(tasks, FileScanTask::file));
        Assert.assertEquals("Should have 2 data files before rewrite", 2, dataFiles.size());

        RewriteDataFilesActionResult result = Actions.forTable(table)
                .rewriteDataFiles()
                .filter(Expressions.equal("c3", "0"))
                .execute();
        Assert.assertEquals("Action should rewrite 2 data files", 2, result.deletedDataFiles().size());
        Assert.assertEquals("Action should add 1 data file", 1, result.addedDataFiles().size());

        table.refresh();

        List<ThreeColumnRecord> actualRecords = spark.read()
                .format("iceberg")
                .load(tableLocation)
                .sort("c1")
                .as(Encoders.bean(ThreeColumnRecord.class))
                .collectAsList();
        Assert.assertEquals("Rows must match", records, actualRecords);
    }

    /**
     * Writes one 2000-row batch (repartitioned to a single partition) plus one 2-row batch to an
     * unpartitioned table, then runs the rewrite action with a target size 10 bytes below the
     * largest data file and a split open-file cost of 1. Verifies the deleted and added file
     * counts reported by the action, and that the row count and the rows sorted by c2 are the
     * same before and after the rewrite.
     *
     * @throws AnalysisException if registering either temporary view fails
     */
    @Test
    public void testRewriteDataFilesForLargeFile() throws AnalysisException {
        Table table = TABLES.create(SCHEMA, PartitionSpec.unpartitioned(), Maps.newHashMap(), tableLocation);
        Assert.assertNull("Table must be empty", table.currentSnapshot());

        List<ThreeColumnRecord> largeBatch = Lists.newArrayList();
        for (int i = 0; i < 2000; i++) {
            largeBatch.add(new ThreeColumnRecord(i, "foo" + i, "bar" + i));
        }
        writeDF(spark.createDataFrame(largeBatch, ThreeColumnRecord.class).repartition(1));

        List<ThreeColumnRecord> smallBatch = Lists.newArrayList(
                new ThreeColumnRecord(1, "BBBBBBBBBB", "BBBB"),
                new ThreeColumnRecord(1, "DDDDDDDDDD", "DDDD"));
        writeRecords(smallBatch);

        table.refresh();

        // Keep the file list in a local: it is used to find the largest file and for the count.
        List<DataFile> dataFiles = Lists.newArrayList(
                CloseableIterable.transform(table.newScan().planFiles(), FileScanTask::file));
        DataFile maxSizeFile = Collections.max(dataFiles, Comparator.comparingLong(DataFile::fileSizeInBytes));
        Assert.assertEquals("Should have 3 files before rewrite", 3, dataFiles.size());

        spark.read().format("iceberg").load(tableLocation).createTempView("origin");
        long originalNumRecords = spark.read().format("iceberg").load(tableLocation).count();
        List<Object[]> originalRecords = sql("SELECT * from origin sort by c2");

        // A target just below the largest file's size, so that file exceeds the target.
        long targetSizeInBytes = maxSizeFile.fileSizeInBytes() - 10;
        RewriteDataFilesActionResult result = Actions.forTable(table)
                .rewriteDataFiles()
                .targetSizeInBytes(targetSizeInBytes)
                .splitOpenFileCost(1L)
                .execute();
        Assert.assertEquals("Action should delete 4 data files", 4, result.deletedDataFiles().size());
        Assert.assertEquals("Action should add 2 data files", 2, result.addedDataFiles().size());

        spark.read().format("iceberg").load(tableLocation).createTempView("postRewrite");
        long postRewriteNumRecords = spark.read().format("iceberg").load(tableLocation).count();
        List<Object[]> rewrittenRecords = sql("SELECT * from postRewrite sort by c2");

        Assert.assertEquals(originalNumRecords, postRewriteNumRecords);
        assertEquals("Rows should be unchanged", originalRecords, rewrittenRecords);
    }

    /**
     * Converts the given records into a DataFrame and appends it to the table under test.
     *
     * @param list records to append
     */
    private void writeRecords(List<ThreeColumnRecord> list) {
        Dataset<Row> frame = spark.createDataFrame(list, ThreeColumnRecord.class);
        writeDF(frame);
    }

    /**
     * Appends the c1, c2 and c3 columns of the given DataFrame to the Iceberg table at
     * {@code tableLocation}.
     *
     * @param dataset rows to append; must expose columns c1, c2 and c3
     */
    private void writeDF(Dataset<Row> dataset) {
        dataset
                .select("c1", "c2", "c3")
                .write()
                .format("iceberg")
                .mode("append")
                .save(tableLocation);
    }

    /**
     * Creates a table partitioned by identity(c1), evolves the spec by adding truncate(c2, 2) so
     * the table has two specs, and appends four batches of two records. Runs the rewrite action
     * with {@code outputSpecId(0)} and verifies that the 8 deleted files all have spec id 1, the
     * 2 added files all have spec id 0, and that full and filtered reads return the expected rows.
     * Finally appends one more batch and checks that a read still returns every row.
     */
    @Test
    public void testRewriteToOutputPartitionSpec() {
        PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).identity("c1").build();
        Table table = TABLES.create(SCHEMA, spec, Maps.newHashMap(), tableLocation);

        table.updateSpec().addField(Expressions.truncate("c2", 2)).commit();
        Assert.assertEquals("Should have 2 partitions specs", 2, table.specs().size());

        List<ThreeColumnRecord> records1 = Lists.newArrayList(
                new ThreeColumnRecord(1, "AAAAAAAAAA", "AAAA"),
                new ThreeColumnRecord(1, "AAAAAAAAAA", "CCCC"));
        writeRecords(records1);

        List<ThreeColumnRecord> records2 = Lists.newArrayList(
                new ThreeColumnRecord(1, "BBBBBBBBBB", "BBBB"),
                new ThreeColumnRecord(1, "BBBBBBBBBB", "DDDD"));
        writeRecords(records2);

        List<ThreeColumnRecord> records3 = Lists.newArrayList(
                new ThreeColumnRecord(2, "AAAAAAAAAA", "EEEE"),
                new ThreeColumnRecord(2, "AAAAAAAAAA", "GGGG"));
        writeRecords(records3);

        List<ThreeColumnRecord> records4 = Lists.newArrayList(
                new ThreeColumnRecord(2, "BBBBBBBBBB", "FFFF"),
                new ThreeColumnRecord(2, "BBBBBBBBBB", "HHHH"));
        writeRecords(records4);

        table.refresh();

        List<DataFile> dataFilesBefore = Lists.newArrayList(
                CloseableIterable.transform(table.newScan().planFiles(), FileScanTask::file));
        Assert.assertEquals("Should have 8 data files before rewrite", 8, dataFilesBefore.size());

        // Filtered read before the rewrite.
        List<ThreeColumnRecord> filteredBefore = spark.read()
                .format("iceberg")
                .load(tableLocation)
                .sort("c1", "c2", "c3")
                .filter("c1 = 1 AND c2 = 'BBBBBBBBBB'")
                .as(Encoders.bean(ThreeColumnRecord.class))
                .collectAsList();
        Assert.assertEquals("Rows must match", records2, filteredBefore);

        RewriteDataFilesActionResult result = Actions.forTable(table)
                .rewriteDataFiles()
                .outputSpecId(0)
                .execute();
        Assert.assertEquals("Action should rewrite 8 data files", 8, result.deletedDataFiles().size());
        Assert.assertEquals("Action should add 2 data file", 2, result.addedDataFiles().size());
        Assert.assertTrue(
                "Deleted files should all have spec id 1",
                result.deletedDataFiles().stream().allMatch(file -> file.specId() == 1));
        Assert.assertTrue(
                "Added files should all have spec id 0",
                result.addedDataFiles().stream().allMatch(file -> file.specId() == 0));

        table.refresh();

        List<DataFile> dataFilesAfter = Lists.newArrayList(
                CloseableIterable.transform(table.newScan().planFiles(), FileScanTask::file));
        Assert.assertEquals("Should have 2 data files after rewrite", 2, dataFilesAfter.size());

        List<ThreeColumnRecord> expectedRecords = Lists.newArrayList();
        expectedRecords.addAll(records1);
        expectedRecords.addAll(records2);
        expectedRecords.addAll(records3);
        expectedRecords.addAll(records4);

        Dataset<Row> resultDF = spark.read().format("iceberg").load(tableLocation);

        List<ThreeColumnRecord> actualRecords = resultDF
                .sort("c1", "c2", "c3")
                .as(Encoders.bean(ThreeColumnRecord.class))
                .collectAsList();
        Assert.assertEquals("Rows must match", expectedRecords, actualRecords);

        List<ThreeColumnRecord> filteredAfter = resultDF
                .sort("c1", "c2", "c3")
                .filter("c1 = 1 AND c2 = 'BBBBBBBBBB'")
                .as(Encoders.bean(ThreeColumnRecord.class))
                .collectAsList();
        Assert.assertEquals("Rows must match", records2, filteredAfter);

        // Append one more batch after the rewrite and read again through the same Dataset.
        List<ThreeColumnRecord> records5 = Lists.newArrayList(
                new ThreeColumnRecord(3, "CCCCCCCCCC", "FFFF"),
                new ThreeColumnRecord(3, "CCCCCCCCCC", "HHHH"));
        writeRecords(records5);
        expectedRecords.addAll(records5);

        List<ThreeColumnRecord> actualAfterAppend = resultDF
                .sort("c1", "c2", "c3")
                .as(Encoders.bean(ThreeColumnRecord.class))
                .collectAsList();
        Assert.assertEquals("Rows must match", expectedRecords, actualAfterAppend);
    }
}
