package org.apache.iceberg.spark.actions;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.UUID;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import org.apache.hadoop.fs.Path;
import org.apache.iceberg.ContentFile;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DeleteFile;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.HasTableOperations;
import org.apache.iceberg.ManifestContent;
import org.apache.iceberg.ManifestFile;
import org.apache.iceberg.ManifestFiles;
import org.apache.iceberg.ManifestWriter;
import org.apache.iceberg.MetadataTableType;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Partitioning;
import org.apache.iceberg.RollingManifestWriter;
import org.apache.iceberg.Snapshot;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableOperations;
import org.apache.iceberg.actions.ImmutableRewriteManifests;
import org.apache.iceberg.actions.RewriteManifests;
import org.apache.iceberg.exceptions.CleanableFailure;
import org.apache.iceberg.exceptions.CommitStateUnknownException;
import org.apache.iceberg.exceptions.ValidationException;
import org.apache.iceberg.io.OutputFile;
import org.apache.iceberg.io.SupportsBulkOperations;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.spark.SparkContentFile;
import org.apache.iceberg.spark.SparkDataFile;
import org.apache.iceberg.spark.SparkDeleteFile;
import org.apache.iceberg.spark.source.SerializableTableWithSize;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.PropertyUtil;
import org.apache.iceberg.util.ThreadPools;
import org.apache.spark.api.java.function.MapPartitionsFunction;
import org.apache.spark.broadcast.Broadcast;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoder;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.StructType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/iceberg/spark/actions/RewriteManifestsSparkAction.class */
public class RewriteManifestsSparkAction extends BaseSnapshotUpdateSparkAction<RewriteManifestsSparkAction> implements RewriteManifests {
    /** Option key read by {@code withReusableDS} to decide whether the entry dataset is cached. */
    public static final String USE_CACHING = "use-caching";
    /** Declared default for {@link #USE_CACHING}; caching is off unless the option is set. */
    public static final boolean USE_CACHING_DEFAULT = false;
    private static final Logger LOG = LoggerFactory.getLogger(RewriteManifestsSparkAction.class);
    /** Shared result with empty rewritten/added lists, returned when there is nothing to rewrite. */
    private static final RewriteManifests.Result EMPTY_RESULT = ImmutableRewriteManifests.Result.builder().rewrittenManifests(ImmutableList.of()).addedManifests(ImmutableList.of()).build();
    /** Table whose manifests are rewritten. */
    private final Table table;
    /** Format version taken from the table's current metadata in the constructor. */
    private final int formatVersion;
    /** Value of the "commit.manifest.target-size-bytes" table property (default 8388608). */
    private final long targetManifestSizeBytes;
    /** True only for format version 1 with "compatibility.snapshot-id-inheritance.enabled" off. */
    private final boolean shouldStageManifests;
    /** Spec whose manifests are selected; starts as the table's spec and can be changed via m142specId. */
    private PartitionSpec spec;
    /** Additional manifest filter; accepts everything until replaced through rewriteIf. */
    private Predicate<ManifestFile> predicate;
    /** Directory new manifests are written to; defaults to the parent of the metadata file location. */
    private String outputLocation;

    /* JADX INFO: Access modifiers changed from: package-private */
    /* renamed from: org.apache.iceberg.spark.actions.RewriteManifestsSparkAction$1, reason: invalid class name */
    /* loaded from: input_file:org/apache/iceberg/spark/actions/RewriteManifestsSparkAction$1.class */
    /**
     * Synthetic lookup table (as marked by the decompiler) that maps {@link ManifestContent}
     * ordinals to small integer case labels: {@code DATA} to 1 and {@code DELETES} to 2.
     * Entries for any other constant keep the array default of 0.
     */
    public static /* synthetic */ class AnonymousClass1 {
        // Sized by the number of enum constants so every ordinal is a valid index.
        static final /* synthetic */ int[] $SwitchMap$org$apache$iceberg$ManifestContent = new int[ManifestContent.values().length];

        static {
            // Each constant is registered in its own try block; a NoSuchFieldError for one
            // constant is ignored so that the remaining constants are still registered.
            // NOTE(review): presumably the compiler's standard pattern for enum switches — confirm.
            try {
                $SwitchMap$org$apache$iceberg$ManifestContent[ManifestContent.DATA.ordinal()] = 1;
            } catch (NoSuchFieldError e) {
            }
            try {
                $SwitchMap$org$apache$iceberg$ManifestContent[ManifestContent.DELETES.ordinal()] = 2;
            } catch (NoSuchFieldError e2) {
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/apache/iceberg/spark/actions/RewriteManifestsSparkAction$ManifestWriterFactory.class */
    /**
     * Serializable factory for rolling manifest writers.
     *
     * <p>It carries a broadcast table handle plus the settings each writer needs, and creates
     * every new manifest under {@code outputLocation} with a random, Avro-suffixed file name.
     */
    public static class ManifestWriterFactory implements Serializable {
        private final Broadcast<Table> tableBroadcast;
        private final int formatVersion;
        private final int specId;
        private final String outputLocation;
        private final long maxManifestSizeBytes;

        /**
         * @param tableBroadcast broadcast handle to the table being rewritten
         * @param formatVersion format version passed to the manifest writers
         * @param specId ID of the partition spec looked up on the table for every writer
         * @param outputLocation directory that receives the new manifest files
         * @param maxManifestSizeBytes size handed to each rolling writer
         */
        ManifestWriterFactory(Broadcast<Table> tableBroadcast, int formatVersion, int specId, String outputLocation, long maxManifestSizeBytes) {
            this.tableBroadcast = tableBroadcast;
            this.formatVersion = formatVersion;
            this.specId = specId;
            this.outputLocation = outputLocation;
            this.maxManifestSizeBytes = maxManifestSizeBytes;
        }

        /** Creates a rolling writer that produces data manifests. */
        public RollingManifestWriter<DataFile> newRollingManifestWriter() {
            return new RollingManifestWriter<>(this::newManifestWriter, this.maxManifestSizeBytes);
        }

        /** Opens a single data manifest writer on a fresh output file. */
        private ManifestWriter<DataFile> newManifestWriter() {
            PartitionSpec writerSpec = spec();
            OutputFile manifestFile = newOutputFile();
            // A null Long is passed as the last argument, exactly as before.
            return ManifestFiles.write(this.formatVersion, writerSpec, manifestFile, (Long) null);
        }

        /** Creates a rolling writer that produces delete manifests. */
        public RollingManifestWriter<DeleteFile> newRollingDeleteManifestWriter() {
            return new RollingManifestWriter<>(this::newDeleteManifestWriter, this.maxManifestSizeBytes);
        }

        /** Opens a single delete manifest writer on a fresh output file. */
        private ManifestWriter<DeleteFile> newDeleteManifestWriter() {
            PartitionSpec writerSpec = spec();
            OutputFile manifestFile = newOutputFile();
            return ManifestFiles.writeDeleteManifest(this.formatVersion, writerSpec, manifestFile, (Long) null);
        }

        /** Looks up the configured spec ID in the broadcast table's spec map. */
        private PartitionSpec spec() {
            return table().specs().get(this.specId);
        }

        /** Asks the table's file IO for an output file at a newly generated location. */
        private OutputFile newOutputFile() {
            String location = newManifestLocation();
            return table().io().newOutputFile(location);
        }

        /** Builds "&lt;outputLocation&gt;/optimized-m-&lt;random UUID&gt;" with the Avro extension appended. */
        private String newManifestLocation() {
            String fileName = FileFormat.AVRO.addExtension("optimized-m-" + UUID.randomUUID());
            return new Path(this.outputLocation, fileName).toString();
        }

        /** Unwraps the broadcast table. */
        private Table table() {
            return this.tableBroadcast.value();
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/apache/iceberg/spark/actions/RewriteManifestsSparkAction$WriteDataManifests.class */
    /** {@link WriteManifests} specialization that wraps rows as data files and writes data manifests. */
    public static class WriteDataManifests extends WriteManifests<DataFile> {
        /**
         * @param writers factory used to open the rolling data manifest writer
         * @param combinedFileType first Iceberg struct type handed to the row wrapper
         * @param fileType second Iceberg struct type handed to the row wrapper
         * @param sparkFileType Spark struct type of the {@code data_file} column
         */
        WriteDataManifests(ManifestWriterFactory writers, Types.StructType combinedFileType, Types.StructType fileType, StructType sparkFileType) {
            super(writers, combinedFileType, fileType, sparkFileType);
        }

        /** Creates the reusable wrapper that exposes a Spark row as a {@link DataFile}. */
        @Override
        public SparkContentFile<DataFile> newFileWrapper2() {
            return new SparkDataFile(combinedFileType(), fileType(), sparkFileType());
        }

        /** Opens a rolling writer for data manifests. */
        @Override
        protected RollingManifestWriter<DataFile> newManifestWriter() {
            ManifestWriterFactory factory = writers();
            return factory.newRollingManifestWriter();
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/apache/iceberg/spark/actions/RewriteManifestsSparkAction$WriteDeleteManifests.class */
    /** {@link WriteManifests} specialization that wraps rows as delete files and writes delete manifests. */
    public static class WriteDeleteManifests extends WriteManifests<DeleteFile> {
        /**
         * @param writers factory used to open the rolling delete manifest writer
         * @param combinedFileType first Iceberg struct type handed to the row wrapper
         * @param fileType second Iceberg struct type handed to the row wrapper
         * @param sparkFileType Spark struct type of the {@code data_file} column
         */
        WriteDeleteManifests(ManifestWriterFactory writers, Types.StructType combinedFileType, Types.StructType fileType, StructType sparkFileType) {
            super(writers, combinedFileType, fileType, sparkFileType);
        }

        /** Creates the reusable wrapper that exposes a Spark row as a {@link DeleteFile}. */
        @Override
        public SparkContentFile<DeleteFile> newFileWrapper2() {
            return new SparkDeleteFile(combinedFileType(), fileType(), sparkFileType());
        }

        /** Opens a rolling writer for delete manifests. */
        @Override
        protected RollingManifestWriter<DeleteFile> newManifestWriter() {
            ManifestWriterFactory factory = writers();
            return factory.newRollingDeleteManifestWriter();
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/apache/iceberg/spark/actions/RewriteManifestsSparkAction$WriteManifests.class */
    /**
     * Spark partition function that turns manifest-entry rows into new manifest files.
     *
     * <p>Each input row is read positionally: column 0 is a snapshot ID, column 1 a sequence
     * number, column 2 a nullable file sequence number and column 3 the file struct. This matches
     * the column order selected by {@code buildManifestEntryDF} in the enclosing action.
     *
     * @param <F> content file type written by the subclass
     */
    public static abstract class WriteManifests<F extends ContentFile<F>> implements MapPartitionsFunction<Row, ManifestFile> {
        private static final Encoder<ManifestFile> MANIFEST_ENCODER = Encoders.javaSerialization(ManifestFile.class);
        private final ManifestWriterFactory writers;
        private final Types.StructType combinedFileType;
        private final Types.StructType fileType;
        private final StructType sparkFileType;

        /**
         * @param manifestWriterFactory factory the subclass uses to open its rolling writer
         * @param structType Iceberg struct type exposed through {@link #combinedFileType()}
         * @param structType2 Iceberg struct type exposed through {@link #fileType()}
         * @param structType3 Spark struct type exposed through {@link #sparkFileType()}
         */
        WriteManifests(ManifestWriterFactory manifestWriterFactory, Types.StructType structType, Types.StructType structType2, StructType structType3) {
            this.writers = manifestWriterFactory;
            this.combinedFileType = structType;
            this.fileType = structType2;
            this.sparkFileType = structType3;
        }

        /** Creates the wrapper used to view a Spark row as a content file of type {@code F}. */
        protected abstract SparkContentFile<F> newFileWrapper2();

        /** Opens the rolling manifest writer for content files of type {@code F}. */
        protected abstract RollingManifestWriter<F> newManifestWriter();

        /**
         * Runs this function over every partition of the given dataset.
         *
         * @param dataset manifest-entry rows in the column order described on this class
         * @return dataset of the manifest files written by each partition
         */
        public Dataset<ManifestFile> apply(Dataset<Row> dataset) {
            return dataset.mapPartitions(this, MANIFEST_ENCODER);
        }

        /**
         * Appends every row of one partition to a rolling manifest writer as an existing entry.
         *
         * @param it rows of a single Spark partition
         * @return iterator over the manifest files produced for this partition
         * @throws Exception if reading a row, writing an entry or closing the writer fails
         */
        @Override
        public Iterator<ManifestFile> call(Iterator<Row> it) throws Exception {
            SparkContentFile<F> fileWrapper = newFileWrapper2();
            RollingManifestWriter<F> writer = newManifestWriter();
            try {
                while (it.hasNext()) {
                    Row row = it.next();
                    long snapshotId = row.getLong(0);
                    long sequenceNumber = row.getLong(1);
                    Long fileSequenceNumber = row.isNullAt(2) ? null : Long.valueOf(row.getLong(2));
                    Row file = row.getStruct(3);
                    writer.existing(fileWrapper.wrap(file), snapshotId, sequenceNumber, fileSequenceNumber);
                }
            } finally {
                // Close exactly once, after the last row (or on failure), so the writer stays
                // open while rows remain and is also closed when the partition is empty.
                writer.close();
            }
            return writer.toManifestFiles().iterator();
        }

        /** Returns the writer factory supplied at construction. */
        protected ManifestWriterFactory writers() {
            return this.writers;
        }

        /** Returns the first Iceberg struct type supplied at construction. */
        protected Types.StructType combinedFileType() {
            return this.combinedFileType;
        }

        /** Returns the second Iceberg struct type supplied at construction. */
        protected Types.StructType fileType() {
            return this.fileType;
        }

        /** Returns the Spark struct type supplied at construction. */
        protected StructType sparkFileType() {
            return this.sparkFileType;
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Creates the action for the given table, reading its defaults from the table itself.
     *
     * @param sparkSession session handed to the base action
     * @param table table whose manifests will be rewritten; must expose table operations
     */
    public RewriteManifestsSparkAction(SparkSession sparkSession, Table table) {
        super(sparkSession);
        // Until rewriteIf is called, every manifest of the selected spec qualifies.
        this.predicate = manifestFile -> true;
        this.table = table;
        this.spec = table.spec();
        this.targetManifestSizeBytes = PropertyUtil.propertyAsLong(table.properties(), "commit.manifest.target-size-bytes", 8388608L);

        TableOperations ops = ((HasTableOperations) table).operations();
        // New manifests default to the directory that holds the table's metadata files.
        Path metadataFile = new Path(ops.metadataFileLocation("file"));
        this.outputLocation = metadataFile.getParent().toString();
        this.formatVersion = ops.current().formatVersion();

        // Staging applies only to format version 1 tables without snapshot ID inheritance.
        if (this.formatVersion == 1) {
            this.shouldStageManifests = !PropertyUtil.propertyAsBoolean(table.properties(), "compatibility.snapshot-id-inheritance.enabled", false);
        } else {
            this.shouldStageManifests = false;
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Returns this action typed as the concrete class.
     *
     * @return this instance
     */
    @Override // org.apache.iceberg.spark.actions.BaseSparkAction
    public RewriteManifestsSparkAction self() {
        return this;
    }

    /* renamed from: specId, reason: merged with bridge method [inline-methods] */
    /**
     * Selects the partition spec whose manifests will be rewritten.
     *
     * @param i ID of a spec registered on the table
     * @return this action
     * @throws IllegalArgumentException if the table has no spec with that ID
     */
    public RewriteManifestsSparkAction m142specId(int i) {
        boolean knownSpec = this.table.specs().containsKey(i);
        Preconditions.checkArgument(knownSpec, "Invalid spec id %s", i);
        this.spec = this.table.specs().get(i);
        return this;
    }

    /**
     * Replaces the filter applied to candidate manifests in {@code findMatchingManifests}.
     *
     * @param predicate test a manifest must pass, in addition to matching the selected spec
     * @return this action
     */
    public RewriteManifestsSparkAction rewriteIf(Predicate<ManifestFile> predicate) {
        this.predicate = predicate;
        return this;
    }

    /* renamed from: stagingLocation, reason: merged with bridge method [inline-methods] */
    /**
     * Sets the directory new manifests are written to, when manifests are staged.
     *
     * <p>If this action does not stage manifests the argument is ignored and a warning is logged.
     *
     * @param str staging directory for new manifests
     * @return this action
     */
    public RewriteManifestsSparkAction m140stagingLocation(String str) {
        if (!this.shouldStageManifests) {
            LOG.warn("Ignoring provided staging location as new manifests will be committed directly");
            return this;
        }
        this.outputLocation = str;
        return this;
    }

    /* renamed from: execute, reason: merged with bridge method [inline-methods] */
    /**
     * Runs the rewrite inside a "REWRITE-MANIFESTS" job group described with the table name.
     *
     * @return result produced by {@code doExecute}
     */
    public RewriteManifests.Result m143execute() {
        String jobDescription = String.format("Rewriting manifests in %s", this.table.name());
        return withJobGroupInfo(newJobGroupInfo("REWRITE-MANIFESTS", jobDescription), this::doExecute);
    }

    /**
     * Rewrites data manifests, then delete manifests, and commits both in one replacement.
     *
     * @return the shared empty result when no manifest was rewritten, otherwise the combined
     *     lists of rewritten and added manifests
     */
    private RewriteManifests.Result doExecute() {
        // Typed lists, so the values handed to replaceManifests and the result builder are checked.
        List<ManifestFile> rewrittenManifests = Lists.newArrayList();
        List<ManifestFile> addedManifests = Lists.newArrayList();

        RewriteManifests.Result dataResult = rewriteManifests(ManifestContent.DATA);
        Iterables.addAll(rewrittenManifests, dataResult.rewrittenManifests());
        Iterables.addAll(addedManifests, dataResult.addedManifests());

        RewriteManifests.Result deletesResult = rewriteManifests(ManifestContent.DELETES);
        Iterables.addAll(rewrittenManifests, deletesResult.rewrittenManifests());
        Iterables.addAll(addedManifests, deletesResult.addedManifests());

        // Nothing was rewritten for either content type, so there is nothing to commit.
        if (rewrittenManifests.isEmpty()) {
            return EMPTY_RESULT;
        }

        replaceManifests(rewrittenManifests, addedManifests);
        return ImmutableRewriteManifests.Result.builder().rewrittenManifests(rewrittenManifests).addedManifests(addedManifests).build();
    }

    /**
     * Rewrites the matching manifests of one content type without committing them.
     *
     * @param manifestContent content type (data or deletes) to process
     * @return the shared empty result when no manifest matches, or when a single matching
     *     manifest already meets a target of one; otherwise the matched manifests together with
     *     their newly written replacements
     */
    private RewriteManifests.Result rewriteManifests(ManifestContent manifestContent) {
        List<ManifestFile> matchingManifests = findMatchingManifests(manifestContent);
        if (matchingManifests.isEmpty()) {
            return EMPTY_RESULT;
        }

        long totalSize = totalSizeBytes(matchingManifests);
        int targetCount = targetNumManifests(totalSize);
        if (targetCount == 1 && matchingManifests.size() == 1) {
            return EMPTY_RESULT;
        }

        Dataset<Row> manifestEntryDF = buildManifestEntryDF(matchingManifests);
        List<ManifestFile> newManifests;
        if (this.spec.isUnpartitioned()) {
            newManifests = writeUnpartitionedManifests(manifestContent, manifestEntryDF, targetCount);
        } else {
            newManifests = writePartitionedManifests(manifestContent, manifestEntryDF, targetCount);
        }

        return ImmutableRewriteManifests.Result.builder().rewrittenManifests(matchingManifests).addedManifests(newManifests).build();
    }

    /**
     * Builds the dataset of entries that belong to the given manifests.
     *
     * @param list manifests whose entries should be selected
     * @return rows with columns {@code snapshot_id}, {@code sequence_number},
     *     {@code file_sequence_number} and {@code data_file}, in that order
     */
    private Dataset<Row> buildManifestEntryDF(List<ManifestFile> list) {
        // Single-column dataset holding the paths of the manifests to rewrite.
        Dataset<Row> manifestDF = spark()
                .createDataset(Lists.transform(list, ManifestFile::path), Encoders.STRING())
                .toDF("manifest");

        // Entries with status 0 or 1 only.
        // NOTE(review): presumably status 2 marks deleted entries — confirm against the entries table.
        Dataset<Row> manifestEntryDF = loadMetadataTable(this.table, MetadataTableType.ENTRIES)
                .filter("status < 2")
                .selectExpr("input_file_name() as manifest", "snapshot_id", "sequence_number", "file_sequence_number", "data_file");

        // A left-semi join keeps only entries whose source file is one of the selected manifests.
        return manifestEntryDF
                .join(manifestDF, manifestDF.col("manifest").equalTo(manifestEntryDF.col("manifest")), "left_semi")
                .select("snapshot_id", "sequence_number", "file_sequence_number", "data_file");
    }

    /**
     * Writes new manifests for an unpartitioned spec by repartitioning the entries evenly.
     *
     * @param manifestContent content type of the manifests to write
     * @param dataset manifest-entry rows
     * @param numManifests number of Spark partitions to spread the entries over
     * @return manifests written by all partitions
     */
    private List<ManifestFile> writeUnpartitionedManifests(ManifestContent manifestContent, Dataset<Row> dataset, int numManifests) {
        WriteManifests<?> writeFunc = newWriteManifestsFunc(manifestContent, dataset.schema());
        Dataset<Row> repartitioned = dataset.repartition(numManifests);
        return writeFunc.apply(repartitioned).collectAsList();
    }

    /**
     * Writes new manifests for a partitioned spec, clustering entries by partition value.
     *
     * @param manifestContent content type of the manifests to write
     * @param dataset manifest-entry rows
     * @param numManifests number of range partitions to create
     * @return manifests written by all partitions
     */
    private List<ManifestFile> writePartitionedManifests(ManifestContent manifestContent, Dataset<Row> dataset, int numManifests) {
        return withReusableDS(dataset, reusableDS -> {
            WriteManifests<?> writeFunc = newWriteManifestsFunc(manifestContent, reusableDS.schema());
            Column partitionColumn = reusableDS.col("data_file.partition");
            Dataset<Row> sorted = repartitionAndSort(reusableDS, partitionColumn, numManifests);
            return writeFunc.apply(sorted).collectAsList();
        });
    }

    /**
     * Creates the partition function that writes manifests of the requested content type.
     *
     * @param manifestContent {@code DATA} selects the data-manifest writer; any other value
     *     selects the delete-manifest writer
     * @param structType Spark schema of the entry dataset; must contain a {@code data_file} struct
     * @return function that writes manifests from manifest-entry rows
     */
    private WriteManifests<?> newWriteManifestsFunc(ManifestContent manifestContent, StructType structType) {
        ManifestWriterFactory writers = manifestWriters();

        // dataType() is declared as the general DataType, so the struct type needs an explicit cast.
        StructType sparkFileType = (StructType) structType.apply("data_file").dataType();
        // File type built from the table-wide partition type.
        Types.StructType combinedFileType = DataFile.getType(Partitioning.partitionType(this.table));
        // File type built from the selected spec's partition type.
        Types.StructType fileType = DataFile.getType(this.spec.partitionType());

        if (manifestContent == ManifestContent.DATA) {
            return new WriteDataManifests(writers, combinedFileType, fileType, sparkFileType);
        }
        return new WriteDeleteManifests(writers, combinedFileType, fileType, sparkFileType);
    }

    /**
     * Range-partitions the dataset by a column and sorts each partition by that same column.
     *
     * @param dataset rows to redistribute
     * @param column column used both for range partitioning and for the in-partition sort
     * @param numPartitions number of range partitions
     * @return the repartitioned, locally sorted dataset
     */
    private Dataset<Row> repartitionAndSort(Dataset<Row> dataset, Column column, int numPartitions) {
        Dataset<Row> ranged = dataset.repartitionByRange(numPartitions, column);
        return ranged.sortWithinPartitions(column);
    }

    /**
     * Applies a function to a dataset, caching the dataset for the duration of the call when the
     * {@code use-caching} option is enabled.
     *
     * @param dataset dataset to hand to the function
     * @param function computation to run against the (possibly cached) dataset
     * @param <T> element type of the dataset
     * @param <U> result type of the function
     * @return whatever the function returns
     */
    private <T, U> U withReusableDS(Dataset<T> dataset, Function<Dataset<T>, U> function) {
        boolean useCaching = PropertyUtil.propertyAsBoolean(options(), USE_CACHING, false);
        Dataset<T> reusableDS = useCaching ? dataset.cache() : dataset;
        try {
            return function.apply(reusableDS);
        } finally {
            // One cleanup path for success and failure, so unpersist is attempted exactly once.
            if (useCaching) {
                reusableDS.unpersist(false);
            }
        }
    }

    /**
     * Finds manifests of the current snapshot that use the selected spec and pass the predicate.
     *
     * @param manifestContent content type of the manifests to inspect
     * @return matching manifests; an empty list when the table has no current snapshot
     */
    private List<ManifestFile> findMatchingManifests(ManifestContent manifestContent) {
        Snapshot currentSnapshot = this.table.currentSnapshot();
        if (currentSnapshot == null) {
            return ImmutableList.of();
        }

        return loadManifests(manifestContent, currentSnapshot).stream()
                .filter(manifest -> manifest.partitionSpecId() == this.spec.specId())
                .filter(manifest -> this.predicate.test(manifest))
                .collect(Collectors.toList());
    }

    /**
     * Loads the manifests of one content type from a snapshot.
     *
     * @param manifestContent content type to load
     * @param snapshot snapshot to read the manifest list from
     * @return data manifests for {@code DATA}, delete manifests for {@code DELETES}
     * @throws IllegalArgumentException if the content type is neither of those
     */
    private List<ManifestFile> loadManifests(ManifestContent manifestContent, Snapshot snapshot) {
        // Switch on the enum itself rather than on an ordinal lookup table.
        switch (manifestContent) {
            case DATA:
                return snapshot.dataManifests(this.table.io());
            case DELETES:
                return snapshot.deleteManifests(this.table.io());
            default:
                throw new IllegalArgumentException("Unknown manifest content: " + manifestContent);
        }
    }

    /**
     * Computes how many manifests are needed for the given total size, rounding up.
     *
     * @param totalSizeBytes combined size of the manifests being rewritten
     * @return ceiling of {@code totalSizeBytes / targetManifestSizeBytes}, narrowed to int
     */
    private int targetNumManifests(long totalSizeBytes) {
        long targetSize = this.targetManifestSizeBytes;
        // Adding (target - 1) before the integer division rounds the quotient up.
        long roundedUp = (totalSizeBytes + targetSize) - 1;
        return (int) (roundedUp / targetSize);
    }

    /**
     * Sums the lengths of the given manifests, validating each one on the way.
     *
     * @param manifests manifests to measure
     * @return total length in bytes
     * @throws ValidationException if a manifest lacks any of its file counts
     */
    private long totalSizeBytes(Iterable<ManifestFile> manifests) {
        long total = 0L;
        for (ManifestFile manifest : manifests) {
            boolean countsPresent = hasFileCounts(manifest);
            ValidationException.check(countsPresent, "No file counts in manifest: %s", manifest.path());
            total += manifest.length();
        }
        return total;
    }

    /**
     * Tells whether a manifest reports all three file counts.
     *
     * @param manifestFile manifest to inspect
     * @return true when the added, existing and deleted file counts are all non-null
     */
    private boolean hasFileCounts(ManifestFile manifestFile) {
        return manifestFile.addedFilesCount() != null
                && manifestFile.existingFilesCount() != null
                && manifestFile.deletedFilesCount() != null;
    }

    /**
     * Commits the replacement of old manifests by new ones and cleans up manifest files.
     *
     * @param deletedManifests manifests to remove from the table
     * @param addedManifests newly written manifests to add to the table
     */
    private void replaceManifests(Iterable<ManifestFile> deletedManifests, Iterable<ManifestFile> addedManifests) {
        try {
            org.apache.iceberg.RewriteManifests rewriteManifests = this.table.rewriteManifests();
            deletedManifests.forEach(rewriteManifests::deleteManifest);
            addedManifests.forEach(rewriteManifests::addManifest);
            commit(rewriteManifests);

            if (this.shouldStageManifests) {
                // Staged manifest files are removed once the commit has succeeded.
                // NOTE(review): presumably the commit wrote its own copies — confirm.
                deleteFiles(Iterables.transform(addedManifests, ManifestFile::path));
            }
        } catch (CommitStateUnknownException e) {
            // The commit outcome is unknown, so the new manifests are left in place.
            // This clause has to precede the general one below to be reachable.
            throw e;
        } catch (Exception e) {
            if (e instanceof CleanableFailure) {
                // The failure is marked cleanable, so the new manifests are deleted.
                deleteFiles(Iterables.transform(addedManifests, ManifestFile::path));
            }
            throw e;
        }
    }

    /**
     * Deletes the given manifest locations, in bulk when the table's file IO supports it.
     *
     * @param iterable locations of the manifest files to delete
     */
    private void deleteFiles(Iterable<String> iterable) {
        // Each location is tagged with the "Manifest" type expected by the delete helpers.
        Iterable<FileInfo> files = Iterables.transform(iterable, location -> new FileInfo(location, "Manifest"));
        if (this.table.io() instanceof SupportsBulkOperations) {
            deleteFiles((SupportsBulkOperations) this.table.io(), files.iterator());
        } else {
            // No bulk support: delete one file at a time on the shared worker pool.
            deleteFiles(ThreadPools.getWorkerPool(), location -> this.table.io().deleteFile(location), files.iterator());
        }
    }

    /**
     * Builds the writer factory that is shipped to Spark tasks.
     *
     * @return factory holding a broadcast serializable copy of the table, the format version,
     *     the selected spec ID, the output location and a maximum manifest size
     */
    private ManifestWriterFactory manifestWriters() {
        // The factory's maximum manifest size is 1.2 times the target manifest size.
        long maxManifestSizeBytes = (long) (1.2d * this.targetManifestSizeBytes);
        return new ManifestWriterFactory(
                sparkContext().broadcast(SerializableTableWithSize.copyOf(this.table)),
                this.formatVersion,
                this.spec.specId(),
                this.outputLocation,
                maxManifestSizeBytes);
    }

    /* JADX WARN: Type inference failed for: r0v1, types: [org.apache.iceberg.spark.actions.RewriteManifestsSparkAction, java.lang.Object] */
    /**
     * Bridge method (marked synthetic by the decompiler) that forwards to the superclass
     * {@code snapshotProperty} and declares this class as the return type.
     *
     * @param str first argument, passed through unchanged
     * @param str2 second argument, passed through unchanged
     * @return the value returned by the superclass method
     */
    @Override // org.apache.iceberg.spark.actions.BaseSnapshotUpdateSparkAction
    public /* bridge */ /* synthetic */ RewriteManifestsSparkAction snapshotProperty(String str, String str2) {
        return super.snapshotProperty(str, str2);
    }

    /**
     * Bridge method (marked synthetic by the decompiler) that forwards to the superclass
     * {@code options(Map)}.
     *
     * @param map argument passed through unchanged
     * @return the value returned by the superclass method
     */
    @Override // org.apache.iceberg.spark.actions.BaseSparkAction
    public /* bridge */ /* synthetic */ Object options(Map map) {
        return super.options(map);
    }

    /**
     * Bridge method (marked synthetic by the decompiler) that forwards to the superclass
     * {@code option(String, String)}.
     *
     * @param str first argument, passed through unchanged
     * @param str2 second argument, passed through unchanged
     * @return the value returned by the superclass method
     */
    @Override // org.apache.iceberg.spark.actions.BaseSparkAction
    public /* bridge */ /* synthetic */ Object option(String str, String str2) {
        return super.option(str, str2);
    }

    /* renamed from: rewriteIf, reason: collision with other method in class */
    /**
     * Bridge method (marked synthetic by the decompiler) that casts the raw predicate and
     * delegates to {@link #rewriteIf(Predicate)}.
     *
     * @param predicate raw predicate, cast to {@code Predicate<ManifestFile>} before delegating
     * @return the action returned by {@code rewriteIf}
     */
    public /* bridge */ /* synthetic */ RewriteManifests m141rewriteIf(Predicate predicate) {
        return rewriteIf((Predicate<ManifestFile>) predicate);
    }
}
