package org.apache.iceberg.spark.actions;

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DataOperations;
import org.apache.iceberg.FileScanTask;
import org.apache.iceberg.NullOrder;
import org.apache.iceberg.Schema;
import org.apache.iceberg.SortDirection;
import org.apache.iceberg.SortOrder;
import org.apache.iceberg.Table;
import org.apache.iceberg.actions.RewriteStrategy;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
import org.apache.iceberg.spark.SparkDistributionAndOrderingUtil;
import org.apache.iceberg.spark.SparkReadOptions;
import org.apache.iceberg.spark.SparkUtil;
import org.apache.iceberg.spark.SparkWriteOptions;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.PropertyUtil;
import org.apache.iceberg.util.SortOrderUtil;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.connector.distributions.Distributions;
import org.apache.spark.sql.connector.distributions.OrderedDistribution;
import org.apache.spark.sql.functions;
import org.apache.spark.sql.internal.SQLConf;
import org.apache.spark.sql.types.StructType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Sort-based rewrite strategy that orders rows by a single binary "Z value" derived from the
 * requested columns.
 *
 * <p>For every rewritten file group the strategy reads the staged scan tasks, converts each
 * requested column through {@code SparkZOrderUDF.sortedLexicographically}, combines the results
 * with {@code SparkZOrderUDF.interleaveBytes} into the temporary {@value #Z_COLUMN} column, sorts
 * by that column, and writes the data back with the temporary column dropped.
 *
 * <p>Two options are accepted on top of the options of the parent strategy:
 * <ul>
 *   <li>{@value #VAR_LENGTH_CONTRIBUTION_KEY} (default {@value #DEFAULT_VAR_LENGTH_CONTRIBUTION});
 *       must be at least 1.</li>
 *   <li>{@value #MAX_OUTPUT_SIZE_KEY} (default {@code Integer.MAX_VALUE}); must be at least 1.</li>
 * </ul>
 */
public class SparkZOrderStrategy extends SparkSortStrategy {
    private static final Logger LOG = LoggerFactory.getLogger(SparkZOrderStrategy.class);

    // Temporary column that carries the interleaved value; it only exists between the scan and the write.
    private static final String Z_COLUMN = "ICEZVALUE";
    private static final Schema Z_SCHEMA = new Schema(Types.NestedField.required(0, Z_COLUMN, Types.BinaryType.get()));
    private static final SortOrder Z_SORT_ORDER = SortOrder.builderFor(Z_SCHEMA)
            .sortBy(Z_COLUMN, SortDirection.ASC, NullOrder.NULLS_LAST)
            .build();

    private static final String MAX_OUTPUT_SIZE_KEY = "max-output-size";
    private static final int DEFAULT_MAX_OUTPUT_SIZE = Integer.MAX_VALUE;
    private static final String VAR_LENGTH_CONTRIBUTION_KEY = "var-length-contribution";
    private static final int DEFAULT_VAR_LENGTH_CONTRIBUTION = 8;

    private final List<String> zOrderColNames;

    // Start from the documented defaults so the strategy never hands 0 (a value options() itself
    // rejects) to the UDF when rewriteFiles is reached without a prior options() call.
    private int maxOutputSize = DEFAULT_MAX_OUTPUT_SIZE;
    private int varLengthContribution = DEFAULT_VAR_LENGTH_CONTRIBUTION;

    /**
     * Creates a strategy that Z-orders the given table by the given columns.
     *
     * <p>Columns that are also the name of an identity partition field are dropped with a warning.
     * The supplied list is never modified; the strategy keeps its own filtered copy.
     *
     * @param table the table whose files are rewritten
     * @param sparkSession the session handed to the parent strategy and used to resolve case sensitivity
     * @param list names of the columns to Z-order by; must be non-null and non-empty
     * @throws IllegalArgumentException if no columns are given, if every column is an identity
     *     partition column, or if a remaining column cannot be found in the table schema
     */
    public SparkZOrderStrategy(Table table, SparkSession sparkSession, List<String> list) {
        super(table, sparkSession);
        Preconditions.checkArgument(list != null && !list.isEmpty(), "Cannot ZOrder when no columns are specified");

        List<String> identityPartitionCols = table.spec().fields().stream()
                .filter(field -> field.transform().isIdentity())
                .map(field -> field.name())
                .filter(list::contains)
                .collect(Collectors.toList());

        // Filter into a fresh list instead of calling removeAll on the argument: the caller's list
        // may be immutable, and it should not change as a side effect of constructing a strategy.
        List<String> usableCols = list.stream()
                .filter(name -> !identityPartitionCols.contains(name))
                .collect(Collectors.toList());

        if (!identityPartitionCols.isEmpty()) {
            LOG.warn("Cannot ZOrder on an Identity partition column as these values are constant within a partition and will be removed from the ZOrder expression: {}", identityPartitionCols);
            Preconditions.checkArgument(!usableCols.isEmpty(), "Cannot perform ZOrdering, all columns provided were identity partition columns and cannot be used.");
        }

        validateColumnsExistence(table, sparkSession, usableCols);
        this.zOrderColNames = usableCols;
    }

    /**
     * Returns the options of the parent strategy plus {@value #VAR_LENGTH_CONTRIBUTION_KEY} and
     * {@value #MAX_OUTPUT_SIZE_KEY}.
     */
    @Override
    public Set<String> validOptions() {
        return ImmutableSet.<String>builder()
                .addAll(super.validOptions())
                .add(VAR_LENGTH_CONTRIBUTION_KEY)
                .add(MAX_OUTPUT_SIZE_KEY)
                .build();
    }

    /**
     * Applies the parent strategy's options and then reads the two Z-order specific options.
     *
     * @param map option names to values; missing Z-order options fall back to their defaults
     * @return this strategy
     * @throws IllegalArgumentException if either Z-order option is set to a value below 1
     */
    @Override
    public RewriteStrategy options(Map<String, String> map) {
        super.options(map);

        this.varLengthContribution = PropertyUtil.propertyAsInt(map, VAR_LENGTH_CONTRIBUTION_KEY, DEFAULT_VAR_LENGTH_CONTRIBUTION);
        Preconditions.checkArgument(this.varLengthContribution > 0, "Cannot use less than 1 byte for variable length types with zOrder, %s was set to %s", VAR_LENGTH_CONTRIBUTION_KEY, this.varLengthContribution);

        this.maxOutputSize = PropertyUtil.propertyAsInt(map, MAX_OUTPUT_SIZE_KEY, DEFAULT_MAX_OUTPUT_SIZE);
        Preconditions.checkArgument(this.maxOutputSize > 0, "Cannot have the interleaved ZOrder value use less than 1 byte, %s was set to %s", MAX_OUTPUT_SIZE_KEY, this.maxOutputSize);

        return this;
    }

    /**
     * Verifies that every requested column resolves against the table schema, honoring the
     * session's case-sensitivity setting.
     *
     * <p>NOTE(review): {@link #rewriteFiles(List)} later resolves the same names through
     * {@code StructType.apply(String)}; confirm that lookup accepts names that only matched here
     * case-insensitively.
     *
     * @throws IllegalArgumentException for the first column that cannot be found
     */
    private void validateColumnsExistence(Table table, SparkSession sparkSession, List<String> list) {
        boolean caseSensitive = SparkUtil.caseSensitive(sparkSession);
        Schema schema = table.schema();
        for (String columnName : list) {
            Types.NestedField field = caseSensitive
                    ? schema.findField(columnName)
                    : schema.caseInsensitiveFindField(columnName);
            if (field == null) {
                throw new IllegalArgumentException(String.format("Cannot find column '%s' in table schema: %s", columnName, schema.asStruct()));
            }
        }
    }

    /** Returns the display name of this strategy, {@code "Z-ORDER"}. */
    @Override
    public String name() {
        return "Z-ORDER";
    }

    /**
     * Intentionally empty: overriding with a no-op means the validation implemented by the parent
     * sort strategy is not run for Z-ordering.
     */
    @Override
    protected void validateOptions() {
    }

    /**
     * Rewrites one group of files sorted by the interleaved Z value of the configured columns.
     *
     * <p>The tasks are staged under a random group id, read back through the "iceberg" source,
     * extended with the temporary {@value #Z_COLUMN} column, sorted, and appended under the same
     * group id with only the original columns selected. The group's cache entry, staged tasks and
     * rewrite bookkeeping are always cleared before this method returns or throws.
     *
     * @param list the scan tasks of the file group to rewrite; must be non-null and non-empty
     * @return the data files produced by the write
     * @throws IllegalArgumentException if {@code list} is null or empty
     */
    @Override
    public Set<DataFile> rewriteFiles(List<FileScanTask> list) {
        Preconditions.checkArgument(list != null && !list.isEmpty(), "Cannot rewrite an empty set of files with Z-ORDER");

        SparkZOrderUDF zOrderUdf = new SparkZOrderUDF(this.zOrderColNames.size(), this.varLengthContribution, this.maxOutputSize);
        String groupId = UUID.randomUUID().toString();

        // Files written under a different spec than the table's current one get the ordering built
        // by SortOrderUtil.buildSortOrder; otherwise the plain Z sort order is used as is.
        boolean specDiffers = !list.get(0).spec().equals(table().spec());
        org.apache.spark.sql.connector.expressions.SortOrder[] ordering = specDiffers
                ? SparkDistributionAndOrderingUtil.convert(SortOrderUtil.buildSortOrder(table(), sortOrder()))
                : SparkDistributionAndOrderingUtil.convert(sortOrder());
        OrderedDistribution distribution = Distributions.ordered(ordering);

        try {
            tableCache().add(groupId, table());
            manager().stageTasks(table(), groupId, list);

            SparkSession spark = spark();

            // One shuffle partition per expected output file, never fewer than one.
            long expectedOutputFiles = numOutputFiles((long) (inputFileSize(list) * sizeEstimateMultiple()));
            spark.conf().set(SQLConf.SHUFFLE_PARTITIONS().key(), Math.max(1L, expectedOutputFiles));

            Dataset<Row> scanDf = spark.read()
                    .format("iceberg")
                    .option(SparkReadOptions.FILE_SCAN_TASK_SET_ID, groupId)
                    .load(groupId);

            // Remember the table's own columns so the temporary Z column can be dropped before writing.
            Column[] originalColumns = Arrays.stream(scanDf.schema().names())
                    .map(functions::col)
                    .toArray(Column[]::new);

            StructType scanSchema = scanDf.schema();
            Column[] zOrderInputs = this.zOrderColNames.stream()
                    .map(scanSchema::apply)
                    .map(field -> zOrderUdf.sortedLexicographically(functions.col(field.name()), field.dataType()))
                    .toArray(Column[]::new);

            Dataset<Row> zValueDf = scanDf.withColumn(Z_COLUMN, zOrderUdf.interleaveBytes(functions.array(zOrderInputs)));

            Dataset<Row> sortedDf = new Dataset<>(
                    spark,
                    sortPlan(distribution, ordering, zValueDf.logicalPlan(), spark.sessionState().conf()),
                    zValueDf.encoder());

            sortedDf.select(originalColumns)
                    .write()
                    .format("iceberg")
                    .option(SparkWriteOptions.REWRITTEN_FILE_SCAN_TASK_SET_ID, groupId)
                    .option("target-file-size-bytes", writeMaxFileSize())
                    .option(SparkWriteOptions.USE_TABLE_DISTRIBUTION_AND_ORDERING, "false")
                    // Spark save mode; spelled out rather than borrowed from Iceberg's DataOperations.
                    .mode("append")
                    .save(groupId);

            return rewriteCoordinator().fetchNewDataFiles(table(), groupId);
        } finally {
            // Single cleanup path for both success and failure.
            tableCache().remove(groupId);
            manager().removeTasks(table(), groupId);
            rewriteCoordinator().clearRewrite(table(), groupId);
        }
    }

    /**
     * Returns the fixed sort order used by this strategy: ascending on the temporary
     * {@value #Z_COLUMN} column with nulls last.
     */
    @Override
    public SortOrder sortOrder() {
        return Z_SORT_ORDER;
    }
}
