package org.apache.iceberg.spark;

import java.io.IOException;
import java.io.Serializable;
import java.lang.invoke.SerializedLambda;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.iceberg.AppendFiles;
import org.apache.iceberg.ContentFile;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DataFiles;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.ManifestFile;
import org.apache.iceberg.ManifestFiles;
import org.apache.iceberg.ManifestWriter;
import org.apache.iceberg.Metrics;
import org.apache.iceberg.MetricsConfig;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Table;
import org.apache.iceberg.hadoop.HadoopFileIO;
import org.apache.iceberg.hadoop.HadoopInputFile;
import org.apache.iceberg.hadoop.SerializableConfiguration;
import org.apache.iceberg.io.FileIO;
import org.apache.iceberg.orc.OrcMetrics;
import org.apache.iceberg.parquet.ParquetUtil;
import org.apache.iceberg.relocated.com.google.common.base.MoreObjects;
import org.apache.iceberg.relocated.com.google.common.base.Objects;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.util.PropertyUtil;
import org.apache.iceberg.util.Tasks;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.spark.TaskContext;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.AnalysisException;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.catalyst.TableIdentifier;
import org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException;
import org.apache.spark.sql.catalyst.analysis.NoSuchTableException;
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute;
import org.apache.spark.sql.catalyst.catalog.CatalogTable;
import org.apache.spark.sql.catalyst.catalog.CatalogTablePartition;
import org.apache.spark.sql.catalyst.catalog.SessionCatalog;
import org.apache.spark.sql.catalyst.expressions.Expression;
import org.apache.spark.sql.catalyst.parser.ParseException;
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan;
import org.apache.spark.sql.functions;
import scala.Function2;
import scala.Option;
import scala.Some;
import scala.Tuple2;
import scala.collection.Iterable;
import scala.collection.JavaConverters;
import scala.runtime.AbstractPartialFunction;

/* loaded from: input_file:org/apache/iceberg/spark/SparkTableUtil.class */
public class SparkTableUtil {
    /**
     * Accepts a path only when its final name component starts with neither {@code "_"} nor
     * {@code "."}; used when listing partition directories.
     */
    private static final PathFilter HIDDEN_PATH_FILTER = candidate -> {
        String fileName = candidate.getName();
        return !fileName.startsWith("_") && !fileName.startsWith(".");
    };

    /* loaded from: input_file:org/apache/iceberg/spark/SparkTableUtil$SparkPartition.class */
    /**
     * Serializable value object describing one partition of a Spark table: its partition
     * values, the URI of its location and the name of its storage format.
     */
    public static class SparkPartition implements Serializable {
        private final Map<String, String> values;
        private final String uri;
        private final String format;

        /**
         * @param map  partition values; copied into an immutable map
         * @param str  partition location URI
         * @param str2 partition format name
         */
        public SparkPartition(Map<String, String> map, String str, String str2) {
            this.values = ImmutableMap.copyOf(map);
            this.uri = str;
            this.format = str2;
        }

        /** Returns the immutable copy of the partition values taken at construction. */
        public Map<String, String> getValues() {
            return values;
        }

        /** Returns the partition location URI. */
        public String getUri() {
            return uri;
        }

        /** Returns the partition format name. */
        public String getFormat() {
            return format;
        }

        @Override
        public String toString() {
            return MoreObjects.toStringHelper(this)
                    .add("values", values)
                    .add("uri", uri)
                    .add("format", format)
                    .toString();
        }

        /** Two partitions are equal when they are of the same class and all three fields are equal. */
        @Override
        public boolean equals(Object obj) {
            if (obj == this) {
                return true;
            }
            if (obj == null || obj.getClass() != getClass()) {
                return false;
            }
            SparkPartition that = (SparkPartition) obj;
            if (!Objects.equal(values, that.values)) {
                return false;
            }
            if (!Objects.equal(uri, that.uri)) {
                return false;
            }
            return Objects.equal(format, that.format);
        }

        @Override
        public int hashCode() {
            return Objects.hashCode(values, uri, format);
        }
    }

    /** Private constructor: this class only exposes static members and is not meant to be instantiated. */
    private SparkTableUtil() {
    }

    /**
     * Builds a DataFrame describing all partitions of a Spark table.
     *
     * <p>The columns are renamed positionally to {@code partition}, {@code uri} and
     * {@code format}. NOTE(review): confirm that the bean-derived column order of
     * {@link SparkPartition} matches that naming order.
     *
     * @param sparkSession the active Spark session
     * @param str          the table identifier as a string
     * @return a DataFrame with one row per partition
     */
    public static Dataset<Row> partitionDF(SparkSession sparkSession, String str) {
        List<SparkPartition> partitions = getPartitions(sparkSession, str);
        Dataset<Row> beanFrame = sparkSession.createDataFrame(partitions, SparkPartition.class);
        return beanFrame.toDF("partition", "uri", "format");
    }

    /**
     * Builds a DataFrame describing the partitions of a Spark table that match a predicate.
     *
     * <p>The columns are renamed positionally to {@code partition}, {@code uri} and
     * {@code format}. NOTE(review): confirm that the bean-derived column order of
     * {@link SparkPartition} matches that naming order.
     *
     * @param sparkSession the active Spark session
     * @param str          the table identifier as a string
     * @param str2         the partition predicate as a SQL expression string
     * @return a DataFrame with one row per matching partition
     */
    public static Dataset<Row> partitionDFByFilter(SparkSession sparkSession, String str, String str2) {
        List<SparkPartition> matching = getPartitionsByFilter(sparkSession, str, str2);
        Dataset<Row> beanFrame = sparkSession.createDataFrame(matching, SparkPartition.class);
        return beanFrame.toDF("partition", "uri", "format");
    }

    /**
     * Returns all partitions of the table named by the given identifier string.
     *
     * @param sparkSession the active Spark session
     * @param str          the table identifier as a string
     * @return the partitions of the table
     * @throws RuntimeException as produced by {@code SparkExceptionUtil.toUncheckedException}
     *                          when a {@code ParseException} is raised
     */
    public static List<SparkPartition> getPartitions(SparkSession sparkSession, String str) {
        try {
            TableIdentifier parsedIdentifier = sparkSession.sessionState().sqlParser().parseTableIdentifier(str);
            return getPartitions(sparkSession, parsedIdentifier);
        } catch (ParseException parseFailure) {
            throw SparkExceptionUtil.toUncheckedException(parseFailure, "Unable to parse table identifier: %s", str);
        }
    }

    /**
     * Returns all partitions of the given table as listed by the session catalog.
     *
     * @param sparkSession    the active Spark session
     * @param tableIdentifier the table whose partitions are listed
     * @return one {@link SparkPartition} per catalog partition
     * @throws RuntimeException as produced by {@code SparkExceptionUtil.toUncheckedException}
     *                          when the database or the table is not found in the catalog
     */
    public static List<SparkPartition> getPartitions(SparkSession sparkSession, TableIdentifier tableIdentifier) {
        try {
            SessionCatalog sessionCatalog = sparkSession.sessionState().catalog();
            CatalogTable sourceTable = sessionCatalog.getTableMetadata(tableIdentifier);

            // No partial partition spec: list every partition of the table.
            scala.collection.Seq<CatalogTablePartition> catalogPartitions =
                    sessionCatalog.listPartitions(tableIdentifier, Option.empty());
            List<CatalogTablePartition> javaPartitions =
                    JavaConverters.seqAsJavaListConverter(catalogPartitions).asJava();

            return javaPartitions.stream()
                    .map(partition -> toSparkPartition(partition, sourceTable))
                    .collect(Collectors.toList());
        } catch (NoSuchDatabaseException missingDatabase) {
            throw SparkExceptionUtil.toUncheckedException(missingDatabase, "Unknown table: %s. Database not found in catalog.", tableIdentifier);
        } catch (NoSuchTableException missingTable) {
            throw SparkExceptionUtil.toUncheckedException(missingTable, "Unknown table: %s. Table not found in catalog.", tableIdentifier);
        }
    }

    /**
     * Returns the partitions of a table that match a predicate, both given as strings.
     *
     * <p>The table identifier and the predicate are parsed in separate steps so that a parse
     * failure is reported against the input that actually caused it.
     *
     * @param sparkSession the active Spark session
     * @param str          the table identifier as a string
     * @param str2         the partition predicate as a SQL expression string
     * @return the partitions matching the predicate
     * @throws RuntimeException as produced by {@code SparkExceptionUtil.toUncheckedException}
     *                          when the identifier or the predicate cannot be parsed
     */
    public static List<SparkPartition> getPartitionsByFilter(SparkSession sparkSession, String str, String str2) {
        TableIdentifier tableIdentifier;
        try {
            tableIdentifier = sparkSession.sessionState().sqlParser().parseTableIdentifier(str);
        } catch (ParseException e) {
            throw SparkExceptionUtil.toUncheckedException(e, "Unable to parse the table identifier: %s", str);
        }

        Expression unresolvedPredicate;
        try {
            unresolvedPredicate = sparkSession.sessionState().sqlParser().parseExpression(str2);
        } catch (ParseException e) {
            throw SparkExceptionUtil.toUncheckedException(e, "Unable to parse the predicate expression: %s", str2);
        }

        // Attribute resolution happens outside the parse handlers so its failures are not
        // mislabelled as parse errors.
        Expression resolvedPredicate = resolveAttrs(sparkSession, str, unresolvedPredicate);
        return getPartitionsByFilter(sparkSession, tableIdentifier, resolvedPredicate);
    }

    /**
     * Returns the partitions of the given table that match a predicate expression.
     *
     * <p>If the expression is not yet resolved, its attributes are first resolved against
     * the table named by the identifier's quoted string.
     *
     * @param sparkSession    the active Spark session
     * @param tableIdentifier the table whose partitions are listed
     * @param expression      the partition predicate, resolved or unresolved
     * @return one {@link SparkPartition} per matching catalog partition
     * @throws RuntimeException as produced by {@code SparkExceptionUtil.toUncheckedException}
     *                          when the database or the table is not found in the catalog
     */
    public static List<SparkPartition> getPartitionsByFilter(SparkSession sparkSession, TableIdentifier tableIdentifier, Expression expression) {
        try {
            SessionCatalog sessionCatalog = sparkSession.sessionState().catalog();
            CatalogTable sourceTable = sessionCatalog.getTableMetadata(tableIdentifier);

            Expression predicate;
            if (expression.resolved()) {
                predicate = expression;
            } else {
                predicate = resolveAttrs(sparkSession, tableIdentifier.quotedString(), expression);
            }

            // The catalog API takes a Scala Seq of predicates; wrap the single one.
            scala.collection.Seq<Expression> predicates =
                    JavaConverters.collectionAsScalaIterableConverter(ImmutableList.of(predicate)).asScala().toSeq();
            scala.collection.Seq<CatalogTablePartition> catalogPartitions =
                    sessionCatalog.listPartitionsByFilter(tableIdentifier, predicates);
            List<CatalogTablePartition> javaPartitions =
                    JavaConverters.seqAsJavaListConverter(catalogPartitions).asJava();

            return javaPartitions.stream()
                    .map(partition -> toSparkPartition(partition, sourceTable))
                    .collect(Collectors.toList());
        } catch (NoSuchDatabaseException missingDatabase) {
            throw SparkExceptionUtil.toUncheckedException(missingDatabase, "Unknown table: %s. Database not found in catalog.", tableIdentifier);
        } catch (NoSuchTableException missingTable) {
            throw SparkExceptionUtil.toUncheckedException(missingTable, "Unknown table: %s. Table not found in catalog.", tableIdentifier);
        }
    }

    /**
     * Lists the data files of a partition described by a {@link SparkPartition}.
     *
     * @param sparkPartition            the partition to list
     * @param partitionSpec             the partition spec used to build the data files
     * @param serializableConfiguration wrapper around the Hadoop configuration to use
     * @param metricsConfig             metrics configuration, passed on to the Parquet listing
     * @return the data files found in the partition
     */
    public static List<DataFile> listPartition(SparkPartition sparkPartition, PartitionSpec partitionSpec, SerializableConfiguration serializableConfiguration, MetricsConfig metricsConfig) {
        Map<String, String> partitionValues = sparkPartition.values;
        String partitionUri = sparkPartition.uri;
        String partitionFormat = sparkPartition.format;
        Configuration hadoopConf = serializableConfiguration.get();
        return listPartition(partitionValues, partitionUri, partitionFormat, partitionSpec, hadoopConf, metricsConfig);
    }

    /**
     * Lists the data files of a partition, dispatching on the format name.
     *
     * <p>The format is matched by substring, checked in the order {@code avro},
     * {@code parquet}, {@code orc}.
     *
     * @param map           partition values keyed by partition column name
     * @param str           partition location URI
     * @param str2          partition format name
     * @param partitionSpec the partition spec used to build the data files
     * @param configuration the Hadoop configuration to use
     * @param metricsConfig metrics configuration; only used for Parquet
     * @return the data files found in the partition
     * @throws UnsupportedOperationException if the format matches none of the known formats
     */
    public static List<DataFile> listPartition(Map<String, String> map, String str, String str2, PartitionSpec partitionSpec, Configuration configuration, MetricsConfig metricsConfig) {
        final List<DataFile> dataFiles;
        if (str2.contains("avro")) {
            dataFiles = listAvroPartition(map, str, partitionSpec, configuration);
        } else if (str2.contains("parquet")) {
            dataFiles = listParquetPartition(map, str, partitionSpec, configuration, metricsConfig);
        } else if (str2.contains("orc")) {
            dataFiles = listOrcPartition(map, str, partitionSpec, configuration);
        } else {
            throw new UnsupportedOperationException("Unknown partition format: " + str2);
        }
        return dataFiles;
    }

    /**
     * Lists the non-hidden regular files of an Avro partition directory as data files.
     *
     * <p>No file is opened: each data file gets a {@link Metrics} with a row count of
     * {@code -1} and no column-level maps.
     *
     * @param map           partition values keyed by partition column name
     * @param str           partition location URI
     * @param partitionSpec the partition spec used to build the data files
     * @param configuration the Hadoop configuration to use
     * @return one data file per listed file
     */
    private static List<DataFile> listAvroPartition(Map<String, String> map, String str, PartitionSpec partitionSpec, Configuration configuration) {
        try {
            Path partitionDir = new Path(str);
            return Arrays.stream(partitionDir.getFileSystem(configuration).listStatus(partitionDir, HIDDEN_PATH_FILTER))
                    .filter(status -> status.isFile())
                    .map(status -> {
                        Metrics placeholderMetrics = new Metrics(-1L, null, null, null);
                        // "name=value" segments joined with "/", in partition-spec field order.
                        String partitionKey = partitionSpec.fields().stream()
                                .map(field -> field.name())
                                .map(fieldName -> String.format("%s=%s", fieldName, map.get(fieldName)))
                                .collect(Collectors.joining("/"));
                        return DataFiles.builder(partitionSpec)
                                .withPath(status.getPath().toString())
                                .withFormat("avro")
                                .withFileSizeInBytes(status.getLen())
                                .withMetrics(placeholderMetrics)
                                .withPartitionPath(partitionKey)
                                .build();
                    })
                    .collect(Collectors.toList());
        } catch (IOException listingFailure) {
            throw SparkExceptionUtil.toUncheckedException(listingFailure, "Unable to list files in partition: %s", str);
        }
    }

    /**
     * Lists the non-hidden regular files of a Parquet partition directory as data files,
     * reading each file's footer to derive its metrics.
     *
     * @param map           partition values keyed by partition column name
     * @param str           partition location URI
     * @param partitionSpec the partition spec used to build the data files
     * @param configuration the Hadoop configuration to use
     * @param metricsConfig metrics configuration applied to the footer metrics
     * @return one data file per listed file
     */
    private static List<DataFile> listParquetPartition(Map<String, String> map, String str, PartitionSpec partitionSpec, Configuration configuration, MetricsConfig metricsConfig) {
        try {
            Path partitionDir = new Path(str);
            return Arrays.stream(partitionDir.getFileSystem(configuration).listStatus(partitionDir, HIDDEN_PATH_FILTER))
                    .filter(status -> status.isFile())
                    .map(status -> {
                        Metrics footerMetrics;
                        try {
                            footerMetrics = ParquetUtil.footerMetrics(ParquetFileReader.readFooter(configuration, status), metricsConfig);
                        } catch (IOException footerFailure) {
                            throw SparkExceptionUtil.toUncheckedException(footerFailure, "Unable to read the footer of the parquet file: %s", status.getPath());
                        }
                        // "name=value" segments joined with "/", in partition-spec field order.
                        String partitionKey = partitionSpec.fields().stream()
                                .map(field -> field.name())
                                .map(fieldName -> String.format("%s=%s", fieldName, map.get(fieldName)))
                                .collect(Collectors.joining("/"));
                        return DataFiles.builder(partitionSpec)
                                .withPath(status.getPath().toString())
                                .withFormat("parquet")
                                .withFileSizeInBytes(status.getLen())
                                .withMetrics(footerMetrics)
                                .withPartitionPath(partitionKey)
                                .build();
                    })
                    .collect(Collectors.toList());
        } catch (IOException listingFailure) {
            throw SparkExceptionUtil.toUncheckedException(listingFailure, "Unable to list files in partition: %s", str);
        }
    }

    /**
     * Lists the non-hidden regular files of an ORC partition directory as data files,
     * obtaining each file's metrics through {@code OrcMetrics.fromInputFile}.
     *
     * @param map           partition values keyed by partition column name
     * @param str           partition location URI
     * @param partitionSpec the partition spec used to build the data files
     * @param configuration the Hadoop configuration to use
     * @return one data file per listed file
     */
    private static List<DataFile> listOrcPartition(Map<String, String> map, String str, PartitionSpec partitionSpec, Configuration configuration) {
        try {
            Path partitionDir = new Path(str);
            return Arrays.stream(partitionDir.getFileSystem(configuration).listStatus(partitionDir, HIDDEN_PATH_FILTER))
                    .filter(status -> status.isFile())
                    .map(status -> {
                        Metrics orcMetrics = OrcMetrics.fromInputFile(HadoopInputFile.fromPath(status.getPath(), configuration));
                        // "name=value" segments joined with "/", in partition-spec field order.
                        String partitionKey = partitionSpec.fields().stream()
                                .map(field -> field.name())
                                .map(fieldName -> String.format("%s=%s", fieldName, map.get(fieldName)))
                                .collect(Collectors.joining("/"));
                        return DataFiles.builder(partitionSpec)
                                .withPath(status.getPath().toString())
                                .withFormat("orc")
                                .withFileSizeInBytes(status.getLen())
                                .withMetrics(orcMetrics)
                                .withPartitionPath(partitionKey)
                                .build();
                    })
                    .collect(Collectors.toList());
        } catch (IOException listingFailure) {
            throw SparkExceptionUtil.toUncheckedException(listingFailure, "Unable to list files in partition: %s", str);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Converts a catalog partition into a {@link SparkPartition}.
     *
     * <p>The format is the partition's serde when one is defined, otherwise the table's
     * provider.
     *
     * @param catalogTablePartition the catalog partition to convert
     * @param catalogTable          the table the partition belongs to; supplies the fallback format
     * @return the converted partition
     * @throws IllegalArgumentException if the partition has no location URI, or if neither a
     *                                  serde nor a table provider is defined
     */
    public static SparkPartition toSparkPartition(CatalogTablePartition catalogTablePartition, CatalogTable catalogTable) {
        Option<?> location = catalogTablePartition.storage().locationUri();
        Option<String> partitionSerde = catalogTablePartition.storage().serde();
        Preconditions.checkArgument(location.nonEmpty(), "Partition URI should be defined");
        Preconditions.checkArgument(partitionSerde.nonEmpty() || catalogTable.provider().nonEmpty(), "Partition format should be defined");

        Map<String, String> partitionValues = JavaConverters.mapAsJavaMapConverter(catalogTablePartition.spec()).asJava();
        String partitionUri = String.valueOf(location.get());
        String partitionFormat;
        if (partitionSerde.nonEmpty()) {
            partitionFormat = partitionSerde.get();
        } else {
            partitionFormat = catalogTable.provider().get();
        }
        return new SparkPartition(partitionValues, partitionUri, partitionFormat);
    }

    /**
     * Replaces every {@link UnresolvedAttribute} in an expression with the attribute it
     * resolves to in the analyzed plan of the given table.
     *
     * @param sparkSession the active Spark session
     * @param str          the name of the table whose analyzed plan is used for resolution
     * @param expression   the expression to transform
     * @return the transformed expression
     * @throws IllegalArgumentException if an unresolved attribute cannot be resolved against
     *                                  the table's plan
     */
    private static Expression resolveAttrs(SparkSession sparkSession, String str, Expression expression) {
        final Function2<String, String, Object> nameResolver = sparkSession.sessionState().analyzer().resolver();
        final LogicalPlan tablePlan = sparkSession.table(str).queryExecution().analyzed();

        AbstractPartialFunction<Expression, Expression> attributeResolver = new AbstractPartialFunction<Expression, Expression>() {
            @Override
            public boolean isDefinedAt(Expression candidate) {
                // Only unresolved attributes are rewritten.
                return candidate instanceof UnresolvedAttribute;
            }

            @Override
            public Expression apply(Expression candidate) {
                UnresolvedAttribute unresolved = (UnresolvedAttribute) candidate;
                Option<?> match = tablePlan.resolve(unresolved.nameParts(), nameResolver);
                if (!match.isDefined()) {
                    throw new IllegalArgumentException(String.format("Could not resolve %s using columns: %s", candidate, tablePlan.output()));
                }
                return (Expression) match.get();
            }
        };

        return expression.transform(attributeResolver);
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Writes all data files supplied by the iterator into a single Avro manifest under the
     * staging location and returns that manifest.
     *
     * <p>The manifest file name is derived from the current Spark task's stage id and task
     * attempt id. The writer is closed through try-with-resources, so it is released even
     * when adding a file fails.
     *
     * @param serializableConfiguration wrapper around the Hadoop configuration used for file IO
     * @param partitionSpec             the partition spec of the data files
     * @param str                       the staging directory in which the manifest is created
     * @param it                        pairs whose second element is the data file to add
     * @return an iterator over the single written manifest, or an empty iterator when
     *         {@code it} has no elements
     * @throws RuntimeException as produced by {@code SparkExceptionUtil.toUncheckedException}
     *                          when closing the writer raises an {@code IOException}
     */
    public static Iterator<ManifestFile> buildManifest(SerializableConfiguration serializableConfiguration, PartitionSpec partitionSpec, String str, Iterator<Tuple2<String, DataFile>> it) {
        if (!it.hasNext()) {
            return Collections.emptyIterator();
        }

        HadoopFileIO hadoopFileIO = new HadoopFileIO(serializableConfiguration.get());
        TaskContext taskContext = TaskContext.get();
        String manifestName = String.format("stage-%d-task-%d-manifest", taskContext.stageId(), taskContext.taskAttemptId());
        String manifestLocation = FileFormat.AVRO.addExtension(new Path(str, manifestName).toString());
        ManifestWriter<DataFile> writer = ManifestFiles.write(partitionSpec, hadoopFileIO.newOutputFile(manifestLocation));

        // A second reference lets try-with-resources close the writer while the outer
        // reference stays usable for toManifestFile() after the close.
        try (ManifestWriter<DataFile> closingWriter = writer) {
            it.forEachRemaining(fileTuple -> closingWriter.add(fileTuple._2()));
        } catch (IOException e) {
            throw SparkExceptionUtil.toUncheckedException(e, "Unable to close the manifest writer: %s", manifestLocation);
        }

        return ImmutableList.of(writer.toManifestFile()).iterator();
    }

    /**
     * Imports the data files of an existing Spark table into an Iceberg table.
     *
     * <p>When the source identifier has no database, the session catalog's current database
     * is used. Unpartitioned tables are imported in a single append; partitioned tables are
     * imported partition by partition via staged manifests.
     *
     * @param sparkSession    the active Spark session
     * @param tableIdentifier the source Spark table
     * @param table           the target Iceberg table
     * @param str             the staging directory for intermediate manifests
     * @throws org.apache.iceberg.exceptions.NoSuchTableException if the source table does not exist
     * @throws RuntimeException as produced by {@code SparkExceptionUtil.toUncheckedException}
     *                          when the partition spec cannot be determined
     */
    public static void importSparkTable(SparkSession sparkSession, TableIdentifier tableIdentifier, Table table, String str) {
        SessionCatalog catalog = sparkSession.sessionState().catalog();
        String database = tableIdentifier.database().nonEmpty() ? tableIdentifier.database().get() : catalog.getCurrentDatabase();
        TableIdentifier qualifiedIdentifier = new TableIdentifier(tableIdentifier.table(), Some.apply(database));
        if (!catalog.tableExists(qualifiedIdentifier)) {
            throw new org.apache.iceberg.exceptions.NoSuchTableException(String.format("Table %s does not exist", qualifiedIdentifier));
        }
        try {
            PartitionSpec sourceSpec = SparkSchemaUtil.specForTable(sparkSession, qualifiedIdentifier.unquotedString());
            // Compare by value: an equal spec that is not the shared unpartitioned instance
            // must still take the unpartitioned path.
            if (PartitionSpec.unpartitioned().equals(sourceSpec)) {
                importUnpartitionedSparkTable(sparkSession, qualifiedIdentifier, table);
            } else {
                // Use the database-qualified identifier here as well, consistent with the
                // existence check and the spec lookup above.
                List<SparkPartition> sourcePartitions = getPartitions(sparkSession, qualifiedIdentifier);
                importSparkPartitions(sparkSession, sourcePartitions, table, sourceSpec, str);
            }
        } catch (AnalysisException e) {
            throw SparkExceptionUtil.toUncheckedException(e, "Unable to get partition spec for table: %s", qualifiedIdentifier);
        }
    }

    /**
     * Imports all data files of an unpartitioned Spark table into an Iceberg table with a
     * single append.
     *
     * <p>The format is the table's storage serde when defined, otherwise its provider.
     *
     * @param sparkSession    the active Spark session
     * @param tableIdentifier the source Spark table
     * @param table           the target Iceberg table
     * @throws IllegalArgumentException if neither a serde nor a provider is defined
     * @throws RuntimeException as produced by {@code SparkExceptionUtil.toUncheckedException}
     *                          when the database or the table is not found in the catalog
     */
    private static void importUnpartitionedSparkTable(SparkSession sparkSession, TableIdentifier tableIdentifier, Table table) {
        try {
            CatalogTable sourceTable = sparkSession.sessionState().catalog().getTableMetadata(tableIdentifier);

            Option<String> sourceFormat = sourceTable.storage().serde();
            if (!sourceFormat.nonEmpty()) {
                sourceFormat = sourceTable.provider();
            }
            Preconditions.checkArgument(sourceFormat.nonEmpty(), "Could not determine table format");

            Map<String, String> noPartitionValues = Collections.emptyMap();
            String tableLocation = sourceTable.location().toString();
            String formatName = sourceFormat.get();
            PartitionSpec unpartitionedSpec = PartitionSpec.unpartitioned();
            Configuration hadoopConf = sparkSession.sessionState().newHadoopConf();
            MetricsConfig metricsConfig = MetricsConfig.fromProperties(table.properties());
            List<DataFile> dataFiles = listPartition(noPartitionValues, tableLocation, formatName, unpartitionedSpec, hadoopConf, metricsConfig);

            AppendFiles append = table.newAppend();
            dataFiles.forEach(append::appendFile);
            append.commit();
        } catch (NoSuchTableException missingTable) {
            throw SparkExceptionUtil.toUncheckedException(missingTable, "Unknown table: %s. Table not found in catalog.", tableIdentifier);
        } catch (NoSuchDatabaseException missingDatabase) {
            throw SparkExceptionUtil.toUncheckedException(missingDatabase, "Unknown table: %s. Database not found in catalog.", tableIdentifier);
        }
    }

    /**
     * Imports the data files of the given Spark partitions into an Iceberg table.
     *
     * <p>Partitions are listed in a Spark job, the resulting data files are sorted by path and
     * written into staged manifests, and the manifests are appended in one commit. Staged
     * manifests are deleted after the commit unless the table property
     * {@code compatibility.snapshot-id-inheritance.enabled} is true, and are always deleted
     * when the append fails.
     *
     * <p>An empty partition list is a no-op: nothing is listed and nothing is committed.
     *
     * @param sparkSession  the active Spark session
     * @param list          the partitions to import
     * @param table         the target Iceberg table
     * @param partitionSpec the partition spec of the imported data files
     * @param str           the staging directory for intermediate manifests
     */
    public static void importSparkPartitions(SparkSession sparkSession, List<SparkPartition> list, Table table, PartitionSpec partitionSpec, String str) {
        if (list.isEmpty()) {
            // Without this guard the slice count below would be 0, which Spark rejects
            // when the listing job runs.
            return;
        }

        SerializableConfiguration serializableConfiguration = new SerializableConfiguration(sparkSession.sessionState().newHadoopConf());
        int parallelism = Math.min(list.size(), sparkSession.sessionState().conf().parallelPartitionDiscoveryParallelism());
        int numShufflePartitions = sparkSession.sessionState().conf().numShufflePartitions();
        MetricsConfig metricsConfig = MetricsConfig.fromProperties(table.properties());

        Dataset<SparkPartition> partitionDataset = sparkSession.createDataset(
                JavaSparkContext.fromSparkContext(sparkSession.sparkContext()).parallelize(list, parallelism).rdd(),
                Encoders.javaSerialization(SparkPartition.class));

        // The casts name the Spark Java functional interfaces explicitly so overload
        // resolution does not depend on the Scala version Spark was built with.
        List<ManifestFile> manifests = partitionDataset
                .flatMap((org.apache.spark.api.java.function.FlatMapFunction<SparkPartition, DataFile>) sparkPartition ->
                                listPartition(sparkPartition, partitionSpec, serializableConfiguration, metricsConfig).iterator(),
                        Encoders.javaSerialization(DataFile.class))
                .repartition(numShufflePartitions)
                .map((org.apache.spark.api.java.function.MapFunction<DataFile, Tuple2<String, DataFile>>) dataFile ->
                                Tuple2.apply(dataFile.path().toString(), dataFile),
                        Encoders.tuple(Encoders.STRING(), Encoders.javaSerialization(DataFile.class)))
                .orderBy(functions.col("_1"))
                .mapPartitions((org.apache.spark.api.java.function.MapPartitionsFunction<Tuple2<String, DataFile>, ManifestFile>) it ->
                                buildManifest(serializableConfiguration, partitionSpec, str, it),
                        Encoders.javaSerialization(ManifestFile.class))
                .collectAsList();

        try {
            boolean snapshotIdInheritanceEnabled = PropertyUtil.propertyAsBoolean(table.properties(), "compatibility.snapshot-id-inheritance.enabled", false);
            AppendFiles append = table.newAppend();
            manifests.forEach(append::appendManifest);
            append.commit();
            if (!snapshotIdInheritanceEnabled) {
                // The staged manifests are only removed after a successful commit when
                // snapshot id inheritance is disabled.
                deleteManifests(table.io(), manifests);
            }
        } catch (Throwable th) {
            deleteManifests(table.io(), manifests);
            throw th;
        }
    }

    /**
     * Deletes the given manifest files through the supplied {@link FileIO}, without retries
     * and with failures suppressed once all deletions have been attempted.
     *
     * @param fileIO the file IO used to delete the files
     * @param list   the manifests to delete
     */
    private static void deleteManifests(FileIO fileIO, List<ManifestFile> list) {
        Tasks.foreach(list)
                .noRetry()
                .suppressFailureWhenFinished()
                .run(manifest -> fileIO.deleteFile(manifest.path()));
    }

    /*
     * Lambda deserialization hook, marked synthetic by the decompiler. It maps a
     * SerializedLambda back to one of the three lambdas of importSparkPartitions, selected by
     * implementation method name and then validated against the expected functional
     * interface, implementing class and method signatures.
     *
     * NOTE(review): this looks like compiler-generated code rendered by the decompiler (the
     * boolean selector assigned -1 and 2, and the duplicate "case true" labels, are not valid
     * Java source) - presumably it should not be maintained by hand; confirm before editing.
     */
    private static /* synthetic */ Object $deserializeLambda$(SerializedLambda serializedLambda) {
        String implMethodName = serializedLambda.getImplMethodName();
        // Selector for the second switch; stays at its initial value when no name matches.
        boolean z = -1;
        // First switch: match on the hash code, then confirm with equals.
        switch (implMethodName.hashCode()) {
            case -745949006:
                if (implMethodName.equals("lambda$importSparkPartitions$8e419cf7$1")) {
                    z = false;
                    break;
                }
                break;
            case -616842784:
                if (implMethodName.equals("lambda$importSparkPartitions$3b150484$1")) {
                    z = true;
                    break;
                }
                break;
            case 1868787661:
                if (implMethodName.equals("lambda$importSparkPartitions$cad860c7$1")) {
                    z = 2;
                    break;
                }
                break;
        }
        switch (z) {
            case false:
                // FlatMapFunction lambda: lists the data files of one SparkPartition.
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/FlatMapFunction") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/util/Iterator;") && serializedLambda.getImplClass().equals("org/apache/iceberg/spark/SparkTableUtil") && serializedLambda.getImplMethodSignature().equals("(Lorg/apache/iceberg/PartitionSpec;Lorg/apache/iceberg/hadoop/SerializableConfiguration;Lorg/apache/iceberg/MetricsConfig;Lorg/apache/iceberg/spark/SparkTableUtil$SparkPartition;)Ljava/util/Iterator;")) {
                    PartitionSpec partitionSpec = (PartitionSpec) serializedLambda.getCapturedArg(0);
                    SerializableConfiguration serializableConfiguration = (SerializableConfiguration) serializedLambda.getCapturedArg(1);
                    MetricsConfig metricsConfig = (MetricsConfig) serializedLambda.getCapturedArg(2);
                    return sparkPartition -> {
                        return listPartition(sparkPartition, partitionSpec, serializableConfiguration, metricsConfig).iterator();
                    };
                }
                break;
            case true:
                // MapPartitionsFunction lambda: builds a manifest from one partition's iterator.
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/MapPartitionsFunction") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/util/Iterator;)Ljava/util/Iterator;") && serializedLambda.getImplClass().equals("org/apache/iceberg/spark/SparkTableUtil") && serializedLambda.getImplMethodSignature().equals("(Lorg/apache/iceberg/hadoop/SerializableConfiguration;Lorg/apache/iceberg/PartitionSpec;Ljava/lang/String;Ljava/util/Iterator;)Ljava/util/Iterator;")) {
                    SerializableConfiguration serializableConfiguration2 = (SerializableConfiguration) serializedLambda.getCapturedArg(0);
                    PartitionSpec partitionSpec2 = (PartitionSpec) serializedLambda.getCapturedArg(1);
                    String str = (String) serializedLambda.getCapturedArg(2);
                    return it -> {
                        return buildManifest(serializableConfiguration2, partitionSpec2, str, it);
                    };
                }
                break;
            case true:
                // MapFunction lambda: pairs each data file with its path string.
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/MapFunction") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("org/apache/iceberg/spark/SparkTableUtil") && serializedLambda.getImplMethodSignature().equals("(Lorg/apache/iceberg/DataFile;)Lscala/Tuple2;")) {
                    return dataFile -> {
                        return Tuple2.apply(dataFile.path().toString(), dataFile);
                    };
                }
                break;
        }
        // Reached when no lambda name matched or the signature checks failed.
        throw new IllegalArgumentException("Invalid lambda deserialization");
    }
}
