package org.apache.iceberg.spark.source;

import java.io.Serializable;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.iceberg.CombinedScanTask;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.FileScanTask;
import org.apache.iceberg.Schema;
import org.apache.iceberg.SchemaParser;
import org.apache.iceberg.SerializableTable;
import org.apache.iceberg.Table;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.hadoop.HadoopInputFile;
import org.apache.iceberg.hadoop.Util;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.spark.Spark3Util;
import org.apache.iceberg.spark.SparkSchemaUtil;
import org.apache.iceberg.spark.SparkUtil;
import org.apache.iceberg.util.PropertyUtil;
import org.apache.iceberg.util.TableScanUtil;
import org.apache.iceberg.util.Tasks;
import org.apache.iceberg.util.ThreadPools;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.broadcast.Broadcast;
import org.apache.spark.sql.RuntimeConfig;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.connector.read.Batch;
import org.apache.spark.sql.connector.read.InputPartition;
import org.apache.spark.sql.connector.read.PartitionReader;
import org.apache.spark.sql.connector.read.PartitionReaderFactory;
import org.apache.spark.sql.connector.read.Scan;
import org.apache.spark.sql.connector.read.Statistics;
import org.apache.spark.sql.connector.read.SupportsReportStatistics;
import org.apache.spark.sql.connector.read.streaming.MicroBatchStream;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.sql.util.CaseInsensitiveStringMap;
import org.apache.spark.sql.vectorized.ColumnarBatch;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Base class for Spark batch scans over an Iceberg {@link Table}.
 *
 * <p>Subclasses supply the planned work through {@link #tasks()}. This class turns those tasks
 * into Spark input partitions, decides whether partitions are read row-by-row or as columnar
 * batches, and reports size/row-count statistics for the scan.
 */
abstract class SparkBatchScan implements Scan, Batch, SupportsReportStatistics {
    private static final Logger LOG = LoggerFactory.getLogger(SparkBatchScan.class);

    private final JavaSparkContext sparkContext;
    private final Table table;
    private final boolean caseSensitive;
    private final boolean localityPreferred;
    private final Schema expectedSchema;
    private final List<Expression> filterExpressions;
    private final int batchSize;
    private final boolean readTimestampWithoutZone;
    private final CaseInsensitiveStringMap options;

    // Spark view of expectedSchema; converted on first call to readSchema() and then reused.
    private StructType readSchema = null;

    /** Columnar partition reader: a {@link BatchDataReader} fed from a {@link ReadTask}. */
    private static class BatchReader extends BatchDataReader implements PartitionReader<ColumnarBatch> {
        BatchReader(ReadTask readTask, int batchSize) {
            super(readTask.task, readTask.table(), readTask.expectedSchema(), readTask.isCaseSensitive(), batchSize);
        }
    }

    /**
     * Serializable input partition wrapping one {@link CombinedScanTask}.
     *
     * <p>The table is carried as a broadcast variable and the expected schema as a JSON string.
     * The parsed schema and the preferred locations are {@code transient}, so they are not
     * written out when the task is serialized; the schema is re-parsed lazily on first use.
     */
    static class ReadTask implements InputPartition, Serializable {
        private final CombinedScanTask task;
        private final Broadcast<Table> tableBroadcast;
        private final String expectedSchemaString;
        private final boolean caseSensitive;
        private transient Schema expectedSchema = null;
        private transient String[] preferredLocations = null;

        /**
         * @param task the combined scan task this partition reads
         * @param tableBroadcast broadcast handle to the table being read
         * @param expectedSchemaString JSON form of the schema to project
         * @param caseSensitive case-sensitivity flag handed to the readers
         * @param localityPreferred when true, block locations are looked up for the task's files;
         *     otherwise the partition reports no location preference
         */
        public ReadTask(CombinedScanTask task, Broadcast<Table> tableBroadcast, String expectedSchemaString,
                boolean caseSensitive, boolean localityPreferred) {
            this.task = task;
            this.tableBroadcast = tableBroadcast;
            this.expectedSchemaString = expectedSchemaString;
            this.caseSensitive = caseSensitive;
            if (localityPreferred) {
                // Resolved eagerly, at construction time, using the table's FileIO.
                this.preferredLocations = Util.blockLocations(tableBroadcast.value().io(), task);
            } else {
                this.preferredLocations = HadoopInputFile.NO_LOCATION_PREFERENCE;
            }
        }

        /** Returns the locations computed by the constructor. */
        public String[] preferredLocations() {
            return this.preferredLocations;
        }

        /** Returns the file scan tasks contained in the wrapped combined task. */
        public Collection<FileScanTask> files() {
            return this.task.files();
        }

        /** Returns the table held by the broadcast variable. */
        public Table table() {
            return this.tableBroadcast.value();
        }

        public boolean isCaseSensitive() {
            return this.caseSensitive;
        }

        /** Returns the expected schema, parsing it from its JSON form on first access. */
        public Schema expectedSchema() {
            if (this.expectedSchema == null) {
                this.expectedSchema = SchemaParser.fromJson(this.expectedSchemaString);
            }
            return this.expectedSchema;
        }
    }

    /**
     * Creates row or columnar readers for {@link ReadTask} partitions.
     *
     * <p>Columnar reads are reported as supported only when the configured batch size is greater
     * than one.
     */
    static class ReaderFactory implements PartitionReaderFactory {
        private final int batchSize;

        /**
         * @param batchSize rows per columnar batch; values of one or less select row-based reads
         */
        public ReaderFactory(int batchSize) {
            this.batchSize = batchSize;
        }

        /**
         * @throws UnsupportedOperationException if {@code inputPartition} is not a {@link ReadTask}
         */
        public PartitionReader<InternalRow> createReader(InputPartition inputPartition) {
            if (inputPartition instanceof ReadTask) {
                return new RowReader((ReadTask) inputPartition);
            }
            throw new UnsupportedOperationException("Incorrect input partition type: " + inputPartition);
        }

        /**
         * @throws UnsupportedOperationException if {@code inputPartition} is not a {@link ReadTask}
         */
        public PartitionReader<ColumnarBatch> createColumnarReader(InputPartition inputPartition) {
            if (inputPartition instanceof ReadTask) {
                return new BatchReader((ReadTask) inputPartition, this.batchSize);
            }
            throw new UnsupportedOperationException("Incorrect input partition type: " + inputPartition);
        }

        /** The decision depends only on the batch size, not on the given partition. */
        public boolean supportColumnarReads(InputPartition inputPartition) {
            return this.batchSize > 1;
        }
    }

    /** Row-based partition reader: a {@link RowDataReader} fed from a {@link ReadTask}. */
    private static class RowReader extends RowDataReader implements PartitionReader<InternalRow> {
        RowReader(ReadTask readTask) {
            super(readTask.task, readTask.table(), readTask.expectedSchema(), readTask.isCaseSensitive());
        }
    }

    /**
     * @param sparkSession session whose Spark context is used for broadcasting
     * @param table table to scan
     * @param caseSensitive case-sensitivity flag propagated to read tasks and streams
     * @param expectedSchema schema to project
     * @param filters filter expressions for the scan; {@code null} is treated as an empty list
     * @param options read options
     */
    public SparkBatchScan(SparkSession sparkSession, Table table, boolean caseSensitive, Schema expectedSchema,
            List<Expression> filters, CaseInsensitiveStringMap options) {
        this.sparkContext = JavaSparkContext.fromSparkContext(sparkSession.sparkContext());
        this.table = table;
        this.caseSensitive = caseSensitive;
        this.expectedSchema = expectedSchema;
        this.filterExpressions = filters != null ? filters : Collections.emptyList();
        this.localityPreferred = Spark3Util.isLocalityEnabled(table.io(), table.location(), options);
        this.batchSize = Spark3Util.batchSize(table.properties(), options);
        this.options = options;
        // NOTE(review): this reads the *active* session's conf rather than the sparkSession
        // argument's - confirm that is intended before changing it.
        this.readTimestampWithoutZone =
                SparkUtil.canHandleTimestampWithoutZone(options, SparkSession.active().conf());
    }

    public Table table() {
        return this.table;
    }

    public boolean caseSensitive() {
        return this.caseSensitive;
    }

    public Schema expectedSchema() {
        return this.expectedSchema;
    }

    /** Returns the filter expressions given at construction; never {@code null}. */
    public List<Expression> filterExpressions() {
        return this.filterExpressions;
    }

    /** Returns the combined scan tasks that make up this scan. */
    protected abstract List<CombinedScanTask> tasks();

    /** This object is its own {@link Batch}. */
    public Batch toBatch() {
        return this;
    }

    /** Creates a micro-batch stream over the same table, schema and options as this scan. */
    public MicroBatchStream toMicroBatchStream(String checkpointLocation) {
        return new SparkMicroBatchStream(
                this.sparkContext, this.table, this.caseSensitive, this.expectedSchema, this.options,
                checkpointLocation);
    }

    /**
     * Returns the Spark schema for the expected Iceberg schema, converting it on first call.
     *
     * @throws IllegalArgumentException presumably (via {@code Preconditions.checkArgument}) when
     *     the schema contains a timestamp without zone and handling of such timestamps is not
     *     enabled
     */
    public StructType readSchema() {
        if (this.readSchema == null) {
            boolean timestampsSupported =
                    this.readTimestampWithoutZone || !SparkUtil.hasTimestampWithoutZone(this.expectedSchema);
            Preconditions.checkArgument(timestampsSupported, SparkUtil.TIMESTAMP_WITHOUT_TIMEZONE_ERROR);
            this.readSchema = SparkSchemaUtil.convert(this.expectedSchema);
        }
        return this.readSchema;
    }

    /**
     * Builds one {@link ReadTask} per combined scan task.
     *
     * <p>A serializable copy of the table is broadcast once and shared by all partitions. When
     * locality is preferred each {@code ReadTask} constructor looks up block locations, so
     * construction is handed to the worker pool; otherwise no executor service is supplied.
     */
    public InputPartition[] planInputPartitions() {
        String expectedSchemaString = SchemaParser.toJson(this.expectedSchema);
        Broadcast<Table> tableBroadcast = this.sparkContext.broadcast(SerializableTable.copyOf(this.table));
        List<CombinedScanTask> scanTasks = tasks();
        InputPartition[] partitions = new InputPartition[scanTasks.size()];

        Tasks.range(partitions.length)
                .stopOnFailure()
                .executeWith(this.localityPreferred ? ThreadPools.getWorkerPool() : null)
                .run(index -> {
                    partitions[index] = new ReadTask(
                            scanTasks.get(index), tableBroadcast, expectedSchemaString,
                            this.caseSensitive, this.localityPreferred);
                });

        return partitions;
    }

    /**
     * Chooses between columnar and row-based reading and returns the matching factory.
     *
     * <p>Columnar reads are used only when vectorization is enabled for the file format, no task
     * has deletes, and either every file is ORC, or every file is Parquet and the expected schema
     * is non-empty and made up solely of primitive columns. Otherwise the factory is created with
     * a batch size of 0, which makes it report row-based reads.
     */
    public PartitionReaderFactory createReaderFactory() {
        // Fetch the tasks once and reuse them for every check below.
        List<CombinedScanTask> scanTasks = tasks();
        boolean allParquetFileScanTasks = allFilesHaveFormat(scanTasks, FileFormat.PARQUET);
        boolean allOrcFileScanTasks = allFilesHaveFormat(scanTasks, FileFormat.ORC);

        boolean readUsingBatch = batchReadsEnabled(allParquetFileScanTasks, allOrcFileScanTasks)
                && scanTasks.stream().noneMatch(TableScanUtil::hasDeletes)
                && (allOrcFileScanTasks
                        || (allParquetFileScanTasks
                                && !this.expectedSchema.columns().isEmpty()
                                && this.expectedSchema.columns().stream()
                                        .allMatch(column -> column.type().isPrimitiveType())));

        return new ReaderFactory(readUsingBatch ? this.batchSize : 0);
    }

    /** Returns true when no task is a data task and every file in every task has the given format. */
    private static boolean allFilesHaveFormat(List<CombinedScanTask> scanTasks, FileFormat format) {
        return scanTasks.stream()
                .allMatch(scanTask -> !scanTask.isDataTask()
                        && scanTask.files().stream()
                                .allMatch(fileTask -> fileTask.file().format().equals(format)));
    }

    /**
     * Looks up whether vectorization is enabled for the format shared by all files. Parquet is
     * checked first; when the files are neither all-Parquet nor all-ORC the answer is false.
     */
    private boolean batchReadsEnabled(boolean allParquetFileScanTasks, boolean allOrcFileScanTasks) {
        Map<String, String> properties = this.table.properties();
        RuntimeConfig sessionConf = SparkSession.active().conf();
        if (allParquetFileScanTasks) {
            return Spark3Util.isVectorizationEnabled(FileFormat.PARQUET, properties, sessionConf, this.options);
        }
        if (allOrcFileScanTasks) {
            return Spark3Util.isVectorizationEnabled(FileFormat.ORC, properties, sessionConf, this.options);
        }
        return false;
    }

    /**
     * Estimates the size in bytes and row count of this scan.
     *
     * <ul>
     *   <li>A table with no current snapshot reports zero bytes and zero rows.
     *   <li>A partitioned table scanned without filters is estimated from the snapshot summary's
     *       {@code total-records} entry ({@code Long.MAX_VALUE} when absent), sized against the
     *       expected schema, or the table schema if no expected schema was given.
     *   <li>Otherwise the lengths and record counts of all planned file scan tasks are summed.
     * </ul>
     */
    public Statistics estimateStatistics() {
        if (this.table.currentSnapshot() == null) {
            return new Stats(0L, 0L);
        }

        if (!this.table.spec().isUnpartitioned() && this.filterExpressions.isEmpty()) {
            LOG.debug("using table metadata to estimate table statistics");
            long totalRecords = PropertyUtil.propertyAsLong(
                    this.table.currentSnapshot().summary(), "total-records", Long.MAX_VALUE);
            Schema projectedSchema = this.expectedSchema != null ? this.expectedSchema : this.table.schema();
            long estimatedSize = SparkSchemaUtil.estimateSize(SparkSchemaUtil.convert(projectedSchema), totalRecords);
            return new Stats(estimatedSize, totalRecords);
        }

        long sizeInBytes = 0L;
        long numRows = 0L;
        for (CombinedScanTask scanTask : tasks()) {
            for (FileScanTask fileTask : scanTask.files()) {
                sizeInBytes += fileTask.length();
                numRows += fileTask.file().recordCount();
            }
        }
        return new Stats(sizeInBytes, numRows);
    }

    /** Returns the table followed by a comma-separated description of the filter expressions. */
    public String description() {
        String filters = this.filterExpressions.stream()
                .map(Spark3Util::describe)
                .collect(Collectors.joining(", "));
        return String.format("%s [filters=%s]", this.table, filters);
    }
}
