package org.apache.hudi.utilities.sources;

import java.util.Arrays;
import org.apache.hudi.DataSourceReadOptions;
import org.apache.hudi.DataSourceUtils;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.TypedProperties;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor;
import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.hudi.utilities.sources.helpers.IncrSourceHelper;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

/* loaded from: input_file:org/apache/hudi/utilities/sources/HoodieIncrSource.class */
/**
 * Row-based source that reads a bounded window of commits from another Hudi table
 * using the {@code org.apache.hudi} Spark datasource in incremental query mode.
 *
 * <p>The window is determined by {@link IncrSourceHelper#calculateBeginAndEndInstants};
 * the end instant of the window is returned to the caller as the next checkpoint.
 */
public class HoodieIncrSource extends RowSource {
    private static final Logger LOG = LogManager.getLogger(HoodieIncrSource.class);

    /**
     * Property keys and default values for {@link HoodieIncrSource}.
     *
     * <p>The partition-related keys and the extractor-class default are declared here
     * but are not read anywhere in this class.
     */
    protected static class Config {
        /** Base path of the source Hudi table; this property is required. */
        private static final String HOODIE_SRC_BASE_PATH = "hoodie.deltastreamer.source.hoodieincr.path";
        /** Number of instants handed to the begin/end calculation for one fetch. */
        private static final String NUM_INSTANTS_PER_FETCH = "hoodie.deltastreamer.source.hoodieincr.num_instants";
        private static final String HOODIE_SRC_PARTITION_FIELDS = "hoodie.deltastreamer.source.hoodieincr.partition.fields";
        private static final String HOODIE_SRC_PARTITION_EXTRACTORCLASS = "hoodie.deltastreamer.source.hoodieincr.partition.extractor.class";
        /** Flag forwarded to the begin/end calculation for the case where no checkpoint is supplied. */
        private static final String READ_LATEST_INSTANT_ON_MISSING_CKPT = "hoodie.deltastreamer.source.hoodieincr.read_latest_on_missing_ckpt";
        private static final Integer DEFAULT_NUM_INSTANTS_PER_FETCH = 1;
        private static final String DEFAULT_HOODIE_SRC_PARTITION_EXTRACTORCLASS = SlashEncodedDayPartitionValueExtractor.class.getCanonicalName();
        private static final Boolean DEFAULT_READ_LATEST_INSTANT_ON_MISSING_CKPT = false;

        protected Config() {
        }
    }

    /**
     * Creates the source; all arguments are handed unchanged to the {@link RowSource} constructor.
     */
    public HoodieIncrSource(TypedProperties typedProperties, JavaSparkContext javaSparkContext, SparkSession sparkSession, SchemaProvider schemaProvider) {
        super(typedProperties, javaSparkContext, sparkSession, schemaProvider);
    }

    /**
     * Fetches the next incremental batch from the source table.
     *
     * @param option last checkpoint (an instant time); an absent or empty value is
     *               treated as "no begin instant"
     * @param j      source limit; not used by this implementation
     * @return the rows between the computed begin and end instants paired with the end
     *         instant as the new checkpoint, or an empty batch paired with the begin
     *         instant when begin and end instants are equal
     */
    @Override // org.apache.hudi.utilities.sources.RowSource
    public Pair<Option<Dataset<Row>>, String> fetchNextBatch(Option<String> option, long j) {
        DataSourceUtils.checkRequiredProperties(this.props, Arrays.asList(Config.HOODIE_SRC_BASE_PATH));

        final String srcPath = this.props.getString(Config.HOODIE_SRC_BASE_PATH);
        final int numInstantsPerFetch =
                this.props.getInteger(Config.NUM_INSTANTS_PER_FETCH, Config.DEFAULT_NUM_INSTANTS_PER_FETCH);

        // Only a present, non-empty checkpoint is used as the begin instant.
        Option<String> beginInstant = Option.empty();
        if (option.isPresent() && !option.get().isEmpty()) {
            beginInstant = option;
        }

        final boolean readLatestOnMissingCkpt = this.props.getBoolean(
                Config.READ_LATEST_INSTANT_ON_MISSING_CKPT, Config.DEFAULT_READ_LATEST_INSTANT_ON_MISSING_CKPT);

        final Pair<String, String> instantEndpts = IncrSourceHelper.calculateBeginAndEndInstants(
                this.sparkContext, srcPath, numInstantsPerFetch, beginInstant, readLatestOnMissingCkpt);

        // Identical begin and end instants: nothing new to read, so keep the checkpoint where it is.
        if (instantEndpts.getKey().equals(instantEndpts.getValue())) {
            LOG.warn("Already caught up. Begin Checkpoint was :" + instantEndpts.getKey());
            return Pair.of(Option.empty(), instantEndpts.getKey());
        }

        // Incremental query bounded by the begin and end instants.
        final Dataset<Row> incremental = this.sparkSession.read()
                .format("org.apache.hudi")
                .option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY(), DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL())
                .option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY(), instantEndpts.getLeft())
                .option(DataSourceReadOptions.END_INSTANTTIME_OPT_KEY(), instantEndpts.getRight())
                .load(srcPath);

        // Drop every Hoodie meta column except the partition path.
        final String[] metaColumnsToDrop = HoodieRecord.HOODIE_META_COLUMNS.stream()
                .filter(column -> !column.equals(HoodieRecord.PARTITION_PATH_METADATA_FIELD))
                .toArray(String[]::new);
        final Dataset<Row> batch = incremental.drop(metaColumnsToDrop);

        return Pair.of(Option.of(batch), instantEndpts.getRight());
    }
}
