package org.apache.flink.connectors.hive;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
import javax.annotation.Nullable;
import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.common.typeutils.base.LongSerializer;
import org.apache.flink.api.common.typeutils.base.StringSerializer;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.ReadableConfig;
import org.apache.flink.connector.file.table.DefaultPartTimeExtractor;
import org.apache.flink.connector.file.table.FileSystemConnectorOptions;
import org.apache.flink.connectors.hive.read.HiveContinuousPartitionContext;
import org.apache.flink.connectors.hive.read.HivePartitionFetcherContextBase;
import org.apache.flink.connectors.hive.read.HiveSourceSplit;
import org.apache.flink.connectors.hive.util.HivePartitionUtils;
import org.apache.flink.hive.shaded.formats.parquet.ParquetFileFormatFactory;
import org.apache.flink.hive.shaded.formats.parquet.utils.ParquetFormatStatisticsReportUtil;
import org.apache.flink.orc.OrcFileFormatFactory;
import org.apache.flink.orc.util.OrcFormatStatisticsReportUtil;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.TableException;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.catalog.CatalogTable;
import org.apache.flink.table.catalog.ObjectPath;
import org.apache.flink.table.catalog.hive.client.HiveShim;
import org.apache.flink.table.catalog.hive.client.HiveShimLoader;
import org.apache.flink.table.catalog.hive.factories.HiveCatalogFactoryOptions;
import org.apache.flink.table.connector.ChangelogMode;
import org.apache.flink.table.connector.ProviderContext;
import org.apache.flink.table.connector.format.FileBasedStatisticsReportableInputFormat;
import org.apache.flink.table.connector.source.DataStreamScanProvider;
import org.apache.flink.table.connector.source.DynamicTableSource;
import org.apache.flink.table.connector.source.ScanTableSource;
import org.apache.flink.table.connector.source.abilities.SupportsDynamicFiltering;
import org.apache.flink.table.connector.source.abilities.SupportsLimitPushDown;
import org.apache.flink.table.connector.source.abilities.SupportsPartitionPushDown;
import org.apache.flink.table.connector.source.abilities.SupportsProjectionPushDown;
import org.apache.flink.table.connector.source.abilities.SupportsStatisticReport;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.data.TimestampData;
import org.apache.flink.table.plan.stats.TableStats;
import org.apache.flink.table.types.DataType;
import org.apache.flink.util.Preconditions;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.mapred.JobConf;
import org.apache.thrift.TException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/flink/connectors/hive/HiveTableSource.class */
public class HiveTableSource implements ScanTableSource, SupportsPartitionPushDown, SupportsProjectionPushDown, SupportsLimitPushDown, SupportsStatisticReport, SupportsDynamicFiltering {
    // Class-wide logger; used by reportStatistics and getMapRedInputFormatStatistics.
    private static final Logger LOG = LoggerFactory.getLogger(HiveTableSource.class);
    // NOTE(review): not referenced in the visible code. getDataStream passes the literal "hive"
    // to generateUid; presumably that literal is this constant after compile-time inlining.
    private static final String HIVE_TRANSFORMATION = "hive";
    // The four constructor arguments below are null-checked by the constructor before being stored.
    protected final JobConf jobConf;
    protected final ReadableConfig flinkConf;
    protected final ObjectPath tablePath;
    protected final CatalogTable catalogTable;
    // Read from jobConf under the key of HiveCatalogFactoryOptions.HIVE_VERSION; the constructor
    // rejects a missing value.
    protected final String hiveVersion;
    // Shim obtained from HiveShimLoader for hiveVersion.
    protected final HiveShim hiveShim;
    // Field indices recorded by applyProjection. While this is null, getProducedTableSchema
    // returns the full table schema.
    protected int[] projectedFields;

    // Partition specs recorded by applyPartitions; null until that method is called.
    @Nullable
    protected List<Map<String, String>> remainingPartitions = null;

    // Partition keys recorded by applyDynamicFiltering; null until that method is called.
    @Nullable
    protected List<String> dynamicFilterPartitionKeys = null;
    // Row limit recorded by applyLimit; null means no limit has been applied.
    protected Long limit = null;

    /* loaded from: input_file:org/apache/flink/connectors/hive/HiveTableSource$HiveContinuousPartitionFetcherContext.class */
    /**
     * Partition-fetcher context used when a Hive table is read continuously.
     *
     * <p>The constructor inspects the partition order held by the base class and derives from it
     * both the consume-start offset and the serializer for that offset: a partition name
     * ({@code String}) for {@code PARTITION_NAME}, or a millisecond timestamp ({@code Long}) for
     * {@code PARTITION_TIME} and {@code CREATE_TIME}.
     *
     * @param <T> offset type; the constructor only ever stores a {@code String} or a {@code Long}
     */
    public static class HiveContinuousPartitionFetcherContext<T extends Comparable<T>> extends HivePartitionFetcherContextBase<Partition> implements HiveContinuousPartitionContext<Partition, T> {
        private static final long serialVersionUID = 1L;
        /** Start offset for the time-based orders when no start offset is configured. */
        private static final Long DEFAULT_MIN_TIME_OFFSET = 0L;
        /** Start offset for the name-based order when no start offset is configured. */
        private static final String DEFAULT_MIN_NAME_OFFSET = "";
        private final TypeSerializer<T> typeSerializer;
        private final T consumeStartOffset;

        /**
         * Creates the context and resolves the consume-start offset and its serializer.
         *
         * <p>All arguments are forwarded unchanged to the base-class constructor;
         * {@code configuration} is additionally consulted for
         * {@link HiveOptions#STREAMING_SOURCE_CONSUME_START_OFFSET} and, for the time-based
         * orders, {@link FileSystemConnectorOptions#PARTITION_TIME_EXTRACTOR_TIMESTAMP_FORMATTER}.
         *
         * @throws UnsupportedOperationException if the partition order is none of
         *     {@code PARTITION_NAME}, {@code PARTITION_TIME} or {@code CREATE_TIME}
         */
        // The offset and serializer are chosen at runtime from the partition order, so the
        // compiler cannot relate them to T; each branch stores a matching value/serializer pair.
        @SuppressWarnings("unchecked")
        public HiveContinuousPartitionFetcherContext(ObjectPath objectPath, HiveShim hiveShim, JobConfWrapper jobConfWrapper, List<String> list, DataType[] dataTypeArr, String[] strArr, Configuration configuration, String str) {
            super(objectPath, hiveShim, jobConfWrapper, list, dataTypeArr, strArr, configuration, str);
            switch (this.partitionOrder) {
                case PARTITION_NAME:
                    // Name ordering: the offset is the configured string, or "" when unset.
                    if (configuration.contains(HiveOptions.STREAMING_SOURCE_CONSUME_START_OFFSET)) {
                        this.consumeStartOffset = (T) configuration.getString(HiveOptions.STREAMING_SOURCE_CONSUME_START_OFFSET);
                    } else {
                        this.consumeStartOffset = (T) DEFAULT_MIN_NAME_OFFSET;
                    }
                    this.typeSerializer = (TypeSerializer<T>) StringSerializer.INSTANCE;
                    break;
                case PARTITION_TIME:
                case CREATE_TIME:
                    // Time ordering: the configured string is parsed with the configured
                    // timestamp formatter and stored as its millisecond value; 0 when unset.
                    if (configuration.contains(HiveOptions.STREAMING_SOURCE_CONSUME_START_OFFSET)) {
                        String startOffsetText = configuration.getString(HiveOptions.STREAMING_SOURCE_CONSUME_START_OFFSET);
                        String timestampFormatter = configuration.getString(FileSystemConnectorOptions.PARTITION_TIME_EXTRACTOR_TIMESTAMP_FORMATTER);
                        Long startMillis = TimestampData.fromLocalDateTime(DefaultPartTimeExtractor.toLocalDateTime(startOffsetText, timestampFormatter)).getMillisecond();
                        this.consumeStartOffset = (T) startMillis;
                    } else {
                        this.consumeStartOffset = (T) DEFAULT_MIN_TIME_OFFSET;
                    }
                    this.typeSerializer = (TypeSerializer<T>) LongSerializer.INSTANCE;
                    break;
                default:
                    throw new UnsupportedOperationException("Unsupported partition order: " + this.partitionOrder);
            }
        }

        /**
         * Looks up a single partition of the table through the metastore client.
         *
         * @param list partition values identifying the partition
         * @return the partition, or an empty {@code Optional} when the metastore reports
         *     {@code NoSuchObjectException}
         * @throws TException for any other metastore failure
         */
        public Optional<Partition> getPartition(List<String> list) throws TException {
            try {
                return Optional.of(this.metaStoreClient.getPartition(this.tablePath.getDatabaseName(), this.tablePath.getObjectName(), list));
            } catch (NoSuchObjectException e) {
                // A missing partition is an expected outcome, not an error.
                return Optional.empty();
            }
        }

        /** Returns the path of the table this context reads from. */
        public ObjectPath getTablePath() {
            return this.tablePath;
        }

        /** Converts a metastore partition using the partition keys and table properties held by the base class. */
        @Override // org.apache.flink.connectors.hive.read.HivePartitionFetcherContextBase, org.apache.flink.connectors.hive.read.HivePartitionContext
        public HiveTablePartition toHiveTablePartition(Partition partition) {
            return HivePartitionUtils.toHiveTablePartition(this.partitionKeys, this.tableProps, partition);
        }

        /** Returns the serializer chosen in the constructor for the offset type. */
        public TypeSerializer<T> getTypeSerializer() {
            return this.typeSerializer;
        }

        /** Returns the offset resolved in the constructor (configured value or the order's default). */
        public T getConsumeStartOffset() {
            return this.consumeStartOffset;
        }

        /** Closes the metastore client if one is present. */
        @Override // org.apache.flink.connectors.hive.read.HivePartitionFetcherContextBase
        public void close() throws Exception {
            if (this.metaStoreClient != null) {
                this.metaStoreClient.close();
            }
        }
    }

    public HiveTableSource(JobConf jobConf, ReadableConfig readableConfig, ObjectPath objectPath, CatalogTable catalogTable) {
        this.jobConf = (JobConf) Preconditions.checkNotNull(jobConf);
        this.flinkConf = (ReadableConfig) Preconditions.checkNotNull(readableConfig);
        this.tablePath = (ObjectPath) Preconditions.checkNotNull(objectPath);
        this.catalogTable = (CatalogTable) Preconditions.checkNotNull(catalogTable);
        this.hiveVersion = (String) Preconditions.checkNotNull(jobConf.get(HiveCatalogFactoryOptions.HIVE_VERSION.key()), "Hive version is not defined");
        this.hiveShim = HiveShimLoader.loadHiveShim(this.hiveVersion);
    }

    /**
     * Returns a {@link DataStreamScanProvider} that delegates stream creation to
     * {@link #getDataStream} and reports itself as bounded exactly when this source is not a
     * streaming source.
     */
    public ScanTableSource.ScanRuntimeProvider getScanRuntimeProvider(ScanTableSource.ScanContext scanContext) {
        final HiveTableSource source = this;
        DataStreamScanProvider provider = new DataStreamScanProvider() {
            public DataStream<RowData> produceDataStream(ProviderContext context, StreamExecutionEnvironment environment) {
                return source.getDataStream(context, environment);
            }

            public boolean isBounded() {
                boolean streaming = source.isStreamingSource();
                return !streaming;
            }
        };
        return provider;
    }

    /**
     * Builds the data stream that reads this table.
     *
     * <p>For a streaming source the Hive source is built as-is and, if the provider context
     * yields a uid for {@code "hive"}, that uid is set on the stream. Otherwise the partitions to
     * read are resolved up front, handed to the builder together with the dynamic-filter
     * partition keys, and the stream's parallelism is inferred from the file and split counts,
     * capped by the pushed-down limit.
     */
    @VisibleForTesting
    protected DataStream<RowData> getDataStream(ProviderContext providerContext, StreamExecutionEnvironment streamExecutionEnvironment) {
        HiveSourceBuilder sourceBuilder = new HiveSourceBuilder(this.jobConf, this.flinkConf, this.tablePath, this.hiveVersion, this.catalogTable)
                .setProjectedFields(this.projectedFields)
                .setLimit(this.limit);

        if (isStreamingSource()) {
            DataStreamSource<RowData> streamingSource = toDataStreamSource(streamExecutionEnvironment, sourceBuilder.buildWithDefaultBulkFormat());
            providerContext.generateUid("hive").ifPresent(streamingSource::uid);
            return streamingSource;
        }

        List<HiveTablePartition> partitionsToRead = HivePartitionUtils.getAllPartitions(this.jobConf, this.hiveVersion, this.tablePath, this.catalogTable.getPartitionKeys(), this.remainingPartitions);
        HiveSource<RowData> boundedHiveSource = sourceBuilder
                .setPartitions(partitionsToRead)
                .setDynamicFilterPartitionKeys(this.dynamicFilterPartitionKeys)
                .buildWithDefaultBulkFormat();
        // The stream is created before parallelism is inferred, matching the original ordering.
        DataStreamSource<RowData> boundedSource = toDataStreamSource(streamExecutionEnvironment, boundedHiveSource);
        int parallelism = new HiveParallelismInference(this.tablePath, this.flinkConf)
                .infer(
                        () -> HiveSourceFileEnumerator.getNumFiles(partitionsToRead, this.jobConf),
                        () -> HiveSourceFileEnumerator.createInputSplits(0, partitionsToRead, this.jobConf, true).size())
                .limit(this.limit);
        return boundedSource.setParallelism(parallelism);
    }

    /**
     * Registers the given Hive source with the environment, without watermarks, under the name
     * {@code "HiveSource-"} followed by the table's full name.
     */
    private DataStreamSource<RowData> toDataStreamSource(StreamExecutionEnvironment streamExecutionEnvironment, HiveSource<RowData> hiveSource) {
        WatermarkStrategy<RowData> noWatermarks = WatermarkStrategy.noWatermarks();
        String sourceName = "HiveSource-" + this.tablePath.getFullName();
        return streamExecutionEnvironment.fromSource(hiveSource, noWatermarks, sourceName);
    }

    /**
     * Tells whether the table is configured as a streaming source.
     *
     * <p>The flag is read from the catalog table's options under the key of
     * {@link HiveOptions#STREAMING_SOURCE_ENABLE}; when the key is absent, the option's default
     * value is used. NOTE(review): the decompiler reported that this method's access modifier was
     * changed from {@code protected}.
     */
    public boolean isStreamingSource() {
        Map<String, String> tableOptions = this.catalogTable.getOptions();
        String optionKey = HiveOptions.STREAMING_SOURCE_ENABLE.key();
        String fallback = ((Boolean) HiveOptions.STREAMING_SOURCE_ENABLE.defaultValue()).toString();
        String configured = tableOptions.getOrDefault(optionKey, fallback);
        return Boolean.parseBoolean(configured);
    }

    /**
     * Returns the schema of the underlying catalog table.
     *
     * <p>NOTE(review): the decompiler reported that this method's access modifier was changed
     * from {@code protected}.
     */
    public TableSchema getTableSchema() {
        TableSchema catalogSchema = this.catalogTable.getSchema();
        return catalogSchema;
    }

    /**
     * Returns the schema this source produces.
     *
     * <p>Without a projection this is the full table schema. With one, the result contains the
     * name and data type of each projected field, in projection order. NOTE(review): the
     * decompiler reported that this method's access modifier was changed from {@code protected}.
     */
    public TableSchema getProducedTableSchema() {
        TableSchema fullSchema = getTableSchema();
        if (this.projectedFields == null) {
            return fullSchema;
        }

        String[] allNames = fullSchema.getFieldNames();
        DataType[] allTypes = fullSchema.getFieldDataTypes();

        int projectedCount = this.projectedFields.length;
        String[] projectedNames = new String[projectedCount];
        for (int pos = 0; pos < projectedCount; pos++) {
            projectedNames[pos] = allNames[this.projectedFields[pos]];
        }
        DataType[] projectedTypes = new DataType[projectedCount];
        for (int pos = 0; pos < projectedCount; pos++) {
            projectedTypes[pos] = allTypes[this.projectedFields[pos]];
        }

        return TableSchema.builder().fields(projectedNames, projectedTypes).build();
    }

    /**
     * Records the pushed-down row limit.
     *
     * @param j maximum number of rows to read
     */
    public void applyLimit(long j) {
        Long pushedLimit = j;
        this.limit = pushedLimit;
    }

    /** Always returns an empty {@code Optional}; this source does not list partitions itself. */
    public Optional<List<Map<String, String>>> listPartitions() {
        Optional<List<Map<String, String>>> noListing = Optional.empty();
        return noListing;
    }

    /**
     * Records the partitions that remain after partition pruning.
     *
     * @param list partition specs to read
     * @throws UnsupportedOperationException if the table has no partition keys
     */
    public void applyPartitions(List<Map<String, String>> list) {
        List<String> partitionKeys = this.catalogTable.getPartitionKeys();
        boolean partitioned = partitionKeys != null && !partitionKeys.isEmpty();
        if (!partitioned) {
            throw new UnsupportedOperationException("Should not apply partitions to a non-partitioned table.");
        }
        this.remainingPartitions = list;
    }

    /**
     * Lists the partition keys that can be used for dynamic filtering.
     *
     * <p>A key is accepted when the type root of its column's logical type is contained in
     * {@code HiveSourceDynamicFileEnumerator.SUPPORTED_TYPES}. A key with no matching column in
     * the schema is tested as {@code null}.
     *
     * @return a new list of accepted partition keys, in partition-key order
     */
    public List<String> listAcceptedFilterFields() {
        return this.catalogTable.getPartitionKeys().stream()
                .filter(partitionKey -> HiveSourceDynamicFileEnumerator.SUPPORTED_TYPES.contains(
                        this.catalogTable.getSchema().getFieldDataType(partitionKey)
                                .map(fieldType -> fieldType.getLogicalType())
                                .map(logicalType -> logicalType.getTypeRoot())
                                .orElse(null)))
                .collect(Collectors.toCollection(ArrayList::new));
    }

    /**
     * Records the partition keys to use for dynamic filtering.
     *
     * @param list partition keys chosen for dynamic filtering
     * @throws TableException if the catalog table is not partitioned
     */
    public void applyDynamicFiltering(List<String> list) {
        if (this.catalogTable.isPartitioned()) {
            this.dynamicFilterPartitionKeys = list;
            return;
        }
        String message = String.format("Hive source table : %s is not a partition table, but try to apply dynamic filtering.", this.catalogTable);
        throw new TableException(message);
    }

    /** Always returns {@code false}; {@link #applyProjection} keeps only the first index of each path. */
    public boolean supportsNestedProjection() {
        final boolean nestedProjectionSupported = false;
        return nestedProjectionSupported;
    }

    /**
     * Records the projected fields, keeping the first index of each projection path.
     *
     * @param iArr projection paths; each must have at least one element
     * @param dataType produced data type; not used by this implementation
     */
    public void applyProjection(int[][] iArr, DataType dataType) {
        int[] firstIndices = new int[iArr.length];
        for (int pos = 0; pos < iArr.length; pos++) {
            firstIndices[pos] = iArr[pos][0];
        }
        this.projectedFields = firstIndices;
    }

    /** Returns the fixed summary string {@code "HiveSource"}. */
    public String asSummaryString() {
        final String summary = "HiveSource";
        return summary;
    }

    /** Returns the insert-only changelog mode. */
    public ChangelogMode getChangelogMode() {
        ChangelogMode insertOnly = ChangelogMode.insertOnly();
        return insertOnly;
    }

    /**
     * Creates a new source over the same table and carries over every pushed-down setting:
     * projection, remaining partitions, dynamic-filter partition keys and limit. The copied
     * settings are shared by reference, not cloned.
     */
    public DynamicTableSource copy() {
        HiveTableSource duplicate = new HiveTableSource(this.jobConf, this.flinkConf, this.tablePath, this.catalogTable);
        duplicate.projectedFields = this.projectedFields;
        duplicate.remainingPartitions = this.remainingPartitions;
        duplicate.dynamicFilterPartitionKeys = this.dynamicFilterPartitionKeys;
        duplicate.limit = this.limit;
        return duplicate;
    }

    /**
     * Reports table statistics derived from the files this source would read.
     *
     * <p>Statistics are computed only for a non-streaming source whose
     * {@link FileSystemConnectorOptions#SOURCE_REPORT_STATISTICS} option is {@code ALL};
     * otherwise {@link TableStats#UNKNOWN} is returned. When there are no input splits the row
     * count is reported as 0. When a limit has been pushed down, only a row count is returned,
     * capped at that limit.
     *
     * <p>Reporting is best effort: any exception is logged, with its stack trace, and
     * {@link TableStats#UNKNOWN} is returned instead.
     *
     * @return the computed statistics, or {@link TableStats#UNKNOWN}
     */
    public TableStats reportStatistics() {
        try {
            if (isStreamingSource() || this.flinkConf.get(FileSystemConnectorOptions.SOURCE_REPORT_STATISTICS) != FileSystemConnectorOptions.FileStatisticsType.ALL) {
                return TableStats.UNKNOWN;
            }

            HiveSourceBuilder sourceBuilder = new HiveSourceBuilder(this.jobConf, this.flinkConf, this.tablePath, this.hiveVersion, this.catalogTable).setProjectedFields(this.projectedFields);
            List<HiveTablePartition> partitionsToRead = HivePartitionUtils.getAllPartitions(this.jobConf, this.hiveVersion, this.tablePath, this.catalogTable.getPartitionKeys(), this.remainingPartitions);
            // Held as Object so the instanceof test below is a real runtime check: formats that
            // cannot report statistics themselves fall back to the serde-based path.
            Object bulkFormat = sourceBuilder.createDefaultBulkFormat();
            List<HiveSourceSplit> inputSplits = HiveSourceFileEnumerator.createInputSplits(1, partitionsToRead, this.jobConf, false);
            if (inputSplits.isEmpty()) {
                return new TableStats(0L);
            }

            TableStats computedStats;
            if (bulkFormat instanceof FileBasedStatisticsReportableInputFormat) {
                computedStats = ((FileBasedStatisticsReportableInputFormat) bulkFormat).reportStatistics(
                        inputSplits.stream().map(HiveSourceSplit::path).collect(Collectors.toList()),
                        this.catalogTable.getSchema().toRowDataType());
            } else {
                computedStats = getMapRedInputFormatStatistics(inputSplits, this.catalogTable.getSchema().toRowDataType());
            }

            if (this.limit == null) {
                return computedStats;
            }
            // With a pushed-down limit only the capped row count is reported.
            return new TableStats(Math.min(this.limit.longValue(), computedStats.getRowCount()));
        } catch (Exception e) {
            // Pass the throwable as the last argument so the stack trace is logged too.
            LOG.warn("Reporting statistics failed for hive table source: {}", e.getMessage(), e);
            return TableStats.UNKNOWN;
        }
    }

    /**
     * Computes statistics for splits read through a map-red input format.
     *
     * <p>The format is detected from the lower-cased serialization library name of the first
     * split's partition: Parquet and ORC are delegated to their statistics utilities; any other
     * format is logged and reported as {@link TableStats#UNKNOWN}.
     *
     * @param list input splits; must contain at least one element
     * @param dataType row data type of the table
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    private TableStats getMapRedInputFormatStatistics(List<HiveSourceSplit> list, DataType dataType) {
        HiveSourceSplit firstSplit = list.get(0);
        String serdeLibrary = firstSplit.getHiveTablePartition().getStorageDescriptor().getSerdeInfo().getSerializationLib().toLowerCase();
        // Element type left raw: the path type is not imported in this file.
        List splitPaths = list.stream().map(split -> split.path()).collect(Collectors.toList());

        TableStats result;
        if (serdeLibrary.contains(ParquetFileFormatFactory.IDENTIFIER)) {
            boolean hiveThree = this.hiveVersion.startsWith("3");
            result = ParquetFormatStatisticsReportUtil.getTableStatistics(splitPaths, dataType, this.jobConf, hiveThree);
        } else if (serdeLibrary.contains(OrcFileFormatFactory.IDENTIFIER)) {
            result = OrcFormatStatisticsReportUtil.getTableStatistics(splitPaths, dataType, this.jobConf);
        } else {
            LOG.info("Now for hive table source, reporting statistics only support Orc and Parquet formats.");
            result = TableStats.UNKNOWN;
        }
        return result;
    }

    /** Returns the job configuration this source was created with. */
    @VisibleForTesting
    public JobConf getJobConf() {
        JobConf configuration = this.jobConf;
        return configuration;
    }
}
