package org.apache.hudi.common.table;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.concurrent.ConcurrentHashMap;
import javax.annotation.concurrent.ThreadSafe;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.avro.AvroSchemaUtils;
import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.common.model.HoodieCommitMetadata;
import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.table.log.HoodieLogFormat;
import org.apache.hudi.common.table.log.block.HoodieDataBlock;
import org.apache.hudi.common.table.log.block.HoodieLogBlock;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.common.table.timeline.HoodieInstant;
import org.apache.hudi.common.util.Functions;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.exception.HoodieIncompatibleSchemaException;
import org.apache.hudi.exception.InvalidTableException;
import org.apache.hudi.internal.schema.InternalSchema;
import org.apache.hudi.internal.schema.io.FileBasedInternalSchemaStorageManager;
import org.apache.hudi.internal.schema.utils.SerDeHelper;
import org.apache.hudi.io.storage.HoodieHFileReader;
import org.apache.hudi.io.storage.HoodieOrcReader;
import org.apache.hudi.org.apache.avro.JsonProperties;
import org.apache.hudi.org.apache.avro.Schema;
import org.apache.hudi.org.apache.avro.SchemaCompatibility;
import org.apache.hudi.org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hudi.org.apache.parquet.avro.AvroSchemaConverter;
import org.apache.hudi.org.apache.parquet.format.converter.ParquetMetadataConverter;
import org.apache.hudi.org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.hudi.org.apache.parquet.schema.MessageType;
import org.apache.hudi.util.Lazy;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

@ThreadSafe
/* loaded from: input_file:org/apache/hudi/common/table/TableSchemaResolver.class */
public class TableSchemaResolver {
    private static final Logger LOG = LogManager.getLogger(TableSchemaResolver.class);

    /** Meta client of the table whose schema is resolved by this instance. */
    private final HoodieTableMetaClient metaClient;

    /**
     * Memoized instant returned by the timeline's "last commit metadata with valid schema" lookup.
     * Stays {@code null} until a lookup succeeds; assigned while holding this instance's monitor.
     */
    private volatile HoodieInstant latestCommitWithValidSchema;

    /**
     * Memoized instant returned by the timeline's "last commit metadata with valid data" lookup.
     * Stays {@code null} until a lookup succeeds; assigned while holding this instance's monitor.
     */
    private volatile HoodieInstant latestCommitWithValidData;

    /** Lazily created cache of parsed commit metadata, keyed by instant. */
    private final Lazy<ConcurrentHashMap<HoodieInstant, HoodieCommitMetadata>> commitMetadataCache =
            Lazy.lazily(() -> new ConcurrentHashMap<>(2));

    /** Lazily computed result of {@link #hasOperationField()}. */
    private final Lazy<Boolean> hasOperationField = Lazy.lazily(this::hasOperationField);

    /**
     * Creates a resolver bound to the given table meta client.
     *
     * @param hoodieTableMetaClient meta client of the table whose schema will be resolved
     */
    public TableSchemaResolver(HoodieTableMetaClient hoodieTableMetaClient) {
        metaClient = hoodieTableMetaClient;
    }

    /**
     * Reads the table schema from a data file and converts it to Avro.
     *
     * @return the Avro form of the schema returned by {@code getTableParquetSchemaFromDataFile()}
     */
    public Schema getTableAvroSchemaFromDataFile() {
        MessageType parquetSchema = getTableParquetSchemaFromDataFile();
        return convertParquetSchemaToAvro(parquetSchema);
    }

    /**
     * Resolves the table's Avro schema, including metadata fields only when the table config's
     * {@code populateMetaFields()} says so.
     *
     * @return the resolved Avro schema
     * @throws Exception declared for callers; propagated from the delegate overload
     */
    public Schema getTableAvroSchema() throws Exception {
        boolean includeMetadataFields = this.metaClient.getTableConfig().populateMetaFields();
        return getTableAvroSchema(includeMetadataFields);
    }

    /**
     * Resolves the table's Avro schema without pinning a specific instant.
     *
     * @param z whether metadata fields should be part of the returned schema
     * @return the resolved Avro schema
     * @throws Exception declared for callers
     */
    public Schema getTableAvroSchema(boolean z) throws Exception {
        Option<HoodieInstant> noInstant = Option.empty();
        return getTableAvroSchemaInternal(z, noInstant);
    }

    /**
     * Resolves the table's Avro schema, consulting the commit metadata of the given instant first.
     *
     * @param hoodieInstant instant whose commit metadata is consulted
     * @param z             whether metadata fields should be part of the returned schema
     * @return the resolved Avro schema
     * @throws Exception declared for callers
     */
    public Schema getTableAvroSchema(HoodieInstant hoodieInstant, boolean z) throws Exception {
        Option<HoodieInstant> pinnedInstant = Option.of(hoodieInstant);
        return getTableAvroSchemaInternal(z, pinnedInstant);
    }

    /**
     * Resolves the table schema with metadata fields and converts it to a Parquet message type.
     *
     * @return the Parquet form of {@code getTableAvroSchema(true)}
     * @throws Exception propagated from {@link #getTableAvroSchema(boolean)}
     */
    public MessageType getTableParquetSchema() throws Exception {
        Schema avroSchemaWithMetadataFields = getTableAvroSchema(true);
        return convertAvroSchemaToParquet(avroSchemaWithMetadataFields);
    }

    /**
     * Resolves the table's Avro schema without metadata fields.
     *
     * @return the result of {@code getTableAvroSchema(false)}
     * @throws Exception propagated from {@link #getTableAvroSchema(boolean)}
     * @deprecated call {@link #getTableAvroSchema(boolean)} with {@code false} instead
     */
    @Deprecated
    public Schema getTableAvroSchemaWithoutMetadataFields() throws Exception {
        final boolean includeMetadataFields = false;
        return getTableAvroSchema(includeMetadataFields);
    }

    /**
     * Resolves the table schema by trying, in order: commit metadata (of the given instant, or of
     * the latest commit with a valid schema), the table config's create schema, and finally a
     * data file. When the table config says partition columns are dropped, the configured
     * partition fields are appended via {@link #appendPartitionColumns(Schema, String[])}.
     *
     * @param z      whether metadata fields should be part of the returned schema
     * @param option instant to read commit metadata from; empty to use the latest valid one
     * @return the resolved Avro schema
     */
    private Schema getTableAvroSchemaInternal(boolean z, Option<HoodieInstant> option) {
        Option<Schema> fromCommitMetadata;
        if (option.isPresent()) {
            fromCommitMetadata = getTableSchemaFromCommitMetadata(option.get(), z);
        } else {
            fromCommitMetadata = getTableSchemaFromLatestCommitMetadata(z);
        }

        Schema resolved = fromCommitMetadata
                // Second choice: the schema recorded in the table config at creation time.
                .or(() -> this.metaClient.getTableConfig().getTableCreateSchema()
                        .map(createSchema -> z
                                ? HoodieAvroUtils.addMetadataFields(createSchema, this.hasOperationField.get().booleanValue())
                                : createSchema))
                // Last resort: the data-file schema, stripped of metadata fields unless they were requested.
                .orElseGet(() -> {
                    Schema fromDataFile = getTableAvroSchemaFromDataFile();
                    return z ? fromDataFile : HoodieAvroUtils.removeMetadataFields(fromDataFile);
                });

        if (!this.metaClient.getTableConfig().shouldDropPartitionColumns().booleanValue()) {
            return resolved;
        }
        return this.metaClient.getTableConfig().getPartitionFields()
                .map(partitionFields -> appendPartitionColumns(resolved, partitionFields))
                .orElse(resolved);
    }

    /**
     * Parses the schema stored under the {@code "schema"} key of the latest commit metadata that
     * carries a valid schema.
     *
     * @param z whether metadata fields should be added to the parsed schema
     * @return the parsed schema, or empty when no such commit metadata is available
     */
    private Option<Schema> getTableSchemaFromLatestCommitMetadata(boolean z) {
        Option<Pair<HoodieInstant, HoodieCommitMetadata>> latest = getLatestCommitMetadataWithValidSchema();
        if (latest.isPresent()) {
            HoodieCommitMetadata commitMetadata = latest.get().getRight();
            Schema writerSchema = new Schema.Parser().parse(commitMetadata.getMetadata("schema"));
            return Option.of(z
                    ? HoodieAvroUtils.addMetadataFields(writerSchema, this.hasOperationField.get().booleanValue())
                    : writerSchema);
        }
        return Option.empty();
    }

    /**
     * Parses the schema stored under the {@code "schema"} key of the given instant's (cached)
     * commit metadata.
     *
     * @param hoodieInstant instant whose commit metadata is read
     * @param z             whether metadata fields should be added to the parsed schema
     * @return the parsed schema, or empty when the metadata has no (or an empty) schema entry
     * @throws HoodieException wrapping any failure while loading or parsing the metadata
     */
    private Option<Schema> getTableSchemaFromCommitMetadata(HoodieInstant hoodieInstant, boolean z) {
        try {
            HoodieCommitMetadata commitMetadata = getCachedCommitMetadata(hoodieInstant);
            String schemaJson = commitMetadata.getMetadata("schema");
            if (!StringUtils.isNullOrEmpty(schemaJson)) {
                Schema writerSchema = new Schema.Parser().parse(schemaJson);
                return Option.of(z
                        ? HoodieAvroUtils.addMetadataFields(writerSchema, this.hasOperationField.get().booleanValue())
                        : writerSchema);
            }
            return Option.empty();
        } catch (Exception e) {
            throw new HoodieException("Failed to read schema from commit metadata", e);
        }
    }

    /**
     * Reads the Parquet schema from one of the files written by the latest commit that has valid
     * data.
     *
     * @return the schema returned by {@code fetchSchemaFromFiles} for that commit's file paths
     * @throws IllegalArgumentException when no commit with valid data is available
     * @throws InvalidTableException    when the table type is neither copy-on-write nor merge-on-read
     * @throws HoodieException          wrapping an {@link IOException} raised while reading files
     */
    private MessageType getTableParquetSchemaFromDataFile() {
        // Looked up before the table-type dispatch, exactly as the lookup order requires.
        Option<Pair<HoodieInstant, HoodieCommitMetadata>> lastCommitWithData = getLatestCommitMetadataWithValidData();
        try {
            switch (this.metaClient.getTableType()) {
                case COPY_ON_WRITE:
                case MERGE_ON_READ:
                    if (!lastCommitWithData.isPresent()) {
                        throw new IllegalArgumentException("Could not find any data file written for commit, so could not get schema for table " + this.metaClient.getBasePath());
                    }
                    HoodieCommitMetadata commitMetadata = lastCommitWithData.get().getRight();
                    Iterator<String> writtenFilePaths =
                            commitMetadata.getFileIdAndFullPaths(this.metaClient.getBasePathV2()).values().iterator();
                    return fetchSchemaFromFiles(writtenFilePaths);
                default:
                    LOG.error("Unknown table type " + this.metaClient.getTableType());
                    throw new InvalidTableException(this.metaClient.getBasePath());
            }
        } catch (IOException ioe) {
            throw new HoodieException("Failed to read data schema", ioe);
        }
    }

    /**
     * Converts a Parquet message type to an Avro schema using a converter built from the meta
     * client's Hadoop configuration.
     *
     * @param messageType Parquet schema to convert
     * @return the converted Avro schema
     */
    private Schema convertParquetSchemaToAvro(MessageType messageType) {
        AvroSchemaConverter converter = new AvroSchemaConverter(this.metaClient.getHadoopConf());
        return converter.convert(messageType);
    }

    /**
     * Converts an Avro schema to a Parquet message type using a converter built from the meta
     * client's Hadoop configuration.
     *
     * @param schema Avro schema to convert
     * @return the converted Parquet message type
     */
    private MessageType convertAvroSchemaToParquet(Schema schema) {
        AvroSchemaConverter converter = new AvroSchemaConverter(this.metaClient.getHadoopConf());
        return converter.convert(schema);
    }

    /**
     * Checks whether {@code schema2} is compatible with {@code schema}.
     *
     * <p>When both are records: their names must match, every field of {@code schema} must be
     * found in {@code schema2} with a recursively compatible type, and every field of
     * {@code schema2} that cannot be found in {@code schema} must have a non-null default value.
     * In all other cases the decision is delegated to
     * {@code SchemaCompatibility.checkReaderWriterCompatibility(schema2, schema)}.
     *
     * @param schema  first schema
     * @param schema2 second schema
     * @return {@code true} when the rules above are satisfied
     */
    public static boolean isSchemaCompatible(Schema schema, Schema schema2) {
        boolean bothRecords = schema.getType() == schema2.getType() && schema2.getType() == Schema.Type.RECORD;
        if (!bothRecords) {
            SchemaCompatibility.SchemaCompatibilityType verdict =
                    SchemaCompatibility.checkReaderWriterCompatibility(schema2, schema).getType();
            return verdict == SchemaCompatibility.SchemaCompatibilityType.COMPATIBLE;
        }

        if (!SchemaCompatibility.schemaNameEquals(schema2, schema)) {
            return false;
        }

        // Every field of the first schema must survive, with a compatible type, in the second.
        for (Schema.Field existingField : schema.getFields()) {
            Schema.Field counterpart = SchemaCompatibility.lookupWriterField(schema2, existingField);
            if (counterpart == null) {
                return false;
            }
            if (!isSchemaCompatible(existingField.schema(), counterpart.schema())) {
                return false;
            }
        }

        // Fields present only in the second schema are acceptable only when they have a default.
        for (Schema.Field candidateField : schema2.getFields()) {
            boolean missingFromFirst = SchemaCompatibility.lookupWriterField(schema, candidateField) == null;
            if (missingFromFirst && candidateField.defaultVal() == null) {
                return false;
            }
        }
        return true;
    }

    /**
     * String overload of {@link #isSchemaCompatible(Schema, Schema)}: parses both arguments as
     * Avro schemas (each with its own parser) and compares the results.
     *
     * @param str  first schema, as a string accepted by {@code Schema.Parser}
     * @param str2 second schema, as a string accepted by {@code Schema.Parser}
     * @return the result of the {@code Schema}-based check
     */
    public static boolean isSchemaCompatible(String str, String str2) {
        Schema first = new Schema.Parser().parse(str);
        Schema second = new Schema.Parser().parse(str2);
        return isSchemaCompatible(first, second);
    }

    /**
     * Chooses between the incoming schema and the current table schema.
     *
     * <p>When the timeline is non-empty, the table schema (without metadata fields) is resolved
     * and, if {@code z} is set and {@code function1} is non-null, passed through
     * {@code function1}. The table schema is returned only when it has more fields than
     * {@code schema} and {@link #isSchemaCompatible(Schema, Schema)} accepts the pair; in every
     * other case, including any failure while resolving the table schema, {@code schema} itself
     * is returned.
     *
     * @param schema    incoming schema
     * @param z         whether {@code function1} should be applied to the table schema
     * @param function1 optional transformation of the table schema; may be {@code null}
     * @return the table schema when it qualifies as described above, otherwise {@code schema}
     */
    public Schema getLatestSchema(Schema schema, boolean z, Functions.Function1<Schema, Schema> function1) {
        Schema latestSchema = schema;
        try {
            if (this.metaClient.isTimelineNonEmpty()) {
                Schema tableSchema = getTableAvroSchemaWithoutMetadataFields();
                if (z && function1 != null) {
                    tableSchema = function1.apply(tableSchema);
                }
                if (schema.getFields().size() < tableSchema.getFields().size() && isSchemaCompatible(schema, tableSchema)) {
                    latestSchema = tableSchema;
                    // Rendering a schema can be costly; only do it when the message will be emitted.
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Using latest table schema to rewrite incoming records " + tableSchema.toString());
                    }
                }
            }
        } catch (IllegalArgumentException | InvalidTableException e) {
            LOG.warn("Could not find any commits, falling back to using incoming batch's write schema");
        } catch (Exception e) {
            // Deliberate best-effort fallback, but keep the stack trace so the failure stays diagnosable.
            LOG.warn("Unknown exception thrown " + e.getMessage() + ", Falling back to using incoming batch's write schema", e);
        }
        return latestSchema;
    }

    /**
     * Reads the schema from the footer of a Parquet base file.
     *
     * @param path location of the Parquet file
     * @return the schema stored in the file's metadata
     * @throws IOException if the footer cannot be read
     */
    private MessageType readSchemaFromParquetBaseFile(Path path) throws IOException {
        LOG.info("Reading schema from " + path);
        FileSystem rawFs = this.metaClient.getRawFs();
        return ParquetFileReader.readFooter(rawFs.getConf(), path, ParquetMetadataConverter.NO_FILTER)
                .getFileMetaData()
                .getSchema();
    }

    /**
     * Reads the Avro schema of an HFile base file and converts it to a Parquet message type.
     *
     * <p>The reader is opened only for the duration of the call and is closed before returning,
     * so the handles it holds are not leaked.
     *
     * @param path location of the HFile
     * @return the file's schema as a Parquet message type
     * @throws IOException if the file cannot be opened or read
     */
    private MessageType readSchemaFromHFileBaseFile(Path path) throws IOException {
        LOG.info("Reading schema from " + path);
        FileSystem rawFs = this.metaClient.getRawFs();
        try (HoodieHFileReader reader = new HoodieHFileReader(rawFs.getConf(), path, new CacheConfig(rawFs.getConf()))) {
            return convertAvroSchemaToParquet(reader.getSchema());
        }
    }

    /**
     * Reads the Avro schema of an ORC base file and converts it to a Parquet message type.
     *
     * @param path location of the ORC file
     * @return the file's schema as a Parquet message type
     * @throws IOException if the file cannot be opened or read
     */
    private MessageType readSchemaFromORCBaseFile(Path path) throws IOException {
        LOG.info("Reading schema from " + path);
        FileSystem rawFs = this.metaClient.getRawFs();
        // NOTE(review): the reader is never closed here; confirm whether HoodieOrcReader holds resources.
        HoodieOrcReader orcReader = new HoodieOrcReader(rawFs.getConf(), path);
        return convertAvroSchemaToParquet(orcReader.getSchema());
    }

    /**
     * Reads the schema from any one file listed in the commit metadata of the given compaction
     * instant.
     *
     * @param option the compaction instant to read from
     * @return the schema of one of the files written by that instant
     * @throws Exception                when {@code option} is empty, or on read failures
     * @throws IllegalArgumentException when the instant's metadata lists no files
     */
    public MessageType readSchemaFromLastCompaction(Option<HoodieInstant> option) throws Exception {
        HoodieActiveTimeline timeline = this.metaClient.getActiveTimeline();
        HoodieInstant compactionInstant = option.orElseThrow(
                () -> new Exception("Could not read schema from last compaction, no compaction commits found on path " + this.metaClient));

        HoodieCommitMetadata compactionMetadata = (HoodieCommitMetadata) HoodieCommitMetadata.fromBytes(
                timeline.getInstantDetails(compactionInstant).get(), HoodieCommitMetadata.class);

        String anyWrittenFile = compactionMetadata.getFileIdAndFullPaths(this.metaClient.getBasePathV2())
                .values()
                .stream()
                .findAny()
                .orElseThrow(() -> new IllegalArgumentException("Could not find any data file written for compaction " + compactionInstant + ", could not get schema for table " + this.metaClient.getBasePath()));
        return readSchemaFromBaseFile(anyWrittenFile);
    }

    /**
     * Reads the schema from a log file using the meta client's raw file system.
     *
     * @param path location of the log file
     * @return the result of {@link #readSchemaFromLogFile(FileSystem, Path)}
     * @throws IOException if the log file cannot be read
     */
    private MessageType readSchemaFromLogFile(Path path) throws IOException {
        FileSystem rawFs = this.metaClient.getRawFs();
        return readSchemaFromLogFile(rawFs, path);
    }

    /**
     * Reads the schema of the last data block found in a log file.
     *
     * <p>All blocks are scanned and the schema of the final {@link HoodieDataBlock} is converted
     * to a Parquet message type. The reader is always closed; if both the scan and the close
     * fail, the scan's exception is thrown and the close failure is attached to it as suppressed.
     *
     * @param fileSystem file system used to open the log file
     * @param path       location of the log file
     * @return the schema of the last data block, or {@code null} when the file has no data block
     * @throws IOException if the log file cannot be opened, read, or closed
     */
    public static MessageType readSchemaFromLogFile(FileSystem fileSystem, Path path) throws IOException {
        try (HoodieLogFormat.Reader reader = HoodieLogFormat.newReader(fileSystem, new HoodieLogFile(path), null)) {
            HoodieDataBlock lastDataBlock = null;
            while (reader.hasNext()) {
                HoodieLogBlock block = reader.next();
                if (block instanceof HoodieDataBlock) {
                    lastDataBlock = (HoodieDataBlock) block;
                }
            }
            if (lastDataBlock == null) {
                return null;
            }
            return new AvroSchemaConverter().convert(lastDataBlock.getSchema());
        }
    }

    /**
     * Reads the internal schema from the commit metadata of the last completed instant on the
     * commits timeline.
     *
     * @return the internal schema, or empty when there is no such instant or no schema entry
     */
    public Option<InternalSchema> getTableInternalSchemaFromCommitMetadata() {
        Option<HoodieInstant> lastCompletedCommit = this.metaClient.getActiveTimeline()
                .getCommitsTimeline()
                .filterCompletedInstants()
                .lastInstant();
        return lastCompletedCommit.flatMap(this::getTableInternalSchemaFromCommitMetadata);
    }

    /**
     * Deserializes the internal schema stored under {@code SerDeHelper.LATEST_SCHEMA} in the
     * given instant's (cached) commit metadata.
     *
     * @param hoodieInstant instant whose commit metadata is read
     * @return the deserialized schema, or empty when the metadata has no such entry
     * @throws HoodieException wrapping any failure while loading or deserializing
     */
    private Option<InternalSchema> getTableInternalSchemaFromCommitMetadata(HoodieInstant hoodieInstant) {
        try {
            HoodieCommitMetadata commitMetadata = getCachedCommitMetadata(hoodieInstant);
            String serializedSchema = commitMetadata.getMetadata(SerDeHelper.LATEST_SCHEMA);
            if (serializedSchema == null) {
                return Option.empty();
            }
            return SerDeHelper.fromJson(serializedSchema);
        } catch (Exception e) {
            throw new HoodieException("Failed to read schema from commit metadata", e);
        }
    }

    /**
     * Fetches the history schema string from a {@link FileBasedInternalSchemaStorageManager}
     * created for this table.
     *
     * @return the history schema string, or empty when the manager returns an empty string
     */
    public Option<String> getTableHistorySchemaStrFromCommitMetadata() {
        FileBasedInternalSchemaStorageManager storageManager = new FileBasedInternalSchemaStorageManager(this.metaClient);
        String historySchema = storageManager.getHistorySchemaStr();
        if (historySchema.isEmpty()) {
            return Option.empty();
        }
        return Option.of(historySchema);
    }

    /**
     * Tells whether the schema read from a data file contains the operation metadata field.
     *
     * @return {@code true} when the field is present; {@code false} when it is absent or when the
     *         data-file schema could not be read (the failure is logged at info level)
     */
    public boolean hasOperationField() {
        try {
            Schema dataFileSchema = getTableAvroSchemaFromDataFile();
            Schema.Field operationField = dataFileSchema.getField(HoodieRecord.OPERATION_METADATA_FIELD);
            return operationField != null;
        } catch (Exception e) {
            // Best-effort probe: a table without readable data files simply has no operation field.
            LOG.info(String.format("Failed to read operation field from avro schema (%s)", e.getMessage()));
            return false;
        }
    }

    /**
     * Returns the latest commit (instant plus metadata) that carries a valid schema, memoizing the
     * instant in {@code latestCommitWithValidSchema} and its metadata in the commit metadata cache.
     *
     * @return the memoized instant paired with its cached metadata, or empty when the timeline
     *         lookup has not produced a result
     */
    private Option<Pair<HoodieInstant, HoodieCommitMetadata>> getLatestCommitMetadataWithValidSchema() {
        if (this.latestCommitWithValidSchema == null) {
            Option<Pair<HoodieInstant, HoodieCommitMetadata>> found =
                    this.metaClient.getActiveTimeline().getLastCommitMetadataWithValidSchema();
            if (found.isPresent()) {
                HoodieInstant foundInstant = found.get().getLeft();
                HoodieCommitMetadata foundMetadata = found.get().getRight();
                synchronized (this) {
                    // Re-check under the lock so an instant published by another thread is kept.
                    if (this.latestCommitWithValidSchema == null) {
                        this.latestCommitWithValidSchema = foundInstant;
                    }
                    this.commitMetadataCache.get().putIfAbsent(foundInstant, foundMetadata);
                }
            }
        }
        return Option.ofNullable(this.latestCommitWithValidSchema)
                .map(instant -> Pair.of(instant, this.commitMetadataCache.get().get(instant)));
    }

    /**
     * Returns the latest commit (instant plus metadata) that has valid data, memoizing the instant
     * in {@code latestCommitWithValidData} and its metadata in the commit metadata cache.
     *
     * @return the memoized instant paired with its cached metadata, or empty when the timeline
     *         lookup has not produced a result
     */
    private Option<Pair<HoodieInstant, HoodieCommitMetadata>> getLatestCommitMetadataWithValidData() {
        if (this.latestCommitWithValidData == null) {
            Option<Pair<HoodieInstant, HoodieCommitMetadata>> found =
                    this.metaClient.getActiveTimeline().getLastCommitMetadataWithValidData();
            if (found.isPresent()) {
                HoodieInstant foundInstant = found.get().getLeft();
                HoodieCommitMetadata foundMetadata = found.get().getRight();
                synchronized (this) {
                    // Re-check under the lock so an instant published by another thread is kept.
                    if (this.latestCommitWithValidData == null) {
                        this.latestCommitWithValidData = foundInstant;
                    }
                    this.commitMetadataCache.get().putIfAbsent(foundInstant, foundMetadata);
                }
            }
        }
        return Option.ofNullable(this.latestCommitWithValidData)
                .map(instant -> Pair.of(instant, this.commitMetadataCache.get().get(instant)));
    }

    /**
     * Returns the commit metadata of the given instant, loading it from the timeline and caching
     * it on first access.
     *
     * @param hoodieInstant instant whose commit metadata is requested
     * @return the cached (or freshly loaded) commit metadata
     * @throws HoodieIOException when the metadata bytes cannot be deserialized
     */
    private HoodieCommitMetadata getCachedCommitMetadata(HoodieInstant hoodieInstant) {
        ConcurrentHashMap<HoodieInstant, HoodieCommitMetadata> cache = this.commitMetadataCache.get();
        return cache.computeIfAbsent(hoodieInstant, this::readCommitMetadataFromTimeline);
    }

    /** Loads and deserializes the commit metadata of a completed instant from the active timeline. */
    private HoodieCommitMetadata readCommitMetadataFromTimeline(HoodieInstant instant) {
        try {
            byte[] serialized = this.metaClient.getActiveTimeline()
                    .getCommitsTimeline()
                    .filterCompletedInstants()
                    .getInstantDetails(instant)
                    .get();
            return (HoodieCommitMetadata) HoodieCommitMetadata.fromBytes(serialized, HoodieCommitMetadata.class);
        } catch (IOException e) {
            throw new HoodieIOException(String.format("Failed to fetch HoodieCommitMetadata for instant (%s)", instant), e);
        }
    }

    /**
     * Walks the given file paths and returns the first non-null schema that can be read.
     *
     * <p>Paths containing the log-file extension are read as log files; all others are read as
     * base files.
     *
     * @param it file paths to try, in order
     * @return the first schema found, or {@code null} when no file yields one
     * @throws IOException if a file cannot be read
     */
    private MessageType fetchSchemaFromFiles(Iterator<String> it) throws IOException {
        MessageType schema = null;
        while (it.hasNext() && schema == null) {
            String filePath = it.next();
            if (filePath.contains(HoodieFileFormat.HOODIE_LOG.getFileExtension())) {
                // A log file without any data block yields null, so the scan moves on to the next path.
                schema = readSchemaFromLogFile(new Path(filePath));
            } else {
                schema = readSchemaFromBaseFile(filePath);
            }
        }
        return schema;
    }

    /**
     * Reads the schema of a base file, picking the reader by which file extension the path
     * contains (Parquet, then HFile, then ORC).
     *
     * @param str full path of the base file
     * @return the file's schema as a Parquet message type
     * @throws IOException              if the file cannot be read
     * @throws IllegalArgumentException when the path contains none of the known extensions
     */
    private MessageType readSchemaFromBaseFile(String str) throws IOException {
        MessageType schema;
        if (str.contains(HoodieFileFormat.PARQUET.getFileExtension())) {
            schema = readSchemaFromParquetBaseFile(new Path(str));
        } else if (str.contains(HoodieFileFormat.HFILE.getFileExtension())) {
            schema = readSchemaFromHFileBaseFile(new Path(str));
        } else if (str.contains(HoodieFileFormat.ORC.getFileExtension())) {
            schema = readSchemaFromORCBaseFile(new Path(str));
        } else {
            throw new IllegalArgumentException("Unknown base file format :" + str);
        }
        return schema;
    }

    /**
     * Appends the given partition columns to a schema that does not contain them yet.
     *
     * <p>Each appended column is a nullable string field with an empty doc string and a null
     * default. The schema is returned unchanged when {@code strArr} is empty or when every
     * partition column is already part of the schema.
     *
     * <p>Decompiler note: this method was originally package-private.
     *
     * @param schema data schema to extend
     * @param strArr names of the partition columns
     * @return {@code schema} itself, or a new schema with the partition columns appended
     * @throws HoodieIncompatibleSchemaException when only some of the partition columns are
     *                                           already contained in {@code schema}
     */
    public static Schema appendPartitionColumns(Schema schema, String[] strArr) {
        if (strArr.length == 0) {
            return schema;
        }

        // Count the columns already present once, rather than scanning the array twice.
        long alreadyPresent = Arrays.stream(strArr)
                .filter(column -> AvroSchemaUtils.containsFieldInSchema(schema, column))
                .count();
        if (alreadyPresent == strArr.length) {
            return schema;
        }
        if (alreadyPresent > 0) {
            throw new HoodieIncompatibleSchemaException("Partition columns could not be partially contained w/in the data schema");
        }

        ArrayList<Schema.Field> partitionFields = new ArrayList<>(strArr.length);
        for (String column : strArr) {
            partitionFields.add(new Schema.Field(column, AvroSchemaUtils.createNullableSchema(Schema.Type.STRING), "", JsonProperties.NULL_VALUE));
        }
        return AvroSchemaUtils.appendFieldsToSchema(schema, partitionFields);
    }
}
