package org.apache.hudi.hive;

import com.uber.hoodie.hadoop.HoodieInputFormat;
import com.uber.hoodie.hadoop.realtime.HoodieRealtimeInputFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.http.cookie.ClientCookie;
import org.apache.hudi.com.beust.jcommander.JCommander;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.model.HoodieTableType;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.InvalidTableException;
import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils;
import org.apache.hudi.hive.util.ConfigUtils;
import org.apache.hudi.hive.util.HiveSchemaUtil;
import org.apache.hudi.hive.util.Parquet2SparkSchemaUtils;
import org.apache.hudi.org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hudi.org.apache.parquet.schema.GroupType;
import org.apache.hudi.org.apache.parquet.schema.MessageType;
import org.apache.hudi.org.apache.parquet.schema.OriginalType;
import org.apache.hudi.org.apache.parquet.schema.PrimitiveType;
import org.apache.hudi.org.apache.parquet.schema.Type;
import org.apache.hudi.sync.common.AbstractSyncHoodieClient;
import org.apache.hudi.sync.common.AbstractSyncTool;
import org.apache.hudi.table.HoodieTableFactory;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

/* loaded from: input_file:org/apache/hudi/hive/HiveSyncTool.class */
/**
 * Synchronises a Hudi table (schema, partitions and last synced commit time) into Hive.
 *
 * <p>A {@code COPY_ON_WRITE} table is registered under the configured table name. A
 * {@code MERGE_ON_READ} table is registered twice: a snapshot table suffixed with
 * {@link #SUFFIX_SNAPSHOT_TABLE} and a read-optimized table that is either suffixed with
 * {@link #SUFFIX_READ_OPTIMIZED_TABLE} or, when {@code skipROSuffix} is set, uses the plain table name.
 */
public class HiveSyncTool extends AbstractSyncTool {
    private static final Logger LOG = LogManager.getLogger(HiveSyncTool.class);
    public static final String SUFFIX_SNAPSHOT_TABLE = "_rt";
    public static final String SUFFIX_READ_OPTIMIZED_TABLE = "_ro";

    /** Sync configuration. Note that this tool mutates some of its fields (see constructor and sync methods). */
    protected final HiveSyncConfig cfg;
    /** Hive client; stays {@code null} when its creation failed and {@code ignoreExceptions} is set. */
    protected HoodieHiveClient hoodieHiveClient;
    /** Name of the snapshot table; stays {@code null} when no client could be created. */
    protected String snapshotTableName;
    /** Name of the read-optimized table (MERGE_ON_READ only); stays {@code null} when no client could be created. */
    protected Option<String> roTableName;

    /**
     * Creates the tool and derives the Hive table names from the Hudi table type.
     *
     * @param hiveSyncConfig sync configuration; {@code partitionFields} is reset to an empty list when the
     *                       {@link NonPartitionedExtractor} is configured
     * @param hiveConf       Hive configuration handed to the Hive client and the parent class
     * @param fileSystem     file system handed to the Hive client and the parent class
     * @throws HoodieHiveSyncException if the Hive client cannot be created and {@code ignoreExceptions} is not set
     * @throws InvalidTableException   if the table type is neither COPY_ON_WRITE nor MERGE_ON_READ
     */
    public HiveSyncTool(HiveSyncConfig hiveSyncConfig, HiveConf hiveConf, FileSystem fileSystem) {
        super(hiveConf.getAllProperties(), fileSystem);
        this.cfg = hiveSyncConfig;
        try {
            this.hoodieHiveClient = new HoodieHiveClient(hiveSyncConfig, hiveConf, fileSystem);
        } catch (RuntimeException e) {
            if (!hiveSyncConfig.ignoreExceptions.booleanValue()) {
                throw new HoodieHiveSyncException("Got runtime exception when hive syncing", e);
            }
            // Best effort: the client stays null and syncHoodieTable() becomes a no-op.
            LOG.error("Got runtime exception when hive syncing, but continuing as ignoreExceptions config is set ", e);
        }
        if (NonPartitionedExtractor.class.getName().equals(hiveSyncConfig.partitionValueExtractorClass)) {
            LOG.warn("Set partitionFields to empty, since the NonPartitionedExtractor is used");
            hiveSyncConfig.partitionFields = new ArrayList<>();
        }
        if (this.hoodieHiveClient == null) {
            return;
        }
        switch (this.hoodieHiveClient.getTableType()) {
            case COPY_ON_WRITE:
                this.snapshotTableName = hiveSyncConfig.tableName;
                this.roTableName = Option.empty();
                break;
            case MERGE_ON_READ:
                this.snapshotTableName = hiveSyncConfig.tableName + SUFFIX_SNAPSHOT_TABLE;
                this.roTableName = hiveSyncConfig.skipROSuffix.booleanValue()
                        ? Option.of(hiveSyncConfig.tableName)
                        : Option.of(hiveSyncConfig.tableName + SUFFIX_READ_OPTIMIZED_TABLE);
                break;
            default:
                LOG.error("Unknown table type " + this.hoodieHiveClient.getTableType());
                throw new InvalidTableException(this.hoodieHiveClient.getBasePath());
        }
    }

    /**
     * Runs the sync if a Hive client is available and always closes the client afterwards.
     *
     * @throws HoodieException wrapping any {@link RuntimeException} raised while syncing
     */
    @Override // org.apache.hudi.sync.common.AbstractSyncTool
    public void syncHoodieTable() {
        try {
            if (this.hoodieHiveClient != null) {
                doSync();
            }
        } catch (RuntimeException e) {
            throw new HoodieException("Got runtime exception when hive syncing " + this.cfg.tableName, e);
        } finally {
            if (this.hoodieHiveClient != null) {
                this.hoodieHiveClient.close();
            }
        }
    }

    /**
     * Dispatches the sync according to the table type: one table for COPY_ON_WRITE, the read-optimized table
     * followed by the snapshot table for MERGE_ON_READ.
     *
     * <p>Declared public in this source; the decompiler noted that the access modifier was changed from protected.
     *
     * @throws InvalidTableException if the table type is neither COPY_ON_WRITE nor MERGE_ON_READ
     */
    public void doSync() {
        switch (this.hoodieHiveClient.getTableType()) {
            case COPY_ON_WRITE:
                syncHoodieTable(this.snapshotTableName, false, false);
                break;
            case MERGE_ON_READ:
                // Read-optimized table first (regular input format), then the snapshot table (realtime input format).
                syncHoodieTable(this.roTableName.get(), false, true);
                syncHoodieTable(this.snapshotTableName, true, false);
                break;
            default:
                LOG.error("Unknown table type " + this.hoodieHiveClient.getTableType());
                throw new InvalidTableException(this.hoodieHiveClient.getBasePath());
        }
    }

    /**
     * Syncs a single Hive table: ensures the database exists, syncs the schema, syncs the partitions written
     * since the last synced commit and finally records the new last synced commit time.
     *
     * <p>Declared public in this source; the decompiler noted that the access modifier was changed from protected.
     *
     * @param str Hive table name to sync
     * @param z   whether the realtime input format should be used when the table has to be created
     * @param z2  value written to the {@code ConfigUtils.IS_QUERY_AS_RO_TABLE} serde property when syncing
     *            as a Spark data source table
     * @throws HoodieHiveSyncException if the database does not exist and {@code autoCreateDatabase} is not set,
     *                                 or if partition syncing fails
     */
    public void syncHoodieTable(String str, boolean z, boolean z2) {
        LOG.info("Trying to sync hoodie table " + str + " with base path " + this.hoodieHiveClient.getBasePath() + " of type " + this.hoodieHiveClient.getTableType());

        if (this.cfg.autoCreateDatabase.booleanValue()) {
            try {
                if (!this.hoodieHiveClient.doesDataBaseExist(this.cfg.databaseName)) {
                    this.hoodieHiveClient.createDatabase(this.cfg.databaseName);
                }
            } catch (Exception e) {
                // Deliberately best effort: a failure here is only logged and the sync continues.
                LOG.warn("Unable to create database", e);
            }
        } else if (!this.hoodieHiveClient.doesDataBaseExist(this.cfg.databaseName)) {
            throw new HoodieHiveSyncException("hive database does not exist " + this.cfg.databaseName);
        }

        // Captured before the schema sync, which may create the table.
        boolean tableExists = this.hoodieHiveClient.doesTableExist(str);
        MessageType schema = this.hoodieHiveClient.getDataSchema();

        // Spark data source sync is switched off (on the shared config) for the non-read-optimized
        // table of a bootstrapped MERGE_ON_READ table.
        if (this.hoodieHiveClient.isBootstrap()
                && this.hoodieHiveClient.getTableType() == HoodieTableType.MERGE_ON_READ
                && !z2) {
            this.cfg.syncAsSparkDataSourceTable = false;
        }

        syncSchema(str, tableExists, z, z2, schema);
        LOG.info("Schema sync complete. Syncing partitions for " + str);

        Option<String> lastCommitTimeSynced = Option.empty();
        if (tableExists) {
            lastCommitTimeSynced = this.hoodieHiveClient.getLastCommitTimeSynced(str);
        }
        LOG.info("Last commit time synced was found to be " + lastCommitTimeSynced.orElse("null"));

        List<String> writtenPartitions = this.hoodieHiveClient.getPartitionsWrittenToSince(lastCommitTimeSynced);
        LOG.info("Storage partitions scan complete. Found " + writtenPartitions.size());
        syncPartitions(str, writtenPartitions);

        this.hoodieHiveClient.updateLastCommitTimeSynced(str);
        LOG.info("Sync complete for " + str);
    }

    /**
     * Creates the Hive table if it is missing; otherwise updates its definition when the storage schema
     * differs from the Hive schema.
     *
     * @param tableName              Hive table name
     * @param tableExists            whether the table already exists in Hive
     * @param useRealTimeInputFormat whether to pick the realtime input format when creating the table
     * @param readAsOptimized        forwarded to {@link #getSparkSerdeProperties(boolean)}
     * @param schema                 schema read from storage
     */
    private void syncSchema(String tableName, boolean tableExists, boolean useRealTimeInputFormat,
                            boolean readAsOptimized, MessageType schema) {
        Map<String, String> tableProperties = ConfigUtils.toMap(this.cfg.tableProperties);
        Map<String, String> serdeProperties = ConfigUtils.toMap(this.cfg.serdeProperties);
        boolean syncAsSparkTable = this.cfg.syncAsSparkDataSourceTable.booleanValue();
        if (syncAsSparkTable) {
            Map<String, String> sparkTableProperties = getSparkTableProperties(this.cfg.sparkSchemaLengthThreshold, schema);
            Map<String, String> sparkSerdeProperties = getSparkSerdeProperties(readAsOptimized);
            tableProperties.putAll(sparkTableProperties);
            serdeProperties.putAll(sparkSerdeProperties);
        }

        if (!tableExists) {
            LOG.info("Hive table " + tableName + " is not found. Creating it");
            // Locale.ROOT keeps the enum lookup independent of the JVM default locale
            // (e.g. "hfile" would not upper-case to "HFILE" under a Turkish locale).
            HoodieFileFormat baseFileFormat =
                    HoodieFileFormat.valueOf(this.cfg.baseFileFormat.toUpperCase(java.util.Locale.ROOT));
            String inputFormatClassName = HoodieInputFormatUtils.getInputFormatClassName(baseFileFormat, useRealTimeInputFormat);
            if (baseFileFormat.equals(HoodieFileFormat.PARQUET) && this.cfg.usePreApacheInputFormat.booleanValue()) {
                // Fall back to the legacy com.uber.hoodie input format classes.
                inputFormatClassName = useRealTimeInputFormat
                        ? HoodieRealtimeInputFormat.class.getName()
                        : HoodieInputFormat.class.getName();
            }
            this.hoodieHiveClient.createTable(
                    tableName,
                    schema,
                    inputFormatClassName,
                    HoodieInputFormatUtils.getOutputFormatClassName(baseFileFormat),
                    HoodieInputFormatUtils.getSerDeClassName(baseFileFormat),
                    serdeProperties,
                    tableProperties);
            return;
        }

        boolean schemaUnchanged = HiveSchemaUtil.getSchemaDifference(
                schema,
                this.hoodieHiveClient.getTableSchema(tableName),
                this.cfg.partitionFields,
                this.cfg.supportTimestamp.booleanValue()).isEmpty();
        if (schemaUnchanged) {
            LOG.info("No Schema difference for " + tableName);
            return;
        }

        LOG.info("Schema difference found for " + tableName);
        this.hoodieHiveClient.updateTableDefinition(tableName, schema);
        // Also push properties when only the generated Spark properties are present; otherwise the
        // spark.sql.sources.schema.* entries stored in Hive would keep describing the old schema.
        if (this.cfg.tableProperties != null || syncAsSparkTable) {
            this.hoodieHiveClient.updateTableProperties(tableName, tableProperties);
            LOG.info("Sync table properties for " + tableName + ", table properties is: " + this.cfg.tableProperties);
        }
    }

    /**
     * Builds the table properties describing the table as a Spark data source table: the provider, the
     * schema as JSON split into chunks of at most {@code schemaLengthThreshold} characters, and the
     * partition columns.
     *
     * <p>In the schema handed to the JSON conversion, non-partition fields come first (in schema order)
     * followed by the partition fields (in configured order). A partition field that is missing from the
     * schema is represented as a required UTF8 binary field.
     *
     * @param schemaLengthThreshold maximum length of one schema chunk; must be positive
     * @param schema                schema read from storage
     * @return the Spark table properties
     * @throws HoodieHiveSyncException if {@code schemaLengthThreshold} is not positive
     */
    private Map<String, String> getSparkTableProperties(int schemaLengthThreshold, MessageType schema) {
        if (schemaLengthThreshold <= 0) {
            // A zero threshold would divide by zero below; a negative one would yield a negative part count.
            throw new HoodieHiveSyncException(
                    "sparkSchemaLengthThreshold must be positive, but was " + schemaLengthThreshold);
        }

        GroupType rootGroup = schema.asGroupType();
        List<String> partitionNames = this.cfg.partitionFields;

        Map<String, Type> fieldsByName = new HashMap<>();
        for (Type field : rootGroup.getFields()) {
            fieldsByName.put(field.getName(), field);
        }

        List<Type> reorderedFields = new ArrayList<>();
        for (Type field : rootGroup.getFields()) {
            if (!partitionNames.contains(field.getName())) {
                reorderedFields.add(field);
            }
        }
        for (String partitionName : partitionNames) {
            reorderedFields.add(fieldsByName.getOrDefault(
                    partitionName,
                    new PrimitiveType(Type.Repetition.REQUIRED, PrimitiveType.PrimitiveTypeName.BINARY, partitionName, OriginalType.UTF8)));
        }
        GroupType reorderedSchema = new GroupType(rootGroup.getRepetition(), rootGroup.getName(), reorderedFields);

        Map<String, String> properties = new HashMap<>();
        // NOTE(review): FACTORY_ID comes from an unrelated factory class, presumably a decompiler
        // constant substitution - confirm the intended provider value.
        properties.put("spark.sql.sources.provider", HoodieTableFactory.FACTORY_ID);

        String schemaJson = Parquet2SparkSchemaUtils.convertToSparkSchemaJson(reorderedSchema);
        // Ceiling division: number of chunks needed to hold the whole JSON string.
        int numParts = ((schemaJson.length() + schemaLengthThreshold) - 1) / schemaLengthThreshold;
        properties.put("spark.sql.sources.schema.numParts", String.valueOf(numParts));
        for (int part = 0; part < numParts; part++) {
            int start = part * schemaLengthThreshold;
            int end = Math.min(start + schemaLengthThreshold, schemaJson.length());
            properties.put("spark.sql.sources.schema.part." + part, schemaJson.substring(start, end));
        }

        if (!partitionNames.isEmpty()) {
            properties.put("spark.sql.sources.schema.numPartCols", String.valueOf(partitionNames.size()));
            for (int col = 0; col < partitionNames.size(); col++) {
                properties.put("spark.sql.sources.schema.partCol." + col, partitionNames.get(col));
            }
        }
        return properties;
    }

    /**
     * Builds the serde properties used when syncing as a Spark data source table: the table base path and
     * the {@code ConfigUtils.IS_QUERY_AS_RO_TABLE} flag.
     *
     * @param readAsOptimized value stored (as a string) under {@code ConfigUtils.IS_QUERY_AS_RO_TABLE}
     * @return the Spark serde properties
     */
    private Map<String, String> getSparkSerdeProperties(boolean readAsOptimized) {
        Map<String, String> properties = new HashMap<>();
        // NOTE(review): the key is taken from an HTTP cookie constant, presumably a decompiler
        // constant substitution - confirm the intended key.
        properties.put(ClientCookie.PATH_ATTR, this.cfg.basePath);
        properties.put(ConfigUtils.IS_QUERY_AS_RO_TABLE, String.valueOf(readAsOptimized));
        return properties;
    }

    /**
     * Compares the partitions known to Hive with the partitions written on storage and adds or updates
     * the Hive partitions accordingly.
     *
     * @param tableName         Hive table name
     * @param writtenPartitions storage partitions to reconcile against the Hive table
     * @throws HoodieHiveSyncException wrapping any failure raised while syncing the partitions
     */
    private void syncPartitions(String tableName, List<String> writtenPartitions) {
        try {
            List<AbstractSyncHoodieClient.PartitionEvent> events = this.hoodieHiveClient.getPartitionEvents(
                    this.hoodieHiveClient.scanTablePartitions(tableName), writtenPartitions);

            List<String> newPartitions =
                    filterPartitions(events, AbstractSyncHoodieClient.PartitionEvent.PartitionEventType.ADD);
            LOG.info("New Partitions " + newPartitions);
            this.hoodieHiveClient.addPartitionsToTable(tableName, newPartitions);

            List<String> changedPartitions =
                    filterPartitions(events, AbstractSyncHoodieClient.PartitionEvent.PartitionEventType.UPDATE);
            LOG.info("Changed Partitions " + changedPartitions);
            this.hoodieHiveClient.updatePartitionsToTable(tableName, changedPartitions);
        } catch (Exception e) {
            throw new HoodieHiveSyncException("Failed to sync partitions for table " + tableName, e);
        }
    }

    /**
     * Returns the storage partitions of all events that have the given type, in event order.
     *
     * @param events    partition events to filter
     * @param eventType event type to keep
     * @return storage partitions of the matching events
     */
    private List<String> filterPartitions(List<AbstractSyncHoodieClient.PartitionEvent> events,
                                          AbstractSyncHoodieClient.PartitionEvent.PartitionEventType eventType) {
        return events.stream()
                .filter(event -> event.eventType == eventType)
                .map(event -> event.storagePartition)
                .collect(Collectors.toList());
    }

    /**
     * Command line entry point: parses the arguments into a {@link HiveSyncConfig}, prints the usage and
     * exits with status 1 when help is requested or no arguments are given, otherwise runs the sync.
     *
     * @param strArr command line arguments
     */
    public static void main(String[] strArr) {
        HiveSyncConfig hiveSyncConfig = new HiveSyncConfig();
        JCommander jCommander = new JCommander(hiveSyncConfig, null, strArr);
        if (hiveSyncConfig.help.booleanValue() || strArr.length == 0) {
            jCommander.usage();
            System.exit(1);
        }
        FileSystem fs = FSUtils.getFs(hiveSyncConfig.basePath, new Configuration());
        HiveConf hiveConf = new HiveConf();
        hiveConf.addResource(fs.getConf());
        new HiveSyncTool(hiveSyncConfig, hiveConf, fs).syncHoodieTable();
    }
}
