package org.apache.gobblin.data.management.conversion.hive.converter;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.base.Splitter;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Random;
import org.apache.avro.Schema;
import org.apache.commons.lang3.StringUtils;
import org.apache.gobblin.configuration.WorkUnitState;
import org.apache.gobblin.converter.Converter;
import org.apache.gobblin.converter.DataConversionException;
import org.apache.gobblin.converter.SingleRecordIterable;
import org.apache.gobblin.data.management.conversion.hive.dataset.ConvertibleHiveDataset;
import org.apache.gobblin.data.management.conversion.hive.entities.HiveProcessingEntity;
import org.apache.gobblin.data.management.conversion.hive.entities.QueryBasedHiveConversionEntity;
import org.apache.gobblin.data.management.conversion.hive.entities.QueryBasedHivePublishEntity;
import org.apache.gobblin.data.management.conversion.hive.events.EventWorkunitUtils;
import org.apache.gobblin.data.management.conversion.hive.query.HiveAvroORCQueryGenerator;
import org.apache.gobblin.data.management.conversion.hive.source.HiveSource;
import org.apache.gobblin.data.management.conversion.hive.task.HiveConverterUtils;
import org.apache.gobblin.data.management.copy.hive.HiveDatasetFinder;
import org.apache.gobblin.data.management.copy.hive.WhitelistBlacklist;
import org.apache.gobblin.hive.HiveMetastoreClientPool;
import org.apache.gobblin.util.AutoReturnableObject;
import org.apache.gobblin.util.HadoopUtils;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.thrift.TException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/gobblin/data/management/conversion/hive/converter/AbstractAvroToOrcConverter.class */
public abstract class AbstractAvroToOrcConverter extends Converter<Schema, Schema, QueryBasedHiveConversionEntity, QueryBasedHiveConversionEntity> {
    private static final Logger log = LoggerFactory.getLogger(AbstractAvroToOrcConverter.class);
    // Sub-directory appended to the destination data path to form the published ORC data location.
    private static final String PUBLISHED_TABLE_SUBDIRECTORY = "final";
    // Storage format name passed to the partition create / alter-storage-format DDL generators.
    private static final String ORC_FORMAT = "orc";
    // Keys emitted as "SET key=value" statements ahead of the conversion queries.
    private static final String GOBBLIN_DATASET_URN_KEY = "gobblin.datasetUrn";
    private static final String GOBBLIN_PARTITION_NAME_KEY = "gobblin.partitionName";
    private static final String GOBBLIN_WORKUNIT_CREATE_TIME_KEY = "gobblin.workunitCreateTime";
    // NOTE(review): despite its name, this value is only used in this class as the separator
    // when joining path segments (data path + sub-directory / staging table / partition dir).
    private static final String HIVE_PARTITIONS_INFO = "/";
    // Not referenced anywhere in this class; presumably a separator for partition type info — confirm before removing.
    private static final String HIVE_PARTITIONS_TYPE = ":";
    // File system handle used to create the destination directories and set their permissions / group.
    protected final FileSystem fs;
    // Partition parameter holding the replaced partitions: entries separated by "|",
    // each entry being a "," separated list of partition values.
    public static final String REPLACED_PARTITIONS_HIVE_METASTORE_KEY = "gobblin.replaced.partitions";
    // Dataset of the entity most recently passed to convertRecord.
    protected ConvertibleHiveDataset hiveDataset;
    // When true, convertRecord does not set the group of the destination data path.
    public static final String HIVE_DATASET_DESTINATION_SKIP_SETGROUP = "hive.dataset.destination.skip.setGroup";
    public static final boolean DEFAULT_HIVE_DATASET_DESTINATION_SKIP_SETGROUP = false;
    // Optional group for the destination data path; when unset the group of the source data location is used.
    public static final String HIVE_DATASET_DESTINATION_GROUP_NAME = "hive.dataset.destination.groupName";
    // Optional group; when set, the staging and published data directories are created and assigned to it.
    public static final String HIVE_DATASET_STAGING_GROUP_NAME = "hive.dataset.staging.groupName";
    // When true, the final partition is created without a format and then altered to ORC;
    // when false, the partition is created with the ORC format directly.
    public static final String HIVE_CONVERSION_SETSERDETOAVROEXPLICITELY = "hive.conversion.setSerdeToAvroExplicitly";
    public static final boolean DEFAULT_HIVE_CONVERSION_SETSERDETOAVROEXPLICITELY = true;
    // Whitelist / blacklist deciding for which destination tables a wrapper view is registered.
    public static final String HIVE_CONVERSION_VIEW_REGISTRATION_WHITELIST = "hive.conversion.view.registration.whitelist";
    public static final String HIVE_CONVERSION_VIEW_REGISTRATION_BLACKLIST = "hive.conversion.view.registration.blacklist";

    /**
     * ORC output layouts known to the converters, each carrying the string returned by
     * {@link #getConfigPrefix()}.
     * NOTE(review): the prefix is presumably used to look up per-format conversion
     * configuration — confirm against the concrete converters.
     */
    protected enum OrcFormats {
        FLATTENED_ORC("flattenedOrc"),
        NESTED_ORC("nestedOrc");

        /** Prefix string associated with this format. */
        private final String configPrefix;

        OrcFormats(String configPrefix) {
            this.configPrefix = configPrefix;
        }

        /**
         * @return the prefix string associated with this format
         */
        public String getConfigPrefix() {
            return configPrefix;
        }
    }

    /**
     * Produces the output schema for the given input Avro schema.
     *
     * @param schema input Avro schema
     * @param workUnitState state of the work unit being processed
     * @return the schema handed to {@code convertRecord} as its first argument
     *         (NOTE(review): presumably flattened or nested depending on the subclass — confirm)
     */
    @Override
    public abstract Schema convertSchema(Schema schema, WorkUnitState workUnitState);

    /**
     * @return {@code true} when a conversion configuration is available; when {@code false},
     *         {@code convertRecord} returns the entity untouched
     */
    protected abstract boolean hasConversionConfig();

    /**
     * @return the conversion configuration supplying destination table, database, data path
     *         and the other options read by {@code convertRecord}
     */
    protected abstract ConvertibleHiveDataset.ConversionConfig getConversionConfig();

    /**
     * Creates the converter and obtains the {@link FileSystem} for a freshly built Hadoop
     * configuration.
     *
     * @throws RuntimeException wrapping the {@link IOException} if the file system cannot be obtained
     */
    public AbstractAvroToOrcConverter() {
        FileSystem fileSystem;
        try {
            fileSystem = FileSystem.get(HadoopUtils.newConfiguration());
        } catch (IOException ioe) {
            throw new RuntimeException(ioe);
        }
        this.fs = fileSystem;
    }

    /**
     * Populates the conversion entity with the Hive queries that convert the source Avro
     * table/partition into a staging ORC table, and serializes into the work unit the publish
     * and cleanup commands that later promote the staging data to the destination table.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>Return the entity untouched when {@link #hasConversionConfig()} is {@code false}.</li>
     *   <li>Create the destination data path with the permissions of the source data location and
     *       (unless skipped) set its group; optionally create the staging / published directories
     *       with an explicit staging group.</li>
     *   <li>Add {@code SET} statements, the staging table DDL, the staging partition DDL and the
     *       conversion DML to the entity's queries.</li>
     *   <li>Build publish queries (final table creation, evolution, partition replacement, view)
     *       plus the directory moves and cleanup commands, and serialize them to the work unit.</li>
     * </ol>
     *
     * @param schema output schema used to generate the DDL / DML
     * @param queryBasedHiveConversionEntity entity describing the source table / partition; its
     *        query list is appended to
     * @param workUnitState work unit state supplying properties and receiving metadata
     * @return an iterable containing the same entity
     * @throws DataConversionException if the destination partition metadata cannot be fetched
     * @throws RuntimeException if the destination directories cannot be created or configured
     */
    public Iterable<QueryBasedHiveConversionEntity> convertRecord(Schema schema, QueryBasedHiveConversionEntity queryBasedHiveConversionEntity, WorkUnitState workUnitState) throws DataConversionException {
        Preconditions.checkNotNull(schema, "Avro schema must not be null");
        Preconditions.checkNotNull(queryBasedHiveConversionEntity, "Conversion entity must not be null");
        Preconditions.checkNotNull(workUnitState, "Workunit state must not be null");
        Preconditions.checkNotNull(queryBasedHiveConversionEntity.getTable(), "Hive table within conversion entity must not be null");
        EventWorkunitUtils.setBeginDDLBuildTimeMetadata(workUnitState, System.currentTimeMillis());
        this.hiveDataset = queryBasedHiveConversionEntity.getConvertibleHiveDataset();
        if (!hasConversionConfig()) {
            return new SingleRecordIterable<>(queryBasedHiveConversionEntity);
        }

        // Source (Avro) table name.
        String avroTableName = queryBasedHiveConversionEntity.getTable().getTableName();

        // Destination (ORC) table names and locations.
        String orcTableName = getConversionConfig().getDestinationTableName();
        String orcStagingTableName = getOrcStagingTableName(getConversionConfig().getDestinationStagingTableName());
        String orcTableDatabase = getConversionConfig().getDestinationDbName();
        String orcDataLocation = getOrcDataLocation();
        String orcStagingDataLocation = getOrcStagingDataLocation(orcStagingTableName);
        boolean isEvolutionEnabled = getConversionConfig().isEvolutionEnabled();
        Optional<Table> destinationTableMeta = (Optional<Table>) HiveConverterUtils.getDestinationTableMeta(orcTableDatabase, orcTableName, workUnitState.getProperties()).getLeft();

        // The wrapper view is registered unless a whitelist/blacklist is configured and rejects the table.
        Optional<WhitelistBlacklist> viewRegistrationWhiteBlacklist = getViewWhiteBackListFromWorkUnit(workUnitState);
        Optional<String> wrapperViewName;
        if (!viewRegistrationWhiteBlacklist.isPresent()
                || viewRegistrationWhiteBlacklist.get().acceptTable(orcTableDatabase, orcTableName)) {
            wrapperViewName = getConversionConfig().getDestinationViewName();
        } else {
            wrapperViewName = Optional.absent();
        }
        boolean shouldUpdateView = getConversionConfig().isUpdateViewAlwaysEnabled();

        // Remaining optional settings.
        Optional<List<String>> clusterBy = getConversionConfig().getClusterBy().isEmpty()
                ? Optional.<List<String>>absent()
                : Optional.of(getConversionConfig().getClusterBy());
        Optional<Integer> numBuckets = getConversionConfig().getNumBuckets();
        Optional<Integer> rowLimit = getConversionConfig().getRowLimit();
        Properties tableProperties = getConversionConfig().getDestinationTableProperties();
        List<String> sourceDataPathIdentifier = getConversionConfig().getSourceDataPathIdentifier();

        // Partition info is left empty by populatePartitionInfo for non-partitioned sources.
        Map<String, String> partitionsDDLInfo = Maps.newHashMap();
        Map<String, String> partitionsDMLInfo = Maps.newHashMap();
        HiveConverterUtils.populatePartitionInfo(queryBasedHiveConversionEntity, partitionsDDLInfo, partitionsDMLInfo);

        // Every file system call below can throw IOException, so all of them live inside one try block.
        try {
            FileStatus sourceDataFileStatus = this.fs.getFileStatus(queryBasedHiveConversionEntity.getTable().getDataLocation());
            FsPermission sourceDataPermission = sourceDataFileStatus.getPermission();
            Path destinationDataPath = new Path(getConversionConfig().getDestinationDataPath());
            if (!this.fs.mkdirs(destinationDataPath, sourceDataPermission)) {
                throw new RuntimeException(String.format("Failed to create path %s with permissions %s", destinationDataPath, sourceDataPermission));
            }
            this.fs.setPermission(destinationDataPath, sourceDataPermission);

            // Explicitly configured group wins; otherwise keep the group of the source data.
            String destinationGroup = workUnitState.contains(HIVE_DATASET_DESTINATION_GROUP_NAME)
                    ? workUnitState.getProp(HIVE_DATASET_DESTINATION_GROUP_NAME)
                    : sourceDataFileStatus.getGroup();
            if (!workUnitState.getPropAsBoolean(HIVE_DATASET_DESTINATION_SKIP_SETGROUP, DEFAULT_HIVE_DATASET_DESTINATION_SKIP_SETGROUP)) {
                this.fs.setOwner(destinationDataPath, null, destinationGroup);
            }
            // The logged group is the source location's group, as in the original message.
            log.info(String.format("Created %s with permissions %s and group %s", destinationDataPath, sourceDataPermission, sourceDataFileStatus.getGroup()));

            if (workUnitState.contains(HIVE_DATASET_STAGING_GROUP_NAME)) {
                String stagingGroup = workUnitState.getProp(HIVE_DATASET_STAGING_GROUP_NAME);
                log.info("Setting staging directory group name as " + stagingGroup);
                this.fs.mkdirs(new Path(orcStagingDataLocation));
                this.fs.setOwner(new Path(orcStagingDataLocation), null, stagingGroup);
                this.fs.mkdirs(new Path(orcDataLocation));
                this.fs.setOwner(new Path(orcDataLocation), null, stagingGroup);
            }
        } catch (IOException e) {
            // Always throws: rethrows the IOException wrapped in a RuntimeException.
            Throwables.propagate(e);
        }

        // Hive runtime properties and Gobblin bookkeeping keys, emitted as SET statements.
        for (Map.Entry<?, ?> entry : getConversionConfig().getHiveRuntimeProperties().entrySet()) {
            queryBasedHiveConversionEntity.getQueries().add(String.format("SET %s=%s", entry.getKey(), entry.getValue()));
        }
        queryBasedHiveConversionEntity.getQueries().add(String.format("SET %s=%s", GOBBLIN_DATASET_URN_KEY, queryBasedHiveConversionEntity.getTable().getCompleteName()));
        if (queryBasedHiveConversionEntity.getPartition().isPresent()) {
            queryBasedHiveConversionEntity.getQueries().add(String.format("SET %s=%s", GOBBLIN_PARTITION_NAME_KEY, ((Partition) queryBasedHiveConversionEntity.getPartition().get()).getCompleteName()));
        }
        queryBasedHiveConversionEntity.getQueries().add(String.format("SET %s=%s", GOBBLIN_WORKUNIT_CREATE_TIME_KEY, workUnitState.getWorkunit().getProp("event.sla.originTimestamp")));

        // Staging table DDL; hiveColumns is filled by the generator and reused for the evolution DDL.
        Map<String, String> hiveColumns = new LinkedHashMap<>();
        String createStagingTableDDL = HiveAvroORCQueryGenerator.generateCreateTableDDL(schema, orcStagingTableName, orcStagingDataLocation, Optional.of(orcTableDatabase), Optional.of(partitionsDDLInfo), clusterBy, Optional.absent(), numBuckets, Optional.absent(), Optional.absent(), Optional.absent(), tableProperties, isEvolutionEnabled, destinationTableMeta, hiveColumns);
        queryBasedHiveConversionEntity.getQueries().add(createStagingTableDDL);
        log.debug("Create staging table DDL: " + createStagingTableDDL);

        // Staging partition DDL (partitioned sources only).
        String partitionDirName = HiveConverterUtils.getStagingDataPartitionDirName(queryBasedHiveConversionEntity, sourceDataPathIdentifier);
        String orcStagingDataPartitionLocation = orcStagingDataLocation + HIVE_PARTITIONS_INFO + partitionDirName;
        if (!partitionsDMLInfo.isEmpty()) {
            List<String> createStagingPartitionDDL = HiveAvroORCQueryGenerator.generateCreatePartitionDDL(orcTableDatabase, orcStagingTableName, orcStagingDataPartitionLocation, partitionsDMLInfo);
            queryBasedHiveConversionEntity.getQueries().addAll(createStagingPartitionDDL);
            log.debug("Create staging partition DDL: " + createStagingPartitionDDL);
        }

        // Conversion DML: source Avro table -> staging ORC table.
        String insertInStagingDML = HiveAvroORCQueryGenerator.generateTableMappingDML(queryBasedHiveConversionEntity.getHiveTable().getAvroSchema(), schema, avroTableName, orcStagingTableName, Optional.of(queryBasedHiveConversionEntity.getTable().getDbName()), Optional.of(orcTableDatabase), Optional.of(partitionsDMLInfo), Optional.absent(), Optional.absent(), isEvolutionEnabled, destinationTableMeta, rowLimit);
        queryBasedHiveConversionEntity.getQueries().add(insertInStagingDML);
        log.debug("Conversion staging DML: " + insertInStagingDML);

        // Publish / cleanup commands executed after the conversion queries have run.
        QueryBasedHivePublishEntity publishEntity = new QueryBasedHivePublishEntity();
        List<String> publishQueries = publishEntity.getPublishQueries();
        Map<String, String> publishDirectories = publishEntity.getPublishDirectories();
        List<String> cleanupQueries = publishEntity.getCleanupQueries();
        List<String> cleanupDirectories = publishEntity.getCleanupDirectories();

        // Create the final table only when it does not exist yet.
        if (!destinationTableMeta.isPresent()) {
            String createFinalTableDDL = HiveAvroORCQueryGenerator.generateCreateTableDDL(schema, orcTableName, orcDataLocation, Optional.of(orcTableDatabase), Optional.of(partitionsDDLInfo), clusterBy, Optional.absent(), numBuckets, Optional.absent(), Optional.absent(), Optional.absent(), tableProperties, isEvolutionEnabled, destinationTableMeta, new HashMap<String, String>());
            publishQueries.add(createFinalTableDDL);
            log.debug("Create final table DDL: " + createFinalTableDDL);
        }

        // Schema evolution of the final table; any evolution also forces a view update.
        List<String> evolutionDDLs = HiveAvroORCQueryGenerator.generateEvolutionDDL(orcStagingTableName, orcTableName, Optional.of(orcTableDatabase), Optional.of(orcTableDatabase), schema, isEvolutionEnabled, hiveColumns, destinationTableMeta);
        log.debug("Evolve final table DDLs: " + evolutionDDLs);
        EventWorkunitUtils.setEvolutionMetadata(workUnitState, evolutionDDLs);
        shouldUpdateView = shouldUpdateView || !evolutionDDLs.isEmpty();
        publishQueries.addAll(evolutionDDLs);

        if (partitionsDDLInfo.isEmpty()) {
            // Snapshot (non-partitioned) table: move the whole staging directory.
            log.info("Snapshot directory to move: " + orcStagingDataLocation + " to: " + orcDataLocation);
            publishDirectories.put(orcStagingDataLocation, orcDataLocation);
        } else {
            // Partitioned table: move the partition directory and re-register the partition.
            String orcFinalDataPartitionLocation = HiveConverterUtils.updatePartitionLocation(orcDataLocation + HIVE_PARTITIONS_INFO + partitionDirName, workUnitState, getDestinationPartitionLocation(destinationTableMeta, workUnitState, ((Partition) queryBasedHiveConversionEntity.getPartition().get()).getName()));
            log.info("Partition directory to move: " + orcStagingDataPartitionLocation + " to: " + orcFinalDataPartitionLocation);
            publishDirectories.put(orcStagingDataPartitionLocation, orcFinalDataPartitionLocation);

            List<String> dropPartitionsDDL = HiveAvroORCQueryGenerator.generateDropPartitionsDDL(orcTableDatabase, orcTableName, partitionsDMLInfo);
            log.debug("Drop partitions if exist in final table: " + dropPartitionsDDL);
            publishQueries.addAll(dropPartitionsDDL);

            if (workUnitState.getPropAsBoolean(HIVE_CONVERSION_SETSERDETOAVROEXPLICITELY, DEFAULT_HIVE_CONVERSION_SETSERDETOAVROEXPLICITELY)) {
                // Create the partition without a format, then switch its storage format to ORC.
                List<String> createFinalPartitionDDL = HiveAvroORCQueryGenerator.generateCreatePartitionDDL(orcTableDatabase, orcTableName, orcFinalDataPartitionLocation, partitionsDMLInfo, Optional.<String>absent());
                log.debug("Create final partition DDL: " + createFinalPartitionDDL);
                publishQueries.addAll(createFinalPartitionDDL);
                List<String> updatePartitionFormatDDL = HiveAvroORCQueryGenerator.generateAlterTableOrPartitionStorageFormatDDL(orcTableDatabase, orcTableName, Optional.of(partitionsDMLInfo), ORC_FORMAT);
                log.debug("Update final partition storage format to ORC (if not already in ORC)");
                publishQueries.addAll(updatePartitionFormatDDL);
            } else {
                // Create the partition directly with the ORC format.
                List<String> createFinalPartitionDDL = HiveAvroORCQueryGenerator.generateCreatePartitionDDL(orcTableDatabase, orcTableName, orcFinalDataPartitionLocation, partitionsDMLInfo, Optional.fromNullable(ORC_FORMAT));
                log.debug("Create final partition DDL: " + createFinalPartitionDDL);
                publishQueries.addAll(createFinalPartitionDDL);
            }
        }

        // Common cleanup for both cases: drop the staging table and delete its directory.
        String dropStagingTableDDL = HiveAvroORCQueryGenerator.generateDropTableDDL(orcTableDatabase, orcStagingTableName);
        log.debug("Drop staging table DDL: " + dropStagingTableDDL);
        cleanupQueries.add(dropStagingTableDDL);
        log.info("Staging table directory to delete: " + orcStagingDataLocation);
        cleanupDirectories.add(orcStagingDataLocation);

        // Drop the partitions that the processed partition replaced, if any are recorded on it.
        publishQueries.addAll(HiveAvroORCQueryGenerator.generateDropPartitionsDDL(orcTableDatabase, orcTableName, getDropPartitionsDDLInfo(queryBasedHiveConversionEntity)));

        if (wrapperViewName.isPresent()) {
            List<String> createOrUpdateViewDDLs = HiveAvroORCQueryGenerator.generateCreateOrUpdateViewDDL(orcTableDatabase, orcTableName, orcTableDatabase, wrapperViewName.get(), shouldUpdateView);
            log.debug("Create or update View DDLs: " + createOrUpdateViewDDLs);
            publishQueries.addAll(createOrUpdateViewDDLs);
        }

        HiveAvroORCQueryGenerator.serializePublishCommands(workUnitState, publishEntity);
        log.debug("Publish partition entity: " + publishEntity);
        log.debug("Conversion Query " + queryBasedHiveConversionEntity.getQueries());
        EventWorkunitUtils.setEndDDLBuildTimeMetadata(workUnitState, System.currentTimeMillis());
        return new SingleRecordIterable<>(queryBasedHiveConversionEntity);
    }

    /**
     * Builds the view-registration whitelist/blacklist from the work unit properties.
     *
     * @param workUnitState work unit state to read from; may be {@code null}
     * @return the configured {@link WhitelistBlacklist}, or absent when the state is {@code null}
     *         or neither the whitelist nor the blacklist property is set
     * @throws RuntimeException wrapping the {@link IOException} raised while building the list
     */
    @VisibleForTesting
    public static Optional<WhitelistBlacklist> getViewWhiteBackListFromWorkUnit(WorkUnitState workUnitState) {
        if (workUnitState == null) {
            return Optional.absent();
        }
        // Nothing to build unless at least one of the two properties is configured.
        if (!workUnitState.contains(HIVE_CONVERSION_VIEW_REGISTRATION_WHITELIST)
                && !workUnitState.contains(HIVE_CONVERSION_VIEW_REGISTRATION_BLACKLIST)) {
            return Optional.absent();
        }
        try {
            String whitelist = workUnitState.getProp(HIVE_CONVERSION_VIEW_REGISTRATION_WHITELIST, HiveSource.DEFAULT_HIVE_SOURCE_IGNORE_DATA_PATH_IDENTIFIER);
            String blacklist = workUnitState.getProp(HIVE_CONVERSION_VIEW_REGISTRATION_BLACKLIST, HiveSource.DEFAULT_HIVE_SOURCE_IGNORE_DATA_PATH_IDENTIFIER);
            return Optional.of(new WhitelistBlacklist(whitelist, blacklist));
        } catch (IOException ioe) {
            // propagate() always throws; the return below only satisfies the compiler.
            Throwables.propagate(ioe);
            return Optional.absent();
        }
    }

    /**
     * Derives a staging table name by appending {@code "_"}, the current time in milliseconds
     * and one random decimal digit to the given base name.
     *
     * @param str base staging table name
     * @return the suffixed staging table name
     */
    private String getOrcStagingTableName(String str) {
        long timestampMillis = System.currentTimeMillis();
        int randomDigit = new Random().nextInt(10);
        return str + "_" + timestampMillis + randomDigit;
    }

    /**
     * @return the published ORC data location: the configured destination data path followed by
     *         the separator and the published-table sub-directory
     */
    private String getOrcDataLocation() {
        return new StringBuilder()
                .append(getConversionConfig().getDestinationDataPath())
                .append(HIVE_PARTITIONS_INFO)
                .append(PUBLISHED_TABLE_SUBDIRECTORY)
                .toString();
    }

    /**
     * @param str staging table name used as the directory name
     * @return the staging data location: the configured destination data path followed by the
     *         separator and the given name
     */
    private String getOrcStagingDataLocation(String str) {
        return new StringBuilder()
                .append(getConversionConfig().getDestinationDataPath())
                .append(HIVE_PARTITIONS_INFO)
                .append(str)
                .toString();
    }

    /**
     * Returns the partition specs to drop for the partition carried by the given entity.
     *
     * @param hiveProcessingEntity entity whose optional partition is inspected
     * @return an empty list when the entity has no partition, otherwise the result of
     *         {@link #getDropPartitionsDDLInfo(Partition)} for that partition
     */
    @VisibleForTesting
    public static List<Map<String, String>> getDropPartitionsDDLInfo(HiveProcessingEntity hiveProcessingEntity) {
        if (hiveProcessingEntity.getPartition().isPresent()) {
            return getDropPartitionsDDLInfo((Partition) hiveProcessingEntity.getPartition().get());
        }
        return Collections.emptyList();
    }

    /**
     * Parses the {@link #REPLACED_PARTITIONS_HIVE_METASTORE_KEY} parameter of the given partition
     * into one key-to-value map per replaced partition.
     *
     * <p>The parameter holds partitions separated by {@code "|"}; each partition is a
     * {@code ","} separated list of values, matched positionally against the table's partition
     * keys. An entry whose values equal the given partition's own values is skipped.
     *
     * @param partition partition whose parameters are inspected
     * @return a list of partition specs, empty when the parameter is blank
     */
    public static List<Map<String, String>> getDropPartitionsDDLInfo(Partition partition) {
        List<Map<String, String>> partitionsToDrop = Lists.newArrayList();
        List<FieldSchema> partitionKeys = partition.getTable().getPartitionKeys();
        String replacedPartitions = partition.getParameters().get(REPLACED_PARTITIONS_HIVE_METASTORE_KEY);
        if (!StringUtils.isNotBlank(replacedPartitions)) {
            return partitionsToDrop;
        }
        for (String replacedPartition : Splitter.on("|").omitEmptyStrings().split(replacedPartitions)) {
            List<String> replacedValues = Splitter.on(",").omitEmptyStrings().trimResults().splitToList(replacedPartition);
            // Never schedule the partition that is currently being processed for dropping.
            if (replacedValues.equals(partition.getValues())) {
                continue;
            }
            ImmutableMap.Builder<String, String> partitionSpec = ImmutableMap.builder();
            int valueIndex = 0;
            for (FieldSchema partitionKey : partitionKeys) {
                partitionSpec.put(partitionKey.getName(), replacedValues.get(valueIndex));
                valueIndex++;
            }
            partitionsToDrop.add(partitionSpec.build());
        }
        return partitionsToDrop;
    }

    /**
     * Looks up the data location of an existing partition of the destination table.
     *
     * @param optional metadata of the destination table, absent when the table does not exist
     * @param workUnitState work unit state whose job state supplies the metastore properties and URI
     * @param str name of the partition to look up
     * @return the partition's data location, or absent when the destination table is absent or
     *         the metastore reports that the partition does not exist
     * @throws DataConversionException if the metastore lookup fails for any other reason
     */
    private Optional<Path> getDestinationPartitionLocation(Optional<Table> optional, WorkUnitState workUnitState, String str) throws DataConversionException {
        if (!optional.isPresent()) {
            return Optional.absent();
        }
        try {
            HiveMetastoreClientPool pool = HiveMetastoreClientPool.get(workUnitState.getJobState().getProperties(), Optional.fromNullable(workUnitState.getJobState().getProp(HiveDatasetFinder.HIVE_METASTORE_URI_KEY)));
            org.apache.hadoop.hive.metastore.api.Partition destinationPartition;
            // try-with-resources returns the client to the pool on every exit path.
            try (AutoReturnableObject<IMetaStoreClient> client = pool.getClient()) {
                destinationPartition = client.get().getPartition(optional.get().getDbName(), optional.get().getTableName(), str);
            } catch (NoSuchObjectException e) {
                // The partition has not been published to the destination table yet.
                return Optional.absent();
            }
            org.apache.hadoop.hive.ql.metadata.Table qlTable = new org.apache.hadoop.hive.ql.metadata.Table(optional.get());
            // Optional.of mirrors the original null handling (fails fast on a null partition).
            return Optional.of(new Partition(qlTable, Optional.of(destinationPartition).get()).getDataLocation());
        } catch (IOException | TException | HiveException e2) {
            throw new DataConversionException("Could not fetch destination table metadata", e2);
        }
    }
}
