package org.apache.kylin.source.hive;

import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.util.HadoopUtil;
import org.apache.kylin.common.util.HiveCmdBuilder;
import org.apache.kylin.common.util.Pair;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.engine.mr.IMRInput;
import org.apache.kylin.engine.mr.JobBuilderSupport;
import org.apache.kylin.engine.mr.steps.CubingExecutableUtil;
import org.apache.kylin.job.JoinedFlatTable;
import org.apache.kylin.job.common.PatternedLogger;
import org.apache.kylin.job.common.ShellExecutable;
import org.apache.kylin.job.constant.ExecutableConstants;
import org.apache.kylin.job.exception.ExecuteException;
import org.apache.kylin.job.execution.AbstractExecutable;
import org.apache.kylin.job.execution.DefaultChainedExecutable;
import org.apache.kylin.job.execution.ExecutableContext;
import org.apache.kylin.job.execution.ExecuteResult;
import org.apache.kylin.metadata.TableMetadataManager;
import org.apache.kylin.metadata.model.DataModelDesc;
import org.apache.kylin.metadata.model.IJoinedFlatTableDesc;
import org.apache.kylin.metadata.model.ISegment;
import org.apache.kylin.metadata.model.JoinTableDesc;
import org.apache.kylin.metadata.model.TableDesc;
import org.apache.kylin.tool.shaded.com.google.common.collect.Sets;
import org.apache.kylin.tool.shaded.org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.util.AntPathMatcher;

/* loaded from: input_file:org/apache/kylin/source/hive/HiveMRInput.class */
public class HiveMRInput implements IMRInput {
    private static final Logger logger = LoggerFactory.getLogger((Class<?>) HiveMRInput.class);

    /* loaded from: input_file:org/apache/kylin/source/hive/HiveMRInput$BatchCubingInputSide.class */
    public static class BatchCubingInputSide implements IMRInput.IMRBatchCubingInputSide {
        protected final IJoinedFlatTableDesc flatDesc;
        protected final String flatTableDatabase;
        protected final String hdfsWorkingDir;
        String hiveViewIntermediateTables = "";

        public BatchCubingInputSide(IJoinedFlatTableDesc iJoinedFlatTableDesc) {
            KylinConfig instanceFromEnv = KylinConfig.getInstanceFromEnv();
            this.flatDesc = iJoinedFlatTableDesc;
            this.flatTableDatabase = instanceFromEnv.getHiveDatabaseForIntermediateTable();
            this.hdfsWorkingDir = instanceFromEnv.getHdfsWorkingDirectory();
        }

        @Override // org.apache.kylin.engine.mr.IMRInput.IMRBatchCubingInputSide
        public void addStepPhase1_CreateFlatTable(DefaultChainedExecutable defaultChainedExecutable) {
            String cubeName = CubingExecutableUtil.getCubeName(defaultChainedExecutable.getParams());
            KylinConfig config = CubeManager.getInstance(KylinConfig.getInstanceFromEnv()).getCube(cubeName).getConfig();
            String generateHiveInitStatements = JoinedFlatTable.generateHiveInitStatements(this.flatTableDatabase);
            addStepPhase1_DoCreateFlatTable(defaultChainedExecutable);
            if (config.isHiveRedistributeEnabled()) {
                defaultChainedExecutable.addTask(createRedistributeFlatHiveTableStep(generateHiveInitStatements, cubeName));
            }
            addStepPhase1_DoMaterializeLookupTable(defaultChainedExecutable);
        }

        protected void addStepPhase1_DoCreateFlatTable(DefaultChainedExecutable defaultChainedExecutable) {
            defaultChainedExecutable.addTask(createFlatHiveTableStep(JoinedFlatTable.generateHiveInitStatements(this.flatTableDatabase), getJobWorkingDir(defaultChainedExecutable), CubingExecutableUtil.getCubeName(defaultChainedExecutable.getParams())));
        }

        protected void addStepPhase1_DoMaterializeLookupTable(DefaultChainedExecutable defaultChainedExecutable) {
            ShellExecutable createLookupHiveViewMaterializationStep = createLookupHiveViewMaterializationStep(JoinedFlatTable.generateHiveInitStatements(this.flatTableDatabase), getJobWorkingDir(defaultChainedExecutable));
            if (createLookupHiveViewMaterializationStep != null) {
                defaultChainedExecutable.addTask(createLookupHiveViewMaterializationStep);
            }
        }

        /* JADX INFO: Access modifiers changed from: protected */
        public String getJobWorkingDir(DefaultChainedExecutable defaultChainedExecutable) {
            String jobWorkingDir = JobBuilderSupport.getJobWorkingDir(this.hdfsWorkingDir, defaultChainedExecutable.getId());
            if (KylinConfig.getInstanceFromEnv().getHiveTableDirCreateFirst()) {
                checkAndCreateWorkDir(jobWorkingDir);
            }
            return jobWorkingDir;
        }

        private void checkAndCreateWorkDir(String str) {
            try {
                Path path = new Path(str);
                FileSystem fileSystem = HadoopUtil.getFileSystem(path);
                if (!fileSystem.exists(path)) {
                    HiveMRInput.logger.info("Create jobWorkDir : " + str);
                    fileSystem.mkdirs(path);
                }
            } catch (IOException e) {
                HiveMRInput.logger.error("Could not create lookUp table dir : " + str);
            }
        }

        private AbstractExecutable createRedistributeFlatHiveTableStep(String str, String str2) {
            RedistributeFlatHiveTableStep redistributeFlatHiveTableStep = new RedistributeFlatHiveTableStep();
            redistributeFlatHiveTableStep.setInitStatement(str);
            redistributeFlatHiveTableStep.setIntermediateTable(this.flatDesc.getTableName());
            redistributeFlatHiveTableStep.setRedistributeDataStatement(JoinedFlatTable.generateRedistributeFlatTableStatement(this.flatDesc));
            CubingExecutableUtil.setCubeName(str2, redistributeFlatHiveTableStep.getParams());
            redistributeFlatHiveTableStep.setName(ExecutableConstants.STEP_NAME_REDISTRIBUTE_FLAT_HIVE_TABLE);
            return redistributeFlatHiveTableStep;
        }

        private ShellExecutable createLookupHiveViewMaterializationStep(String str, String str2) {
            ShellExecutable shellExecutable = new ShellExecutable();
            shellExecutable.setName(ExecutableConstants.STEP_NAME_MATERIALIZE_HIVE_VIEW_IN_LOOKUP);
            KylinConfig config = ((CubeSegment) this.flatDesc.getSegment()).getConfig();
            TableMetadataManager tableMetadataManager = TableMetadataManager.getInstance(config);
            HashSet<TableDesc> newHashSet = Sets.newHashSet();
            String project = this.flatDesc.getDataModel().getProject();
            for (JoinTableDesc joinTableDesc : this.flatDesc.getDataModel().getJoinTables()) {
                TableDesc tableDesc = tableMetadataManager.getTableDesc(joinTableDesc.getTable(), project);
                if (joinTableDesc.getKind() == DataModelDesc.TableKind.LOOKUP && tableDesc.isView()) {
                    newHashSet.add(tableDesc);
                }
            }
            if (newHashSet.size() == 0) {
                return null;
            }
            HiveCmdBuilder hiveCmdBuilder = new HiveCmdBuilder();
            hiveCmdBuilder.overwriteHiveProps(config.getHiveConfigOverride());
            hiveCmdBuilder.addStatement(str);
            for (TableDesc tableDesc2 : newHashSet) {
                String identity = tableDesc2.getIdentity();
                String materializedName = tableDesc2.getMaterializedName();
                if (tableDesc2.isView()) {
                    StringBuilder sb = new StringBuilder();
                    sb.append("DROP TABLE IF EXISTS " + materializedName + ";\n");
                    sb.append("CREATE EXTERNAL TABLE IF NOT EXISTS " + materializedName + " LIKE " + identity + "\n");
                    sb.append("LOCATION '" + str2 + AntPathMatcher.DEFAULT_PATH_SEPARATOR + materializedName + "';\n");
                    sb.append("ALTER TABLE " + materializedName + " SET TBLPROPERTIES('auto.purge'='true');\n");
                    sb.append("INSERT OVERWRITE TABLE " + materializedName + " SELECT * FROM " + identity + ";\n");
                    hiveCmdBuilder.addStatement(sb.toString());
                    this.hiveViewIntermediateTables += materializedName + ";";
                }
            }
            this.hiveViewIntermediateTables = this.hiveViewIntermediateTables.substring(0, this.hiveViewIntermediateTables.length() - 1);
            shellExecutable.setCmd(hiveCmdBuilder.build());
            return shellExecutable;
        }

        private AbstractExecutable createFlatHiveTableStep(String str, String str2, String str3) {
            String generateDropTableStatement = JoinedFlatTable.generateDropTableStatement(this.flatDesc);
            String generateCreateTableStatement = JoinedFlatTable.generateCreateTableStatement(this.flatDesc, str2);
            String generateInsertDataStatement = JoinedFlatTable.generateInsertDataStatement(this.flatDesc);
            CreateFlatHiveTableStep createFlatHiveTableStep = new CreateFlatHiveTableStep();
            createFlatHiveTableStep.setInitStatement(str);
            createFlatHiveTableStep.setCreateTableStatement(generateDropTableStatement + generateCreateTableStatement + generateInsertDataStatement);
            CubingExecutableUtil.setCubeName(str3, createFlatHiveTableStep.getParams());
            createFlatHiveTableStep.setName(ExecutableConstants.STEP_NAME_CREATE_FLAT_HIVE_TABLE);
            return createFlatHiveTableStep;
        }

        @Override // org.apache.kylin.engine.mr.IMRInput.IMRBatchCubingInputSide
        public void addStepPhase4_Cleanup(DefaultChainedExecutable defaultChainedExecutable) {
            String jobWorkingDir = getJobWorkingDir(defaultChainedExecutable);
            GarbageCollectionStep garbageCollectionStep = new GarbageCollectionStep();
            garbageCollectionStep.setName(ExecutableConstants.STEP_NAME_HIVE_CLEANUP);
            garbageCollectionStep.setIntermediateTableIdentity(getIntermediateTableIdentity());
            garbageCollectionStep.setExternalDataPath(JoinedFlatTable.getTableDir(this.flatDesc, jobWorkingDir));
            garbageCollectionStep.setHiveViewIntermediateTableIdentities(this.hiveViewIntermediateTables);
            defaultChainedExecutable.addTask(garbageCollectionStep);
        }

        @Override // org.apache.kylin.engine.mr.IMRInput.IMRBatchCubingInputSide
        public IMRInput.IMRTableInputFormat getFlatTableInputFormat() {
            return new HiveTableInputFormat(getIntermediateTableIdentity());
        }

        private String getIntermediateTableIdentity() {
            return this.flatTableDatabase + "." + this.flatDesc.getTableName();
        }
    }

    /* loaded from: input_file:org/apache/kylin/source/hive/HiveMRInput$GarbageCollectionStep.class */
    public static class GarbageCollectionStep extends AbstractExecutable {
        private static final Logger logger = LoggerFactory.getLogger((Class<?>) GarbageCollectionStep.class);

        @Override // org.apache.kylin.job.execution.AbstractExecutable
        protected ExecuteResult doWork(ExecutableContext executableContext) throws ExecuteException {
            KylinConfig config = executableContext.getConfig();
            StringBuffer stringBuffer = new StringBuffer();
            try {
                stringBuffer.append(cleanUpIntermediateFlatTable(config));
                return new ExecuteResult(ExecuteResult.State.SUCCEED, stringBuffer.toString());
            } catch (IOException e) {
                logger.error("job:" + getId() + " execute finished with exception", (Throwable) e);
                return ExecuteResult.createError(e);
            }
        }

        private String cleanUpIntermediateFlatTable(KylinConfig kylinConfig) throws IOException {
            StringBuffer stringBuffer = new StringBuffer();
            String intermediateTableIdentity = getIntermediateTableIdentity();
            if (!kylinConfig.isHiveKeepFlatTable() && StringUtils.isNotEmpty(intermediateTableIdentity)) {
                HiveCmdBuilder hiveCmdBuilder = new HiveCmdBuilder();
                hiveCmdBuilder.addStatement("USE " + kylinConfig.getHiveDatabaseForIntermediateTable() + ";");
                hiveCmdBuilder.addStatement("DROP TABLE IF EXISTS  " + intermediateTableIdentity + ";");
                kylinConfig.getCliCommandExecutor().execute(hiveCmdBuilder.build());
                stringBuffer.append("Hive table " + intermediateTableIdentity + " is dropped. \n");
                rmdirOnHDFS(getExternalDataPath());
                stringBuffer.append("Hive table " + intermediateTableIdentity + " external data path " + getExternalDataPath() + " is deleted. \n");
            }
            return stringBuffer.toString();
        }

        private void rmdirOnHDFS(String str) throws IOException {
            Path path = new Path(str);
            FileSystem workingFileSystem = HadoopUtil.getWorkingFileSystem();
            if (workingFileSystem.exists(path)) {
                workingFileSystem.delete(path, true);
            }
        }

        public void setIntermediateTableIdentity(String str) {
            setParam("oldHiveTable", str);
        }

        private String getIntermediateTableIdentity() {
            return getParam("oldHiveTable");
        }

        public void setExternalDataPath(String str) {
            setParam("externalDataPath", str);
        }

        private String getExternalDataPath() {
            return getParam("externalDataPath");
        }

        public void setHiveViewIntermediateTableIdentities(String str) {
            setParam("oldHiveViewIntermediateTables", str);
        }
    }

    /* loaded from: input_file:org/apache/kylin/source/hive/HiveMRInput$HiveTableInputFormat.class */
    /**
     * Table input format that reads a Hive table through HCatalog.
     */
    public static class HiveTableInputFormat implements IMRInput.IMRTableInputFormat {
        final String dbName;
        final String tableName;

        /**
         * @param str Hive table name, split into database and table by
         *            {@code HadoopUtil.parseHiveTableName}
         */
        public HiveTableInputFormat(String str) {
            final String[] dbAndTable = HadoopUtil.parseHiveTableName(str);
            this.dbName = dbAndTable[0];
            this.tableName = dbAndTable[1];
        }

        /**
         * Points the job at this Hive table and selects {@link HCatInputFormat} as its input
         * format; {@code hive-site.xml} is added to the job configuration first.
         *
         * @param job the MapReduce job to configure
         * @throws RuntimeException wrapping any {@link IOException} raised while configuring
         */
        @Override // org.apache.kylin.engine.mr.IMRInput.IMRTableInputFormat
        public void configureJob(Job job) {
            try {
                job.getConfiguration().addResource("hive-site.xml");
                HCatInputFormat.setInput(job, this.dbName, this.tableName);
                job.setInputFormatClass(HCatInputFormat.class);
            } catch (IOException ioe) {
                throw new RuntimeException(ioe);
            }
        }

        /**
         * Converts one mapper input value into a single row of string columns.
         *
         * @param obj the mapper input value; cast to {@link HCatRecord}
         * @return a one-element list holding the record's columns as strings
         */
        @Override // org.apache.kylin.engine.mr.IMRInput.IMRTableInputFormat
        public List<String[]> parseMapperInput(Object obj) {
            final HCatRecord record = (HCatRecord) obj;
            final String[] row = HiveTableReader.getRowAsStringArray(record);
            return Collections.singletonList(row);
        }
    }

    /* loaded from: input_file:org/apache/kylin/source/hive/HiveMRInput$RedistributeFlatHiveTableStep.class */
    /**
     * Job step that redistributes the intermediate flat Hive table, choosing the reducer
     * count from the table's row count.
     */
    public static class RedistributeFlatHiveTableStep extends AbstractExecutable {
        /** Parameter key of the Hive init statements. */
        private static final String PARAM_HIVE_INIT = "HiveInit";
        /** Parameter key of the redistribute statement. */
        private static final String PARAM_REDISTRIBUTE_DATA = "HiveRedistributeData";
        /** Parameter key of the intermediate table name. */
        private static final String PARAM_INTERMEDIATE_TABLE = "intermediateTable";

        private final PatternedLogger stepLogger = new PatternedLogger(logger);

        /** Asks the Hive client for the number of rows in {@code str.str2}. */
        private long computeRowCount(String str, String str2) throws Exception {
            return HiveClientFactory.getHiveClient().getHiveTableRows(str, str2);
        }

        /** Asks the Hive client for the file size recorded in the metadata of {@code str.str2}. */
        private long getDataSize(String str, String str2) throws Exception {
            return HiveClientFactory.getHiveClient().getHiveTableMeta(str, str2).fileSize;
        }

        /**
         * Runs the redistribute statement through the CLI command executor with the given
         * reducer count and with {@code hive.merge.mapredfiles} switched off.
         *
         * @param kylinConfig config supplying the Hive overrides and the command executor
         * @param i           number of reducers to request
         * @throws IOException      if executing the command fails
         * @throws RuntimeException if the command exits with a non-zero code
         */
        private void redistributeTable(KylinConfig kylinConfig, int i) throws IOException {
            final HiveCmdBuilder builder = new HiveCmdBuilder();
            builder.overwriteHiveProps(kylinConfig.getHiveConfigOverride());
            builder.addStatement(getInitStatement());
            builder.addStatement("set mapreduce.job.reduces=" + i + ";\n");
            builder.addStatement("set hive.merge.mapredfiles=false;\n");
            builder.addStatement(getRedistributeDataStatement());

            // NOTE(review): the command text comes from toString(), not build() - presumably
            // equivalent in HiveCmdBuilder; confirm.
            final String cmd = builder.toString();
            this.stepLogger.log("Redistribute table, cmd: ");
            this.stepLogger.log(cmd);

            final Pair<Integer, String> response = kylinConfig.getCliCommandExecutor().execute(cmd, this.stepLogger);
            // Job info is recorded before the exit code is examined, so it is kept on failure too.
            getManager().addJobInfo(getId(), this.stepLogger.getInfo());

            final int exitCode = response.getFirst().intValue();
            if (exitCode != 0) {
                throw new RuntimeException("Failed to redistribute flat hive table");
            }
        }

        /** Looks up the config of the cube named in this step's parameters. */
        private KylinConfig getCubeSpecificConfig() {
            final String cubeName = CubingExecutableUtil.getCubeName(getParams());
            return CubeManager.getInstance(KylinConfig.getInstanceFromEnv()).getCube(cubeName).getConfig();
        }

        /**
         * Counts the rows of the intermediate table, derives the reducer count and runs the
         * redistribution. An empty table succeeds immediately when empty segments are allowed
         * and fails the step otherwise.
         *
         * @param executableContext not used by this step
         * @return SUCCEED with the buffered step log, or ERROR when the table is empty and
         *         empty segments are not allowed, or when any exception is raised
         */
        @Override // org.apache.kylin.job.execution.AbstractExecutable
        protected ExecuteResult doWork(ExecutableContext executableContext) throws ExecuteException {
            final KylinConfig cubeConfig = getCubeSpecificConfig();
            final String intermediateTable = getIntermediateTable();

            // A "db.table" name carries its own database; a bare name uses the configured one.
            final String database;
            final String table;
            final int dot = intermediateTable.indexOf(".");
            if (dot > 0) {
                database = intermediateTable.substring(0, dot);
                table = intermediateTable.substring(dot + 1);
            } else {
                database = cubeConfig.getHiveDatabaseForIntermediateTable();
                table = intermediateTable;
            }

            try {
                final long rowCount = computeRowCount(database, table);
                logger.debug("Row count of table '" + intermediateTable + "' is " + rowCount);

                if (rowCount == 0) {
                    if (!cubeConfig.isEmptySegmentAllowed()) {
                        this.stepLogger.log("Detect upstream hive table is empty, fail the job because \"kylin.job.allow-empty-segment\" = \"false\"");
                        return new ExecuteResult(ExecuteResult.State.ERROR, this.stepLogger.getBufferedLog());
                    }
                    return new ExecuteResult(ExecuteResult.State.SUCCEED, "Row count is 0, no need to redistribute");
                }

                final int rowsPerMapper = cubeConfig.getHadoopJobMapperInputRows();
                // At least one reducer, at most the configured maximum.
                int numReducers = Math.round(((float) rowCount) / rowsPerMapper);
                numReducers = Math.max(1, numReducers);
                numReducers = Math.min(numReducers, cubeConfig.getHadoopJobMaxReducerNumber());

                this.stepLogger.log("total input rows = " + rowCount);
                this.stepLogger.log("expected input rows per mapper = " + rowsPerMapper);
                this.stepLogger.log("num reducers for RedistributeFlatHiveTableStep = " + numReducers);

                redistributeTable(cubeConfig, numReducers);
                getManager().addJobInfo(getId(), ExecutableConstants.HDFS_BYTES_WRITTEN, String.valueOf(getDataSize(database, table)));
                return new ExecuteResult(ExecuteResult.State.SUCCEED, this.stepLogger.getBufferedLog());
            } catch (Exception ex) {
                logger.error("job:" + getId() + " execute finished with exception", ex);
                return new ExecuteResult(ExecuteResult.State.ERROR, this.stepLogger.getBufferedLog(), ex);
            }
        }

        /** Stores the Hive init statements run before the redistribution. */
        public void setInitStatement(String str) {
            setParam(PARAM_HIVE_INIT, str);
        }

        /** Returns the stored Hive init statements. */
        public String getInitStatement() {
            return getParam(PARAM_HIVE_INIT);
        }

        /** Stores the statement that redistributes the flat table's data. */
        public void setRedistributeDataStatement(String str) {
            setParam(PARAM_REDISTRIBUTE_DATA, str);
        }

        /** Returns the stored redistribute statement. */
        public String getRedistributeDataStatement() {
            return getParam(PARAM_REDISTRIBUTE_DATA);
        }

        /** Returns the stored intermediate table name. */
        public String getIntermediateTable() {
            return getParam(PARAM_INTERMEDIATE_TABLE);
        }

        /** Stores the intermediate table name, with or without a database prefix. */
        public void setIntermediateTable(String str) {
            setParam(PARAM_INTERMEDIATE_TABLE, str);
        }
    }

    /**
     * Returns the upper-cased {@code DATABASE.TABLE} name under which HCatalog should read the
     * given table. A view is addressed by its materialized name in the intermediate-table
     * database; any other table by its own database and name.
     *
     * @param tableDesc the table to resolve
     * @return the qualified, upper-cased table name
     */
    public static String getTableNameForHCat(TableDesc tableDesc) {
        final String database;
        final String table;
        if (tableDesc.isView()) {
            database = KylinConfig.getInstanceFromEnv().getHiveDatabaseForIntermediateTable();
            table = tableDesc.getMaterializedName();
        } else {
            database = tableDesc.getDatabase();
            table = tableDesc.getName();
        }
        // Locale.ROOT keeps the casing independent of the JVM's default locale (for example,
        // a Turkish default would otherwise map 'i' to a dotted capital I).
        return String.format(Locale.ROOT, "%s.%s", database, table).toUpperCase(Locale.ROOT);
    }

    /**
     * Creates the Hive-backed batch cubing input side for the given flat table descriptor.
     *
     * @param iJoinedFlatTableDesc descriptor of the flat table to build
     * @return a new {@link BatchCubingInputSide}
     */
    @Override // org.apache.kylin.engine.mr.IMRInput
    public IMRInput.IMRBatchCubingInputSide getBatchCubingInputSide(IJoinedFlatTableDesc iJoinedFlatTableDesc) {
        final BatchCubingInputSide inputSide = new BatchCubingInputSide(iJoinedFlatTableDesc);
        return inputSide;
    }

    /**
     * Creates an input format that reads the given table under the name resolved by
     * {@link #getTableNameForHCat(TableDesc)}.
     *
     * @param tableDesc the table to read
     * @return a new {@link HiveTableInputFormat}
     */
    @Override // org.apache.kylin.engine.mr.IMRInput
    public IMRInput.IMRTableInputFormat getTableInputFormat(TableDesc tableDesc) {
        final String hcatTableName = getTableNameForHCat(tableDesc);
        return new HiveTableInputFormat(hcatTableName);
    }

    /**
     * Returns a merge input side whose dictionary-merge phase adds no step to the job.
     *
     * @param iSegment not used
     * @return a merge input side that does nothing
     */
    @Override // org.apache.kylin.engine.mr.IMRInput
    public IMRInput.IMRBatchMergeInputSide getBatchMergeInputSide(ISegment iSegment) {
        final IMRInput.IMRBatchMergeInputSide noOpMergeSide = new IMRInput.IMRBatchMergeInputSide() {
            @Override // org.apache.kylin.engine.mr.IMRInput.IMRBatchMergeInputSide
            public void addStepPhase1_MergeDictionary(DefaultChainedExecutable defaultChainedExecutable) {
                // No step is added.
            }
        };
        return noOpMergeSide;
    }
}
