package org.apache.kylin.engine.spark;

import org.apache.kylin.common.util.StringUtil;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.cube.cuboid.CuboidModeEnum;
import org.apache.kylin.engine.mr.CubingJob;
import org.apache.kylin.engine.mr.JobBuilderSupport;
import org.apache.kylin.engine.mr.steps.CopyDictionaryStep;
import org.apache.kylin.engine.mr.steps.CubingExecutableUtil;
import org.apache.kylin.engine.mr.steps.MergeStatisticsWithOldStep;
import org.apache.kylin.engine.mr.steps.UpdateCubeInfoAfterOptimizeStep;
import org.apache.kylin.engine.spark.ISparkOutput;
import org.apache.kylin.job.constant.ExecutableConstants;
import org.apache.kylin.job.execution.AbstractExecutable;
import org.apache.kylin.shaded.com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/kylin/engine/spark/SparkBatchOptimizeJobBuilder2.class */
/**
 * Assembles the {@link CubingJob} that optimizes a cube segment with Spark-based steps.
 *
 * <p>{@link #build()} appends the tasks in a fixed order: filter recommended cuboid data,
 * copy dictionary, calculate statistics from the base cuboid, merge statistics, the output
 * side's phase-2 hook, update shards of the old cuboid data, the Spark cubing step, the
 * output side's phase-3 hook, update cube info, and finally the output side's phase-4 hook.
 */
public class SparkBatchOptimizeJobBuilder2 extends JobBuilderSupport {
    private static final Logger logger = LoggerFactory.getLogger(SparkBatchOptimizeJobBuilder2.class);

    /** Storage-specific hooks resolved once for the segment being optimized. */
    private final ISparkOutput.ISparkBatchOptimizeOutputSide outputSide;

    /**
     * Creates a builder for the given segment.
     *
     * @param cubeSegment the segment handed to the {@link JobBuilderSupport} constructor
     * @param str         second argument of the {@link JobBuilderSupport} constructor
     *                    (presumably the job submitter - confirm against JobBuilderSupport)
     */
    public SparkBatchOptimizeJobBuilder2(CubeSegment cubeSegment, String str) {
        super(cubeSegment, str);
        outputSide = SparkUtil.getBatchOptimizeOutputSide2(seg);
    }

    /**
     * Creates the optimize job and appends all of its steps.
     *
     * <p>{@code Preconditions.checkNotNull} rejects the case where the cube instance
     * returns no original segment for the segment being optimized.
     *
     * @return the fully assembled optimize job
     */
    public CubingJob build() {
        logger.info("Spark new job to Optimize segment " + seg);

        final CubingJob job = CubingJob.createOptimizeJob(seg, submitter, config);
        final String jobId = job.getId();
        final String cuboidRoot = getCuboidRootPath(jobId);
        final String optimizeCuboidRoot = getOptimizationCuboidPath(jobId);

        final CubeSegment sourceSegment = seg.getCubeInstance().getOriginalSegmentToOptimize(seg);
        Preconditions.checkNotNull(sourceSegment, "cannot find the original segment to be optimized by " + seg);

        // The filter step reads everything below the original segment's cuboid root (trailing wildcard).
        final String sourceCuboidPattern = getCuboidRootPath(sourceSegment) + "*";
        job.addTask(createFilterRecommendCuboidDataStep(sourceCuboidPattern, optimizeCuboidRoot, jobId));
        job.addTask(createCopyDictionaryStep());

        // Statistics are computed either with Spark or with the inherited (non-Spark) step,
        // depending on the segment's configuration.
        final String baseCuboidPath = getBaseCuboidPath(optimizeCuboidRoot);
        final String optimizeStatsPath = getOptimizationStatisticsPath(jobId);
        if (seg.getConfig().isUseSparkCalculateStatsEnable()) {
            job.addTask(createCalculateStatsFromBaseCuboidStepWithSpark(
                    baseCuboidPath, optimizeStatsPath, CuboidModeEnum.RECOMMEND_MISSING, jobId));
        } else {
            job.addTask(createCalculateStatsFromBaseCuboid(
                    baseCuboidPath, optimizeStatsPath, CuboidModeEnum.RECOMMEND_MISSING));
        }

        final String mergedStatsPath = getStatisticsPath(jobId);
        job.addTask(createMergeStatisticsWithOldStep(jobId, optimizeStatsPath, mergedStatsPath));

        outputSide.addStepPhase2_CreateHTable(job);

        job.addTask(createUpdateShardForOldCuboidDataStep(optimizeCuboidRoot, cuboidRoot, jobId));

        final String cubingInputPath = SparkUtil.generateFilePath(JobBuilderSupport.PathNameCuboidBase, cuboidRoot);
        addLayerCubingSteps(job, jobId, CuboidModeEnum.RECOMMEND_MISSING_WITH_BASE, cubingInputPath, cuboidRoot);

        outputSide.addStepPhase3_BuildCube(job);

        job.addTask(createUpdateCubeInfoAfterOptimizeStep(jobId));

        outputSide.addStepPhase4_Cleanup(job);
        return job;
    }

    /**
     * Builds the Spark step that runs {@link SparkFilterRecommendCuboidDataJob}.
     *
     * @param inputPath  value for the job's input-path option
     * @param outputPath value for the job's output-path option
     * @param jobId      id of the owning job; also used to derive the segment metadata URL
     * @return the configured Spark step
     */
    private SparkExecutable createFilterRecommendCuboidDataStep(String inputPath, String outputPath, String jobId) {
        final SparkExecutable step = SparkExecutableFactory.instance(seg.getConfig());
        step.setName(ExecutableConstants.STEP_NAME_FILTER_RECOMMEND_CUBOID_DATA_FOR_OPTIMIZATION_SPARK);
        step.setParam(SparkFilterRecommendCuboidDataJob.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
        step.setParam(SparkFilterRecommendCuboidDataJob.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
        step.setParam(SparkFilterRecommendCuboidDataJob.OPTION_INPUT_PATH.getOpt(), inputPath);
        step.setParam(SparkFilterRecommendCuboidDataJob.OPTION_OUTPUT_PATH.getOpt(), outputPath);
        step.setParam(SparkFilterRecommendCuboidDataJob.OPTION_META_URL.getOpt(),
                getSegmentMetadataUrl(seg.getConfig(), jobId));
        step.setClassName(SparkFilterRecommendCuboidDataJob.class.getName());
        step.setJobId(jobId);
        return step;
    }

    /**
     * Builds the {@link CopyDictionaryStep}, parameterized with this segment's cube name and id.
     *
     * @return the configured step
     */
    private AbstractExecutable createCopyDictionaryStep() {
        final CopyDictionaryStep step = new CopyDictionaryStep();
        step.setName(ExecutableConstants.STEP_NAME_COPY_DICTIONARY);
        CubingExecutableUtil.setCubeName(seg.getRealization().getName(), step.getParams());
        CubingExecutableUtil.setSegmentId(seg.getUuid(), step.getParams());
        return step;
    }

    /**
     * Builds the Spark step that runs {@link SparkCalculateStatsFromBaseCuboidJob}.
     *
     * @param inputPath  value for the job's input-path option
     * @param outputPath value for the job's output-path option
     * @param mode       cuboid mode, passed as its string form in the job-mode option
     * @param jobId      id of the owning job; also used to derive the segment metadata URL
     * @return the configured Spark step
     */
    private SparkExecutable createCalculateStatsFromBaseCuboidStepWithSpark(String inputPath, String outputPath,
            CuboidModeEnum mode, String jobId) {
        final SparkExecutable step = SparkExecutableFactory.instance(seg.getConfig());
        step.setName(ExecutableConstants.STEP_NAME_CALCULATE_STATS_FROM_BASECUBOID_SPARK);
        step.setParam(SparkCalculateStatsFromBaseCuboidJob.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
        step.setParam(SparkCalculateStatsFromBaseCuboidJob.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
        step.setParam(SparkCalculateStatsFromBaseCuboidJob.OPTION_INPUT_PATH.getOpt(), inputPath);
        step.setParam(SparkCalculateStatsFromBaseCuboidJob.OPTION_OUTPUT_PATH.getOpt(), outputPath);
        step.setParam(SparkCalculateStatsFromBaseCuboidJob.OPTION_META_URL.getOpt(),
                getSegmentMetadataUrl(seg.getConfig(), jobId));
        step.setParam(SparkCalculateStatsFromBaseCuboidJob.OPTION_JOB_MODE.getOpt(), mode.toString());
        // The sampling percent is read from the builder's own config, not from the segment's config.
        step.setParam(SparkCalculateStatsFromBaseCuboidJob.OPTION_SAMPLING_PERCENT.getOpt(),
                String.valueOf(config.getConfig().getCubingInMemSamplingPercent()));
        step.setJobId(jobId);
        step.setClassName(SparkCalculateStatsFromBaseCuboidJob.class.getName());
        return step;
    }

    /**
     * Builds the {@link MergeStatisticsWithOldStep}.
     *
     * @param jobId               cubing job id stored in the step parameters
     * @param statisticsPath      value stored as the step's statistics path
     * @param mergedStatisticsPath value stored as the step's merged-statistics path
     * @return the configured step
     */
    private MergeStatisticsWithOldStep createMergeStatisticsWithOldStep(String jobId, String statisticsPath,
            String mergedStatisticsPath) {
        final MergeStatisticsWithOldStep step = new MergeStatisticsWithOldStep();
        step.setName(ExecutableConstants.STEP_NAME_MERGE_STATISTICS_WITH_OLD);
        CubingExecutableUtil.setCubingJobId(jobId, step.getParams());
        CubingExecutableUtil.setCubeName(seg.getRealization().getName(), step.getParams());
        CubingExecutableUtil.setSegmentId(seg.getUuid(), step.getParams());
        CubingExecutableUtil.setStatisticsPath(statisticsPath, step.getParams());
        CubingExecutableUtil.setMergedStatisticsPath(mergedStatisticsPath, step.getParams());
        return step;
    }

    /**
     * Builds the Spark step that runs {@link SparkUpdateShardForOldCuboidDataStep}.
     *
     * @param inputPath  value for the job's input-path option
     * @param outputPath value for the job's output-path option
     * @param jobId      id of the owning job; also used to derive the segment metadata URL
     * @return the configured Spark step
     */
    private SparkExecutable createUpdateShardForOldCuboidDataStep(String inputPath, String outputPath, String jobId) {
        final SparkExecutable step = SparkExecutableFactory.instance(seg.getConfig());
        step.setName(ExecutableConstants.STEP_NAME_UPDATE_OLD_CUBOID_SHARD_SPARK);
        step.setParam(SparkUpdateShardForOldCuboidDataStep.OPTION_CUBE_NAME.getOpt(), seg.getRealization().getName());
        step.setParam(SparkUpdateShardForOldCuboidDataStep.OPTION_SEGMENT_ID.getOpt(), seg.getUuid());
        step.setParam(SparkUpdateShardForOldCuboidDataStep.OPTION_INPUT_PATH.getOpt(), inputPath);
        step.setParam(SparkUpdateShardForOldCuboidDataStep.OPTION_OUTPUT_PATH.getOpt(), outputPath);
        step.setParam(SparkUpdateShardForOldCuboidDataStep.OPTION_META_URL.getOpt(),
                getSegmentMetadataUrl(seg.getConfig(), jobId));
        step.setJobId(jobId);
        step.setClassName(SparkUpdateShardForOldCuboidDataStep.class.getName());
        return step;
    }

    /**
     * Creates the Spark step backed by {@link SparkCubingByLayerForOpt}, configures it and
     * appends it to the given job.
     *
     * @param cubingJob  job that receives the new step
     * @param jobId      id of the owning job
     * @param mode       cuboid mode forwarded to the step configuration
     * @param inputPath  value for the step's input-path option
     * @param outputPath value for the step's output-path option
     */
    private void addLayerCubingSteps(CubingJob cubingJob, String jobId, CuboidModeEnum mode, String inputPath,
            String outputPath) {
        final SparkExecutable step = SparkExecutableFactory.instance(seg.getConfig());
        step.setClassName(SparkCubingByLayerForOpt.class.getName());
        configureSparkJob(seg, step, jobId, inputPath, outputPath, mode);
        cubingJob.addTask(step);
    }

    /**
     * Fills in the options, job id, additional jars and display name of the Spark cubing step.
     *
     * @param segment    segment whose cube name, uuid and config are used
     * @param step       Spark step to configure
     * @param jobId      id of the owning job; also used to derive the segment metadata URL
     * @param inputPath  value for the input-path option
     * @param outputPath value for the output-path option
     * @param mode       cuboid mode, passed as its string form in the cuboid-mode option
     */
    private void configureSparkJob(CubeSegment segment, SparkExecutable step, String jobId, String inputPath,
            String outputPath, CuboidModeEnum mode) {
        step.setParam(SparkCubingByLayerForOpt.OPTION_CUBE_NAME.getOpt(), segment.getRealization().getName());
        step.setParam(SparkCubingByLayerForOpt.OPTION_SEGMENT_ID.getOpt(), segment.getUuid());
        step.setParam(SparkCubingByLayerForOpt.OPTION_META_URL.getOpt(),
                getSegmentMetadataUrl(segment.getConfig(), jobId));
        step.setParam(SparkCubingByLayerForOpt.OPTION_OUTPUT_PATH.getOpt(), outputPath);
        step.setParam(SparkCubingByLayerForOpt.OPTION_INPUT_PATH.getOpt(), inputPath);
        step.setParam(SparkCubingByLayerForOpt.OPTION_CUBOID_MODE.getOpt(), mode.toString());
        step.setJobId(jobId);

        // The jar list is assembled through StringUtil.appendWithSeparator; only the configured
        // additional Spark jars are added here.
        final StringBuilder jars = new StringBuilder();
        StringUtil.appendWithSeparator(jars, segment.getConfig().getSparkAdditionalJars());
        step.setJars(jars.toString());

        step.setName("Optimize Cube with Spark:" + segment.toString());
    }

    /**
     * Builds the {@link UpdateCubeInfoAfterOptimizeStep}, parameterized with this segment's
     * cube name and id plus the given cubing job id.
     *
     * @param jobId cubing job id stored in the step parameters
     * @return the configured step
     */
    private UpdateCubeInfoAfterOptimizeStep createUpdateCubeInfoAfterOptimizeStep(String jobId) {
        final UpdateCubeInfoAfterOptimizeStep step = new UpdateCubeInfoAfterOptimizeStep();
        step.setName("Update Cube Info");
        CubingExecutableUtil.setCubeName(seg.getRealization().getName(), step.getParams());
        CubingExecutableUtil.setSegmentId(seg.getUuid(), step.getParams());
        CubingExecutableUtil.setCubingJobId(jobId, step.getParams());
        return step;
    }
}
