package org.apache.kylin.storage.hbase.steps;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.kylin.common.util.StringUtil;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.cube.cuboid.CuboidModeEnum;
import org.apache.kylin.engine.mr.JobBuilderSupport;
import org.apache.kylin.engine.mr.common.AbstractHadoopJob;
import org.apache.kylin.engine.mr.common.BatchConstants;
import org.apache.kylin.engine.mr.common.HadoopShellExecutable;
import org.apache.kylin.engine.mr.common.MapReduceExecutable;
import org.apache.kylin.job.constant.ExecutableConstants;
import org.apache.kylin.job.execution.DefaultChainedExecutable;
import org.apache.kylin.metadata.model.SegmentStatusEnum;
import org.apache.kylin.metadata.model.Segments;
import org.apache.kylin.storage.hbase.HBaseConnection;
import org.apache.kylin.tool.shaded.com.google.common.base.Preconditions;
import org.apache.kylin.tool.shaded.com.google.common.collect.Lists;
import org.slf4j.Marker;
import org.springframework.util.AntPathMatcher;

/* loaded from: input_file:org/apache/kylin/storage/hbase/steps/HBaseMRSteps.class */
/**
 * Factory for the executable steps that move cube segment data into HBase:
 * creating the HTable, merging cuboid data, converting cuboids to HFiles,
 * bulk loading them, and cleaning up HTables / HDFS paths afterwards.
 *
 * <p>All steps are built for the segment handed to the constructor (exposed by the
 * superclass as {@code seg}). Methods taking a {@code String str} pass that value to
 * {@code getJobWorkingDir}; the garbage collection helpers in this class supply the
 * id of the chained executable for it.
 */
public class HBaseMRSteps extends JobBuilderSupport {

    /** Separator used when composing paths from a working directory and a cube name. */
    private static final String PATH_SEPARATOR = "/";

    /** Wildcard appended to cuboid root paths when they are used as job input. */
    private static final String GLOB_ALL = "*";

    /**
     * @param cubeSegment the segment all produced steps operate on
     */
    public HBaseMRSteps(CubeSegment cubeSegment) {
        super(cubeSegment, null);
    }

    /**
     * Builds the "create HTable" step using {@link CuboidModeEnum#CURRENT}.
     *
     * @param str id used to locate the job working directory
     * @return the configured shell step
     */
    public HadoopShellExecutable createCreateHTableStep(String str) {
        return createCreateHTableStep(str, CuboidModeEnum.CURRENT);
    }

    /**
     * Builds the "create HTable" step for the given cuboid mode.
     *
     * @param str            id used to locate the job working directory
     * @param cuboidModeEnum cuboid mode forwarded to the job as a parameter
     * @return the configured shell step running {@code CreateHTableJob}
     */
    public HadoopShellExecutable createCreateHTableStep(String str, CuboidModeEnum cuboidModeEnum) {
        StringBuilder params = new StringBuilder();
        appendExecCmdParameters(params, BatchConstants.ARG_CUBE_NAME, this.seg.getRealization().getName());
        appendExecCmdParameters(params, BatchConstants.ARG_SEGMENT_ID, this.seg.getUuid());
        appendExecCmdParameters(params, BatchConstants.ARG_PARTITION, getRowkeyDistributionOutputPath(str) + "/part-r-00000");
        appendExecCmdParameters(params, BatchConstants.ARG_CUBOID_MODE, cuboidModeEnum.toString());

        HadoopShellExecutable step = new HadoopShellExecutable();
        step.setName(ExecutableConstants.STEP_NAME_CREATE_HBASE_TABLE);
        step.setJobParams(params.toString());
        step.setJobClass(CreateHTableJob.class);
        return step;
    }

    /**
     * Builds the MapReduce step that merges the cuboid data of several segments.
     *
     * @param cubeSegment the target (merged) segment
     * @param list        the segments whose cuboid data is used as input
     * @param str         id used to derive the output cuboid root path
     * @param cls         the Hadoop job class that performs the merge
     * @return the configured MapReduce step
     */
    public MapReduceExecutable createMergeCuboidDataStep(CubeSegment cubeSegment, List<CubeSegment> list, String str, Class<? extends AbstractHadoopJob> cls) {
        List<String> mergingCuboidPaths = Lists.newArrayList();
        for (CubeSegment merging : list) {
            mergingCuboidPaths.add(getCuboidRootPath(merging) + GLOB_ALL);
        }
        String inputPaths = StringUtil.join(mergingCuboidPaths, ",");
        String outputPath = getCuboidRootPath(str);

        StringBuilder params = new StringBuilder();
        appendMapReduceParameters(params);
        appendExecCmdParameters(params, BatchConstants.ARG_CUBE_NAME, cubeSegment.getCubeInstance().getName());
        appendExecCmdParameters(params, BatchConstants.ARG_SEGMENT_ID, cubeSegment.getUuid());
        appendExecCmdParameters(params, BatchConstants.ARG_INPUT, inputPaths);
        appendExecCmdParameters(params, BatchConstants.ARG_OUTPUT, outputPath);
        appendExecCmdParameters(params, BatchConstants.ARG_JOB_NAME, "Kylin_Merge_Cuboid_" + cubeSegment.getCubeInstance().getName() + "_Step");

        MapReduceExecutable step = new MapReduceExecutable();
        step.setName(ExecutableConstants.STEP_NAME_MERGE_CUBOID);
        step.setMapReduceParams(params.toString());
        step.setMapReduceJobClass(cls);
        return step;
    }

    /**
     * Builds the MapReduce step that converts cuboid files into HFiles.
     *
     * @param str id used to locate the cuboid root, rowkey statistics and HFile paths
     * @return the configured MapReduce step running {@code CubeHFileJob}
     */
    public MapReduceExecutable createConvertCuboidToHfileStep(String str) {
        String cuboidRootPath = getCuboidRootPath(str);
        // Make sure exactly one separator sits between the root path and the wildcard.
        String inputPath = cuboidRootPath + (cuboidRootPath.endsWith(PATH_SEPARATOR) ? "" : PATH_SEPARATOR) + GLOB_ALL;

        StringBuilder params = new StringBuilder();
        appendMapReduceParameters(params);
        appendExecCmdParameters(params, BatchConstants.ARG_CUBE_NAME, this.seg.getRealization().getName());
        appendExecCmdParameters(params, BatchConstants.ARG_PARTITION, getRowkeyDistributionOutputPath(str) + "/part-r-00000_hfile");
        appendExecCmdParameters(params, BatchConstants.ARG_INPUT, inputPath);
        appendExecCmdParameters(params, BatchConstants.ARG_OUTPUT, getHFilePath(str));
        appendExecCmdParameters(params, BatchConstants.ARG_HTABLE_NAME, this.seg.getStorageLocationIdentifier());
        appendExecCmdParameters(params, BatchConstants.ARG_JOB_NAME, "Kylin_HFile_Generator_" + this.seg.getRealization().getName() + "_Step");

        MapReduceExecutable step = new MapReduceExecutable();
        step.setName(ExecutableConstants.STEP_NAME_CONVERT_CUBOID_TO_HFILE);
        step.setMapReduceParams(params.toString());
        step.setMapReduceJobClass(CubeHFileJob.class);
        step.setCounterSaveAs(",,byteSizeBytes");
        return step;
    }

    /**
     * Builds the step that bulk loads the generated HFiles into the segment's HTable.
     *
     * @param str id used to locate the HFile path
     * @return the configured shell step running {@code BulkLoadJob}
     */
    public HadoopShellExecutable createBulkLoadStep(String str) {
        StringBuilder params = new StringBuilder();
        appendExecCmdParameters(params, BatchConstants.ARG_INPUT, getHFilePath(str));
        appendExecCmdParameters(params, BatchConstants.ARG_HTABLE_NAME, this.seg.getStorageLocationIdentifier());
        appendExecCmdParameters(params, BatchConstants.ARG_CUBE_NAME, this.seg.getRealization().getName());

        HadoopShellExecutable step = new HadoopShellExecutable();
        step.setName("Load HFile to HBase Table");
        step.setJobParams(params.toString());
        step.setJobClass(BulkLoadJob.class);
        return step;
    }

    /**
     * Builds the GC step that is given the HTables of the segments being merged.
     *
     * @return the configured step
     * @throws IllegalStateException if fewer than two merging segments are found
     */
    public MergeGCStep createMergeGCStep() {
        MergeGCStep step = new MergeGCStep();
        step.setName(ExecutableConstants.STEP_NAME_GARBAGE_COLLECTION_HBASE);
        step.setOldHTables(getMergingHTables());
        return step;
    }

    /**
     * Builds the GC step that is given the HTables of the segments being optimized.
     *
     * @return the configured step
     */
    public MergeGCStep createOptimizeGCStep() {
        MergeGCStep step = new MergeGCStep();
        step.setName(ExecutableConstants.STEP_NAME_GARBAGE_COLLECTION);
        step.setOldHTables(getOptimizeHTables());
        return step;
    }

    /**
     * Looks up, for every {@code READY_PENDING} segment of the cube, the original
     * segment it optimizes.
     *
     * @return the original segments, in the order of the {@code READY_PENDING} segments
     */
    public List<CubeSegment> getOptimizeSegments() {
        CubeInstance cube = (CubeInstance) this.seg.getRealization();
        // Iterate over a copy of the segment list, as the original code did.
        List<CubeSegment> readyPending = Lists.newArrayList(cube.getSegments(SegmentStatusEnum.READY_PENDING));
        List<CubeSegment> originals = Lists.newArrayListWithExpectedSize(readyPending.size());
        for (CubeSegment pending : readyPending) {
            originals.add(cube.getOriginalSegmentToOptimize(pending));
        }
        return originals;
    }

    /**
     * @return the storage location identifiers of the segments from {@link #getOptimizeSegments()}
     */
    public List<String> getOptimizeHTables() {
        return getOldHTables(getOptimizeSegments());
    }

    /**
     * Maps each segment to its storage location identifier.
     *
     * @param list the segments to map
     * @return one identifier per segment, in iteration order
     */
    public List<String> getOldHTables(List<CubeSegment> list) {
        List<String> hTables = Lists.newArrayListWithExpectedSize(list.size());
        for (CubeSegment segment : list) {
            hTables.add(segment.getStorageLocationIdentifier());
        }
        return hTables;
    }

    /**
     * @return the storage location identifiers of the segments merged into this segment
     * @throws IllegalStateException if fewer than two merging segments are found
     */
    public List<String> getMergingHTables() {
        List<String> hTables = Lists.newArrayList();
        for (CubeSegment merging : requireMergingSegments()) {
            hTables.add(merging.getStorageLocationIdentifier());
        }
        return hTables;
    }

    /**
     * @return the job working directories of the last build of each merging segment
     * @throws IllegalStateException if fewer than two merging segments are found
     */
    public List<String> getMergingHDFSPaths() {
        List<String> paths = Lists.newArrayList();
        for (CubeSegment merging : requireMergingSegments()) {
            paths.add(getJobWorkingDir(merging.getLastBuildJobID()));
        }
        return paths;
    }

    /**
     * @return the job working directories of the segments from {@link #getOptimizeSegments()}
     */
    public List<String> getOptimizeHDFSPaths() {
        return getOldHDFSPaths(getOptimizeSegments());
    }

    /**
     * Maps each segment to the working directory of its last build job.
     *
     * @param list the segments to map
     * @return one path per segment, in iteration order
     */
    public List<String> getOldHDFSPaths(List<CubeSegment> list) {
        List<String> paths = Lists.newArrayListWithExpectedSize(list.size());
        for (CubeSegment segment : list) {
            paths.add(getJobWorkingDir(segment.getLastBuildJobID()));
        }
        return paths;
    }

    /**
     * @param str id used to locate the job working directory
     * @return the qualified {@code <workingDir>/<cubeName>/hfile/} path
     */
    public String getHFilePath(String str) {
        return HBaseConnection.makeQualifiedPathInHBaseCluster(getJobWorkingDir(str) + PATH_SEPARATOR + this.seg.getRealization().getName() + "/hfile/");
    }

    /**
     * @param str id used to locate the job working directory
     * @return the qualified {@code <workingDir>/<cubeName>/rowkey_stats} path
     */
    public String getRowkeyDistributionOutputPath(String str) {
        return HBaseConnection.makeQualifiedPathInHBaseCluster(getJobWorkingDir(str) + PATH_SEPARATOR + this.seg.getRealization().getName() + "/rowkey_stats");
    }

    /**
     * Appends an HDFS cleanup step for the optimization root path of the given job.
     *
     * @param defaultChainedExecutable the job flow to extend
     */
    public void addOptimizeGarbageCollectionSteps(DefaultChainedExecutable defaultChainedExecutable) {
        String jobId = defaultChainedExecutable.getId();
        List<String> toDelete = new ArrayList<String>();
        toDelete.add(getOptimizationRootPath(jobId));
        defaultChainedExecutable.addTask(newHdfsGcStep(ExecutableConstants.STEP_NAME_GARBAGE_COLLECTION_HDFS, toDelete, jobId));
    }

    /**
     * Appends the HTable GC step for optimized segments followed by an HDFS cleanup
     * step for their working directories.
     *
     * @param defaultChainedExecutable the job flow to extend
     */
    public void addCheckpointGarbageCollectionSteps(DefaultChainedExecutable defaultChainedExecutable) {
        String jobId = defaultChainedExecutable.getId();
        defaultChainedExecutable.addTask(createOptimizeGCStep());
        List<String> toDelete = new ArrayList<String>();
        toDelete.addAll(getOptimizeHDFSPaths());
        defaultChainedExecutable.addTask(newHdfsGcStep(ExecutableConstants.STEP_NAME_GARBAGE_COLLECTION_HDFS, toDelete, jobId));
    }

    /**
     * Appends the HTable GC step for merged segments followed by an HDFS cleanup step
     * for their working directories and this job's HFile path.
     *
     * @param defaultChainedExecutable the job flow to extend
     * @throws IllegalStateException if fewer than two merging segments are found
     */
    public void addMergingGarbageCollectionSteps(DefaultChainedExecutable defaultChainedExecutable) {
        String jobId = defaultChainedExecutable.getId();
        defaultChainedExecutable.addTask(createMergeGCStep());
        List<String> toDelete = new ArrayList<String>();
        toDelete.addAll(getMergingHDFSPaths());
        toDelete.add(getHFilePath(jobId));
        defaultChainedExecutable.addTask(newHdfsGcStep(ExecutableConstants.STEP_NAME_GARBAGE_COLLECTION_HDFS, toDelete, jobId));
    }

    /**
     * Appends an HDFS cleanup step for this job's fact-distinct-columns and HFile paths.
     *
     * @param defaultChainedExecutable the job flow to extend
     */
    public void addCubingGarbageCollectionSteps(DefaultChainedExecutable defaultChainedExecutable) {
        String jobId = defaultChainedExecutable.getId();
        List<String> toDelete = new ArrayList<String>();
        toDelete.add(getFactDistinctColumnsPath(jobId));
        toDelete.add(getHFilePath(jobId));
        // NOTE(review): this HDFS path cleanup is registered under the HBase GC step name,
        // unlike the sibling methods which use STEP_NAME_GARBAGE_COLLECTION_HDFS. Kept as is
        // because other code may look the step up by name - confirm before changing.
        defaultChainedExecutable.addTask(newHdfsGcStep(ExecutableConstants.STEP_NAME_GARBAGE_COLLECTION_HBASE, toDelete, jobId));
    }

    /** Returns the segments being merged into {@code seg}, failing fast when there are fewer than two. */
    private Segments<CubeSegment> requireMergingSegments() {
        Segments<CubeSegment> mergingSegments = ((CubeInstance) this.seg.getRealization()).getMergingSegments(this.seg);
        // Template form: the message is only built when the check actually fails.
        Preconditions.checkState(mergingSegments.size() > 1, "there should be at least 2 segments to merge, target segment %s", this.seg);
        return mergingSegments;
    }

    /** Creates an HDFS path garbage collection step with the given name, paths to delete and job id. */
    private HDFSPathGarbageCollectionStep newHdfsGcStep(String stepName, List<String> deletePaths, String jobId) {
        HDFSPathGarbageCollectionStep step = new HDFSPathGarbageCollectionStep();
        step.setName(stepName);
        step.setDeletePaths(deletePaths);
        step.setJobId(jobId);
        return step;
    }
}
