package org.apache.kylin.engine.mr.steps;

import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.commons.cli.Options;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.ArrayPrimitiveWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.ToolRunner;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.util.HadoopUtil;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.engine.mr.common.AbstractHadoopJob;
import org.apache.kylin.engine.mr.common.BatchConstants;
import org.apache.kylin.engine.mr.steps.filter.UHCDictPathFilter;
import org.apache.kylin.metadata.model.TblColRef;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/kylin-engine-mr-2.6.5.jar:org/apache/kylin/engine/mr/steps/UHCDictionaryJob.class */
/**
 * MapReduce driver that builds dictionaries for a cube's UHC columns
 * (UHC presumably stands for "ultra high cardinality" - confirm against the cube descriptor docs).
 *
 * <p>For every column returned by {@code getAllUHCColumns()} the job looks for a directory named
 * after the column identity underneath the input path. Directories that exist become job inputs.
 * When none exists the job is not submitted at all, {@link #run(String[])} returns {@code 0} and
 * {@link #isSkipped()} reports {@code true}.
 */
public class UHCDictionaryJob extends AbstractHadoopJob {
    protected static final Logger logger = LoggerFactory.getLogger(UHCDictionaryJob.class);

    /** Set to {@code true} by {@link #run(String[])} when no column input directory was found. */
    private boolean isSkipped = false;

    /**
     * Parses the command line, wires up the MapReduce job and waits for it to finish.
     *
     * @param strArr command-line arguments carrying the job name, cube name, cubing job id,
     *               output path and input path options
     * @return {@code 0} when the job was skipped because there was no input, otherwise the
     *         value returned by {@code waitForCompletion}
     * @throws Exception anything raised while parsing options, configuring or running the job
     */
    public int run(String[] strArr) throws Exception {
        Options options = new Options();
        try {
            options.addOption(OPTION_JOB_NAME);
            options.addOption(OPTION_CUBE_NAME);
            options.addOption(OPTION_CUBING_JOB_ID);
            options.addOption(OPTION_OUTPUT_PATH);
            options.addOption(OPTION_INPUT_PATH);
            parseOptions(options, strArr);

            this.job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
            String cubingJobId = getOptionValue(OPTION_CUBING_JOB_ID);
            String cubeName = getOptionValue(OPTION_CUBE_NAME);
            Path outputDir = new Path(getOptionValue(OPTION_OUTPUT_PATH));
            Path inputDir = new Path(getOptionValue(OPTION_INPUT_PATH));

            CubeInstance cube = CubeManager.getInstance(KylinConfig.getInstanceFromEnv()).getCube(cubeName);
            attachCubeMetadata(cube, this.job.getConfiguration());

            List<TblColRef> uhcColumns = cube.getDescriptor().getAllUHCColumns();
            if (!addExistingInputPaths(inputDir, uhcColumns)) {
                // Nothing to build a dictionary from: report success without submitting a job.
                this.isSkipped = true;
                return 0;
            }

            setJobClasspath(this.job, cube.getConfig());
            setupMapper();
            // One reducer per UHC column, including columns whose input directory was absent.
            setupReducer(outputDir, uhcColumns.size());
            applyJobSettings(cube, cubeName, cubingJobId);

            return waitForCompletion(this.job);
        } finally {
            // Single cleanup point for the normal, skipped and failing exits alike.
            if (this.job != null) {
                cleanupTempConfFile(this.job.getConfiguration());
            }
        }
    }

    /**
     * Registers {@code <inputDir>/<column identity>} as a job input for every column whose
     * directory exists, and installs {@link UHCDictPathFilter} once if at least one was added.
     *
     * @param inputDir   parent directory holding one sub-directory per column
     * @param uhcColumns columns to look up
     * @return {@code true} if at least one input path was added to the job
     * @throws IOException if the file system lookup or the input registration fails
     */
    private boolean addExistingInputPaths(Path inputDir, List<TblColRef> uhcColumns) throws IOException {
        boolean hasInput = false;
        for (TblColRef column : uhcColumns) {
            Path columnDir = new Path(inputDir.toString() + "/" + column.getIdentity());
            if (HadoopUtil.getFileSystem(columnDir).exists(columnDir)) {
                FileInputFormat.addInputPath(this.job, columnDir);
                hasInput = true;
            }
        }
        if (hasInput) {
            // The filter is a job-wide setting, so it only needs to be installed once.
            FileInputFormat.setInputPathFilter(this.job, UHCDictPathFilter.class);
        }
        return hasInput;
    }

    /**
     * Writes the cube identifiers and the resource defaults of this step into the job
     * configuration, then applies the cube's UHC-specific overrides on top.
     *
     * @param cube        cube whose configuration supplies the overrides
     * @param cubeName    name of the cube, stored under {@code BatchConstants.CFG_CUBE_NAME}
     * @param cubingJobId id of the cubing job, stored under {@code "cubingJobId"}
     */
    private void applyJobSettings(CubeInstance cube, String cubeName, String cubingJobId) {
        this.job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        this.job.getConfiguration().set("cubingJobId", cubingJobId);
        this.job.getConfiguration().set(BatchConstants.CFG_GLOBAL_DICT_BASE_DIR, KylinConfig.getInstanceFromEnv().getHdfsWorkingDirectory());
        this.job.getConfiguration().set(BatchConstants.CFG_MAPRED_OUTPUT_COMPRESS, "false");

        // Defaults for this step: 8500 MB reducer container, 8 GB reducer heap and a
        // task timeout of 28,800,000 ms (8 hours).
        this.job.getConfiguration().set("mapreduce.reduce.memory.mb", "8500");
        this.job.getConfiguration().set("mapred.reduce.child.java.opts", "-Xmx8g");
        this.job.getConfiguration().set("mapreduce.task.timeout", "28800000");

        // Applied last so that cube-level overrides win over every value set above.
        for (Map.Entry<String, String> override : cube.getConfig().getUHCMRConfigOverride().entrySet()) {
            this.job.getConfiguration().set(override.getKey(), override.getValue());
        }
    }

    /**
     * Configures the map side: sequence-file input, {@link UHCDictionaryMapper}, and
     * {@link SelfDefineSortableKey} / {@link NullWritable} as map output key and value.
     */
    private void setupMapper() throws IOException {
        this.job.setInputFormatClass(SequenceFileInputFormat.class);
        this.job.setMapperClass(UHCDictionaryMapper.class);
        this.job.setMapOutputKeyClass(SelfDefineSortableKey.class);
        this.job.setMapOutputValueClass(NullWritable.class);
    }

    /**
     * Configures the reduce side and the job output.
     *
     * @param path output directory of the job; it is deleted here via {@code deletePath}
     * @param i    number of reduce tasks to run
     * @throws IOException if preparing the output location fails
     */
    private void setupReducer(Path path, int i) throws IOException {
        this.job.setReducerClass(UHCDictionaryReducer.class);
        this.job.setPartitionerClass(UHCDictionaryPartitioner.class);
        this.job.setNumReduceTasks(i);
        // Results are written through the named output "dict".
        MultipleOutputs.addNamedOutput(this.job, "dict", SequenceFileOutputFormat.class, NullWritable.class, ArrayPrimitiveWritable.class);
        FileOutputFormat.setOutputPath(this.job, path);
        this.job.getConfiguration().set("output.path", path.toString());
        LazyOutputFormat.setOutputFormatClass(this.job, SequenceFileOutputFormat.class);
        deletePath(this.job.getConfiguration(), path);
    }

    /**
     * @return {@code true} if the last {@link #run(String[])} found no input and returned
     *         without submitting a job
     */
    @Override // org.apache.kylin.engine.mr.common.AbstractHadoopJob
    public boolean isSkipped() {
        return this.isSkipped;
    }

    /** Command-line entry point: runs the job through {@link ToolRunner} and exits with its status. */
    public static void main(String[] strArr) throws Exception {
        System.exit(ToolRunner.run(new UHCDictionaryJob(), strArr));
    }
}
