package org.apache.kylin.storage.hbase.steps;

import java.io.Serializable;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.kylin.common.util.AbstractApplication;
import org.apache.kylin.common.util.HadoopUtil;
import org.apache.kylin.common.util.OptionsHelper;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.cube.model.CubeDesc;
import org.apache.kylin.cube.model.HBaseColumnDesc;
import org.apache.kylin.cube.model.HBaseColumnFamilyDesc;
import org.apache.kylin.engine.mr.common.AbstractHadoopJob;
import org.apache.kylin.engine.mr.common.BatchConstants;
import org.apache.kylin.engine.mr.common.SerializableConfiguration;
import org.apache.kylin.engine.spark.KylinSparkJobListener;
import org.apache.kylin.engine.spark.SparkUtil;
import org.apache.kylin.job.constant.ExecutableConstants;
import org.apache.kylin.measure.MeasureCodec;
import org.apache.kylin.shaded.com.google.common.collect.Lists;
import org.apache.kylin.shaded.com.google.common.collect.Maps;
import org.apache.kylin.storage.hbase.steps.RowKeyWritable;
import org.apache.spark.Partitioner;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.PairFlatMapFunction;
import org.apache.spark.api.java.function.PairFunction;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;

/* loaded from: input_file:WEB-INF/lib/kylin-storage-hbase-3.1.3.jar:org/apache/kylin/storage/hbase/steps/SparkCubeHFile.class */
public class SparkCubeHFile extends AbstractApplication implements Serializable {
    protected static final Logger logger = LoggerFactory.getLogger(SparkCubeHFile.class);

    /** Required option carrying the cube name. */
    public static final Option OPTION_CUBE_NAME;

    /** Required option carrying the cube segment id. */
    public static final Option OPTION_SEGMENT_ID;

    /** Required option carrying the HDFS metadata url. */
    public static final Option OPTION_META_URL;

    /** Required option carrying the HFile output path. */
    public static final Option OPTION_OUTPUT_PATH;

    /** Required option carrying the path of the cuboid files used as input. */
    public static final Option OPTION_INPUT_PATH;

    /** Required option carrying the path of the partition (split key) file. */
    public static final Option OPTION_PARTITION_FILE_PATH;

    /** Required option carrying the counter output path. */
    public static final Option OPTION_COUNTER_PATH;

    /** Options accepted by this application; filled in by the constructor and never reassigned. */
    private final Options options = new Options();

    /* loaded from: input_file:WEB-INF/lib/kylin-storage-hbase-3.1.3.jar:org/apache/kylin/storage/hbase/steps/SparkCubeHFile$HFilePartitioner.class */
    static class HFilePartitioner extends Partitioner {
        private List<RowKeyWritable> keys;

        public HFilePartitioner(List list) {
            this.keys = list;
        }

        public int numPartitions() {
            return this.keys.size() + 1;
        }

        public int getPartition(Object obj) {
            int binarySearch = Collections.binarySearch(this.keys, (RowKeyWritable) obj) + 1;
            return binarySearch < 0 ? -binarySearch : binarySearch;
        }

        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (obj == null || getClass() != obj.getClass()) {
                return false;
            }
            return Objects.equals(this.keys, ((HFilePartitioner) obj).keys);
        }

        public int hashCode() {
            return Objects.hash(this.keys);
        }
    }

    public SparkCubeHFile() {
        this.options.addOption(OPTION_INPUT_PATH);
        this.options.addOption(OPTION_CUBE_NAME);
        this.options.addOption(OPTION_SEGMENT_ID);
        this.options.addOption(OPTION_META_URL);
        this.options.addOption(OPTION_OUTPUT_PATH);
        this.options.addOption(OPTION_PARTITION_FILE_PATH);
        this.options.addOption(AbstractHadoopJob.OPTION_HBASE_CONF_PATH);
        this.options.addOption(OPTION_COUNTER_PATH);
    }

    /**
     * Returns the command-line options registered by the constructor.
     *
     * @return the option set of this application
     */
    @Override
    protected Options getOptions() {
        return options;
    }

    /**
     * Runs the Spark job that turns the cuboid files of one cube segment into HFiles.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>read the option values and configure Spark with Kryo serialization;</li>
     *   <li>check that the partition file exists and delete any existing output path;</li>
     *   <li>load the cube and segment, and build one {@link KeyValueCreator} per HBase column;</li>
     *   <li>read the split keys from the partition file;</li>
     *   <li>load the HBase configuration, map every input record to {@link KeyValue}s,
     *       repartition and sort them by split key and save them through the job configuration;</li>
     *   <li>write the number of bytes written to the counter path.</li>
     * </ol>
     *
     * <p>The Spark context, the split key reader and the HBase configuration stream are each
     * closed exactly once via try-with-resources, so a failure while closing never hides the
     * exception that caused the job to fail.
     *
     * @param optionsHelper the parsed command-line options
     * @throws IllegalArgumentException if the partition file does not exist
     * @throws Exception if any other step of the job fails
     */
    @Override
    protected void execute(OptionsHelper optionsHelper) throws Exception {
        final String metaUrl = optionsHelper.getOptionValue(OPTION_META_URL);
        final String inputPath = optionsHelper.getOptionValue(OPTION_INPUT_PATH);
        final String cubeName = optionsHelper.getOptionValue(OPTION_CUBE_NAME);
        final String segmentId = optionsHelper.getOptionValue(OPTION_SEGMENT_ID);
        final String outputPath = optionsHelper.getOptionValue(OPTION_OUTPUT_PATH);
        final Path partitionFilePath = new Path(optionsHelper.getOptionValue(OPTION_PARTITION_FILE_PATH));
        final String hbaseConfFile = optionsHelper.getOptionValue(AbstractHadoopJob.OPTION_HBASE_CONF_PATH);
        final String counterPath = optionsHelper.getOptionValue(OPTION_COUNTER_PATH);

        // spark.kryo.registrationRequired is "true", so every class Kryo serializes must be registered.
        final Class<?>[] kryoClasses = {
            Class.forName("scala.reflect.ClassTag$$anon$1"),
            KeyValueCreator.class,
            KeyValue.class,
            RowKeyWritable.class,
        };
        final SparkConf sparkConf = new SparkConf()
                .setAppName("Converting HFile for:" + cubeName + " segment " + segmentId);
        sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
        sparkConf.set("spark.kryo.registrator", "org.apache.kylin.engine.spark.KylinKryoRegistrator");
        sparkConf.set("spark.kryo.registrationRequired", "true").registerKryoClasses(kryoClasses);

        final KylinSparkJobListener jobListener = new KylinSparkJobListener();
        try (JavaSparkContext sc = new JavaSparkContext(sparkConf)) {
            sc.sc().addSparkListener(jobListener);

            final FileSystem partitionFs = partitionFilePath.getFileSystem(sc.hadoopConfiguration());
            if (!partitionFs.exists(partitionFilePath)) {
                throw new IllegalArgumentException("File not exist: " + partitionFilePath.toString());
            }
            HadoopUtil.deletePath(sc.hadoopConfiguration(), new Path(outputPath));

            final CubeInstance cube = CubeManager
                    .getInstance(AbstractHadoopJob.loadKylinConfigFromHdfs(
                            new SerializableConfiguration(sc.hadoopConfiguration()), metaUrl))
                    .getCube(cubeName);
            final CubeDesc cubeDesc = cube.getDescriptor();
            final CubeSegment segment = cube.getSegmentById(segmentId);
            final MeasureCodec measureCodec = new MeasureCodec(cubeDesc.getMeasures());

            // One creator per HBase column, across all column families.
            final List<KeyValueCreator> keyValueCreators = Lists.newArrayList();
            for (HBaseColumnFamilyDesc familyDesc : cubeDesc.getHbaseMapping().getColumnFamily()) {
                for (HBaseColumnDesc columnDesc : familyDesc.getColumns()) {
                    keyValueCreators.add(new KeyValueCreator(cubeDesc, columnDesc));
                }
            }
            final int creatorCount = keyValueCreators.size();
            // With a single full-copy creator the value bytes are passed through without decoding.
            final boolean singleFullCopy = creatorCount == 1 && keyValueCreators.get(0).isFullCopy;

            logger.info("Input path: {}", inputPath);
            logger.info("Output path: {}", outputPath);

            // The reader is only needed while collecting the split keys, so it is closed right after.
            final List<RowKeyWritable> splitKeys = new ArrayList<>();
            try (SequenceFile.Reader reader =
                    new SequenceFile.Reader(partitionFs, partitionFilePath, sc.hadoopConfiguration())) {
                final NullWritable ignoredValue = NullWritable.get();
                RowKeyWritable splitKey = new RowKeyWritable();
                while (reader.next(splitKey, ignoredValue)) {
                    splitKeys.add(splitKey);
                    logger.info(" ------- split key: {}", splitKey);
                    // A fresh instance per record: the one just read is now owned by the list.
                    splitKey = new RowKeyWritable();
                }
            }
            logger.info("There are {} split keys, totally {} hfiles", splitKeys.size(), splitKeys.size() + 1);

            logger.info("Loading HBase configuration from:{}", hbaseConfFile);
            final Path hbaseConfFilePath = new Path(hbaseConfFile);
            final FileSystem hbaseClusterFs = hbaseConfFilePath.getFileSystem(sc.hadoopConfiguration());
            try (FSDataInputStream confInput = hbaseClusterFs.open(hbaseConfFilePath)) {
                final Configuration hbaseJobConf = new Configuration();
                hbaseJobConf.addResource(confInput);
                hbaseJobConf.set("spark.hadoop.dfs.replication", "3");
                final Job job = Job.getInstance(hbaseJobConf, segment.getStorageLocationIdentifier());
                FileOutputFormat.setOutputPath(job, new Path(outputPath));

                @SuppressWarnings("unchecked") // input is parsed with Text.class for both key and value
                final JavaPairRDD<Text, Text> inputRdd =
                        SparkUtil.parseInputPath(inputPath, hbaseClusterFs, sc, Text.class, Text.class);

                final JavaPairRDD<RowKeyWritable, KeyValue> keyValueRdd;
                if (singleFullCopy) {
                    keyValueRdd = inputRdd.mapToPair(
                            new PairFunction<Tuple2<Text, Text>, RowKeyWritable, KeyValue>() {
                                @Override
                                public Tuple2<RowKeyWritable, KeyValue> call(Tuple2<Text, Text> record)
                                        throws Exception {
                                    KeyValue keyValue = keyValueCreators.get(0).create(record._1,
                                            record._2.getBytes(), 0, record._2.getLength());
                                    return new Tuple2<>(
                                            new RowKeyWritable(keyValue.createKeyOnly(false).getKey()), keyValue);
                                }
                            });
                } else {
                    keyValueRdd = inputRdd.flatMapToPair(
                            new PairFlatMapFunction<Tuple2<Text, Text>, RowKeyWritable, KeyValue>() {
                                @Override
                                public Iterator<Tuple2<RowKeyWritable, KeyValue>> call(Tuple2<Text, Text> record)
                                        throws Exception {
                                    List<Tuple2<RowKeyWritable, KeyValue>> result =
                                            Lists.newArrayListWithExpectedSize(creatorCount);
                                    Object[] measureValues = new Object[cubeDesc.getMeasures().size()];
                                    measureCodec.decode(
                                            ByteBuffer.wrap(record._2.getBytes(), 0, record._2.getLength()),
                                            measureValues);
                                    for (int i = 0; i < creatorCount; i++) {
                                        KeyValue keyValue = keyValueCreators.get(i).create(record._1, measureValues);
                                        result.add(new Tuple2<>(
                                                new RowKeyWritable(keyValue.createKeyOnly(false).getKey()),
                                                keyValue));
                                    }
                                    return result.iterator();
                                }
                            });
                }

                keyValueRdd
                        .repartitionAndSortWithinPartitions(new HFilePartitioner(splitKeys),
                                RowKeyWritable.RowKeyComparator.INSTANCE)
                        .mapToPair(
                                new PairFunction<Tuple2<RowKeyWritable, KeyValue>, ImmutableBytesWritable, KeyValue>() {
                                    @Override
                                    public Tuple2<ImmutableBytesWritable, KeyValue> call(
                                            Tuple2<RowKeyWritable, KeyValue> entry) throws Exception {
                                        return new Tuple2<>(new ImmutableBytesWritable(entry._2.getKey()), entry._2);
                                    }
                                })
                        .saveAsNewAPIHadoopDataset(job.getConfiguration());
            }

            logger.info("HDFS: Number of bytes written={}", jobListener.metrics.getBytesWritten());
            final HashMap<String, String> counters = Maps.newHashMap();
            counters.put(ExecutableConstants.HDFS_BYTES_WRITTEN,
                    String.valueOf(jobListener.metrics.getBytesWritten()));
            HadoopUtil.writeToSequenceFile(sc.hadoopConfiguration(), counterPath, counters);
        }
    }

    // Builds the option constants; every one of them is required and takes an argument.
    static {
        OPTION_CUBE_NAME = requiredArgOption(BatchConstants.ARG_CUBE_NAME, "Cube Name",
                BatchConstants.ARG_CUBE_NAME);
        OPTION_SEGMENT_ID = requiredArgOption("segment", "Cube Segment Id", "segmentId");
        OPTION_META_URL = requiredArgOption(BatchConstants.ARG_META_URL, "HDFS metadata url",
                BatchConstants.ARG_META_URL);
        OPTION_OUTPUT_PATH = requiredArgOption(BatchConstants.ARG_OUTPUT, "HFile output path",
                BatchConstants.ARG_OUTPUT);
        OPTION_INPUT_PATH = requiredArgOption(BatchConstants.ARG_INPUT, "Cuboid files PATH",
                BatchConstants.ARG_INPUT);
        OPTION_PARTITION_FILE_PATH = requiredArgOption("partitions", "Partition file path.", "partitions");
        OPTION_COUNTER_PATH = requiredArgOption(BatchConstants.ARG_COUNTER_OUTPUT, "counter output path",
                BatchConstants.ARG_COUNTER_OUTPUT);
    }

    /**
     * Creates a required option that takes one argument.
     *
     * @param argName the display name of the option's argument
     * @param description the option description
     * @param optionName the name the option is created with
     * @return the newly built option
     */
    private static Option requiredArgOption(String argName, String description, String optionName) {
        // Same OptionBuilder call sequence as before, issued once per option.
        OptionBuilder.withArgName(argName);
        OptionBuilder.hasArg();
        OptionBuilder.isRequired(true);
        OptionBuilder.withDescription(description);
        return OptionBuilder.create(optionName);
    }
}
