package org.apache.hadoop.hbase.mapreduce.hadoopbackport;

import java.io.IOException;
import java.lang.reflect.Constructor;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/* loaded from: input_file:lib/hbase-0.94.3.jar:org/apache/hadoop/hbase/mapreduce/hadoopbackport/InputSampler.class */
public class InputSampler<K, V> extends Configured implements Tool {
    private static final Log LOG = LogFactory.getLog(InputSampler.class);

    /* loaded from: input_file:lib/hbase-0.94.3.jar:org/apache/hadoop/hbase/mapreduce/hadoopbackport/InputSampler$IntervalSampler.class */
    /**
     * Sampler that keeps a key whenever the fraction of keys kept so far
     * falls below a target frequency, reading the first
     * {@code maxSplitsSampled} splits in the order the input format returns them.
     *
     * @param <K> key type produced by the input format
     * @param <V> value type produced by the input format
     */
    public static class IntervalSampler<K, V> implements Sampler<K, V> {
        /** Target ratio of kept records to records read. */
        private final double freq;
        /** Upper bound on the number of splits that are read. */
        private final int maxSplitsSampled;

        /**
         * Creates a sampler that reads every split.
         *
         * @param d target ratio of kept records to records read
         */
        public IntervalSampler(double d) {
            this(d, Integer.MAX_VALUE);
        }

        /**
         * Creates a sampler that reads at most {@code i} splits.
         *
         * @param d target ratio of kept records to records read
         * @param i maximum number of splits to read
         */
        public IntervalSampler(double d, int i) {
            this.freq = d;
            this.maxSplitsSampled = i;
        }

        /**
         * Reads the leading splits and keeps a copy of a key each time the
         * running kept/read ratio is below the configured frequency.
         *
         * @param inputFormat input format used to compute splits and open readers
         * @param job job whose configuration drives the input format
         * @return the sampled keys, in the order they were read
         * @throws IOException if reading a split or copying a key fails
         * @throws InterruptedException if the input format or reader is interrupted
         */
        @Override
        @SuppressWarnings("unchecked") // Object[] viewed as K[]; K is erased at runtime
        public K[] getSample(InputFormat<K, V> inputFormat, Job job) throws IOException, InterruptedException {
            List<InputSplit> splits = inputFormat.getSplits(job);
            List<K> samples = new ArrayList<K>();
            int splitsToSample = Math.min(this.maxSplitsSampled, splits.size());
            long records = 0;
            long kept = 0;
            for (int i = 0; i < splitsToSample; i++) {
                TaskAttemptContext context = InputSampler.getTaskAttemptContext(job);
                RecordReader<K, V> reader = inputFormat.createRecordReader(splits.get(i), context);
                try {
                    reader.initialize(splits.get(i), context);
                    while (reader.nextKeyValue()) {
                        records++;
                        // Floating-point division is required here: with long
                        // division the ratio is 0 until kept == records, which
                        // would keep every single record.
                        if ((double) kept / records < this.freq) {
                            samples.add(ReflectionUtils.copy(job.getConfiguration(), reader.getCurrentKey(), null));
                            kept++;
                        }
                    }
                } finally {
                    // Release the reader even when reading fails part-way.
                    reader.close();
                }
            }
            return (K[]) samples.toArray();
        }
    }

    /* loaded from: input_file:lib/hbase-0.94.3.jar:org/apache/hadoop/hbase/mapreduce/hadoopbackport/InputSampler$RandomSampler.class */
    /**
     * Sampler that visits splits in random order and keeps each key with a
     * given probability, replacing random earlier samples once
     * {@code numSamples} keys have been collected.
     *
     * @param <K> key type produced by the input format
     * @param <V> value type produced by the input format
     */
    public static class RandomSampler<K, V> implements Sampler<K, V> {
        /**
         * Probability with which a key is selected. It is reduced every time
         * a full sample set has an element replaced, and the reduced value
         * persists on this instance across calls to {@link #getSample}.
         */
        private double freq;
        /** Maximum number of keys returned. */
        private final int numSamples;
        /** Minimum number of splits read (bounded by the number of splits available). */
        private final int maxSplitsSampled;

        /**
         * Creates a sampler with no bound on the number of splits examined.
         *
         * @param d probability with which a key is selected
         * @param i maximum number of keys to return
         */
        public RandomSampler(double d, int i) {
            this(d, i, Integer.MAX_VALUE);
        }

        /**
         * Creates a sampler.
         *
         * @param d probability with which a key is selected
         * @param i maximum number of keys to return
         * @param i2 number of splits to read before the sample count may stop the scan
         */
        public RandomSampler(double d, int i, int i2) {
            this.freq = d;
            this.numSamples = i;
            this.maxSplitsSampled = i2;
        }

        /**
         * Shuffles the splits, then reads them until both the split quota is
         * met and either all splits are consumed or enough samples exist.
         *
         * @param inputFormat input format used to compute splits and open readers
         * @param job job whose configuration drives the input format
         * @return at most {@code numSamples} sampled keys
         * @throws IOException if reading a split or copying a key fails
         * @throws InterruptedException if the input format or reader is interrupted
         */
        @Override
        @SuppressWarnings("unchecked") // Object[] viewed as K[]; K is erased at runtime
        public K[] getSample(InputFormat<K, V> inputFormat, Job job) throws IOException, InterruptedException {
            List<InputSplit> splits = inputFormat.getSplits(job);
            List<K> samples = new ArrayList<K>(this.numSamples);
            int splitsToSample = Math.min(this.maxSplitsSampled, splits.size());

            Random random = new Random();
            long seed = random.nextLong();
            random.setSeed(seed);
            // Logged so that a particular run can be correlated with its seed.
            InputSampler.LOG.debug("seed: " + seed);

            // Shuffle the splits by swapping each position with a random one.
            for (int i = 0; i < splits.size(); i++) {
                InputSplit current = splits.get(i);
                int other = random.nextInt(splits.size());
                splits.set(i, splits.get(other));
                splits.set(other, current);
            }

            // Keep reading while the split quota is unmet, or while splits
            // remain and the sample set is not yet full. The loop must end
            // once neither holds, otherwise the method never returns.
            for (int i = 0;
                    i < splitsToSample || (i < splits.size() && samples.size() < this.numSamples);
                    i++) {
                TaskAttemptContext context = InputSampler.getTaskAttemptContext(job);
                RecordReader<K, V> reader = inputFormat.createRecordReader(splits.get(i), context);
                try {
                    reader.initialize(splits.get(i), context);
                    while (reader.nextKeyValue()) {
                        if (random.nextDouble() <= this.freq) {
                            if (samples.size() < this.numSamples) {
                                samples.add(ReflectionUtils.copy(job.getConfiguration(), reader.getCurrentKey(), null));
                            } else {
                                // Sample set is full: overwrite a random slot.
                                // nextInt(n) is always < n, so no extra guard is needed.
                                int slot = random.nextInt(this.numSamples);
                                samples.set(slot, ReflectionUtils.copy(job.getConfiguration(), reader.getCurrentKey(), null));
                                // Scale the selection probability by (n - 1) / n.
                                // This must be a floating-point division; the
                                // int quotient is 0 and would stop all sampling.
                                this.freq *= (this.numSamples - 1) / (double) this.numSamples;
                            }
                        }
                    }
                } finally {
                    // Release the reader even when reading fails part-way.
                    reader.close();
                }
            }
            return (K[]) samples.toArray();
        }
    }

    /* loaded from: input_file:lib/hbase-0.94.3.jar:org/apache/hadoop/hbase/mapreduce/hadoopbackport/InputSampler$Sampler.class */
    /**
     * Strategy for drawing an array of sample keys from a job's input.
     *
     * @param <K> key type produced by the input format
     * @param <V> value type produced by the input format
     */
    public interface Sampler<K, V> {
        /**
         * Draws sample keys from the input described by the given format and job.
         *
         * @param inputFormat input format to read from
         * @param job job supplying the context for the input format
         * @return the sampled keys
         * @throws IOException declared for implementations that fail while reading input
         * @throws InterruptedException declared for implementations that are interrupted
         */
        K[] getSample(InputFormat<K, V> inputFormat, Job job) throws IOException, InterruptedException;
    }

    /* loaded from: input_file:lib/hbase-0.94.3.jar:org/apache/hadoop/hbase/mapreduce/hadoopbackport/InputSampler$SplitSampler.class */
    /**
     * Sampler that takes the leading records of each of the first
     * {@code maxSplitsSampled} splits, spreading {@code numSamples} evenly
     * across those splits.
     *
     * @param <K> key type produced by the input format
     * @param <V> value type produced by the input format
     */
    public static class SplitSampler<K, V> implements Sampler<K, V> {
        /** Total number of keys to aim for across all sampled splits. */
        private final int numSamples;
        /** Upper bound on the number of splits that are read. */
        private final int maxSplitsSampled;

        /**
         * Creates a sampler that reads every split.
         *
         * @param i total number of keys to collect
         */
        public SplitSampler(int i) {
            this(i, Integer.MAX_VALUE);
        }

        /**
         * Creates a sampler that reads at most {@code i2} splits.
         *
         * @param i total number of keys to collect
         * @param i2 maximum number of splits to read
         */
        public SplitSampler(int i, int i2) {
            this.numSamples = i;
            this.maxSplitsSampled = i2;
        }

        /**
         * Reads records from the start of each sampled split until that
         * split's cumulative quota is reached or the split is exhausted.
         *
         * @param inputFormat input format used to compute splits and open readers
         * @param job job whose configuration drives the input format
         * @return the sampled keys; empty when there is no split to sample
         * @throws IOException if reading a split or copying a key fails
         * @throws InterruptedException if the input format or reader is interrupted
         */
        @Override
        @SuppressWarnings("unchecked") // Object[] viewed as K[]; K is erased at runtime
        public K[] getSample(InputFormat<K, V> inputFormat, Job job) throws IOException, InterruptedException {
            List<InputSplit> splits = inputFormat.getSplits(job);
            List<K> samples = new ArrayList<K>(this.numSamples);
            int splitsToSample = Math.min(this.maxSplitsSampled, splits.size());
            if (splitsToSample <= 0) {
                // Nothing to read; also avoids dividing by zero below.
                return (K[]) samples.toArray();
            }
            int samplesPerSplit = this.numSamples / splitsToSample;
            long records = 0;
            for (int i = 0; i < splitsToSample; i++) {
                TaskAttemptContext context = InputSampler.getTaskAttemptContext(job);
                RecordReader<K, V> reader = inputFormat.createRecordReader(splits.get(i), context);
                try {
                    reader.initialize(splits.get(i), context);
                    while (reader.nextKeyValue()) {
                        samples.add(ReflectionUtils.copy(job.getConfiguration(), reader.getCurrentKey(), null));
                        records++;
                        // The quota is cumulative, so a short split lets later
                        // splits contribute more records.
                        if ((i + 1) * samplesPerSplit <= records) {
                            break;
                        }
                    }
                } finally {
                    // Release the reader even when reading fails part-way.
                    reader.close();
                }
            }
            return (K[]) samples.toArray();
        }
    }

    /**
     * Prints the command-line synopsis, the default sampler, and the generic
     * tool options to standard output.
     *
     * @return always {@code -1}, so callers can return it as a failure code
     */
    static int printUsage() {
        final String synopsis = "sampler -r <reduces>\n"
                + "      [-inFormat <input format class>]\n"
                + "      [-keyClass <map input & output key class>]\n"
                + "      [-splitRandom <double pcnt> <numSamples> <maxsplits> |              // Sample from random splits at random (general)\n"
                + "       -splitSample <numSamples> <maxsplits> |              // Sample from first records in splits (random data)\n"
                + "       -splitInterval <double pcnt> <maxsplits>]             // Sample from splits at intervals (sorted data)";
        final String defaults = "Default sampler: -splitRandom 0.1 10000 10";
        System.out.println(synopsis);
        System.out.println(defaults);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    /**
     * Creates a sampler tool and installs the given configuration on it.
     *
     * @param configuration configuration passed to {@code setConf}
     */
    public InputSampler(Configuration configuration) {
        this.setConf(configuration);
    }

    /**
     * Reflectively builds a {@link TaskAttemptContext} from the job's
     * configuration and a fresh {@link TaskAttemptID}.
     *
     * <p>Constructor lookup and instantiation are handled separately so the
     * exception message names the step that actually failed.
     *
     * @param job job whose configuration is handed to the new context
     * @return a newly constructed task attempt context
     * @throws IOException with message "Failed getting constructor" when the
     *         (Configuration, TaskAttemptID) constructor cannot be obtained, or
     *         "Failed creating instance" when invoking it fails; the original
     *         exception is attached as the cause
     */
    public static TaskAttemptContext getTaskAttemptContext(Job job) throws IOException {
        Constructor<TaskAttemptContext> constructor;
        try {
            constructor = TaskAttemptContext.class.getConstructor(Configuration.class, TaskAttemptID.class);
        } catch (Exception e) {
            throw new IOException("Failed getting constructor", e);
        }
        try {
            return constructor.newInstance(job.getConfiguration(), new TaskAttemptID());
        } catch (Exception e) {
            throw new IOException("Failed creating instance", e);
        }
    }

    /**
     * Samples the job's input, sorts the samples with the job's sort
     * comparator, and writes {@code numReduceTasks - 1} split keys to the
     * partition file named by {@code TotalOrderPartitioner.getPartitionFile}.
     * An existing partition file is deleted first.
     *
     * @param job job supplying configuration, input format, comparator,
     *        reducer count and map output key class
     * @param sampler strategy used to draw the sample keys
     * @throws IOException if sampling or writing fails, or if the sample is
     *         too small to provide a distinct split key for every boundary
     * @throws ClassNotFoundException declared because the job's input format
     *         class is looked up here
     * @throws InterruptedException if sampling is interrupted
     */
    @SuppressWarnings("unchecked") // raw InputFormat and wildcard comparator narrowed to K/V
    public static <K, V> void writePartitionFile(Job job, Sampler<K, V> sampler) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration configuration = job.getConfiguration();
        InputFormat<K, V> inputFormat = (InputFormat) ReflectionUtils.newInstance(job.getInputFormatClass(), configuration);
        int numPartitions = job.getNumReduceTasks();
        K[] samples = sampler.getSample(inputFormat, job);
        LOG.info("Using " + samples.length + " samples");
        RawComparator<K> comparator = (RawComparator<K>) job.getSortComparator();
        Arrays.sort(samples, comparator);

        Path partitionFile = new Path(TotalOrderPartitioner.getPartitionFile(configuration));
        FileSystem fileSystem = partitionFile.getFileSystem(configuration);
        if (fileSystem.exists(partitionFile)) {
            fileSystem.delete(partitionFile, false);
        }
        SequenceFile.Writer writer = SequenceFile.createWriter(fileSystem, configuration, partitionFile, job.getMapOutputKeyClass(), NullWritable.class);
        try {
            NullWritable nullValue = NullWritable.get();
            // The step must be fractional; integer division truncates it and
            // bunches the split points toward the low end of the sample.
            float stepSize = samples.length / (float) numPartitions;
            int last = -1;
            for (int i = 1; i < numPartitions; i++) {
                int k = Math.round(stepSize * i);
                // Skip forward past keys equal to the previously written one
                // so that consecutive split keys differ.
                while (last >= k && comparator.compare(samples[last], samples[k]) == 0) {
                    k++;
                }
                if (k >= samples.length) {
                    // Previously surfaced as a bare ArrayIndexOutOfBoundsException.
                    throw new IOException("Not enough samples (" + samples.length
                            + ") to choose split point " + i + " of " + (numPartitions - 1));
                }
                writer.append(samples[k], nullValue);
                last = k;
            }
        } finally {
            // Close the writer even when appending fails.
            writer.close();
        }
    }

    /**
     * Command-line driver: parses the options, selects a sampler, registers
     * the input paths, and writes the partition file.
     *
     * <p>Recognised options are {@code -r}, {@code -inFormat}, {@code -keyClass},
     * {@code -splitSample}, {@code -splitRandom} and {@code -splitInterval}.
     * Every other argument is treated as a path; the last path is the
     * partition file and the preceding ones are inputs. A non-positive
     * max-splits value is treated as "no limit".
     *
     * @param strArr command-line arguments
     * @return {@code 0} on success, or the value of {@link #printUsage()} when
     *         the arguments are invalid
     * @throws Exception if a named class cannot be loaded or sampling/writing fails
     */
    @Override
    public int run(String[] strArr) throws Exception {
        Job job = new Job(getConf());
        List<String> otherArgs = new ArrayList<String>();
        Sampler<K, V> sampler = null;
        // The index is advanced with ++i at the point each option value is
        // consumed, so that on failure strArr[i] is the offending token and
        // strArr[i - 1] is the last argument that was present.
        for (int i = 0; i < strArr.length; ++i) {
            try {
                if ("-r".equals(strArr[i])) {
                    job.setNumReduceTasks(Integer.parseInt(strArr[++i]));
                } else if ("-inFormat".equals(strArr[i])) {
                    job.setInputFormatClass(Class.forName(strArr[++i]).asSubclass(InputFormat.class));
                } else if ("-keyClass".equals(strArr[i])) {
                    job.setMapOutputKeyClass(Class.forName(strArr[++i]).asSubclass(WritableComparable.class));
                } else if ("-splitSample".equals(strArr[i])) {
                    int numSamples = Integer.parseInt(strArr[++i]);
                    int maxSplits = Integer.parseInt(strArr[++i]);
                    if (0 >= maxSplits) {
                        maxSplits = Integer.MAX_VALUE;
                    }
                    sampler = new SplitSampler<K, V>(numSamples, maxSplits);
                } else if ("-splitRandom".equals(strArr[i])) {
                    double pcnt = Double.parseDouble(strArr[++i]);
                    int numSamples = Integer.parseInt(strArr[++i]);
                    int maxSplits = Integer.parseInt(strArr[++i]);
                    if (0 >= maxSplits) {
                        maxSplits = Integer.MAX_VALUE;
                    }
                    sampler = new RandomSampler<K, V>(pcnt, numSamples, maxSplits);
                } else if ("-splitInterval".equals(strArr[i])) {
                    double pcnt = Double.parseDouble(strArr[++i]);
                    int maxSplits = Integer.parseInt(strArr[++i]);
                    if (0 >= maxSplits) {
                        maxSplits = Integer.MAX_VALUE;
                    }
                    sampler = new IntervalSampler<K, V>(pcnt, maxSplits);
                } else {
                    otherArgs.add(strArr[i]);
                }
            } catch (ArrayIndexOutOfBoundsException e) {
                // i was already advanced past the end, so i - 1 is always valid.
                System.out.println("ERROR: Required parameter missing from " + strArr[i - 1]);
                return printUsage();
            } catch (NumberFormatException e2) {
                System.out.println("ERROR: Integer expected instead of " + strArr[i]);
                return printUsage();
            }
        }
        if (job.getNumReduceTasks() <= 1) {
            System.err.println("Sampler requires more than one reducer");
            return printUsage();
        }
        if (otherArgs.size() < 2) {
            System.out.println("ERROR: Wrong number of parameters: ");
            return printUsage();
        }
        if (null == sampler) {
            sampler = new RandomSampler<K, V>(0.1d, 10000, 10);
        }
        // NOTE(review): the partition file is recorded on getConf(), while
        // writePartitionFile reads it from job.getConfiguration(). If Job keeps
        // its own copy of the configuration this path is not seen there - confirm.
        Path partitionFile = new Path(otherArgs.remove(otherArgs.size() - 1));
        TotalOrderPartitioner.setPartitionFile(getConf(), partitionFile);
        for (String input : otherArgs) {
            FileInputFormat.addInputPath(job, new Path(input));
        }
        writePartitionFile(job, sampler);
        return 0;
    }

    /**
     * Program entry point: runs this tool through {@code ToolRunner} with a
     * fresh {@code Configuration} and exits the JVM with the tool's return code.
     *
     * @param strArr command-line arguments forwarded to the tool
     * @throws Exception if the tool run fails
     */
    public static void main(String[] strArr) throws Exception {
        InputSampler tool = new InputSampler(new Configuration());
        int exitCode = ToolRunner.run(tool, strArr);
        System.exit(exitCode);
    }
}
