package com.twitter.elephantbird.mapreduce.input;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.twitter.elephantbird.mapreduce.output.LuceneIndexOutputFormat;
import com.twitter.elephantbird.util.HadoopCompat;
import com.twitter.elephantbird.util.HadoopUtils;
import com.twitter.elephantbird.util.HdfsUtils;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.PriorityQueue;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;

/* loaded from: input_file:com/twitter/elephantbird/mapreduce/input/LuceneIndexInputFormat.class */
public abstract class LuceneIndexInputFormat<T extends Writable> extends InputFormat<IntWritable, T> {
    // ---- Configuration keys (all namespaced by this class's canonical name) ----

    /** Key under which {@link #setQueries} stores the query list. */
    public static final String QUERIES_KEY = LuceneIndexInputFormat.class.getCanonicalName() + ".queries";
    /** Key under which {@link #setInputPaths} stores the qualified input paths. */
    public static final String INPUT_PATHS_KEY = LuceneIndexInputFormat.class.getCanonicalName() + ".inputpaths";
    /** Key holding the maximum number of index directories allowed in one combined split. */
    public static final String MAX_NUM_INDEXES_PER_SPLIT_KEY = LuceneIndexInputFormat.class.getCanonicalName() + ".max_num_indexes_per_split";
    /** Key holding the maximum combined size, in bytes, allowed for one combined split. */
    public static final String MAX_COMBINED_INDEX_SIZE_PER_SPLIT_KEY = LuceneIndexInputFormat.class.getCanonicalName() + ".max_combined_index_size_per_split";

    // ---- Defaults used when the corresponding key is absent from the configuration ----

    /** Default value for {@link #MAX_NUM_INDEXES_PER_SPLIT_KEY}. */
    private static final long DEFAULT_MAX_NUM_INDEXES_PER_SPLIT = 200L;
    /** Default value for {@link #MAX_COMBINED_INDEX_SIZE_PER_SPLIT_KEY}: 10 GiB. */
    private static final long DEFAULT_MAX_COMBINED_INDEX_SIZE_PER_SPLIT = 10L * 1024L * 1024L * 1024L;

    /** Shared empty array handed out by {@code LuceneIndexInputSplit.getLocations()}. */
    private static final String[] EMPTY_NODE_ARRAY = {};

    // ---- Per-instance state, populated by loadConfig(Configuration) ----

    private Path[] inputPaths;
    private PathFilter indexDirPathFilter;
    private long maxCombinedIndexSizePerSplit;
    private long maxNumIndexesPerSplit;

    /* loaded from: input_file:com/twitter/elephantbird/mapreduce/input/LuceneIndexInputFormat$LuceneIndexInputSplit.class */
    public static class LuceneIndexInputSplit extends InputSplit implements Writable, Comparable<LuceneIndexInputSplit> {
        private List<Path> indexDirs;
        private Long length;

        public LuceneIndexInputSplit() {
        }

        public LuceneIndexInputSplit(List<Path> list, long j) {
            this.indexDirs = list;
            this.length = Long.valueOf(j);
        }

        public void combine(LuceneIndexInputSplit luceneIndexInputSplit) {
            this.indexDirs.addAll(luceneIndexInputSplit.getIndexDirs());
            this.length = Long.valueOf(this.length.longValue() + luceneIndexInputSplit.getLength());
        }

        public long getLength() {
            return this.length.longValue();
        }

        public String[] getLocations() throws IOException, InterruptedException {
            return LuceneIndexInputFormat.EMPTY_NODE_ARRAY;
        }

        public List<Path> getIndexDirs() {
            return this.indexDirs;
        }

        public void write(DataOutput dataOutput) throws IOException {
            dataOutput.writeLong(this.length.longValue());
            dataOutput.writeInt(this.indexDirs.size());
            Iterator<Path> it = this.indexDirs.iterator();
            while (it.hasNext()) {
                Text.writeString(dataOutput, it.next().toString());
            }
        }

        public void readFields(DataInput dataInput) throws IOException {
            this.length = Long.valueOf(dataInput.readLong());
            int readInt = dataInput.readInt();
            this.indexDirs = Lists.newLinkedList();
            for (int i = 0; i < readInt; i++) {
                this.indexDirs.add(new Path(Text.readString(dataInput)));
            }
        }

        @Override // java.lang.Comparable
        public int compareTo(LuceneIndexInputSplit luceneIndexInputSplit) {
            return this.length.compareTo(Long.valueOf(luceneIndexInputSplit.getLength()));
        }

        public String toString() {
            return "LuceneIndexInputSplit<indexDirs:" + this.indexDirs.toString() + " length:" + this.length + ">";
        }
    }

    /**
     * Supplies the filter used to recognize index directories while scanning
     * the input paths. This implementation delegates to
     * {@link LuceneIndexOutputFormat#newIndexDirFilter}.
     *
     * @param configuration the job configuration
     * @return the filter; {@code loadConfig} rejects a null result
     * @throws IOException if the delegate fails to build the filter
     */
    public PathFilter getIndexDirPathFilter(Configuration configuration) throws IOException {
        PathFilter indexDirFilter = LuceneIndexOutputFormat.newIndexDirFilter(configuration);
        return indexDirFilter;
    }

    /**
     * Reads everything this input format needs from the configuration into
     * instance fields: the input paths, the index-directory filter, and the two
     * per-split limits.
     *
     * @param configuration the job configuration
     * @throws IOException          if the path filter cannot be created
     * @throws NullPointerException if no input paths were set, or if
     *                              {@link #getIndexDirPathFilter} returns null
     */
    @VisibleForTesting
    void loadConfig(Configuration configuration) throws IOException {
        this.inputPaths = getInputPaths(configuration);
        this.indexDirPathFilter = Preconditions.checkNotNull(getIndexDirPathFilter(configuration), "You must provide a non-null PathFilter");
        // Both limit getters return a primitive long with a built-in default, so
        // there is nothing to null-check here.
        this.maxCombinedIndexSizePerSplit = getMaxCombinedIndexSizePerSplit(configuration);
        this.maxNumIndexesPerSplit = getMaxNumIndexesPerSplit(configuration);
    }

    /**
     * Computes the splits for a job: loads the configuration, builds one split
     * per index directory, then merges those into larger splits within the
     * configured size and count limits.
     *
     * @param jobContext the job whose configuration drives split generation
     * @return the combined splits
     */
    @Override
    public List<InputSplit> getSplits(JobContext jobContext) throws IOException, InterruptedException {
        loadConfig(HadoopCompat.getConfiguration(jobContext));

        Configuration conf = HadoopCompat.getConfiguration(jobContext);
        PriorityQueue<LuceneIndexInputSplit> perIndexSplits = findSplits(conf);
        return combineSplits(perIndexSplits, this.maxCombinedIndexSizePerSplit, this.maxNumIndexesPerSplit);
    }

    /**
     * Builds one single-directory split for every index directory found under
     * the configured input paths.
     *
     * <p>Requires {@code loadConfig} to have populated the input paths and the
     * index-directory filter.
     *
     * @param configuration used to resolve the file system of each path
     * @return a queue of splits in their natural order (ascending length)
     * @throws IOException if collecting paths or sizing a directory fails
     */
    protected PriorityQueue<LuceneIndexInputSplit> findSplits(Configuration configuration) throws IOException {
        // Pass 1: gather every directory accepted by the index-directory filter.
        LinkedList<Path> indexDirs = Lists.newLinkedList();
        for (Path inputPath : this.inputPaths) {
            HdfsUtils.collectPaths(inputPath, inputPath.getFileSystem(configuration), this.indexDirPathFilter, indexDirs);
        }

        // Pass 2: wrap each directory in its own split, sized by the directory's contents.
        PriorityQueue<LuceneIndexInputSplit> splits = new PriorityQueue<>();
        for (Path indexDir : indexDirs) {
            // A mutable list is required because combine() appends to it later.
            LinkedList<Path> singleDir = Lists.newLinkedList();
            singleDir.add(indexDir);
            long sizeInBytes = HdfsUtils.getDirectorySize(indexDir, indexDir.getFileSystem(configuration));
            splits.add(new LuceneIndexInputSplit(singleDir, sizeInBytes));
        }
        return splits;
    }

    /**
     * Greedily merges splits into larger ones.
     *
     * <p>Splits are taken from the queue in its order (smallest length first,
     * per {@code LuceneIndexInputSplit.compareTo}). Each one keeps absorbing
     * the next queued split until doing so would exceed the size limit, or the
     * current split has reached the index-count limit.
     *
     * @param priorityQueue the splits to combine; drained by this call
     * @param j             maximum combined length of one output split
     * @param j2            maximum number of index directories in one output split
     * @return the combined splits, in the order they were produced
     */
    protected List<InputSplit> combineSplits(PriorityQueue<LuceneIndexInputSplit> priorityQueue, long j, long j2) {
        List<InputSplit> combined = Lists.newLinkedList();

        for (LuceneIndexInputSplit current = priorityQueue.poll(); current != null; current = priorityQueue.poll()) {
            while (current.getLength() < j) {
                LuceneIndexInputSplit candidate = priorityQueue.peek();
                if (candidate == null) {
                    break; // nothing left to absorb
                }
                if (current.getLength() + candidate.getLength() > j) {
                    break; // absorbing the candidate would exceed the size limit
                }
                if (current.getIndexDirs().size() >= j2) {
                    break; // already holding the maximum number of indexes
                }
                current.combine(candidate);
                priorityQueue.poll(); // the candidate is now part of current; drop it
            }
            combined.add(current);
        }
        return combined;
    }

    /**
     * Stores the queries in the configuration under {@link #QUERIES_KEY}.
     *
     * @param list          the queries; must be non-null and non-empty
     * @param configuration the configuration to write to
     * @throws NullPointerException     if {@code list} is null
     * @throws IllegalArgumentException if {@code list} is empty
     * @throws IOException              if writing the list to the configuration fails
     */
    public static void setQueries(List<String> list, Configuration configuration) throws IOException {
        List<String> queries = Preconditions.checkNotNull(list);
        boolean hasQueries = !queries.isEmpty();
        Preconditions.checkArgument(hasQueries);
        HadoopUtils.writeStringListToConfAsJson(QUERIES_KEY, queries, configuration);
    }

    /**
     * Reads back the queries stored under {@link #QUERIES_KEY}.
     *
     * @param configuration the configuration to read from
     * @return the stored queries
     * @throws NullPointerException if no queries were found in the configuration
     * @throws IOException          if reading the list from the configuration fails
     */
    public static List<String> getQueries(Configuration configuration) throws IOException {
        List<String> storedQueries = HadoopUtils.readStringListFromConfAsJson(QUERIES_KEY, configuration);
        return Preconditions.checkNotNull(storedQueries, "You must call LuceneIndexInputFormat.setQueries()");
    }

    /**
     * Reports whether a value is present under {@link #QUERIES_KEY}.
     *
     * @param configuration the configuration to inspect
     * @return {@code true} if the key has a non-null value
     */
    public static boolean queriesSet(Configuration configuration) {
        String rawQueries = configuration.get(QUERIES_KEY);
        return rawQueries != null;
    }

    /**
     * Stores the input paths in the configuration under {@link #INPUT_PATHS_KEY}.
     * Each path is first qualified against its own file system.
     *
     * @param list          the paths to search for indexes; must be non-null and non-empty
     * @param configuration the configuration to write to
     * @throws NullPointerException     if {@code list} is null
     * @throws IllegalArgumentException if {@code list} is empty
     * @throws IOException              if a path's file system cannot be resolved
     */
    public static void setInputPaths(List<Path> list, Configuration configuration) throws IOException {
        Preconditions.checkNotNull(list);
        Preconditions.checkArgument(!list.isEmpty());

        String[] qualifiedPaths = new String[list.size()];
        int next = 0;
        for (Path inputPath : list) {
            qualifiedPaths[next++] = inputPath.getFileSystem(configuration).makeQualified(inputPath).toString();
        }
        // NOTE(review): presumably setStrings stores a comma-joined value, so a path
        // containing a comma may not round-trip through getInputPaths — confirm.
        configuration.setStrings(INPUT_PATHS_KEY, qualifiedPaths);
    }

    /**
     * Reads the input paths stored under {@link #INPUT_PATHS_KEY}.
     *
     * @param configuration the configuration to read from
     * @return one {@link Path} per stored string, in stored order
     * @throws NullPointerException if no input paths were found in the configuration
     */
    public static Path[] getInputPaths(Configuration configuration) {
        String[] storedPaths = Preconditions.checkNotNull(configuration.getStrings(INPUT_PATHS_KEY), "You must call LuceneIndexInputFormat.setInputPaths()");
        Path[] inputPaths = new Path[storedPaths.length];
        int next = 0;
        for (String storedPath : storedPaths) {
            inputPaths[next++] = new Path(storedPath);
        }
        return inputPaths;
    }

    /**
     * Sets the maximum combined index size allowed in a single split.
     *
     * @param j             the limit, stored under {@link #MAX_COMBINED_INDEX_SIZE_PER_SPLIT_KEY}
     * @param configuration the configuration to write to
     */
    public static void setMaxCombinedIndexSizePerSplitBytes(long j, Configuration configuration) {
        final String key = MAX_COMBINED_INDEX_SIZE_PER_SPLIT_KEY;
        configuration.setLong(key, j);
    }

    /**
     * Reads the maximum combined index size allowed in a single split.
     *
     * @param configuration the configuration to read from
     * @return the configured limit, or the built-in default when the key is absent
     */
    public static long getMaxCombinedIndexSizePerSplit(Configuration configuration) {
        long maxCombinedSize = configuration.getLong(MAX_COMBINED_INDEX_SIZE_PER_SPLIT_KEY, DEFAULT_MAX_COMBINED_INDEX_SIZE_PER_SPLIT);
        return maxCombinedSize;
    }

    /**
     * Sets the maximum number of index directories allowed in a single split.
     *
     * @param j             the limit, stored under {@link #MAX_NUM_INDEXES_PER_SPLIT_KEY}
     * @param configuration the configuration to write to
     */
    public static void setMaxNumIndexesPerSplit(long j, Configuration configuration) {
        final String key = MAX_NUM_INDEXES_PER_SPLIT_KEY;
        configuration.setLong(key, j);
    }

    /**
     * Reads the maximum number of index directories allowed in a single split.
     *
     * @param configuration the configuration to read from
     * @return the configured limit, or the built-in default when the key is absent
     */
    public static long getMaxNumIndexesPerSplit(Configuration configuration) {
        long maxNumIndexes = configuration.getLong(MAX_NUM_INDEXES_PER_SPLIT_KEY, DEFAULT_MAX_NUM_INDEXES_PER_SPLIT);
        return maxNumIndexes;
    }
}
