package org.apache.druid.indexer.hadoop;

import com.fasterxml.jackson.core.type.TypeReference;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Supplier;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.indexer.HadoopDruidIndexerConfig;
import org.apache.druid.indexer.JobHelper;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

/* loaded from: input_file:org/apache/druid/indexer/hadoop/DatasourceInputFormat.class */
/**
 * Hadoop (new-API) {@link InputFormat} that hands out {@link DatasourceInputSplit}s built from the
 * segment lists stored in the job {@link Configuration}, and reads them with a
 * {@link DatasourceRecordReader}.
 *
 * <p>Data sources are registered up front with
 * {@link #addDataSource(Configuration, DatasourceIngestionSpec, List, long)}, which serializes the
 * ingestion spec, the segment list and the maximum split size into the configuration under keys
 * derived from the data source name. {@link #getSplits(JobContext)} later reads those entries back
 * and groups the segments of each data source into splits.
 */
public class DatasourceInputFormat extends InputFormat<NullWritable, InputRow> {
    private static final Logger logger = new Logger(DatasourceInputFormat.class);

    /** Configuration key holding the JSON-encoded list of registered data source names. */
    private static final String CONF_DATASOURCES = "druid.datasource.input.datasources";
    /** Key prefix; {@code <prefix>.<dataSource>} holds the JSON-encoded ingestion spec. */
    private static final String CONF_SCHEMA = "druid.datasource.input.schema";
    /** Key prefix; {@code <prefix>.<dataSource>} holds the JSON-encoded segment list. */
    private static final String CONF_SEGMENTS = "druid.datasource.input.segments";
    /** Key prefix; {@code <prefix>.<dataSource>} holds the maximum split size as a decimal string. */
    private static final String CONF_MAX_SPLIT_SIZE = "druid.datasource.input.split.max.size";

    /** Upper bound on the number of location hints attached to a single split. */
    private static final int MAX_SPLIT_LOCATIONS = 3;

    /**
     * Factory for the old-API input format that is used only to look up block locations of segment
     * files. Replaceable through {@link #setSupplier(Supplier)} so tests can inject a stub.
     */
    private Supplier<org.apache.hadoop.mapred.InputFormat> supplier = new Supplier<org.apache.hadoop.mapred.InputFormat>() {
        @Override
        public org.apache.hadoop.mapred.InputFormat get() {
            return new TextInputFormat() {
                // Each segment file is treated as a single unit when asking for its locations.
                @Override
                protected boolean isSplitable(FileSystem fileSystem, Path path) {
                    return false;
                }

                // Stat every configured input path directly instead of using the inherited listing logic.
                @Override
                protected FileStatus[] listStatus(JobConf jobConf) throws IOException {
                    List<FileStatus> statuses = new ArrayList<>();
                    for (Path path : FileInputFormat.getInputPaths(jobConf)) {
                        statuses.add(path.getFileSystem(jobConf).getFileStatus(path));
                    }
                    return statuses.toArray(new FileStatus[0]);
                }
            };
        }
    };

    /**
     * Builds the input splits for every data source registered in the job configuration.
     *
     * <p>For each data source the segments are packed greedily, in order, into splits whose summed
     * segment size does not exceed the effective maximum split size; a segment that is larger than
     * the limit still gets a split of its own. When the effective limit is positive the segments are
     * first sorted by ascending size. With a limit of zero or below, every segment ends up in its own
     * split.
     *
     * @param jobContext job context whose configuration carries the registered data sources
     * @return all splits, grouped per data source in registration order
     * @throws IOException if a configuration entry cannot be deserialized
     * @throws ISE if a registered data source has no segments
     */
    @Override
    public List<InputSplit> getSplits(JobContext jobContext) throws IOException {
        JobConf jobConf = new JobConf(jobContext.getConfiguration());
        List<InputSplit> splits = new ArrayList<>();

        for (String dataSource : getDataSources(jobConf)) {
            List<WindowedDataSegment> segments = getSegments(jobConf, dataSource);
            if (segments == null || segments.isEmpty()) {
                throw new ISE("No segments found to read for dataSource[%s]", dataSource);
            }
            for (int i = 0; i < segments.size(); i++) {
                WindowedDataSegment current = segments.get(i);
                logger.info(
                    "Segment %,d/%,d for dataSource[%s] has identifier[%s], interval[%s]",
                    i,
                    segments.size(),
                    dataSource,
                    current.getSegment().getId(),
                    current.getInterval()
                );
            }

            long maxSplitSize = resolveMaxSplitSize(jobConf, dataSource, segments);
            if (maxSplitSize > 0) {
                // Segments will be combined, so order them by size before packing.
                segments.sort(Comparator.comparingLong(s -> s.getSegment().getSize()));
            }

            org.apache.hadoop.mapred.InputFormat inputFormat = this.supplier.get();
            List<WindowedDataSegment> pending = new ArrayList<>();
            long pendingSize = 0;
            for (WindowedDataSegment segment : segments) {
                long segmentSize = segment.getSegment().getSize();
                // Close the current group before it would overflow; never emit an empty group.
                if (pendingSize + segmentSize > maxSplitSize && pendingSize > 0) {
                    splits.add(toDataSourceSplit(pending, inputFormat, jobConf));
                    pending = new ArrayList<>();
                    pendingSize = 0;
                }
                pending.add(segment);
                pendingSize += segmentSize;
            }
            if (!pending.isEmpty()) {
                splits.add(toDataSourceSplit(pending, inputFormat, jobConf));
            }
        }

        logger.info("Number of splits [%d]", splits.size());
        return splits;
    }

    /**
     * Returns the configured maximum split size, or, when that value is negative, the total segment
     * size divided by the configured number of map tasks. If the number of map tasks is not
     * positive, the negative configured value is returned unchanged.
     */
    private static long resolveMaxSplitSize(JobConf jobConf, String dataSource, List<WindowedDataSegment> segments) {
        long configured = getMaxSplitSize(jobConf, dataSource);
        if (configured >= 0) {
            return configured;
        }
        long totalSize = 0;
        for (WindowedDataSegment segment : segments) {
            totalSize += segment.getSegment().getSize();
        }
        int numMapTasks = jobConf.getNumMapTasks();
        return numMapTasks > 0 ? totalSize / numMapTasks : configured;
    }

    /**
     * Creates a fresh {@link DatasourceRecordReader}; neither argument is inspected here.
     */
    @Override
    public RecordReader<NullWritable, InputRow> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) {
        return new DatasourceRecordReader();
    }

    /**
     * Replaces the factory of the input format used for location lookups.
     *
     * @param supplier the new factory
     * @return this instance, to allow chaining
     */
    @VisibleForTesting
    DatasourceInputFormat setSupplier(Supplier<org.apache.hadoop.mapred.InputFormat> supplier) {
        this.supplier = supplier;
        return this;
    }

    /** Wraps a group of segments into a split annotated with its most frequent locations. */
    private DatasourceInputSplit toDataSourceSplit(List<WindowedDataSegment> segments, org.apache.hadoop.mapred.InputFormat inputFormat, JobConf jobConf) {
        String[] locations = getFrequentLocations(getLocations(segments, inputFormat, jobConf));
        return new DatasourceInputSplit(segments, locations);
    }

    /**
     * Lazily streams the locations reported by {@code inputFormat} for each of the given segments.
     *
     * <p>The stream is sequential, and consuming it overwrites the input paths of {@code jobConf}
     * once per segment. Failures while obtaining splits or locations are logged and contribute no
     * locations instead of aborting the stream.
     *
     * @param segments segments whose locations are wanted
     * @param inputFormat old-API input format used to compute splits for each segment path
     * @param jobConf job configuration; its input paths are modified as a side effect
     * @return one element per reported location, duplicates included
     */
    @VisibleForTesting
    static Stream<String> getLocations(List<WindowedDataSegment> segments, org.apache.hadoop.mapred.InputFormat inputFormat, JobConf jobConf) {
        return segments.stream()
            .sequential()
            .flatMap(segment -> locationsOfSegment(segment, inputFormat, jobConf));
    }

    /** Points {@code jobConf} at the segment's URI and streams the locations of the resulting splits. */
    private static Stream<String> locationsOfSegment(WindowedDataSegment segment, org.apache.hadoop.mapred.InputFormat inputFormat, JobConf jobConf) {
        FileInputFormat.setInputPaths(jobConf, new Path(JobHelper.getURIFromSegment(segment.getSegment())));
        try {
            return Arrays.stream(inputFormat.getSplits(jobConf, 1)).flatMap(DatasourceInputFormat::locationsOfSplit);
        } catch (Exception e) {
            // Locations are only hints: log and carry on without them.
            logger.error(e, "Exception getting splits");
            return Stream.empty();
        }
    }

    /** Streams the locations of one old-API split, or nothing if they cannot be obtained. */
    private static Stream<String> locationsOfSplit(org.apache.hadoop.mapred.InputSplit split) {
        try {
            return Arrays.stream(split.getLocations());
        } catch (Exception e) {
            // Locations are only hints: log and carry on without them.
            logger.error(e, "Exception getting locations");
            return Stream.empty();
        }
    }

    /**
     * Picks the most frequently occurring locations from the given stream.
     *
     * @param stream locations, possibly containing duplicates; consumed by this call
     * @return at most three distinct locations, ordered by descending number of occurrences and,
     *     for equal counts, by ascending natural string order
     */
    @VisibleForTesting
    static String[] getFrequentLocations(Stream<String> stream) {
        Map<String, Long> occurrences = stream.collect(
            Collectors.groupingBy(location -> location, Collectors.counting())
        );

        Comparator<Map.Entry<String, Long>> byCountDescending = Map.Entry.comparingByValue(Comparator.reverseOrder());
        Comparator<Map.Entry<String, Long>> byName = Map.Entry.comparingByKey();

        return occurrences.entrySet()
            .stream()
            .sorted(byCountDescending.thenComparing(byName))
            .limit(MAX_SPLIT_LOCATIONS)
            .map(Map.Entry::getKey)
            .toArray(String[]::new);
    }

    /**
     * Reads the names of all data sources registered in the configuration.
     *
     * @param configuration configuration to read from
     * @return the registered names, or an empty list when nothing has been registered yet
     * @throws IOException if the stored value cannot be parsed
     */
    public static List<String> getDataSources(Configuration configuration) throws IOException {
        String json = configuration.get(CONF_DATASOURCES);
        if (json == null) {
            return Collections.emptyList();
        }
        return HadoopDruidIndexerConfig.JSON_MAPPER.readValue(json, new TypeReference<List<String>>() {
        });
    }

    /**
     * Reads and validates the ingestion spec stored for the given data source.
     *
     * @param configuration configuration to read from
     * @param dataSource name of the data source
     * @return the deserialized spec, guaranteed to declare at least one dimension and one metric
     * @throws IOException if the stored value cannot be parsed
     * @throws NullPointerException if no spec is stored for the data source
     * @throws ISE if the spec has no dimensions or no metrics
     */
    public static DatasourceIngestionSpec getIngestionSpec(Configuration configuration, String dataSource) throws IOException {
        String json = configuration.get(StringUtils.format("%s.%s", CONF_SCHEMA, dataSource));
        if (json == null) {
            throw new NullPointerException(StringUtils.format("null spec for dataSource[%s]", dataSource));
        }

        DatasourceIngestionSpec spec = HadoopDruidIndexerConfig.JSON_MAPPER.readValue(json, DatasourceIngestionSpec.class);
        if (spec.getDimensions() == null || spec.getDimensions().isEmpty()) {
            throw new ISE("load schema does not have dimensions");
        }
        if (spec.getMetrics() == null || spec.getMetrics().isEmpty()) {
            throw new ISE("load schema does not have metrics");
        }
        return spec;
    }

    /**
     * Reads the segment list stored for the given data source.
     *
     * <p>NOTE(review): the stored value is passed to the JSON mapper without a null check, so the
     * behavior for a data source that was never registered is whatever the mapper does with a null
     * input - confirm before relying on it.
     *
     * @param configuration configuration to read from
     * @param dataSource name of the data source
     * @return the deserialized segments
     * @throws IOException if the stored value cannot be parsed
     */
    public static List<WindowedDataSegment> getSegments(Configuration configuration, String dataSource) throws IOException {
        String json = configuration.get(StringUtils.format("%s.%s", CONF_SEGMENTS, dataSource));
        return HadoopDruidIndexerConfig.JSON_MAPPER.readValue(json, new TypeReference<List<WindowedDataSegment>>() {
        });
    }

    /**
     * Reads the maximum split size stored for the given data source.
     *
     * @param configuration configuration to read from
     * @param dataSource name of the data source
     * @return the stored value, or {@code 0} when none is stored
     */
    public static long getMaxSplitSize(Configuration configuration, String dataSource) {
        return configuration.getLong(StringUtils.format("%s.%s", CONF_MAX_SPLIT_SIZE, dataSource), 0L);
    }

    /**
     * Registers a data source in the configuration so that it is picked up by
     * {@link #getSplits(JobContext)}.
     *
     * @param configuration configuration to write to
     * @param datasourceIngestionSpec ingestion spec; its data source name is used as the registration key
     * @param list segments to read for this data source
     * @param j maximum split size, stored as-is and returned by {@link #getMaxSplitSize}
     * @throws IOException if a value cannot be serialized or the existing list cannot be parsed
     * @throws ISE if the data source is already registered
     */
    public static void addDataSource(Configuration configuration, DatasourceIngestionSpec datasourceIngestionSpec, List<WindowedDataSegment> list, long j) throws IOException {
        String dataSource = datasourceIngestionSpec.getDataSource();
        List<String> registered = getDataSources(configuration);
        if (registered.contains(dataSource)) {
            throw new ISE("Oops, cannot load the same dataSource twice!");
        }

        // The list returned by getDataSources may be immutable, so extend a copy.
        List<String> updated = new ArrayList<>(registered);
        updated.add(dataSource);

        configuration.set(CONF_DATASOURCES, HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(updated));
        configuration.set(
            StringUtils.format("%s.%s", CONF_SCHEMA, dataSource),
            HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(datasourceIngestionSpec)
        );
        configuration.set(
            StringUtils.format("%s.%s", CONF_SEGMENTS, dataSource),
            HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(list)
        );
        configuration.set(StringUtils.format("%s.%s", CONF_MAX_SPLIT_SIZE, dataSource), String.valueOf(j));
    }
}
