package org.apache.gobblin.source.extractor.hadoop;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import org.apache.gobblin.configuration.SourceState;
import org.apache.gobblin.configuration.State;
import org.apache.gobblin.configuration.WorkUnitState;
import org.apache.gobblin.source.extractor.Extractor;
import org.apache.gobblin.source.extractor.extract.AbstractSource;
import org.apache.gobblin.source.workunit.Extract;
import org.apache.gobblin.source.workunit.WorkUnit;
import org.apache.gobblin.util.HadoopUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.util.ReflectionUtils;

/* loaded from: input_file:org/apache/gobblin/source/extractor/hadoop/HadoopFileInputSource.class */
public abstract class HadoopFileInputSource<S, D, K, V> extends AbstractSource<S, D> {
    private static final String HADOOP_SOURCE_KEY_PREFIX = "source.hadoop.";
    public static final String FILE_INPUT_FORMAT_CLASS_KEY = "source.hadoop.file.input.format.class";
    public static final String FILE_SPLITS_DESIRED_KEY = "source.hadoop.file.splits.desired";
    public static final int DEFAULT_FILE_SPLITS_DESIRED = 1;
    public static final String FILE_INPUT_PATHS_KEY = "source.hadoop.file.input.paths";
    public static final String FILE_INPUT_SPLIT_MINSIZE = "source.hadoop.file.input.split.minsize";
    public static final String FILE_INPUT_SPLIT_MAXSIZE = "source.hadoop.file.input.split.maxsize";
    public static final String FILE_INPUT_READ_KEYS_KEY = "source.hadoop.file.read.keys";
    public static final boolean DEFAULT_FILE_INPUT_READ_KEYS = false;
    public static final String FILE_SPLIT_PATH_KEY = "source.hadoop.file.split.path";
    static final String FILE_SPLIT_BYTES_STRING_KEY = "source.hadoop.file.split.bytes.string";

    /* loaded from: input_file:org/apache/gobblin/source/extractor/hadoop/HadoopFileInputSource$DummyTaskAttemptIDFactory.class */
    private static class DummyTaskAttemptIDFactory extends TaskAttemptID {
        private DummyTaskAttemptIDFactory() {
        }

        public static TaskAttemptID newTaskAttemptID() {
            return TaskAttemptID.forName("attempt_" + Long.toString(System.currentTimeMillis()) + "_0_m_0_0");
        }
    }

    public List<WorkUnit> getWorkunits(SourceState sourceState) {
        try {
            Job job = Job.getInstance(new Configuration());
            if (sourceState.contains(FILE_INPUT_PATHS_KEY)) {
                Iterator it = sourceState.getPropAsList(FILE_INPUT_PATHS_KEY).iterator();
                while (it.hasNext()) {
                    FileInputFormat.addInputPath(job, new Path((String) it.next()));
                }
            }
            if (sourceState.contains(FILE_INPUT_SPLIT_MINSIZE)) {
                FileInputFormat.setMinInputSplitSize(job, sourceState.getPropAsLong(FILE_INPUT_SPLIT_MINSIZE));
            }
            if (sourceState.contains(FILE_INPUT_SPLIT_MAXSIZE)) {
                FileInputFormat.setMaxInputSplitSize(job, sourceState.getPropAsLong(FILE_INPUT_SPLIT_MAXSIZE));
            }
            List<FileSplit> splits = getFileInputFormat(sourceState, job.getConfiguration()).getSplits(job);
            if (splits == null || splits.isEmpty()) {
                return ImmutableList.of();
            }
            Extract.TableType valueOf = sourceState.contains("extract.table.type") ? Extract.TableType.valueOf(sourceState.getProp("extract.table.type").toUpperCase()) : null;
            String prop = sourceState.getProp("extract.namespace");
            String prop2 = sourceState.getProp("extract.table.name");
            ArrayList newArrayListWithCapacity = Lists.newArrayListWithCapacity(splits.size());
            for (FileSplit fileSplit : splits) {
                WorkUnit create = WorkUnit.create(createExtract(valueOf, prop, prop2));
                create.setProp(FILE_SPLIT_BYTES_STRING_KEY, HadoopUtils.serializeToString(fileSplit));
                create.setProp(FILE_SPLIT_PATH_KEY, fileSplit.getPath().toString());
                newArrayListWithCapacity.add(create);
            }
            return newArrayListWithCapacity;
        } catch (IOException e) {
            throw new RuntimeException("Failed to get workunits", e);
        }
    }

    public Extractor<S, D> getExtractor(WorkUnitState workUnitState) throws IOException {
        if (!workUnitState.contains(FILE_SPLIT_BYTES_STRING_KEY)) {
            throw new IOException("No serialized FileSplit found in WorkUnitState " + workUnitState.getId());
        }
        Configuration configuration = new Configuration();
        FileInputFormat<K, V> fileInputFormat = getFileInputFormat(workUnitState, configuration);
        FileSplit deserializeFromString = HadoopUtils.deserializeFromString(FileSplit.class, workUnitState.getProp(FILE_SPLIT_BYTES_STRING_KEY));
        TaskAttemptContext taskAttemptContext = getTaskAttemptContext(configuration, DummyTaskAttemptIDFactory.newTaskAttemptID());
        try {
            RecordReader<K, V> createRecordReader = fileInputFormat.createRecordReader(deserializeFromString, taskAttemptContext);
            createRecordReader.initialize(deserializeFromString, taskAttemptContext);
            return getExtractor(workUnitState, createRecordReader, deserializeFromString, workUnitState.getPropAsBoolean(FILE_INPUT_READ_KEYS_KEY, false));
        } catch (InterruptedException e) {
            throw new IOException(e);
        }
    }

    public void shutdown(SourceState sourceState) {
    }

    protected FileInputFormat<K, V> getFileInputFormat(State state, Configuration configuration) {
        Preconditions.checkArgument(state.contains(FILE_INPUT_FORMAT_CLASS_KEY));
        try {
            return (FileInputFormat) ReflectionUtils.newInstance(Class.forName(state.getProp(FILE_INPUT_FORMAT_CLASS_KEY)), configuration);
        } catch (ClassNotFoundException e) {
            throw new RuntimeException(e);
        }
    }

    protected abstract HadoopFileInputExtractor<S, D, K, V> getExtractor(WorkUnitState workUnitState, RecordReader<K, V> recordReader, FileSplit fileSplit, boolean z);

    private static TaskAttemptContext getTaskAttemptContext(Configuration configuration, TaskAttemptID taskAttemptID) {
        try {
            try {
                return (TaskAttemptContext) Class.forName("org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl").getDeclaredConstructor(Configuration.class, TaskAttemptID.class).newInstance(configuration, taskAttemptID);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        } catch (ClassNotFoundException e2) {
            throw new RuntimeException(e2);
        }
    }
}
