package org.apache.gobblin.compaction.dataset;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.gobblin.compaction.mapreduce.MRCompactor;
import org.apache.gobblin.configuration.State;
import org.apache.gobblin.source.extractor.extract.kafka.ConfigStoreUtils;
import org.apache.gobblin.util.DatasetFilterUtils;
import org.apache.gobblin.util.HadoopUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/* loaded from: input_file:org/apache/gobblin/compaction/dataset/DatasetsFinder.class */
/**
 * Base class for finders that locate the datasets to be compacted.
 *
 * <p>On construction it reads, from the supplied {@link State}: the compaction input directory,
 * the destination directory, the temporary output directory, blacklist/whitelist patterns
 * (augmented with topics obtained through {@link ConfigStoreUtils}), high/normal priority
 * patterns, and the "recompact from destination paths" flag. Subclasses provide the actual
 * discovery logic through {@link #findDistinctDatasets()}.
 */
public abstract class DatasetsFinder implements org.apache.gobblin.dataset.DatasetsFinder<Dataset> {
    /** Priority returned for dataset names matching a high-priority pattern. */
    public static final double HIGH_PRIORITY = 3.0d;
    /** Priority returned for dataset names matching a normal-priority pattern. */
    public static final double NORMAL_PRIORITY = 2.0d;
    /** Priority returned for dataset names matching neither priority pattern list. */
    public static final double LOW_PRIORITY = 1.0d;
    /** Sub-directory appended to the temporary destination directory. */
    public static final String TMP_OUTPUT_SUBDIR = "output";

    protected final State state;
    protected final Configuration conf;
    protected final FileSystem fs;
    protected final String inputDir;
    protected final String destDir;
    protected final String tmpOutputDir;
    protected final List<Pattern> blacklist;
    protected final List<Pattern> whitelist;
    protected final List<Pattern> highPriority;
    protected final List<Pattern> normalPriority;
    protected final boolean recompactDatasets;

    /**
     * Creates a finder whose file system is resolved from {@code state}.
     *
     * @param state job state holding the compaction properties
     * @throws RuntimeException if the file system cannot be obtained
     * @throws IllegalArgumentException if a required directory property is missing
     */
    public DatasetsFinder(State state) {
        this(state, getFileSystem(state));
    }

    /**
     * Creates a finder that uses an explicitly supplied file system.
     *
     * <p>NOTE(review): a decompiler annotation on this constructor indicated that it was
     * originally package-private; it is kept public here so existing callers keep compiling.
     *
     * @param state job state holding the compaction properties
     * @param fileSystem file system stored in {@link #fs}
     * @throws IllegalArgumentException if a required directory property is missing
     */
    @VisibleForTesting
    public DatasetsFinder(State state, FileSystem fileSystem) {
        this.state = state;
        this.conf = HadoopUtils.getConfFromState(state);
        this.fs = fileSystem;
        this.inputDir = getInputDir();
        this.destDir = getDestDir();
        this.tmpOutputDir = getTmpOutputDir();
        this.blacklist = DatasetFilterUtils.getPatternList(state, MRCompactor.COMPACTION_BLACKLIST);
        this.whitelist = DatasetFilterUtils.getPatternList(state, MRCompactor.COMPACTION_WHITELIST);
        // Must run after blacklist/whitelist are assigned: it appends to both lists.
        setTopicsFromConfigStore(state);
        this.highPriority = getHighPriorityPatterns();
        this.normalPriority = getNormalPriorityPatterns();
        this.recompactDatasets = getRecompactDatasets();
    }

    /**
     * Collects blacklist/whitelist topics through {@link ConfigStoreUtils} and appends the
     * corresponding patterns to {@link #blacklist} and {@link #whitelist}.
     */
    private void setTopicsFromConfigStore(State state) {
        Set<String> blacklistTopics = new HashSet<String>();
        Set<String> whitelistTopics = new HashSet<String>();
        ConfigStoreUtils.setTopicsFromConfigStore(state.getProperties(), blacklistTopics, whitelistTopics,
                MRCompactor.COMPACTION_BLACKLIST, MRCompactor.COMPACTION_WHITELIST);

        List<String> blacklistNames = new ArrayList<String>(blacklistTopics);
        List<String> whitelistNames = new ArrayList<String>(whitelistTopics);
        this.blacklist.addAll(DatasetFilterUtils.getPatternsFromStrings(blacklistNames));
        this.whitelist.addAll(DatasetFilterUtils.getPatternsFromStrings(whitelistNames));
    }

    /**
     * Finds the distinct datasets to compact.
     *
     * @return the set of datasets found
     * @throws IOException if discovery fails
     */
    public abstract Set<Dataset> findDistinctDatasets() throws IOException;

    /**
     * Returns the result of {@link #findDistinctDatasets()} copied into a new list.
     *
     * @return a new list containing the distinct datasets
     * @throws IOException if {@link #findDistinctDatasets()} fails
     */
    public List<Dataset> findDatasets() throws IOException {
        return Lists.newArrayList(findDistinctDatasets());
    }

    /**
     * Returns the destination directory as a {@link Path}.
     *
     * @return a path built from {@link #destDir}
     */
    public Path commonDatasetRoot() {
        return new Path(this.destDir);
    }

    /**
     * Reads a property that must be present in {@link #state}.
     *
     * @param key property key to look up
     * @return the property value
     * @throws IllegalArgumentException if the state does not contain {@code key}
     */
    private String getRequiredProp(String key) {
        Preconditions.checkArgument(this.state.contains(key), "Missing required property " + key);
        return this.state.getProp(key);
    }

    /** Returns the required compaction input directory property. */
    private String getInputDir() {
        return getRequiredProp(MRCompactor.COMPACTION_INPUT_DIR);
    }

    /** Returns the required compaction destination directory property. */
    private String getDestDir() {
        return getRequiredProp(MRCompactor.COMPACTION_DEST_DIR);
    }

    /**
     * Returns the temporary output directory: the configured (or default) temporary destination
     * directory with {@link #TMP_OUTPUT_SUBDIR} appended.
     */
    private String getTmpOutputDir() {
        String tmpDestDir = this.state.getProp(MRCompactor.COMPACTION_TMP_DEST_DIR,
                MRCompactor.DEFAULT_COMPACTION_TMP_DEST_DIR);
        return new Path(tmpDestDir, TMP_OUTPUT_SUBDIR).toString();
    }

    /**
     * Obtains the file system for the given state, using the configured file system URI when
     * one is present and the configuration's default file system otherwise.
     *
     * @throws RuntimeException wrapping the {@link IOException} if the file system cannot be obtained
     */
    private static FileSystem getFileSystem(State state) {
        try {
            Configuration hadoopConf = HadoopUtils.getConfFromState(state);
            if (state.contains(MRCompactor.COMPACTION_FILE_SYSTEM_URI)) {
                URI fsUri = URI.create(state.getProp(MRCompactor.COMPACTION_FILE_SYSTEM_URI));
                return FileSystem.get(fsUri, hadoopConf);
            }
            return FileSystem.get(hadoopConf);
        } catch (IOException e) {
            throw new RuntimeException("Failed to get filesystem for datasetsFinder.", e);
        }
    }

    /** Builds the high-priority pattern list from the state (empty default). */
    private List<Pattern> getHighPriorityPatterns() {
        return DatasetFilterUtils.getPatternsFromStrings(
                this.state.getPropAsList(MRCompactor.COMPACTION_HIGH_PRIORITY_TOPICS, ""));
    }

    /** Builds the normal-priority pattern list from the state (empty default). */
    private List<Pattern> getNormalPriorityPatterns() {
        return DatasetFilterUtils.getPatternsFromStrings(
                this.state.getPropAsList(MRCompactor.COMPACTION_NORMAL_PRIORITY_TOPICS, ""));
    }

    /** Reads the "recompact from destination paths" flag; defaults to {@code false}. */
    private boolean getRecompactDatasets() {
        return this.state.getPropAsBoolean(MRCompactor.COMPACTION_RECOMPACT_FROM_DEST_PATHS, false);
    }

    /**
     * Determines the priority of a dataset from its name.
     *
     * <p>NOTE(review): a decompiler annotation indicated this method was originally
     * {@code protected}; it is kept public so existing callers keep compiling.
     *
     * @param str dataset name to test against the priority patterns
     * @return {@link #HIGH_PRIORITY} if the name matches a high-priority pattern, otherwise
     *     {@link #NORMAL_PRIORITY} if it matches a normal-priority pattern, otherwise
     *     {@link #LOW_PRIORITY}
     */
    public double getDatasetPriority(String str) {
        if (DatasetFilterUtils.stringInPatterns(str, this.highPriority)) {
            return HIGH_PRIORITY;
        }
        if (DatasetFilterUtils.stringInPatterns(str, this.normalPriority)) {
            return NORMAL_PRIORITY;
        }
        return LOW_PRIORITY;
    }
}
