package org.apache.gobblin.data.management.copy;

import com.google.common.base.Optional;
import com.google.common.collect.Lists;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.gobblin.configuration.SourceState;
import org.apache.gobblin.data.management.retention.profile.ConfigurableGlobDatasetFinder;
import org.apache.gobblin.metrics.event.EventSubmitter;
import org.apache.gobblin.util.PathUtils;
import org.apache.gobblin.util.filters.RegexPathFilter;
import org.apache.gobblin.util.reflection.GobblinConstructorUtils;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/gobblin/data/management/copy/SubsetFilesCopyableDatasetFinder.class */
/**
 * Dataset finder that splits the files found directly under each dataset directory into several
 * {@link CopyableDataset}s, one per "identifier".
 *
 * <p>For every file returned by {@code fs.listStatus(dir, pathFilter)}, the file name is matched against
 * {@link #identifierPattern}; the text captured by the pattern's first group is the identifier, and all files
 * of one directory sharing an identifier are handed to a single dataset instance.
 */
public class SubsetFilesCopyableDatasetFinder extends ConfigurableGlobDatasetFinder<CopyableDataset> {
    private static final Logger log = LoggerFactory.getLogger(SubsetFilesCopyableDatasetFinder.class);

    /**
     * Property holding the regular expression applied to each file name; capturing group 1 is the identifier.
     * The key's spelling ("Identifer") is the configured property name and must not be "corrected".
     */
    public static final String IDENTIFIER_PATTERN = "gobblin.copy.subsetFilesDatasetIdentifer";
    /** Identifier pattern used when {@link #IDENTIFIER_PATTERN} is not set. */
    public static final String DEFAULT_IDENTIFIER_PATTERN = "(.*)";
    /** Property holding the regular expression handed to {@link RegexPathFilter} when listing a directory. */
    public static final String SUBSETFILES_REGEX_FILTER = "gobblin.copy.subsetFilesRegexFilter";
    /** Filter regular expression used when {@link #SUBSETFILES_REGEX_FILTER} is not set. */
    public static final String DEFAULT_SUBSETFILES_REGEX_FILTER = ".*";

    /** Result of {@code PathUtils.deepestNonGlobPath(datasetPattern)}, computed once in the constructor. */
    protected final Path rootPath;
    /** Compiled identifier pattern; group 1 of a match is used as the grouping key. */
    protected Pattern identifierPattern;
    /** Filter passed to {@code fs.listStatus}; replaceable through {@link #setPathFilter(PathFilter)}. */
    protected PathFilter pathFilter;
    /**
     * Identifier to files grouping for the directory most recently processed by
     * {@link #generateDatasetsByIdentifier(Path)}; read by {@link #datasetAndPathWithIdentifier(Path, String)}.
     */
    protected final Map<String, List<FileStatus>> idToFileStatuses;
    // NOTE(review): stays null (not Optional.absent()) when the two-argument constructor is used. It is passed
    // as-is to a reflective constructor lookup, so it is deliberately left unchanged here - confirm whether
    // the target dataset constructors tolerate a null Optional.
    private Optional<EventSubmitter> eventSubmitter;
    // Only set by the four-argument constructor; null otherwise.
    private SourceState state;

    /**
     * Creates a finder configured from {@code properties}.
     *
     * @param fileSystem file system forwarded to the superclass
     * @param properties configuration; {@link #IDENTIFIER_PATTERN} and {@link #SUBSETFILES_REGEX_FILTER} are
     *                   read here, falling back to their defaults
     * @throws IOException declared for the superclass constructor
     */
    public SubsetFilesCopyableDatasetFinder(FileSystem fileSystem, Properties properties) throws IOException {
        super(fileSystem, properties);
        this.identifierPattern = Pattern.compile(properties.getProperty(IDENTIFIER_PATTERN, DEFAULT_IDENTIFIER_PATTERN));
        this.pathFilter = new RegexPathFilter(properties.getProperty(SUBSETFILES_REGEX_FILTER, DEFAULT_SUBSETFILES_REGEX_FILTER));
        this.rootPath = PathUtils.deepestNonGlobPath(this.datasetPattern);
        this.idToFileStatuses = new HashMap<>();
    }

    /**
     * Same as {@link #SubsetFilesCopyableDatasetFinder(FileSystem, Properties)}, additionally remembering an
     * event submitter that is forwarded to created datasets.
     *
     * @param eventSubmitter submitter to forward; wrapped with {@code Optional.of}, so it must not be null
     */
    public SubsetFilesCopyableDatasetFinder(FileSystem fileSystem, Properties properties, EventSubmitter eventSubmitter) throws IOException {
        this(fileSystem, properties);
        this.eventSubmitter = Optional.of(eventSubmitter);
    }

    /**
     * Same as the three-argument constructor, additionally remembering a {@link SourceState} that is forwarded
     * to created datasets.
     */
    public SubsetFilesCopyableDatasetFinder(FileSystem fileSystem, Properties properties, EventSubmitter eventSubmitter, SourceState sourceState) throws IOException {
        this(fileSystem, properties, eventSubmitter);
        this.state = sourceState;
    }

    /**
     * Builds the datasets of every directory returned by {@code getDatasetDirs()}.
     *
     * @return the concatenation of {@link #generateDatasetsByIdentifier(Path)} over all dataset directories
     * @throws IOException if listing a directory or constructing a dataset fails
     */
    @Override
    public List<CopyableDataset> findDatasets() throws IOException {
        List<CopyableDataset> datasets = Lists.newArrayList();
        for (FileStatus datasetDir : getDatasetDirs()) {
            datasets.addAll(generateDatasetsByIdentifier(datasetDir.getPath()));
        }
        return datasets;
    }

    /**
     * Groups the files directly under {@code path} by identifier and creates one dataset per identifier.
     *
     * <p>The grouping in {@link #idToFileStatuses} is rebuilt on every call. Previously entries accumulated
     * across calls, so a second directory (or a repeated call) re-emitted datasets for identifiers seen
     * earlier and appended files to lists that had already been handed out.
     *
     * @param path directory whose files (as accepted by {@link #getPathFilter()}) are grouped
     * @return one dataset per distinct identifier found in {@code path}; empty if no file name matches
     * @throws IOException if listing {@code path} or constructing a dataset fails
     */
    public List<CopyableDataset> generateDatasetsByIdentifier(Path path) throws IOException {
        // List first: if listing throws, the previous grouping is left untouched.
        FileStatus[] candidates = this.fs.listStatus(path, getPathFilter());

        // clear() only drops the map entries; lists already passed to earlier datasets are not modified.
        this.idToFileStatuses.clear();
        for (FileStatus candidate : candidates) {
            Matcher matcher = this.identifierPattern.matcher(candidate.getPath().getName());
            if (!matcher.find()) {
                continue;
            }
            String identifier = matcher.group(1);
            List<FileStatus> grouped = this.idToFileStatuses.get(identifier);
            if (grouped == null) {
                grouped = new ArrayList<>();
                this.idToFileStatuses.put(identifier, grouped);
            }
            log.debug("Adding {} to {}", candidate.getPath(), identifier);
            grouped.add(candidate);
        }

        List<CopyableDataset> datasets = Lists.newArrayList();
        for (String identifier : this.idToFileStatuses.keySet()) {
            datasets.add(datasetAndPathWithIdentifier(path, identifier));
        }
        return datasets;
    }

    /**
     * Reflectively instantiates a {@link SubsetFilesCopyableDataset} for one identifier via
     * {@code GobblinConstructorUtils.invokeLongestConstructor}, offering the file system, {@code path}, the
     * properties, the identifier, the files currently grouped under it, the event submitter and the source state.
     *
     * @param path directory the dataset is rooted at
     * @param str  identifier; its files are looked up in {@link #idToFileStatuses} (null if it is unknown)
     * @return the newly constructed dataset
     * @throws IOException wrapping any {@link ReflectiveOperationException} raised during construction
     */
    public CopyableDataset datasetAndPathWithIdentifier(Path path, String str) throws IOException {
        try {
            return (CopyableDataset) GobblinConstructorUtils.invokeLongestConstructor(SubsetFilesCopyableDataset.class, new Object[]{this.fs, path, this.props, str, this.idToFileStatuses.get(str), this.eventSubmitter, this.state});
        } catch (ReflectiveOperationException e) {
            throw new IOException(e);
        }
    }

    /**
     * Not supported by this finder: datasets are created per identifier, not per path.
     *
     * @throws IOException always
     */
    @Override
    public CopyableDataset datasetAtPath(Path path) throws IOException {
        throw new IOException("Not supported in " + getClass().getSimpleName());
    }

    /** @return the filter applied when listing a dataset directory */
    public PathFilter getPathFilter() {
        return this.pathFilter;
    }

    /** @param pathFilter filter to apply on subsequent directory listings */
    public void setPathFilter(PathFilter pathFilter) {
        this.pathFilter = pathFilter;
    }
}
