package org.apache.gobblin.compaction.dataset;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Sets;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
import org.apache.gobblin.compaction.dataset.Dataset;
import org.apache.gobblin.compaction.mapreduce.MRCompactor;
import org.apache.gobblin.compaction.mapreduce.test.TestCompactionTaskUtils;
import org.apache.gobblin.configuration.State;
import org.apache.gobblin.util.DatasetFilterUtils;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.joda.time.format.PeriodFormatter;
import org.joda.time.format.PeriodFormatterBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/gobblin/compaction/dataset/TimeBasedSubDirDatasetsFinder.class */
/**
 * A {@link DatasetsFinder} that discovers compaction datasets whose input is organised as
 * {@code <inputDir>/<dataset>/<inputSubDir>/<time folders>}.
 *
 * <p>Dataset directories are located by globbing {@code inputDir} with the configured sub-directory
 * pattern ({@value #COMPACTION_TIMEBASED_SUBDIR_PATTERN}). For each dataset that survives the
 * blacklist/whitelist filter, the time folders underneath its input sub-directory are parsed with
 * the configured folder pattern ({@value #COMPACTION_TIMEBASED_FOLDER_PATTERN}) and one
 * {@link Dataset} is produced per folder whose time lies inside the allowed window
 * ({@value #COMPACTION_TIMEBASED_MAX_TIME_AGO} .. {@value #COMPACTION_TIMEBASED_MIN_TIME_AGO}).
 */
public class TimeBasedSubDirDatasetsFinder extends DatasetsFinder {
    private static final Logger log = LoggerFactory.getLogger(TimeBasedSubDirDatasetsFinder.class);
    private static final String COMPACTION_TIMEBASED_PREFIX = "compaction.timebased.";
    public static final String COMPACTION_TIMEBASED_FOLDER_PATTERN = "compaction.timebased.folder.pattern";
    public static final String DEFAULT_COMPACTION_TIMEBASED_FOLDER_PATTERN = "YYYY/MM/dd";
    public static final String COMPACTION_TIMEBASED_SUBDIR_PATTERN = "compaction.timebased.subdir.pattern";
    public static final String DEFAULT_COMPACTION_TIMEBASED_SUBDIR_PATTERN = "*";
    public static final String COMPACTION_TIMEBASED_MAX_TIME_AGO = "compaction.timebased.max.time.ago";
    public static final String DEFAULT_COMPACTION_TIMEBASED_MAX_TIME_AGO = "3d";
    public static final String COMPACTION_TIMEBASED_MIN_TIME_AGO = "compaction.timebased.min.time.ago";
    public static final String DEFAULT_COMPACTION_TIMEBASED_MIN_TIME_AGO = "1d";
    public static final String MIN_RECOMPACTION_DURATION = "compaction.timebased.min.recompaction.duration";
    public static final String DEFAULT_MIN_RECOMPACTION_DURATION = "0h";
    protected final String folderTimePattern;
    protected final String subDirPattern;
    protected final DateTimeZone timeZone;
    protected final DateTimeFormatter timeFormatter;
    protected final String inputSubDir;
    protected final String inputLateSubDir;
    protected final String destSubDir;
    protected final String destLateSubDir;

    /**
     * Creates a finder that operates on the supplied file system.
     *
     * @param state job configuration from which all patterns, sub-directories and the time zone are read
     * @param fileSystem file system handed to the parent {@link DatasetsFinder}
     * @throws Exception if the parent constructor fails
     */
    @VisibleForTesting
    public TimeBasedSubDirDatasetsFinder(State state, FileSystem fileSystem) throws Exception {
        super(state, fileSystem);
        this.inputSubDir = getInputSubDir();
        this.inputLateSubDir = getInputLateSubDir();
        this.destSubDir = getDestSubDir();
        this.destLateSubDir = getDestLateSubDir();
        this.folderTimePattern = getFolderPattern();
        this.subDirPattern = getSubDirPattern();
        this.timeZone = DateTimeZone.forID(this.state.getProp(MRCompactor.COMPACTION_TIMEZONE, MRCompactor.DEFAULT_COMPACTION_TIMEZONE));
        this.timeFormatter = DateTimeFormat.forPattern(this.folderTimePattern).withZone(this.timeZone);
    }

    /**
     * Creates a finder using whatever file system the parent {@link DatasetsFinder} sets up.
     *
     * @param state job configuration from which all patterns, sub-directories and the time zone are read
     * @throws Exception if the parent constructor fails
     */
    public TimeBasedSubDirDatasetsFinder(State state) throws Exception {
        super(state);
        this.inputSubDir = getInputSubDir();
        this.inputLateSubDir = getInputLateSubDir();
        this.destSubDir = getDestSubDir();
        this.destLateSubDir = getDestLateSubDir();
        this.folderTimePattern = getFolderPattern();
        this.subDirPattern = getSubDirPattern();
        this.timeZone = DateTimeZone.forID(this.state.getProp(MRCompactor.COMPACTION_TIMEZONE, MRCompactor.DEFAULT_COMPACTION_TIMEZONE));
        this.timeFormatter = DateTimeFormat.forPattern(this.folderTimePattern).withZone(this.timeZone);
    }

    /**
     * Derives a dataset name by taking everything in {@code str} that follows the first occurrence
     * of {@code str2} and stripping one leading path separator.
     *
     * @param str full path of the dataset directory
     * @param str2 base directory expected to occur inside {@code str}
     * @return the remainder of {@code str} after {@code str2}, without a leading separator
     */
    protected String getDatasetName(String str, String str2) {
        String remainder = str.substring(str.indexOf(str2) + str2.length());
        return StringUtils.removeStart(remainder, TestCompactionTaskUtils.PATH_SEPARATOR);
    }

    /**
     * Scans the input directory and returns one {@link Dataset} per (dataset, time folder) pair
     * that passes the blacklist/whitelist filter and the allowed time window.
     *
     * @return the datasets found; empty when nothing matches, never {@code null}
     * @throws IOException if listing the file system fails
     */
    @Override // org.apache.gobblin.compaction.dataset.DatasetsFinder
    public Set<Dataset> findDistinctDatasets() throws IOException {
        Set<Dataset> datasets = Sets.newHashSet();
        FileStatus[] candidates = this.fs.globStatus(new Path(this.inputDir, this.subDirPattern));
        if (candidates == null) {
            // globStatus yields null (not an empty array) when the pattern contains no glob
            // characters and the path does not exist.
            log.warn("No path matches sub-directory pattern {} under {}", this.subDirPattern, this.inputDir);
            return datasets;
        }
        for (FileStatus candidate : candidates) {
            log.info("Scanning directory : {}", candidate.getPath());
            if (!candidate.isDirectory()) {
                continue;
            }
            String datasetName = getDatasetName(candidate.getPath().toString(), this.inputDir);
            if (DatasetFilterUtils.survived(datasetName, this.blacklist, this.whitelist)) {
                log.info("Found dataset: {}", datasetName);
                collectDatasets(datasetName, datasets);
            }
        }
        return datasets;
    }

    /** Adds to {@code datasets} one {@link Dataset} for every eligible time folder of {@code datasetName}. */
    private void collectDatasets(String datasetName, Set<Dataset> datasets) throws IOException {
        Path inputPath = new Path(this.inputDir, new Path(datasetName, this.inputSubDir));
        Path inputLatePath = new Path(this.inputDir, new Path(datasetName, this.inputLateSubDir));
        Path outputPath = new Path(this.destDir, new Path(datasetName, this.destSubDir));
        Path outputLatePath = new Path(this.destDir, new Path(datasetName, this.destLateSubDir));
        Path outputTmpPath = new Path(this.tmpOutputDir, new Path(datasetName, this.destSubDir));
        double priority = getDatasetPriority(datasetName);

        FileStatus[] timeFolders = this.fs.globStatus(new Path(inputPath, getFolderStructure()));
        if (timeFolders == null) {
            // Same null contract as above; nothing to compact for this dataset.
            return;
        }
        for (FileStatus timeFolder : timeFolders) {
            Path folderPath = timeFolder.getPath();
            try {
                DateTime folderTime = getFolderTime(folderPath, inputPath);
                if (folderWithinAllowedPeriod(folderPath, folderTime)) {
                    datasets.add(buildDataset(datasetName, priority, folderPath, folderTime, inputLatePath,
                            outputPath, outputLatePath, outputTmpPath));
                }
            } catch (RuntimeException e) {
                // A folder whose name cannot be parsed as a time (or otherwise fails) is skipped,
                // not fatal to the whole scan.
                log.warn("{} is not a valid folder. Will be skipped due to exception.", folderPath, e);
            }
        }
    }

    /** Builds the {@link Dataset} for a single time folder, including its partition and input-time job properties. */
    private Dataset buildDataset(String datasetName, double priority, Path folderPath, DateTime folderTime,
            Path inputLatePath, Path outputPath, Path outputLatePath, Path outputTmpPath) {
        Path jobInputLatePath = appendFolderTime(inputLatePath, folderTime);
        Path jobOutputPath = appendFolderTime(outputPath, folderTime);
        Path jobOutputLatePath = appendFolderTime(outputLatePath, folderTime);
        // When recompacting, the previously written output becomes the input.
        Dataset dataset = new Dataset.Builder()
                .withPriority(priority)
                .withDatasetName(datasetName)
                .addInputPath(this.recompactDatasets ? jobOutputPath : folderPath)
                .addInputLatePath(this.recompactDatasets ? jobOutputLatePath : jobInputLatePath)
                .withOutputPath(jobOutputPath)
                .withOutputLatePath(jobOutputLatePath)
                .withOutputTmpPath(appendFolderTime(outputTmpPath, folderTime))
                .build();
        dataset.setJobProp(MRCompactor.COMPACTION_JOB_DEST_PARTITION, folderTime.toString(this.timeFormatter));
        dataset.setJobProp(MRCompactor.COMPACTION_INPUT_PATH_TIME, Long.valueOf(folderTime.getMillis()));
        return dataset;
    }

    /** Reads the input sub-directory from the job state, falling back to the MRCompactor default. */
    private String getInputSubDir() {
        return this.state.getProp(MRCompactor.COMPACTION_INPUT_SUBDIR, MRCompactor.DEFAULT_COMPACTION_INPUT_SUBDIR);
    }

    /** Returns the input sub-directory with the late-directory suffix appended. */
    private String getInputLateSubDir() {
        return getInputSubDir() + MRCompactor.COMPACTION_LATE_DIR_SUFFIX;
    }

    /** Returns the destination sub-directory with the late-directory suffix appended. */
    private String getDestLateSubDir() {
        return getDestSubDir() + MRCompactor.COMPACTION_LATE_DIR_SUFFIX;
    }

    /** Reads the destination sub-directory from the job state, falling back to the MRCompactor default. */
    private String getDestSubDir() {
        return this.state.getProp(MRCompactor.COMPACTION_DEST_SUBDIR, MRCompactor.DEFAULT_COMPACTION_DEST_SUBDIR);
    }

    /**
     * Converts the folder time pattern into a glob by replacing every run of letters, digits,
     * {@code =}, {@code '} and {@code -} with {@code *}; e.g. {@code YYYY/MM/dd} becomes {@code *}{@code /*}{@code /*}.
     *
     * @return glob expression matching the time-folder hierarchy
     */
    protected String getFolderStructure() {
        return this.folderTimePattern.replaceAll("[a-zA-Z0-9='-]+", DEFAULT_COMPACTION_TIMEBASED_SUBDIR_PATTERN);
    }

    /** Reads and logs the folder time pattern, defaulting to {@value #DEFAULT_COMPACTION_TIMEBASED_FOLDER_PATTERN}. */
    private String getFolderPattern() {
        String pattern = this.state.getProp(COMPACTION_TIMEBASED_FOLDER_PATTERN, DEFAULT_COMPACTION_TIMEBASED_FOLDER_PATTERN);
        log.info("Compaction folder pattern: {}", pattern);
        return pattern;
    }

    /** Reads and logs the dataset sub-directory glob, defaulting to {@value #DEFAULT_COMPACTION_TIMEBASED_SUBDIR_PATTERN}. */
    private String getSubDirPattern() {
        String pattern = this.state.getProp(COMPACTION_TIMEBASED_SUBDIR_PATTERN, DEFAULT_COMPACTION_TIMEBASED_SUBDIR_PATTERN);
        log.info("Compaction subdir pattern: {}", pattern);
        return pattern;
    }

    /**
     * Parses the time encoded in a folder path: the portion of {@code path} that follows
     * {@code path2} (minus one leading separator) is parsed with the configured time formatter.
     *
     * @param path full path of the time folder
     * @param path2 base path that precedes the time portion
     * @return the parsed folder time in the configured time zone
     * @throws IllegalArgumentException if the remaining text does not match the folder pattern
     */
    protected DateTime getFolderTime(Path path, Path path2) {
        String full = path.toString();
        String base = path2.toString();
        String timePart = StringUtils.removeStart(full.substring(full.indexOf(base) + base.length()),
                TestCompactionTaskUtils.PATH_SEPARATOR);
        return this.timeFormatter.parseDateTime(timePart);
    }

    /**
     * Checks whether a folder's time lies within the inclusive window
     * {@code [now - max.time.ago, now - min.time.ago]}, where "now" is taken in the configured time zone.
     *
     * @param path folder being checked (used for logging only)
     * @param dateTime time parsed from the folder
     * @return {@code true} if the folder should be compacted, {@code false} if it is too old or too recent
     */
    protected boolean folderWithinAllowedPeriod(Path path, DateTime dateTime) {
        DateTime now = new DateTime(this.timeZone);
        PeriodFormatter periodFormatter = getPeriodFormatter();
        DateTime earliestAllowed = getEarliestAllowedFolderTime(now, periodFormatter);
        DateTime latestAllowed = getLatestAllowedFolderTime(now, periodFormatter);
        if (dateTime.isBefore(earliestAllowed)) {
            log.info("Folder time for {} is {}, earlier than the earliest allowed folder time, {}. Skipping",
                    path, dateTime, earliestAllowed);
            return false;
        }
        if (dateTime.isAfter(latestAllowed)) {
            log.info("Folder time for {} is {}, later than the latest allowed folder time, {}. Skipping",
                    path, dateTime, latestAllowed);
            return false;
        }
        return true;
    }

    /**
     * Builds the formatter used for the time-ago settings: months ({@code m}), days ({@code d}),
     * hours ({@code h}) and minutes ({@code min}), in that order.
     *
     * @return a new period formatter
     */
    public static PeriodFormatter getPeriodFormatter() {
        return new PeriodFormatterBuilder().appendMonths().appendSuffix("m").appendDays().appendSuffix("d").appendHours().appendSuffix("h").appendMinutes().appendSuffix("min").toFormatter();
    }

    /** Returns {@code dateTime} minus the configured max-time-ago period (default {@value #DEFAULT_COMPACTION_TIMEBASED_MAX_TIME_AGO}). */
    private DateTime getEarliestAllowedFolderTime(DateTime dateTime, PeriodFormatter periodFormatter) {
        String maxTimeAgo = this.state.getProp(COMPACTION_TIMEBASED_MAX_TIME_AGO, DEFAULT_COMPACTION_TIMEBASED_MAX_TIME_AGO);
        return dateTime.minus(periodFormatter.parsePeriod(maxTimeAgo));
    }

    /** Returns {@code dateTime} minus the configured min-time-ago period (default {@value #DEFAULT_COMPACTION_TIMEBASED_MIN_TIME_AGO}). */
    private DateTime getLatestAllowedFolderTime(DateTime dateTime, PeriodFormatter periodFormatter) {
        String minTimeAgo = this.state.getProp(COMPACTION_TIMEBASED_MIN_TIME_AGO, DEFAULT_COMPACTION_TIMEBASED_MIN_TIME_AGO);
        return dateTime.minus(periodFormatter.parsePeriod(minTimeAgo));
    }

    /**
     * Appends the folder time, rendered with the configured folder pattern, to a base path.
     *
     * @param path base path
     * @param dateTime folder time to render
     * @return {@code path} with the formatted time as child
     */
    protected Path appendFolderTime(Path path, DateTime dateTime) {
        return new Path(path, dateTime.toString(this.timeFormatter));
    }
}
