package org.apache.gobblin.compaction.mapreduce;

import com.google.common.base.Functions;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.base.Stopwatch;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.google.common.io.Closer;
import com.google.common.util.concurrent.FutureCallback;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.net.URI;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.PriorityBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;
import org.apache.gobblin.compaction.Compactor;
import org.apache.gobblin.compaction.dataset.Dataset;
import org.apache.gobblin.compaction.dataset.DatasetsFinder;
import org.apache.gobblin.compaction.dataset.TimeBasedSubDirDatasetsFinder;
import org.apache.gobblin.compaction.event.CompactionSlaEventHelper;
import org.apache.gobblin.compaction.listeners.CompactorCompletionListener;
import org.apache.gobblin.compaction.listeners.CompactorCompletionListenerFactory;
import org.apache.gobblin.compaction.listeners.CompactorListener;
import org.apache.gobblin.compaction.mapreduce.MRCompactorJobPropCreator;
import org.apache.gobblin.compaction.mapreduce.MRCompactorJobRunner;
import org.apache.gobblin.compaction.verify.DataCompletenessVerifier;
import org.apache.gobblin.configuration.State;
import org.apache.gobblin.metrics.GobblinMetrics;
import org.apache.gobblin.metrics.MetricContext;
import org.apache.gobblin.metrics.MetricReporterException;
import org.apache.gobblin.metrics.MultiReporterException;
import org.apache.gobblin.metrics.Tag;
import org.apache.gobblin.metrics.event.EventSubmitter;
import org.apache.gobblin.util.ClassAliasResolver;
import org.apache.gobblin.util.ClusterNameTags;
import org.apache.gobblin.util.DatasetFilterUtils;
import org.apache.gobblin.util.ExecutorsUtils;
import org.apache.gobblin.util.FileListUtils;
import org.apache.gobblin.util.HadoopUtils;
import org.apache.gobblin.util.recordcount.CompactionRecordCountProvider;
import org.apache.gobblin.util.recordcount.IngestionRecordCountProvider;
import org.apache.gobblin.util.reflection.GobblinConstructorUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@Deprecated
/* loaded from: input_file:org/apache/gobblin/compaction/mapreduce/MRCompactor.class */
public class MRCompactor implements Compactor {
    // ---------------------------------------------------------------------------------------------
    // Configuration keys, their defaults, and well-known file/directory names used by compaction.
    // ---------------------------------------------------------------------------------------------

    // Common prefix of the compaction configuration keys.
    public static final String COMPACTION_PREFIX = "compaction.";

    // Number of threads in the executor that runs compaction jobs.
    public static final String COMPACTION_THREAD_POOL_SIZE = "compaction.thread.pool.size";
    public static final int DEFAULT_COMPACTION_THREAD_POOL_SIZE = 30;

    // Input / output / temporary locations.
    public static final String COMPACTION_INPUT_DIR = "compaction.input.dir";
    public static final String COMPACTION_INPUT_SUBDIR = "compaction.input.subdir";
    public static final String DEFAULT_COMPACTION_INPUT_SUBDIR = "hourly";
    public static final String COMPACTION_DEST_DIR = "compaction.dest.dir";
    public static final String COMPACTION_DEST_SUBDIR = "compaction.dest.subdir";
    public static final String DEFAULT_COMPACTION_DEST_SUBDIR = "daily";
    public static final String COMPACTION_TMP_DEST_DIR = "compaction.tmp.dest.dir";
    public static final String DEFAULT_COMPACTION_TMP_DEST_DIR = "/tmp/gobblin-compaction";
    public static final String COMPACTION_JOB_DIR = "compaction.tmp.job.dir";
    // Not a configuration key: a literal suffix (presumably appended to directories holding late data — confirm at use sites).
    public static final String COMPACTION_LATE_DIR_SUFFIX = "_late";

    // Dataset filtering and prioritisation.
    public static final String COMPACTION_BLACKLIST = "compaction.blacklist";
    public static final String COMPACTION_WHITELIST = "compaction.whitelist";
    public static final String COMPACTION_HIGH_PRIORITY_TOPICS = "compaction.high.priority.topics";
    public static final String COMPACTION_NORMAL_PRIORITY_TOPICS = "compaction.normal.priority.topics";

    // Fully-qualified class name of the job runner implementation.
    public static final String COMPACTION_JOB_RUNNER_CLASS = "compaction.job.runner.class";
    public static final String DEFAULT_COMPACTION_JOB_RUNNER_CLASS = "org.apache.gobblin.compaction.mapreduce.avro.MRCompactorAvroKeyDedupJobRunner";

    // Time zone ID used when this compactor captures the current time.
    public static final String COMPACTION_TIMEZONE = "compaction.timezone";
    public static final String DEFAULT_COMPACTION_TIMEZONE = "America/Los_Angeles";

    // Optional URI of the file system to compact on; when absent the default file system of the Hadoop configuration is used.
    public static final String COMPACTION_FILE_SYSTEM_URI = "compaction.file.system.uri";

    // Per-job timeout; the default is Long.MAX_VALUE.
    public static final String COMPACTION_MR_JOB_TIMEOUT_MINUTES = "compaction.mr.job.timeout.minutes";
    public static final long DEFAULT_COMPACTION_MR_JOB_TIMEOUT_MINUTES = Long.MAX_VALUE;

    // Class name of the DatasetsFinder implementation (instantiated reflectively with a State argument).
    public static final String COMPACTION_DATASETS_FINDER = "compaction.datasets.finder";
    public static final String COMPACTION_DATASETS_MAX_COUNT = "compaction.datasets.max.count";
    // NOTE(review): the identifier is misspelled ("DEFUALT"); it is public API, so it is left as-is.
    public static final double DEFUALT_COMPACTION_DATASETS_MAX_COUNT = 1.0E9d;

    // Whether "already compacted" is detected via renamed source directories instead of a completion file.
    public static final String COMPACTION_RENAME_SOURCE_DIR_ENABLED = "compaction.rename.source.dir.enabled";
    public static final boolean DEFAULT_COMPACTION_RENAME_SOURCE_DIR_ENABLED = false;
    // Not a configuration key: the suffix appended to a source directory name once it has been compacted.
    public static final String COMPACTION_RENAME_SOURCE_DIR_SUFFIX = "_COMPLETE";

    // Record count providers for input and output files.
    public static final String COMPACTION_INPUT_RECORD_COUNT_PROVIDER = "compaction.input.record.count.provider";
    public static final String COMPACTION_OUTPUT_RECORD_COUNT_PROVIDER = "compaction.output.record.count.provider";

    // Late-data / recompaction settings.
    public static final String COMPACTION_RECOMPACT_FROM_INPUT_FOR_LATE_DATA = "compaction.recompact.from.input.for.late.data";
    public static final boolean DEFAULT_COMPACTION_RECOMPACT_FROM_INPUT_FOR_LATE_DATA = false;
    // NOTE(review): the constant says PER_DATASET but the key string ends in "per.topic".
    public static final String COMPACTION_LATEDATA_THRESHOLD_FOR_RECOMPACT_PER_DATASET = "compaction.latedata.threshold.for.recompact.per.topic";
    public static final double DEFAULT_COMPACTION_LATEDATA_THRESHOLD_FOR_RECOMPACT_PER_DATASET = 1.0d;
    public static final String COMPACTION_LATEDATA_THRESHOLD_FILE_NUM = "compaction.latedata.threshold.file.num";
    public static final int DEFAULT_COMPACTION_LATEDATA_THRESHOLD_FILE_NUM = 1000;
    public static final String COMPACTION_LATEDATA_THRESHOLD_DURATION = "compaction.latedata.threshold.duration";
    public static final String DEFAULT_COMPACTION_LATEDATA_THRESHOLD_DURATION = "24h";
    public static final String COMPACTION_RECOMPACT_CONDITION = "compaction.recompact.condition";
    public static final String DEFAULT_COMPACTION_RECOMPACT_CONDITION = "RecompactBasedOnRatio";
    public static final String COMPACTION_RECOMPACT_COMBINE_CONDITIONS = "compaction.recompact.combine.conditions";
    public static final String COMPACTION_RECOMPACT_COMBINE_CONDITIONS_OPERATION = "compaction.recompact.combine.conditions.operation";
    public static final String DEFAULT_COMPACTION_RECOMPACT_COMBINE_CONDITIONS_OPERATION = "or";

    // Alias (or class name) of the factory that creates the compaction-completion listener.
    // NOTE(review): the identifiers are misspelled ("LISTERNER"); they are public API, so they are left as-is.
    public static final String COMPACTION_COMPLETE_LISTERNER = "compaction.complete.listener";
    public static final String DEFAULT_COMPACTION_COMPLETE_LISTERNER = "SimpleCompactorCompletionHook";

    // Deduplication flags for input and output.
    public static final String COMPACTION_INPUT_DEDUPLICATED = "compaction.input.deduplicated";
    public static final boolean DEFAULT_COMPACTION_INPUT_DEDUPLICATED = false;
    public static final String COMPACTION_OUTPUT_DEDUPLICATED = "compaction.output.deduplicated";
    public static final boolean DEFAULT_COMPACTION_OUTPUT_DEDUPLICATED = true;

    // Data completeness verification settings.
    public static final String COMPACTION_COMPLETENESS_VERIFICATION_PREFIX = "compaction.completeness.verification.";
    public static final String COMPACTION_RECOMPACT_FROM_DEST_PATHS = "compaction.recompact.from.dest.paths";
    public static final String COMPACTION_RECOMPACT_ALL_DATA = "compaction.recompact.all.data";
    public static final boolean DEFAULT_COMPACTION_RECOMPACT_FROM_DEST_PATHS = false;
    public static final boolean DEFAULT_COMPACTION_RECOMPACT_ALL_DATA = true;
    public static final String COMPACTION_COMPLETENESS_VERIFICATION_BLACKLIST = "compaction.completeness.verification.blacklist";
    public static final String COMPACTION_COMPLETENESS_VERIFICATION_WHITELIST = "compaction.completeness.verification.whitelist";
    public static final String COMPACTION_VERIFICATION_TIMEOUT_MINUTES = "compaction.completeness.verification.timeout.minutes";
    public static final long DEFAULT_COMPACTION_VERIFICATION_TIMEOUT_MINUTES = 30;
    public static final String COMPACTION_COMPLETENESS_VERIFICATION_ENABLED = "compaction.completeness.verification.enabled";
    public static final boolean DEFAULT_COMPACTION_COMPLETENESS_VERIFICATION_ENABLED = false;
    // Maximum number of datasets handed to the verifier in a single verify() call.
    public static final String COMPACTION_COMPLETENESS_VERIFICATION_NUM_DATASETS_VERIFIED_TOGETHER = "compaction.completeness.verification.num.datasets.verified.together";
    public static final int DEFAULT_COMPACTION_COMPLETENESS_VERIFICATION_NUM_DATASETS_VERIFIED_TOGETHER = 10;
    public static final String COMPACTION_COMPLETENESS_VERIFICATION_PUBLISH_DATA_IF_CANNOT_VERIFY = "compaction.completeness.verification.publish.data.if.cannot.verify";
    public static final boolean DEFAULT_COMPACTION_COMPLETENESS_VERIFICATION_PUBLISH_DATA_IF_CANNOT_VERIFY = false;

    // Per-job properties (presumably set on the job-level State of each dataset — confirm at use sites).
    public static final String COMPACTION_SHOULD_DEDUPLICATE = "compaction.should.deduplicate";
    public static final String COMPACTION_JOB_DEST_PARTITION = "compaction.job.dest.partition";
    public static final String COMPACTION_ENABLE_SUCCESS_FILE = "compaction.fileoutputcommitter.marksuccessfuljobs";
    public static final String COMPACTION_JOB_LATE_DATA_MOVEMENT_TASK = "compaction.job.late.data.movement.task";
    public static final String COMPACTION_JOB_LATE_DATA_FILES = "compaction.job.late.data.files";

    // Name of the marker file in a dataset's output path; its presence means the dataset was compacted,
    // and readCompactionTimestamp reads a long from it.
    public static final String COMPACTION_COMPLETE_FILE_NAME = "_COMPACTION_COMPLETE";
    public static final String COMPACTION_LATE_FILES_DIRECTORY = "late";

    // COMPACTION_JARS is set to the directory the dependency jars are copied to;
    // COMPACTION_JAR_SUBDIR is that directory's name under the temporary output directory.
    public static final String COMPACTION_JARS = "compaction.jars";
    public static final String COMPACTION_JAR_SUBDIR = "_gobblin_compaction_jars";

    // Namespace passed to the EventSubmitter built in the constructor.
    public static final String COMPACTION_TRACKING_EVENTS_NAMESPACE = "compaction.tracking.events";
    public static final String COMPACTION_INPUT_PATH_TIME = "compaction.input.path.time";
    public static final String COMPACTION_FILE_EXTENSION = "compaction.extension";
    private static final long COMPACTION_JOB_WAIT_INTERVAL_SECONDS = 10;
    // Configuration of this compactor; populated from the Properties passed to the constructor.
    private final State state = new State();
    // Caller-supplied metric tags, merged with the cluster-name tags in initializeMetrics().
    private final List<? extends Tag<?>> tags;
    // Hadoop configuration derived from `state`.
    private final Configuration conf;
    // Temporary output directory; the dependency jars are staged in a sub-directory of it.
    private final String tmpOutputDir;
    // File system on which compaction input/output live.
    private final FileSystem fs;
    // Executor running the per-dataset job runners; see JobRunnerExecutor.afterExecute for completion handling.
    private final JobRunnerExecutor jobExecutor;
    // Datasets to compact; replaced in place by createJobPropsForDatasets().
    private final Set<Dataset> datasets;
    // Job runners keyed by dataset; an entry is removed when the runner finishes executing.
    private final Map<Dataset, MRCompactorJobRunner> jobRunnables;
    // Owns closeable resources (the completeness verifier, when enabled); closed at the end of compact().
    private final Closer closer;
    // Present only when completeness verification is enabled.
    private final Optional<DataCompletenessVerifier> verifier;
    // Started in the constructor.
    private final Stopwatch stopwatch;
    private final GobblinMetrics gobblinMetrics;
    private final EventSubmitter eventSubmitter;
    // Optional listener notified after each dataset's compaction is committed.
    private final Optional<CompactorListener> compactorListener;
    // Time captured at construction, in the configured time zone. (Field name is misspelled: "initilize".)
    private final DateTime initilizeTime;
    private final long dataVerifTimeoutMinutes;
    private final long compactionTimeoutMinutes;
    // Whether data completeness verification is enabled.
    private final boolean shouldVerifDataCompl;
    // Whether a dataset whose verification was given up should still be retried for compaction.
    private final boolean shouldPublishDataIfCannotVerifyCompl;
    // Listener invoked once after the whole compaction run succeeds.
    private final CompactorCompletionListener compactionCompleteListener;
    private static final Logger LOG = LoggerFactory.getLogger(MRCompactor.class);
    // Defaults that are class names and therefore cannot be compile-time string literals.
    public static final String DEFAULT_COMPACTION_DATASETS_FINDER = TimeBasedSubDirDatasetsFinder.class.getName();
    public static final String DEFAULT_COMPACTION_INPUT_RECORD_COUNT_PROVIDER = IngestionRecordCountProvider.class.getName();
    public static final String DEFAULT_COMPACTION_OUTPUT_RECORD_COUNT_PROVIDER = CompactionRecordCountProvider.class.getName();
    // Process-wide (static) concurrent map of MapReduce jobs keyed by dataset.
    private static final Map<Dataset, Job> RUNNING_MR_JOBS = Maps.newConcurrentMap();

    /* JADX INFO: Access modifiers changed from: package-private */
    /* renamed from: org.apache.gobblin.compaction.mapreduce.MRCompactor$2, reason: invalid class name */
    /* loaded from: input_file:org/apache/gobblin/compaction/mapreduce/MRCompactor$2.class */
    /**
     * Decompiler rendering of the compiler-generated helper for a {@code switch} over
     * {@link DataCompletenessVerifier.Results.Result.Status}.
     *
     * <p>The array is indexed by the enum constant's {@code ordinal()} and holds the case label
     * used by the switch in {@code addCallback}: {@code 1} for {@code PASSED}, {@code 2} for
     * {@code FAILED}. Any constant not listed here keeps the array default {@code 0} and falls
     * into the switch's {@code default} branch.
     */
    public static /* synthetic */ class AnonymousClass2 {
        static final /* synthetic */ int[] $SwitchMap$org$apache$gobblin$compaction$verify$DataCompletenessVerifier$Results$Result$Status = new int[DataCompletenessVerifier.Results.Result.Status.values().length];

        static {
            // Each assignment is guarded separately so that an enum constant missing at runtime
            // (NoSuchFieldError) does not prevent the remaining constants from being mapped.
            try {
                $SwitchMap$org$apache$gobblin$compaction$verify$DataCompletenessVerifier$Results$Result$Status[DataCompletenessVerifier.Results.Result.Status.PASSED.ordinal()] = 1;
            } catch (NoSuchFieldError e) {
            }
            try {
                $SwitchMap$org$apache$gobblin$compaction$verify$DataCompletenessVerifier$Results$Result$Status[DataCompletenessVerifier.Results.Result.Status.FAILED.ordinal()] = 2;
            } catch (NoSuchFieldError e2) {
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/apache/gobblin/compaction/mapreduce/MRCompactor$JobRunnerExecutor.class */
    /**
     * Thread pool that runs {@link MRCompactorJobRunner}s and updates the state of the runner's
     * dataset once the runner has finished executing.
     */
    public class JobRunnerExecutor extends ThreadPoolExecutor {
        /**
         * @param i             core pool size
         * @param i2            maximum pool size
         * @param j             keep-alive time, expressed in {@code timeUnit}
         * @param timeUnit      unit of {@code j}
         * @param blockingQueue work queue holding submitted runners
         */
        public JobRunnerExecutor(int i, int i2, long j, TimeUnit timeUnit, BlockingQueue<Runnable> blockingQueue) {
            super(i, i2, j, timeUnit, blockingQueue);
        }

        /**
         * Deregisters the finished runner and moves its dataset to the next state.
         *
         * @param runnable the finished task; must be an {@link MRCompactorJobRunner}
         * @param th       the throwable that terminated the task, or {@code null} on normal return
         * @throws IllegalArgumentException if {@code runnable} is not an {@link MRCompactorJobRunner}
         */
        @Override // java.util.concurrent.ThreadPoolExecutor
        protected void afterExecute(Runnable runnable, Throwable th) {
            Preconditions.checkArgument(runnable instanceof MRCompactorJobRunner, String.format("Runnable expected to be instance of %s, actual %s", MRCompactorJobRunner.class.getSimpleName(), runnable.getClass().getSimpleName()));
            MRCompactorJobRunner jobRunner = (MRCompactorJobRunner) runnable;
            Dataset dataset = jobRunner.getDataset();
            MRCompactor.this.jobRunnables.remove(dataset);

            Throwable failure = th;
            if (failure == null) {
                failure = afterExecuteWithoutThrowable(jobRunner, dataset);
            }
            if (failure != null) {
                afterExecuteWithThrowable(jobRunner, failure);
            }
        }

        /**
         * Handles a runner that returned normally.
         *
         * @return the exception raised by the compactor listener, or {@code null} if none was raised
         */
        private Throwable afterExecuteWithoutThrowable(MRCompactorJobRunner jobRunner, Dataset dataset) {
            if (jobRunner.status() == MRCompactorJobRunner.Status.COMMITTED) {
                // The job committed: either schedule a recompaction or mark the dataset as done.
                if (dataset.needToRecompact()) {
                    MRCompactor.modifyDatasetStateToRecompact(dataset);
                } else {
                    dataset.setState(Dataset.DatasetState.COMPACTION_COMPLETE);
                }
                if (!MRCompactor.this.compactorListener.isPresent()) {
                    return null;
                }
                try {
                    MRCompactor.this.compactorListener.get().onDatasetCompactionCompletion(dataset);
                    return null;
                } catch (Exception e) {
                    // A listener failure is treated like a failure of the job itself.
                    return e;
                }
            }

            boolean verificationGivenUp = dataset.state() == Dataset.DatasetState.GIVEN_UP;
            if (verificationGivenUp && !MRCompactor.this.shouldPublishDataIfCannotVerifyCompl) {
                // The job did not commit and verification was given up: stop trying this dataset.
                MRCompactor.LOG.info(String.format("Dataset %s will not be compacted, since data completeness cannot be verified", dataset));
                dataset.setState(Dataset.DatasetState.COMPACTION_COMPLETE);
            } else {
                // The job did not commit: lower the dataset's priority.
                dataset.reducePriority();
            }
            return null;
        }

        /** Marks the runner's dataset as skipped because of {@code th}. */
        private void afterExecuteWithThrowable(MRCompactorJobRunner mRCompactorJobRunner, Throwable th) {
            mRCompactorJobRunner.getDataset().skip(th);
        }
    }

    /**
     * Creates a compactor configured from {@code properties}.
     *
     * <p>The assignment order below matters: several initializers read fields assigned earlier
     * ({@code state}, {@code conf}, {@code tags}, {@code closer}, {@code shouldVerifDataCompl}).
     *
     * @param properties configuration; copied into this compactor's {@link State}
     * @param list       metric tags attached to the metrics created for this compactor
     * @param optional   optional listener notified when a dataset's compaction completes
     * @throws IOException if the file system cannot be obtained
     */
    public MRCompactor(Properties properties, List<? extends Tag<?>> list, Optional<CompactorListener> optional) throws IOException {
        // Everything below reads from this.state, so it has to be populated first.
        this.state.addAll(properties);
        this.initilizeTime = getCurrentTime();
        // Must be assigned before initializeMetrics(), which reads this.tags.
        this.tags = list;
        this.conf = HadoopUtils.getConfFromState(this.state);
        this.tmpOutputDir = getTmpOutputDir();
        // getFileSystem() reads this.conf.
        this.fs = getFileSystem();
        this.datasets = getDatasetsFinder().findDistinctDatasets();
        this.jobExecutor = createJobExecutor();
        this.jobRunnables = Maps.newConcurrentMap();
        this.closer = Closer.create();
        this.stopwatch = Stopwatch.createStarted();
        this.gobblinMetrics = initializeMetrics();
        // Looks the metrics instance up again by job name instead of reusing this.gobblinMetrics.
        this.eventSubmitter = new EventSubmitter.Builder(GobblinMetrics.get(this.state.getProp("job.name")).getMetricContext(), COMPACTION_TRACKING_EVENTS_NAMESPACE).build();
        this.compactorListener = optional;
        this.dataVerifTimeoutMinutes = getDataVerifTimeoutMinutes();
        this.compactionTimeoutMinutes = getCompactionTimeoutMinutes();
        this.shouldVerifDataCompl = shouldVerifyDataCompleteness();
        this.compactionCompleteListener = getCompactionCompleteListener();
        // The verifier is only created when verification is enabled; it is registered with the closer
        // so that it is closed together with the compactor's other resources.
        this.verifier = this.shouldVerifDataCompl ? Optional.of(this.closer.register(new DataCompletenessVerifier(this.state))) : Optional.absent();
        this.shouldPublishDataIfCannotVerifyCompl = shouldPublishDataIfCannotVerifyCompl();
    }

    /**
     * @return the time recorded when this compactor was constructed
     */
    public DateTime getInitializeTime() {
        return initilizeTime;
    }

    /**
     * @return the configured temporary output directory, or the default when none is configured
     */
    private String getTmpOutputDir() {
        String configuredTmpDir = this.state.getProp(COMPACTION_TMP_DEST_DIR, DEFAULT_COMPACTION_TMP_DEST_DIR);
        return configuredTmpDir;
    }

    /**
     * Resolves the file system to compact on.
     *
     * @return the file system for the configured URI, or the default file system of the Hadoop
     *         configuration when no URI is configured
     * @throws IOException if the file system cannot be obtained
     */
    private FileSystem getFileSystem() throws IOException {
        if (!this.state.contains(COMPACTION_FILE_SYSTEM_URI)) {
            return FileSystem.get(this.conf);
        }
        URI fileSystemUri = URI.create(this.state.getProp(COMPACTION_FILE_SYSTEM_URI));
        return FileSystem.get(fileSystemUri, this.conf);
    }

    /**
     * Reflectively instantiates the configured {@link DatasetsFinder} through its
     * {@code (State)} constructor.
     *
     * @return a new finder built from this compactor's state
     * @throws RuntimeException if the class cannot be loaded or instantiated
     */
    private DatasetsFinder getDatasetsFinder() {
        try {
            String finderClassName = this.state.getProp(COMPACTION_DATASETS_FINDER, DEFAULT_COMPACTION_DATASETS_FINDER);
            Class<?> finderClass = Class.forName(finderClassName);
            Object finder = finderClass.getConstructor(State.class).newInstance(this.state);
            return (DatasetsFinder) finder;
        } catch (Exception e) {
            throw new RuntimeException("Failed to initiailize DatasetsFinder.", e);
        }
    }

    /**
     * @return the current time in the configured compaction time zone
     */
    private DateTime getCurrentTime() {
        String zoneId = this.state.getProp(COMPACTION_TIMEZONE, DEFAULT_COMPACTION_TIMEZONE);
        DateTimeZone zone = DateTimeZone.forID(zoneId);
        return new DateTime(zone);
    }

    /**
     * Builds the fixed-size executor for compaction jobs. Core and maximum pool size are both the
     * configured thread pool size, and pending runners wait in a priority queue.
     *
     * @return a new executor
     */
    private JobRunnerExecutor createJobExecutor() {
        int poolSize = getThreadPoolSize();
        BlockingQueue<Runnable> pendingJobs = new PriorityBlockingQueue<>();
        // Keep-alive of Long.MAX_VALUE nanoseconds: idle threads are effectively never timed out.
        return new JobRunnerExecutor(poolSize, poolSize, Long.MAX_VALUE, TimeUnit.NANOSECONDS, pendingJobs);
    }

    /**
     * @return the configured compaction thread pool size, or the default when none is configured
     */
    private int getThreadPoolSize() {
        return this.state.getPropAsInt(COMPACTION_THREAD_POOL_SIZE, DEFAULT_COMPACTION_THREAD_POOL_SIZE);
    }

    /**
     * Obtains the {@link GobblinMetrics} instance for the configured job name, tagged with the
     * caller-supplied tags plus the cluster-name tags, and starts metric reporting on it.
     *
     * <p>Reporters that fail to start are logged individually; the failure is not propagated.
     *
     * @return the metrics instance
     */
    private GobblinMetrics initializeMetrics() {
        ImmutableList.Builder<Tag<?>> allTags = ImmutableList.builder();
        allTags.addAll(this.tags);
        allTags.addAll(Tag.fromMap(ClusterNameTags.getClusterNameTags()));

        String jobName = this.state.getProp("job.name");
        GobblinMetrics metrics = GobblinMetrics.get(jobName, (MetricContext) null, allTags.build());
        try {
            metrics.startMetricReporting(this.state.getProperties());
        } catch (MultiReporterException e) {
            for (MetricReporterException reporterFailure : e.getExceptions()) {
                String sinkType = reporterFailure.getSinkType().name();
                String reporterType = reporterFailure.getReporterType().name();
                LOG.error("Failed to start {} {} reporter.", new Object[]{sinkType, reporterType, reporterFailure});
            }
        }
        return metrics;
    }

    /**
     * Runs one compaction pass: stages the dependency jars, processes all datasets, fails if any
     * dataset could not be compacted, and finally notifies the completion listener.
     *
     * <p>Cleanup always runs exactly once, whether or not the pass succeeded: the executors are
     * shut down and the closer is closed, then the staged dependency jars are deleted, then metric
     * reporting is stopped. Each cleanup stage is attempted even if the previous one throws.
     *
     * @throws IOException if staging the jars or releasing resources fails
     */
    @Override // org.apache.gobblin.compaction.Compactor
    public void compact() throws IOException {
        try {
            copyDependencyJarsToHdfs();
            processDatasets();
            throwExceptionsIfAnyDatasetCompactionFailed();
            onCompactionCompletion();
        } catch (Throwable t) {
            // Log before rethrowing: if the cleanup below throws as well, its exception replaces
            // this one and the original cause would otherwise be lost.
            LOG.error("Caught throwable during compaction", t);
            throw t;
        } finally {
            try {
                shutdownExecutors();
                this.closer.close();
            } finally {
                try {
                    // Remove the jars staged by copyDependencyJarsToHdfs().
                    deleteDependencyJars();
                } finally {
                    // Counterpart of startMetricReporting() in initializeMetrics().
                    this.gobblinMetrics.stopMetricsReporting();
                }
            }
        }
    }

    /**
     * Resolves the configured completion-listener factory (by alias or class name), instantiates
     * it with an empty argument list, and asks it for a listener built from this compactor's state.
     *
     * @return the completion listener
     * @throws IllegalArgumentException if the factory cannot be resolved or instantiated
     */
    private CompactorCompletionListener getCompactionCompleteListener() {
        try {
            String factoryAlias = this.state.getProp(COMPACTION_COMPLETE_LISTERNER, DEFAULT_COMPACTION_COMPLETE_LISTERNER);
            Class<?> factoryClass = new ClassAliasResolver(CompactorCompletionListenerFactory.class).resolveClass(factoryAlias);
            CompactorCompletionListenerFactory factory = (CompactorCompletionListenerFactory) GobblinConstructorUtils.invokeFirstConstructor(factoryClass, new List[]{ImmutableList.of()});
            return factory.createCompactorCompactionListener(this.state);
        } catch (ClassNotFoundException | IllegalAccessException | InstantiationException | NoSuchMethodException | InvocationTargetException e) {
            throw new IllegalArgumentException(e);
        }
    }

    /** Notifies the completion listener that this compactor has finished. */
    private void onCompactionCompletion() {
        CompactorCompletionListener listener = this.compactionCompleteListener;
        listener.onCompactionCompletion(this);
    }

    /**
     * Copies the local jars matched by the {@code job.jars} patterns into the jar sub-directory of
     * the temporary output directory, after deleting whatever that directory held before. The
     * directory is recorded under {@link #COMPACTION_JARS}. Does nothing when {@code job.jars} is
     * not configured.
     *
     * @throws IOException if listing, deleting or copying fails
     */
    private void copyDependencyJarsToHdfs() throws IOException {
        if (!this.state.contains("job.jars")) {
            return;
        }

        LocalFileSystem localFs = FileSystem.getLocal(this.conf);
        Path jarDir = new Path(this.tmpOutputDir, COMPACTION_JAR_SUBDIR);
        this.state.setProp(COMPACTION_JARS, jarDir.toString());
        // Start from an empty directory so jars of a previous run are not picked up.
        this.fs.delete(jarDir, true);

        for (String jarPattern : this.state.getPropAsList("job.jars")) {
            for (FileStatus localJar : localFs.globStatus(new Path(jarPattern))) {
                Path localJarPath = localJar.getPath();
                Path targetJar = new Path(this.fs.makeQualified(jarDir), localJarPath.getName());
                this.fs.copyFromLocalFile(localJarPath, targetJar);
                LOG.info(String.format("%s will be added to classpath", targetJar));
            }
        }
    }

    /**
     * Recursively deletes the directory recorded under {@link #COMPACTION_JARS}, if one was recorded.
     *
     * @throws IOException if the delete fails
     */
    private void deleteDependencyJars() throws IllegalArgumentException, IOException {
        if (!this.state.contains(COMPACTION_JARS)) {
            return;
        }
        Path jarDir = new Path(this.state.getProp(COMPACTION_JARS));
        this.fs.delete(jarDir, true);
    }

    /** Expands the datasets into per-job datasets, then runs the compaction jobs for them. */
    private void processDatasets() {
        this.createJobPropsForDatasets();
        this.processCompactionJobs();
    }

    /**
     * Replaces the content of {@code datasets} with the datasets produced by
     * {@link #createJobPropsForDataset(Dataset)} for each current member.
     */
    private void createJobPropsForDatasets() {
        Set<Dataset> datasetsWithJobProps = Sets.newHashSet();
        for (Dataset dataset : this.datasets) {
            datasetsWithJobProps.addAll(createJobPropsForDataset(dataset));
        }
        // The field is final, so its content is swapped instead of the reference.
        this.datasets.clear();
        this.datasets.addAll(datasetsWithJobProps);
    }

    /**
     * Creates the job-level datasets for {@code dataset}.
     *
     * @param dataset the dataset to create jobs for
     * @return the datasets returned by the job prop creator; if it throws, a single-element list
     *         holding the "failed" dataset built from that throwable
     */
    private List<Dataset> createJobPropsForDataset(Dataset dataset) {
        LOG.info("Creating compaction jobs for dataset " + dataset + " with priority " + dataset.priority());
        MRCompactorJobPropCreator propCreator = getJobPropCreator(dataset);
        try {
            return propCreator.createJobProps();
        } catch (Throwable failure) {
            return ImmutableList.of(propCreator.createFailedJobProps(failure));
        }
    }

    /**
     * Builds a job prop creator for {@code dataset} bound to this compactor's file system and state.
     *
     * @param dataset the dataset the creator works on
     * @return the creator
     * @throws RuntimeException wrapping any exception raised while building
     */
    MRCompactorJobPropCreator getJobPropCreator(Dataset dataset) {
        try {
            return new MRCompactorJobPropCreator.Builder()
                    .withDataset(dataset)
                    .withFileSystem(this.fs)
                    .withState(this.state)
                    .build();
        } catch (Exception buildFailure) {
            throw new RuntimeException(buildFailure);
        }
    }

    /**
     * @return the live (not copied) set of datasets handled by this compactor
     */
    public Set<Dataset> getDatasets() {
        return datasets;
    }

    /**
     * Starts completeness verification when it is enabled (otherwise marks every dataset as
     * verified), then submits the compaction jobs and waits for them to finish.
     */
    private void processCompactionJobs() {
        if (!this.shouldVerifDataCompl) {
            setAllDatasetStatesToVerified();
        } else {
            verifyDataCompleteness();
        }
        submitCompactionJobsAndWaitForCompletion();
    }

    /**
     * @return whether completeness verification is enabled in the configuration (disabled by default)
     */
    private boolean shouldVerifyDataCompleteness() {
        return this.state.getPropAsBoolean(COMPACTION_COMPLETENESS_VERIFICATION_ENABLED, DEFAULT_COMPACTION_COMPLETENESS_VERIFICATION_ENABLED);
    }

    /**
     * Submits every {@code UNVERIFIED} dataset for completeness verification, in batches of at most
     * the configured batch size. Datasets that do not need verification are marked
     * {@code VERIFIED} right away. Results are handled asynchronously by the callback registered
     * through {@code addCallback}.
     */
    private void verifyDataCompleteness() {
        List<Pattern> blacklist = DatasetFilterUtils.getPatternList(this.state, COMPACTION_COMPLETENESS_VERIFICATION_BLACKLIST);
        List<Pattern> whitelist = DatasetFilterUtils.getPatternList(this.state, COMPACTION_COMPLETENESS_VERIFICATION_WHITELIST);
        int batchSize = getNumDatasetsVerifiedTogether();

        List<Dataset> batch = Lists.newArrayList();
        for (Dataset dataset : this.datasets) {
            if (dataset.state() != Dataset.DatasetState.UNVERIFIED) {
                continue;
            }
            if (!shouldVerifyCompletenessForDataset(dataset, blacklist, whitelist)) {
                dataset.setState(Dataset.DatasetState.VERIFIED);
                continue;
            }
            batch.add(dataset);
            if (batch.size() >= batchSize) {
                addCallback(batch, this.verifier.get().verify(batch));
                // The submitted list is captured by the callback, so start a fresh one.
                batch = Lists.newArrayList();
            }
        }

        // Submit the last, partially filled batch.
        if (!batch.isEmpty()) {
            addCallback(batch, this.verifier.get().verify(batch));
        }
    }

    /**
     * Decides whether {@code dataset} needs completeness verification.
     *
     * @param dataset the dataset in question
     * @param list    blacklist patterns
     * @param list2   whitelist patterns
     * @return {@code true} if the dataset has not been compacted yet and its name survives the
     *         blacklist/whitelist filter
     */
    private boolean shouldVerifyCompletenessForDataset(Dataset dataset, List<Pattern> list, List<Pattern> list2) {
        boolean renamingEnabled = this.state.getPropAsBoolean(COMPACTION_RENAME_SOURCE_DIR_ENABLED, DEFAULT_COMPACTION_RENAME_SOURCE_DIR_ENABLED);
        LOG.info("Should verify completeness with renaming source dir : " + renamingEnabled);
        if (datasetAlreadyCompacted(this.fs, dataset, renamingEnabled)) {
            return false;
        }
        return DatasetFilterUtils.survived(dataset.getName(), list, list2);
    }

    /**
     * Collects the parent directories of all files found recursively under {@code set} whose
     * directory path ends with the {@code _COMPLETE} rename suffix.
     *
     * @param fileSystem file system to list on
     * @param set        root paths to search
     * @return the distinct renamed directories that directly contain at least one file
     * @throws IOException if listing fails
     */
    public static Set<Path> getDeepestLevelRenamedDirsWithFileExistence(FileSystem fileSystem, Set<Path> set) throws IOException {
        Set<Path> renamedDirs = Sets.newHashSet();
        for (FileStatus file : FileListUtils.listFilesRecursively(fileSystem, set)) {
            Path parentDir = file.getPath().getParent();
            if (parentDir.toString().endsWith(COMPACTION_RENAME_SOURCE_DIR_SUFFIX)) {
                renamedDirs.add(parentDir);
            }
        }
        return renamedDirs;
    }

    /**
     * Collects the parent directories of all files found recursively under {@code set} whose
     * directory path does not end with the {@code _COMPLETE} rename suffix.
     *
     * @param fileSystem file system to list on
     * @param set        root paths to search
     * @return the distinct not-yet-renamed directories that directly contain at least one file
     * @throws IOException if listing fails
     */
    public static Set<Path> getDeepestLevelUnrenamedDirsWithFileExistence(FileSystem fileSystem, Set<Path> set) throws IOException {
        Set<Path> unrenamedDirs = Sets.newHashSet();
        for (FileStatus file : FileListUtils.listFilesRecursively(fileSystem, set)) {
            Path parentDir = file.getPath().getParent();
            boolean alreadyRenamed = parentDir.toString().endsWith(COMPACTION_RENAME_SOURCE_DIR_SUFFIX);
            if (!alreadyRenamed) {
                unrenamedDirs.add(parentDir);
            }
        }
        return unrenamedDirs;
    }

    /**
     * Marks the source directories of {@code dataset} as compacted by renaming each of its rename
     * paths to the same name with the {@code _COMPLETE} suffix appended.
     *
     * <p>This is best effort: nothing is propagated to the caller. A rename that reports failure
     * through its return value is logged and the remaining paths are still attempted; an exception
     * is logged and ends the loop.
     *
     * @param fileSystem file system holding the source directories
     * @param dataset    dataset whose rename paths are renamed
     */
    public static void renameSourceDirAsCompactionComplete(FileSystem fileSystem, Dataset dataset) {
        try {
            for (Path sourceDir : dataset.getRenamePaths()) {
                Path completedDir = new Path(sourceDir.getParent(), sourceDir.getName() + COMPACTION_RENAME_SOURCE_DIR_SUFFIX);
                LOG.info("[{}] Renaming {} to {}", new Object[]{dataset.getDatasetName(), sourceDir, completedDir});
                // FileSystem.rename signals some failures by returning false instead of throwing.
                if (!fileSystem.rename(sourceDir, completedDir)) {
                    LOG.error("[{}] Failed to rename {} to {}", new Object[]{dataset.getDatasetName(), sourceDir, completedDir});
                }
            }
        } catch (Exception e) {
            LOG.error("Rename input path failed", e);
        }
    }

    /**
     * Tells whether {@code dataset} has already been compacted.
     *
     * @param fileSystem file system to inspect
     * @param dataset    dataset to check
     * @param z          {@code true} to decide by looking for renamed source directories,
     *                   {@code false} to decide by looking for the completion file in the output path
     * @return {@code true} if the dataset is considered already compacted
     */
    public static boolean datasetAlreadyCompacted(FileSystem fileSystem, Dataset dataset, boolean z) {
        if (z) {
            return checkAlreadyCompactedBasedOnSourceDirName(fileSystem, dataset);
        }
        return checkAlreadyCompactedBasedOnCompletionFile(fileSystem, dataset);
    }

    /**
     * @return {@code true} if at least one renamed ({@code _COMPLETE}) directory containing files
     *         exists under the dataset's input paths; {@code false} if none exists or listing fails
     */
    private static boolean checkAlreadyCompactedBasedOnSourceDirName(FileSystem fileSystem, Dataset dataset) {
        try {
            Set<Path> renamedDirs = getDeepestLevelRenamedDirsWithFileExistence(fileSystem, dataset.inputPaths());
            return !renamedDirs.isEmpty();
        } catch (IOException listingFailure) {
            LOG.error("Failed to get deepest directories from source", listingFailure);
            return false;
        }
    }

    /**
     * @return {@code true} if the completion marker file exists in the dataset's output path;
     *         {@code false} if it does not exist or its existence cannot be determined
     */
    private static boolean checkAlreadyCompactedBasedOnCompletionFile(FileSystem fileSystem, Dataset dataset) {
        Path completionFile = new Path(dataset.outputPath(), COMPACTION_COMPLETE_FILE_NAME);
        boolean alreadyCompacted;
        try {
            alreadyCompacted = fileSystem.exists(completionFile);
        } catch (IOException existsFailure) {
            LOG.error("Failed to verify the existence of file " + completionFile, existsFailure);
            alreadyCompacted = false;
        }
        return alreadyCompacted;
    }

    /**
     * Reads the value stored in the compaction completion file of a directory.
     *
     * @param fileSystem file system holding the directory
     * @param path       directory that contains the {@code _COMPACTION_COMPLETE} file
     * @return the {@code long} stored at the start of the completion file
     * @throws IOException if the file cannot be opened or read
     */
    public static long readCompactionTimestamp(FileSystem fileSystem, Path path) throws IOException {
        Path completionFile = new Path(path, COMPACTION_COMPLETE_FILE_NAME);
        // try-with-resources always closes the stream and keeps a read failure as the primary
        // exception, attaching any close() failure to it as suppressed.
        try (FSDataInputStream in = fileSystem.open(completionFile)) {
            return in.readLong();
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Registers the handler for the outcome of one completeness-verification request.
     *
     * <p>On a result per dataset:
     * <ul>
     *   <li>{@code PASSED}: a success SLA event is submitted, the dataset becomes {@code VERIFIED}
     *       and, if a job runner is registered for it, the runner is told to proceed.</li>
     *   <li>{@code FAILED}: if verification should be given up, a failure SLA event is submitted and
     *       the dataset becomes {@code GIVEN_UP} with a throwable attached; otherwise the dataset
     *       is verified again.</li>
     * </ul>
     * If the verification request itself fails, the whole list is either verified again or, when
     * verification should be given up, every dataset in it is marked {@code GIVEN_UP}.
     *
     * @param list             the datasets submitted in this verification request
     * @param listenableFuture the pending verification result for {@code list}
     */
    public void addCallback(final List<Dataset> list, ListenableFuture<DataCompletenessVerifier.Results> listenableFuture) {
        Futures.addCallback(listenableFuture, new FutureCallback<DataCompletenessVerifier.Results>() {
            @Override
            public void onSuccess(DataCompletenessVerifier.Results results) {
                List<Dataset> datasetsToVerifyAgain = Lists.newArrayList();
                Iterator<DataCompletenessVerifier.Results.Result> it = results.iterator();
                while (it.hasNext()) {
                    DataCompletenessVerifier.Results.Result result = it.next();
                    Dataset dataset = result.dataset();
                    // Looked up before the state change, as a runner may or may not be registered yet.
                    Optional<MRCompactorJobRunner> jobRunner = Optional.fromNullable(MRCompactor.this.jobRunnables.get(dataset));
                    switch (result.status()) {
                        case PASSED:
                            MRCompactor.LOG.info("Completeness verification for dataset " + dataset + " passed.");
                            MRCompactor.this.submitVerificationSuccessSlaEvent(result);
                            dataset.setState(Dataset.DatasetState.VERIFIED);
                            if (jobRunner.isPresent()) {
                                jobRunner.get().proceed();
                            }
                            break;
                        case FAILED:
                            if (MRCompactor.this.shouldGiveUpVerification()) {
                                MRCompactor.LOG.info("Completeness verification for dataset " + dataset + " has timed out.");
                                // Verification did not pass, so report a failure event, matching onFailure below.
                                MRCompactor.this.submitFailureSlaEvent(dataset, CompactionSlaEventHelper.COMPLETION_VERIFICATION_FAILED_EVENT_NAME);
                                dataset.setState(Dataset.DatasetState.GIVEN_UP);
                                dataset.addThrowable(new RuntimeException(String.format("Completeness verification for dataset %s failed or timed out.", dataset)));
                            } else {
                                MRCompactor.LOG.info("Completeness verification for dataset " + dataset + " failed. Will verify again.");
                                datasetsToVerifyAgain.add(dataset);
                            }
                            break;
                        default:
                            throw new IllegalStateException("Unrecognized result status: " + result.status());
                    }
                }
                if (!datasetsToVerifyAgain.isEmpty()) {
                    MRCompactor.this.addCallback(datasetsToVerifyAgain, MRCompactor.this.verifier.get().verify(datasetsToVerifyAgain));
                }
            }

            @Override
            public void onFailure(Throwable th) {
                MRCompactor.LOG.error("Failed to verify completeness for the following datasets: " + list, th);
                if (!MRCompactor.this.shouldGiveUpVerification()) {
                    // Not timed out yet: submit the same datasets for verification again.
                    MRCompactor.this.addCallback(list, MRCompactor.this.verifier.get().verify(list));
                    return;
                }
                for (Dataset dataset : list) {
                    MRCompactor.LOG.warn(String.format("Completeness verification for dataset %s has timed out.", dataset));
                    MRCompactor.this.submitFailureSlaEvent(dataset, CompactionSlaEventHelper.COMPLETION_VERIFICATION_FAILED_EVENT_NAME);
                    dataset.setState(Dataset.DatasetState.GIVEN_UP);
                    dataset.addThrowable(new RuntimeException(String.format("Completeness verification for dataset %s failed or timed out.", dataset)));
                }
            }
        });
    }

    /**
     * Reads {@code COMPACTION_COMPLETENESS_VERIFICATION_NUM_DATASETS_VERIFIED_TOGETHER} from the
     * compactor state as an int.
     *
     * @return the configured value, or 10 when the property is not set
     */
    private int getNumDatasetsVerifiedTogether() {
        final int fallbackValue = 10;
        return this.state.getPropAsInt(COMPACTION_COMPLETENESS_VERIFICATION_NUM_DATASETS_VERIFIED_TOGETHER, fallbackValue);
    }

    /**
     * Walks every known dataset and asks it to move from {@code UNVERIFIED} to {@code VERIFIED}
     * via {@code compareAndSetState}; the result of each call is ignored.
     */
    private void setAllDatasetStatesToVerified() {
        for (Dataset dataset : this.datasets) {
            dataset.compareAndSetState(Dataset.DatasetState.UNVERIFIED, Dataset.DatasetState.VERIFIED);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Tells whether completeness verification has run out of time.
     *
     * @return {@code true} once the stopwatch's elapsed whole minutes reach or exceed
     *     {@code dataVerifTimeoutMinutes}
     */
    public boolean shouldGiveUpVerification() {
        final long elapsedMinutes = this.stopwatch.elapsed(TimeUnit.MINUTES);
        return elapsedMinutes >= this.dataVerifTimeoutMinutes;
    }

    /**
     * Reads {@code COMPACTION_COMPLETENESS_VERIFICATION_PUBLISH_DATA_IF_CANNOT_VERIFY} from the
     * compactor state as a boolean.
     *
     * @return the configured value, or {@code false} when the property is not set
     */
    private boolean shouldPublishDataIfCannotVerifyCompl() {
        final boolean fallbackValue = false;
        return this.state.getPropAsBoolean(COMPACTION_COMPLETENESS_VERIFICATION_PUBLISH_DATA_IF_CANNOT_VERIFY, fallbackValue);
    }

    /**
     * Polling loop that starts compaction jobs and waits until no dataset needs further work.
     *
     * <p>Each round inspects every dataset:
     * <ul>
     *   <li>{@code VERIFIED} or {@code UNVERIFIED}: still pending; a job is (re)started when no
     *       runner exists or the existing one was aborted.</li>
     *   <li>{@code GIVEN_UP}: when publishing unverifiable data is enabled the dataset is treated as
     *       pending and its runner is started or told to proceed; otherwise an existing runner is
     *       aborted.</li>
     *   <li>any other state: nothing to do.</li>
     * </ul>
     * After each round, all runners are aborted and the method returns if the compaction timeout
     * has elapsed; otherwise the thread sleeps for the wait interval before the loop condition is
     * re-checked.
     *
     * @throws RuntimeException if the thread is interrupted while sleeping (the interrupt flag is
     *     restored first)
     */
    private void submitCompactionJobsAndWaitForCompletion() {
        LOG.info("Submitting compaction jobs. Number of datasets: " + this.datasets.size());
        boolean allDatasetsDone;
        do {
            allDatasetsDone = true;
            for (Dataset dataset : this.datasets) {
                final MRCompactorJobRunner runner = this.jobRunnables.get(dataset);

                // The state is re-read on every check on purpose: it can be changed concurrently.
                if (dataset.state() == Dataset.DatasetState.VERIFIED || dataset.state() == Dataset.DatasetState.UNVERIFIED) {
                    allDatasetsDone = false;
                    if (runner == null || runner.status() == MRCompactorJobRunner.Status.ABORTED) {
                        runCompactionForDataset(dataset, dataset.state() == Dataset.DatasetState.VERIFIED);
                    }
                    continue;
                }
                if (dataset.state() != Dataset.DatasetState.GIVEN_UP) {
                    continue;
                }
                if (!this.shouldPublishDataIfCannotVerifyCompl) {
                    // Verification was given up and publishing is not allowed: stop any runner.
                    if (runner != null) {
                        runner.abort();
                    }
                    continue;
                }
                allDatasetsDone = false;
                if (runner == null || runner.status() == MRCompactorJobRunner.Status.ABORTED) {
                    runCompactionForDataset(dataset, true);
                } else {
                    runner.proceed();
                }
            }

            if (this.stopwatch.elapsed(TimeUnit.MINUTES) >= this.compactionTimeoutMinutes) {
                LOG.error("Compaction timed-out. Killing all running jobs");
                for (MRCompactorJobRunner running : this.jobRunnables.values()) {
                    running.abort();
                }
                return;
            }

            try {
                Thread.sleep(TimeUnit.SECONDS.toMillis(COMPACTION_JOB_WAIT_INTERVAL_SECONDS));
            } catch (InterruptedException interrupted) {
                Thread.currentThread().interrupt();
                throw new RuntimeException("Interrupted while waiting", interrupted);
            }
        } while (!allDatasetsDone);
    }

    /**
     * Creates a job runner for the dataset, registers it in {@code jobRunnables} and hands it to
     * the job executor.
     *
     * <p>Any {@link Throwable} raised along the way is passed to {@code dataset.skip(...)} instead
     * of being propagated.
     *
     * @param dataset the dataset to compact
     * @param z when {@code true}, {@code proceed()} is called on the runner before it is submitted
     */
    private void runCompactionForDataset(Dataset dataset, boolean z) {
        LOG.info("Running compaction for dataset " + dataset);
        try {
            final MRCompactorJobRunner runner = getMRCompactorJobRunner(dataset);
            this.jobRunnables.put(dataset, runner);
            if (z) {
                runner.proceed();
            }
            this.jobExecutor.execute(runner);
        } catch (Throwable failure) {
            dataset.skip(failure);
        }
    }

    /**
     * Reflectively instantiates the job runner for a dataset.
     *
     * <p>The class name comes from the {@code COMPACTION_JOB_RUNNER_CLASS} property (falling back to
     * {@code DEFAULT_COMPACTION_JOB_RUNNER_CLASS}); the class must declare a
     * {@code (Dataset, FileSystem)} constructor.
     *
     * @param dataset the dataset passed to the runner's constructor
     * @return the newly created runner
     * @throws RuntimeException wrapping any exception raised while resolving or instantiating the
     *     class
     */
    private MRCompactorJobRunner getMRCompactorJobRunner(Dataset dataset) {
        try {
            final String runnerClassName = this.state.getProp(COMPACTION_JOB_RUNNER_CLASS, DEFAULT_COMPACTION_JOB_RUNNER_CLASS);
            final Class<?> runnerClass = Class.forName(runnerClassName);
            final Object runner = runnerClass.getDeclaredConstructor(Dataset.class, FileSystem.class).newInstance(dataset, this.fs);
            return (MRCompactorJobRunner) runner;
        } catch (Exception cause) {
            throw new RuntimeException("Cannot instantiate MRCompactorJobRunner", cause);
        }
    }

    /**
     * Records {@code job} as the Hadoop job associated with {@code dataset} in the static
     * {@code RUNNING_MR_JOBS} map, replacing any job previously stored for that dataset.
     * {@link #cancel()} iterates this map to kill jobs that have not completed.
     *
     * @param dataset the dataset the job belongs to (map key)
     * @param job the Hadoop job to track (map value)
     */
    public static void addRunningHadoopJob(Dataset dataset, Job job) {
        RUNNING_MR_JOBS.put(dataset, job);
    }

    /**
     * Reads {@code COMPACTION_MR_JOB_TIMEOUT_MINUTES} from the compactor state as a long.
     *
     * @return the configured value, or {@code DEFAULT_COMPACTION_MR_JOB_TIMEOUT_MINUTES} when the
     *     property is not set
     */
    private long getCompactionTimeoutMinutes() {
        final long timeoutMinutes = this.state.getPropAsLong(COMPACTION_MR_JOB_TIMEOUT_MINUTES, DEFAULT_COMPACTION_MR_JOB_TIMEOUT_MINUTES);
        return timeoutMinutes;
    }

    /**
     * Reads {@code COMPACTION_VERIFICATION_TIMEOUT_MINUTES} from the compactor state as a long.
     *
     * @return the configured value, or 30 when the property is not set
     */
    private long getDataVerifTimeoutMinutes() {
        final long fallbackMinutes = 30L;
        return this.state.getPropAsLong(COMPACTION_VERIFICATION_TIMEOUT_MINUTES, fallbackMinutes);
    }

    /**
     * Reports every recorded dataset failure and then fails the run if there was at least one.
     *
     * <p>For each throwable on each failed dataset, the error is logged and a
     * compaction-failed SLA event is submitted (one event per throwable).
     *
     * @throws RuntimeException if one or more datasets have recorded throwables; the message
     *     carries the number of such datasets
     */
    private void throwExceptionsIfAnyDatasetCompactionFailed() {
        final Set<Dataset> failedDatasets = getDatasetsWithThrowables();
        for (Dataset failed : failedDatasets) {
            for (Throwable cause : failed.throwables()) {
                LOG.error("Error processing dataset " + failed, cause);
                submitFailureSlaEvent(failed, CompactionSlaEventHelper.COMPACTION_FAILED_EVENT_NAME);
            }
        }
        final int failedCount = failedDatasets.size();
        if (failedCount > 0) {
            throw new RuntimeException(String.format("Failed to process %d datasets.", Integer.valueOf(failedCount)));
        }
    }

    /**
     * Collects the datasets that have at least one recorded throwable.
     *
     * @return a new hash set of the datasets whose {@code throwables()} collection is non-empty;
     *     empty when none have failed
     */
    private Set<Dataset> getDatasetsWithThrowables() {
        final Set<Dataset> failed = Sets.newHashSet();
        for (Dataset candidate : this.datasets) {
            if (candidate.throwables().isEmpty()) {
                continue;
            }
            failed.add(candidate);
        }
        return failed;
    }

    /**
     * Logs that executors are being shut down and then shuts down {@code jobExecutor} through
     * {@code ExecutorsUtils.shutdownExecutorService}, passing this class's logger along.
     */
    private void shutdownExecutors() {
        LOG.info("Shutting down Executors");
        ExecutorsUtils.shutdownExecutorService(this.jobExecutor, Optional.of(LOG));
    }

    /**
     * Cancels the compaction: kills every tracked Hadoop job that has not completed, shuts down the
     * job executor with a zero timeout and, when a completeness verifier is present, closes it
     * immediately.
     *
     * <p>The cleanup steps are chained with {@code finally} so that each one runs exactly once and
     * a failure in an earlier step (killing jobs, shutting down the executor) can never prevent the
     * later ones from running.
     *
     * @throws IOException if querying or killing a Hadoop job fails
     */
    @Override // org.apache.gobblin.compaction.Compactor
    public void cancel() throws IOException {
        try {
            for (Map.Entry<Dataset, Job> entry : RUNNING_MR_JOBS.entrySet()) {
                Job hadoopJob = entry.getValue();
                if (!hadoopJob.isComplete()) {
                    LOG.info(String.format("Killing hadoop job %s for dataset %s", hadoopJob.getJobID(), entry.getKey()));
                    hadoopJob.killJob();
                }
            }
        } finally {
            try {
                ExecutorsUtils.shutdownExecutorService(this.jobExecutor, Optional.of(LOG), 0L, TimeUnit.NANOSECONDS);
            } finally {
                // Must run even if the executor shutdown throws, otherwise the verifier leaks.
                if (this.verifier.isPresent()) {
                    ((DataCompletenessVerifier) this.verifier.get()).closeNow();
                }
            }
        }
    }

    /**
     * Switches a dataset into recompaction mode.
     *
     * <p>Builds a fresh {@link State} with {@code COMPACTION_RECOMPACT_FROM_DEST_PATHS} set to
     * {@code true} and {@code COMPACTION_JOB_LATE_DATA_MOVEMENT_TASK} set to {@code false}, applies
     * it through {@code dataset.modifyDatasetForRecompact(...)}, and finally marks the dataset
     * {@code VERIFIED}.
     *
     * @param dataset the dataset to reconfigure
     */
    public static void modifyDatasetStateToRecompact(Dataset dataset) {
        LOG.info("{} changes to recompact mode", dataset.getDatasetName());

        final State recompactOverrides = new State();
        recompactOverrides.setProp(COMPACTION_RECOMPACT_FROM_DEST_PATHS, Boolean.TRUE);
        recompactOverrides.setProp(COMPACTION_JOB_LATE_DATA_MOVEMENT_TASK, Boolean.FALSE);

        dataset.modifyDatasetForRecompact(recompactOverrides);
        dataset.setState(Dataset.DatasetState.VERIFIED);
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Submits a completeness-verification-success SLA event for the dataset of {@code result},
     * attaching the result's verification context (values converted to strings) as additional
     * metadata.
     *
     * <p>Best effort: any {@link Throwable} raised while building or submitting the event is logged
     * as a warning and not propagated.
     *
     * @param result the verification result to report
     */
    public void submitVerificationSuccessSlaEvent(DataCompletenessVerifier.Results.Result result) {
        try {
            CompactionSlaEventHelper
                    .getEventSubmitterBuilder(result.dataset(), Optional.absent(), this.fs)
                    .eventSubmitter(this.eventSubmitter)
                    .eventName(CompactionSlaEventHelper.COMPLETION_VERIFICATION_SUCCESS_EVENT_NAME)
                    .additionalMetadata(Maps.transformValues(result.verificationContext(), Functions.toStringFunction()))
                    .build()
                    .submit();
        } catch (Throwable submitError) {
            LOG.warn("Failed to submit verification success event:" + submitError, submitError);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Submits an SLA event named {@code str} for the given dataset.
     *
     * <p>Best effort: any {@link Throwable} raised while building or submitting the event is logged
     * as a warning and not propagated.
     *
     * @param dataset the dataset the event is about
     * @param str the event name to submit
     */
    public void submitFailureSlaEvent(Dataset dataset, String str) {
        try {
            CompactionSlaEventHelper
                    .getEventSubmitterBuilder(dataset, Optional.absent(), this.fs)
                    .eventSubmitter(this.eventSubmitter)
                    .eventName(str)
                    .build()
                    .submit();
        } catch (Throwable submitError) {
            LOG.warn("Failed to submit failure sla event:" + submitError, submitError);
        }
    }
}
