package org.apache.iceberg.mr.hive.actions;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.net.URI;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.iceberg.DataTask;
import org.apache.iceberg.FileScanTask;
import org.apache.iceberg.HasTableOperations;
import org.apache.iceberg.ManifestFile;
import org.apache.iceberg.MetadataTableType;
import org.apache.iceberg.MetadataTableUtils;
import org.apache.iceberg.ReachableFileUtil;
import org.apache.iceberg.Snapshot;
import org.apache.iceberg.StructLike;
import org.apache.iceberg.Table;
import org.apache.iceberg.actions.DeleteOrphanFiles;
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.io.CloseableIterator;
import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
import org.apache.iceberg.relocated.com.google.common.util.concurrent.MoreExecutors;
import org.apache.iceberg.util.Tasks;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/iceberg/mr/hive/actions/HiveIcebergDeleteOrphanFiles.class */
/**
 * {@link DeleteOrphanFiles} implementation that lists the {@value #DATA_FOLDER_NAME} and
 * {@value #METADATA_FOLDER_NAME} directories under the table location and deletes every file that
 * is older than a cutoff timestamp and is not in the set of files this class collects from the
 * table's metadata.
 *
 * <p>Files are compared by the path component of their URI (see {@link #getUriPath(String)}), so
 * scheme and authority differences between a listed file and a recorded location are ignored.
 *
 * <p>Instances are configured through the fluent setters and are not synchronized.
 */
public class HiveIcebergDeleteOrphanFiles implements DeleteOrphanFiles {
    public static final String METADATA_FOLDER_NAME = "metadata";
    public static final String DATA_FOLDER_NAME = "data";
    private static final Logger LOG = LoggerFactory.getLogger(HiveIcebergDeleteOrphanFiles.class);

    private final Configuration conf;
    private final Table table;
    private String tableLocation;
    private Consumer<String> deleteFunc;
    // Default cutoff: three days before this action instance was created.
    private long olderThanTimestamp = System.currentTimeMillis() - TimeUnit.DAYS.toMillis(3);
    // Default executor runs each delete on the calling thread.
    private ExecutorService deleteExecutorService = MoreExecutors.newDirectExecutorService();

    /**
     * Result of the action: the locations that were selected for deletion.
     *
     * <p>The set holds every file handed to the delete function, including files whose deletion
     * ultimately failed (failures are logged and suppressed by {@link #execute()}).
     */
    public static class HiveIcebergDeleteOrphanFilesResult implements DeleteOrphanFiles.Result {
        private final Set<String> deletedFiles = Sets.newHashSet();

        HiveIcebergDeleteOrphanFilesResult() {
        }

        /** Returns the live, mutable set of locations accumulated so far. */
        @Override
        public Iterable<String> orphanFileLocations() {
            return this.deletedFiles;
        }

        /**
         * Adds the given locations to this result.
         *
         * @param files locations to record; must not be {@code null}
         */
        public void addDeletedFiles(Set<String> files) {
            this.deletedFiles.addAll(files);
        }
    }

    /**
     * Creates the action for a table. By default files are deleted through {@code table.io()} and
     * the scanned location is {@code table.location()}.
     *
     * @param conf Hadoop configuration used to resolve the file system that is listed
     * @param table table whose orphan files are cleaned
     */
    public HiveIcebergDeleteOrphanFiles(Configuration conf, Table table) {
        this.conf = conf;
        this.table = table;
        // io() is looked up on every delete rather than once, as in the original implementation.
        this.deleteFunc = file -> table.io().deleteFile(file);
        this.tableLocation = table.location();
    }

    /**
     * Overrides the root location whose {@value #DATA_FOLDER_NAME} and
     * {@value #METADATA_FOLDER_NAME} sub-directories are listed.
     *
     * @param newLocation root location to scan
     * @return this action, for chaining
     */
    @Override
    public HiveIcebergDeleteOrphanFiles location(String newLocation) {
        this.tableLocation = newLocation;
        return this;
    }

    /**
     * Sets the cutoff: only files whose modification time is strictly less than this value are
     * considered for deletion.
     *
     * @param newOlderThanTimestamp cutoff in epoch milliseconds
     * @return this action, for chaining
     */
    @Override
    public HiveIcebergDeleteOrphanFiles olderThan(long newOlderThanTimestamp) {
        this.olderThanTimestamp = newOlderThanTimestamp;
        return this;
    }

    /**
     * Replaces the function invoked for each file selected for deletion.
     *
     * @param newDeleteFunc consumer that receives the full path string of each file
     * @return this action, for chaining
     */
    @Override
    public HiveIcebergDeleteOrphanFiles deleteWith(Consumer<String> newDeleteFunc) {
        this.deleteFunc = newDeleteFunc;
        return this;
    }

    /**
     * Replaces the executor on which the delete function is run.
     *
     * @param executorService executor used for the per-file delete tasks
     * @return this action, for chaining
     */
    @Override
    public HiveIcebergDeleteOrphanFiles executeDeleteWith(ExecutorService executorService) {
        this.deleteExecutorService = executorService;
        return this;
    }

    /**
     * Collects orphan content files and orphan metadata files, then runs the delete function on
     * each of them.
     *
     * <p>Each delete is retried up to 3 times (retrying stops on {@link FileNotFoundException});
     * a failed delete is logged at WARN level and does not fail the action.
     *
     * @return a result holding every location that was submitted for deletion
     * @throws UncheckedIOException if listing a directory or closing the metadata scan fails
     * @throws NullPointerException if the delete function was set to {@code null}
     */
    @Override
    public DeleteOrphanFiles.Result execute() {
        LOG.info("Cleaning orphan files for {}", this.table.name());
        HiveIcebergDeleteOrphanFilesResult result = new HiveIcebergDeleteOrphanFilesResult();
        result.addDeletedFiles(cleanContentFiles(this.olderThanTimestamp));
        result.addDeletedFiles(cleanMetadata(this.olderThanTimestamp));
        LOG.debug("Deleting {} files while cleaning orphan files for {}", result.deletedFiles.size(), this.table.name());
        Tasks.foreach(result.deletedFiles)
            .executeWith(this.deleteExecutorService)
            .retry(3)
            .stopRetryOn(FileNotFoundException.class)
            .suppressFailureWhenFinished()
            .onFailure((file, thrown) -> LOG.warn("Delete failed for file: {}", file, thrown))
            .run(this.deleteFunc::accept);
        return result;
    }

    /**
     * Finds files under the {@value #DATA_FOLDER_NAME} directory that are older than the cutoff
     * and are neither a known content file nor a known statistics file.
     *
     * @param lastTime cutoff in epoch milliseconds
     * @return full path strings of the files to delete
     */
    private Set<String> cleanContentFiles(long lastTime) {
        Set<String> validFiles = Sets.union(getAllContentFilePath(), getAllStatisticsFilePath(this.table));
        LOG.debug("Valid content file for {} are {}", this.table.name(), validFiles.size());
        try {
            return getFilesToBeDeleted(lastTime, validFiles, new Path(this.tableLocation, DATA_FOLDER_NAME));
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }

    /**
     * Scans the table's {@code ALL_ENTRIES} metadata table and returns the URI path of the file
     * referenced by every entry.
     *
     * @return mutable set of URI paths of all referenced content files
     * @throws UncheckedIOException if closing the scan resources fails
     */
    public Set<String> getAllContentFilePath() {
        Set<String> validFilesPath = Sets.newHashSet();
        // Both the planned tasks and the concatenated row iterable hold resources; close them
        // deterministically instead of leaking them.
        try (CloseableIterable<FileScanTask> manifestTasks = getMetadataTable().newScan().planFiles();
             CloseableIterable<StructLike> entries = CloseableIterable.concat(entriesOfManifest(manifestTasks))) {
            for (StructLike entry : entries) {
                // Position 4 of an entry row is a nested struct and position 1 inside it is a
                // location string -- presumably data_file / file_path; confirm against the
                // ALL_ENTRIES schema of the Iceberg version in use.
                StructLike fileRecord = entry.get(4, StructLike.class);
                String filePath = fileRecord.get(1, String.class);
                validFilesPath.add(getUriPath(filePath));
            }
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
        return validFilesPath;
    }

    /**
     * Lazily maps each scan task to its rows. Every task is cast to {@link DataTask}.
     *
     * @param fileScanTasks tasks planned from the metadata table scan
     * @return one row iterable per task
     */
    private Iterable<CloseableIterable<StructLike>> entriesOfManifest(CloseableIterable<FileScanTask> fileScanTasks) {
        return Iterables.transform(fileScanTasks, task -> {
            assert task != null;
            return ((DataTask) task).rows();
        });
    }

    /**
     * Returns the URI paths of all statistics files reported by
     * {@link ReachableFileUtil#statisticsFilesLocations(Table)}.
     *
     * @param table table to inspect
     * @return set of URI paths
     */
    public static Set<String> getAllStatisticsFilePath(Table table) {
        return ReachableFileUtil.statisticsFilesLocations(table).stream()
            .map(HiveIcebergDeleteOrphanFiles::getUriPath)
            .collect(Collectors.toSet());
    }

    /**
     * Finds files under the {@value #METADATA_FOLDER_NAME} directory that are older than the
     * cutoff and are not among the valid metadata files of the table.
     *
     * @param lastTime cutoff in epoch milliseconds
     * @return full path strings of the files to delete
     * @throws UncheckedIOException if listing the directory fails
     */
    protected Set<String> cleanMetadata(long lastTime) {
        LOG.info("{} start clean metadata files", this.table.name());
        try {
            Set<String> validFiles = getValidMetadataFiles(this.table);
            LOG.debug("Valid metadata files for {} are {}", this.table.name(), validFiles);
            return getFilesToBeDeleted(lastTime, validFiles, new Path(this.tableLocation, METADATA_FOLDER_NAME));
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }

    /**
     * Recursively lists {@code location} and selects the files that were modified strictly before
     * {@code lastTime} and whose URI path is not contained in {@code validFiles}.
     *
     * @param lastTime cutoff in epoch milliseconds
     * @param validFiles URI paths of files that must be kept
     * @param location directory to list
     * @return full path strings (not just the URI path) of the files to delete
     * @throws IOException if the file system cannot be resolved or listed
     */
    private Set<String> getFilesToBeDeleted(long lastTime, Set<String> validFiles, Path location) throws IOException {
        Set<String> filesToDelete = Sets.newHashSet();
        RemoteIterator<LocatedFileStatus> listing = location.getFileSystem(this.conf).listFiles(location, true);
        while (listing.hasNext()) {
            LocatedFileStatus status = listing.next();
            String filePath = status.getPath().toString();
            if (status.getModificationTime() < lastTime && !validFiles.contains(getUriPath(filePath))) {
                filesToDelete.add(filePath);
            }
        }
        return filesToDelete;
    }

    /**
     * Builds the {@code ALL_ENTRIES} metadata table instance for the wrapped table. The wrapped
     * table is cast to {@link HasTableOperations}.
     */
    private Table getMetadataTable() {
        return MetadataTableUtils.createMetadataTableInstance(
            ((HasTableOperations) this.table).operations(),
            this.table.name(),
            this.table.name() + "#" + MetadataTableType.ALL_ENTRIES.name(),
            MetadataTableType.ALL_ENTRIES);
    }

    /**
     * Collects the URI paths of every metadata-side file that must be kept: the manifest list and
     * all manifests of each snapshot, plus the metadata file locations, statistics file locations
     * and version hint location reported by {@link ReachableFileUtil}.
     *
     * @param table table to inspect
     * @return mutable set of URI paths
     */
    private static Set<String> getValidMetadataFiles(Table table) {
        Set<String> validFiles = Sets.newHashSet();
        for (Snapshot snapshot : table.snapshots()) {
            validFiles.add(getUriPath(snapshot.manifestListLocation()));
            for (ManifestFile manifest : snapshot.allManifests(table.io())) {
                validFiles.add(getUriPath(manifest.path()));
            }
        }
        Stream.of(
                ReachableFileUtil.metadataFileLocations(table, false).stream(),
                ReachableFileUtil.statisticsFilesLocations(table).stream(),
                Stream.of(ReachableFileUtil.versionHintLocation(table)))
            .reduce(Stream::concat)
            .orElse(Stream.<String>empty())
            .map(HiveIcebergDeleteOrphanFiles::getUriPath)
            .forEach(validFiles::add);
        return validFiles;
    }

    /**
     * Returns the decoded path component of the given location, dropping scheme and authority.
     *
     * @param path location string; must be a syntactically valid URI
     * @return the URI's path component
     * @throws IllegalArgumentException if {@code path} is not a valid URI, as specified by
     *     {@link URI#create(String)}
     */
    private static String getUriPath(String path) {
        return URI.create(path).getPath();
    }
}
