package org.apache.kylin.rest.job;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.util.AbstractApplication;
import org.apache.kylin.common.util.HadoopUtil;
import org.apache.kylin.common.util.OptionsHelper;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.cube.model.DictionaryDesc;
import org.apache.kylin.job.execution.ExecutableManager;
import org.apache.kylin.metadata.project.ProjectManager;
import org.apache.tools.ant.taskdefs.optional.j2ee.HotDeploymentTool;
import org.joda.time.DateTimeConstants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/kylin-server-base-4.0.3.jar:org/apache/kylin/rest/job/StorageCleanupJob.class */
/**
 * Command-line application that looks for storage under the Kylin HDFS working directory that is
 * no longer referenced by metadata and, when asked to, deletes it.
 *
 * <p>The following kinds of paths are examined: project directories, global dictionary
 * directories, table snapshot directories, cube directories, segment directories and (optionally)
 * job tmp directories. A path is only ever considered when its modification time is older than
 * {@link #storageTimeCut}; see {@link #eligibleStorage(FileStatus)}.
 *
 * <p>Unless the {@code delete} option is {@code true} the job is a dry run: candidates are logged
 * but nothing is removed.
 */
public class StorageCleanupJob extends AbstractApplication {
    private static final Logger logger = LoggerFactory.getLogger(StorageCleanupJob.class);

    /** Default age, in hours, a path must exceed before it may be cleaned up. */
    public static final int DEFAULT_CLEANUP_HOUR_THRESHOLD = 168;
    /** Default for {@link #cleanupGlobalDict}. */
    public static final boolean DEFAULT_CLEANUP_DICT = true;
    /** Default for {@link #cleanupTableSnapshot}. */
    public static final boolean DEFAULT_CLEANUP_SNAPSHOT = true;
    /** Default for {@link #cleanupJobTmp}. */
    public static final boolean DEFAULT_CLEANUP_JOB_TMP = false;
    /** Default for {@link #delete}: a dry run. */
    public static final boolean DEFAULT_CLEANUP = false;

    private static final String GLOBAL_DICT_PREFIX = "/dict/global_dict/";
    private static final String TABLE_SNAPSHOT_PREFIX = "/table_snapshot/";
    /**
     * Name of the {@code delete} option. Spelled out locally so the option name does not depend on
     * an unrelated Ant constant that merely has the same value.
     */
    private static final String OPTION_NAME_DELETE = "delete";

    protected static final Option OPTION_HELP = buildHelpOption();
    protected static final Option OPTION_DELETE = buildValueOption(OPTION_NAME_DELETE, Boolean.class,
            "Boolean, whether or not to do real delete operation. Default value is false, means a dry run.");
    protected static final Option OPTION_CLEANUP_TABLE_SNAPSHOT = buildValueOption("cleanupTableSnapshot",
            Boolean.class,
            "Boolean, whether or not to delete unreferenced snapshot files. Default value is true .");
    protected static final Option OPTION_CLEANUP_GLOBAL_DICT = buildValueOption("cleanupGlobalDict",
            Boolean.class,
            "Boolean, whether or not to delete unreferenced global dict files. Default value is true .");
    protected static final Option OPTION_CLEANUP_JOB_TMP = buildValueOption("cleanupJobTmp", Boolean.class,
            "Boolean, whether or not to delete job tmp files. Default value is false .");
    protected static final Option OPTION_CLEANUP_THRESHOLD_HOUR = buildValueOption("cleanupThreshold",
            Integer.class,
            "Integer, used to specific delete unreferenced storage that have not been modified before how many hours (recent files are protected). Default value is 168 hours.");

    protected final KylinConfig config;
    protected final FileSystem fs;
    protected final ExecutableManager executableManager;

    /** When {@code false}, candidates are only logged (dry run). */
    protected boolean delete;
    protected boolean cleanupTableSnapshot;
    protected boolean cleanupGlobalDict;
    protected boolean cleanupJobTmp;
    /** Minimum age of a path, in hours, before it may be cleaned up. */
    protected int cleanupThreshold;
    /**
     * Epoch milliseconds; only paths modified strictly before this instant are eligible. It is
     * assigned in {@link #execute(OptionsHelper)} and is {@code 0} before that.
     */
    protected long storageTimeCut;

    /** Names directly under the working directory that are never listed as cleanup candidates. */
    protected static final List<String> protectedDir = Arrays.asList("cube_statistics", "resources-jdbc",
            "_sparder_logs");
    /** Filter applied to the working-directory listing; rejects every name in {@link #protectedDir}. */
    protected static PathFilter pathFilter = path -> !protectedDir.contains(path.getName());

    /**
     * Creates a job using the Kylin configuration from the environment and the working file system
     * of the current Hadoop configuration.
     *
     * @throws IOException declared by this constructor; the only call that can raise it here is
     *         {@code HadoopUtil.getWorkingFileSystem}
     */
    public StorageCleanupJob() throws IOException {
        this(KylinConfig.getInstanceFromEnv(), HadoopUtil.getWorkingFileSystem(HadoopUtil.getCurrentConfiguration()));
    }

    /**
     * Creates a job with all switches at their {@code DEFAULT_*} values.
     *
     * @param kylinConfig configuration used to resolve managers and HDFS locations
     * @param fileSystem file system on which paths are listed and deleted
     */
    public StorageCleanupJob(KylinConfig kylinConfig, FileSystem fileSystem) {
        this.delete = DEFAULT_CLEANUP;
        this.cleanupTableSnapshot = DEFAULT_CLEANUP_SNAPSHOT;
        this.cleanupGlobalDict = DEFAULT_CLEANUP_DICT;
        this.cleanupJobTmp = DEFAULT_CLEANUP_JOB_TMP;
        this.cleanupThreshold = DEFAULT_CLEANUP_HOUR_THRESHOLD;
        this.config = kylinConfig;
        this.fs = fileSystem;
        this.executableManager = ExecutableManager.getInstance(kylinConfig);
    }

    /** Returns the command-line options understood by this job. */
    @Override // org.apache.kylin.common.util.AbstractApplication
    protected Options getOptions() {
        Options options = new Options();
        options.addOption(OPTION_HELP);
        options.addOption(OPTION_DELETE);
        options.addOption(OPTION_CLEANUP_GLOBAL_DICT);
        options.addOption(OPTION_CLEANUP_TABLE_SNAPSHOT);
        options.addOption(OPTION_CLEANUP_JOB_TMP);
        options.addOption(OPTION_CLEANUP_THRESHOLD_HOUR);
        return options;
    }

    /**
     * Reads the switches from the parsed options, derives {@link #storageTimeCut} from the
     * threshold, and runs {@link #cleanup()}.
     *
     * <p>{@link #delete} is always re-assigned from the {@code delete} option; the other switches
     * keep their current value when their option is absent.
     *
     * @param optionsHelper parsed command-line options
     * @throws Exception if the threshold is not a valid integer or the cleanup fails
     */
    @Override // org.apache.kylin.common.util.AbstractApplication
    protected void execute(OptionsHelper optionsHelper) throws Exception {
        logger.info("options: '{}'", optionsHelper.getOptionsAsString());
        this.delete = Boolean.parseBoolean(optionsHelper.getOptionValue(OPTION_DELETE));
        if (optionsHelper.hasOption(OPTION_CLEANUP_TABLE_SNAPSHOT)) {
            this.cleanupTableSnapshot = Boolean.parseBoolean(optionsHelper.getOptionValue(OPTION_CLEANUP_TABLE_SNAPSHOT));
        }
        if (optionsHelper.hasOption(OPTION_CLEANUP_GLOBAL_DICT)) {
            this.cleanupGlobalDict = Boolean.parseBoolean(optionsHelper.getOptionValue(OPTION_CLEANUP_GLOBAL_DICT));
        }
        if (optionsHelper.hasOption(OPTION_CLEANUP_JOB_TMP)) {
            this.cleanupJobTmp = Boolean.parseBoolean(optionsHelper.getOptionValue(OPTION_CLEANUP_JOB_TMP));
        }
        if (optionsHelper.hasOption(OPTION_CLEANUP_THRESHOLD_HOUR)) {
            this.cleanupThreshold = Integer.parseInt(optionsHelper.getOptionValue(OPTION_CLEANUP_THRESHOLD_HOUR));
        }
        // Widen to long before multiplying: in int arithmetic hours * 3600 * 1000 overflows for
        // thresholds of roughly 597 hours and above, which would corrupt the cut-off time.
        long thresholdMillis = (long) this.cleanupThreshold * DateTimeConstants.SECONDS_PER_HOUR * 1000L;
        this.storageTimeCut = System.currentTimeMillis() - thresholdMillis;
        logger.info("===================================================================\ndelete : {}; cleanupTableSnapshot : {}; cleanupGlobalDict : {}; cleanupJobTmp : {}; cleanBeforeDate : {}.", this.delete, this.cleanupTableSnapshot, this.cleanupGlobalDict, this.cleanupJobTmp, new Date(this.storageTimeCut));
        cleanup();
    }

    /**
     * Runs every cleanup pass using the current field values: project-level storage first, then
     * segment directories, then job tmp directories when {@link #cleanupJobTmp} is set.
     *
     * @throws Exception if a metadata lookup or file-system operation fails
     */
    public void cleanup() throws Exception {
        ProjectManager projectManager = ProjectManager.getInstance(this.config);
        CubeManager cubeManager = CubeManager.getInstance(this.config);
        List<String> projectNames = projectManager.listAllProjects().stream()
                .map(project -> project.getName())
                .collect(Collectors.toList());

        logger.info("Start to clean up unreferenced projects and cubes ...");
        List<CubeInstance> allCubes = cubeManager.listAllCubes();
        cleanupProjectDirs(projectNames, allCubes);

        logger.info("Start to clean up no unreferenced segments ...");
        cleanupSegmentDirs(allCubes);

        if (this.cleanupJobTmp) {
            logger.info("Start to clean up stale job_tmp ...");
            cleanupJobTmpDirs(projectNames);
        }
    }

    /**
     * Walks the eligible children of the working directory: a child that is not a known project is
     * handed to {@link #deleteOp}; for a known project its global dictionaries, table snapshots and
     * cube directories are checked.
     */
    private void cleanupProjectDirs(List<String> projectNames, List<CubeInstance> allCubes) throws Exception {
        Path workingDir = new Path(this.config.getHdfsWorkingDirectory());
        if (!this.fs.exists(workingDir)) {
            return;
        }
        FileStatus[] children = this.fs.listStatus(workingDir, pathFilter);
        if (children == null) {
            return;
        }
        // Names of ALL cubes, not only the current project's, are treated as referenced.
        // NOTE(review): presumably cube names are unique across projects - confirm.
        List<String> allCubeNames = allCubes.stream().map(CubeInstance::getName).collect(Collectors.toList());
        for (FileStatus child : children) {
            if (!eligibleStorage(child)) {
                continue;
            }
            String dirName = child.getPath().getName();
            if (!projectNames.contains(dirName)) {
                deleteOp(child.getPath(), StorageCleanType.PROJECT_DIR);
                continue;
            }
            List<CubeInstance> projectCubes = allCubes.stream()
                    .filter(cube -> dirName.equals(cube.getProject()))
                    .collect(Collectors.toList());
            cleanupGlobalDict(dirName, projectCubes);
            cleanupTableSnapshot(dirName, projectCubes);
            cleanupDeletedCubes(dirName, allCubeNames);
        }
    }

    /**
     * For every cube, hands to {@link #deleteOp} each eligible child of the cube's parquet directory
     * whose name is not {@code <segment name>_<storage location identifier>} of one of its segments.
     */
    private void cleanupSegmentDirs(List<CubeInstance> allCubes) throws IOException {
        for (CubeInstance cube : allCubes) {
            List<String> referencedDirNames = cube.getSegments().stream()
                    .map(segment -> segment.getName() + "_" + segment.getStorageLocationIdentifier())
                    .collect(Collectors.toList());
            Path cubePath = new Path(this.config.getHdfsWorkingDirectory(cube.getProject()) + "/parquet/" + cube.getName());
            if (!this.fs.exists(cubePath)) {
                logger.warn("Cube path doesn't exist! The path is {}", cubePath);
                continue;
            }
            FileStatus[] segmentDirs = this.fs.listStatus(cubePath);
            if (segmentDirs == null) {
                continue;
            }
            for (FileStatus segmentDir : segmentDirs) {
                if (eligibleStorage(segmentDir) && !referencedDirNames.contains(segmentDir.getPath().getName())) {
                    deleteOp(segmentDir.getPath(), StorageCleanType.SEGMENT_DIR);
                }
            }
        }
    }

    /** Hands every eligible child of each project's job tmp directory to {@link #deleteOp}. */
    private void cleanupJobTmpDirs(List<String> projectNames) throws IOException {
        for (String projectName : projectNames) {
            Path jobTmpPath = new Path(this.config.getJobTmpDir(projectName));
            // A project may have no job tmp directory; skip it rather than let the listing fail
            // and abort the cleanup of the remaining projects.
            if (!this.fs.exists(jobTmpPath)) {
                logger.info("Job tmp path doesn't exist, skip it. The path is {}", jobTmpPath);
                continue;
            }
            FileStatus[] jobTmpChildren = this.fs.listStatus(jobTmpPath);
            if (jobTmpChildren == null) {
                continue;
            }
            for (FileStatus jobTmpChild : jobTmpChildren) {
                if (eligibleStorage(jobTmpChild)) {
                    deleteOp(jobTmpChild.getPath(), StorageCleanType.JOB_TMP);
                }
            }
        }
    }

    /**
     * Hands to {@link #deleteOp} every eligible child of the project's parquet storage root whose
     * name is not in {@code cubeNames}.
     *
     * @param project project whose storage root is scanned
     * @param cubeNames names that are still considered referenced
     */
    private void cleanupDeletedCubes(String project, List<String> cubeNames) throws Exception {
        Path parquetRoot = new Path(this.config.getHdfsWorkingDirectory(project) + HadoopUtil.PARQUET_STORAGE_ROOT);
        if (!this.fs.exists(parquetRoot)) {
            return;
        }
        FileStatus[] cubeDirs = this.fs.listStatus(parquetRoot);
        if (cubeDirs == null) {
            return;
        }
        for (FileStatus cubeDir : cubeDirs) {
            if (eligibleStorage(cubeDir) && !cubeNames.contains(cubeDir.getPath().getName())) {
                deleteOp(cubeDir.getPath(), StorageCleanType.CUBE_DIR);
            }
        }
    }

    /**
     * When {@link #cleanupTableSnapshot} is set, hands to {@link #deleteOp} every eligible path two
     * levels below the project's table snapshot directory that is not a snapshot path of any
     * segment of the given cubes.
     *
     * @param project project whose snapshot directory is scanned
     * @param cubes cubes whose segments define the referenced snapshot paths
     */
    private void cleanupTableSnapshot(String project, List<CubeInstance> cubes) throws IOException {
        if (!this.cleanupTableSnapshot) {
            return;
        }
        Path snapshotRoot = new Path(this.config.getHdfsWorkingDirectory(project) + TABLE_SNAPSHOT_PREFIX);
        List<Path> candidates = listEligibleGrandchildren(snapshotRoot);
        for (CubeInstance cube : cubes) {
            for (CubeSegment segment : cube.getSegments()) {
                for (String snapshotPath : segment.getSnapshotPaths()) {
                    // Snapshot paths are resolved against the global working directory here.
                    candidates.remove(new Path(this.config.getHdfsWorkingDirectory() + File.separator + snapshotPath));
                }
            }
        }
        for (Path candidate : candidates) {
            deleteOp(candidate, StorageCleanType.TABLE_SNAPSHOT);
        }
    }

    /**
     * When {@link #cleanupGlobalDict} is set, hands to {@link #deleteOp} every eligible path two
     * levels below the project's global dictionary directory that does not correspond to a
     * dictionary column declared by one of the given cubes.
     *
     * @param project project whose global dictionary directory is scanned
     * @param cubes cubes whose dictionary descriptors define the referenced paths
     */
    private void cleanupGlobalDict(String project, List<CubeInstance> cubes) throws IOException {
        if (!this.cleanupGlobalDict) {
            return;
        }
        Path dictRoot = new Path(this.config.getHdfsWorkingDirectory(project) + GLOBAL_DICT_PREFIX);
        List<Path> candidates = listEligibleGrandchildren(dictRoot);
        for (CubeInstance cube : cubes) {
            List<DictionaryDesc> dictionaries = cube.getDescriptor().getDictionaries();
            if (dictionaries == null) {
                continue;
            }
            for (DictionaryDesc dictionary : dictionaries) {
                String[] parts = dictionary.getColumnRef().getColumnWithTable().split("\\.");
                // A three-part identifier contributes its last two parts; anything else its first
                // two. Fewer than two parts deliberately still fails here instead of being skipped,
                // so that a dictionary that cannot be resolved is never treated as unreferenced.
                int first = parts.length == 3 ? 1 : 0;
                candidates.remove(new Path(dictRoot + File.separator + parts[first] + File.separator + parts[first + 1]));
            }
        }
        for (Path candidate : candidates) {
            deleteOp(candidate, StorageCleanType.GLOBAL_DICTIONARY);
        }
    }

    /**
     * Returns the eligible paths exactly two levels below {@code root}, in listing order, or an
     * empty list when {@code root} does not exist.
     */
    private List<Path> listEligibleGrandchildren(Path root) throws IOException {
        List<Path> eligible = new ArrayList<>();
        if (!this.fs.exists(root)) {
            return eligible;
        }
        for (FileStatus child : this.fs.listStatus(root)) {
            for (FileStatus grandchild : this.fs.listStatus(child.getPath())) {
                if (eligibleStorage(grandchild)) {
                    eligible.add(grandchild.getPath());
                }
            }
        }
        return eligible;
    }

    /**
     * Deletes {@code path} recursively when {@link #delete} is set; otherwise only logs it.
     *
     * @param path path found to be unreferenced
     * @param storageCleanType category of the path, used for logging only
     */
    private void deleteOp(Path path, StorageCleanType storageCleanType) throws IOException {
        if (!this.delete) {
            logger.info("Dry run, pending delete unreferenced path {}, {}", storageCleanType, path);
            return;
        }
        logger.info("Deleting unreferenced {}, {}", storageCleanType, path);
        if (!this.fs.delete(path, true)) {
            // Surface an unsuccessful delete instead of silently dropping the result.
            logger.warn("Failed to delete unreferenced {}, {}", storageCleanType, path);
        }
    }

    /** Returns whether the status is non-null and was modified strictly before {@link #storageTimeCut}. */
    private boolean eligibleStorage(FileStatus fileStatus) {
        return fileStatus != null && fileStatus.getModificationTime() < this.storageTimeCut;
    }

    /** Builds the argument-less, optional {@code help} option. */
    private static Option buildHelpOption() {
        OptionBuilder.hasArg(false);
        OptionBuilder.isRequired(false);
        OptionBuilder.withDescription("Print supported operations.");
        return OptionBuilder.create("help");
    }

    /**
     * Builds an optional option that takes one argument; the argument name equals the option name
     * and the option type is given to the builder as the class name of {@code valueType}.
     */
    private static Option buildValueOption(String name, Class<?> valueType, String description) {
        OptionBuilder.withArgName(name);
        OptionBuilder.hasArg();
        OptionBuilder.isRequired(false);
        OptionBuilder.withType(valueType.getName());
        OptionBuilder.withDescription(description);
        return OptionBuilder.create(name);
    }
}
