package org.apache.kylin.engine.spark.utils;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.kylin.common.util.HadoopUtil;
import org.apache.kylin.engine.spark.NSparkCubingEngine;
import org.apache.kylin.engine.spark.job.NSparkCubingUtil;
import org.apache.kylin.engine.spark.metadata.cube.model.LayoutEntity;
import org.apache.kylin.shaded.com.google.common.annotations.VisibleForTesting;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.ldap.transaction.compensating.support.DefaultTempEntryRenamingStrategy;

/* loaded from: input_file:WEB-INF/lib/kylin-spark-engine-4.0.2.jar:org/apache/kylin/engine/spark/utils/Repartitioner.class */
/**
 * Decides whether the files written for one cuboid layout should be rewritten with a different
 * partition count, and performs that rewrite.
 *
 * <p>The data is expected to sit in a temporary directory whose name is the target path plus
 * {@code "_temp"}. {@link #doRepartition} either renames that directory to the target path (no
 * repartition needed) or reads it back, repartitions and sorts it, writes it to the target path
 * and then deletes the temporary directory.
 *
 * <p>Instances are mutable through the setters and perform no synchronization of their own.
 */
public class Repartitioner {
    /** Suffix appended to the target path to obtain the temporary path the data is read from. */
    private static final String TEMP_DIR_SUFFIX = "_temp";
    protected static final Logger logger = LoggerFactory.getLogger(Repartitioner.class);
    /** Number of bytes in one megabyte; file lengths are converted to MB before comparison. */
    private static final int MB = 1024 * 1024;

    /** Desired size of one output file, in MB (total length in MB is divided by this value). */
    private int shardSize;
    /** Average-file-size limit, in MB, below which the files are considered too small. */
    private int fileLengthThreshold;
    private final long totalRowCount;
    /** Number of rows per file used to estimate the partition count from the row count. */
    private final long rowCountThreshold;
    /** Layout id, used in log messages only. */
    private final long cuboid;
    private ContentSummary contentSummary;
    /** Never {@code null}; empty when the layout has no shard-by columns. */
    private final List<Integer> shardByColumns;

    /**
     * Creates a repartitioner for one layout.
     *
     * @param shardSize            desired size of one output file, in MB
     * @param fileLengthThreshold  average file size, in MB, below which files count as too small
     * @param layoutEntity         layout whose row count and id are read
     * @param rowCountThreshold    rows per file used for the row-count based estimate
     * @param contentSummary       summary (length, file count) of the data already written
     * @param shardByColumns       shard-by column ids; {@code null} is treated as "none". The list
     *                             is stored as passed, not copied.
     */
    public Repartitioner(int shardSize, int fileLengthThreshold, LayoutEntity layoutEntity, long rowCountThreshold,
            ContentSummary contentSummary, List<Integer> shardByColumns) {
        this.shardSize = shardSize;
        this.fileLengthThreshold = fileLengthThreshold;
        this.totalRowCount = layoutEntity.getRows();
        this.cuboid = layoutEntity.getId();
        this.rowCountThreshold = rowCountThreshold;
        this.contentSummary = contentSummary;
        this.shardByColumns = shardByColumns != null ? shardByColumns : new ArrayList<>();
    }

    /**
     * @return {@code true} when there is more than one file and the average file size in MB is
     *         below {@code fileLengthThreshold}
     */
    boolean needRepartitionForFileSize() {
        long fileCount = contentSummary.getFileCount();
        if (fileCount <= 1) {
            // A single file (or none) cannot be merged any further.
            return false;
        }
        double averageFileSizeMb = (double) contentSummary.getLength() / MB / fileCount;
        return averageFileSizeMb < fileLengthThreshold;
    }

    /** @return {@code true} when at least one shard-by column is configured */
    boolean needRepartitionForShardByColumns() {
        return !shardByColumns.isEmpty();
    }

    /**
     * @return {@code true} when the current file count is below 75% of
     *         {@code totalRowCount / rowCountThreshold}
     */
    private boolean needRepartitionForRowCount() {
        double filesSuggestedByRowCount = (double) totalRowCount / rowCountThreshold;
        return contentSummary.getFileCount() < filesSuggestedByRowCount * 0.75d;
    }

    /**
     * Tells whether {@link #doRepartition} will rewrite the data rather than just rename it.
     *
     * <p>Shard-by columns always force a repartition. Otherwise a repartition is needed when the
     * file-size or row-count check fires, unless the computed partition count already equals the
     * current file count.
     *
     * @return {@code true} if the data should be repartitioned
     */
    @VisibleForTesting
    public boolean needRepartition() {
        if (needRepartitionForShardByColumns()) {
            return true;
        }
        if (!needRepartitionForFileSize() && !needRepartitionForRowCount()) {
            return false;
        }
        // Rewriting into the same number of files would not change anything.
        return getRepartitionNumByStorage() != contentSummary.getFileCount();
    }

    public int getShardSize() {
        return shardSize;
    }

    public int getFileLengthThreshold() {
        return fileLengthThreshold;
    }

    public ContentSummary getContentSummary() {
        return contentSummary;
    }

    /** @return total length in MB divided by {@code shardSize}, rounded up */
    private int getFileLengthRepartitionNum() {
        return (int) Math.ceil(contentSummary.getLength() * 1.0d / MB / shardSize);
    }

    /** @return {@code totalRowCount / rowCountThreshold}, rounded up */
    private int getRowCountRepartitionNum() {
        return (int) Math.ceil(1.0d * totalRowCount / rowCountThreshold);
    }

    /**
     * Computes the partition count as the mean, rounded up, of the size-based and the
     * row-count-based estimates, and logs the inputs and the result.
     *
     * @return the computed partition count
     */
    public int getRepartitionNumByStorage() {
        int byFileLength = getFileLengthRepartitionNum();
        int byRowCount = getRowCountRepartitionNum();
        int partitionNum = (int) Math.ceil(1.0d * (byFileLength + byRowCount) / 2.0d);
        logger.info("Before repartition, cuboid[{}] has {} row, {} bytes and {} files. Partition count calculated by file size is {}, calculated by row count is {}, final is {}.",
                cuboid, totalRowCount, contentSummary.getLength(), contentSummary.getFileCount(), byFileLength,
                byRowCount, partitionNum);
        return partitionNum;
    }

    public void setShardSize(int shardSize) {
        this.shardSize = shardSize;
    }

    public void setFileLengthThreshold(int fileLengthThreshold) {
        this.fileLengthThreshold = fileLengthThreshold;
    }

    public void setContentSummary(ContentSummary contentSummary) {
        this.contentSummary = contentSummary;
    }

    /**
     * Moves the data from the temporary path ({@code path + "_temp"}) to {@code path}, rewriting
     * it with {@code repartitionNum} partitions when {@link #needRepartition()} says so.
     *
     * <p>Without repartition the temporary directory is renamed to {@code path}, replacing any
     * existing target. With repartition the temporary data is loaded, repartitioned (by the
     * shard-by columns when present), sorted within partitions by {@code sortCols}, saved to
     * {@code path}, and the temporary directory is deleted; a failed delete is only logged.
     *
     * @param storage        storage used to load the temporary data and save the result
     * @param path           target path of the layout data
     * @param repartitionNum number of partitions to write when repartitioning
     * @param sortCols       columns to sort by within each partition
     * @param ss             Spark session passed through to {@code storage}
     * @throws IOException      if a file-system operation fails
     * @throws RuntimeException if the temporary path cannot be renamed to the target path
     */
    public void doRepartition(NSparkCubingEngine.NSparkCubingStorage storage, String path, int repartitionNum,
            Column[] sortCols, SparkSession ss) throws IOException {
        String tempPathStr = path + TEMP_DIR_SUFFIX;
        Path tempPath = new Path(tempPathStr);
        FileSystem fs = HadoopUtil.getWorkingFileSystem();

        if (!needRepartition()) {
            renameTempToTarget(fs, tempPath, tempPathStr, path);
            return;
        }

        logger.info("Start repartition and rewrite");
        long startMillis = System.currentTimeMillis();
        Dataset<Row> source = storage.getFrom(tempPathStr, ss);
        Dataset<Row> repartitioned;
        if (needRepartitionForShardByColumns()) {
            Column[] shardColumns = NSparkCubingUtil.getColumns(shardByColumns);
            logger.info("Cuboid[{}] repartition by column {} to {}", cuboid, shardColumns[0], repartitionNum);
            repartitioned = source.repartition(repartitionNum, shardColumns).sortWithinPartitions(sortCols);
        } else {
            logger.info("Cuboid[{}] repartition to {}", cuboid, repartitionNum);
            repartitioned = source.repartition(repartitionNum).sortWithinPartitions(sortCols);
        }
        storage.saveTo(path, repartitioned, ss);

        // The rewritten data is in place; a leftover temp directory is garbage, not a failure.
        if (fs.delete(tempPath, true)) {
            logger.info("Delete temp cuboid path successful. Temp path: {}.", tempPathStr);
        } else {
            logger.error("Delete temp cuboid path wrong, leave garbage. Temp path: {}.", tempPathStr);
        }
        logger.info("Repartition and rewrite ends. Cost: {} ms.", System.currentTimeMillis() - startMillis);
    }

    /** Replaces the target path with the temporary directory by deleting the target and renaming. */
    private void renameTempToTarget(FileSystem fs, Path tempPath, String tempPathStr, String targetPathStr)
            throws IOException {
        Path targetPath = new Path(targetPathStr);
        if (fs.exists(targetPath)) {
            logger.info("Path {} is exists, delete it.", targetPath);
            fs.delete(targetPath, true);
        }
        if (!fs.rename(tempPath, targetPath)) {
            throw new RuntimeException(String.format(Locale.ROOT,
                    "Rename temp path to target path wrong. Temp path: %s, target path: %s.", tempPathStr,
                    targetPathStr));
        }
        logger.info("Rename temp path to target path successfully. Temp path: {}, target path: {}.", tempPathStr,
                targetPathStr);
    }
}
