package org.apache.kylin.engine.mr.steps;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.calcite.util.StackWriter;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.ArrayPrimitiveWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.util.Bytes;
import org.apache.kylin.common.util.Dictionary;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.cube.model.CubeDesc;
import org.apache.kylin.dict.DictionaryGenerator;
import org.apache.kylin.dict.IDictionaryBuilder;
import org.apache.kylin.engine.mr.KylinReducer;
import org.apache.kylin.engine.mr.common.AbstractHadoopJob;
import org.apache.kylin.engine.mr.common.BatchConstants;
import org.apache.kylin.measure.hllc.HLLCounter;
import org.apache.kylin.metadata.model.TblColRef;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.util.AntPathMatcher;

/* loaded from: input_file:org/apache/kylin/engine/mr/steps/FactDistinctColumnsReducer.class */
/**
 * Reducer of the "fact distinct columns" step.
 *
 * <p>Each reducer task plays exactly one role, decided in {@link #doSetup} from
 * {@link FactDistinctColumnsReducerMapping}:
 * <ul>
 *   <li><b>statistics</b> – merges the per-mapper {@link HLLCounter}s of every cuboid and, on
 *       cleanup, writes the merged counters plus a few summary records to the
 *       {@code statistics/statistics} output;</li>
 *   <li><b>column</b> – receives the distinct values of one column, tracks the min/max value for
 *       comparable non-derived dimensions, and either feeds the values into an in-reducer
 *       dictionary builder or writes them out unchanged.</li>
 * </ul>
 *
 * <p>In both roles the first byte of the key is skipped when decoding (presumably a
 * reducer/role marker written by the mapper - confirm against the mapper side).
 */
public class FactDistinctColumnsReducer extends KylinReducer<SelfDefineSortableKey, Text, NullWritable, Text> {
    private static final Logger logger = LoggerFactory.getLogger(FactDistinctColumnsReducer.class);

    public static final String DICT_FILE_POSTFIX = ".rldict";
    public static final String DIMENSION_COL_INFO_FILE_POSTFIX = ".dci";

    /** Base output path shared by every statistics record. */
    private static final String STATISTICS_FILE_NAME = "statistics/statistics";
    /** Size of the scratch buffer used to serialize one HLL counter. */
    private static final int STATISTICS_BUFFER_SIZE = 1048576;
    /** Only this many leading values are logged by {@link #logAFewRows(String)}. */
    private static final int MAX_LOGGED_ROWS = 10;

    private List<Long> baseCuboidRowCountInMappers;
    protected long baseCuboidId;
    protected CubeDesc cubeDesc;
    private int samplingPercentage;
    private KylinConfig cubeConfig;
    private int taskId;
    private FactDistinctColumnsReducerMapping reducerMapping;
    private boolean buildDictInReducer;
    private IDictionaryBuilder builder;
    // Kept as a raw type on purpose: the named outputs use differing key/value classes.
    private MultipleOutputs mos;
    protected Map<Long, HLLCounter> cuboidHLLMap = null;
    private long totalRowsBeforeMerge = 0;
    private TblColRef col = null;
    private boolean isStatistics = false;
    // long rather than int so the counter cannot wrap negative and re-enable value logging.
    private long rowCount = 0;
    private String maxValue = null;
    private String minValue = null;

    // Per-column facts that never change during the task; resolved once in doSetup instead of
    // re-querying the cube descriptor for every key.
    private boolean isDimensionCol = false;
    private boolean trackValueRange = false;
    private boolean needsDictionary = false;

    /**
     * Loads the cube metadata and decides which role this reducer task plays.
     *
     * @param context the Hadoop reducer context
     * @throws IOException if the Kylin metadata cannot be loaded
     */
    @Override // org.apache.kylin.engine.mr.KylinReducer
    protected void doSetup(Reducer<SelfDefineSortableKey, Text, NullWritable, Text>.Context context) throws IOException {
        super.bindCurrentConfiguration(context.getConfiguration());
        Configuration conf = context.getConfiguration();
        this.mos = new MultipleOutputs(context);

        KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();
        String cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
        CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
        Preconditions.checkNotNull(cube, "Cube not found: %s", cubeName);
        this.cubeConfig = cube.getConfig();
        this.cubeDesc = cube.getDescriptor();

        this.taskId = context.getTaskAttemptID().getTaskID().getId();
        this.reducerMapping = new FactDistinctColumnsReducerMapping(cube);
        logger.info("reducer no {}, role play {}", this.taskId, this.reducerMapping.getRolePlayOfReducer(this.taskId));

        if (this.reducerMapping.isCuboidRowCounterReducer(this.taskId)) {
            // Statistics role: collect and merge HLL counters per cuboid.
            this.isStatistics = true;
            this.baseCuboidId = cube.getCuboidScheduler().getBaseCuboidId();
            this.baseCuboidRowCountInMappers = Lists.newArrayList();
            this.cuboidHLLMap = Maps.newHashMap();
            this.samplingPercentage = Integer.parseInt(conf.get(BatchConstants.CFG_STATISTICS_SAMPLING_PERCENT));
            logger.info("Reducer {} handling stats", this.taskId);
            return;
        }

        // Column role: this task handles the distinct values of exactly one column.
        this.col = this.reducerMapping.getColForReducer(this.taskId);
        Preconditions.checkNotNull(this.col);

        this.isDimensionCol = this.cubeDesc.listDimensionColumnsExcludingDerived(true).contains(this.col);
        this.trackValueRange = this.isDimensionCol && this.col.getType().needCompare();
        this.needsDictionary = this.cubeDesc.getAllColumnsNeedDictionaryBuilt().contains(this.col);

        // The dictionary is built here only when enabled, when the column has no custom
        // dictionary builder class, and when the column is not spread over several reducers.
        this.buildDictInReducer = config.isBuildDictInReducerEnabled()
                && this.cubeDesc.getDictionaryBuilderClass(this.col) == null
                && this.reducerMapping.getReducerNumForDimCol(this.col) <= 1;
        if (this.buildDictInReducer) {
            this.builder = DictionaryGenerator.newDictionaryBuilder(this.col.getType());
            this.builder.init(null, 0, null);
        }
        logger.info("Reducer {} handling column {}, buildDictInReducer={}", this.taskId, this.col, this.buildDictInReducer);
    }

    /**
     * Processes one key: merges HLL counters in the statistics role, or records one distinct
     * column value in the column role.
     *
     * @param key     the sortable key wrapping the encoded cuboid id or column value
     * @param values  serialized HLL counters (only read in the statistics role)
     * @param context the Hadoop reducer context
     */
    @Override // org.apache.kylin.engine.mr.KylinReducer
    public void doReduce(SelfDefineSortableKey key, Iterable<Text> values, Reducer<SelfDefineSortableKey, Text, NullWritable, Text>.Context context) throws IOException, InterruptedException {
        Text keyText = key.getText();
        if (this.isStatistics) {
            mergeCuboidCounters(keyText, values);
        } else {
            handleColumnValue(keyText);
        }
        this.rowCount++;
    }

    /** Merges every incoming HLL counter into the counter kept for the cuboid encoded in the key. */
    private void mergeCuboidCounters(Text keyText, Iterable<Text> values) throws IOException {
        // Cuboid id is the 8-byte long following the leading byte of the key.
        long cuboidId = Bytes.toLong(keyText.getBytes(), 1, 8);
        for (Text value : values) {
            HLLCounter hll = new HLLCounter(this.cubeConfig.getCubeStatsHLLPrecision());
            hll.readRegisters(ByteBuffer.wrap(value.getBytes(), 0, value.getLength()));

            long estimate = hll.getCountEstimate();
            this.totalRowsBeforeMerge += estimate;
            if (cuboidId == this.baseCuboidId) {
                this.baseCuboidRowCountInMappers.add(estimate);
            }

            HLLCounter merged = this.cuboidHLLMap.get(cuboidId);
            if (merged != null) {
                merged.merge(hll);
            } else {
                this.cuboidHLLMap.put(cuboidId, hll);
            }
        }
    }

    /** Records one distinct value of the column: min/max tracking plus dictionary or raw output. */
    private void handleColumnValue(Text keyText) throws IOException, InterruptedException {
        // The value is everything after the leading byte of the key.
        String value = Bytes.toString(keyText.getBytes(), 1, keyText.getLength() - 1);
        logAFewRows(value);

        if (this.trackValueRange) {
            if (this.minValue == null || this.col.getType().compare(this.minValue, value) > 0) {
                this.minValue = value;
            }
            if (this.maxValue == null || this.col.getType().compare(this.maxValue, value) < 0) {
                this.maxValue = value;
            }
        }

        if (this.needsDictionary) {
            if (this.buildDictInReducer) {
                this.builder.addValue(value);
            } else {
                Text rawValue = new Text(Bytes.copy(keyText.getBytes(), 1, keyText.getLength() - 1));
                this.mos.write("column", NullWritable.get(), rawValue, this.col.getIdentity() + "/");
            }
        }
    }

    /** Logs the value while fewer than {@link #MAX_LOGGED_ROWS} keys have been processed. */
    private void logAFewRows(String str) {
        if (this.rowCount < MAX_LOGGED_ROWS) {
            logger.info("Received value: {}", str);
        }
    }

    /**
     * Flushes the results of the task: statistics in the statistics role; dimension range info
     * and (optionally) the built dictionary in the column role. Closes the multiple outputs.
     */
    @Override // org.apache.kylin.engine.mr.KylinReducer
    protected void doCleanup(Reducer<SelfDefineSortableKey, Text, NullWritable, Text>.Context context) throws IOException, InterruptedException {
        if (this.isStatistics) {
            // Sorted so that the log and the statistics output list cuboids in ascending id order.
            List<Long> allCuboids = new ArrayList<>(this.cuboidHLLMap.keySet());
            Collections.sort(allCuboids);
            logMapperAndCuboidStatistics(allCuboids);
            outputStatistics(allCuboids);
        } else {
            if (this.isDimensionCol) {
                outputDimRangeInfo();
            }
            if (this.buildDictInReducer) {
                outputDict(this.col, this.builder.build());
            }
        }
        this.mos.close();
    }

    /** Writes the min and max value (in that order) of the column; no-op when no value was tracked. */
    private void outputDimRangeInfo() throws IOException, InterruptedException {
        if (this.col == null || this.minValue == null) {
            return;
        }
        String dimRangeFileName = this.col.getIdentity() + "/" + this.col.getName() + DIMENSION_COL_INFO_FILE_POSTFIX;
        this.mos.write(BatchConstants.CFG_OUTPUT_PARTITION, NullWritable.get(), new Text(this.minValue.getBytes(StandardCharsets.UTF_8)), dimRangeFileName);
        this.mos.write(BatchConstants.CFG_OUTPUT_PARTITION, NullWritable.get(), new Text(this.maxValue.getBytes(StandardCharsets.UTF_8)), dimRangeFileName);
        logger.info("write dimension range info for col : {}  minValue:{} maxValue:{}", this.col.getName(), this.minValue, this.maxValue);
    }

    /**
     * Serializes the dictionary (class name as UTF followed by the dictionary's own encoding)
     * and writes it to the {@code dict} named output.
     *
     * @param tblColRef  the column the dictionary belongs to
     * @param dictionary the dictionary to persist
     */
    private void outputDict(TblColRef tblColRef, Dictionary<String> dictionary) throws IOException, InterruptedException {
        String dictFileName = tblColRef.getIdentity() + "/" + tblColRef.getName() + DICT_FILE_POSTFIX;
        try (ByteArrayOutputStream buffer = new ByteArrayOutputStream();
                DataOutputStream out = new DataOutputStream(buffer)) {
            out.writeUTF(dictionary.getClass().getName());
            dictionary.write(out);
            this.mos.write("dict", NullWritable.get(), new ArrayPrimitiveWritable(buffer.toByteArray()), dictFileName);
        }
    }

    /**
     * Writes the statistics records: key -1 holds the ratio of summed pre-merge estimates to
     * summed post-merge estimates, key -2 the number of base-cuboid counters received, key 0
     * the sampling percentage, and every other key the merged HLL registers of that cuboid.
     *
     * @param list the cuboid ids to output, in the order they should be written
     */
    private void outputStatistics(List<Long> list) throws IOException, InterruptedException {
        long grandTotal = 0;
        for (HLLCounter hll : this.cuboidHLLMap.values()) {
            grandTotal += hll.getCountEstimate();
        }
        // Cast before dividing: long / long would truncate the ratio to a whole number.
        double mapperOverlapRatio = grandTotal == 0 ? 0.0d : (double) this.totalRowsBeforeMerge / grandTotal;

        this.mos.write(BatchConstants.CFG_OUTPUT_STATISTICS, new LongWritable(-1L), new BytesWritable(Bytes.toBytes(mapperOverlapRatio)), STATISTICS_FILE_NAME);
        this.mos.write(BatchConstants.CFG_OUTPUT_STATISTICS, new LongWritable(-2L), new BytesWritable(Bytes.toBytes(this.baseCuboidRowCountInMappers.size())), STATISTICS_FILE_NAME);
        this.mos.write(BatchConstants.CFG_OUTPUT_STATISTICS, new LongWritable(0L), new BytesWritable(Bytes.toBytes(this.samplingPercentage)), STATISTICS_FILE_NAME);

        ByteBuffer valueBuf = ByteBuffer.allocate(STATISTICS_BUFFER_SIZE);
        for (long cuboidId : list) {
            valueBuf.clear();
            this.cuboidHLLMap.get(cuboidId).writeRegisters(valueBuf);
            valueBuf.flip();
            this.mos.write(BatchConstants.CFG_OUTPUT_STATISTICS, new LongWritable(cuboidId), new BytesWritable(valueBuf.array(), valueBuf.limit()), STATISTICS_FILE_NAME);
        }
    }

    /**
     * Logs a summary of the collected statistics: per-mapper base cuboid counts, per-cuboid
     * merged estimates and the totals before and after merging.
     *
     * @param list the cuboid ids to report, in the order they should be logged
     */
    private void logMapperAndCuboidStatistics(List<Long> list) {
        logger.info("Cuboid number for task: {}\t{}", this.taskId, list.size());
        logger.info("Samping percentage: \t{}", this.samplingPercentage);
        logger.info("The following statistics are collected based on sampling data.");
        logger.info("Number of Mappers: {}", this.baseCuboidRowCountInMappers.size());

        for (int i = 0; i < this.baseCuboidRowCountInMappers.size(); i++) {
            long mapperRowCount = this.baseCuboidRowCountInMappers.get(i);
            if (mapperRowCount > 0) {
                logger.info("Base Cuboid in Mapper {} row count: \t {}", i, mapperRowCount);
            }
        }

        long grandTotal = 0;
        for (long cuboidId : list) {
            long estimate = this.cuboidHLLMap.get(cuboidId).getCountEstimate();
            grandTotal += estimate;
            logger.info("Cuboid {} row count is: \t {}", cuboidId, estimate);
        }
        logger.info("Sum of row counts (before merge) is: \t {}", this.totalRowsBeforeMerge);
        logger.info("After merge, the row count: \t {}", grandTotal);
    }
}
