package org.apache.kylin.engine.mr.steps;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.persistence.ResourceStore;
import org.apache.kylin.common.util.ByteArray;
import org.apache.kylin.common.util.Bytes;
import org.apache.kylin.common.util.HadoopUtil;
import org.apache.kylin.common.util.StringUtil;
import org.apache.kylin.cube.CubeDescManager;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.cube.model.CubeDesc;
import org.apache.kylin.dict.DictionaryInfo;
import org.apache.kylin.dict.DictionaryManager;
import org.apache.kylin.engine.mr.KylinMapper;
import org.apache.kylin.engine.mr.common.AbstractHadoopJob;
import org.apache.kylin.engine.mr.common.BatchConstants;
import org.apache.kylin.engine.mr.common.CubeStatsWriter;
import org.apache.kylin.engine.mr.common.SerializableConfiguration;
import org.apache.kylin.measure.hllc.HLLCounter;
import org.apache.kylin.metadata.model.TblColRef;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/kylin-engine-mr-2.6.1.jar:org/apache/kylin/engine/mr/steps/MergeDictionaryMapper.class */
/**
 * Mapper of the dictionary-merge step that runs when several cube segments are merged.
 *
 * <p>Each input key is an index. An index below the number of dictionary-encoded columns
 * merges the dictionaries of that column across all merging segments; any other index merges
 * the statistics files of the merging segments and stores the result for the new segment.
 * Every call emits exactly one record whose key is {@code -1}.
 */
public class MergeDictionaryMapper extends KylinMapper<IntWritable, NullWritable, IntWritable, Text> {
    private static final Logger logger = LoggerFactory.getLogger(MergeDictionaryMapper.class);

    /** Key written with every output record of this mapper. */
    private static final int OUTPUT_KEY = -1;

    /** Segments being merged, resolved in {@link #doSetup} from the configured segment ids. */
    List<CubeSegment> mergingSegments;
    /** Columns of the cube that need a dictionary; the input key indexes into this array. */
    TblColRef[] tblColRefs;
    DictionaryManager dictMgr;

    /**
     * Loads the Kylin configuration referenced by the job and resolves the merging segments,
     * the dictionary columns and the dictionary manager used by {@link #doMap}.
     *
     * @param context mapper context whose configuration carries the metadata URL, the cube name
     *                and the comma-separated ids of the segments to merge
     * @throws IOException if the parent setup or loading the Kylin configuration fails
     * @throws InterruptedException propagated from the parent setup
     */
    @Override // org.apache.kylin.engine.mr.KylinMapper
    public void doSetup(Mapper<IntWritable, NullWritable, IntWritable, Text>.Context context) throws IOException, InterruptedException {
        super.doSetup(context);
        Configuration jobConf = context.getConfiguration();
        String metaUrl = jobConf.get(BatchConstants.ARG_META_URL);
        String cubeName = jobConf.get(BatchConstants.ARG_CUBE_NAME);
        String segmentIds = jobConf.get(MergeDictionaryJob.OPTION_MERGE_SEGMENT_IDS.getOpt());

        KylinConfig kylinConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(new SerializableConfiguration(jobConf), metaUrl);
        CubeInstance cube = CubeManager.getInstance(kylinConfig).getCube(cubeName);
        CubeDesc cubeDesc = CubeDescManager.getInstance(kylinConfig).getCubeDesc(cube.getDescName());

        this.mergingSegments = getMergingSegments(cube, StringUtil.splitByComma(segmentIds));
        this.tblColRefs = cubeDesc.getAllColumnsNeedDictionaryBuilt().toArray(new TblColRef[0]);
        this.dictMgr = DictionaryManager.getInstance(kylinConfig);
    }

    /**
     * Dispatches on the input index: a dictionary merge for a column index, a statistics merge
     * for any index at or beyond the number of dictionary columns.
     *
     * @param intWritable  the task index
     * @param nullWritable unused
     * @param context      mapper context used for configuration lookup and output
     * @throws IOException if reading or writing dictionaries or statistics fails
     * @throws InterruptedException propagated from {@code context.write}
     */
    @Override // org.apache.kylin.engine.mr.KylinMapper
    public void doMap(IntWritable intWritable, NullWritable nullWritable, Mapper<IntWritable, NullWritable, IntWritable, Text>.Context context) throws IOException, InterruptedException {
        int index = intWritable.get();
        if (index < this.tblColRefs.length) {
            mergeColumnDictionaries(this.tblColRefs[index], context);
        } else {
            mergeSegmentStatistics(context);
        }
    }

    /**
     * Merges the distinct dictionaries of one column across the merging segments and emits
     * {@code "<tableAlias>:<columnName>=<mergedDictionaryPath>"}; the path part is empty when
     * the dictionary manager returns no merged dictionary.
     */
    private void mergeColumnDictionaries(TblColRef column, Mapper<IntWritable, NullWritable, IntWritable, Text>.Context context) throws IOException, InterruptedException {
        List<DictionaryInfo> dictInfos = Lists.newArrayList();
        for (CubeSegment segment : this.mergingSegments) {
            String dictResPath = segment.getDictResPath(column);
            if (dictResPath == null) {
                continue;
            }
            DictionaryInfo dictInfo = this.dictMgr.getDictionaryInfo(dictResPath);
            // Segments may share a dictionary; feed each one to the merge only once.
            if (dictInfo != null && !dictInfos.contains(dictInfo)) {
                dictInfos.add(dictInfo);
            }
        }

        DictionaryInfo merged = this.dictMgr.mergeDictionary(dictInfos);
        String mergedPath = merged == null ? "" : merged.getResourcePath();
        context.write(new IntWritable(OUTPUT_KEY), new Text(column.getTableAlias() + ":" + column.getName() + "=" + mergedPath));
    }

    /**
     * Merges the statistics of all merging segments, writes the merged statistics to the
     * configured output path, stores that file as the statistics resource of the target
     * segment and emits a record with an empty value.
     */
    private void mergeSegmentStatistics(Mapper<IntWritable, NullWritable, IntWritable, Text>.Context context) throws IOException, InterruptedException {
        Configuration jobConf = context.getConfiguration();
        KylinConfig kylinConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(new SerializableConfiguration(jobConf), jobConf.get(BatchConstants.ARG_META_URL));
        String cubeName = jobConf.get(BatchConstants.ARG_CUBE_NAME);
        String segmentId = jobConf.get(BatchConstants.ARG_SEGMENT_ID);
        String statOutputPath = jobConf.get(MergeDictionaryJob.OPTION_OUTPUT_PATH_STAT.getOpt());
        CubeInstance cube = CubeManager.getInstance(kylinConfig).getCube(cubeName);
        logger.info("Statistics output path: {}", statOutputPath);
        CubeSegment targetSegment = cube.getSegmentById(segmentId);
        ResourceStore store = ResourceStore.getStore(kylinConfig);

        HashMap<Long, HLLCounter> cuboidHLLMap = Maps.newHashMap();
        int hllPrecision = kylinConfig.getCubeStatsHLLPrecision();
        Configuration hadoopConf = null;
        int percentageSum = 0;
        for (CubeSegment segment : this.mergingSegments) {
            File tempFile = copyStatisticsToTempFile(store, segment.getStatisticsResourcePath(), segmentId);
            try {
                hadoopConf = HadoopUtil.getCurrentConfiguration();
                percentageSum += readStatistics(tempFile, hadoopConf, hllPrecision, cuboidHLLMap);
            } finally {
                // The local copy is only needed while it is being read; remove it so that
                // temp files do not accumulate on the task node.
                deleteTempFile(tempFile);
            }
        }

        // The key-0 values are averaged over the merging segments (integer division).
        // NOTE(review): presumably these are per-segment sampling percentages - confirm.
        CubeStatsWriter.writeCuboidStatistics(hadoopConf, new Path(statOutputPath), cuboidHLLMap, percentageSum / this.mergingSegments.size());

        Path statisticsFile = new Path(statOutputPath, BatchConstants.CFG_STATISTICS_CUBOID_ESTIMATION_FILENAME);
        FSDataInputStream statisticsStream = HadoopUtil.getFileSystem(statisticsFile, hadoopConf).open(statisticsFile);
        try {
            store.putResource(targetSegment.getStatisticsResourcePath(), statisticsStream, System.currentTimeMillis());
        } finally {
            org.apache.hadoop.io.IOUtils.closeStream(statisticsStream);
        }
        context.write(new IntWritable(OUTPUT_KEY), new Text(""));
    }

    /**
     * Copies a statistics resource from the resource store into a new local temp file.
     * The temp file is removed again if the copy fails.
     *
     * @return the temp file holding the copied content; the caller is responsible for deleting it
     */
    private File copyStatisticsToTempFile(ResourceStore store, String statisticsPath, String tempPrefix) throws IOException {
        InputStream in = store.getResource(statisticsPath).content();
        File tempFile = null;
        FileOutputStream out = null;
        boolean copied = false;
        try {
            tempFile = File.createTempFile(tempPrefix, ".seq");
            out = new FileOutputStream(tempFile);
            IOUtils.copy(in, out);
            copied = true;
            return tempFile;
        } finally {
            org.apache.hadoop.io.IOUtils.closeStream(in);
            org.apache.hadoop.io.IOUtils.closeStream(out);
            if (!copied) {
                deleteTempFile(tempFile);
            }
        }
    }

    /**
     * Reads one statistics sequence file. Records with a positive key are decoded as HLL
     * counters and merged into {@code cuboidHLLMap} under that key; records with key 0
     * contribute their integer value to the returned sum; negative keys are ignored.
     *
     * @return the sum of the integer values of all key-0 records in the file
     */
    private int readStatistics(File statisticsFile, Configuration conf, int hllPrecision, HashMap<Long, HLLCounter> cuboidHLLMap) throws IOException, InterruptedException {
        FileSystem fs = HadoopUtil.getFileSystem("file:///" + statisticsFile.getAbsolutePath());
        SequenceFile.Reader reader = null;
        int percentage = 0;
        try {
            reader = new SequenceFile.Reader(fs, new Path(statisticsFile.getAbsolutePath()), conf);
            LongWritable key = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            BytesWritable value = (BytesWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
            while (reader.next(key, value)) {
                Long recordKey = key.get();
                if (recordKey == 0) {
                    percentage += Bytes.toInt(value.getBytes());
                } else if (recordKey > 0) {
                    HLLCounter counter = new HLLCounter(hllPrecision);
                    counter.readRegisters(new ByteArray(value.getBytes()).asBuffer());
                    HLLCounter existing = cuboidHLLMap.get(recordKey);
                    if (existing != null) {
                        existing.merge(counter);
                    } else {
                        cuboidHLLMap.put(recordKey, counter);
                    }
                }
            }
            return percentage;
        } catch (Exception e) {
            logger.error("Failed to read statistics from {}", statisticsFile.getAbsolutePath(), e);
            throw e;
        } finally {
            org.apache.hadoop.io.IOUtils.closeStream(reader);
        }
    }

    /** Deletes a temp file if it exists, logging a warning when deletion fails. */
    private static void deleteTempFile(File tempFile) {
        if (tempFile != null && tempFile.exists() && !tempFile.delete()) {
            logger.warn("Failed to delete temporary statistics file {}", tempFile.getAbsolutePath());
        }
    }

    /**
     * Looks up each segment id on the cube, preserving the order of {@code strArr}.
     * The result of each lookup is added as returned by the cube, without a null check.
     */
    private List<CubeSegment> getMergingSegments(CubeInstance cubeInstance, String[] strArr) {
        List<CubeSegment> segments = Lists.newArrayListWithCapacity(strArr.length);
        for (String segmentId : strArr) {
            segments.add(cubeInstance.getSegmentById(segmentId));
        }
        return segments;
    }
}
