/*
 * Decompiled with CFR 0.152.
 */
package org.apache.hadoop.hive.ql.io.parquet.vector;

import java.io.IOException;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.io.DataCache;
import org.apache.hadoop.hive.common.io.FileMetadataCache;
import org.apache.hadoop.hive.common.io.encoded.MemoryBufferOrBuffers;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.llap.LlapCacheAwareFs;
import org.apache.hadoop.hive.llap.LlapUtil;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
import org.apache.hadoop.hive.ql.io.HdfsUtils;
import org.apache.hadoop.hive.ql.io.parquet.ParquetRecordReaderBase;
import org.apache.hadoop.hive.ql.io.parquet.ProjectionPusher;
import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport;
import org.apache.hadoop.hive.ql.io.parquet.vector.ParquetFooterInputFromCache;
import org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedColumnReader;
import org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedDummyColumnReader;
import org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedListColumnReader;
import org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedMapColumnReader;
import org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedPrimitiveColumnReader;
import org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedStructColumnReader;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hive.shaded.parquet.ParquetRuntimeException;
import org.apache.hive.shaded.parquet.bytes.BytesUtils;
import org.apache.hive.shaded.parquet.column.ColumnDescriptor;
import org.apache.hive.shaded.parquet.column.page.PageReadStore;
import org.apache.hive.shaded.parquet.filter2.compat.FilterCompat;
import org.apache.hive.shaded.parquet.filter2.compat.RowGroupFilter;
import org.apache.hive.shaded.parquet.format.converter.ParquetMetadataConverter;
import org.apache.hive.shaded.parquet.hadoop.ParquetFileReader;
import org.apache.hive.shaded.parquet.hadoop.ParquetFileWriter;
import org.apache.hive.shaded.parquet.hadoop.ParquetInputFormat;
import org.apache.hive.shaded.parquet.hadoop.ParquetInputSplit;
import org.apache.hive.shaded.parquet.hadoop.metadata.BlockMetaData;
import org.apache.hive.shaded.parquet.hadoop.metadata.ColumnChunkMetaData;
import org.apache.hive.shaded.parquet.hadoop.metadata.ColumnPath;
import org.apache.hive.shaded.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.hive.shaded.parquet.hadoop.util.HadoopStreams;
import org.apache.hive.shaded.parquet.io.InputFile;
import org.apache.hive.shaded.parquet.io.SeekableInputStream;
import org.apache.hive.shaded.parquet.schema.GroupType;
import org.apache.hive.shaded.parquet.schema.InvalidSchemaException;
import org.apache.hive.shaded.parquet.schema.MessageType;
import org.apache.hive.shaded.parquet.schema.PrimitiveType;
import org.apache.hive.shaded.parquet.schema.Type;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class VectorizedParquetRecordReader
extends ParquetRecordReaderBase
implements RecordReader<NullWritable, VectorizedRowBatch> {
    /** Logger shared by all instances of this reader. */
    public static final Logger LOG = LoggerFactory.getLogger(VectorizedParquetRecordReader.class);
    /** Column ids returned by {@code ColumnProjectionUtils.getReadColumnIDs}; used to index batch columns and column types. */
    private List<Integer> colsToInclude;
    /** Schema taken from the file footer in {@code initialize}. */
    protected MessageType fileSchema;
    /** Schema produced by {@code DataWritableReadSupport.getRequestedSchema}; drives which column readers are built. */
    protected MessageType requestedSchema;
    /** Column names parsed from the {@code "columns"} configuration entry. */
    private List<String> columnNamesList;
    /** Column types parsed from the {@code "columns.types"} configuration entry. */
    private List<TypeInfo> columnTypesList;
    /** Row batch context obtained from {@code Utilities.getVectorizedRowBatchCtx}. */
    private VectorizedRowBatchCtx rbCtx;
    /** Partition values added to each batch; {@code null} when the context reports no partition columns. */
    private Object[] partitionValues;
    /** Path returned by {@code LlapCacheAwareFs.registerFile}; unregistered in {@code close()}. Null when no cache wrapping happened. */
    private Path cacheFsPath;
    // NOTE(review): presumably the nesting limit for MAP group unwrapping in
    // buildVectorizedParquetReader, which compares against the value 3 - confirm.
    private static final int MAP_DEFINITION_LEVEL_MAX = 3;
    /** Per-column readers, rebuilt each time a new row group is loaded. */
    private VectorizedColumnReader[] columnReaders;
    /** Number of rows already handed back through {@code nextBatch}. */
    private long rowsReturned = 0L;
    /** Sum of the row counts of all row groups loaded so far. */
    private long totalCountLoadedSoFar = 0L;
    /** Sum of the row counts of the row groups selected in {@code initialize}. */
    protected long totalRowCount = 0L;
    /** Writer time zone derived from the footer key/value metadata. */
    private ZoneId writerTimezone;
    /** Footer cache supplied to the constructor; may be null. */
    private FileMetadataCache metadataCache;
    /** Data cache supplied to the constructor; may be null. */
    private DataCache cache;
    /** Configuration consulted for the LLAP cache settings read in {@code initialize}. */
    private Configuration cacheConf;

    /**
     * Creates a reader without a footer cache, data cache, or cache configuration
     * by delegating to the five-argument constructor with {@code null} for each.
     *
     * @param oldInputSplit the split to read
     * @param conf the job configuration
     */
    public VectorizedParquetRecordReader(InputSplit oldInputSplit, JobConf conf) {
        this(oldInputSplit, conf, null, null, null);
    }

    /**
     * Creates a reader for the given split, optionally backed by a footer cache
     * and a data cache.
     *
     * <p>The split is converted through {@code getSplit}; when that yields
     * {@code null}, {@code initialize} is skipped. Partition values are resolved
     * afterwards from the original split.
     *
     * @param oldInputSplit the split to read; it is cast to {@code FileSplit} when resolving partition values
     * @param conf the job configuration
     * @param metadataCache footer cache, or {@code null} to read footers from the file
     * @param dataCache data cache, or {@code null} to read data without cache wrapping
     * @param cacheConf configuration holding the cache settings; read only when {@code metadataCache} is non-null
     * @throws RuntimeException wrapping any failure raised during set-up
     */
    public VectorizedParquetRecordReader(InputSplit oldInputSplit, JobConf conf, FileMetadataCache metadataCache, DataCache dataCache, Configuration cacheConf) {
        try {
            this.metadataCache = metadataCache;
            this.cache = dataCache;
            this.cacheConf = cacheConf;
            this.serDeStats = new SerDeStats();
            this.projectionPusher = new ProjectionPusher();
            this.colsToInclude = ColumnProjectionUtils.getReadColumnIDs((Configuration)conf);
            this.jobConf = conf;
            this.rbCtx = Utilities.getVectorizedRowBatchCtx((Configuration)this.jobConf);
            ParquetInputSplit inputSplit = this.getSplit(oldInputSplit, conf);
            if (inputSplit != null) {
                this.initialize((org.apache.hadoop.mapreduce.InputSplit)inputSplit, conf);
            }
            this.initPartitionValues((FileSplit)oldInputSplit, conf);
        }
        catch (Throwable e) {
            if (e instanceof InterruptedException) {
                // The exception is about to be wrapped, so keep the interrupt visible to callers.
                Thread.currentThread().interrupt();
            }
            // Pass the throwable as the last argument so the stack trace is logged, not just toString().
            LOG.error("Failed to create the vectorized reader due to exception " + e, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Populates {@code partitionValues} from the split when the row batch context
     * declares partition columns; otherwise leaves it {@code null}.
     *
     * @param fileSplit split whose partition values are resolved
     * @param conf job configuration passed to the partition value lookup
     * @throws IOException if the partition value lookup fails
     */
    private void initPartitionValues(FileSplit fileSplit, JobConf conf) throws IOException {
        int partitionCols = this.rbCtx.getPartitionColumnCount();
        if (partitionCols <= 0) {
            this.partitionValues = null;
            return;
        }
        this.partitionValues = new Object[partitionCols];
        VectorizedRowBatchCtx.getPartitionValues(this.rbCtx, (Configuration)conf, fileSplit, this.partitionValues);
    }

    /**
     * Prepares this reader for the given split: reads the footer (through the
     * metadata cache when one is configured), selects the row groups to read,
     * resolves the requested schema and opens the underlying
     * {@code ParquetFileReader}.
     *
     * <p>Row groups are chosen either by applying the configured Parquet filter
     * to the footer blocks within the split's byte range (when the split carries
     * no explicit row group offsets), or by matching the split's offsets against
     * the starting positions of the footer blocks.
     *
     * @param oldSplit the split to read; expected to be a {@code ParquetInputSplit}. A {@code null} split is ignored.
     * @param configuration job configuration supplying column names/types, filter and projection
     * @throws IOException if the footer or file cannot be read
     * @throws InterruptedException declared for callers; not thrown directly by this method
     * @throws IllegalStateException if not every offset listed in the split matches a row group in the file
     */
    public void initialize(org.apache.hadoop.mapreduce.InputSplit oldSplit, JobConf configuration) throws IOException, InterruptedException {
        if (oldSplit == null) {
            return;
        }
        ParquetInputSplit split = (ParquetInputSplit)oldSplit;
        boolean indexAccess = configuration.getBoolean("parquet.column.index.access", false);
        this.file = split.getPath();
        long[] rowGroupOffsets = split.getRowGroupOffsets();
        String columnNames = configuration.get("columns");
        this.columnNamesList = DataWritableReadSupport.getColumnNames(columnNames);
        String columnTypes = configuration.get("columns.types");
        this.columnTypesList = DataWritableReadSupport.getColumnTypes(columnTypes);

        // A cache key is only resolved when a footer cache was supplied.
        Object cacheKey = null;
        String cacheTag = null;
        if (this.metadataCache != null) {
            cacheKey = HdfsUtils.getFileId(this.file.getFileSystem((Configuration)configuration), this.file, HiveConf.getBoolVar(this.cacheConf, HiveConf.ConfVars.LLAP_CACHE_ALLOW_SYNTHETIC_FILEID), HiveConf.getBoolVar(this.cacheConf, HiveConf.ConfVars.LLAP_CACHE_DEFAULT_FS_FILE_ID));
        }
        if (cacheKey != null) {
            if (HiveConf.getBoolVar(this.cacheConf, HiveConf.ConfVars.LLAP_TRACK_CACHE_USAGE)) {
                cacheTag = LlapUtil.getDbAndTableNameForMetrics(this.file, true);
            }
            FileSystem fs = this.file.getFileSystem((Configuration)configuration);
            if (cacheKey instanceof Long && HiveConf.getBoolVar(this.cacheConf, HiveConf.ConfVars.LLAP_IO_USE_FILEID_PATH)) {
                this.file = HdfsUtils.getFileIdPath(fs, this.file, (Long)cacheKey);
            }
        }

        ParquetMetadata footer;
        List<BlockMetaData> blocks;
        if (rowGroupOffsets == null) {
            // No explicit offsets: restrict the footer to the split range and apply the configured filter.
            footer = this.readSplitFooter(configuration, this.file, cacheKey, ParquetMetadataConverter.range(split.getStart(), split.getEnd()), cacheTag);
            MessageType footerSchema = footer.getFileMetaData().getSchema();
            FilterCompat.Filter filter = ParquetInputFormat.getFilter((Configuration)configuration);
            blocks = RowGroupFilter.filterRowGroups(filter, footer.getBlocks(), footerSchema);
        } else {
            // Explicit offsets: keep exactly the row groups whose starting position is listed in the split.
            footer = this.readSplitFooter(configuration, this.file, cacheKey, ParquetMetadataConverter.NO_FILTER, cacheTag);
            HashSet<Long> offsets = new HashSet<Long>();
            for (long offset : rowGroupOffsets) {
                offsets.add(offset);
            }
            blocks = new ArrayList<BlockMetaData>();
            for (BlockMetaData block : footer.getBlocks()) {
                if (offsets.contains(block.getStartingPos())) {
                    blocks.add(block);
                }
            }
            if (blocks.size() != rowGroupOffsets.length) {
                long[] foundRowGroupOffsets = new long[footer.getBlocks().size()];
                for (int i = 0; i < foundRowGroupOffsets.length; ++i) {
                    foundRowGroupOffsets[i] = footer.getBlocks().get(i).getStartingPos();
                }
                throw new IllegalStateException("All the offsets listed in the split should be found in the file. expected: " + Arrays.toString(rowGroupOffsets) + " found: " + blocks + " out of: " + Arrays.toString(foundRowGroupOffsets) + " in range " + split.getStart() + ", " + split.getEnd());
            }
        }

        for (BlockMetaData block : blocks) {
            this.totalRowCount += block.getRowCount();
        }
        this.fileSchema = footer.getFileMetaData().getSchema();
        this.writerTimezone = DataWritableReadSupport.getWriterTimeZoneId(footer.getFileMetaData().getKeyValueMetaData());
        this.colsToInclude = ColumnProjectionUtils.getReadColumnIDs((Configuration)configuration);
        this.requestedSchema = DataWritableReadSupport.getRequestedSchema(indexAccess, this.columnNamesList, this.columnTypesList, this.fileSchema, (Configuration)configuration);
        Path path = this.wrapPathForCache(this.file, cacheKey, configuration, blocks, cacheTag);
        this.reader = new ParquetFileReader((Configuration)configuration, footer.getFileMetaData(), path, blocks, this.requestedSchema.getColumns());
    }

    /**
     * Registers the file with {@code LlapCacheAwareFs} so reads go through the
     * data cache, and returns the path produced by that registration. When there
     * is no file key or no data cache, the input path is returned unchanged.
     *
     * <p>The registration receives an index mapping the start offset of every
     * requested column chunk in the given row groups to its end offset
     * (start plus total size).
     *
     * @param path the file path to wrap
     * @param fileKey cache key of the file, or {@code null}
     * @param configuration job configuration; {@code fs.llapcache.impl} is set on it
     * @param blocks the row groups that will be read
     * @param tag cache tag passed through to the registration
     * @return the path to open the file with
     * @throws IOException if registration fails
     */
    private Path wrapPathForCache(Path path, Object fileKey, JobConf configuration, List<BlockMetaData> blocks, String tag) throws IOException {
        if (fileKey == null || this.cache == null) {
            return path;
        }
        // Column paths that are part of the requested schema.
        HashSet<ColumnPath> projected = new HashSet<ColumnPath>();
        for (ColumnDescriptor descriptor : this.requestedSchema.getColumns()) {
            projected.add(ColumnPath.get(descriptor.getPath()));
        }
        // start offset -> end offset for each requested column chunk.
        TreeMap<Long, Long> chunkRanges = new TreeMap<Long, Long>();
        for (BlockMetaData rowGroup : blocks) {
            for (ColumnChunkMetaData chunk : rowGroup.getColumns()) {
                if (projected.contains(chunk.getPath())) {
                    long start = chunk.getStartingPos();
                    chunkRanges.put(start, start + chunk.getTotalSize());
                }
            }
        }
        configuration.set("fs.llapcache.impl", LlapCacheAwareFs.class.getCanonicalName());
        Path cachePath = LlapCacheAwareFs.registerFile(this.cache, path, fileKey, chunkRanges, (Configuration)configuration, tag);
        this.cacheFsPath = cachePath;
        return cachePath;
    }

    /**
     * Reads the Parquet footer for a file, going through the metadata cache when
     * both a cache key and a cache are available.
     *
     * <ul>
     *   <li>Cache hit: the footer is parsed from the cached buffer.</li>
     *   <li>No key or no cache: the footer is read straight from the file.</li>
     *   <li>Cache miss: the footer bytes are first stored in the cache and then
     *       parsed from the cached buffer.</li>
     * </ul>
     * The cached buffer's reference is released after parsing in every case,
     * including when parsing fails.
     *
     * @param configuration job configuration used to resolve the file system
     * @param file the file whose footer is read
     * @param cacheKey cache key of the file, or {@code null} to bypass the cache
     * @param filter metadata filter applied while parsing the footer
     * @param tag cache tag used when storing the footer
     * @return the parsed footer
     * @throws IOException if the file cannot be read or its footer length is implausible
     */
    private ParquetMetadata readSplitFooter(JobConf configuration, Path file, Object cacheKey, ParquetMetadataConverter.MetadataFilter filter, String tag) throws IOException {
        boolean useCache = cacheKey != null && this.metadataCache != null;
        MemoryBufferOrBuffers footerData = useCache ? this.metadataCache.getFileMetadata(cacheKey) : null;
        if (footerData != null) {
            if (LOG.isInfoEnabled()) {
                LOG.info("Found the footer in cache for " + cacheKey);
            }
            try {
                return ParquetFileReader.readFooter(new ParquetFooterInputFromCache(footerData), filter);
            }
            finally {
                this.metadataCache.decRefBuffer(footerData);
            }
        }
        FileSystem fs = file.getFileSystem((Configuration)configuration);
        FileStatus stat = fs.getFileStatus(file);
        if (!useCache) {
            return this.readFooterFromFile(file, fs, stat, filter);
        }
        // Cache miss: copy the footer bytes into the cache, then parse from the cached copy.
        try (SeekableInputStream stream = HadoopStreams.wrap(fs.open(file))) {
            // The 4-byte little-endian footer length sits just before the trailing magic bytes.
            long footerLengthIndex = stat.getLen() - 4L - (long)ParquetFileWriter.MAGIC.length;
            if (footerLengthIndex < 0L) {
                throw new IOException("File " + file + " is too small to contain a Parquet footer (length " + stat.getLen() + ")");
            }
            stream.seek(footerLengthIndex);
            int footerLength = BytesUtils.readIntLittleEndian(stream);
            if (footerLength < 0 || (long)footerLength > footerLengthIndex) {
                // Seeking with such a value would target a position outside the file.
                throw new IOException("Corrupted footer length " + footerLength + " in file " + file);
            }
            stream.seek(footerLengthIndex - (long)footerLength);
            if (LOG.isInfoEnabled()) {
                LOG.info("Caching the footer of length " + footerLength + " for " + cacheKey);
            }
            footerData = this.metadataCache.putFileMetadata(cacheKey, footerLength, stream, tag);
            try {
                return ParquetFileReader.readFooter(new ParquetFooterInputFromCache(footerData), filter);
            }
            finally {
                this.metadataCache.decRefBuffer(footerData);
            }
        }
    }

    /**
     * Reads the footer directly from the file, bypassing any cache.
     *
     * @param file the file to read
     * @param fs file system used to open the file
     * @param stat file status supplying the file length
     * @param filter metadata filter applied while parsing the footer
     * @return the parsed footer
     * @throws IOException if the file cannot be opened or read
     */
    private ParquetMetadata readFooterFromFile(final Path file, final FileSystem fs, final FileStatus stat, ParquetMetadataConverter.MetadataFilter filter) throws IOException {
        // Adapter exposing the Hadoop file as a Parquet InputFile.
        return ParquetFileReader.readFooter(new InputFile(){

            @Override
            public long getLength() throws IOException {
                return stat.getLen();
            }

            @Override
            public SeekableInputStream newStream() throws IOException {
                return HadoopStreams.wrap(fs.open(file));
            }
        }, filter);
    }

    /**
     * Fills the given batch with the next rows by delegating to {@code nextBatch}.
     *
     * @param nullWritable unused key
     * @param vectorizedRowBatch batch to fill
     * @return {@code true} if the batch was filled, {@code false} when all rows have been returned
     * @throws IOException if reading fails
     */
    public boolean next(NullWritable nullWritable, VectorizedRowBatch vectorizedRowBatch) throws IOException {
        return this.nextBatch(vectorizedRowBatch);
    }

    /**
     * Returns the key object for this reader, obtained from {@code NullWritable.get()}.
     *
     * @return the {@code NullWritable} key
     */
    public NullWritable createKey() {
        return NullWritable.get();
    }

    /**
     * Creates a row batch through the row batch context.
     *
     * @return the batch returned by {@code rbCtx.createVectorizedRowBatch()}
     */
    public VectorizedRowBatch createValue() {
        return this.rbCtx.createVectorizedRowBatch();
    }

    /**
     * Position reporting is not implemented; this always returns 0.
     *
     * @return always {@code 0}
     */
    public long getPos() throws IOException {
        return 0L;
    }

    /**
     * Releases the resources held by this reader: unregisters the cache-aware
     * path (if one was registered) and closes the underlying Parquet reader.
     *
     * <p>The Parquet reader is closed even if unregistering fails. The cache
     * path is cleared before unregistering, so a repeated call does not
     * unregister it again.
     *
     * @throws IOException if closing the underlying reader fails
     */
    public void close() throws IOException {
        try {
            Path registered = this.cacheFsPath;
            if (registered != null) {
                this.cacheFsPath = null;
                LlapCacheAwareFs.unregisterFile(registered);
            }
        }
        finally {
            if (this.reader != null) {
                this.reader.close();
            }
        }
    }

    /**
     * Reports progress as the fraction of selected rows already returned.
     *
     * @return {@code rowsReturned / totalRowCount} capped at 1.0, or 0 when no rows are selected
     */
    public float getProgress() throws IOException {
        if (this.totalRowCount <= 0L) {
            // Nothing selected (or not initialized): keep reporting 0.
            return 0.0f;
        }
        return Math.min(1.0f, (float)this.rowsReturned / (float)this.totalRowCount);
    }

    /**
     * Resets the batch and fills it with up to 1024 rows from the current row
     * group, loading the next row group first when the current one is exhausted.
     *
     * @param columnarBatch batch to reset and fill
     * @return {@code false} when every selected row has already been returned, {@code true} otherwise
     * @throws IOException if reading the next row group or a column fails
     */
    private boolean nextBatch(VectorizedRowBatch columnarBatch) throws IOException {
        columnarBatch.reset();
        boolean exhausted = this.rowsReturned >= this.totalRowCount;
        if (exhausted) {
            return false;
        }
        if (this.partitionValues != null) {
            this.rbCtx.addPartitionColsToBatch(columnarBatch, this.partitionValues);
        }
        this.checkEndOfRowGroup();
        // Never read past the rows loaded for the current row group.
        long remainingLoaded = this.totalCountLoadedSoFar - this.rowsReturned;
        int batchSize = (int)Math.min(1024L, remainingLoaded);
        if (!this.colsToInclude.isEmpty()) {
            for (int readerIdx = 0; readerIdx < this.columnReaders.length; ++readerIdx) {
                VectorizedColumnReader columnReader = this.columnReaders[readerIdx];
                if (columnReader != null) {
                    int batchCol = this.colsToInclude.get(readerIdx);
                    columnarBatch.cols[batchCol].isRepeating = true;
                    columnReader.readBatch(batchSize, columnarBatch.cols[batchCol], this.columnTypesList.get(batchCol));
                }
            }
        }
        this.rowsReturned += (long)batchSize;
        columnarBatch.size = batchSize;
        return true;
    }

    /**
     * Loads the next row group and rebuilds the column readers once every row
     * loaded so far has been returned. Does nothing while rows of the current
     * row group are still pending.
     *
     * <p>When all columns are read, the Hive type of field {@code i} is taken at
     * index {@code i}; otherwise it is looked up through {@code colsToInclude}.
     * With a projection that includes no columns, no readers are built.
     *
     * @throws IOException if no further row group is available or a reader cannot be built
     */
    private void checkEndOfRowGroup() throws IOException {
        if (this.rowsReturned != this.totalCountLoadedSoFar) {
            return;
        }
        PageReadStore pages = this.reader.readNextRowGroup();
        if (pages == null) {
            throw new IOException("expecting more rows but reached last block. Read " + this.rowsReturned + " out of " + this.totalRowCount);
        }
        // Computed once and shared by every reader built below.
        List<ColumnDescriptor> columns = this.requestedSchema.getColumns();
        List<Type> types = this.requestedSchema.getFields();
        this.columnReaders = new VectorizedColumnReader[columns.size()];
        boolean readAllColumns = ColumnProjectionUtils.isReadAllColumns((Configuration)this.jobConf);
        if (readAllColumns || !this.colsToInclude.isEmpty()) {
            for (int i = 0; i < types.size(); ++i) {
                int hiveColumn = readAllColumns ? i : this.colsToInclude.get(i);
                this.columnReaders[i] = this.buildVectorizedParquetReader(this.columnTypesList.get(hiveColumn), types.get(i), pages, columns, this.skipTimestampConversion, this.writerTimezone, 0);
            }
        }
        this.totalCountLoadedSoFar += pages.getRowCount();
    }

    /**
     * Collects the column descriptors whose path element at {@code depth}
     * equals the name of the given type.
     *
     * @param depth index into each descriptor's path
     * @param type type whose name is matched
     * @param columns candidate descriptors
     * @return the matching descriptors, in the order they appear in {@code columns}
     * @throws ParquetRuntimeException ({@code InvalidSchemaException}) if a descriptor's path has no element at {@code depth}
     */
    private List<ColumnDescriptor> getAllColumnDescriptorByType(int depth, Type type, List<ColumnDescriptor> columns) throws ParquetRuntimeException {
        ArrayList<ColumnDescriptor> matched = new ArrayList<ColumnDescriptor>();
        for (ColumnDescriptor candidate : columns) {
            String[] path = candidate.getPath();
            if (path.length <= depth) {
                throw new InvalidSchemaException("Corrupted Parquet schema");
            }
            if (type.getName().equals(path[depth])) {
                matched.add(candidate);
            }
        }
        return matched;
    }

    /**
     * Resolves the primitive element type of a list type. A primitive type is
     * returned as is; a group must have a single field, whose own first field is
     * taken as the element.
     *
     * @param type the Parquet type of the list column
     * @return the primitive element type
     * @throws RuntimeException if the group has more than one field
     */
    private PrimitiveType getElementType(Type type) {
        if (type.isPrimitive()) {
            return type.asPrimitiveType();
        }
        GroupType listGroup = type.asGroupType();
        if (listGroup.getFields().size() > 1) {
            throw new RuntimeException("Current Parquet Vectorization reader doesn't support nested type");
        }
        Type repeatedGroup = listGroup.getFields().get(0);
        Type element = repeatedGroup.asGroupType().getFields().get(0);
        return element.asPrimitiveType();
    }

    /**
     * Builds the column reader matching a Hive type and its Parquet type.
     *
     * <ul>
     *   <li>PRIMITIVE: a primitive reader when the matched column exists in the
     *       file schema, otherwise a dummy reader.</li>
     *   <li>STRUCT: a struct reader over readers built recursively for each field.</li>
     *   <li>LIST: a list reader over the first matched column.</li>
     *   <li>MAP: a map reader combining list readers for the first two matched
     *       columns (key and value).</li>
     * </ul>
     *
     * @param typeInfo Hive type of the column
     * @param type Parquet type of the column
     * @param pages pages of the current row group
     * @param columnDescriptors candidate descriptors; narrowed to those whose path matches {@code type} at {@code depth}
     * @param skipTimestampConversion passed through to the readers
     * @param writerTimezone passed through to the readers
     * @param depth path depth at which {@code type}'s name is matched
     * @return the reader for the column; never {@code null}
     * @throws IOException if a reader cannot be created
     * @throws RuntimeException if no suitable descriptor is found or the type is unsupported
     */
    private VectorizedColumnReader buildVectorizedParquetReader(TypeInfo typeInfo, Type type, PageReadStore pages, List<ColumnDescriptor> columnDescriptors, boolean skipTimestampConversion, ZoneId writerTimezone, int depth) throws IOException {
        List<ColumnDescriptor> descriptors = this.getAllColumnDescriptorByType(depth, type, columnDescriptors);
        switch (typeInfo.getCategory()) {
            case PRIMITIVE: {
                VectorizedParquetRecordReader.requireDescriptors(descriptors, 1, type);
                ColumnDescriptor descriptor = descriptors.get(0);
                if (this.fileSchema.getColumns().contains(descriptor)) {
                    return new VectorizedPrimitiveColumnReader(descriptor, pages.getPageReader(descriptor), skipTimestampConversion, writerTimezone, type, typeInfo);
                }
                // Column is requested but absent from the file.
                return new VectorizedDummyColumnReader();
            }
            case STRUCT: {
                StructTypeInfo structTypeInfo = (StructTypeInfo)typeInfo;
                List<VectorizedColumnReader> fieldReaders = new ArrayList<VectorizedColumnReader>();
                List<TypeInfo> fieldTypes = structTypeInfo.getAllStructFieldTypeInfos();
                List<Type> types = type.asGroupType().getFields();
                for (int i = 0; i < fieldTypes.size(); ++i) {
                    VectorizedColumnReader r = this.buildVectorizedParquetReader(fieldTypes.get(i), types.get(i), pages, descriptors, skipTimestampConversion, writerTimezone, depth + 1);
                    if (r == null) {
                        throw new RuntimeException("Fail to build Parquet vectorized reader based on Hive type " + fieldTypes.get(i).getTypeName() + " and Parquet type" + types.get(i).toString());
                    }
                    fieldReaders.add(r);
                }
                return new VectorizedStructColumnReader(fieldReaders);
            }
            case LIST: {
                this.checkListColumnSupport(((ListTypeInfo)typeInfo).getListElementTypeInfo());
                VectorizedParquetRecordReader.requireDescriptors(descriptors, 1, type);
                ColumnDescriptor descriptor = descriptors.get(0);
                return new VectorizedListColumnReader(descriptor, pages.getPageReader(descriptor), skipTimestampConversion, writerTimezone, this.getElementType(type), typeInfo);
            }
            case MAP: {
                // Key and value each need their own matched column.
                VectorizedParquetRecordReader.requireDescriptors(descriptors, 2, type);
                int nestGroup = 0;
                GroupType groupType = type.asGroupType();
                // Unwrap single-field groups until the group holding key and value is reached.
                while (groupType.getFieldCount() < 2) {
                    if (nestGroup > MAP_DEFINITION_LEVEL_MAX) {
                        throw new RuntimeException("More than " + MAP_DEFINITION_LEVEL_MAX + " level is found in Map definition, Failed to get the field types for Map with type " + type);
                    }
                    groupType = groupType.getFields().get(0).asGroupType();
                    ++nestGroup;
                }
                List<Type> kvTypes = groupType.getFields();
                ColumnDescriptor keyDescriptor = descriptors.get(0);
                ColumnDescriptor valueDescriptor = descriptors.get(1);
                VectorizedListColumnReader keyListColumnReader = new VectorizedListColumnReader(keyDescriptor, pages.getPageReader(keyDescriptor), skipTimestampConversion, writerTimezone, kvTypes.get(0), typeInfo);
                VectorizedListColumnReader valueListColumnReader = new VectorizedListColumnReader(valueDescriptor, pages.getPageReader(valueDescriptor), skipTimestampConversion, writerTimezone, kvTypes.get(1), typeInfo);
                return new VectorizedMapColumnReader(keyListColumnReader, valueListColumnReader);
            }
        }
        throw new RuntimeException("Unsupported category " + typeInfo.getCategory().name());
    }

    /** Fails with a descriptive error unless at least {@code required} matched descriptors are available. */
    private static void requireDescriptors(List<ColumnDescriptor> descriptors, int required, Type type) {
        if (descriptors == null || descriptors.size() < required) {
            throw new RuntimeException("Failed to find related Parquet column descriptor with type " + type);
        }
    }

    /**
     * Rejects list element types this reader cannot handle: any non-primitive
     * element, and the primitive categories TIMESTAMP and INTERVAL_DAY_TIME.
     *
     * @param elementType Hive type of the list elements
     * @throws RuntimeException if the element type is unsupported
     */
    private void checkListColumnSupport(TypeInfo elementType) {
        if (!(elementType instanceof PrimitiveTypeInfo)) {
            throw new RuntimeException("Unsupported type used in list:" + elementType);
        }
        PrimitiveTypeInfo primitiveElement = (PrimitiveTypeInfo)elementType;
        switch (primitiveElement.getPrimitiveCategory()) {
            case TIMESTAMP:
            case INTERVAL_DAY_TIME: {
                throw new RuntimeException("Unsupported primitive type used in list:: " + elementType);
            }
            default: {
                break;
            }
        }
    }
}

