package water.parser.parquet;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import org.apache.parquet.format.converter.ParquetMetadataConverter;
import org.apache.parquet.hadoop.ParquetFileWriter;
import org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.Type;
import water.Job;
import water.Key;
import water.exceptions.H2OUnsupportedDataFileException;
import water.fvec.ByteVec;
import water.fvec.Chunk;
import water.fvec.Vec;
import water.parser.FVecParseReader;
import water.parser.ParseReader;
import water.parser.ParseSetup;
import water.parser.ParseWriter;
import water.parser.Parser;
import water.parser.PreviewParseWriter;
import water.parser.StreamParseWriter;
import water.util.IcedHashMapGeneric;
import water.util.Log;

/* loaded from: input_file:water/parser/parquet/ParquetParser.class */
public class ParquetParser extends Parser {
    /** Maximum number of records that guessDataSetup asks readFirstRecords to read for a preview. */
    private static final int MAX_PREVIEW_RECORDS = 1000;
    /** Parquet footer bytes, copied from the ParquetParseSetup handed to the constructor. */
    private final byte[] _metadata;
    /** Decompiler rendering of the assertion flag: true when assertions are disabled for this class. */
    static final /* synthetic */ boolean $assertionsDisabled;

    /* JADX INFO: Access modifiers changed from: package-private */
    /* renamed from: water.parser.parquet.ParquetParser$1, reason: invalid class name */
    /* loaded from: input_file:water/parser/parquet/ParquetParser$1.class */
    /**
     * Decompiler rendering of a compiler-generated switch lookup table.
     *
     * <p>The array is indexed by {@code PrimitiveTypeName.ordinal()} and holds a small
     * case number (1..6) for each constant listed below. Slots that are never assigned
     * keep the array default of 0.
     */
    public static /* synthetic */ class AnonymousClass1 {
        static final /* synthetic */ int[] $SwitchMap$org$apache$parquet$schema$PrimitiveType$PrimitiveTypeName = new int[PrimitiveType.PrimitiveTypeName.values().length];

        static {
            // Each constant is registered in its own try block so that a NoSuchFieldError
            // raised for one constant does not prevent the remaining ones from being mapped.
            try {
                $SwitchMap$org$apache$parquet$schema$PrimitiveType$PrimitiveTypeName[PrimitiveType.PrimitiveTypeName.BOOLEAN.ordinal()] = 1;
            } catch (NoSuchFieldError e) {
            }
            try {
                $SwitchMap$org$apache$parquet$schema$PrimitiveType$PrimitiveTypeName[PrimitiveType.PrimitiveTypeName.INT32.ordinal()] = 2;
            } catch (NoSuchFieldError e2) {
            }
            try {
                $SwitchMap$org$apache$parquet$schema$PrimitiveType$PrimitiveTypeName[PrimitiveType.PrimitiveTypeName.FLOAT.ordinal()] = 3;
            } catch (NoSuchFieldError e3) {
            }
            try {
                $SwitchMap$org$apache$parquet$schema$PrimitiveType$PrimitiveTypeName[PrimitiveType.PrimitiveTypeName.DOUBLE.ordinal()] = 4;
            } catch (NoSuchFieldError e4) {
            }
            try {
                $SwitchMap$org$apache$parquet$schema$PrimitiveType$PrimitiveTypeName[PrimitiveType.PrimitiveTypeName.INT64.ordinal()] = 5;
            } catch (NoSuchFieldError e5) {
            }
            try {
                $SwitchMap$org$apache$parquet$schema$PrimitiveType$PrimitiveTypeName[PrimitiveType.PrimitiveTypeName.INT96.ordinal()] = 6;
            } catch (NoSuchFieldError e6) {
            }
        }
    }

    /* loaded from: input_file:water/parser/parquet/ParquetParser$ParquetParseSetup.class */
    /**
     * Parse setup for Parquet input. On top of the generic {@link ParseSetup} state it
     * carries the bytes of the Parquet footer so later parsing stages can decode the
     * footer from memory.
     */
    public static class ParquetParseSetup extends ParseSetup {
        /** Parquet footer bytes; declared transient. May be null. */
        transient byte[] parquetMetadata;

        /** Creates an empty setup; no footer bytes are attached. */
        public ParquetParseSetup() {
        }

        /**
         * Creates a setup for the given columns.
         *
         * @param strArr  column names; its length is also passed to the superclass as the column count
         * @param bArr    column type codes, forwarded to the superclass
         * @param strArr2 preview data rows forwarded to the superclass; may be null
         * @param bArr2   Parquet footer bytes; may be null
         */
        public ParquetParseSetup(String[] strArr, byte[] bArr, String[][] strArr2, byte[] bArr2) {
            // The eighth argument must be a String[][] with one (null) slot per column.
            // The decompiled code cast a String[] to String[][], which is not a legal cast.
            // NOTE(review): that argument is presumably the per-column domains and the following
            // null the NA strings; (byte) 124 is the '|' character - confirm against ParseSetup.
            super(ParquetParserProvider.PARQUET_INFO, (byte) 124, true, 1, strArr.length, strArr, bArr,
                    new String[strArr.length][], (String[][]) null, strArr2);
            this.parquetMetadata = bArr2;
            // Column types are extracted from the footer only when type forcing is on
            // and footer bytes are actually available.
            if (getForceColTypes() && bArr2 != null) {
                this.parquetColumnTypes = ParquetParser.extractColumnTypes(VecParquetReader.readFooter(bArr2));
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:water/parser/parquet/ParquetParser$ParquetPreviewParseWriter.class */
    /**
     * Preview writer that remembers the column names and the schema-derived ("rough")
     * column types of a Parquet setup, so that guessed types can be reconciled with them.
     */
    public static class ParquetPreviewParseWriter extends PreviewParseWriter {
        /** Column names taken from the parse setup. */
        private String[] _colNames;
        /** Column types taken from the parse setup. */
        private byte[] _roughTypes;

        /** Creates a writer with no column information. */
        public ParquetPreviewParseWriter() {
        }

        /**
         * Creates a writer sized and labelled after the given setup.
         *
         * @param parquetParseSetup setup supplying column names and column types
         */
        ParquetPreviewParseWriter(ParquetParseSetup parquetParseSetup) {
            super(parquetParseSetup.getColumnNames().length);
            final String[] names = parquetParseSetup.getColumnNames();
            _colNames = names;
            _roughTypes = parquetParseSetup.getColumnTypes();
            setColumnNames(names);
            _nlines = 0;
            // The first data row gets one slot per column.
            _data[0] = new String[names.length];
        }

        /**
         * Returns the superclass's guessed types after reconciling them with the rough types.
         *
         * @return one type code per column
         */
        public byte[] guessTypes() {
            final byte[] guessed = super.guessTypes();
            return ParquetParser.correctTypeConversions(_roughTypes, guessed);
        }

        /**
         * Builds a parse setup from the state collected by this writer.
         *
         * @param bArr Parquet footer bytes to attach to the new setup
         * @return a new setup with this writer's column names, guessed types and data
         */
        ParquetParseSetup toParseSetup(byte[] bArr) {
            final String[] names = _colNames;
            final byte[] types = guessTypes();
            final String[][] preview = _data;
            return new ParquetParseSetup(names, types, preview, bArr);
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    public ParquetParser(ParseSetup parseSetup, Key<Job> key) {
        super(parseSetup, key);
        this._metadata = ((ParquetParseSetup) parseSetup).parquetMetadata;
    }

    /**
     * Reads all records of the file sequentially, spreading them over as many output
     * chunks as the input Vec has chunks.
     *
     * <p>The per-chunk record quota is the total record count divided by the number of
     * chunks, rounded up. After each quota is filled (or the reader is exhausted) the
     * current writer is closed, reduced into the given writer and replaced by its next chunk.
     *
     * @param vec               input Vec holding the Parquet file; nChunks() is used as a divisor
     * @param streamParseWriter writer receiving the first chunk and all reductions
     * @return the same {@code streamParseWriter} instance
     * @throws IllegalStateException if the per-chunk quota does not fit into an int
     * @throws RuntimeException      wrapping an IOException raised while reading records
     */
    protected final StreamParseWriter sequentialParse(Vec vec, StreamParseWriter streamParseWriter) {
        final ParquetMetadata footer = VecParquetReader.readFooter(this._metadata);
        final int nChunks = vec.nChunks();
        final long totalRecs = totalRecords(footer);
        // Ceiling division: one extra record per chunk when the split is not exact.
        final long recsPerChunk = (totalRecs / nChunks) + (totalRecs % ((long) nChunks) > 0 ? 1 : 0);
        if (recsPerChunk != ((int) recsPerChunk)) {
            throw new IllegalStateException("Unsupported Parquet file. Too many records (#" + totalRecs + ", nChunks=" + nChunks + ").");
        }
        final WriterDelegate delegate = new WriterDelegate(streamParseWriter, this._setup.getColumnTypes().length);
        final VecParquetReader reader = new VecParquetReader(vec, footer, delegate, this._setup.getColumnTypes(), this._keepColumns);
        StreamParseWriter current = streamParseWriter;
        long parsedRecs = 0;
        for (int chunkIdx = 0; chunkIdx < nChunks; chunkIdx++) {
            try {
                Long recordNumber;
                do {
                    recordNumber = reader.read();
                    if (recordNumber != null) {
                        parsedRecs++;
                    }
                    // Stop when the reader is exhausted or the chunk quota is reached.
                } while (recordNumber != null && delegate.lineNum() < recsPerChunk);
            } catch (IOException e) {
                throw new RuntimeException("Failed to parse records", e);
            }
            if (this._jobKey != null) {
                Job.update(vec.length() / nChunks, this._jobKey);
            }
            current.close();
            streamParseWriter.reduce(current);
            current = current.nextChunk();
            delegate.setWriter(current);
        }
        // Assertion: every record announced by the footer must have been read.
        if (!$assertionsDisabled && parsedRecs != totalRecs) {
            throw new AssertionError();
        }
        return streamParseWriter;
    }

    /**
     * Sums the row counts of all blocks listed in the footer.
     *
     * @param parquetMetadata decoded Parquet footer
     * @return total number of rows across all blocks; 0 when there are no blocks
     */
    private long totalRecords(ParquetMetadata parquetMetadata) {
        long rows = 0;
        for (BlockMetaData block : parquetMetadata.getBlocks()) {
            rows += block.getRowCount();
        }
        return rows;
    }

    /**
     * Parses the Parquet blocks selected for one chunk of the underlying Vec.
     *
     * <p>The footer is decoded with a range filter covering the chunk's byte range
     * {@code [start, start + len)}. If no block is selected, nothing is parsed.
     * NOTE(review): the trace message suggests blocks are selected by their midpoint -
     * confirm against ParquetMetadataConverter.range.
     *
     * @param i           index of the chunk, used for logging only
     * @param parseReader reader for the chunk; must be an {@link FVecParseReader}
     * @param parseWriter writer receiving the parsed records
     * @return the same {@code parseWriter} instance
     * @throws IllegalStateException if {@code parseReader} is not backed by a Vec
     * @throws RuntimeException      wrapping an IOException raised while reading records
     */
    protected final ParseWriter parseChunk(int i, ParseReader parseReader, ParseWriter parseWriter) {
        if (!(parseReader instanceof FVecParseReader)) {
            throw new IllegalStateException("We only accept parser readers backed by a Vec (no streaming support!).");
        }
        final Chunk chunk = ((FVecParseReader) parseReader).getChunk();
        final Vec vec = chunk.vec();
        final ParquetMetadata readFooter = VecParquetReader.readFooter(this._metadata, ParquetMetadataConverter.range(chunk.start(), chunk.start() + chunk.len()));
        if (readFooter.getBlocks().isEmpty()) {
            Log.trace(new Object[]{"Chunk #", Integer.valueOf(i), " doesn't contain any Parquet block center."});
            return parseWriter;
        }
        Log.info(new Object[]{"Processing ", Integer.valueOf(readFooter.getBlocks().size()), " blocks of chunk #", Integer.valueOf(i)});
        // Build the reader exactly once. Re-creating it for every record (as the
        // decompiled loop condition did) would never advance past the first record.
        final VecParquetReader reader = new VecParquetReader(vec, readFooter, parseWriter, this._setup.getColumnTypes(), this._keepColumns, this._setup.get_parse_columns_indices().length);
        try {
            // read() returns null once the reader has no more records.
            Long recordNumber;
            do {
                recordNumber = reader.read();
            } while (recordNumber != null);
        } catch (IOException e) {
            throw new RuntimeException("Failed to parse records", e);
        }
        return parseWriter;
    }

    /**
     * Checks whether the given leading bytes start with the Parquet magic sequence and,
     * if so, builds an initial parse setup from the file's footer.
     *
     * @param byteVec Vec holding the whole file; the footer is read from it
     * @param bArr    leading bytes of the file
     * @return an initial setup, or null when {@code bArr} does not start with the magic bytes
     */
    public static ParquetParseSetup guessFormatSetup(ByteVec byteVec, byte[] bArr) {
        final byte[] magic = ParquetFileWriter.MAGIC;
        if (bArr.length < magic.length) {
            return null;
        }
        // Compare only the prefix of the input that lines up with the magic bytes.
        final byte[] prefix = Arrays.copyOf(bArr, magic.length);
        if (!Arrays.equals(prefix, magic)) {
            return null;
        }
        final byte[] footerBytes = VecParquetReader.readFooterAsBytes(byteVec);
        final ParquetMetadata footer = VecParquetReader.readFooter(footerBytes);
        checkCompatibility(footer);
        final MessageType schema = footer.getFileMetaData().getSchema();
        return toInitialSetup(schema, footerBytes);
    }

    /**
     * Lists the primitive type name of every top-level field in the file schema.
     *
     * @param parquetMetadata decoded Parquet footer
     * @return one primitive type name per schema field, in schema order
     */
    public static String[] extractColumnTypes(ParquetMetadata parquetMetadata) {
        final MessageType schema = parquetMetadata.getFileMetaData().getSchema();
        final String[] typeNames = new String[schema.getFieldCount()];
        int field = 0;
        while (field < typeNames.length) {
            final PrimitiveType primitive = schema.getType(field).asPrimitiveType();
            typeNames[field] = primitive.getPrimitiveTypeName().name();
            field++;
        }
        return typeNames;
    }

    /**
     * Builds a setup from the schema alone: column names and rough types, no preview data.
     *
     * @param messageType file schema
     * @param bArr        Parquet footer bytes to attach to the setup
     * @return a new setup without preview data
     */
    private static ParquetParseSetup toInitialSetup(MessageType messageType, byte[] bArr) {
        final String[] names = columnNames(messageType);
        final byte[] roughTypes = roughGuessTypes(messageType);
        final String[][] noPreviewData = null;
        return new ParquetParseSetup(names, roughTypes, noPreviewData, bArr);
    }

    /**
     * Refines a setup by reading up to {@code MAX_PREVIEW_RECORDS} records from the file.
     *
     * @param byteVec           Vec holding the file
     * @param parquetParseSetup setup to refine; its footer bytes are carried over to the result
     * @param zArr              per-column flags forwarded to the record reader
     * @return a new setup built from the preview writer's state
     */
    public static ParquetParseSetup guessDataSetup(ByteVec byteVec, ParquetParseSetup parquetParseSetup, boolean[] zArr) {
        final ParquetPreviewParseWriter preview = readFirstRecords(parquetParseSetup, byteVec, MAX_PREVIEW_RECORDS, zArr);
        return preview.toParseSetup(parquetParseSetup.parquetMetadata);
    }

    /**
     * Reconciles requested column types with the types implied by the file's schema.
     * The footer is read from the Vec without any filter.
     *
     * @param byteVec Vec holding the file
     * @param bArr    requested type code per column
     * @return reconciled type code per column
     * @throws IllegalArgumentException if the number of requested types differs from the number of columns
     */
    public static byte[] correctTypeConversions(ByteVec byteVec, byte[] bArr) {
        final byte[] footerBytes = VecParquetReader.readFooterAsBytes(byteVec);
        final ParquetMetadata footer = VecParquetReader.readFooter(footerBytes, ParquetMetadataConverter.NO_FILTER);
        final MessageType schema = footer.getFileMetaData().getSchema();
        final byte[] roughTypes = roughGuessTypes(schema);
        return correctTypeConversions(roughTypes, bArr);
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Reconciles requested column types with the rough, schema-derived types.
     *
     * <p>Per column: a rough type of 3 or 5 always wins; a rough type of 0 combined with a
     * requested type of 3 yields 2; in every other case the requested type is used.
     *
     * @param bArr  rough type code per column
     * @param bArr2 requested type code per column
     * @return a new array with the reconciled type code per column
     * @throws IllegalArgumentException if the two arrays differ in length
     */
    public static byte[] correctTypeConversions(byte[] bArr, byte[] bArr2) {
        // NOTE(review): these codes presumably mirror Vec.T_BAD / T_STR / T_NUM / T_TIME - confirm.
        final byte unknownCode = 0;
        final byte stringCode = 2;
        final byte numericCode = 3;
        final byte timeCode = 5;

        final int columns = bArr2.length;
        if (columns != bArr.length) {
            throw new IllegalArgumentException("Invalid column type specification: number of columns and number of types differ!");
        }
        final byte[] resolved = new byte[columns];
        for (int col = 0; col < columns; col++) {
            final byte rough = bArr[col];
            final byte requested = bArr2[col];
            final byte chosen;
            if (rough == numericCode || rough == timeCode) {
                // The schema-derived type is kept regardless of the request.
                chosen = rough;
            } else if (rough == unknownCode && requested == numericCode) {
                chosen = stringCode;
            } else {
                chosen = requested;
            }
            resolved[col] = chosen;
        }
        return resolved;
    }

    /**
     * Rejects files this parser cannot handle: blocks with more than
     * {@code Integer.MAX_VALUE} rows, and schemas whose column paths are not exactly one
     * element long (nested structures).
     *
     * @param parquetMetadata decoded Parquet footer
     * @throws H2OUnsupportedDataFileException if either limitation is hit
     */
    private static void checkCompatibility(ParquetMetadata parquetMetadata) {
        for (BlockMetaData block : parquetMetadata.getBlocks()) {
            if (block.getRowCount() <= Integer.MAX_VALUE) {
                continue;
            }
            // Attach the offending block's position and size to the error.
            final IcedHashMapGeneric.IcedHashMapStringObject details = new IcedHashMapGeneric.IcedHashMapStringObject();
            details.put("startingPos", Long.valueOf(block.getStartingPos()));
            details.put("rowCount", Long.valueOf(block.getRowCount()));
            throw new H2OUnsupportedDataFileException("Unsupported Parquet file (technical limitation).", "Current implementation doesn't support Parquet files with blocks larger than 2147483647 rows.", details);
        }
        for (String[] path : parquetMetadata.getFileMetaData().getSchema().getPaths()) {
            if (path.length == 1) {
                continue;
            }
            throw new H2OUnsupportedDataFileException("Parquet files with nested structures are not supported.", "Detected a column with a nested structure " + Arrays.asList(path));
        }
    }

    /**
     * Reads records from the first block of the file into a fresh preview writer.
     *
     * <p>Only the block with the smallest starting position is exposed to the reader; when
     * the footer lists no blocks, the reader gets an empty block list. At least one read is
     * always attempted; reading stops when the reader returns null or {@code i} records
     * have been read.
     *
     * @param parquetParseSetup setup supplying footer bytes, column names and types
     * @param byteVec           Vec holding the file
     * @param i                 maximum number of records to read
     * @param zArr              per-column flags forwarded to the record reader
     * @return the preview writer that received the records
     * @throws RuntimeException wrapping an IOException raised while reading
     */
    private static ParquetPreviewParseWriter readFirstRecords(ParquetParseSetup parquetParseSetup, ByteVec byteVec, int i, boolean[] zArr) {
        final ParquetMetadata fullFooter = VecParquetReader.readFooter(parquetParseSetup.parquetMetadata);
        final ParquetMetadata previewFooter = new ParquetMetadata(
                fullFooter.getFileMetaData(),
                fullFooter.getBlocks().isEmpty()
                        ? Collections.<BlockMetaData>emptyList()
                        : Collections.singletonList(findFirstBlock(fullFooter)));
        final ParquetPreviewParseWriter previewWriter = new ParquetPreviewParseWriter(parquetParseSetup);
        final VecParquetReader reader = new VecParquetReader(byteVec, previewFooter, previewWriter, previewWriter._roughTypes, zArr, parquetParseSetup.get_parse_columns_indices().length);
        try {
            int recordsRead = 0;
            do {
                if (reader.read() == null) {
                    break;
                }
                recordsRead++;
            } while (recordsRead < i);
        } catch (IOException e) {
            throw new RuntimeException("Failed to read the first few records", e);
        }
        return previewWriter;
    }

    /**
     * Derives one type code per column from the schema alone.
     *
     * <p>The result has one slot per schema path, and the field at the same index is
     * converted with {@code convertType}. With assertions enabled, a non-primitive field
     * triggers an AssertionError.
     *
     * @param messageType file schema
     * @return type code per column
     */
    private static byte[] roughGuessTypes(MessageType messageType) {
        final int columns = messageType.getPaths().size();
        final byte[] codes = new byte[columns];
        for (int col = 0; col < columns; col++) {
            final Type field = messageType.getType(col);
            if (!($assertionsDisabled || field.isPrimitive())) {
                throw new AssertionError();
            }
            final OriginalType annotation = field.getOriginalType();
            codes[col] = convertType(annotation, field.asPrimitiveType());
        }
        return codes;
    }

    /**
     * Maps a Parquet field type to a type code.
     *
     * <p>The original-type annotation takes precedence: TIMESTAMP_MILLIS and DATE give 5,
     * DECIMAL gives 3. Otherwise the primitive type decides: BOOLEAN gives 4;
     * INT32, FLOAT, DOUBLE and INT64 give 3; INT96 gives 5; anything else gives 0.
     * NOTE(review): the codes presumably mirror Vec.T_BAD(0) / T_NUM(3) / T_CAT(4) / T_TIME(5) - confirm.
     *
     * @param originalType  original-type annotation of the field; may be null
     * @param primitiveType primitive type of the field
     * @return the type code
     */
    private static byte convertType(OriginalType originalType, PrimitiveType primitiveType) {
        final boolean temporalAnnotation = OriginalType.TIMESTAMP_MILLIS.equals(originalType)
                || OriginalType.DATE.equals(originalType);
        if (temporalAnnotation) {
            return (byte) 5;
        }
        if (OriginalType.DECIMAL.equals(originalType)) {
            return (byte) 3;
        }
        // Case number assigned to the primitive type by the switch lookup table
        // (1 = BOOLEAN, 2..5 = INT32/FLOAT/DOUBLE/INT64, 6 = INT96, 0 = unmapped).
        final int caseNo = AnonymousClass1.$SwitchMap$org$apache$parquet$schema$PrimitiveType$PrimitiveTypeName[primitiveType.getPrimitiveTypeName().ordinal()];
        if (caseNo == 1) {
            return (byte) 4;
        }
        if (caseNo >= 2 && caseNo <= 5) {
            return (byte) 3;
        }
        if (caseNo == 6) {
            return (byte) 5;
        }
        return (byte) 0;
    }

    /**
     * Collects the column names of a flat schema: the first element of every schema path.
     * With assertions enabled, a path whose length is not 1 triggers an AssertionError.
     *
     * @param messageType file schema
     * @return column names in path order
     */
    private static String[] columnNames(MessageType messageType) {
        final String[] names = new String[messageType.getPaths().size()];
        int next = 0;
        for (String[] path : messageType.getPaths()) {
            if (!($assertionsDisabled || path.length == 1)) {
                throw new AssertionError();
            }
            names[next++] = path[0];
        }
        return names;
    }

    /**
     * Finds the block with the smallest starting position. Among blocks with equal
     * starting positions the one listed first is returned.
     *
     * @param parquetMetadata decoded Parquet footer; must list at least one block
     * @return the block that starts earliest in the file
     */
    private static BlockMetaData findFirstBlock(ParquetMetadata parquetMetadata) {
        BlockMetaData earliest = parquetMetadata.getBlocks().get(0);
        for (BlockMetaData candidate : parquetMetadata.getBlocks()) {
            final boolean startsEarlier = candidate.getStartingPos() < earliest.getStartingPos();
            if (startsEarlier) {
                earliest = candidate;
            }
        }
        return earliest;
    }

    // Captures once, at class initialisation, whether assertions are disabled for this
    // class; the flag guards the hand-expanded assertion checks in the methods above.
    static {
        $assertionsDisabled = !ParquetParser.class.desiredAssertionStatus();
    }
}
