package org.apache.pinot.plugin.inputformat.parquet;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.Map;
import java.util.zip.GZIPInputStream;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.file.DataFileConstants;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.avro.AvroParquetReader;
import org.apache.parquet.avro.AvroParquetWriter;
import org.apache.parquet.avro.AvroSchemaConverter;
import org.apache.parquet.format.converter.ParquetMetadataConverter;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.data.readers.RecordReaderUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import shaded.com.google.common.base.Preconditions;

/* loaded from: input_file:org/apache/pinot/plugin/inputformat/parquet/ParquetUtils.class */
public class ParquetUtils {
    public static final String DEFAULT_FS = "file:///";
    private static final Logger LOGGER = LoggerFactory.getLogger((Class<?>) ParquetUtils.class);

    public static ParquetReader<GenericRecord> getParquetReader(Path path) throws IOException {
        return AvroParquetReader.builder(path).disableCompatibility().withDataModel(GenericData.get()).withConf(getConfiguration()).build();
    }

    public static ParquetWriter<GenericRecord> getParquetWriter(Path path, Schema schema) throws IOException {
        return AvroParquetWriter.builder(path).withSchema(schema).withConf(getConfiguration()).build();
    }

    public static Schema getParquetSchema(Path path) throws IOException {
        ParquetMetadata readFooter = ParquetFileReader.readFooter(getConfiguration(), path, ParquetMetadataConverter.NO_FILTER);
        Map<String, String> keyValueMetaData = readFooter.getFileMetaData().getKeyValueMetaData();
        String str = keyValueMetaData.get("parquet.avro.schema");
        if (str == null) {
            str = keyValueMetaData.get(DataFileConstants.SCHEMA);
        }
        return str != null ? new Schema.Parser().parse(str) : new AvroSchemaConverter().convert(readFooter.getFileMetaData().getSchema());
    }

    private static Configuration getConfiguration() {
        Configuration configuration = new Configuration();
        configuration.set("fs.defaultFS", "file:///");
        configuration.set(CommonConfigurationKeysPublic.FS_FILE_IMPL_KEY, LocalFileSystem.class.getName());
        return configuration;
    }

    public static Schema getAvroSchemaFromPinotSchema(org.apache.pinot.spi.data.Schema schema) {
        SchemaBuilder.FieldAssembler<Schema> fields = SchemaBuilder.record("record").fields();
        for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
            FieldSpec.DataType dataType = fieldSpec.getDataType();
            if (fieldSpec.isSingleValueField()) {
                switch (dataType) {
                    case INT:
                        fields = fields.name(fieldSpec.getName()).type().intType().noDefault();
                        break;
                    case LONG:
                        fields = fields.name(fieldSpec.getName()).type().longType().noDefault();
                        break;
                    case FLOAT:
                        fields = fields.name(fieldSpec.getName()).type().floatType().noDefault();
                        break;
                    case DOUBLE:
                        fields = fields.name(fieldSpec.getName()).type().doubleType().noDefault();
                        break;
                    case STRING:
                        fields = fields.name(fieldSpec.getName()).type().stringType().noDefault();
                        break;
                    case BYTES:
                        fields = fields.name(fieldSpec.getName()).type().bytesType().noDefault();
                        break;
                    default:
                        throw new RuntimeException("Unsupported data type: " + dataType);
                }
            } else {
                switch (dataType) {
                    case INT:
                        fields = fields.name(fieldSpec.getName()).type().array().items().intType().noDefault();
                        break;
                    case LONG:
                        fields = fields.name(fieldSpec.getName()).type().array().items().longType().noDefault();
                        break;
                    case FLOAT:
                        fields = fields.name(fieldSpec.getName()).type().array().items().floatType().noDefault();
                        break;
                    case DOUBLE:
                        fields = fields.name(fieldSpec.getName()).type().array().items().doubleType().noDefault();
                        break;
                    case STRING:
                        fields = fields.name(fieldSpec.getName()).type().array().items().stringType().noDefault();
                        break;
                    default:
                        throw new RuntimeException("Unsupported data type: " + dataType);
                }
            }
        }
        return fields.endRecord();
    }

    public static void validateSchema(org.apache.pinot.spi.data.Schema schema, Schema schema2) {
        for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
            String name = fieldSpec.getName();
            Schema.Field field = schema2.getField(name);
            if (field == null) {
                LOGGER.warn("Pinot field: {} does not exist in Avro Schema", name);
            } else {
                boolean isSingleValueField = fieldSpec.isSingleValueField();
                if (isSingleValueField != isSingleValueField(field)) {
                    String str = "Pinot field: " + name + " is " + (isSingleValueField ? "Single" : "Multi") + "-valued in Pinot schema but not in Avro schema";
                    LOGGER.error(str);
                    throw new IllegalStateException(str);
                }
                FieldSpec.DataType dataType = fieldSpec.getDataType();
                FieldSpec.DataType extractFieldDataType = extractFieldDataType(field);
                if (dataType != extractFieldDataType) {
                    LOGGER.warn("Pinot field: {} of type: {} mismatches with corresponding field in Avro Schema of type: {}", name, dataType, extractFieldDataType);
                }
            }
        }
    }

    public static DataFileStream<GenericRecord> getAvroReader(File file) throws IOException {
        return file.getName().endsWith(RecordReaderUtils.GZIP_FILE_EXTENSION) ? new DataFileStream<>(new GZIPInputStream(new FileInputStream(file)), new GenericDatumReader()) : new DataFileStream<>(new FileInputStream(file), new GenericDatumReader());
    }

    public static boolean isSingleValueField(Schema.Field field) {
        try {
            return extractSupportedSchema(field.schema()).getType() != Schema.Type.ARRAY;
        } catch (Exception e) {
            throw new RuntimeException("Caught exception while extracting non-null schema from field: " + field.name(), e);
        }
    }

    public static FieldSpec.DataType extractFieldDataType(Schema.Field field) {
        try {
            Schema extractSupportedSchema = extractSupportedSchema(field.schema());
            Schema.Type type = extractSupportedSchema.getType();
            return type == Schema.Type.ARRAY ? valueOf(extractSupportedSchema(extractSupportedSchema.getElementType()).getType()) : valueOf(type);
        } catch (Exception e) {
            throw new RuntimeException("Caught exception while extracting data type from field: " + field.name(), e);
        }
    }

    private static Schema extractSupportedSchema(Schema schema) {
        Schema.Type type = schema.getType();
        if (type != Schema.Type.UNION) {
            if (type != Schema.Type.RECORD) {
                return schema;
            }
            List<Schema.Field> fields = schema.getFields();
            Preconditions.checkState(fields.size() == 1, "Not one field in the RECORD schema");
            return extractSupportedSchema(fields.get(0).schema());
        }
        Schema schema2 = null;
        for (Schema schema3 : schema.getTypes()) {
            if (schema3.getType() != Schema.Type.NULL) {
                if (schema2 != null) {
                    throw new IllegalStateException("More than one non-null schema in UNION schema");
                }
                schema2 = schema3;
            }
        }
        if (schema2 != null) {
            return extractSupportedSchema(schema2);
        }
        throw new IllegalStateException("Cannot find non-null schema in UNION schema");
    }

    public static FieldSpec.DataType valueOf(Schema.Type type) {
        switch (type) {
            case INT:
                return FieldSpec.DataType.INT;
            case LONG:
                return FieldSpec.DataType.LONG;
            case FLOAT:
                return FieldSpec.DataType.FLOAT;
            case DOUBLE:
                return FieldSpec.DataType.DOUBLE;
            case BOOLEAN:
            case STRING:
            case ENUM:
                return FieldSpec.DataType.STRING;
            case BYTES:
                return FieldSpec.DataType.BYTES;
            default:
                throw new UnsupportedOperationException("Unsupported Avro type: " + type);
        }
    }
}
