package org.apache.crunch.io.parquet;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import java.io.IOException;
import java.util.List;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
import org.apache.avro.specific.SpecificRecord;
import org.apache.crunch.ReadableData;
import org.apache.crunch.impl.mr.run.RuntimeParameters;
import org.apache.crunch.io.FormatBundle;
import org.apache.crunch.io.ReadableSource;
import org.apache.crunch.io.impl.FileSourceImpl;
import org.apache.crunch.types.Converter;
import org.apache.crunch.types.avro.AvroType;
import org.apache.crunch.types.avro.Avros;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.parquet.avro.AvroParquetInputFormat;
import org.apache.parquet.avro.AvroReadSupport;
import org.apache.parquet.filter.UnboundRecordFilter;
import org.apache.parquet.hadoop.ParquetInputFormat;
import org.apache.parquet.hadoop.ParquetInputSplit;
import org.apache.zookeeper.client.ZooKeeperSaslClient;

/* loaded from: input_file:lib/crunch-core-0.14.0.jar:org/apache/crunch/io/parquet/AvroParquetFileSource.class */
/**
 * A file source that reads Avro records ({@link IndexedRecord} subtypes) from Parquet files
 * through {@link AvroParquetInputFormat}.
 *
 * <p>A source may optionally carry a projection schema (a subset of the record's fields) and an
 * {@link UnboundRecordFilter} class; both are forwarded to the input format through the
 * {@link FormatBundle} built in {@link #getBundle}. Use {@link #builder(Class)} or
 * {@link #builder(Schema)} to assemble a projection field by field.
 *
 * @param <T> the Avro record type produced by this source
 */
public class AvroParquetFileSource<T extends IndexedRecord> extends FileSourceImpl<T> implements ReadableSource<T> {
    /** Configuration key under which the full Avro read schema is stored. */
    private static final String AVRO_READ_SCHEMA = "parquet.avro.read.schema";

    /** JSON form of the projection schema, or {@code null} when no projection was requested. */
    private final String projSchema;

    /**
     * Fluent builder that collects an optional field projection and record filter before
     * creating an {@link AvroParquetFileSource}.
     *
     * @param <T> the Avro record type of the source being built
     */
    public static class Builder<T extends IndexedRecord> {
        /** Specific record class, or {@code null} when the builder was created from a schema. */
        private final Class<T> clazz;
        /** Schema that projected fields are looked up in. */
        private final Schema baseSchema;
        /** Copies of the fields selected through {@link #includeField(String)}. */
        private final List<Schema.Field> fields = Lists.newArrayList();
        /** Optional record filter class; {@code null} means no filter. */
        private Class<? extends UnboundRecordFilter> filterClass;

        /** Creates a builder for a record class; the base schema is read from a fresh instance of it. */
        private Builder(Class<T> cls) {
            this.clazz = cls;
            this.baseSchema = ReflectionUtils.newInstance(cls, null).getSchema();
        }

        /** Creates a builder for generic records described by {@code schema}. */
        private Builder(Schema schema) {
            this.clazz = null;
            this.baseSchema = schema;
        }

        /**
         * Adds a field of the base schema to the projection.
         *
         * @param str name of the field to include
         * @return this builder
         * @throws IllegalArgumentException if the base schema has no field with that name
         */
        public Builder<T> includeField(String str) {
            Schema.Field field = this.baseSchema.getField(str);
            if (field == null) {
                throw new IllegalArgumentException("No field " + str + " in schema: " + this.baseSchema.getName());
            }
            // A fresh Field is created from the original's attributes rather than adding the
            // base schema's own Field instance to the projected record.
            this.fields.add(new Schema.Field(field.name(), field.schema(), field.doc(), field.defaultValue(), field.order()));
            return this;
        }

        /**
         * Sets the record filter class to be passed to the Parquet input format.
         *
         * @param cls the filter class, or {@code null} for no filter
         * @return this builder
         */
        public Builder<T> filterClass(Class<? extends UnboundRecordFilter> cls) {
            this.filterClass = cls;
            return this;
        }

        /**
         * Builds a source that reads a single path.
         *
         * @param path the path to read
         * @return the configured source
         */
        public AvroParquetFileSource<T> build(Path path) {
            return build(ImmutableList.of(path));
        }

        /**
         * Builds a source that reads the given paths. When no fields were included the source has
         * no projection; otherwise a record schema with the base schema's name, doc, namespace and
         * error flag is created from the included fields and used as the projection.
         *
         * @param list the paths to read
         * @return the configured source
         */
        @SuppressWarnings({"unchecked", "rawtypes"})
        public AvroParquetFileSource<T> build(List<Path> list) {
            // The static factories guarantee T is GenericRecord when clazz is null and a
            // SpecificRecord subtype otherwise, so these unchecked casts are safe for builders
            // obtained through them.
            final AvroType<T> avroType;
            if (this.clazz == null) {
                avroType = (AvroType<T>) (AvroType<?>) Avros.generics(this.baseSchema);
            } else {
                avroType = (AvroType<T>) Avros.specifics((Class) this.clazz);
            }
            if (this.fields.isEmpty()) {
                return new AvroParquetFileSource<T>(list, avroType, this.filterClass);
            }
            Schema projected = Schema.createRecord(
                    this.baseSchema.getName(),
                    this.baseSchema.getDoc(),
                    this.baseSchema.getNamespace(),
                    this.baseSchema.isError());
            projected.setFields(this.fields);
            return new AvroParquetFileSource<T>(list, avroType, projected, this.filterClass);
        }
    }

    /**
     * Creates the input format bundle for a source.
     *
     * @param avroType type whose schema is stored as the read schema
     * @param schema   projection schema, or {@code null} for none
     * @param cls      record filter class, or {@code null} for none
     * @return the configured bundle
     */
    private static <S> FormatBundle<AvroParquetInputFormat> getBundle(AvroType<S> avroType, Schema schema, Class<? extends UnboundRecordFilter> cls) {
        FormatBundle<AvroParquetInputFormat> formatBundle = FormatBundle.forInput(AvroParquetInputFormat.class)
                .set(AVRO_READ_SCHEMA, avroType.getSchema().toString());
        if (schema != null) {
            formatBundle.set(AvroReadSupport.AVRO_REQUESTED_PROJECTION, schema.toString());
        }
        if (cls != null) {
            formatBundle.set(ParquetInputFormat.UNBOUND_RECORD_FILTER, cls.getName());
        }
        // When the Parquet version on the classpath has a ParquetInputSplit that is not a
        // FileSplit, turn on the "disable combine file" runtime parameter for this input.
        // NOTE(review): presumably combined-file input needs FileSplit instances; confirm.
        if (!FileSplit.class.isAssignableFrom(ParquetInputSplit.class)) {
            formatBundle.set(RuntimeParameters.DISABLE_COMBINE_FILE, "true");
        }
        return formatBundle;
    }

    /**
     * Creates a source over one path with no projection and no filter.
     *
     * @param path     the path to read
     * @param avroType the Avro type of the records
     */
    public AvroParquetFileSource(Path path, AvroType<T> avroType) {
        this(ImmutableList.of(path), avroType);
    }

    /**
     * Creates a source over one path with a projection schema and no filter.
     *
     * @param path     the path to read
     * @param avroType the Avro type of the records
     * @param schema   projection schema, or {@code null} for none
     */
    public AvroParquetFileSource(Path path, AvroType<T> avroType, Schema schema) {
        this(ImmutableList.of(path), avroType, schema);
    }

    /**
     * Creates a source over several paths with no projection and no filter.
     *
     * @param list     the paths to read
     * @param avroType the Avro type of the records
     */
    public AvroParquetFileSource(List<Path> list, AvroType<T> avroType) {
        this(list, avroType, null, null);
    }

    /**
     * Creates a source over several paths with a projection schema and no filter.
     *
     * @param list     the paths to read
     * @param avroType the Avro type of the records
     * @param schema   projection schema, or {@code null} for none
     */
    public AvroParquetFileSource(List<Path> list, AvroType<T> avroType, Schema schema) {
        this(list, avroType, schema, null);
    }

    /**
     * Creates a source over several paths with a record filter and no projection.
     *
     * @param list     the paths to read
     * @param avroType the Avro type of the records
     * @param cls      record filter class, or {@code null} for none
     */
    public AvroParquetFileSource(List<Path> list, AvroType<T> avroType, Class<? extends UnboundRecordFilter> cls) {
        this(list, avroType, null, cls);
    }

    /**
     * Creates a source over several paths with an optional projection and an optional filter.
     *
     * @param list     the paths to read
     * @param avroType the Avro type of the records
     * @param schema   projection schema, or {@code null} for none
     * @param cls      record filter class, or {@code null} for none
     */
    public AvroParquetFileSource(List<Path> list, AvroType<T> avroType, Schema schema, Class<? extends UnboundRecordFilter> cls) {
        super(list, avroType, getBundle(avroType, schema, cls));
        this.projSchema = schema == null ? null : schema.toString();
    }

    /**
     * Returns the projection schema of this source.
     *
     * @return the projection schema parsed from its stored JSON form, or {@code null} when the
     *         source was created without a projection
     */
    public Schema getProjectedSchema() {
        if (this.projSchema == null) {
            // Parsing a null string would throw; a source without projection has no schema to return.
            return null;
        }
        return new Schema.Parser().parse(this.projSchema);
    }

    /**
     * Reads the records of this source using the factory from {@link #getFileReaderFactory}.
     *
     * @param configuration the Hadoop configuration to read with
     * @return the records of this source
     * @throws IOException if reading fails
     */
    @Override // org.apache.crunch.io.impl.FileSourceImpl, org.apache.crunch.io.ReadableSource
    public Iterable<T> read(Configuration configuration) throws IOException {
        return read(configuration, getFileReaderFactory((AvroType<T>) this.ptype));
    }

    /**
     * Returns a {@link ReadableData} view over this source's paths and Avro type.
     *
     * @return a new {@link AvroParquetReadableData}
     */
    @Override // org.apache.crunch.io.impl.FileSourceImpl, org.apache.crunch.io.ReadableSource
    public ReadableData<T> asReadable() {
        return new AvroParquetReadableData<T>(this.paths, (AvroType<T>) this.ptype);
    }

    /**
     * Creates the reader factory used by {@link #read(Configuration)}; subclasses may override.
     *
     * @param avroType the Avro type of the records
     * @return a new reader factory for {@code avroType}
     */
    protected AvroParquetFileReaderFactory<T> getFileReaderFactory(AvroType<T> avroType) {
        return new AvroParquetFileReaderFactory<T>(avroType);
    }

    /**
     * Returns the converter for this source.
     *
     * @return a new {@link AvroParquetConverter} for this source's Avro type
     */
    @Override // org.apache.crunch.io.impl.FileSourceImpl, org.apache.crunch.Source
    public Converter<?, ?, ?, ?> getConverter() {
        return new AvroParquetConverter<T>((AvroType<T>) this.ptype);
    }

    /**
     * Returns {@code Parquet(<paths>)}, followed by {@code " -> "} and the projection schema
     * JSON when a projection is set.
     */
    @Override // org.apache.crunch.io.impl.FileSourceImpl
    public String toString() {
        return "Parquet(" + pathsAsString() + (this.projSchema == null ? ")" : ") -> " + this.projSchema);
    }

    /**
     * Starts a builder for a source of specific records.
     *
     * @param cls the specific record class; must not be {@code null}
     * @return a new builder
     * @throws NullPointerException if {@code cls} is {@code null}
     */
    public static <T extends SpecificRecord> Builder<T> builder(Class<T> cls) {
        return new Builder<T>(Preconditions.checkNotNull(cls));
    }

    /**
     * Starts a builder for a source of generic records.
     *
     * @param schema the record schema; must not be {@code null} and must be of type RECORD
     * @return a new builder
     * @throws NullPointerException     if {@code schema} is {@code null}
     * @throws IllegalArgumentException if {@code schema} is not a record schema
     */
    public static Builder<GenericRecord> builder(Schema schema) {
        Preconditions.checkNotNull(schema);
        Preconditions.checkArgument(Schema.Type.RECORD.equals(schema.getType()),
                "Schema must be of type RECORD but was %s", schema.getType());
        return new Builder<GenericRecord>(schema);
    }
}
