package org.apache.gobblin.compaction.mapreduce;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Enums;
import com.google.common.base.Optional;
import java.io.IOException;
import java.util.Locale;
import org.apache.avro.Schema;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapred.AvroValue;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.gobblin.compaction.mapreduce.CompactionJobConfigurator;
import org.apache.gobblin.compaction.mapreduce.avro.AvroKeyCompactorOutputFormat;
import org.apache.gobblin.compaction.mapreduce.avro.AvroKeyDedupReducer;
import org.apache.gobblin.compaction.mapreduce.avro.AvroKeyMapper;
import org.apache.gobblin.compaction.mapreduce.avro.AvroKeyRecursiveCombineFileInputFormat;
import org.apache.gobblin.compaction.mapreduce.avro.MRCompactorAvroKeyDedupJobRunner;
import org.apache.gobblin.configuration.State;
import org.apache.gobblin.converter.filter.AvroSchemaFieldRemover;
import org.apache.gobblin.util.AvroUtils;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/gobblin/compaction/mapreduce/CompactionAvroJobConfigurator.class */
/**
 * {@link CompactionJobConfigurator} for Avro data: wires the Avro input/output formats,
 * mapper and reducer into the MapReduce {@link Job}, and chooses the schema used as the
 * map-output key when deduplication is enabled.
 */
public class CompactionAvroJobConfigurator extends CompactionJobConfigurator {
    private static final Logger log = LoggerFactory.getLogger(CompactionAvroJobConfigurator.class);

    /**
     * Value of {@link MRCompactorAvroKeyDedupJobRunner#COMPACTION_JOB_KEY_FIELD_BLACKLIST}, if set.
     * When present it is handed to {@link AvroSchemaFieldRemover} to strip fields from the key schema.
     */
    private final Optional<String> keyFieldBlacklist;

    /** Factory that creates {@link CompactionAvroJobConfigurator} instances from a {@link State}. */
    public static class Factory implements CompactionJobConfigurator.ConfiguratorFactory {
        @Override
        public CompactionJobConfigurator createConfigurator(State state) throws IOException {
            return new CompactionAvroJobConfigurator(state);
        }
    }

    /**
     * Returns the extension string of {@code CompactionJobConfigurator.EXTENSION.AVRO}.
     */
    @Override
    public String getFileExtension() {
        return CompactionJobConfigurator.EXTENSION.AVRO.getExtensionString();
    }

    /**
     * Creates a configurator backed by {@code state} and reads the optional key-field blacklist from it.
     *
     * @param state job state passed to the superclass and used to look up compaction properties
     * @throws IOException if the superclass constructor fails
     */
    public CompactionAvroJobConfigurator(State state) throws IOException {
        super(state);
        this.keyFieldBlacklist =
                Optional.fromNullable(state.getProp(MRCompactorAvroKeyDedupJobRunner.COMPACTION_JOB_KEY_FIELD_BLACKLIST));
    }

    /**
     * Resolves the configured dedup key option.
     *
     * @return the option named by {@link MRCompactorAvroKeyDedupJobRunner#COMPACTION_JOB_DEDUP_KEY}
     *         (matched case-insensitively), or
     *         {@link MRCompactorAvroKeyDedupJobRunner#DEFAULT_DEDUP_KEY_OPTION} when the property is
     *         absent or does not name an enum constant
     */
    private MRCompactorAvroKeyDedupJobRunner.DedupKeyOption getDedupKeyOption() {
        if (!this.state.contains(MRCompactorAvroKeyDedupJobRunner.COMPACTION_JOB_DEDUP_KEY)) {
            return MRCompactorAvroKeyDedupJobRunner.DEFAULT_DEDUP_KEY_OPTION;
        }
        // Locale.ROOT keeps the upper-casing independent of the JVM default locale
        // (e.g. Turkish would map 'i' to a dotted capital I and miss the enum constant).
        String configured = this.state.getProp(MRCompactorAvroKeyDedupJobRunner.COMPACTION_JOB_DEDUP_KEY)
                .toUpperCase(Locale.ROOT);
        Optional<MRCompactorAvroKeyDedupJobRunner.DedupKeyOption> option =
                Enums.getIfPresent(MRCompactorAvroKeyDedupJobRunner.DedupKeyOption.class, configured);
        return option.or(MRCompactorAvroKeyDedupJobRunner.DEFAULT_DEDUP_KEY_OPTION);
    }

    /**
     * Builds the schema used as the dedup (map-output) key for records of {@code schema}.
     *
     * <p>Selection order: {@code ALL} uses the record schema with uncomparable fields removed;
     * {@code KEY} uses the key schema derived from the record schema; any other option uses the
     * schema file named by {@link MRCompactorAvroKeyDedupJobRunner#COMPACTION_JOB_AVRO_KEY_SCHEMA_LOC}
     * when that property is set, and otherwise falls back to the derived key schema. If a key-field
     * blacklist is configured, those fields are removed from the result.
     *
     * @param schema the record schema
     * @return the key schema to deduplicate on
     */
    @VisibleForTesting
    Schema getDedupKeySchema(Schema schema) {
        Schema keySchema;
        MRCompactorAvroKeyDedupJobRunner.DedupKeyOption dedupKeyOption = getDedupKeyOption();
        if (dedupKeyOption == MRCompactorAvroKeyDedupJobRunner.DedupKeyOption.ALL) {
            log.info("Using all attributes in the schema (except Map, Array and Enum fields) for compaction");
            keySchema = AvroUtils.removeUncomparableFields(schema).get();
        } else if (dedupKeyOption == MRCompactorAvroKeyDedupJobRunner.DedupKeyOption.KEY) {
            log.info("Using key attributes in the schema for compaction");
            keySchema = comparableKeySchema(schema);
        } else if (this.state.contains(MRCompactorAvroKeyDedupJobRunner.COMPACTION_JOB_AVRO_KEY_SCHEMA_LOC)) {
            keySchema = keySchemaFromFile(schema);
        } else {
            log.info("Property compaction.job.avro.key.schema.loc not provided. Using key attributes in the schema for compaction");
            keySchema = comparableKeySchema(schema);
        }

        if (this.keyFieldBlacklist.isPresent()) {
            keySchema = new AvroSchemaFieldRemover(this.keyFieldBlacklist.get()).removeFields(keySchema);
            log.info("Adjusted key schema {}", keySchema);
        }
        return keySchema;
    }

    /** Returns the key schema derived from {@code schema} with uncomparable fields removed. */
    private static Schema comparableKeySchema(Schema schema) {
        return AvroUtils.removeUncomparableFields(MRCompactorAvroKeyDedupJobRunner.getKeySchema(schema)).get();
    }

    /**
     * Loads the key schema from the configured schema file, falling back to the key schema derived
     * from {@code schema} when the file cannot be parsed or the loaded schema is not valid for
     * {@code schema}.
     */
    private Schema keySchemaFromFile(Schema schema) {
        Path keySchemaFile =
                new Path(this.state.getProp(MRCompactorAvroKeyDedupJobRunner.COMPACTION_JOB_AVRO_KEY_SCHEMA_LOC));
        log.info("Using attributes specified in schema file {} for compaction", keySchemaFile);

        Schema keySchema;
        try {
            keySchema = AvroUtils.parseSchemaFromFile(keySchemaFile, this.fs);
        } catch (IOException e) {
            // Best-effort: keep going with the derived key schema, but record why the file was unusable.
            log.error("Failed to parse avro schema from {}, using key attributes in the schema for compaction",
                    keySchemaFile, e);
            keySchema = comparableKeySchema(schema);
        }

        if (!MRCompactorAvroKeyDedupJobRunner.isKeySchemaValid(keySchema, schema)) {
            log.warn("Key schema {} is not compatible with record schema {}. "
                    + "Using key attributes in the schema for compaction", keySchema, schema);
            keySchema = comparableKeySchema(schema);
        }
        return keySchema;
    }

    /**
     * Sets the Avro schemas on {@code job} from the newest schema found in the job's source.
     * Does nothing when no schema is found. The input key schema is only set when
     * {@link MRCompactorAvroKeyDedupJobRunner#COMPACTION_JOB_AVRO_SINGLE_INPUT_SCHEMA} is true
     * (the default); the map-output key schema is the dedup key schema when deduplication is enabled.
     *
     * @param job the job to configure
     * @throws IOException if the newest schema cannot be read from the source
     */
    @Override
    protected void configureSchema(Job job) throws IOException {
        Schema newestSchema = MRCompactorAvroKeyDedupJobRunner.getNewestSchemaFromSource(job, this.fs);
        if (newestSchema == null) {
            return;
        }
        if (this.state.getPropAsBoolean(MRCompactorAvroKeyDedupJobRunner.COMPACTION_JOB_AVRO_SINGLE_INPUT_SCHEMA, true)) {
            AvroJob.setInputKeySchema(job, newestSchema);
        }
        Schema mapOutputKeySchema = this.shouldDeduplicate ? getDedupKeySchema(newestSchema) : newestSchema;
        AvroJob.setMapOutputKeySchema(job, mapOutputKeySchema);
        AvroJob.setMapOutputValueSchema(job, newestSchema);
        AvroJob.setOutputKeySchema(job, newestSchema);
    }

    /**
     * Configures the map side of {@code job}: input format, mapper class and map-output key/value classes.
     */
    @Override
    protected void configureMapper(Job job) {
        job.setInputFormatClass(AvroKeyRecursiveCombineFileInputFormat.class);
        job.setMapperClass(AvroKeyMapper.class);
        job.setMapOutputKeyClass(AvroKey.class);
        job.setMapOutputValueClass(AvroValue.class);
    }

    /**
     * Configures the reduce side of {@code job}: output format, reducer class, output key/value
     * classes, and the number of reducers.
     *
     * @throws IOException if setting the number of reducers fails
     */
    @Override
    protected void configureReducer(Job job) throws IOException {
        job.setOutputFormatClass(AvroKeyCompactorOutputFormat.class);
        job.setReducerClass(AvroKeyDedupReducer.class);
        job.setOutputKeyClass(AvroKey.class);
        job.setOutputValueClass(NullWritable.class);
        setNumberOfReducers(job);
    }
}
