package org.apache.gobblin.hive.orc;

import com.codahale.metrics.Timer;
import com.google.common.base.Charsets;
import com.google.common.base.Joiner;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.gobblin.configuration.State;
import org.apache.gobblin.hive.HiveRegistrationUnit;
import org.apache.gobblin.hive.HiveSerDeManager;
import org.apache.gobblin.hive.HiveSerDeWrapper;
import org.apache.gobblin.instrumented.Instrumented;
import org.apache.gobblin.metrics.MetricContext;
import org.apache.gobblin.util.FileListUtils;
import org.apache.gobblin.util.HadoopUtils;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/gobblin/hive/orc/HiveOrcSerDeManager.class */
public class HiveOrcSerDeManager extends HiveSerDeManager {
    /** Configuration key whose value the constructor reads as a list into {@code fileExtensions}. */
    public static final String FILE_EXTENSIONS_KEY = "hiveOrcSerdeManager.fileExtensions";
    /** Default used for {@link #FILE_EXTENSIONS_KEY} when the key is absent. */
    public static final String DEFAULT_FILE_EXTENSIONS = ".orc";
    /** Configuration key whose value the constructor reads as a list into {@code ignoredFilePrefixes}. */
    public static final String IGNORED_FILE_PREFIXES_KEY = "hiveOrcSerdeManager.ignoredPrefixes";
    /** Default used for {@link #IGNORED_FILE_PREFIXES_KEY}: a comma-separated list containing "_" and ".". */
    public static final String DEFAULT_IGNORED_FILE_PREFIXES = "_,.";
    /** Configuration key for the SerDe type passed to {@code HiveSerDeWrapper.get} in the constructor. */
    public static final String SERDE_TYPE_KEY = "hiveOrcSerdeManager.serdeType";
    /** Default SerDe type for {@link #SERDE_TYPE_KEY}. */
    public static final String DEFAULT_SERDE_TYPE = "ORC";
    /** Configuration key for the input format class name; defaults to {@link #DEFAULT_INPUT_FORMAT_CLASS}. */
    public static final String INPUT_FORMAT_CLASS_KEY = "hiveOrcSerdeManager.inputFormatClass";
    /** Configuration key for the output format class name; defaults to {@link #DEFAULT_OUTPUT_FORMAT_CLASS}. */
    public static final String OUTPUT_FORMAT_CLASS_KEY = "hiveOrcSerdeManager.outputFormatClass";
    /** Name of the metric timer that {@code addSchemaProperties} runs around the schema lookup. */
    public static final String HIVE_SPEC_SCHEMA_READING_TIMER = "hiveOrcSerdeManager.schemaReadTimer";
    /** Configuration key the constructor reads as a boolean into {@code checkOrcFormat}. */
    public static final String ENABLED_ORC_TYPE_CHECK = "hiveOrcSerdeManager.enableFormatCheck";
    /** Default for {@link #ENABLED_ORC_TYPE_CHECK}: the format check is off unless configured. */
    public static final boolean DEFAULT_ENABLED_ORC_TYPE_CHECK = false;
    // Same value as the 16384L cap that isORC applies to the number of trailing bytes it reads.
    private static final int EXPECTED_FOOTER_SIZE = 16384;
    // NOTE(review): not referenced by name in the visible code; MAGIC_BUFFER below wraps the same literal.
    private static final String ORC_FORMAT = "ORC";
    // File system obtained in the constructor from the Hadoop configuration derived from the State.
    private final FileSystem fs;
    // Supplies the SerDe class, input format and output format names written by addSerDeProperties.
    private final HiveSerDeWrapper serDeWrapper;
    // Values of FILE_EXTENSIONS_KEY; the constructor substitutes a single "" entry when the list is empty.
    private final List<String> fileExtensions;
    // Values of IGNORED_FILE_PREFIXES_KEY.
    private final List<String> ignoredFilePrefixes;
    // Value of ENABLED_ORC_TYPE_CHECK.
    private final boolean checkOrcFormat;
    // Metric context created for this class; source of the schema-read timer.
    private final MetricContext metricContext;
    private static final Logger log = LoggerFactory.getLogger(HiveOrcSerDeManager.class);
    /** Fully qualified class name of {@link OrcInputFormat}. */
    public static final String DEFAULT_INPUT_FORMAT_CLASS = OrcInputFormat.class.getName();
    /** Fully qualified class name of {@link OrcOutputFormat}. */
    public static final String DEFAULT_OUTPUT_FORMAT_CLASS = OrcOutputFormat.class.getName();
    // UTF-8 bytes of "ORC"; isORC compares file bytes against this buffer via ByteBuffer.equals.
    private static final ByteBuffer MAGIC_BUFFER = ByteBuffer.wrap("ORC".getBytes(Charsets.UTF_8));

    /**
     * Creates a manager configured from the given job state.
     *
     * <p>Reads the file-extension list, ignored-prefix list, ORC format-check flag and
     * SerDe/input-format/output-format settings from {@code state}, falling back to the
     * {@code DEFAULT_*} constants of this class when a key is absent.
     *
     * @param state configuration source; also used to derive the Hadoop configuration for the file system
     * @throws IOException if the file system or the SerDe wrapper cannot be obtained
     */
    public HiveOrcSerDeManager(State state) throws IOException {
        super(state);
        this.fs = FileSystem.get(HadoopUtils.getConfFromState(state));

        List<String> extensions = state.getPropAsList(FILE_EXTENSIONS_KEY, DEFAULT_FILE_EXTENSIONS);
        // An empty configured list is replaced by a single empty string
        // (presumably so that suffix matching accepts every file name — verify against the path filter).
        this.fileExtensions = extensions.isEmpty() ? ImmutableList.of("") : extensions;

        this.ignoredFilePrefixes = state.getPropAsList(IGNORED_FILE_PREFIXES_KEY, DEFAULT_IGNORED_FILE_PREFIXES);
        this.checkOrcFormat = state.getPropAsBoolean(ENABLED_ORC_TYPE_CHECK, DEFAULT_ENABLED_ORC_TYPE_CHECK);
        this.metricContext = Instrumented.getMetricContext(state, HiveOrcSerDeManager.class);
        this.serDeWrapper = HiveSerDeWrapper.get(
            state.getProp(SERDE_TYPE_KEY, DEFAULT_SERDE_TYPE),
            Optional.of(state.getProp(INPUT_FORMAT_CLASS_KEY, DEFAULT_INPUT_FORMAT_CLASS)),
            Optional.of(state.getProp(OUTPUT_FORMAT_CLASS_KEY, DEFAULT_OUTPUT_FORMAT_CLASS)));
    }

    /**
     * Reports whether both units carry identical "columns" and "columns.types" SerDe properties.
     *
     * @return {@code true} only when each unit defines both properties and the values are equal
     *         pairwise; {@code false} as soon as any property is missing or differs
     */
    @Override
    public boolean haveSameSchema(HiveRegistrationUnit hiveRegistrationUnit, HiveRegistrationUnit hiveRegistrationUnit2) throws IOException {
        State firstProps = hiveRegistrationUnit.getSerDeProps();
        if (!firstProps.contains("columns")) {
            return false;
        }
        State secondProps = hiveRegistrationUnit2.getSerDeProps();
        if (!secondProps.contains("columns")) {
            return false;
        }
        if (!firstProps.contains("columns.types") || !secondProps.contains("columns.types")) {
            return false;
        }
        if (!firstProps.getProp("columns").equals(secondProps.getProp("columns"))) {
            return false;
        }
        return firstProps.getProp("columns.types").equals(secondProps.getProp("columns.types"));
    }

    /**
     * Writes the configured SerDe class name, input format and output format onto the unit,
     * then adds the schema properties derived from {@code path}.
     *
     * @param path location handed to the schema lookup
     * @param hiveRegistrationUnit unit to populate
     * @throws IOException if the SerDe cannot be obtained or the schema lookup fails
     */
    @Override
    public void addSerDeProperties(Path path, HiveRegistrationUnit hiveRegistrationUnit) throws IOException {
        final HiveSerDeWrapper wrapper = this.serDeWrapper;

        final String serDeClassName = wrapper.getSerDe().getClass().getName();
        hiveRegistrationUnit.setSerDeType(serDeClassName);

        hiveRegistrationUnit.setInputFormat(wrapper.getInputFormatClassName());
        hiveRegistrationUnit.setOutputFormat(wrapper.getOutputFormatClassName());

        addSchemaProperties(path, hiveRegistrationUnit);
    }

    /**
     * Copies SerDe type, input format and output format from the first unit to the second.
     * Each value is copied only when it is present on the source unit; absent values leave
     * the target untouched.
     *
     * @param hiveRegistrationUnit source unit
     * @param hiveRegistrationUnit2 target unit
     */
    @Override
    public void addSerDeProperties(HiveRegistrationUnit hiveRegistrationUnit, HiveRegistrationUnit hiveRegistrationUnit2) throws IOException {
        Optional<String> sourceSerDeType = hiveRegistrationUnit.getSerDeType();
        if (sourceSerDeType.isPresent()) {
            hiveRegistrationUnit2.setSerDeType(sourceSerDeType.get());
        }

        Optional<String> sourceInputFormat = hiveRegistrationUnit.getInputFormat();
        if (sourceInputFormat.isPresent()) {
            hiveRegistrationUnit2.setInputFormat(sourceInputFormat.get());
        }

        Optional<String> sourceOutputFormat = hiveRegistrationUnit.getOutputFormat();
        if (sourceOutputFormat.isPresent()) {
            hiveRegistrationUnit2.setOutputFormat(sourceOutputFormat.get());
        }
    }

    /**
     * Overwrites the "columns" and "columns.types" SerDe properties of the first unit with the
     * values held by the second unit.
     *
     * @param hiveRegistrationUnit unit whose schema properties are replaced
     * @param hiveRegistrationUnit2 unit supplying the new schema properties
     * @throws IllegalArgumentException if the second unit lacks either property
     */
    @Override
    public void updateSchema(HiveRegistrationUnit hiveRegistrationUnit, HiveRegistrationUnit hiveRegistrationUnit2) throws IOException {
        // Both properties are validated before anything is written to the first unit.
        final boolean hasColumnNames = hiveRegistrationUnit2.getSerDeProps().contains("columns");
        Preconditions.checkArgument(hasColumnNames);
        final boolean hasColumnTypes = hiveRegistrationUnit2.getSerDeProps().contains("columns.types");
        Preconditions.checkArgument(hasColumnTypes);

        final String columnNames = hiveRegistrationUnit2.getSerDeProps().getProp("columns");
        hiveRegistrationUnit.setSerDeProp("columns", columnNames);

        final String columnTypes = hiveRegistrationUnit2.getSerDeProps().getProp("columns.types");
        hiveRegistrationUnit.setSerDeProp("columns.types", columnTypes);
    }

    /**
     * Reads the ORC schema for {@code path}.
     *
     * <p>When {@code path} is not a directory, the schema is read from that file via an ORC reader.
     * When it is a directory, its entries are filtered, sorted with
     * {@code FileListUtils.LATEST_MOD_TIME_ORDER}, and the lookup recurses into the first entry.
     *
     * <p>An entry passes the filter when none of the ignored prefixes match it, at least one of
     * the file extensions matches it, and, if the ORC format check is enabled, {@code isORC}
     * accepts it. NOTE(review): the filter was reconstructed from bytecode in which the two
     * predicate bodies were not visible; matching the file name with {@code startsWith} /
     * {@code endsWith} is inferred from the field names — confirm against the upstream source.
     *
     * @param path file or directory to inspect
     * @param fileSystem file system used for listing and reading
     * @return the type information derived from the selected file's object inspector
     * @throws FileNotFoundException if a directory contains no entry that passes the filter
     * @throws IOException if listing or reading fails
     */
    public TypeInfo getSchemaFromLatestFile(Path path, final FileSystem fileSystem) throws IOException {
        if (!fileSystem.isDirectory(path)) {
            return TypeInfoUtils.getTypeInfoFromObjectInspector(OrcFile.createReader(fileSystem, path).getObjectInspector());
        }

        List<FileStatus> candidates = Arrays.asList(fileSystem.listStatus(path, new PathFilter() {
            @Override
            public boolean accept(Path candidate) {
                try {
                    return HiveOrcSerDeManager.this.ignoredFilePrefixes.stream()
                            .noneMatch(prefix -> candidate.getName().startsWith(prefix))
                        && HiveOrcSerDeManager.this.fileExtensions.stream()
                            .anyMatch(extension -> candidate.getName().endsWith(extension))
                        && (!HiveOrcSerDeManager.this.checkOrcFormat || isORC(candidate, fileSystem));
                } catch (IOException e) {
                    // A file that cannot be checked is skipped rather than failing the whole listing.
                    log.error("Error checking file for schema retrieval", e);
                    return false;
                }
            }
        }));

        if (candidates.isEmpty()) {
            throw new FileNotFoundException("No files in Dataset:" + path + " found for schema retrieval");
        }

        // Arrays.asList is a fixed-size view; sorting in place is supported.
        Collections.sort(candidates, FileListUtils.LATEST_MOD_TIME_ORDER);
        return getSchemaFromLatestFile(candidates.get(0).getPath(), fileSystem);
    }

    /**
     * Checks whether a file carries the "ORC" magic bytes.
     *
     * <p>Reads up to {@code EXPECTED_FOOTER_SIZE} bytes from the end of the file. The last byte is
     * treated as a length field (the PostScript length in the ORC file layout); the magic is
     * expected in the bytes immediately preceding it. If it is not found there, the first bytes of
     * the file are checked instead (NOTE(review): presumably for older ORC writers that put the
     * magic at the head of the file — confirm).
     *
     * <p>An empty file is reported as not ORC.
     *
     * @param path file to inspect
     * @param fileSystem file system used to open the file and query its length
     * @return {@code true} if the magic bytes are found at either location
     * @throws RuntimeException wrapping any failure raised while opening or reading the file
     * @throws IOException declared for signature compatibility; I/O failures are wrapped as above
     */
    public static boolean isORC(Path path, FileSystem fileSystem) throws IOException {
        // try-with-resources guarantees the stream is closed on every exit path.
        try (FSDataInputStream in = fileSystem.open(path)) {
            long fileLength = fileSystem.getFileStatus(path).getLen();
            if (fileLength <= 0) {
                // Nothing to read; without this guard the tail lookup below would index an empty array.
                return false;
            }

            byte[] tail = new byte[Math.toIntExact(Math.min(fileLength, (long) EXPECTED_FOOTER_SIZE))];
            in.readFully(fileLength - tail.length, tail);

            int postScriptSize = tail[tail.length - 1] & 0xff;
            int magicLength = MAGIC_BUFFER.remaining();
            if (postScriptSize < magicLength + 1 || postScriptSize >= tail.length) {
                return false;
            }

            if (MAGIC_BUFFER.equals(ByteBuffer.wrap(tail, tail.length - 1 - magicLength, magicLength))) {
                return true;
            }

            byte[] head = new byte[magicLength];
            in.readFully(0L, head);
            return MAGIC_BUFFER.equals(ByteBuffer.wrap(head));
        } catch (Exception e) {
            // Keep the original exception as the cause so the failure can be diagnosed.
            throw new RuntimeException("Error occured when checking the type of file:" + path, e);
        }
    }

    /**
     * Adds schema SerDe properties for a directory, timing the work with the
     * {@code HIVE_SPEC_SCHEMA_READING_TIMER} timer of this manager's metric context.
     *
     * @param path location to read the schema from; must be a directory
     * @param hiveRegistrationUnit unit that receives the schema properties
     * @throws IllegalArgumentException if {@code path} is not a directory
     * @throws IOException if the file status or schema cannot be read
     */
    private void addSchemaProperties(Path path, HiveRegistrationUnit hiveRegistrationUnit) throws IOException {
        Preconditions.checkArgument(this.fs.getFileStatus(path).isDirectory(), path + " is not a directory.");

        // The timer context is closed when the block exits, whether normally or by exception.
        try (Timer.Context ignored = this.metricContext.timer(HIVE_SPEC_SCHEMA_READING_TIMER).time()) {
            addSchemaPropertiesHelper(path, hiveRegistrationUnit);
        }
    }

    /**
     * Looks up the schema under {@code path} and stores it on the unit as the comma-joined
     * "columns" (field names) and "columns.types" (field type names) SerDe properties.
     *
     * @param path location passed to {@code getSchemaFromLatestFile}
     * @param hiveRegistrationUnit unit that receives the two properties
     * @throws IllegalStateException if the schema read from the file is not a struct
     * @throws IOException if the schema lookup fails
     */
    protected void addSchemaPropertiesHelper(Path path, HiveRegistrationUnit hiveRegistrationUnit) throws IOException {
        TypeInfo schema = getSchemaFromLatestFile(path, this.fs);
        if (!(schema instanceof StructTypeInfo)) {
            throw new IllegalStateException("A valid ORC schema should be an instance of struct");
        }
        StructTypeInfo structTypeInfo = (StructTypeInfo) schema;

        hiveRegistrationUnit.setSerDeProp("columns", Joiner.on(",").join(structTypeInfo.getAllStructFieldNames()));

        List<String> fieldTypeNames = structTypeInfo.getAllStructFieldTypeInfos().stream()
            .map(TypeInfo::getTypeName)
            .collect(Collectors.toList());
        hiveRegistrationUnit.setSerDeProp("columns.types", Joiner.on(",").join(fieldTypeNames));
    }
}
