package io.openlineage.spark.agent.util;

import io.acryl.shaded.org.apache.commons.lang3.reflect.FieldUtils;
import java.util.Arrays;
import java.util.Objects;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.stream.Stream;
import lombok.Generated;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.spark.package$;
import org.apache.spark.rdd.HadoopRDD;
import org.apache.spark.rdd.MapPartitionsRDD;
import org.apache.spark.rdd.ParallelCollectionRDD;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.execution.datasources.FileScanRDD;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;
import scala.collection.immutable.Seq;
import scala.collection.mutable.ArrayBuffer;

/* loaded from: input_file:io/openlineage/spark/agent/util/RddPathUtils.class */
public class RddPathUtils {

    // Logger shared by this class and its nested extractors (they reference it as RddPathUtils.log).
    // The @Generated marker is lombok.Generated, so this field is presumably emitted by Lombok.
    @Generated
    private static final Logger log = LoggerFactory.getLogger(RddPathUtils.class);

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:io/openlineage/spark/agent/util/RddPathUtils$FileScanRDDExtractor.class */
    public static class FileScanRDDExtractor implements RddPathExtractor<FileScanRDD> {
        FileScanRDDExtractor() {
        }

        @Override // io.openlineage.spark.agent.util.RddPathUtils.RddPathExtractor
        public boolean isDefinedAt(Object obj) {
            return obj instanceof FileScanRDD;
        }

        @Override // io.openlineage.spark.agent.util.RddPathUtils.RddPathExtractor
        public Stream<Path> extract(FileScanRDD fileScanRDD) {
            return ScalaConversionUtils.fromSeq(fileScanRDD.filePartitions()).stream().flatMap(filePartition -> {
                return Arrays.stream(filePartition.files());
            }).map(partitionedFile -> {
                return "3.4".compareTo(package$.MODULE$.SPARK_VERSION()) <= 0 ? ((Path) ReflectionUtils.tryExecuteMethod(partitionedFile, "filePath", new Object[0]).map(obj -> {
                    return ReflectionUtils.tryExecuteMethod(obj, "toPath", new Object[0]);
                }).map(optional -> {
                    return (Path) optional.get();
                }).get()).getParent() : RddPathUtils.parentOf(partitionedFile.filePath());
            });
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:io/openlineage/spark/agent/util/RddPathUtils$HadoopRDDExtractor.class */
    /**
     * Extractor for {@link HadoopRDD}: takes the input paths registered on the RDD's job
     * configuration and resolves each one through {@code PlanUtils.getDirectoryPath}.
     */
    public static class HadoopRDDExtractor implements RddPathExtractor<HadoopRDD> {
        HadoopRDDExtractor() {
        }

        /**
         * @param obj candidate RDD, may be {@code null}
         * @return {@code true} when {@code obj} is a {@link HadoopRDD}
         */
        @Override
        public boolean isDefinedAt(Object obj) {
            return obj instanceof HadoopRDD;
        }

        /**
         * @param hadoopRDD RDD whose configured input paths are read
         * @return one directory path per configured input path
         */
        @Override
        public Stream<Path> extract(HadoopRDD hadoopRDD) {
            final Path[] configuredInputs = FileInputFormat.getInputPaths(hadoopRDD.getJobConf());
            final Configuration hadoopConf = hadoopRDD.getConf();
            logInputs(hadoopRDD, configuredInputs);
            return Arrays.stream(configuredInputs)
                .map(input -> PlanUtils.getDirectoryPath(input, hadoopConf));
        }

        /** Emits the debug-level description of the RDD; does nothing when debug is off. */
        private static void logInputs(HadoopRDD hadoopRDD, Path[] configuredInputs) {
            if (!RddPathUtils.log.isDebugEnabled()) {
                return;
            }
            RddPathUtils.log.debug("Hadoop RDD class {}", hadoopRDD.getClass());
            RddPathUtils.log.debug("Hadoop RDD input paths {}", Arrays.toString(configuredInputs));
            RddPathUtils.log.debug("Hadoop RDD job conf {}", hadoopRDD.getJobConf());
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:io/openlineage/spark/agent/util/RddPathUtils$MapPartitionsRDDExtractor.class */
    /**
     * Extractor for {@link MapPartitionsRDD}: it holds no paths of its own and delegates to
     * {@link RddPathUtils#findRDDPaths(RDD)} on the RDD returned by {@code prev()}.
     */
    public static class MapPartitionsRDDExtractor implements RddPathExtractor<MapPartitionsRDD> {
        MapPartitionsRDDExtractor() {
        }

        /**
         * @param obj candidate RDD, may be {@code null}
         * @return {@code true} when {@code obj} is a {@link MapPartitionsRDD}
         */
        @Override
        public boolean isDefinedAt(Object obj) {
            return obj instanceof MapPartitionsRDD;
        }

        /**
         * @param mapPartitionsRDD RDD whose parent is searched for paths
         * @return the paths found for the parent RDD
         */
        @Override
        public Stream<Path> extract(MapPartitionsRDD mapPartitionsRDD) {
            final RDD upstream = mapPartitionsRDD.prev();
            // Parameterized logging formats the argument only when debug is enabled.
            RddPathUtils.log.debug("Parent RDD: {}", upstream);
            return RddPathUtils.findRDDPaths(upstream);
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:io/openlineage/spark/agent/util/RddPathUtils$ParallelCollectionRDDExtractor.class */
    /**
     * Extractor for {@link ParallelCollectionRDD}: reads the RDD's {@code data} field reflectively
     * and derives parent directories from its elements.
     *
     * <p>Two shapes of {@code data} are understood: a non-empty {@code Seq} whose first element is
     * a {@link Tuple2} (the tuple key is treated as the path), and a non-empty
     * {@link ArrayBuffer} (each element's {@code toString()} is treated as the path). Anything
     * else yields an empty stream.
     */
    public static class ParallelCollectionRDDExtractor implements RddPathExtractor<ParallelCollectionRDD> {
        /** Maximum number of leading {@code Seq} elements inspected for paths. */
        private static final int SEQ_LIMIT = 1000;

        ParallelCollectionRDDExtractor() {
        }

        /**
         * @param obj candidate RDD, may be {@code null}
         * @return {@code true} when {@code obj} is a {@link ParallelCollectionRDD}
         */
        @Override
        public boolean isDefinedAt(Object obj) {
            return obj instanceof ParallelCollectionRDD;
        }

        /**
         * @param parallelCollectionRDD RDD whose {@code data} field is inspected
         * @return the non-null parent paths derived from the data, or an empty stream when the
         *     field cannot be read or has an unsupported shape
         */
        @Override
        public Stream<Path> extract(ParallelCollectionRDD parallelCollectionRDD) {
            final Object data;
            try {
                data = FieldUtils.readField((Object) parallelCollectionRDD, "data", true);
            } catch (IllegalAccessException | IllegalArgumentException e) {
                // Without the field there is nothing to inspect; returning here also guarantees
                // that 'data' is definitely assigned for the code below.
                RddPathUtils.log.debug("Cannot read data field from ParallelCollectionRDD {}", parallelCollectionRDD, e);
                return Stream.empty();
            }
            RddPathUtils.log.debug("ParallelCollectionRDD data: {}", data);

            if ((data instanceof Seq) && !((Seq) data).isEmpty() && (((Seq) data).head() instanceof Tuple2)) {
                // Shared across the stream so the "unable to extract" warning is emitted only once.
                final AtomicBoolean warned = new AtomicBoolean(false);
                return ScalaConversionUtils.fromSeq((Seq) ((Seq) data).slice(0, SEQ_LIMIT)).stream()
                    .map(element -> parentOfTupleKey(element, warned))
                    .filter(Objects::nonNull);
            }
            if ((data instanceof ArrayBuffer) && !((ArrayBuffer) data).isEmpty()) {
                return ScalaConversionUtils.fromSeq(((ArrayBuffer) data).toSeq()).stream()
                    // A null element has no path; skip it instead of failing on toString().
                    .map(element -> element == null ? null : RddPathUtils.parentOf(element.toString()))
                    .filter(Objects::nonNull);
            }
            RddPathUtils.log.debug("Cannot extract path from ParallelCollectionRDD {}", data);
            return Stream.empty();
        }

        /**
         * Derives the parent path from the key of a {@link Tuple2} element.
         *
         * @param element sequence element, may be {@code null} or of another type
         * @param warned flag recording whether the one-off warning was already logged
         * @return the parent of the tuple key, or {@code null} when the element is not a tuple
         *     with a non-null key or the parent cannot be computed
         */
        private static Path parentOfTupleKey(Object element, AtomicBoolean warned) {
            if (element instanceof Tuple2 && ((Tuple2) element)._1 != null) {
                Path parent = RddPathUtils.parentOf(((Tuple2) element)._1.toString());
                RddPathUtils.log.debug("Found input {}", parent);
                return parent;
            }
            if (warned.compareAndSet(false, true)) {
                RddPathUtils.log.warn(
                    "unable to extract Path from {}",
                    element == null ? null : element.getClass().getCanonicalName());
            }
            return null;
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:io/openlineage/spark/agent/util/RddPathUtils$RddPathExtractor.class */
    /**
     * Strategy that knows how to pull Hadoop {@link Path}s out of one particular kind of RDD.
     *
     * @param <T> the RDD type this extractor handles
     */
    public interface RddPathExtractor<T extends RDD> {
        /**
         * Tells whether this extractor can handle the given object.
         *
         * @param candidate object to test
         * @return {@code true} when {@link #extract} may be called with {@code candidate}
         */
        boolean isDefinedAt(Object candidate);

        /**
         * Produces the paths associated with the given RDD.
         *
         * @param rdd RDD to inspect
         * @return stream of paths found for {@code rdd}
         */
        Stream<Path> extract(T rdd);
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:io/openlineage/spark/agent/util/RddPathUtils$UnknownRDDExtractor.class */
    /**
     * Catch-all extractor: accepts any object, logs it at debug level and reports no paths.
     */
    public static class UnknownRDDExtractor implements RddPathExtractor<RDD> {
        UnknownRDDExtractor() {
        }

        /**
         * @param obj ignored
         * @return always {@code true}
         */
        @Override
        public boolean isDefinedAt(Object obj) {
            return true;
        }

        /**
         * @param rdd RDD of an unsupported type; only logged
         * @return always an empty stream
         */
        @Override
        public Stream<Path> extract(RDD rdd) {
            final Stream<Path> noPaths = Stream.empty();
            RddPathUtils.log.debug("Unknown RDD class {}", rdd);
            return noPaths;
        }
    }

    /**
     * Finds the paths behind an RDD by delegating to the first extractor that accepts it.
     *
     * <p>Extractors are tried in this order: {@link HadoopRDDExtractor},
     * {@link FileScanRDDExtractor}, {@link MapPartitionsRDDExtractor},
     * {@link ParallelCollectionRDDExtractor}. When none accepts the RDD,
     * {@link UnknownRDDExtractor} is used.
     *
     * @param rdd RDD to inspect
     * @return the extracted paths with {@code null} entries removed
     */
    public static Stream<Path> findRDDPaths(RDD rdd) {
        final RddPathExtractor[] knownExtractors = {
            new HadoopRDDExtractor(),
            new FileScanRDDExtractor(),
            new MapPartitionsRDDExtractor(),
            new ParallelCollectionRDDExtractor()
        };
        // Start from the fallback and replace it with the first extractor that matches.
        RddPathExtractor selected = new UnknownRDDExtractor();
        for (RddPathExtractor candidate : knownExtractors) {
            if (candidate.isDefinedAt(rdd)) {
                selected = candidate;
                break;
            }
        }
        final Stream<Path> extracted = selected.extract(rdd);
        return extracted.filter(Objects::nonNull);
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Best-effort lookup of the parent of a path given as a string.
     *
     * @param str textual path
     * @return the result of {@code new Path(str).getParent()}, or {@code null} when building the
     *     path or reading its parent throws; the failure is logged at debug level
     */
    public static Path parentOf(String str) {
        Path parent = null;
        try {
            parent = new Path(str).getParent();
        } catch (Exception e) {
            // Parameterized debug logging is a no-op when debug is disabled.
            log.debug("Cannot get parent of path {}", str, e);
        }
        return parent;
    }
}
