package net.sansa_stack.ml.spark.anomalydetection;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import net.sansa_stack.ml.spark.utils.FeatureExtractorModel;
import net.sansa_stack.rdf.common.io.riot.error.ErrorParseMode$;
import net.sansa_stack.rdf.common.io.riot.error.WarningParseMode$;
import net.sansa_stack.rdf.spark.io.NTripleReader$;
import net.sansa_stack.rdf.spark.io.package$;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.jena.graph.Node;
import org.apache.jena.graph.Triple;
import org.apache.jena.riot.Lang;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.spark.HashPartitioner;
import org.apache.spark.ml.clustering.BisectingKMeans;
import org.apache.spark.ml.evaluation.ClusteringEvaluator;
import org.apache.spark.ml.feature.CountVectorizer;
import org.apache.spark.ml.feature.MinHashLSH;
import org.apache.spark.ml.linalg.Vector;
import org.apache.spark.rdd.RDD;
import org.apache.spark.rdd.RDD$;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.SparkSession$;
import org.apache.spark.sql.expressions.UserDefinedFunction;
import org.apache.spark.sql.functions$;
import org.apache.spark.storage.StorageLevel$;
import scala.Double$;
import scala.Function2;
import scala.MatchError;
import scala.Predef$;
import scala.Tuple2;
import scala.Tuple3;
import scala.Tuple4;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.TraversableLike;
import scala.collection.TraversableOnce;
import scala.collection.immutable.$colon;
import scala.collection.immutable.List;
import scala.collection.immutable.List$;
import scala.collection.immutable.Map;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayOps;
import scala.collection.mutable.HashSet;
import scala.collection.mutable.HashSet$;
import scala.collection.mutable.Set;
import scala.collection.mutable.Set$;
import scala.collection.mutable.SetLike;
import scala.collection.mutable.StringBuilder;
import scala.math.Ordering;
import scala.math.Ordering$String$;
import scala.reflect.ClassTag;
import scala.reflect.ClassTag$;
import scala.reflect.api.Mirror;
import scala.reflect.api.TypeCreator;
import scala.reflect.api.Types;
import scala.reflect.api.Universe;
import scala.runtime.BooleanRef;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.DoubleRef;
import scala.runtime.IntRef;
import scala.runtime.LazyRef;
import scala.runtime.NonLocalReturnControl;
import scala.runtime.RichChar$;
import scala.runtime.RichInt$;

/* compiled from: DistADUtil.scala */
/* loaded from: input_file:net/sansa_stack/ml/spark/anomalydetection/DistADUtil$.class */
public final class DistADUtil$ {
    /** Singleton instance; the private constructor assigns {@code this} to it. */
    public static DistADUtil$ MODULE$;
    /** log4j logger, obtained from this class in the constructor. */
    private final Logger LOG;
    /** XML Schema numeric datatype IRIs; populated in the constructor and exposed through {@code objList()}. */
    private final List<String> objList;
    /** Spark UDF built in the constructor that maps a literal's string form to a double. */
    private final UserDefinedFunction convertStringToDouble;

    static {
        // Instantiating the class is done purely for its side effect of setting MODULE$.
        new DistADUtil$();
    }

    /** Returns the log4j {@link Logger} held by this singleton. */
    public Logger LOG() {
        return LOG;
    }

    /** Returns the list of datatype IRIs stored in the {@code objList} field. */
    public List<String> objList() {
        return objList;
    }

    /**
     * Sets the {@code "org"} logger to {@code ERROR} and returns the session produced by
     * {@code spark$1}, which builds it with {@code getOrCreate()}.
     *
     * @return the Spark session
     */
    public SparkSession createSpark() {
        Logger.getLogger("org").setLevel(Level.ERROR);
        // A fresh holder is used on every call; getOrCreate() in the initializer decides whether a session is reused.
        final LazyRef sessionHolder = new LazyRef();
        return spark$1(sessionHolder);
    }

    /**
     * Loads triples from {@code str}.
     *
     * <p>Paths whose name ends with {@code "nt"} go through {@code NTripleReader} with error mode SKIP
     * and warning mode IGNORE; every other path is read as Turtle.
     * NOTE(review): the suffix test has no leading dot, so any path ending in "nt" takes the
     * N-Triples branch — confirm this is intended.
     *
     * @param sparkSession session used for reading
     * @param str          input path
     * @return the triples read from the input
     */
    public RDD<Triple> readData(SparkSession sparkSession, String str) {
        if (!str.endsWith("nt")) {
            return (RDD) package$.MODULE$.RDFReader(sparkSession).rdf(Lang.TURTLE).apply(str);
        }
        final NTripleReader$ reader = NTripleReader$.MODULE$;
        return reader.load(sparkSession, str, ErrorParseMode$.MODULE$.SKIP(), WarningParseMode$.MODULE$.IGNORE(), reader.load$default$5(), reader.load$default$6());
    }

    /**
     * Tests whether the text preceding the first {@code '^'} consists solely of digits.
     *
     * <p>The first character and the character directly before the {@code '^'} are skipped
     * (presumably the quotes around a literal's lexical form — confirm against callers).
     * Strings without {@code '^'} and any input that raises an exception yield {@code false}.
     *
     * @param str string form of an RDF object
     * @return {@code true} when the extracted slice passes {@code isAllDigits}
     */
    public boolean isNumeric(String str) {
        try {
            if (!str.contains("^")) {
                return false;
            }
            final int caret = str.indexOf('^');
            return isAllDigits(str.substring(1, caret - 1));
        } catch (Exception ignored) {
            // Malformed input (including null) is simply treated as "not numeric".
            return false;
        }
    }

    /**
     * Reports whether every character of {@code str} is a digit.
     * An empty string yields {@code true} because no character clears the flag.
     *
     * @param str text to inspect
     * @return {@code true} when no non-digit character was seen
     */
    public boolean isAllDigits(String str) {
        final BooleanRef allDigits = BooleanRef.create(true);
        for (int idx = 0; idx < str.length(); idx++) {
            // The helper clears the flag on the first non-digit and never sets it back.
            $anonfun$isAllDigits$1(allDigits, str.charAt(idx));
        }
        return allDigits.elem;
    }

    /**
     * Checks whether the text starting two characters after the first {@code '^'} in {@code str}
     * is an element of {@code list}.
     *
     * @param str  string form of an RDF object
     * @param list accepted values for the trailing part
     * @return {@code false} when {@code str} has no {@code '^'}, otherwise the membership result
     */
    public boolean searchEdge(String str, List<String> list) {
        final int caret = str.indexOf('^');
        if (caret < 0) {
            return false;
        }
        final String suffix = str.substring(caret + 2);
        return list.contains(suffix);
    }

    /**
     * Parses the part of {@code str} before the first {@code '^'} as a double, after stripping
     * leading and trailing double-quote characters.
     *
     * @param str string form of a literal
     * @return the parsed value, or {@code NaN} when anything at all goes wrong
     */
    public double getNumber(String str) {
        try {
            final String lexical = str.substring(0, str.indexOf('^'));
            final String unquoted = StringUtils.stripEnd(StringUtils.stripStart(lexical, "\""), "\"");
            return new StringOps(Predef$.MODULE$.augmentString(unquoted)).toDouble();
        } catch (Throwable th) {
            // Every failure (missing '^', unparsable number, null input, ...) maps to NaN.
            return Double.NaN;
        }
    }

    /**
     * Keeps only the triples whose object is a literal.
     *
     * @param rdd input triples
     * @return the filtered RDD
     */
    public RDD<Triple> getOnlyLiteralObjects(RDD<Triple> rdd) {
        return rdd.filter(t -> BoxesRunTime.boxToBoolean($anonfun$getOnlyLiteralObjects$1(t)));
    }

    /**
     * Keeps only the triples whose object string passes {@code isNumeric}.
     *
     * @param rdd input triples
     * @return the filtered RDD
     */
    public RDD<Triple> triplesWithNumericLit(RDD<Triple> rdd) {
        return rdd.filter(t -> BoxesRunTime.boxToBoolean($anonfun$triplesWithNumericLit$1(t)));
    }

    /**
     * Applies three filters in sequence: numeric object ({@code triplesWithNumericLit}),
     * object datatype contained in {@code objList()}, and predicate not ending in "id".
     *
     * @param rdd input triples
     * @return triples passing all three filters
     */
    public RDD<Triple> triplesWithNumericLitWithTypeIgnoreEndingWithID(RDD<Triple> rdd) {
        final RDD<Triple> numeric = triplesWithNumericLit(rdd);
        final RDD<Triple> typed = numeric.filter(
                t -> BoxesRunTime.boxToBoolean($anonfun$triplesWithNumericLitWithTypeIgnoreEndingWithID$1(t)));
        return typed.filter(
                t -> BoxesRunTime.boxToBoolean($anonfun$triplesWithNumericLitWithTypeIgnoreEndingWithID$2(t)));
    }

    /** Returns the string-to-double UDF created in the constructor. */
    public UserDefinedFunction convertStringToDouble() {
        return convertStringToDouble;
    }

    /**
     * Joins {@code rdd} with {@code rdd2} on the subject's string form and returns the
     * {@code rdd}-side triple of every joined pair.
     *
     * <p>NOTE(review): this is a plain inner join, so a triple from {@code rdd} is emitted once per
     * matching triple in {@code rdd2}; confirm callers tolerate the duplicates.
     *
     * @param rdd  triples to be filtered
     * @param rdd2 triples whose subjects decide what is kept
     * @return the left-hand triples of the join result
     */
    public RDD<Triple> filterAllTriplesWhichAtLeastHaveOneNumericLiterals(RDD<Triple> rdd, RDD<Triple> rdd2) {
        RDD<Tuple2<String, Triple>> allBySubject = rdd.map(
                t -> new Tuple2<>(t.getSubject().toString(), t), ClassTag$.MODULE$.apply(Tuple2.class));
        RDD<Tuple2<String, Triple>> keepBySubject = rdd2.map(
                t -> new Tuple2<>(t.getSubject().toString(), t), ClassTag$.MODULE$.apply(Tuple2.class));
        return RDD$.MODULE$
                .rddToPairRDDFunctions(allBySubject, ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(Triple.class), Ordering$String$.MODULE$)
                .join(keepBySubject)
                // joined = (subject, (tripleFromRdd, tripleFromRdd2)); keep the first element of the pair.
                .map(joined -> (Triple) ((Tuple2) joined._2())._1(), ClassTag$.MODULE$.apply(Triple.class));
    }

    /**
     * Flattens a list of maps into key/value pairs, groups them by key and keeps, for every key,
     * the list of its values.
     *
     * @param list maps to combine
     * @return map from key to the values found under that key
     * @throws MatchError if a flattened entry is {@code null}
     */
    public <A, B> Map<A, List<B>> merge(List<Map<A, B>> list) {
        return list.flatten(Predef$.MODULE$.$conforms()).groupBy(entry -> {
            if (entry == null) {
                throw new MatchError(entry);
            }
            return entry._1();
        }).mapValues(grouped -> {
            return (List) grouped.map(pair -> {
                if (pair == null) {
                    throw new MatchError(pair);
                }
                return pair._2();
            }, List$.MODULE$.canBuildFrom());
        });
    }

    /**
     * Writes every element of {@code list} on its own line to {@code str}.
     *
     * <p>Targets that do not contain {@code "hdfs://"} are written through a local
     * {@link FileWriter}. Otherwise the lines are concatenated in memory, any existing path is
     * deleted recursively and the text is written through the Hadoop {@link FileSystem}.
     *
     * <p>Both branches close their writer/stream even when writing fails. The Hadoop
     * {@code FileSystem} itself is deliberately NOT closed: {@code Path.getFileSystem} hands out a
     * cached instance shared with the Spark context, and closing it makes later use of that
     * instance fail with "Filesystem closed".
     *
     * @param list lines to write
     * @param str  destination path (local path or {@code hdfs://} URI)
     */
    public void writeAnomaliesToFile(List<String> list, String str) {
        if (!str.contains("hdfs://")) {
            try (BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(new File(str)))) {
                list.foreach(str2 -> {
                    $anonfun$writeAnomaliesToFile$2(bufferedWriter, str2);
                    return BoxedUnit.UNIT;
                });
            }
            return;
        }
        StringBuilder stringBuilder = new StringBuilder();
        list.foreach(str3 -> {
            return stringBuilder.append(str3).append("\n");
        });
        Path path = new Path(str);
        FileSystem fileSystem = path.getFileSystem(createSpark().sparkContext().hadoopConfiguration());
        fileSystem.delete(path, true);
        // Closing the stream is what flushes and finalizes the file.
        try (java.io.OutputStream out = fileSystem.create(path)) {
            out.write(stringBuilder.toString().getBytes());
        }
    }

    /**
     * Writes {@code dataset} as comma-separated text to {@code str}.
     *
     * <p>Targets that do not contain {@code "hdfs://"} use Spark's CSV writer (single partition,
     * overwrite mode, header enabled). Otherwise all rows are collected to the driver, joined
     * with commas (no header line), any existing path is deleted recursively and the text is
     * written through the Hadoop {@link FileSystem}.
     *
     * <p>The output stream is closed even when writing fails. The Hadoop {@code FileSystem} is
     * deliberately NOT closed: {@code Path.getFileSystem} hands out a cached instance shared with
     * the Spark context, and closing it makes later use of that instance fail.
     *
     * @param str     destination path (local path or {@code hdfs://} URI)
     * @param dataset rows to write
     */
    public void writeToFile(String str, Dataset<Row> dataset) {
        if (!str.contains("hdfs://")) {
            dataset.coalesce(1).write().mode(SaveMode.Overwrite).options(Predef$.MODULE$.Map().apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{new Tuple2("delimiter", ","), new Tuple2("header", "true")}))).csv(str);
            return;
        }
        StringBuilder stringBuilder = new StringBuilder();
        new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[]) dataset.coalesce(1).collect())).foreach(row -> {
            return stringBuilder.append(row.mkString(",")).append("\n");
        });
        Path path = new Path(str);
        FileSystem fileSystem = path.getFileSystem(createSpark().sparkContext().hadoopConfiguration());
        fileSystem.delete(path, true);
        // Closing the stream is what flushes and finalizes the file.
        try (java.io.OutputStream out = fileSystem.create(path)) {
            out.write(stringBuilder.toString().getBytes());
        }
    }

    /**
     * Tries BisectingKMeans with k = 2..10 on the (optionally sampled) data and returns the k
     * with the highest {@code ClusteringEvaluator} score.
     *
     * <p>If a run yields fewer distinct predictions than the requested k, the loop is abandoned
     * via {@code NonLocalReturnControl}: the thrown value is k when only one distinct prediction
     * exists, otherwise the number of distinct predictions.
     *
     * <p>NOTE(review): as decompiled, the {@code try} block encloses only the sampling call, while
     * the control exceptions are thrown from the loop further down — confirm against the original
     * Scala source where the early return is actually caught.
     *
     * @param dataset input rows handed to {@code FeatureExtractorModel}
     * @param d       sampling fraction; {@code 1.0} means use the dataset as is
     * @return the selected k, or {@code -1} if no run updated the best score
     */
    public int detectNumberOfClusters(Dataset<Row> dataset, double d) {
        Dataset<Row> sample;
        // Identity token tying a NonLocalReturnControl to this particular invocation.
        Object obj = new Object();
        if (d != 1.0d) {
            try {
                sample = dataset.sample(d);
            } catch (NonLocalReturnControl e) {
                if (e.key() == obj) {
                    return e.value$mcI$sp();
                }
                throw e;
            }
        } else {
            sample = dataset;
        }
        // Feature extraction in "or" mode, then count-vectorization into the "features" column.
        Dataset<Row> transform = new FeatureExtractorModel().setMode("or").transform((Dataset<?>) sample);
        Dataset transform2 = new CountVectorizer().setInputCol("extractedFeatures").setOutputCol("features").fit(transform).transform(transform);
        // create = best k so far (-1 = none); create2 = best evaluator score so far.
        IntRef create = IntRef.create(-1);
        DoubleRef create2 = DoubleRef.create(Double$.MODULE$.MinValue());
        ClusteringEvaluator clusteringEvaluator = new ClusteringEvaluator();
        RichInt$.MODULE$.to$extension0(Predef$.MODULE$.intWrapper(2), 10).foreach$mVc$sp(i -> {
            Dataset transform3 = new BisectingKMeans().setK(i).setSeed(1L).setFeaturesCol("features").fit(transform2).transform(transform2);
            // Number of clusters that actually received at least one row.
            long count = transform3.select("prediction", Predef$.MODULE$.wrapRefArray(new String[0])).distinct().count();
            if (count < i) {
                if (count == 1) {
                    throw new NonLocalReturnControl.mcI.sp(obj, i);
                }
                throw new NonLocalReturnControl.mcI.sp(obj, (int) count);
            }
            double evaluate = clusteringEvaluator.evaluate(transform3);
            // Strictly greater: on ties the smaller k is kept.
            if (evaluate > create2.elem) {
                create2.elem = evaluate;
                create.elem = i;
            }
            MODULE$.LOG().info(new StringBuilder(40).append("Silhouette Coefficient for ").append(i).append(" clusters is ").append(evaluate).toString());
        });
        return create.elem;
    }

    /**
     * Flags rows whose {@code o} value lies outside the interquartile fences of its
     * ({@code prediction}, {@code p}) group.
     *
     * <p>Per group it computes the row count and approximate 25th/75th percentiles, derives
     * {@code iqr = q3 - q1}, {@code upper = q3 + 1.5 * iqr} and {@code lower = q1 - 1.5 * iqr},
     * joins these back onto the input and keeps rows from groups with more than {@code i} rows
     * where {@code o < lower} or {@code o > upper}.
     *
     * @param dataset rows with columns {@code s}, {@code p}, {@code o}, {@code prediction}
     * @param z       when {@code true}, intermediate results are logged and shown
     * @param i       exclusive lower bound on the group size
     * @return the outlier rows projected to {@code s}, {@code p}, {@code o}
     */
    public Dataset<Row> iqr(Dataset<Row> dataset, boolean z, int i) {
        final functions$ fn = functions$.MODULE$;
        final Predef$ predef = Predef$.MODULE$;

        Column firstQuartile = fn.expr("approx_percentile(o,0.25)").as("q1");
        Column thirdQuartile = fn.expr("approx_percentile(o,0.75)").as("q3");
        Column spread = fn.col("q3").$minus(fn.col("q1"));
        Column upperFence = fn.col("q3").$plus(fn.lit(BoxesRunTime.boxToDouble(1.5d)).$times(fn.col("iqr")));
        Column lowerFence = fn.col("q1").$minus(fn.lit(BoxesRunTime.boxToDouble(1.5d)).$times(fn.col("iqr")));

        Dataset groupStats = dataset
                .groupBy("prediction", predef.wrapRefArray(new String[]{"p"}))
                .agg(fn.count("o").as("count"), predef.wrapRefArray(new Column[]{firstQuartile, thirdQuartile}))
                .withColumn("iqr", spread)
                .withColumn("upper", upperFence)
                .withColumn("lower", lowerFence)
                .cache();
        if (z) {
            LOG().info("result of aggregation:");
            groupStats.show(false);
        }

        Dataset withStats = dataset.join(groupStats, Seq$.MODULE$.apply(predef.wrapRefArray(new String[]{"prediction", "p"})));
        if (z) {
            LOG().info("result of aggregation join:");
            withStats.show(false);
        }

        Column largeEnoughGroup = fn.col("count").$greater(BoxesRunTime.boxToInteger(i));
        Column outsideFences = fn.col("o").$less(fn.col("lower")).$bar$bar(fn.col("o").$greater(fn.col("upper")));
        Dataset<Row> anomalies = withStats
                .filter(largeEnoughGroup)
                .filter(outsideFences)
                .select("s", predef.wrapRefArray(new String[]{"p", "o"}));
        if (z) {
            LOG().info("total number of anomalies " + anomalies.count());
            anomalies.show(false);
        }
        return anomalies;
    }

    /**
     * Flags rows whose {@code o} value lies more than 2.5 median absolute deviations away from the
     * median of its ({@code prediction}, {@code p}) group.
     *
     * <p>Steps: per-group count and approximate median; per-row absolute deviation from that
     * median; per-group approximate median of those deviations ({@code MAD});
     * {@code upper/lower = median ± 2.5 * MAD}; keep rows from groups with more than {@code i}
     * rows where {@code o} falls outside {@code [lower, upper]}.
     *
     * <p>The deviation is taken as an absolute value. The previous code used the signed
     * difference {@code o - median}, whose median is close to zero by construction, which
     * collapsed the fences onto the median.
     *
     * @param dataset rows with columns {@code s}, {@code p}, {@code o}, {@code prediction}
     * @param z       when {@code true}, intermediate results are logged and shown
     * @param i       exclusive lower bound on the group size
     * @return the outlier rows projected to {@code s}, {@code p}, {@code o}
     */
    public Dataset<Row> mad(Dataset<Row> dataset, boolean z, int i) {
        Dataset agg = dataset.groupBy("prediction", Predef$.MODULE$.wrapRefArray(new String[]{"p"})).agg(functions$.MODULE$.count("o").as("count"), Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.expr("approx_percentile(o,0.5)").as("median")}));
        if (z) {
            LOG().info("result of aggregation:");
            agg.show(false);
        }
        // Absolute deviation from the group median; MAD is the median of these values.
        Column absoluteDeviation = functions$.MODULE$.abs(functions$.MODULE$.col("o").$minus(functions$.MODULE$.col("median")));
        Dataset withColumn = dataset.join(agg, Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new String[]{"prediction", "p"}))).withColumn("difference", absoluteDeviation);
        if (z) {
            LOG().info("result of aggregation join:");
            withColumn.show(false);
        }
        Dataset agg2 = withColumn.groupBy("prediction", Predef$.MODULE$.wrapRefArray(new String[]{"p"})).agg(functions$.MODULE$.expr("approx_percentile(difference,0.5)").as("MAD"), Predef$.MODULE$.wrapRefArray(new Column[0]));
        if (z) {
            agg2.show(false);
        }
        Dataset withColumn2 = withColumn.join(agg2, Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new String[]{"prediction", "p"}))).withColumn("upper", functions$.MODULE$.col("median").$plus(functions$.MODULE$.lit(BoxesRunTime.boxToDouble(2.5d)).$times(functions$.MODULE$.col("MAD")))).withColumn("lower", functions$.MODULE$.col("median").$minus(functions$.MODULE$.lit(BoxesRunTime.boxToDouble(2.5d)).$times(functions$.MODULE$.col("MAD"))));
        if (z) {
            withColumn2.show(false);
        }
        Dataset<Row> select = withColumn2.filter(functions$.MODULE$.col("count").$greater(BoxesRunTime.boxToInteger(i))).filter(functions$.MODULE$.col("o").$less(functions$.MODULE$.col("lower")).$bar$bar(functions$.MODULE$.col("o").$greater(functions$.MODULE$.col("upper")))).select("s", Predef$.MODULE$.wrapRefArray(new String[]{"p", "o"}));
        if (z) {
            LOG().info(new StringBuilder(26).append("total number of anomalies ").append(select.count()).toString());
            select.show(false);
        }
        return select;
    }

    /**
     * Flags rows whose z-score within its ({@code prediction}, {@code p}) group exceeds 2 in
     * absolute value.
     *
     * <p>Per group it computes count, mean and standard deviation of {@code o}, joins them back
     * onto the input, computes {@code zscore = (o - mean) / std} and keeps rows from groups with
     * more than {@code i} rows where {@code |zscore| > 2}.
     *
     * <p>The previous code evaluated {@code o - (mean / std)} because the division bound tighter
     * than the subtraction; the subtraction is now performed first.
     *
     * @param dataset rows with columns {@code s}, {@code p}, {@code o}, {@code prediction}
     * @param z       when {@code true}, intermediate results are logged and shown
     * @param i       exclusive lower bound on the group size
     * @return the outlier rows projected to {@code s}, {@code p}, {@code o}
     */
    public Dataset<Row> zscore(Dataset<Row> dataset, boolean z, int i) {
        Dataset agg = dataset.groupBy("prediction", Predef$.MODULE$.wrapRefArray(new String[]{"p"})).agg(functions$.MODULE$.count("o").as("count"), Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.mean("o").as("mean"), functions$.MODULE$.stddev("o").as("std")}));
        if (z) {
            LOG().info("result of aggregation:");
            agg.show(false);
        }
        Dataset join = dataset.join(agg, Seq$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new String[]{"prediction", "p"})));
        if (z) {
            LOG().info("result of aggregation join:");
            join.show(false);
        }
        // (o - mean) / std: subtract first, then divide.
        Column standardized = functions$.MODULE$.col("o").$minus(functions$.MODULE$.col("mean")).$div(functions$.MODULE$.col("std"));
        Dataset<Row> select = join.withColumn("zscore", standardized).filter(functions$.MODULE$.col("count").$greater(BoxesRunTime.boxToInteger(i))).filter(functions$.MODULE$.abs(functions$.MODULE$.col("zscore")).$greater(BoxesRunTime.boxToInteger(2))).select("s", Predef$.MODULE$.wrapRefArray(new String[]{"p", "o"}));
        if (z) {
            LOG().info(new StringBuilder(26).append("total number of anomalies ").append(select.count()).toString());
            select.show(false);
        }
        return select;
    }

    /**
     * Converts the triples to a DataFrame through SANSA's {@code TripleOperations.toDF()}.
     *
     * @param rdd triples to convert
     * @return the resulting DataFrame
     */
    public Dataset<Row> createDF(RDD<Triple> rdd) {
        final Dataset<Row> frame = net.sansa_stack.rdf.spark.model.package$.MODULE$.TripleOperations(rdd).toDF();
        return frame;
    }

    /**
     * Converts the triples to a DataFrame and replaces column {@code o} with the result of the
     * {@code convertStringToDouble} UDF applied to it.
     *
     * @param rdd triples to convert
     * @return DataFrame whose {@code o} column holds doubles
     */
    public Dataset<Row> createDFWithConversion(RDD<Triple> rdd) {
        final Dataset<Row> frame = net.sansa_stack.rdf.spark.model.package$.MODULE$.TripleOperations(rdd).toDF();
        final Column numericObject = convertStringToDouble().apply(
                Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.col("o")}));
        return frame.withColumn("o", numericObject);
    }

    /**
     * Clusters the subjects of {@code rdd} with BisectingKMeans.
     *
     * <p>Pipeline: {@code FeatureExtractorModel} in "or" mode, {@code CountVectorizer} from
     * {@code extractedFeatures} to {@code features}, BisectingKMeans with seed 1, and finally the
     * {@code uri} column is renamed to {@code s}.
     *
     * @param rdd triples to cluster
     * @param i   number of clusters (k)
     * @return the transformed rows including the model's output
     */
    public Dataset<Row> calculateBiSectingKmeanClustering(RDD<Triple> rdd, int i) {
        Dataset<Row> extracted = new FeatureExtractorModel().setMode("or").transform(rdd);
        CountVectorizer vectorizer = new CountVectorizer().setInputCol("extractedFeatures").setOutputCol("features");
        Dataset vectors = vectorizer.fit(extracted).transform(extracted);
        BisectingKMeans kMeans = new BisectingKMeans().setK(i).setSeed(1L).setFeaturesCol("features");
        return kMeans.fit(vectors).transform(vectors).withColumnRenamed("uri", "s");
    }

    /**
     * Clusters the rows of {@code dataset} with BisectingKMeans.
     *
     * <p>Pipeline: {@code FeatureExtractorModel} in "or" mode, {@code CountVectorizer} from
     * {@code extractedFeatures} to {@code features}, BisectingKMeans with seed 1, and finally the
     * {@code uri} column is renamed to {@code s}.
     *
     * @param dataset rows to cluster
     * @param i       number of clusters (k)
     * @return the transformed rows including the model's output
     */
    public Dataset<Row> calculateBiSectingKmeanClustering(Dataset<Row> dataset, int i) {
        Dataset<Row> extracted = new FeatureExtractorModel().setMode("or").transform((Dataset<?>) dataset);
        CountVectorizer vectorizer = new CountVectorizer().setInputCol("extractedFeatures").setOutputCol("features");
        Dataset vectors = vectorizer.fit(extracted).transform(extracted);
        BisectingKMeans kMeans = new BisectingKMeans().setK(i).setSeed(1L).setFeaturesCol("features");
        return kMeans.fit(vectors).transform(vectors).withColumnRenamed("uri", "s");
    }

    /**
     * Returns the part of the node's string form after its last {@code '/'}; the whole string
     * when it contains no slash.
     *
     * @param node node to shorten
     * @return the trailing path segment
     */
    public String getLocalName(Node node) {
        final String text = node.toString();
        final int lastSlash = text.lastIndexOf("/");
        return text.substring(lastSlash + 1);
    }

    /**
     * Reports whether {@code d} occurs in {@code dArr}.
     *
     * @param d    value to look for
     * @param dArr array to scan
     * @return {@code true} when an equal element exists
     */
    public boolean search(double d, double[] dArr) {
        final ArrayOps.ofDouble values = new ArrayOps.ofDouble(Predef$.MODULE$.doubleArrayOps(dArr));
        final Object needle = BoxesRunTime.boxToDouble(d);
        return values.contains(needle);
    }

    /**
     * Groups subjects by MinHash-LSH similarity and returns their numeric triples labelled with a
     * group index.
     *
     * <p>Outline: build per-subject property sets from {@code rdd}; compute approximate pairwise
     * similarity (on {@code rdd2} restricted to the subjects of {@code rdd} for the PARTIAL
     * clustering type, on all of {@code rdd2} for FULL); turn the similar pairs into a set of
     * similar ids per subject; join these with the property sets; regroup by similarity set and
     * predicate; finally number the resulting groups with {@code zipWithIndex}.
     *
     * @param rdd          triples providing the (subject, predicate, numeric value) tuples
     * @param rdd2         triples used for the similarity computation
     * @param distADConfig supplies the clustering type and the pairwise distance threshold
     * @return DataFrame with columns {@code s}, {@code p}, {@code o}, {@code prediction}
     * @throws MatchError if the clustering type equals neither PARTIAL nor FULL
     */
    public Dataset<Row> calculateMinHashLSHClustering(RDD<Triple> rdd, RDD<Triple> rdd2, DistADConfig distADConfig) {
        Dataset<?> calculatePairwiseSimilarity;
        // subject -> set of (subject, predicate, numeric value)
        RDD<Tuple2<String, Set<Tuple3<String, String, Object>>>> propClustering = propClustering(rdd);
        // All subjects of rdd, collected to the driver as an immutable set.
        scala.collection.immutable.Set set = new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[]) rdd.map(triple -> {
            return triple.getSubject();
        }, ClassTag$.MODULE$.apply(Node.class)).cache().collect())).toSet();
        // rdd2 restricted to those subjects.
        RDD<Triple> filter = rdd2.filter(triple2 -> {
            return BoxesRunTime.boxToBoolean($anonfun$calculateMinHashLSHClustering$2(set, triple2));
        });
        String clusteringType = distADConfig.clusteringType();
        String PARTIAL = distADConfig.PARTIAL();
        // Null-safe equality: the first branch is taken when the type is NOT PARTIAL.
        if (PARTIAL != null ? !PARTIAL.equals(clusteringType) : clusteringType != null) {
            String FULL = distADConfig.FULL();
            if (FULL != null ? !FULL.equals(clusteringType) : clusteringType != null) {
                throw new MatchError(clusteringType);
            }
            calculatePairwiseSimilarity = calculatePairwiseSimilarity(rdd2, distADConfig.pairWiseDistanceThreshold());
        } else {
            calculatePairwiseSimilarity = calculatePairwiseSimilarity(filter, distADConfig.pairWiseDistanceThreshold());
        }
        // Keep pairs of distinct, non-null uris; collect per id the set of similar ids and add the id itself.
        RDD map = RDD$.MODULE$.rddToPairRDDFunctions(calculatePairwiseSimilarity.filter(functions$.MODULE$.col("datasetA.uri").isNotNull()).filter(functions$.MODULE$.col("datasetB.uri").isNotNull()).filter(functions$.MODULE$.col("datasetA.uri").$eq$bang$eq(functions$.MODULE$.col("datasetB.uri"))).select(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.col("datasetA.uri").alias("id1"), functions$.MODULE$.col("datasetB.uri").alias("id2")})).persist(StorageLevel$.MODULE$.MEMORY_AND_DISK()).rdd().map(row -> {
            return new Tuple2(row.getString(0), row.getString(1));
        }, ClassTag$.MODULE$.apply(Tuple2.class)), ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(String.class), Ordering$String$.MODULE$).aggregateByKey(Set$.MODULE$.empty(), (set2, str) -> {
            return set2.$plus$eq(str);
        }, (set3, set4) -> {
            return set3.$plus$plus$eq(set4);
        }, ClassTag$.MODULE$.apply(Set.class)).map(tuple2 -> {
            return new Tuple2(tuple2._1(), ((SetLike) tuple2._2()).$plus$eq(tuple2._1()).toSet());
        }, ClassTag$.MODULE$.apply(Tuple2.class));
        // Both sides of the upcoming join share one partitioner and are persisted.
        HashPartitioner hashPartitioner = new HashPartitioner(500);
        RDD persist = RDD$.MODULE$.rddToPairRDDFunctions(propClustering, ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(Set.class), Ordering$String$.MODULE$).partitionBy(hashPartitioner).persist(StorageLevel$.MODULE$.MEMORY_AND_DISK());
        RDD persist2 = RDD$.MODULE$.rddToPairRDDFunctions(map, ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(scala.collection.immutable.Set.class), Ordering$String$.MODULE$).partitionBy(hashPartitioner).persist(StorageLevel$.MODULE$.MEMORY_AND_DISK());
        // Join on subject id and re-key by the similarity set: (similar ids, property set).
        RDD map2 = RDD$.MODULE$.rddToPairRDDFunctions(persist2, ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(scala.collection.immutable.Set.class), Ordering$String$.MODULE$).join(persist).map(tuple22 -> {
            Tuple2 tuple22;
            if (tuple22 == null || (tuple22 = (Tuple2) tuple22._2()) == null) {
                throw new MatchError(tuple22);
            }
            return new Tuple2(((scala.collection.immutable.Set) tuple22._1()).toSet(), (Set) tuple22._2());
        }, ClassTag$.MODULE$.apply(Tuple2.class));
        HashSet empty = HashSet$.MODULE$.empty();
        Function2 function2 = (hashSet, set5) -> {
            return hashSet.$plus$eq(set5);
        };
        Function2 function22 = (hashSet2, hashSet3) -> {
            return hashSet2.$plus$plus$eq(hashSet3);
        };
        ClassTag apply = ClassTag$.MODULE$.apply(scala.collection.immutable.Set.class);
        ClassTag apply2 = ClassTag$.MODULE$.apply(Set.class);
        RDD$.MODULE$.rddToPairRDDFunctions$default$4(map2);
        // Gather all property sets per similarity set, then split each group by predicate.
        RDD map3 = RDD$.MODULE$.rddToPairRDDFunctions(map2, apply, apply2, (Ordering) null).aggregateByKey(empty, function2, function22, ClassTag$.MODULE$.apply(HashSet.class)).map(tuple23 -> {
            if (tuple23 == null) {
                throw new MatchError(tuple23);
            }
            HashSet hashSet4 = (HashSet) tuple23._2();
            // (predicates occurring in the group, the group's property sets)
            return new Tuple2(((TraversableOnce) hashSet4.flatMap(set6 -> {
                return (Set) set6.map(tuple3 -> {
                    return (String) tuple3._2();
                }, Set$.MODULE$.canBuildFrom());
            }, HashSet$.MODULE$.canBuildFrom())).toSet(), hashSet4);
        }, ClassTag$.MODULE$.apply(Tuple2.class)).flatMap(tuple24 -> {
            if (tuple24 == null) {
                throw new MatchError(tuple24);
            }
            scala.collection.immutable.Set set6 = (scala.collection.immutable.Set) tuple24._1();
            HashSet hashSet4 = (HashSet) tuple24._2();
            // One (predicate, all tuples of the group) pair per predicate.
            return (scala.collection.immutable.Set) set6.map(str2 -> {
                return new Tuple2(str2, ((TraversableOnce) hashSet4.flatMap(set7 -> {
                    return set7;
                }, HashSet$.MODULE$.canBuildFrom())).toSet());
            }, scala.collection.immutable.Set$.MODULE$.canBuildFrom());
        }, ClassTag$.MODULE$.apply(Tuple2.class)).map(tuple25 -> {
            if (tuple25 == null) {
                throw new MatchError(tuple25);
            }
            String str2 = (String) tuple25._1();
            // Keep only the tuples carrying this predicate.
            return (scala.collection.immutable.Set) ((scala.collection.immutable.Set) tuple25._2()).filter(tuple3 -> {
                return BoxesRunTime.boxToBoolean($anonfun$calculateMinHashLSHClustering$17(str2, tuple3));
            });
        }, ClassTag$.MODULE$.apply(scala.collection.immutable.Set.class));
        persist.unpersist(persist.unpersist$default$1());
        persist2.unpersist(persist2.unpersist$default$1());
        RDD map4 = map3.repartition(1000, map3.repartition$default$2(1000)).persist(StorageLevel$.MODULE$.MEMORY_AND_DISK()).map(set6 -> {
            return set6.toSeq();
        }, ClassTag$.MODULE$.apply(Seq.class));
        SparkSession createSpark = createSpark();
        // The zipWithIndex position of each group becomes its "prediction"; the value is re-parsed as a double.
        RDD flatMap = map4.zipWithIndex().map(tuple26 -> {
            return (Seq) ((TraversableLike) tuple26._1()).map(tuple3 -> {
                return new Tuple4(tuple3._1(), tuple3._2(), BoxesRunTime.boxToDouble(new StringOps(Predef$.MODULE$.augmentString(tuple3._3().toString().replace("\"", ""))).toDouble()), BoxesRunTime.boxToLong(tuple26._2$mcJ$sp()));
            }, Seq$.MODULE$.canBuildFrom());
        }, ClassTag$.MODULE$.apply(Seq.class)).flatMap(seq -> {
            return (Seq) Predef$.MODULE$.identity(seq);
        }, ClassTag$.MODULE$.apply(Tuple4.class));
        // The TypeCreator below describes Tuple4[String, String, Double, Long] for the product encoder.
        return createSpark.implicits().rddToDatasetHolder(flatMap, createSpark.implicits().newProductEncoder(scala.reflect.runtime.package$.MODULE$.universe().TypeTag().apply(scala.reflect.runtime.package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: net.sansa_stack.ml.spark.anomalydetection.DistADUtil$$typecreator5$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe = mirror.universe();
                return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticClass("scala.Tuple4"), new $colon.colon(universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(mirror.staticPackage("scala").asModule().moduleClass().asType().toTypeConstructor(), mirror.staticModule("scala.Predef")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.Predef").asModule().moduleClass(), "String"), Nil$.MODULE$), new $colon.colon(universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(mirror.staticPackage("scala").asModule().moduleClass().asType().toTypeConstructor(), mirror.staticModule("scala.Predef")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.Predef").asModule().moduleClass(), "String"), Nil$.MODULE$), new $colon.colon(mirror.staticClass("scala.Double").asType().toTypeConstructor(), new $colon.colon(mirror.staticClass("scala.Long").asType().toTypeConstructor(), Nil$.MODULE$)))));
            }
        }))).toDF(Predef$.MODULE$.wrapRefArray(new String[]{"s", "p", "o", "prediction"}));
    }

    /**
     * Computes an approximate similarity self-join over the feature vectors of {@code rdd}.
     *
     * <p>Features are extracted in "or" mode and count-vectorized into {@code vectorizedFeatures};
     * rows whose vector has no non-zero entry are dropped. The remaining {@code uri} /
     * {@code vectorizedFeatures} rows are cached and joined with themselves through a
     * {@code MinHashLSH} model with 3 hash tables.
     *
     * <p>The cache is materialized with {@code count()} instead of {@code collect()}: both run the
     * full computation, but {@code collect()} additionally copied every row to the driver only to
     * discard the result.
     *
     * @param rdd triples to compare
     * @param d   distance threshold passed to {@code approxSimilarityJoin}
     * @return the join result with sides {@code datasetA}/{@code datasetB} and a {@code distance} column
     */
    private Dataset<?> calculatePairwiseSimilarity(RDD<Triple> rdd, double d) {
        Dataset<Row> transform = new FeatureExtractorModel().setMode("or").transform(rdd);
        Dataset cache = new CountVectorizer().setInputCol("extractedFeatures").setOutputCol("vectorizedFeatures").fit(transform).transform(transform).filter(functions$.MODULE$.udf(vector -> {
            return BoxesRunTime.boxToBoolean($anonfun$calculatePairwiseSimilarity$1(vector));
        }, scala.reflect.runtime.package$.MODULE$.universe().TypeTag().Boolean(), scala.reflect.runtime.package$.MODULE$.universe().TypeTag().apply(scala.reflect.runtime.package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: net.sansa_stack.ml.spark.anomalydetection.DistADUtil$$typecreator1$2
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                mirror.universe();
                return mirror.staticClass("org.apache.spark.ml.linalg.Vector").asType().toTypeConstructor();
            }
        })).apply(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.col("vectorizedFeatures")}))).select("uri", Predef$.MODULE$.wrapRefArray(new String[]{"vectorizedFeatures"})).cache();
        // Force evaluation so the cache is populated before it is used three times below.
        cache.count();
        return new MinHashLSH().setNumHashTables(3).setInputCol("vectorizedFeatures").setOutputCol("hashValues").fit(cache).approxSimilarityJoin(cache, cache, d, "distance");
    }

    /**
     * Groups triples by subject: every subject maps to the mutable set of its
     * (subject, predicate, numeric value) tuples, where the value comes from {@code getNumber}
     * applied to the object's string form.
     *
     * @param rdd triples to group
     * @return pair RDD keyed by the subject's string form
     */
    public RDD<Tuple2<String, Set<Tuple3<String, String, Object>>>> propClustering(RDD<Triple> rdd) {
        RDD keyedBySubject = rdd.map(t -> {
            String subject = t.getSubject().toString();
            String predicate = t.getPredicate().toString();
            Object value = BoxesRunTime.boxToDouble(MODULE$.getNumber(t.getObject().toString()));
            return new Tuple2(subject, new Tuple3(subject, predicate, value));
        }, ClassTag$.MODULE$.apply(Tuple2.class));
        return RDD$.MODULE$
                .rddToPairRDDFunctions(keyedBySubject, ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(Tuple3.class), Ordering$String$.MODULE$)
                .aggregateByKey(
                        Set$.MODULE$.empty(),
                        (acc, item) -> acc.$plus$eq(item),
                        (left, right) -> left.$plus$plus$eq(right),
                        ClassTag$.MODULE$.apply(Set.class));
    }

    /**
     * Maps every triple to the pair (local name of subject, local name of predicate), using
     * {@code getLocalName} for both.
     *
     * @param rdd triples to map
     * @return RDD of (subject, predicate) local-name pairs
     */
    public RDD<Tuple2<String, String>> propWithSubject(RDD<Triple> rdd) {
        return rdd.map(t -> {
            String subjectName = MODULE$.getLocalName(t.getSubject());
            String predicateName = MODULE$.getLocalName(t.getPredicate());
            return new Tuple2(subjectName, predicateName);
        }, ClassTag$.MODULE$.apply(Tuple2.class));
    }

    /**
     * Slow path of the lazy session holder: while holding the monitor of {@code lazyRef}, returns
     * the stored session if there is one, otherwise builds a session with {@code getOrCreate()}
     * and stores it in the holder.
     */
    private static final /* synthetic */ SparkSession spark$lzycompute$1(LazyRef lazyRef) {
        synchronized (lazyRef) {
            if (lazyRef.initialized()) {
                return (SparkSession) lazyRef.value();
            }
            String registrators = String.join(", ", "net.sansa_stack.rdf.spark.io.JenaKryoRegistrator", "net.sansa_stack.query.spark.sparqlify.KryoRegistratorSparqlify");
            SparkSession session = SparkSession$.MODULE$.builder()
                    .appName("Anomaly Detection")
                    .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
                    .config("spark.kryo.registrator", registrators)
                    .config("spark.sql.crossJoin.enabled", true)
                    .config("spark.sql.shuffle.partitions", 2000L)
                    .config("spark.kryoserializer.buffer.max", 2047L)
                    .config("spark.sql.pivotMaxValues", 100000L)
                    .getOrCreate();
            return (SparkSession) lazyRef.initialize(session);
        }
    }

    /** Returns the session held by {@code lazyRef}, delegating to the synchronized slow path when it is not yet initialized. */
    private static final SparkSession spark$1(LazyRef lazyRef) {
        if (!lazyRef.initialized()) {
            return spark$lzycompute$1(lazyRef);
        }
        return (SparkSession) lazyRef.value();
    }

    /** Clears {@code booleanRef} when {@code c} is not a digit; leaves it untouched otherwise. */
    public static final /* synthetic */ void $anonfun$isAllDigits$1(BooleanRef booleanRef, char c) {
        final boolean digit = RichChar$.MODULE$.isDigit$extension(Predef$.MODULE$.charWrapper(c));
        if (!digit) {
            booleanRef.elem = false;
        }
    }

    /** Predicate: the triple's object is a literal. */
    public static final /* synthetic */ boolean $anonfun$getOnlyLiteralObjects$1(Triple triple) {
        final Node object = triple.getObject();
        return object.isLiteral();
    }

    /** Predicate: the string form of the triple's object passes {@code isNumeric}. */
    public static final /* synthetic */ boolean $anonfun$triplesWithNumericLit$1(Triple triple) {
        final String objectText = triple.getObject().toString();
        return MODULE$.isNumeric(objectText);
    }

    /** Predicate: {@code searchEdge} finds the object's trailing part in {@code objList()}. */
    public static final /* synthetic */ boolean $anonfun$triplesWithNumericLitWithTypeIgnoreEndingWithID$1(Triple triple) {
        final String objectText = triple.getObject().toString();
        return MODULE$.searchEdge(objectText, MODULE$.objList());
    }

    /**
     * Predicate: the triple's predicate does not end with "id", ignoring case.
     *
     * <p>Lower-casing uses {@code Locale.ROOT} so the result does not depend on the JVM's default
     * locale (under a Turkish locale, "ID" lower-cases to a dotless-i form that is not "id").
     */
    public static final /* synthetic */ boolean $anonfun$triplesWithNumericLitWithTypeIgnoreEndingWithID$2(Triple triple) {
        final String predicate = triple.getPredicate().toString();
        return !predicate.toLowerCase(java.util.Locale.ROOT).endsWith("id");
    }

    /**
     * Body of the {@code convertStringToDouble} UDF: returns {@code getNumber(str)}.
     *
     * <p>The previous code rendered that double as a string and parsed it again, which yields the
     * same value (including NaN) at extra cost; the round trip has been removed.
     */
    public static final /* synthetic */ double $anonfun$convertStringToDouble$1(String str) {
        return MODULE$.getNumber(str);
    }

    /** Writes {@code str} followed by a newline to {@code bufferedWriter}. */
    public static final /* synthetic */ void $anonfun$writeAnomaliesToFile$2(BufferedWriter bufferedWriter, String str) {
        final String line = str + "\n";
        bufferedWriter.write(line);
    }

    /** Predicate: the triple's subject is a member of {@code set}. */
    public static final /* synthetic */ boolean $anonfun$calculateMinHashLSHClustering$2(scala.collection.immutable.Set set, Triple triple) {
        final Node subject = triple.getSubject();
        return set.contains(subject);
    }

    /** Predicate: the second element of {@code tuple3} equals {@code str}. */
    public static final /* synthetic */ boolean $anonfun$calculateMinHashLSHClustering$17(String str, Tuple3 tuple3) {
        final String second = (String) tuple3._2();
        return second.equals(str);
    }

    /** Predicate: the vector has at least one non-zero entry. */
    public static final /* synthetic */ boolean $anonfun$calculatePairwiseSimilarity$1(Vector vector) {
        final int nonZeroEntries = vector.numNonzeros();
        return nonZeroEntries > 0;
    }

    /**
     * Initializes the singleton: publishes the instance through {@code MODULE$}, creates the
     * logger, fills {@code objList} with XML Schema numeric datatype IRIs and builds the
     * string-to-double UDF.
     */
    private DistADUtil$() {
        // Published first, so the static lambda helpers that go through MODULE$ can resolve the instance.
        MODULE$ = this;
        this.LOG = Logger.getLogger(getClass());
        this.objList = List$.MODULE$.apply(Predef$.MODULE$.wrapRefArray(new String[]{"http://www.w3.org/2001/XMLSchema#decimal", "http://www.w3.org/2001/XMLSchema#integer", "http://www.w3.org/2001/XMLSchema#double", "http://www.w3.org/2001/XMLSchema#float", "http://www.w3.org/2001/XMLSchema#int", "http://www.w3.org/2001/XMLSchema#long", "http://www.w3.org/2001/XMLSchema#unsignedInt", "http://www.w3.org/2001/XMLSchema#unsignedLong", "http://www.w3.org/2001/XMLSchema#positiveInteger", "http://www.w3.org/2001/XMLSchema#nonNegativeInteger", "http://www.w3.org/2001/XMLSchema#negativeInteger", "http://www.w3.org/2001/XMLSchema#nonPositiveInteger"}));
        // UDF with a Double result and a single String argument; the TypeCreator below describes the String type.
        this.convertStringToDouble = functions$.MODULE$.udf(str -> {
            return BoxesRunTime.boxToDouble($anonfun$convertStringToDouble$1(str));
        }, scala.reflect.runtime.package$.MODULE$.universe().TypeTag().Double(), scala.reflect.runtime.package$.MODULE$.universe().TypeTag().apply(scala.reflect.runtime.package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()), new TypeCreator() { // from class: net.sansa_stack.ml.spark.anomalydetection.DistADUtil$$typecreator1$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe = mirror.universe();
                return universe.internal().reificationSupport().TypeRef(universe.internal().reificationSupport().SingleType(mirror.staticPackage("scala").asModule().moduleClass().asType().toTypeConstructor(), mirror.staticModule("scala.Predef")), universe.internal().reificationSupport().selectType(mirror.staticModule("scala.Predef").asModule().moduleClass(), "String"), Nil$.MODULE$);
            }
        }));
    }
}
