package co.cask.cdap.examples.wikipedia;

import co.cask.cdap.api.spark.SparkExecutionContext;
import org.apache.commons.lang.StringUtils;
import org.apache.spark.SparkContext;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.rdd.RDD;
import org.apache.spark.rdd.RDD$;
import org.apache.spark.util.LongAccumulator;
import scala.Array$;
import scala.MatchError;
import scala.Predef$;
import scala.Tuple2;
import scala.Tuple3;
import scala.collection.immutable.Map;
import scala.math.Numeric$LongIsIntegral$;
import scala.math.Ordering;
import scala.math.Ordering$Long$;
import scala.math.Ordering$String$;
import scala.reflect.ClassTag;
import scala.reflect.ClassTag$;
import scala.runtime.BoxesRunTime;
import scala.runtime.ScalaRunTime$;

/* compiled from: ClusteringUtils.scala */
/* loaded from: input_file:co/cask/cdap/examples/wikipedia/ClusteringUtils$.class */
/**
 * Java-level view of the Scala singleton object {@code ClusteringUtils}: helper routines that
 * prepare an RDD of raw key/value byte arrays for clustering and that store clustering results.
 *
 * <p>All access goes through {@link #MODULE$}; the class cannot be instantiated from outside.
 */
public final class ClusteringUtils$ {
    /**
     * The single instance of this class. It is initialised at its declaration because Java does
     * not allow a {@code static final} field to be assigned from an instance constructor.
     */
    public static final ClusteringUtils$ MODULE$ = new ClusteringUtils$();

    /** Private: the only instance is {@link #MODULE$}. */
    private ClusteringUtils$() {
    }

    /**
     * Tokenizes the values of {@code rdd}, builds a vocabulary from the resulting terms and maps
     * every tokenized record through that vocabulary.
     *
     * <p>Two entries of {@code map} are read:
     * <ul>
     *   <li>{@code "stopwords.file"} - passed to {@code SimpleTokenizer}; the fallback used when
     *       the key is absent lives in a helper closure that is not visible here.</li>
     *   <li>{@code "vocab.size"} - upper bound on the vocabulary size; {@code -1} means "keep
     *       every term". The fallback for a missing key is likewise defined elsewhere.</li>
     * </ul>
     *
     * @param rdd pairs of byte arrays; only the values are used, the keys are dropped
     * @param map runtime arguments as described above
     * @return a triple of (records mapped through the vocabulary, vocabulary terms placed at the
     *         positions chosen by the vocabulary map, boxed {@code long} sum of the per-term
     *         values of the selected vocabulary)
     */
    public Tuple3<RDD<Tuple2<Object, Vector>>, String[], Object> preProcess(RDD<Tuple2<byte[], byte[]>> rdd, Map<String, String> map) {
        String stopWordsFile = (String) map.getOrElse("stopwords.file", new ClusteringUtils$$anonfun$4());
        int vocabSize = BoxesRunTime.unboxToInt(map.get("vocab.size").map(new ClusteringUtils$$anonfun$5()).getOrElse(new ClusteringUtils$$anonfun$1()));
        SimpleTokenizer tokenizer = new SimpleTokenizer(rdd.sparkContext(), stopWordsFile);

        ClassTag keyTag = ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(Byte.TYPE));
        ClassTag valueTag = ClassTag$.MODULE$.apply(ScalaRunTime$.MODULE$.arrayClass(Byte.TYPE));
        // Scala default-argument getter for the implicit Ordering; its result is unused here and
        // a null Ordering is passed below. Kept so the call sequence matches the compiled code.
        RDD$.MODULE$.rddToPairRDDFunctions$default$4(rdd);

        // Values only, each paired with its index, then run through the tokenizer closure.
        // NOTE(review): presumably yields (docId, tokens) pairs - confirm against anonfun$6.
        RDD tokenized = RDD$.MODULE$.rddToPairRDDFunctions(rdd, keyTag, valueTag, (Ordering) null).values().zipWithIndex().map(new ClusteringUtils$$anonfun$6(simpleTokenizerOf(tokenizer)), ClassTag$.MODULE$.apply(Tuple2.class));
        // Cached because it is traversed twice: once for the term counts, once for the final map.
        tokenized.cache();

        // (String term, long value) pairs merged per term.
        // NOTE(review): presumably term frequencies - confirm against anonfun$7 / anonfun$2.
        RDD termCounts = RDD$.MODULE$.rddToPairRDDFunctions(tokenized.flatMap(new ClusteringUtils$$anonfun$7(), ClassTag$.MODULE$.apply(Tuple2.class)), ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.Long(), Ordering$String$.MODULE$).reduceByKey(new ClusteringUtils$$anonfun$2());
        // Cached because it may be counted and is then collected or sorted.
        termCounts.cache();

        Tuple2[] selectedTerms;
        if (vocabSize == -1 || termCounts.count() <= ((long) vocabSize)) {
            // No limit, or fewer distinct terms than the limit: keep all of them, sorted locally.
            selectedTerms = (Tuple2[]) Predef$.MODULE$.refArrayOps((Object[]) termCounts.collect()).sortBy(new ClusteringUtils$$anonfun$8(), Ordering$Long$.MODULE$);
        } else {
            // Distributed sort in descending order (ascending = false), then keep the first vocabSize.
            selectedTerms = (Tuple2[]) termCounts.sortBy(new ClusteringUtils$$anonfun$9(), false, termCounts.sortBy$default$3(), Ordering$Long$.MODULE$, ClassTag$.MODULE$.Long()).take(vocabSize);
        }

        // Vocabulary: each selected term paired with its position in selectedTerms.
        Object terms = Predef$.MODULE$.refArrayOps(selectedTerms).map(new ClusteringUtils$$anonfun$10(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class)));
        Object indexedTerms = Predef$.MODULE$.refArrayOps((Object[]) terms).zipWithIndex(Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)));
        Map vocabulary = (Map) Predef$.MODULE$.refArrayOps((Object[]) indexedTerms).toMap(Predef$.MODULE$.$conforms());

        // Sum of the long values extracted from the selected terms.
        Object perTermValues = Predef$.MODULE$.refArrayOps(selectedTerms).map(new ClusteringUtils$$anonfun$11(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.Long()));
        long selectedTotal = BoxesRunTime.unboxToLong(Predef$.MODULE$.longArrayOps((long[]) perTermValues).sum(Numeric$LongIsIntegral$.MODULE$));

        RDD documents = tokenized.map(new ClusteringUtils$$anonfun$12(vocabulary), ClassTag$.MODULE$.apply(Tuple2.class));

        // The closure fills the array from the vocabulary map; the array has one slot per entry.
        String[] vocabArray = new String[vocabulary.size()];
        vocabulary.foreach(new ClusteringUtils$$anonfun$preProcess$1(vocabArray));

        return new Tuple3<>(documents, vocabArray, BoxesRunTime.boxToLong(selectedTotal));
    }

    /**
     * Stores clustering results through the CDAP execution context and then hands two
     * accumulators to the workflow token, if one is present.
     *
     * <p>A long accumulator named {@code "num.records"} and a {@code HighestAccumulator} named
     * {@code "highest.score"} (seeded with an empty term and a score of {@code 0.0}) are created
     * on {@code sparkContext} and passed to the task run by {@code sparkExecutionContext}.
     *
     * @param sparkContext          context used to create and register the accumulators
     * @param sparkExecutionContext CDAP context that runs the storing task and supplies the
     *                              workflow token
     * @param tuple2Arr             the results to store, as an array of arrays of pairs
     * @param str                   forwarded unchanged to the storing task
     *                              (NOTE(review): presumably a namespace - confirm in anon$1)
     * @param str2                  forwarded unchanged to the storing task
     *                              (NOTE(review): presumably a dataset/table name - confirm)
     */
    public void storeResults(SparkContext sparkContext, SparkExecutionContext sparkExecutionContext, Tuple2<String, Object>[][] tuple2Arr, String str, String str2) {
        LongAccumulator recordCount = sparkContext.longAccumulator("num.records");
        HighestAccumulator highestScore = new HighestAccumulator(new Term(StringUtils.EMPTY, 0.0d));
        sparkContext.register(highestScore, "highest.score");

        sparkExecutionContext.execute(new ClusteringUtils$$anon$1(tuple2Arr, str, str2, recordCount, highestScore));
        // Runs only when a workflow token is available.
        sparkExecutionContext.getWorkflowToken().foreach(new ClusteringUtils$$anonfun$storeResults$1(recordCount, highestScore));
    }

    /** Identity helper that keeps the tokenizer hand-off to the mapping closure on one line. */
    private static SimpleTokenizer simpleTokenizerOf(SimpleTokenizer tokenizer) {
        return tokenizer;
    }
}
