package com.linkedin.feathr.offline.generation;

import com.databricks.spark.avro.SchemaConverters$;
import com.linkedin.feathr.common.FeatureInfo;
import com.linkedin.feathr.common.FeatureValue;
import com.linkedin.feathr.common.Header;
import com.linkedin.feathr.common.TaggedFeatureName;
import com.linkedin.feathr.common.configObj.generation.OutputProcessorConfig;
import com.linkedin.feathr.common.package$;
import com.linkedin.feathr.offline.source.dataloader.DataLoaderHandler;
import com.linkedin.feathr.offline.util.FeatureGenConstants$;
import com.linkedin.feathr.offline.util.HdfsUtils$;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.StructType;
import scala.MatchError;
import scala.Option;
import scala.Predef$;
import scala.Tuple2;
import scala.Tuple3;
import scala.collection.Seq;
import scala.collection.SeqLike;
import scala.collection.TraversableOnce;
import scala.collection.immutable.$colon;
import scala.collection.immutable.Iterable$;
import scala.collection.immutable.List;
import scala.collection.immutable.Map;
import scala.collection.immutable.Nil$;
import scala.math.Ordering$String$;
import scala.reflect.ClassTag$;
import scala.reflect.api.Mirror;
import scala.reflect.api.TypeCreator;
import scala.reflect.api.Types;
import scala.reflect.api.Universe;
import scala.runtime.BoxesRunTime;

/* compiled from: FeatureDataHDFSProcessUtils.scala */
/* loaded from: input_file:com/linkedin/feathr/offline/generation/FeatureDataHDFSProcessUtils$.class */
/**
 * Singleton holder (decompiled Scala {@code object}) with helpers for emitting generated
 * feature data, Avro schemas and header metadata through Spark.
 *
 * <p>The single instance is published through {@link #MODULE$}; the static initializer runs
 * the private constructor, which assigns it.
 */
public final class FeatureDataHDFSProcessUtils$ {
    /** The one instance of this class; set by the private constructor. */
    public static FeatureDataHDFSProcessUtils$ MODULE$;
    private final String useFloatInNTV;
    private final String valueSchemaFieldZName;
    private final String keySchemaFieldName;
    private final String featureNameField;

    static {
        new FeatureDataHDFSProcessUtils$();
    }

    /**
     * Validates the per-feature results and either hands back the shared dataset/header pair
     * or delegates to {@code RawDataWriterUtils$.writeFdsDataToDisk}.
     *
     * <p>Two assertions run first: every key of {@code map} must report the same key-tag count,
     * and every value must carry the same {@code Dataset} (so an empty map fails the assertion).
     *
     * @param sparkSession         forwarded to the writer
     * @param map                  feature name to (dataset, header) entries
     * @param str                  forwarded unchanged to the writer (presumably the output
     *                             location - TODO confirm against the writer)
     * @param outputProcessorConfig its params are read for {@code "num-parts"} and for the
     *                             {@code SAVE_SCHEMA_META} flag (default {@code false})
     * @param z                    when {@code true}, nothing is written and the first entry's
     *                             (dataset, header) pair is returned directly
     * @param option               forwarded unchanged to the writer
     * @param option2              not referenced by this method
     * @param list                 forwarded unchanged to the writer
     * @return the first entry's (dataset, header) pair when {@code z} is set, otherwise whatever
     *         the writer returns
     * @throws MatchError if the first entry's value is {@code null}
     */
    public Tuple2<Dataset<Row>, Header> processFeatureDataHDFS(SparkSession sparkSession, Map<TaggedFeatureName, Tuple2<Dataset<Row>, Header>> map, String str, OutputProcessorConfig outputProcessorConfig, boolean z, Option<String> option, Option<String> option2, List<DataLoaderHandler> list) {
        // All features must agree on the number of key tags.
        TraversableOnce keyTagSizes = (TraversableOnce) map.map(
                entry -> BoxesRunTime.boxToInteger($anonfun$processFeatureDataHDFS$1(entry)),
                Iterable$.MODULE$.canBuildFrom());
        SeqLike distinctKeyTagSizes = (SeqLike) keyTagSizes.toSeq().distinct();
        Predef$.MODULE$.assert(distinctKeyTagSizes.size() == 1);

        // All features must point at one and the same dataset.
        TraversableOnce frames = (TraversableOnce) map.map(
                entry -> (Dataset) ((Tuple2) entry._2())._1(),
                Iterable$.MODULE$.canBuildFrom());
        SeqLike distinctFrames = (SeqLike) frames.toSeq().distinct();
        Predef$.MODULE$.assert(distinctFrames.size() == 1);

        Option<Number> numPartitions =
                package$.MODULE$.RichConfig(outputProcessorConfig.getParams()).getNumberOpt("num-parts");
        boolean saveSchemaMeta =
                package$.MODULE$.RichConfig(outputProcessorConfig.getParams())
                        .getBooleanWithDefault(FeatureGenConstants$.MODULE$.SAVE_SCHEMA_META(), false);
        Map<TaggedFeatureName, Header> headers = map.mapValues(pair -> (Header) pair._2());

        // Because of the assertions above, the first entry stands in for all of them.
        Tuple2 first = (Tuple2) ((Tuple2) map.head())._2();
        if (first == null) {
            throw new MatchError(first);
        }
        Dataset<Row> dataset = (Dataset) first._1();
        Header header = (Header) first._2();

        if (z) {
            return new Tuple2<>(dataset, header);
        }
        return RawDataWriterUtils$.MODULE$.writeFdsDataToDisk(sparkSession, headers, str, numPartitions, option, saveSchemaMeta, dataset, header, list);
    }

    /** Default for the fifth parameter ({@code z}) of {@code processFeatureDataHDFS}. */
    public boolean processFeatureDataHDFS$default$5() {
        return false;
    }

    /**
     * Converts a Spark {@code StructType} to an Avro {@code Schema} via
     * {@code SchemaConverters$.convertStructToAvro}.
     *
     * @param structType the Spark schema to convert
     * @param str        name given to the Avro record builder
     * @param str2       namespace, applied to the record builder and also passed to the converter
     * @return the converted Avro schema
     */
    public Schema convertToAvroSchema(StructType structType, String str, String str2) {
        SchemaBuilder.RecordBuilder<Schema> recordBuilder = SchemaBuilder.record(str).namespace(str2);
        return (Schema) SchemaConverters$.MODULE$.convertStructToAvro(structType, recordBuilder, str2);
    }

    /** Default for the second parameter (record name) of {@code convertToAvroSchema}. */
    public String convertToAvroSchema$default$2() {
        return "topLevelRecord";
    }

    /** Default for the third parameter (namespace) of {@code convertToAvroSchema}. */
    public String convertToAvroSchema$default$3() {
        return FeatureValue.EMPTY_TERM;
    }

    /**
     * Saves the schema's {@code toString(false)} rendering as a text file at {@code str}.
     *
     * <p>The string is parallelized as a one-element collection, repartitioned to a single
     * partition, the target path is deleted through {@code HdfsUtils$.deletePath}, and then
     * the RDD is saved with {@code saveAsTextFile}.
     *
     * @param sparkSession supplies the Spark context
     * @param schema       schema to serialize
     * @param str          path that is first deleted and then written
     */
    public void writeSchemaToDisk(SparkSession sparkSession, Schema schema, String str) {
        RDD schemaLines = sparkSession.sparkContext().parallelize(
                new $colon.colon(schema.toString(false), Nil$.MODULE$),
                sparkSession.sparkContext().parallelize$default$2(),
                ClassTag$.MODULE$.apply(String.class));
        RDD singlePartition = schemaLines.repartition(1, Ordering$String$.MODULE$);
        // Clear the destination before saving.
        HdfsUtils$.MODULE$.deletePath(str, true, HdfsUtils$.MODULE$.deletePath$default$3());
        singlePartition.saveAsTextFile(str);
    }

    /**
     * Writes one row per feature - tagged feature name, that feature's {@code FeatureInfo}
     * string, and a shared {@code "features: [...]"} summary - to {@code str} in overwrite mode
     * using the {@code com.databricks.spark.csv} format with a header line.
     *
     * @param sparkSession supplies the implicits used to build the DataFrame
     * @param map          tagged feature name to header; each header's {@code featureInfoMap}
     *                     is looked up with the entry's own key
     * @param str          destination passed to {@code save}
     * @throws MatchError if an entry of {@code map} is {@code null}
     */
    public void writeHeaderDataToDF(SparkSession sparkSession, Map<TaggedFeatureName, Header> map, String str) {
        TraversableOnce featureNames = (TraversableOnce) map.map(entry -> {
            if (entry == null) {
                throw new MatchError(entry);
            }
            return ((TaggedFeatureName) entry._1()).getFeatureName();
        }, Iterable$.MODULE$.canBuildFrom());
        String featureSummary = "features: [" + featureNames.mkString(",") + "]";

        TraversableOnce rowTuples = (TraversableOnce) map.map(entry -> {
            if (entry == null) {
                throw new MatchError(entry);
            }
            TaggedFeatureName taggedName = (TaggedFeatureName) entry._1();
            Header entryHeader = (Header) entry._2();
            String featureInfo = ((FeatureInfo) entryHeader.featureInfoMap().apply(taggedName)).toString();
            return new Tuple3(taggedName.toString(), featureInfo, featureSummary);
        }, Iterable$.MODULE$.canBuildFrom());
        Seq rows = rowTuples.toSeq();

        // Compiler-generated type description for Tuple3[String, String, String], needed by the encoder.
        TypeCreator tuple3OfStrings = new TypeCreator() {
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe = mirror.universe();
                return universe.internal().reificationSupport().TypeRef(
                        universe.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()),
                        mirror.staticClass("scala.Tuple3"),
                        new $colon.colon(
                                mirror.staticClass("java.lang.String").asType().toTypeConstructor(),
                                new $colon.colon(
                                        universe.internal().reificationSupport().TypeRef(
                                                universe.internal().reificationSupport().SingleType(
                                                        mirror.staticPackage("scala").asModule().moduleClass().asType().toTypeConstructor(),
                                                        mirror.staticModule("scala.Predef")),
                                                universe.internal().reificationSupport().selectType(
                                                        mirror.staticModule("scala.Predef").asModule().moduleClass(), "String"),
                                                Nil$.MODULE$),
                                        new $colon.colon(
                                                mirror.staticClass("java.lang.String").asType().toTypeConstructor(),
                                                Nil$.MODULE$))));
            }
        };
        String[] columnNames = new String[]{"taggedFeatureName", "header", "features"};

        Dataset<Row> headerFrame = sparkSession.implicits()
                .localSeqToDatasetHolder(
                        rows,
                        sparkSession.implicits().newProductEncoder(
                                scala.reflect.runtime.package$.MODULE$.universe().TypeTag().apply(
                                        scala.reflect.runtime.package$.MODULE$.universe().runtimeMirror(getClass().getClassLoader()),
                                        tuple3OfStrings)))
                .toDF(Predef$.MODULE$.wrapRefArray(columnNames));
        headerFrame.write()
                .mode("overwrite")
                .format("com.databricks.spark.csv")
                .option("header", "true")
                .save(str);
    }

    /** @return the constant {@code "useFloatInNTV"} */
    public String useFloatInNTV() {
        return this.useFloatInNTV;
    }

    /** @return the constant {@code "featureList"} */
    public String valueSchemaFieldZName() {
        return this.valueSchemaFieldZName;
    }

    /** @return the constant {@code "key"} */
    public String keySchemaFieldName() {
        return this.keySchemaFieldName;
    }

    /** @return the constant {@code "featureName"} */
    public String featureNameField() {
        return this.featureNameField;
    }

    /** @return the constant {@code "taggedFeatureName"} */
    public final String metaHeaderColumnName() {
        return "taggedFeatureName";
    }

    /** @return the constant {@code "num-parts"} */
    public final String NUM_PARTS() {
        return "num-parts";
    }

    /**
     * Synthetic lambda body: the size of the key-tag collection of the entry's
     * {@code TaggedFeatureName} key.
     */
    public static final /* synthetic */ int $anonfun$processFeatureDataHDFS$1(Tuple2 tuple2) {
        TaggedFeatureName taggedName = (TaggedFeatureName) tuple2._1();
        return taggedName.getKeyTag().size();
    }

    /** Publishes the instance through {@link #MODULE$} and initializes the constant fields. */
    private FeatureDataHDFSProcessUtils$() {
        MODULE$ = this;
        this.useFloatInNTV = "useFloatInNTV";
        this.valueSchemaFieldZName = "featureList";
        this.keySchemaFieldName = "key";
        this.featureNameField = "featureName";
    }
}
