package org.apache.hudi.utilities.sources.helpers;

import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.IOException;
import java.lang.invoke.SerializedLambda;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.avro.Schema;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hudi.AvroConversionUtils;
import org.apache.hudi.common.config.HoodieStorageConfig;
import org.apache.hudi.common.config.SerializableConfiguration;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.util.CollectionUtils;
import org.apache.hudi.common.util.ConfigUtils;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.StringUtils;
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.utilities.config.CloudSourceConfig;
import org.apache.hudi.utilities.config.S3EventsHoodieIncrSourceConfig;
import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.hudi.utilities.sources.InputBatch;
import org.apache.spark.api.java.function.MapPartitionsFunction;
import org.apache.spark.sql.DataFrameReader;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/hudi/utilities/sources/helpers/CloudObjectsSelectorCommon.class */
/**
 * Static helpers for sources that ingest objects referenced by cloud storage events:
 * converting event rows into {@link CloudObjectMetadata} and loading the referenced
 * objects into a Spark {@link Dataset}.
 */
public class CloudObjectsSelectorCommon {
    private static final Logger LOG = LoggerFactory.getLogger(CloudObjectsSelectorCommon.class);

    /** Shared JSON parser used to decode the datasource-options config value. */
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    /**
     * Builds a partition-level function that converts event rows into {@link CloudObjectMetadata}.
     *
     * <p>Each row is read positionally: columns 0 and 1 are strings that are joined (after
     * {@code str}) into the object URL, and column 2 is the object size, accepted as a
     * {@code String}, {@code Integer} or {@code Long}.
     *
     * @param str                       prefix prepended to column 0 when building the object URL
     * @param serializableConfiguration Hadoop configuration used for the optional existence check
     * @param z                         when {@code true}, rows whose object does not exist are skipped
     * @return a function mapping the rows of one partition to the metadata of the retained objects
     */
    public static MapPartitionsFunction<Row, CloudObjectMetadata> getCloudObjectMetadataPerPartition(String str, SerializableConfiguration serializableConfiguration, boolean z) {
        return rows -> {
            List<CloudObjectMetadata> cloudObjects = new ArrayList<>();
            rows.forEachRemaining(row -> {
                Option<String> url = getUrlForFile(row, str, serializableConfiguration, z);
                if (url.isPresent()) {
                    String filePath = url.get();
                    LOG.info("Adding file: {}", filePath);
                    cloudObjects.add(new CloudObjectMetadata(filePath, parseObjectSize(row.get(2))));
                }
            });
            return cloudObjects.iterator();
        };
    }

    /**
     * Converts the raw size column of an event row into a {@code long}.
     *
     * @param rawSize value of the size column; a {@code String}, {@code Integer} or {@code Long}
     * @return the size as a {@code long}
     * @throws HoodieIOException     if the value is {@code null} or of any other type
     * @throws NumberFormatException if a {@code String} value is not a parsable long
     */
    private static long parseObjectSize(Object rawSize) {
        if (rawSize instanceof String) {
            return Long.parseLong((String) rawSize);
        }
        if (rawSize instanceof Integer || rawSize instanceof Long) {
            return ((Number) rawSize).longValue();
        }
        // Guard the null case explicitly so callers get the descriptive exception, not a bare NPE.
        throw new HoodieIOException("unexpected object size's type in Cloud storage events: "
            + (rawSize == null ? "null" : rawSize.getClass()));
    }

    /**
     * Builds the URL-decoded path of the object described by {@code row}.
     *
     * @param row                       event row; columns 0 and 1 are read as strings
     * @param urlPrefix                 prefix prepended to column 0
     * @param serializableConfiguration Hadoop configuration, copied only when the existence check runs
     * @param checkIfExists             when {@code true}, the path is checked against the file system
     * @return the decoded path, or an empty option when the check is enabled and the path is missing
     * @throws HoodieException if decoding or the existence check fails
     */
    private static Option<String> getUrlForFile(Row row, String urlPrefix, SerializableConfiguration serializableConfiguration, boolean checkIfExists) {
        // NOTE(review): column 0 is presumably the bucket/container and column 1 the object key;
        // confirm against the query that produces these rows.
        String container = row.getString(0);
        String encodedPath = urlPrefix + container + "/" + row.getString(1);
        try {
            String decodedPath = URLDecoder.decode(encodedPath, StandardCharsets.UTF_8.name());
            // The configuration copy is made per row, so only pay for it when the check is requested.
            if (checkIfExists && !checkIfFileExists(urlPrefix, container, decodedPath, serializableConfiguration.newCopy())) {
                return Option.empty();
            }
            return Option.of(decodedPath);
        } catch (Exception e) {
            String message = String.format("Failed to generate path to cloud file %s", encodedPath);
            LOG.warn(message, e);
            throw new HoodieException(message, e);
        }
    }

    /**
     * Checks whether {@code filePath} exists on the file system resolved from
     * {@code urlPrefix + container}.
     *
     * @throws HoodieIOException if the file system lookup fails with an {@link IOException}
     */
    private static boolean checkIfFileExists(String urlPrefix, String container, String filePath, Configuration configuration) {
        try {
            return FSUtils.getFs(urlPrefix + container, configuration).exists(new Path(filePath));
        } catch (IOException e) {
            String message = String.format("Error while checking path exists for %s ", filePath);
            LOG.error(message, e);
            throw new HoodieIOException(message, e);
        }
    }

    /**
     * Loads the given cloud objects into a single dataset.
     *
     * <p>The reader uses the source schema from {@code option} when one is present and is not
     * {@link InputBatch#NULL_SCHEMA}, and applies reader options parsed from a JSON object found
     * under {@link CloudSourceConfig#SPARK_DATASOURCE_OPTIONS} (falling back to
     * {@link S3EventsHoodieIncrSourceConfig#SPARK_DATASOURCE_OPTIONS}). The result is coalesced
     * to {@code max(1, totalSize * 1.1 / parquetMaxFileSize)} partitions, and one column is
     * appended per field configured in {@link CloudSourceConfig#PATH_BASED_PARTITION_FIELDS},
     * extracted from the {@code field=value} segment of each input file name.
     *
     * @param sparkSession    session used to create the reader
     * @param list            objects to load; {@code null} or empty yields an empty option
     * @param typedProperties configuration properties
     * @param str             Spark datasource format name passed to {@code DataFrameReader.format}
     * @param option          optional schema provider supplying the source schema
     * @return the loaded dataset, or an empty option when there is nothing to load
     * @throws HoodieException if the configured datasource options are not valid JSON
     */
    public static Option<Dataset<Row>> loadAsDataset(SparkSession sparkSession, List<CloudObjectMetadata> list, TypedProperties typedProperties, String str, Option<SchemaProvider> option) {
        // The list may be null; do not touch it in the debug log before the guard below.
        if (list != null && LOG.isDebugEnabled()) {
            LOG.debug("Extracted distinct files {} and some samples {}", list.size(),
                list.stream().map(CloudObjectMetadata::getPath).limit(10L).collect(Collectors.toList()));
        }
        if (CollectionUtils.isNullOrEmpty(list)) {
            return Option.empty();
        }

        DataFrameReader reader = sparkSession.read().format(str);
        String datasourceOptions = ConfigUtils.getStringWithAltKeys(typedProperties, CloudSourceConfig.SPARK_DATASOURCE_OPTIONS, true);
        if (option.isPresent()) {
            Schema sourceSchema = option.get().getSourceSchema();
            if (sourceSchema != null && !sourceSchema.equals(InputBatch.NULL_SCHEMA)) {
                reader = reader.schema(AvroConversionUtils.convertAvroSchemaToStructType(sourceSchema));
            }
        }
        if (StringUtils.isNullOrEmpty(datasourceOptions)) {
            datasourceOptions = ConfigUtils.getStringWithAltKeys(typedProperties, S3EventsHoodieIncrSourceConfig.SPARK_DATASOURCE_OPTIONS, true);
        }
        if (StringUtils.nonEmpty(datasourceOptions)) {
            try {
                // Jackson returns a raw Map here; the value is handed to Spark as string options.
                @SuppressWarnings("unchecked")
                Map<String, String> sparkOptions = OBJECT_MAPPER.readValue(datasourceOptions, Map.class);
                LOG.info(String.format("sparkOptions loaded: %s", sparkOptions));
                reader = reader.options(sparkOptions);
            } catch (IOException e) {
                throw new HoodieException(String.format("Failed to parse sparkOptions: %s", datasourceOptions), e);
            }
        }

        List<String> paths = new ArrayList<>(list.size());
        long totalSize = 0L;
        for (CloudObjectMetadata cloudObject : list) {
            paths.add(cloudObject.getPath());
            totalSize += cloudObject.getSize();
        }

        boolean commaSeparatedPaths = typedProperties.getBoolean(CloudStoreIngestionConfig.SPARK_DATASOURCE_READER_COMMA_SEPARATED_PATH_FORMAT, false);
        Dataset<Row> loaded = commaSeparatedPaths
            ? reader.load(String.join(",", paths))
            : reader.load(paths.toArray(new String[0]));

        long maxFileSize = typedProperties.getLong(HoodieStorageConfig.PARQUET_MAX_FILE_SIZE.key(), Long.parseLong(HoodieStorageConfig.PARQUET_MAX_FILE_SIZE.defaultValue()));
        long inflatedSize = (long) (totalSize * 1.1d);
        // At least one partition; clamp so the narrowing cast can never wrap around.
        int numPartitions = (int) Math.min(Math.max(inflatedSize / maxFileSize, 1L), (long) Integer.MAX_VALUE);
        Dataset<Row> dataset = loaded.coalesce(numPartitions);

        if (ConfigUtils.containsConfigProperty(typedProperties, CloudSourceConfig.PATH_BASED_PARTITION_FIELDS)) {
            String partitionFields = ConfigUtils.getStringWithAltKeys(typedProperties, CloudSourceConfig.PATH_BASED_PARTITION_FIELDS);
            for (String field : partitionFields.split(",")) {
                String keyPrefix = String.format("%s=", field);
                LOG.info(String.format("Adding column %s to dataset", field));
                // Take what follows "field=" in the input file name, up to the next "/".
                dataset = dataset.withColumn(field, functions.split(functions.split(functions.input_file_name(), keyPrefix).getItem(1), "/").getItem(0));
            }
        }
        return Option.of(dataset);
    }

    /**
     * Loads the given cloud objects into a single dataset without a schema provider.
     *
     * @see #loadAsDataset(SparkSession, List, TypedProperties, String, Option)
     */
    public static Option<Dataset<Row>> loadAsDataset(SparkSession sparkSession, List<CloudObjectMetadata> list, TypedProperties typedProperties, String str) {
        return loadAsDataset(sparkSession, list, typedProperties, str, Option.empty());
    }
}
