/*
 * Decompiled with CFR 0.152.
 */
package de.viadee.ki.sparkimporter.processing.steps.userconfig;

import de.viadee.ki.sparkimporter.configuration.Configuration;
import de.viadee.ki.sparkimporter.configuration.preprocessing.ColumnHashConfiguration;
import de.viadee.ki.sparkimporter.configuration.preprocessing.PreprocessingConfiguration;
import de.viadee.ki.sparkimporter.configuration.util.ConfigurationUtils;
import de.viadee.ki.sparkimporter.processing.interfaces.PreprocessingStepInterface;
import de.viadee.ki.sparkimporter.util.SparkImporterLogger;
import de.viadee.ki.sparkimporter.util.SparkImporterUtils;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Map;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.functions;

/**
 * Preprocessing step that replaces the content of configured columns with the
 * result of Spark's {@code sha1} function.
 *
 * <p>The columns to process are read from the column hash entries of the
 * preprocessing configuration obtained through {@link ConfigurationUtils}.
 * Only entries flagged via {@code isHashColumn()} are considered.
 */
public class ColumnHashStep implements PreprocessingStepInterface {

    /**
     * Applies {@code sha1} to every configured column that is present in the dataset.
     *
     * <p>A configured column that is missing from the dataset is skipped and a
     * warning is logged. If no configuration or no preprocessing configuration
     * is available, the dataset is passed through without any hashing.
     *
     * @param dataSet the dataset to process
     * @param writeStepResultIntoFile if {@code true}, the resulting dataset is handed to
     *        {@code SparkImporterUtils.writeDatasetToCSV} under the name {@code column_hash_step}
     * @param dataLevel not used by this step
     * @param parameters not used by this step
     * @return the dataset with the configured columns replaced by their hashed values
     */
    @Override
    public Dataset<Row> runPreprocessingStep(Dataset<Row> dataSet, boolean writeStepResultIntoFile, String dataLevel, Map<String, Object> parameters) {
        // Column names are captured once up front; hashing overwrites a column
        // under its existing name, so this snapshot is what every entry is checked against.
        ArrayList<String> knownColumns = new ArrayList<String>(Arrays.asList(dataSet.columns()));

        Configuration config = ConfigurationUtils.getInstance().getConfiguration();
        PreprocessingConfiguration preprocessing = config == null ? null : config.getPreprocessingConfiguration();

        Dataset<Row> result = dataSet;
        if (preprocessing != null) {
            for (ColumnHashConfiguration entry : preprocessing.getColumnHashConfiguration()) {
                if (entry.isHashColumn()) {
                    String columnName = entry.getColumnName();
                    if (knownColumns.contains(columnName)) {
                        // Overwrite the column in place with its hashed value.
                        result = result.withColumn(columnName, functions.sha1(result.col(columnName)));
                        SparkImporterLogger.getInstance().writeInfo("The column '" + columnName + "' is being hashed.");
                    } else {
                        SparkImporterLogger.getInstance().writeWarn("The column '" + columnName + "' is configured to be hashed, but does not exist in the data.");
                    }
                }
            }
        }

        // The optional export happens regardless of whether any configuration was found.
        if (writeStepResultIntoFile) {
            SparkImporterUtils.getInstance().writeDatasetToCSV(result, "column_hash_step");
        }
        return result;
    }
}

