/*
 * Decompiled with CFR 0.152.
 */
package de.viadee.bpmnai.core.processing.steps.dataprocessing;

import de.viadee.bpmnai.core.annotation.PreprocessingStepDescription;
import de.viadee.bpmnai.core.configuration.Configuration;
import de.viadee.bpmnai.core.configuration.preprocessing.ColumnHashConfiguration;
import de.viadee.bpmnai.core.configuration.preprocessing.PreprocessingConfiguration;
import de.viadee.bpmnai.core.configuration.util.ConfigurationUtils;
import de.viadee.bpmnai.core.processing.interfaces.PreprocessingStepInterface;
import de.viadee.bpmnai.core.runner.config.SparkRunnerConfig;
import de.viadee.bpmnai.core.util.BpmnaiUtils;
import de.viadee.bpmnai.core.util.logging.BpmnaiLogger;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Map;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.functions;

/**
 * Preprocessing step that replaces the content of configured columns with the
 * result of Spark's {@code sha1} function.
 *
 * <p>Which columns are affected is read from the {@link ColumnHashConfiguration}
 * entries of the {@link PreprocessingConfiguration} obtained via
 * {@link ConfigurationUtils}.
 */
@PreprocessingStepDescription(name="Hash column", description="In this step the columns that are configured to be hashed for anonymization are run through a SHA-1 hash operation.")
public class ColumnHashStep
implements PreprocessingStepInterface {

    /**
     * Applies {@code sha1} to every column that is flagged for hashing and is
     * present in the incoming dataset.
     *
     * <p>Entries flagged for hashing whose column is not part of the dataset
     * only produce a warning. If no configuration or no preprocessing
     * configuration is available, no column is hashed.
     *
     * @param dataSet    the dataset to process
     * @param parameters step parameters; not read by this step
     * @param config     runner configuration used to look up the configuration
     *                   and to decide whether the result is handed to
     *                   {@code writeDatasetToCSV}
     * @return the dataset with the hashed columns, or the unchanged input if
     *         nothing was hashed
     */
    @Override
    public Dataset<Row> runPreprocessingStep(Dataset<Row> dataSet, Map<String, Object> parameters, SparkRunnerConfig config) {
        // Column names are captured once, up front, from the incoming dataset.
        ArrayList<String> availableColumns = new ArrayList<String>(Arrays.asList(dataSet.columns()));

        Configuration configuration = ConfigurationUtils.getInstance().getConfiguration(config);
        PreprocessingConfiguration preprocessing = configuration == null
                ? null
                : configuration.getPreprocessingConfiguration();

        Dataset<Row> hashed = dataSet;
        if (preprocessing != null) {
            for (ColumnHashConfiguration entry : preprocessing.getColumnHashConfiguration()) {
                if (entry.isHashColumn()) {
                    hashed = ColumnHashStep.hashColumn(hashed, availableColumns, entry.getColumnName());
                }
            }
        }

        if (config.isWriteStepResultsIntoFile()) {
            BpmnaiUtils.getInstance().writeDatasetToCSV(hashed, "column_hash_step", config);
        }
        return hashed;
    }

    /**
     * Hashes a single column if it is known, otherwise logs a warning.
     *
     * @param data             the dataset as transformed so far
     * @param availableColumns names of the columns of the incoming dataset
     * @param columnName       name of the column configured to be hashed
     * @return {@code data} with the column overwritten by its {@code sha1}
     *         value, or {@code data} itself if the column is unknown
     */
    private static Dataset<Row> hashColumn(Dataset<Row> data, ArrayList<String> availableColumns, String columnName) {
        if (availableColumns.contains(columnName)) {
            // Using the same name in withColumn overwrites the column in place.
            Dataset<Row> result = data.withColumn(columnName, functions.sha1(data.col(columnName)));
            BpmnaiLogger.getInstance().writeInfo("The column '" + columnName + "' is being hashed.");
            return result;
        }
        BpmnaiLogger.getInstance().writeWarn("The column '" + columnName + "' is configured to be hashed, but does not exist in the data.");
        return data;
    }
}

