/*
 * Decompiled with CFR 0.152.
 */
package de.viadee.ki.sparkimporter.processing.steps.dataprocessing;

import de.viadee.ki.sparkimporter.configuration.Configuration;
import de.viadee.ki.sparkimporter.configuration.preprocessing.ColumnConfiguration;
import de.viadee.ki.sparkimporter.configuration.preprocessing.PreprocessingConfiguration;
import de.viadee.ki.sparkimporter.configuration.util.ConfigurationUtils;
import de.viadee.ki.sparkimporter.processing.interfaces.PreprocessingStepInterface;
import de.viadee.ki.sparkimporter.util.SparkImporterLogger;
import de.viadee.ki.sparkimporter.util.SparkImporterUtils;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Consumer;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

/**
 * Preprocessing step that drops every column the preprocessing configuration marks as
 * "not used" ({@code isUseColumn() == false}).
 *
 * <p>A fixed set of columns ({@link #REQUIRED_COLUMNS}) is never dropped, even when the
 * configuration asks for it; a warning is logged instead. A warning is also logged for every
 * configured column that is not present in the incoming data set.
 */
public class ColumnRemoveStep
implements PreprocessingStepInterface {
    /** Columns that always stay in the data set, regardless of the configuration. */
    private static final Set<String> REQUIRED_COLUMNS = Collections.unmodifiableSet(new HashSet<String>(Arrays.asList(
            "proc_inst_id_",
            "name_",
            "var_type_",
            "rev_",
            "state_",
            "long_",
            "double_",
            "text_",
            "text2_")));

    /**
     * Removes the columns configured as unused from the given data set.
     *
     * @param dataSet                 the data set to remove columns from
     * @param writeStepResultIntoFile not used by this step
     * @param dataLevel               not used by this step
     * @param parameters              not used by this step
     * @return the result of dropping the configured columns from {@code dataSet}
     */
    @Override
    public Dataset<Row> runPreprocessingStep(Dataset<Row> dataSet, boolean writeStepResultIntoFile, String dataLevel, Map<String, Object> parameters) {
        ArrayList<String> columnsToRemove = collectColumnsToRemove();
        warnAboutMissingColumns(columnsToRemove, dataSet.columns());
        // Columns that do not exist are still handed to drop(), exactly as before; they were
        // only reported above.
        return dataSet.drop(SparkImporterUtils.getInstance().asSeq(columnsToRemove));
    }

    /**
     * Reads the preprocessing configuration and collects the names of all columns that are
     * configured as unused and are not part of {@link #REQUIRED_COLUMNS}.
     *
     * <p>Returns an empty list when no configuration, no preprocessing configuration or no
     * column configuration is available.
     */
    // Declared as ArrayList (not List) because the result is passed to
    // SparkImporterUtils.asSeq, whose parameter type is not visible from this file.
    private static ArrayList<String> collectColumnsToRemove() {
        ArrayList<String> columnsToRemove = new ArrayList<String>();

        Configuration configuration = ConfigurationUtils.getInstance().getConfiguration();
        if (configuration == null) {
            return columnsToRemove;
        }
        PreprocessingConfiguration preprocessingConfiguration = configuration.getPreprocessingConfiguration();
        // Defensive: a missing column configuration is treated like a missing configuration
        // (nothing to remove) instead of failing with a NullPointerException in the loop.
        // The getter is called twice on purpose so no assumption about its return type is needed.
        if (preprocessingConfiguration == null || preprocessingConfiguration.getColumnConfiguration() == null) {
            return columnsToRemove;
        }

        for (ColumnConfiguration cc : preprocessingConfiguration.getColumnConfiguration()) {
            if (cc.isUseColumn()) {
                continue;
            }
            if (REQUIRED_COLUMNS.contains(cc.getColumnName())) {
                SparkImporterLogger.getInstance().writeWarn("The column '" + cc.getColumnName() + "' has to stay in in order to do the processing. It will not be removed. Comment: " + cc.getComment());
                continue;
            }
            columnsToRemove.add(cc.getColumnName());
            SparkImporterLogger.getInstance().writeInfo("The column '" + cc.getColumnName() + "' will be removed. Comment: " + cc.getComment());
        }
        return columnsToRemove;
    }

    /** Logs a warning for every column scheduled for removal that is not among the existing column names. */
    private static void warnAboutMissingColumns(List<String> columnsToRemove, String[] existingColumnNames) {
        // Hash-based lookup instead of scanning a list once per configured column.
        Set<String> existingColumns = new HashSet<String>(Arrays.asList(existingColumnNames));
        for (String column : columnsToRemove) {
            if (!existingColumns.contains(column)) {
                SparkImporterLogger.getInstance().writeWarn("The column '" + column + "' is configured to be filtered, but does not exist in the data.");
            }
        }
    }
}

