/*
 * Decompiled with CFR 0.152.
 */
package de.viadee.ki.sparkimporter.processing.steps.dataprocessing;

import de.viadee.ki.sparkimporter.processing.interfaces.PreprocessingStepInterface;
import de.viadee.ki.sparkimporter.util.SparkImporterUtils;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.functions;

/**
 * Preprocessing step that aggregates the incoming rows to one row per
 * ({@code proc_inst_id_}, {@code act_inst_id_}) pair.
 *
 * <p>Two aggregations are computed from the input and concatenated:
 * <ol>
 *   <li>rows whose {@code act_inst_id_} is not null, and</li>
 *   <li>rows whose {@code state_} is null.</li>
 * </ol>
 * Both use the same per-column aggregation map, so their column layout is
 * identical and they can be combined with a positional {@code union}.
 *
 * <p>NOTE(review): {@code "ProcessState"} and {@code "AllButEmptyString"} are
 * not Spark built-ins; presumably they are custom aggregate functions
 * registered elsewhere in the project before this step runs - confirm.
 */
public class AggregateActivityInstancesStep implements PreprocessingStepInterface {
    private static final String COL_PROCESS_INSTANCE_ID = "proc_inst_id_";
    private static final String COL_ACTIVITY_INSTANCE_ID = "act_inst_id_";
    private static final String COL_DURATION = "duration_";
    private static final String COL_STATE = "state_";

    /**
     * Matches the column names produced by the aggregation, e.g. {@code max(duration_)};
     * group 2 captures the wrapped (original) column name.
     * Compiled once because {@link Pattern} instances are immutable and reusable.
     */
    private static final Pattern AGGREGATED_COLUMN_NAME =
            Pattern.compile("(max|allbutemptystring|processstate)\\((.+)\\)");

    /**
     * Aggregates the dataset per process instance / activity instance, restores the
     * original column names, drops the {@code name_} column and sorts by
     * {@code start_time_}.
     *
     * @param dataset                 input rows; must contain at least the columns
     *                                {@code proc_inst_id_}, {@code act_inst_id_} and {@code state_}
     * @param writeStepResultIntoFile when {@code true}, the result is additionally written
     *                                via {@code SparkImporterUtils.writeDatasetToCSV} under the
     *                                name {@code agg_of_activity_instances}
     * @param dataLevel               not used by this step
     * @param parameters              not used by this step
     * @return the aggregated, renamed and sorted dataset
     */
    @Override
    public Dataset<Row> runPreprocessingStep(Dataset<Row> dataset, boolean writeStepResultIntoFile, String dataLevel, Map<String, Object> parameters) {
        Map<String, String> aggregationMap = buildAggregationMap(dataset.columns());

        // Aggregation 1: rows that belong to an activity instance.
        Column hasActivityInstance = functions.not(functions.isnull(dataset.col(COL_ACTIVITY_INSTANCE_ID)));
        Dataset<Row> activityAggregate = dataset
                .filter(hasActivityInstance)
                .groupBy(COL_PROCESS_INSTANCE_ID, COL_ACTIVITY_INSTANCE_ID)
                .agg(aggregationMap);
        // Previously this rename was applied to the *input* dataset, where it could not
        // match anything useful (and would have invalidated aggregationMap's keys if it
        // did). It belongs on the aggregate that was just produced.
        activityAggregate = restoreOriginalColumnNames(activityAggregate);

        // Aggregation 2: rows without a state value.
        Dataset<Row> statelessAggregate = dataset
                .filter(functions.isnull(dataset.col(COL_STATE)))
                .groupBy(COL_PROCESS_INSTANCE_ID, COL_ACTIVITY_INSTANCE_ID)
                .agg(aggregationMap);

        // union() pairs columns by position and keeps the left side's names, so the
        // combined result still carries aggregate-style names and is renamed afterwards.
        Dataset<Row> result = restoreOriginalColumnNames(statelessAggregate.union(activityAggregate));

        result = result.drop("name_");
        result = result.sort("start_time_");

        if (writeStepResultIntoFile) {
            SparkImporterUtils.getInstance().writeDatasetToCSV(result, "agg_of_activity_instances");
        }
        return result;
    }

    /**
     * Chooses the aggregate function for every column that is not a grouping key:
     * {@code max} for the duration, {@code ProcessState} for the state and
     * {@code AllButEmptyString} for everything else.
     */
    private static Map<String, String> buildAggregationMap(String[] columns) {
        Map<String, String> aggregationMap = new HashMap<>();
        for (String column : columns) {
            if (column.equals(COL_PROCESS_INSTANCE_ID) || column.equals(COL_ACTIVITY_INSTANCE_ID)) {
                // Grouping keys are not aggregated.
                continue;
            }
            if (column.equals(COL_DURATION)) {
                aggregationMap.put(column, "max");
            } else if (column.equals(COL_STATE)) {
                aggregationMap.put(column, "ProcessState");
            } else {
                aggregationMap.put(column, "AllButEmptyString");
            }
        }
        return aggregationMap;
    }

    /**
     * Renames every column whose name looks like {@code func(col)} back to {@code col};
     * columns that do not match are left untouched.
     */
    private static Dataset<Row> restoreOriginalColumnNames(Dataset<Row> aggregated) {
        Dataset<Row> renamed = aggregated;
        for (String columnName : aggregated.columns()) {
            Matcher matcher = AGGREGATED_COLUMN_NAME.matcher(columnName);
            if (matcher.find()) {
                renamed = renamed.withColumnRenamed(columnName, matcher.group(2));
            }
        }
        return renamed;
    }
}

