/*
 * Decompiled with CFR 0.152.
 */
package ai.h2o.targetencoding;

import ai.h2o.targetencoding.BlendingParams;
import ai.h2o.targetencoding.TargetEncoder;
import ai.h2o.targetencoding.TargetEncoderFrameHelper;
import hex.ModelMetricsBinomial;
import hex.ScoreKeeper;
import hex.genmodel.utils.DistributionFamily;
import hex.tree.gbm.GBM;
import hex.tree.gbm.GBMModel;
import java.util.Map;
import org.junit.After;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import water.Key;
import water.Scope;
import water.TestUtil;
import water.fvec.Frame;
import water.fvec.Vec;
import water.util.IcedHashMapGeneric;

@Ignore(value="Ignoring benchmark tests")
public class TargetEncodingTitanicBenchmark
extends TestUtil {
    @BeforeClass
    public static void setup() {
        TargetEncodingTitanicBenchmark.stall_till_cloudsize((int)1);
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    @Test
    public void KFoldHoldoutTypeTest() {
        Scope.enter();
        GBMModel gbm = null;
        try {
            BlendingParams params = new BlendingParams(3.0, 1.0);
            String targetColumnName = "survived";
            String[] teColumns = new String[]{"cabin", "home.dest", "embarked"};
            String foldColumnName = "fold";
            String[] teColumnsWithFold = new String[]{"cabin", "home.dest", "embarked", foldColumnName};
            TargetEncoder tec = new TargetEncoder(teColumns);
            Frame trainFrame = TargetEncodingTitanicBenchmark.parse_test_file((Key)Key.make((String)"titanic_train_parsed"), (String)"smalldata/gbm_test/titanic_train.csv");
            Frame validFrame = TargetEncodingTitanicBenchmark.parse_test_file((Key)Key.make((String)"titanic_valid_parsed"), (String)"smalldata/gbm_test/titanic_valid.csv");
            Frame testFrame = TargetEncodingTitanicBenchmark.parse_test_file((Key)Key.make((String)"titanic_test_parsed"), (String)"smalldata/gbm_test/titanic_test.csv");
            this.asFactor(trainFrame, targetColumnName);
            this.asFactor(validFrame, targetColumnName);
            this.asFactor(testFrame, targetColumnName);
            this.printOutColumnsMetadata(testFrame);
            Scope.track((Frame[])new Frame[]{trainFrame, validFrame, testFrame});
            TargetEncoderFrameHelper.addKFoldColumn((Frame)trainFrame, (String)foldColumnName, (int)5, (long)1234L);
            trainFrame.remove(new String[]{"name", "ticket", "boat", "body"});
            validFrame.remove(new String[]{"name", "ticket", "boat", "body"});
            testFrame.remove(new String[]{"name", "ticket", "boat", "body"});
            boolean withBlendedAvg = true;
            boolean withNoiseOnlyForTraining = true;
            boolean withImputationForNAsInOriginalColumns = true;
            IcedHashMapGeneric encodingMap = tec.prepareEncodingMap(trainFrame, targetColumnName, foldColumnName, withImputationForNAsInOriginalColumns);
            Frame trainEncoded = withNoiseOnlyForTraining ? tec.applyTargetEncoding(trainFrame, targetColumnName, (Map)encodingMap, TargetEncoder.DataLeakageHandlingStrategy.KFold, foldColumnName, withBlendedAvg, withImputationForNAsInOriginalColumns, params, 1234L) : tec.applyTargetEncoding(trainFrame, targetColumnName, (Map)encodingMap, TargetEncoder.DataLeakageHandlingStrategy.KFold, foldColumnName, withBlendedAvg, 0.0, withImputationForNAsInOriginalColumns, params, 1234L);
            Frame validEncoded = tec.applyTargetEncoding(validFrame, targetColumnName, (Map)encodingMap, TargetEncoder.DataLeakageHandlingStrategy.None, foldColumnName, withBlendedAvg, 0.0, withImputationForNAsInOriginalColumns, params, 1234L);
            Frame testEncoded = tec.applyTargetEncoding(testFrame, targetColumnName, (Map)encodingMap, TargetEncoder.DataLeakageHandlingStrategy.None, foldColumnName, withBlendedAvg, 0.0, withImputationForNAsInOriginalColumns, params, 1234L);
            Scope.track((Frame[])new Frame[]{trainEncoded, validEncoded, testEncoded});
            this.printOutColumnsMetadata(trainEncoded);
            GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
            parms._train = trainEncoded._key;
            parms._response_column = targetColumnName;
            parms._score_tree_interval = 10;
            parms._ntrees = 1000;
            parms._max_depth = 5;
            parms._distribution = DistributionFamily.multinomial;
            parms._valid = validEncoded._key;
            parms._stopping_tolerance = 0.001;
            parms._stopping_metric = ScoreKeeper.StoppingMetric.AUC;
            parms._stopping_rounds = 5;
            parms._ignored_columns = teColumnsWithFold;
            parms._seed = 1234L;
            GBM job = new GBM(parms);
            gbm = (GBMModel)job.trainModel().get();
            System.out.println(((GBMModel.GBMOutput)gbm._output)._variable_importances.toString(2, true));
            Assert.assertTrue((boolean)job.isStopped());
            Frame preds = gbm.score(testEncoded);
            Scope.track((Frame[])new Frame[]{preds});
            ModelMetricsBinomial mm = ModelMetricsBinomial.make((Vec)preds.vec(2), (Vec)testEncoded.vec(parms._response_column));
            double auc = mm._auc._auc;
            double auc2 = this.trainDefaultGBM(targetColumnName);
            System.out.println("AUC with encoding:" + auc);
            System.out.println("AUC without encoding:" + auc2);
            Assert.assertTrue((auc2 < auc ? 1 : 0) != 0);
            this.encodingMapCleanUp((Map<String, Frame>)encodingMap);
        }
        finally {
            if (gbm != null) {
                gbm.delete();
                gbm.deleteCrossValidationModels();
            }
            Scope.exit((Key[])new Key[0]);
        }
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    @Test
    public void leaveOneOutHoldoutTypeTest() {
        GBMModel gbm = null;
        Scope.enter();
        try {
            BlendingParams params = new BlendingParams(3.0, 1.0);
            String[] teColumns = new String[]{"cabin", "embarked", "home.dest"};
            String targetColumnName = "survived";
            TargetEncoder tec = new TargetEncoder(teColumns);
            Frame trainFrame = TargetEncodingTitanicBenchmark.parse_test_file((Key)Key.make((String)"titanic_train_parsed"), (String)"smalldata/gbm_test/titanic_train.csv");
            Frame validFrame = TargetEncodingTitanicBenchmark.parse_test_file((Key)Key.make((String)"titanic_valid_parsed"), (String)"smalldata/gbm_test/titanic_valid.csv");
            Frame testFrame = TargetEncodingTitanicBenchmark.parse_test_file((Key)Key.make((String)"titanic_test_parsed"), (String)"smalldata/gbm_test/titanic_test.csv");
            this.asFactor(trainFrame, targetColumnName);
            this.asFactor(validFrame, targetColumnName);
            this.asFactor(testFrame, targetColumnName);
            Scope.track((Frame[])new Frame[]{trainFrame, validFrame, testFrame});
            trainFrame.remove(new String[]{"name", "ticket", "boat", "body"});
            validFrame.remove(new String[]{"name", "ticket", "boat", "body"});
            testFrame.remove(new String[]{"name", "ticket", "boat", "body"});
            boolean withBlendedAvg = true;
            boolean withBlendedAvgOnlyForTraining = false;
            boolean withNoiseOnlyForTraining = true;
            boolean withImputationForNAsInOriginalColumns = true;
            IcedHashMapGeneric encodingMap = tec.prepareEncodingMap(trainFrame, targetColumnName, null, withImputationForNAsInOriginalColumns);
            Frame trainEncoded = withNoiseOnlyForTraining ? tec.applyTargetEncoding(trainFrame, targetColumnName, (Map)encodingMap, TargetEncoder.DataLeakageHandlingStrategy.LeaveOneOut, withBlendedAvg, withImputationForNAsInOriginalColumns, params, 1234L) : tec.applyTargetEncoding(trainFrame, targetColumnName, (Map)encodingMap, TargetEncoder.DataLeakageHandlingStrategy.LeaveOneOut, withBlendedAvg, 0.0, withImputationForNAsInOriginalColumns, params, 1234L);
            Frame validEncoded = tec.applyTargetEncoding(validFrame, targetColumnName, (Map)encodingMap, TargetEncoder.DataLeakageHandlingStrategy.None, withBlendedAvg && !withBlendedAvgOnlyForTraining, 0.0, withImputationForNAsInOriginalColumns, params, 1234L);
            Frame testEncoded = tec.applyTargetEncoding(testFrame, targetColumnName, (Map)encodingMap, TargetEncoder.DataLeakageHandlingStrategy.None, withBlendedAvg && !withBlendedAvgOnlyForTraining, 0.0, withImputationForNAsInOriginalColumns, params, 1234L);
            Scope.track((Frame[])new Frame[]{trainEncoded, validEncoded, testEncoded});
            GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
            parms._train = trainEncoded._key;
            parms._response_column = targetColumnName;
            parms._score_tree_interval = 10;
            parms._ntrees = 1000;
            parms._max_depth = 5;
            parms._distribution = DistributionFamily.AUTO;
            parms._valid = validEncoded._key;
            parms._stopping_tolerance = 0.001;
            parms._stopping_metric = ScoreKeeper.StoppingMetric.AUC;
            parms._stopping_rounds = 5;
            parms._ignored_columns = teColumns;
            parms._seed = 1234L;
            GBM job = new GBM(parms);
            gbm = (GBMModel)job.trainModel().get();
            Assert.assertTrue((boolean)job.isStopped());
            System.out.println(((GBMModel.GBMOutput)gbm._output)._variable_importances.toString(2, true));
            Frame preds = gbm.score(testEncoded);
            Scope.track((Frame[])new Frame[]{preds});
            ModelMetricsBinomial mm = ModelMetricsBinomial.make((Vec)preds.vec(2), (Vec)testEncoded.vec(parms._response_column));
            double auc = mm._auc._auc;
            double auc2 = this.trainDefaultGBM(targetColumnName);
            System.out.println("AUC with encoding:" + auc);
            System.out.println("AUC without encoding:" + auc2);
            Assert.assertTrue((auc2 < auc ? 1 : 0) != 0);
            this.encodingMapCleanUp((Map<String, Frame>)encodingMap);
        }
        finally {
            if (gbm != null) {
                gbm.delete();
                gbm.deleteCrossValidationModels();
            }
            Scope.exit((Key[])new Key[0]);
        }
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    @Test
    public void noneHoldoutTypeTest() {
        Scope.enter();
        try {
            BlendingParams params = new BlendingParams(3.0, 1.0);
            String[] teColumns = new String[]{"cabin", "embarked", "home.dest"};
            String targetColumnName = "survived";
            TargetEncoder tec = new TargetEncoder(teColumns);
            Frame trainFrame = TargetEncodingTitanicBenchmark.parse_test_file((Key)Key.make((String)"titanic_train_parsed"), (String)"smalldata/gbm_test/titanic_train_wteh.csv");
            Frame teHoldoutFrame = TargetEncodingTitanicBenchmark.parse_test_file((Key)Key.make((String)"titanic_te_holdout_parsed"), (String)"smalldata/gbm_test/titanic_te_holdout.csv");
            Frame validFrame = TargetEncodingTitanicBenchmark.parse_test_file((Key)Key.make((String)"titanic_valid_parsed"), (String)"smalldata/gbm_test/titanic_valid.csv");
            Frame testFrame = TargetEncodingTitanicBenchmark.parse_test_file((Key)Key.make((String)"titanic_test_parsed"), (String)"smalldata/gbm_test/titanic_test.csv");
            this.asFactor(trainFrame, targetColumnName);
            this.asFactor(teHoldoutFrame, targetColumnName);
            this.asFactor(validFrame, targetColumnName);
            this.asFactor(testFrame, targetColumnName);
            Scope.track((Frame[])new Frame[]{trainFrame, teHoldoutFrame, validFrame, testFrame});
            trainFrame.remove(new String[]{"name", "ticket", "boat", "body"});
            teHoldoutFrame.remove(new String[]{"name", "ticket", "boat", "body"});
            validFrame.remove(new String[]{"name", "ticket", "boat", "body"});
            testFrame.remove(new String[]{"name", "ticket", "boat", "body"});
            Frame teHoldoutFrameFactorized = this.asFactor(teHoldoutFrame, "cabin");
            Scope.track((Frame[])new Frame[]{teHoldoutFrameFactorized});
            boolean withNoiseOnlyForTraining = true;
            boolean withImputation = true;
            IcedHashMapGeneric encodingMap = tec.prepareEncodingMap(teHoldoutFrameFactorized, targetColumnName, null, withImputation);
            Frame trainEncoded = withNoiseOnlyForTraining ? tec.applyTargetEncoding(trainFrame, targetColumnName, (Map)encodingMap, TargetEncoder.DataLeakageHandlingStrategy.None, true, withImputation, params, 1234L) : tec.applyTargetEncoding(trainFrame, targetColumnName, (Map)encodingMap, TargetEncoder.DataLeakageHandlingStrategy.None, true, 0.0, withImputation, params, 1234L);
            Frame validEncoded = tec.applyTargetEncoding(validFrame, targetColumnName, (Map)encodingMap, TargetEncoder.DataLeakageHandlingStrategy.None, true, 0.0, withImputation, params, 1234L);
            Frame testEncoded = tec.applyTargetEncoding(testFrame, targetColumnName, (Map)encodingMap, TargetEncoder.DataLeakageHandlingStrategy.None, true, 0.0, withImputation, params, 1234L);
            Scope.track((Frame[])new Frame[]{trainEncoded, validEncoded, testEncoded});
            GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
            parms._train = trainEncoded._key;
            parms._response_column = targetColumnName;
            parms._score_tree_interval = 10;
            parms._ntrees = 1000;
            parms._max_depth = 5;
            parms._distribution = DistributionFamily.AUTO;
            parms._valid = validEncoded._key;
            parms._stopping_tolerance = 0.001;
            parms._stopping_metric = ScoreKeeper.StoppingMetric.AUC;
            parms._stopping_rounds = 5;
            parms._ignored_columns = teColumns;
            parms._seed = 1234L;
            GBM job = new GBM(parms);
            GBMModel gbm = (GBMModel)job.trainModel().get();
            Assert.assertTrue((boolean)job.isStopped());
            System.out.println(((GBMModel.GBMOutput)gbm._output)._variable_importances.toString(2, true));
            Frame preds = gbm.score(testEncoded);
            Scope.track((Frame[])new Frame[]{preds});
            ModelMetricsBinomial mm = ModelMetricsBinomial.make((Vec)preds.vec(2), (Vec)testEncoded.vec(parms._response_column));
            double auc = mm._auc._auc;
            double auc2 = this.trainDefaultGBM(targetColumnName);
            System.out.println("AUC with encoding:" + auc);
            System.out.println("AUC without encoding:" + auc2);
            Assert.assertTrue((auc2 < auc ? 1 : 0) != 0);
            this.encodingMapCleanUp((Map<String, Frame>)encodingMap);
            if (gbm != null) {
                gbm.delete();
                gbm.deleteCrossValidationModels();
            }
        }
        finally {
            Scope.exit((Key[])new Key[0]);
        }
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    private double trainDefaultGBM(String targetColumnName) {
        GBMModel gbm2 = null;
        Scope.enter();
        try {
            double auc2;
            Frame trainFrame2 = TargetEncodingTitanicBenchmark.parse_test_file((Key)Key.make((String)"titanic_train_parsed"), (String)"smalldata/gbm_test/titanic_train.csv");
            Frame validFrame2 = TargetEncodingTitanicBenchmark.parse_test_file((Key)Key.make((String)"titanic_valid_parsed2"), (String)"smalldata/gbm_test/titanic_valid.csv");
            Frame testFrame2 = TargetEncodingTitanicBenchmark.parse_test_file((Key)Key.make((String)"titanic_test_parsed2"), (String)"smalldata/gbm_test/titanic_test.csv");
            Scope.track((Frame[])new Frame[]{trainFrame2, testFrame2, validFrame2});
            trainFrame2.remove(new String[]{"name", "ticket", "boat", "body"});
            validFrame2.remove(new String[]{"name", "ticket", "boat", "body"});
            testFrame2.remove(new String[]{"name", "ticket", "boat", "body"});
            GBMModel.GBMParameters parms2 = new GBMModel.GBMParameters();
            parms2._train = trainFrame2._key;
            parms2._response_column = targetColumnName;
            parms2._score_tree_interval = 10;
            parms2._ntrees = 1000;
            parms2._max_depth = 5;
            parms2._distribution = DistributionFamily.quasibinomial;
            parms2._valid = validFrame2._key;
            parms2._stopping_tolerance = 0.001;
            parms2._stopping_metric = ScoreKeeper.StoppingMetric.AUC;
            parms2._stopping_rounds = 5;
            parms2._seed = 1234L;
            GBM job2 = new GBM(parms2);
            gbm2 = (GBMModel)job2.trainModel().get();
            Assert.assertTrue((boolean)job2.isStopped());
            Frame preds2 = gbm2.score(testFrame2);
            Scope.track((Frame[])new Frame[]{preds2});
            TargetEncodingTitanicBenchmark.printOutFrameAsTable((Frame)preds2, (boolean)false, (long)preds2.numRows());
            ModelMetricsBinomial mm2 = ModelMetricsBinomial.make((Vec)preds2.vec(2), (Vec)testFrame2.vec(parms2._response_column));
            double d = auc2 = mm2._auc._auc;
            return d;
        }
        finally {
            if (gbm2 != null) {
                gbm2.delete();
                gbm2.deleteCrossValidationModels();
            }
            Scope.exit((Key[])new Key[0]);
        }
    }

    @After
    public void afterEach() {
    }

    private void encodingMapCleanUp(Map<String, Frame> encodingMap) {
        for (Map.Entry<String, Frame> map : encodingMap.entrySet()) {
            map.getValue().delete();
        }
    }
}

