/*
 * Decompiled with CFR 0.152.
 */
package hex.deeplearning;

import hex.DataInfo;
import hex.Distribution;
import hex.DistributionFactory;
import hex.FrameTask;
import hex.Model;
import hex.ModelMetrics;
import hex.ModelMetricsRegression;
import hex.deeplearning.DeepLearning;
import hex.deeplearning.DeepLearningModel;
import hex.deeplearning.DeepLearningModelInfo;
import hex.deeplearning.DeepLearningTask;
import hex.deeplearning.Neurons;
import hex.genmodel.utils.DistributionFamily;
import java.util.Random;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import water.DKV;
import water.Iced;
import water.IcedUtils;
import water.Key;
import water.Keyed;
import water.MRTask;
import water.TestUtil;
import water.fvec.Chunk;
import water.fvec.Frame;
import water.fvec.Vec;
import water.util.Log;
import water.util.PrettyPrint;

public class DeepLearningGradientCheck
extends TestUtil {
    // Relative-error threshold. The visible test code compares against the equal literal 0.02f
    // (presumably the decompiler inlined this constant -- TODO confirm against the original source).
    static final float MAX_TOLERANCE = 0.02f;
    // Upper bound on the number of failed gradient comparisons; gradientCheck() asserts
    // failedcount <= 30.0f using the equal literal.
    static final float MAX_FAILED_COUNT = 30.0f;
    // Sampling threshold for weight columns; gradientCheck() skips a column when
    // rng.nextFloat() >= 0.01f, using the equal literal.
    static final float SAMPLE_RATE = 0.01f;

    /**
     * One-time class setup: calls {@code stall_till_cloudsize(1)} before any test in this class runs.
     * Presumably this waits until a cloud of at least one node is available -- confirm in TestUtil.
     */
    @BeforeClass
    public static void stall() {
        final int requiredCloudSize = 1;
        TestUtil.stall_till_cloudsize(requiredCloudSize);
    }

    /**
     * Trains small deep learning models over a grid of distribution families, activations,
     * response columns and adaptive-rate settings, then compares the gradient recorded during
     * back-propagation with a central finite-difference gradient of the loss, for every bias
     * and for a random sample of the weights, one training row at a time.
     *
     * <p>The test fails if the largest relative error observed for a weight reaches 0.02 or if
     * more than 30 individual comparisons exceed that relative error.
     *
     * <p>Note: this source is decompiler output (CFR); the decompiler flagged this method with
     * "Removed try catching itself - possible behaviour change". The plain {@code assert}
     * statements below are only evaluated when the JVM runs with assertions enabled.
     */
    @Test
    public void gradientCheck() {
        Frame tfr = null;
        DeepLearningModel dl = null;
        try {
            tfr = DeepLearningGradientCheck.parse_test_file((String)"smalldata/glm_test/cancar_logIn.csv");
            // Replace the "Merit" and "Class" columns with their categorical versions.
            for (String s : new String[]{"Merit", "Class"}) {
                Vec f = tfr.vec(s).toCategoricalVec();
                tfr.remove(s).remove();
                tfr.add(s, f);
            }
            DKV.put((Keyed)tfr);
            // Add a "Binary" column: starts as zeros, then set to 1 wherever "Class" equals 1.
            tfr.add("Binary", tfr.anyVec().makeZero());
            new MRTask(){

                // c[0] is the "Class" chunk and c[1] the "Binary" chunk (order given to doAll below).
                public void map(Chunk[] c) {
                    for (int i = 0; i < c[0]._len; ++i) {
                        if (c[0].at8(i) != 1L) continue;
                        c[1].set(i, 1L);
                    }
                }
            }.doAll(tfr.vecs(new String[]{"Class", "Binary"}));
            // Convert the new "Binary" column to categorical as well.
            Vec cv = tfr.vec("Binary").toCategoricalVec();
            tfr.remove("Binary").remove();
            tfr.add("Binary", cv);
            DKV.put((Keyed)tfr);
            // Fixed seed: the random hyper-parameters and the sampled weight columns are reproducible.
            Random rng = new Random(912559L);
            int count = 0;
            int failedcount = 0;
            double maxRelErr = 0.0;
            double meanRelErr = 0.0;
            for (DistributionFamily dist : new DistributionFamily[]{DistributionFamily.gaussian, DistributionFamily.laplace, DistributionFamily.quantile, DistributionFamily.huber, DistributionFamily.gamma, DistributionFamily.poisson, DistributionFamily.AUTO, DistributionFamily.tweedie, DistributionFamily.multinomial, DistributionFamily.bernoulli}) {
                for (DeepLearningModel.DeepLearningParameters.Activation act : new DeepLearningModel.DeepLearningParameters.Activation[]{DeepLearningModel.DeepLearningParameters.Activation.Tanh, DeepLearningModel.DeepLearningParameters.Activation.Rectifier}) {
                    for (String response : new String[]{"Binary", "Class", "Cost"}) {
                        for (boolean adaptive : new boolean[]{true, false}) {
                            for (int miniBatchSize : new int[]{1}) {
                                // Skip response/distribution combinations that are not tested:
                                //  - "Class" runs only with multinomial or AUTO;
                                //  - "Binary" runs only with modified_huber, bernoulli or AUTO;
                                //  - any other response ("Cost") skips multinomial, modified_huber and bernoulli.
                                if (response.equals("Class") ? dist != DistributionFamily.multinomial && dist != DistributionFamily.AUTO : (response.equals("Binary") ? dist != DistributionFamily.modified_huber && dist != DistributionFamily.bernoulli && dist != DistributionFamily.AUTO : dist == DistributionFamily.multinomial || dist == DistributionFamily.modified_huber || dist == DistributionFamily.bernoulli)) continue;
                                DeepLearningModel.DeepLearningParameters parms = new DeepLearningModel.DeepLearningParameters();
                                // Randomized distribution parameters: huber_alpha in [0.1, 1.1),
                                // tweedie_power in [1.01, 1.91), quantile_alpha in [0.05, 0.95).
                                parms._huber_alpha = rng.nextDouble() + 0.1;
                                parms._tweedie_power = 1.01 + rng.nextDouble() * 0.9;
                                parms._quantile_alpha = 0.05 + rng.nextDouble() * 0.9;
                                parms._train = tfr._key;
                                parms._epochs = 100.0;
                                parms._l1 = 0.001;
                                parms._l2 = 0.001;
                                parms._force_load_balance = false;
                                parms._hidden = new int[]{10, 10, 10};
                                parms._fast_mode = false;
                                parms._response_column = response;
                                parms._distribution = dist;
                                parms._max_w2 = 10.0f;
                                parms._seed = 0xAAABBBL;
                                parms._activation = act;
                                parms._adaptive_rate = adaptive;
                                parms._rate = 1.0E-4;
                                parms._momentum_start = 0.9;
                                parms._momentum_stable = 0.99;
                                parms._mini_batch_size = miniBatchSize;
                                // Static, shared state: install a fresh GradientCheck(0, 0, 0) before training.
                                DeepLearningModelInfo.gradientCheck = new DeepLearningModelInfo.GradientCheck(0, 0, 0);
                                DeepLearning job = new DeepLearning(parms);
                                try {
                                    boolean classification;
                                    dl = (DeepLearningModel)job.trainModel().get();
                                    boolean bl = classification = response.equals("Class") || response.equals("Binary");
                                    // For the regression response, score the training frame and log the mean residual deviance.
                                    if (!classification) {
                                        Frame p = dl.score(tfr);
                                        ModelMetrics mm = ModelMetrics.getFromDKV((Model)dl, (Frame)tfr);
                                        double resdev = ((ModelMetricsRegression)mm)._mean_residual_deviance;
                                        Log.info((Object[])new Object[]{"Mean residual deviance: " + resdev});
                                        p.delete();
                                    }
                                    // Snapshot of the trained model state; every check below starts from a deep copy of it.
                                    DeepLearningModelInfo modelInfo = (DeepLearningModelInfo)IcedUtils.deepCopy((Iced)dl.model_info());
                                    long before = dl.model_info().checksum_impl();
                                    float meanLoss = 0.0f;
                                    int rId = 0;
                                    // One pass per training row; each mini-batch starts at row rId.
                                    while ((long)rId < tfr.numRows()) {
                                        dl.set_model_info((DeepLearningModelInfo)IcedUtils.deepCopy((Iced)modelInfo));
                                        DataInfo di = dl.model_info().data_info();
                                        DataInfo.Row[] rowsMiniBatch = new DataInfo.Row[miniBatchSize];
                                        for (int i = 0; i < rowsMiniBatch.length; ++i) {
                                            // Slots whose row index falls outside the frame stay null and are skipped later.
                                            if (0 > rId + i || (long)(rId + i) >= tfr.numRows()) continue;
                                            rowsMiniBatch[i] = ((FrameTask.ExtractDenseRow)new FrameTask.ExtractDenseRow((DataInfo)di, (long)((long)(rId + i))).doAll((Frame)di._adaptedFrame))._row;
                                        }
                                        long cs = dl.model_info().checksum_impl();
                                        double loss = dl.meanLoss(rowsMiniBatch);
                                        // Computing the loss must leave the model checksum unchanged.
                                        assert (cs == before);
                                        assert (before == dl.model_info().checksum_impl());
                                        meanLoss = (float)((double)meanLoss + loss);
                                        // Layers 0..hidden.length inclusive, i.e. one more weight matrix than hidden layers.
                                        for (int layer = 0; layer <= parms._hidden.length; ++layer) {
                                            int rows = dl.model_info().get_weights(layer).rows();
                                            assert (dl.model_info().get_biases(layer).size() == rows);
                                            for (int row = 0; row < rows; ++row) {
                                                // ---- Bias check for (layer, row) ----
                                                // Restore the snapshot, forward-propagate the mini-batch, then back-propagate
                                                // with a GradientCheck whose column is -1 (the value used here for the bias).
                                                dl.set_model_info((DeepLearningModelInfo)IcedUtils.deepCopy((Iced)modelInfo));
                                                Neurons[] neurons = DeepLearningTask.makeNeuronsForTraining((DeepLearningModelInfo)dl.model_info());
                                                double[] responses = new double[miniBatchSize];
                                                double[] offsets = new double[miniBatchSize];
                                                int n = 0;
                                                for (DataInfo.Row myRow : rowsMiniBatch) {
                                                    if (myRow == null) continue;
                                                    ((Neurons.Input)neurons[0]).setInput(-1L, myRow.numIds, myRow.numVals, myRow.nBins, myRow.binIds, n);
                                                    responses[n] = myRow.response(0);
                                                    offsets[n] = myRow.offset;
                                                    ++n;
                                                }
                                                DeepLearningTask.fpropMiniBatch((long)-1L, (Neurons[])neurons, (DeepLearningModelInfo)dl.model_info(), null, (boolean)true, (double[])responses, (double[])offsets, (int)n);
                                                // Forward propagation is expected not to alter the model.
                                                long after = dl.model_info().checksum_impl();
                                                assert (after == before);
                                                DeepLearningModelInfo.gradientCheck = new DeepLearningModelInfo.GradientCheck(layer, row, -1);
                                                DeepLearningTask.bpropMiniBatch((Neurons[])neurons, (int)n);
                                                // Back-propagation is expected to have modified the model; restore the snapshot afterwards.
                                                assert (before != dl.model_info().checksum_impl());
                                                dl.set_model_info((DeepLearningModelInfo)IcedUtils.deepCopy((Iced)modelInfo));
                                                assert (before == dl.model_info().checksum_impl());
                                                double bpropGradient = DeepLearningModelInfo.gradientCheck.gradient;
                                                double bias = dl.model_info().get_biases(layer).get(row);
                                                // Perturbation is 1e-4 of the bias magnitude, or 1e-6 when the bias is exactly zero.
                                                double eps = 1.0E-4 * Math.abs(bias);
                                                if (eps == 0.0) {
                                                    eps = 1.0E-6;
                                                }
                                                dl.model_info().get_biases(layer).set(row, bias + eps);
                                                double up = dl.meanLoss(rowsMiniBatch);
                                                dl.model_info().get_biases(layer).set(row, bias - eps);
                                                double down = dl.meanLoss(rowsMiniBatch);
                                                // Loss is insensitive to this bias (relative change < 1e-8): nothing to compare.
                                                // Note: this `continue` advances to the next row, so the weight columns of this
                                                // row are skipped as well.
                                                if (Math.abs(up - down) / Math.abs(up + down) < 1.0E-8) continue;
                                                // Central finite-difference gradient and symmetric relative error.
                                                double gradient = (up - down) / (2.0 * eps);
                                                double relError = 2.0 * Math.abs(bpropGradient - gradient) / (Math.abs(gradient) + Math.abs(bpropGradient));
                                                // count is incremented before the tiny-gradient skip below, so it includes skipped checks.
                                                ++count;
                                                // If either gradient is tiny and they agree to within 1e-7 absolutely, accept and move
                                                // on to the next row (again skipping this row's weight columns).
                                                if ((Math.abs(gradient) < 1.0E-7 || Math.abs(bpropGradient) < 1.0E-7) && Math.abs(bpropGradient - gradient) < 1.0E-7) continue;
                                                meanRelErr += relError;
                                                // 0.02f equals MAX_TOLERANCE.
                                                if (relError > (double)0.02f) {
                                                    Log.info((Object[])new Object[]{"\nDistribution: " + ((DeepLearningModel.DeepLearningParameters)dl._parms)._distribution});
                                                    Log.info((Object[])new Object[]{"\nRow: " + rId});
                                                    Log.info((Object[])new Object[]{"bias (layer " + layer + ", row " + row + "): " + bias + " +/- " + eps});
                                                    Log.info((Object[])new Object[]{"loss: " + loss});
                                                    Log.info((Object[])new Object[]{"losses up/down: " + up + " / " + down});
                                                    Log.info((Object[])new Object[]{"=> Finite differences gradient: " + gradient});
                                                    Log.info((Object[])new Object[]{"=> Back-propagation gradient  : " + bpropGradient});
                                                    Log.info((Object[])new Object[]{"=> Relative error             : " + PrettyPrint.formatPct((double)relError)});
                                                    ++failedcount;
                                                }
                                                // NOTE(review): the bias relError is never folded into maxRelErr; only weight errors
                                                // update it below. Confirm whether that is intentional.
                                                // ---- Weight checks for (layer, row, col) on a random sample of columns ----
                                                int cols = dl.model_info().get_weights(layer).cols();
                                                for (int col = 0; col < cols; ++col) {
                                                    // Keep a column only when rng.nextFloat() < 0.01f (0.01f equals SAMPLE_RATE).
                                                    if (rng.nextFloat() >= 0.01f) continue;
                                                    dl.set_model_info((DeepLearningModelInfo)IcedUtils.deepCopy((Iced)modelInfo));
                                                    Neurons[] neurons2 = DeepLearningTask.makeNeuronsForTraining((DeepLearningModelInfo)dl.model_info());
                                                    double[] responses2 = new double[miniBatchSize];
                                                    double[] offsets2 = new double[miniBatchSize];
                                                    int n2 = 0;
                                                    for (DataInfo.Row myRow : rowsMiniBatch) {
                                                        if (myRow == null) continue;
                                                        ((Neurons.Input)neurons2[0]).setInput(-1L, myRow.numIds, myRow.numVals, myRow.nBins, myRow.binIds, n2);
                                                        responses2[n2] = myRow.response(0);
                                                        offsets2[n2] = myRow.offset;
                                                        ++n2;
                                                    }
                                                    DeepLearningTask.fpropMiniBatch((long)-1L, (Neurons[])neurons2, (DeepLearningModelInfo)dl.model_info(), null, (boolean)true, (double[])responses2, (double[])offsets2, (int)n2);
                                                    long after2 = dl.model_info().checksum_impl();
                                                    assert (after2 == before);
                                                    DeepLearningModelInfo.gradientCheck = new DeepLearningModelInfo.GradientCheck(layer, row, col);
                                                    DeepLearningTask.bpropMiniBatch((Neurons[])neurons2, (int)n2);
                                                    assert (before != dl.model_info().checksum_impl());
                                                    dl.set_model_info((DeepLearningModelInfo)IcedUtils.deepCopy((Iced)modelInfo));
                                                    assert (before == dl.model_info().checksum_impl());
                                                    double bpropGradient2 = DeepLearningModelInfo.gradientCheck.gradient;
                                                    float weight = dl.model_info().get_weights(layer).get(row, col);
                                                    // Same perturbation rule as for the bias; the perturbed weight is stored as a float.
                                                    double eps2 = 1.0E-4 * (double)Math.abs(weight);
                                                    if (eps2 == 0.0) {
                                                        eps2 = 1.0E-6;
                                                    }
                                                    dl.model_info().get_weights(layer).set(row, col, (float)((double)weight + eps2));
                                                    double up2 = dl.meanLoss(rowsMiniBatch);
                                                    dl.model_info().get_weights(layer).set(row, col, (float)((double)weight - eps2));
                                                    double down2 = dl.meanLoss(rowsMiniBatch);
                                                    // Here `continue` only moves on to the next column.
                                                    if (Math.abs(up2 - down2) / Math.abs(up2 + down2) < 1.0E-8) continue;
                                                    double gradient2 = (up2 - down2) / (2.0 * eps2);
                                                    double relError2 = 2.0 * Math.abs(bpropGradient2 - gradient2) / (Math.abs(gradient2) + Math.abs(bpropGradient2));
                                                    ++count;
                                                    if ((Math.abs(gradient2) < 1.0E-7 || Math.abs(bpropGradient2) < 1.0E-7) && Math.abs(bpropGradient2 - gradient2) < 1.0E-7) continue;
                                                    meanRelErr += relError2;
                                                    if (relError2 > (double)0.02f) {
                                                        Log.info((Object[])new Object[]{"\nDistribution: " + ((DeepLearningModel.DeepLearningParameters)dl._parms)._distribution});
                                                        Log.info((Object[])new Object[]{"\nRow: " + rId});
                                                        Log.info((Object[])new Object[]{"weight (layer " + layer + ", row " + row + ", col " + col + "): " + weight + " +/- " + eps2});
                                                        Log.info((Object[])new Object[]{"loss: " + loss});
                                                        Log.info((Object[])new Object[]{"losses up/down: " + up2 + " / " + down2});
                                                        Log.info((Object[])new Object[]{"=> Finite differences gradient: " + gradient2});
                                                        Log.info((Object[])new Object[]{"=> Back-propagation gradient  : " + bpropGradient2});
                                                        Log.info((Object[])new Object[]{"=> Relative error             : " + PrettyPrint.formatPct((double)relError2)});
                                                        ++failedcount;
                                                    }
                                                    // Track the worst weight relative error seen across all configurations.
                                                    maxRelErr = Math.max(maxRelErr, relError2);
                                                    assert (!Double.isNaN(maxRelErr));
                                                }
                                            }
                                        }
                                        ++rId;
                                    }
                                    // Average of the per-row losses accumulated above.
                                    Log.info((Object[])new Object[]{"Mean loss: " + (meanLoss /= (float)tfr.numRows())});
                                }
                                catch (RuntimeException ex) {
                                    // A RuntimeException is tolerated only if the model stored under the job's
                                    // destination key reports itself unstable, or (when no model exists) the job is stopped.
                                    dl = (DeepLearningModel)DKV.getGet((Key)job.dest());
                                    if (dl != null) {
                                        Assert.assertTrue((boolean)dl.model_info().isUnstable());
                                        continue;
                                    }
                                    Assert.assertTrue((boolean)job.isStopped());
                                }
                                finally {
                                    // Delete the model of this configuration (runs on the `continue` path too).
                                    if (dl != null) {
                                        dl.delete();
                                    }
                                }
                            }
                        }
                    }
                }
            }
            // Summary over all configurations. count includes checks skipped by the tiny-gradient rule,
            // while meanRelErr only sums the checks that were not skipped.
            Log.info((Object[])new Object[]{"Number of tests: " + count});
            Log.info((Object[])new Object[]{"Number of failed tests: " + failedcount});
            Log.info((Object[])new Object[]{"Mean. relative error: " + meanRelErr / (double)count});
            Log.info((Object[])new Object[]{"Max. relative error: " + PrettyPrint.formatPct((double)maxRelErr)});
            // Final verdict: 0.02f equals MAX_TOLERANCE and 30.0f equals MAX_FAILED_COUNT.
            Assert.assertTrue((String)("Error too large: " + maxRelErr + " >= " + 0.02f), (maxRelErr < (double)0.02f ? 1 : 0) != 0);
            Assert.assertTrue((String)("Failed count too large: " + failedcount + " > " + 30.0f), ((float)failedcount <= 30.0f ? 1 : 0) != 0);
        }
        finally {
            // Always remove the parsed frame, even when an assertion above failed.
            if (tfr != null) {
                tfr.remove();
            }
        }
    }

    /**
     * Checks, for each listed distribution family, that the gradient derived from
     * {@code Distribution.negHalfGradient} (multiplied by -2) matches a central finite
     * difference of {@code Distribution.deviance} taken through {@code linkInv}.
     *
     * <p>For each family and for responses {@code y} in {0, 1}, the link-space value {@code f}
     * sweeps {@code (i + 0.5) / N} for {@code i} in {@code [-5N, 5N)} with {@code N = 1000}.
     * The huber alpha, tweedie power, quantile alpha and observation weight are redrawn from a
     * fixed-seed RNG on every step.
     *
     * <p>The comparison uses {@link Assert#fail(String)} rather than the Java {@code assert}
     * keyword, so a mismatch fails the test even when the JVM runs without {@code -ea}. A NaN
     * difference also counts as a failure, as it did with the original assertion.
     */
    @Test
    public void checkDistributionGradients() {
        final int N = 1000;
        // Half-width of the finite-difference stencil in link space.
        final double eps = 1.0 / (10.0 * (double)N);
        // Largest accepted absolute difference between analytic and numeric gradient.
        final double tolerance = 1.0E-4;
        Random rng = new Random(912559L);
        DistributionFamily[] families = new DistributionFamily[]{DistributionFamily.AUTO, DistributionFamily.gaussian, DistributionFamily.laplace, DistributionFamily.quantile, DistributionFamily.huber, DistributionFamily.gamma, DistributionFamily.poisson, DistributionFamily.tweedie, DistributionFamily.bernoulli};
        for (DistributionFamily dist : families) {
            DeepLearningModel.DeepLearningParameters p = new DeepLearningModel.DeepLearningParameters();
            p._distribution = dist;
            for (double y : new double[]{0.0, 1.0}) {
                for (int i = -5 * N; i < 5 * N; ++i) {
                    // The order of the RNG draws is kept exactly as before so the sequence of
                    // tested parameters is unchanged.
                    p._huber_alpha = rng.nextDouble() + 0.1;
                    p._tweedie_power = 1.01 + rng.nextDouble() * 0.9;
                    p._quantile_alpha = 0.05 + rng.nextDouble() * 0.9;
                    Distribution d = DistributionFactory.getDistribution((Model.Parameters)p);
                    double f = ((double)i + 0.5) / (double)N;
                    double grad = -2.0 * d.negHalfGradient(y, f);
                    double w = rng.nextDouble() * 10.0;
                    // Central difference of the weighted deviance, normalised by the weight.
                    double approxgrad = (d.deviance(w, y, d.linkInv(f + eps)) - d.deviance(w, y, d.linkInv(f - eps))) / (2.0 * eps * w);
                    double diff = Math.abs(grad - approxgrad);
                    // Written as a negated <= so that a NaN difference is reported as a failure;
                    // the message is only built on failure.
                    if (!(diff <= tolerance)) {
                        Assert.fail("Gradient mismatch for distribution " + dist + " (y=" + y + ", f=" + f + ", w=" + w + "): analytic=" + grad + ", finite-difference=" + approxgrad + ", |diff|=" + diff + " > " + tolerance);
                    }
                }
            }
        }
    }
}

