/*
 * Decompiled with CFR 0.152.
 */
package com.github.chen0040.si.testing;

import com.github.chen0040.si.exceptions.VariableWrongValueTypeException;
import com.github.chen0040.si.statistics.Sample;
import com.github.chen0040.si.statistics.SampleLinearRegression;
import org.apache.commons.math3.distribution.FDistribution;

/**
 * ANOVA F-test for a simple linear regression of a numerical response {@code y}
 * on a numerical explanatory variable {@code x}.
 *
 * <p>The total sum of squares of {@code y} (SST) is split into the part left
 * unexplained by the fitted line (SSE) and the remainder (SSG = SST - SSE). The
 * F-statistic is the ratio of the corresponding mean squares, and the p-value is
 * the upper-tail probability of that statistic under an F distribution with
 * {@code (dfG, dfE)} degrees of freedom.
 *
 * <p>Null hypothesis: {@code y} is independent of {@code x}.
 * Alternative hypothesis: {@code y} is correlated to {@code x}.
 */
public class Anova4Regression {
    /** Significance level quoted in the textual summary. */
    private static final double SUMMARY_SIGNIFICANCE_LEVEL = 0.001;

    /** Total sum of squares: sum of (y - yBar)^2. */
    private double ySST;
    /** Error sum of squares: sum of squared residuals around the fitted line. */
    private double ySSE;
    /** Sum of squares attributed to the regression: SST - SSE. */
    private double ySSG;
    /** Mean square of the regression: SSG / dfG. */
    private double yMSG;
    /** Mean square error: SSE / dfE. */
    private double yMSE;
    /** Total degrees of freedom. */
    private double dfT;
    /** Degrees of freedom of the regression (one explanatory variable). */
    private double dfG;
    /** Degrees of freedom of the error: dfT - dfG. */
    private double dfE;
    /** Upper-tail probability of the F-statistic. */
    private double pValue;
    /** F-statistic: MSG / MSE. */
    private double F;

    /**
     * Fits a linear regression to the sample and runs the ANOVA on it.
     *
     * @param sample sample holding the numerical variables x and y
     * @throws VariableWrongValueTypeException if the sample does not contain two numerical variables
     */
    public Anova4Regression(Sample sample) {
        if (!sample.containsTwoNumericalVariables()) {
            throw new VariableWrongValueTypeException("Sample 1 should contain numeric variable x and y");
        }
        SampleLinearRegression regression = new SampleLinearRegression(sample);
        this.run(regression, sample);
    }

    /**
     * Computes the sums of squares, degrees of freedom, mean squares, F-statistic
     * and p-value for the given regression, overwriting any previous results.
     *
     * @param regression fitted regression supplying the mean of y, the intercept and the slope
     * @param sample     observations the sums of squares are computed over
     * @return this instance, to allow call chaining
     */
    public Anova4Regression run(SampleLinearRegression regression, Sample sample) {
        double yBar = regression.getYBar();
        double b_0 = regression.getIntercept();
        double b_1 = regression.getSlope();

        this.ySST = sample.getObservations().stream()
                .mapToDouble(o -> Math.pow(o.getY() - yBar, 2.0))
                .sum();
        // Residual is y - yHat with yHat = b_0 + b_1 * x; the parentheses matter,
        // otherwise the intercept is added to the residual instead of subtracted.
        this.ySSE = sample.getObservations().stream()
                .mapToDouble(o -> Math.pow(o.getY() - (b_0 + b_1 * o.getX()), 2.0))
                .sum();
        this.ySSG = this.ySST - this.ySSE;

        this.dfT = sample.countByGroupId(null) - 1;
        this.dfG = 1.0;
        this.dfE = this.dfT - this.dfG;

        this.yMSG = this.ySSG / this.dfG;
        this.yMSE = this.ySSE / this.dfE;

        FDistribution distribution = new FDistribution(this.dfG, this.dfE);
        this.F = this.yMSG / this.yMSE;
        // The p-value is the probability of an F at least as large as the one
        // observed, i.e. the upper tail, not the cumulative (lower-tail) probability.
        this.pValue = 1.0 - distribution.cumulativeProbability(this.F);
        return this;
    }

    /**
     * Builds a multi-line, human-readable report of the hypotheses, the ANOVA
     * quantities and the test decision at a significance level of 0.001.
     *
     * @return the report text, one entry per line
     */
    public String getSummary() {
        StringBuilder sb = new StringBuilder();
        sb.append("null hypothesis: numerical variable y (response) is independent of the numerical variable x (explanatory)");
        sb.append("\nalternative hypothesis: numerical variable y is correlated to the numerical variable x");
        sb.append("\nSST (sum of squares total): ").append(this.ySST);
        sb.append("\nSSG (sum of squares group): ").append(this.ySSG);
        sb.append("\nSSE (sum of squares error): ").append(this.ySSE);
        sb.append("\ndf (total): ").append(this.dfT);
        sb.append("\ndf (group): ").append(this.dfG);
        sb.append("\ndf (error): ").append(this.dfE);
        sb.append("\nMSG (mean squares group): ").append(this.yMSG);
        sb.append("\nMSE (mean squares error): ").append(this.yMSE);
        sb.append("\nF-statistic: ").append(this.F);
        sb.append("\np-value: ").append(this.pValue);
        boolean rejectH0 = this.willRejectH0(SUMMARY_SIGNIFICANCE_LEVEL);
        sb.append("\nIf the significance level is ").append(SUMMARY_SIGNIFICANCE_LEVEL)
                .append(", the null hypothesis is ")
                .append(rejectH0 ? "rejected as p-value is smaller than that" : "failed to be rejected");
        return sb.toString();
    }

    /**
     * Returns the same text as {@link #getSummary()}.
     */
    @Override
    public String toString() {
        return this.getSummary();
    }

    /**
     * Prints the summary to standard output.
     */
    public void report() {
        System.out.println(this.toString());
    }

    /**
     * Decides whether the null hypothesis is rejected at the given level.
     *
     * @param significanceLevel threshold the p-value is compared against
     * @return {@code true} if the p-value is strictly smaller than {@code significanceLevel}
     */
    public boolean willRejectH0(double significanceLevel) {
        return this.pValue < significanceLevel;
    }
}

