001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.math3.stat.regression;
018
019 import java.io.Serializable;
020 import java.util.Arrays;
021 import org.apache.commons.math3.util.FastMath;
022 import org.apache.commons.math3.util.MathArrays;
023 import org.apache.commons.math3.exception.OutOfRangeException;
024
025 /**
026 * Results of a Multiple Linear Regression model fit.
027 *
028 * @version $Id: RegressionResults.java 1392342 2012-10-01 14:08:52Z psteitz $
029 * @since 3.0
030 */
031 public class RegressionResults implements Serializable {
032
033 /** INDEX of Sum of Squared Errors */
034 private static final int SSE_IDX = 0;
035 /** INDEX of Sum of Squares of Model */
036 private static final int SST_IDX = 1;
037 /** INDEX of R-Squared of regression */
038 private static final int RSQ_IDX = 2;
039 /** INDEX of Mean Squared Error */
040 private static final int MSE_IDX = 3;
041 /** INDEX of Adjusted R Squared */
042 private static final int ADJRSQ_IDX = 4;
043 /** UID */
044 private static final long serialVersionUID = 1l;
045 /** regression slope parameters */
046 private final double[] parameters;
047 /** variance covariance matrix of parameters */
048 private final double[][] varCovData;
049 /** boolean flag for variance covariance matrix in symm compressed storage */
050 private final boolean isSymmetricVCD;
051 /** rank of the solution */
052 @SuppressWarnings("unused")
053 private final int rank;
054 /** number of observations on which results are based */
055 private final long nobs;
056 /** boolean flag indicator of whether a constant was included*/
057 private final boolean containsConstant;
058 /** array storing global results, SSE, MSE, RSQ, adjRSQ */
059 private final double[] globalFitInfo;
060
061 /**
062 * Set the default constructor to private access
063 * to prevent inadvertent instantiation
064 */
065 @SuppressWarnings("unused")
066 private RegressionResults() {
067 this.parameters = null;
068 this.varCovData = null;
069 this.rank = -1;
070 this.nobs = -1;
071 this.containsConstant = false;
072 this.isSymmetricVCD = false;
073 this.globalFitInfo = null;
074 }
075
076 /**
077 * Constructor for Regression Results.
078 *
079 * @param parameters a double array with the regression slope estimates
080 * @param varcov the variance covariance matrix, stored either in a square matrix
081 * or as a compressed
082 * @param isSymmetricCompressed a flag which denotes that the variance covariance
083 * matrix is in symmetric compressed format
084 * @param nobs the number of observations of the regression estimation
085 * @param rank the number of independent variables in the regression
086 * @param sumy the sum of the independent variable
087 * @param sumysq the sum of the squared independent variable
088 * @param sse sum of squared errors
089 * @param containsConstant true model has constant, false model does not have constant
090 * @param copyData if true a deep copy of all input data is made, if false only references
091 * are copied and the RegressionResults become mutable
092 */
093 public RegressionResults(
094 final double[] parameters, final double[][] varcov,
095 final boolean isSymmetricCompressed,
096 final long nobs, final int rank,
097 final double sumy, final double sumysq, final double sse,
098 final boolean containsConstant,
099 final boolean copyData) {
100 if (copyData) {
101 this.parameters = MathArrays.copyOf(parameters);
102 this.varCovData = new double[varcov.length][];
103 for (int i = 0; i < varcov.length; i++) {
104 this.varCovData[i] = MathArrays.copyOf(varcov[i]);
105 }
106 } else {
107 this.parameters = parameters;
108 this.varCovData = varcov;
109 }
110 this.isSymmetricVCD = isSymmetricCompressed;
111 this.nobs = nobs;
112 this.rank = rank;
113 this.containsConstant = containsConstant;
114 this.globalFitInfo = new double[5];
115 Arrays.fill(this.globalFitInfo, Double.NaN);
116
117 if (rank > 0) {
118 this.globalFitInfo[SST_IDX] = containsConstant ?
119 (sumysq - sumy * sumy / nobs) : sumysq;
120 }
121
122 this.globalFitInfo[SSE_IDX] = sse;
123 this.globalFitInfo[MSE_IDX] = this.globalFitInfo[SSE_IDX] /
124 (nobs - rank);
125 this.globalFitInfo[RSQ_IDX] = 1.0 -
126 this.globalFitInfo[SSE_IDX] /
127 this.globalFitInfo[SST_IDX];
128
129 if (!containsConstant) {
130 this.globalFitInfo[ADJRSQ_IDX] = 1.0-
131 (1.0 - this.globalFitInfo[RSQ_IDX]) *
132 ( (double) nobs / ( (double) (nobs - rank)));
133 } else {
134 this.globalFitInfo[ADJRSQ_IDX] = 1.0 - (sse * (nobs - 1.0)) /
135 (globalFitInfo[SST_IDX] * (nobs - rank));
136 }
137 }
138
139 /**
140 * <p>Returns the parameter estimate for the regressor at the given index.</p>
141 *
142 * <p>A redundant regressor will have its redundancy flag set, as well as
143 * a parameters estimated equal to {@code Double.NaN}</p>
144 *
145 * @param index Index.
146 * @return the parameters estimated for regressor at index.
147 * @throws OutOfRangeException if {@code index} is not in the interval
148 * {@code [0, number of parameters)}.
149 */
150 public double getParameterEstimate(int index) throws OutOfRangeException {
151 if (parameters == null) {
152 return Double.NaN;
153 }
154 if (index < 0 || index >= this.parameters.length) {
155 throw new OutOfRangeException(index, 0, this.parameters.length - 1);
156 }
157 return this.parameters[index];
158 }
159
160 /**
161 * <p>Returns a copy of the regression parameters estimates.</p>
162 *
163 * <p>The parameter estimates are returned in the natural order of the data.</p>
164 *
165 * <p>A redundant regressor will have its redundancy flag set, as will
166 * a parameter estimate equal to {@code Double.NaN}.</p>
167 *
168 * @return array of parameter estimates, null if no estimation occurred
169 */
170 public double[] getParameterEstimates() {
171 if (this.parameters == null) {
172 return null;
173 }
174 return MathArrays.copyOf(parameters);
175 }
176
177 /**
178 * Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard
179 * error of the parameter estimate at index</a>,
180 * usually denoted s(b<sub>index</sub>).
181 *
182 * @param index Index.
183 * @return the standard errors associated with parameters estimated at index.
184 * @throws OutOfRangeException if {@code index} is not in the interval
185 * {@code [0, number of parameters)}.
186 */
187 public double getStdErrorOfEstimate(int index) throws OutOfRangeException {
188 if (parameters == null) {
189 return Double.NaN;
190 }
191 if (index < 0 || index >= this.parameters.length) {
192 throw new OutOfRangeException(index, 0, this.parameters.length - 1);
193 }
194 double var = this.getVcvElement(index, index);
195 if (!Double.isNaN(var) && var > Double.MIN_VALUE) {
196 return FastMath.sqrt(var);
197 }
198 return Double.NaN;
199 }
200
201 /**
202 * <p>Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard
203 * error of the parameter estimates</a>,
204 * usually denoted s(b<sub>i</sub>).</p>
205 *
206 * <p>If there are problems with an ill conditioned design matrix then the regressor
207 * which is redundant will be assigned <code>Double.NaN</code>. </p>
208 *
209 * @return an array standard errors associated with parameters estimates,
210 * null if no estimation occurred
211 */
212 public double[] getStdErrorOfEstimates() {
213 if (parameters == null) {
214 return null;
215 }
216 double[] se = new double[this.parameters.length];
217 for (int i = 0; i < this.parameters.length; i++) {
218 double var = this.getVcvElement(i, i);
219 if (!Double.isNaN(var) && var > Double.MIN_VALUE) {
220 se[i] = FastMath.sqrt(var);
221 continue;
222 }
223 se[i] = Double.NaN;
224 }
225 return se;
226 }
227
228 /**
229 * <p>Returns the covariance between regression parameters i and j.</p>
230 *
231 * <p>If there are problems with an ill conditioned design matrix then the covariance
232 * which involves redundant columns will be assigned {@code Double.NaN}. </p>
233 *
234 * @param i {@code i}th regression parameter.
235 * @param j {@code j}th regression parameter.
236 * @return the covariance of the parameter estimates.
237 * @throws OutOfRangeException if {@code i} or {@code j} is not in the
238 * interval {@code [0, number of parameters)}.
239 */
240 public double getCovarianceOfParameters(int i, int j) throws OutOfRangeException {
241 if (parameters == null) {
242 return Double.NaN;
243 }
244 if (i < 0 || i >= this.parameters.length) {
245 throw new OutOfRangeException(i, 0, this.parameters.length - 1);
246 }
247 if (j < 0 || j >= this.parameters.length) {
248 throw new OutOfRangeException(j, 0, this.parameters.length - 1);
249 }
250 return this.getVcvElement(i, j);
251 }
252
253 /**
254 * <p>Returns the number of parameters estimated in the model.</p>
255 *
256 * <p>This is the maximum number of regressors, some techniques may drop
257 * redundant parameters</p>
258 *
259 * @return number of regressors, -1 if not estimated
260 */
261 public int getNumberOfParameters() {
262 if (this.parameters == null) {
263 return -1;
264 }
265 return this.parameters.length;
266 }
267
268 /**
269 * Returns the number of observations added to the regression model.
270 *
271 * @return Number of observations, -1 if an error condition prevents estimation
272 */
273 public long getN() {
274 return this.nobs;
275 }
276
277 /**
278 * <p>Returns the sum of squared deviations of the y values about their mean.</p>
279 *
280 * <p>This is defined as SSTO
281 * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a>.</p>
282 *
283 * <p>If {@code n < 2}, this returns {@code Double.NaN}.</p>
284 *
285 * @return sum of squared deviations of y values
286 */
287 public double getTotalSumSquares() {
288 return this.globalFitInfo[SST_IDX];
289 }
290
291 /**
292 * <p>Returns the sum of squared deviations of the predicted y values about
293 * their mean (which equals the mean of y).</p>
294 *
295 * <p>This is usually abbreviated SSR or SSM. It is defined as SSM
296 * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a></p>
297 *
298 * <p><strong>Preconditions</strong>: <ul>
299 * <li>At least two observations (with at least two different x values)
300 * must have been added before invoking this method. If this method is
301 * invoked before a model can be estimated, <code>Double.NaN</code> is
302 * returned.
303 * </li></ul></p>
304 *
305 * @return sum of squared deviations of predicted y values
306 */
307 public double getRegressionSumSquares() {
308 return this.globalFitInfo[SST_IDX] - this.globalFitInfo[SSE_IDX];
309 }
310
311 /**
312 * <p>Returns the <a href="http://www.xycoon.com/SumOfSquares.htm">
313 * sum of squared errors</a> (SSE) associated with the regression
314 * model.</p>
315 *
316 * <p>The return value is constrained to be non-negative - i.e., if due to
317 * rounding errors the computational formula returns a negative result,
318 * 0 is returned.</p>
319 *
320 * <p><strong>Preconditions</strong>: <ul>
321 * <li>numberOfParameters data pairs
322 * must have been added before invoking this method. If this method is
323 * invoked before a model can be estimated, <code>Double,NaN</code> is
324 * returned.
325 * </li></ul></p>
326 *
327 * @return sum of squared errors associated with the regression model
328 */
329 public double getErrorSumSquares() {
330 return this.globalFitInfo[ SSE_IDX];
331 }
332
333 /**
334 * <p>Returns the sum of squared errors divided by the degrees of freedom,
335 * usually abbreviated MSE.</p>
336 *
337 * <p>If there are fewer than <strong>numberOfParameters + 1</strong> data pairs in the model,
338 * or if there is no variation in <code>x</code>, this returns
339 * <code>Double.NaN</code>.</p>
340 *
341 * @return sum of squared deviations of y values
342 */
343 public double getMeanSquareError() {
344 return this.globalFitInfo[ MSE_IDX];
345 }
346
347 /**
348 * <p>Returns the <a href="http://www.xycoon.com/coefficient1.htm">
349 * coefficient of multiple determination</a>,
350 * usually denoted r-square.</p>
351 *
352 * <p><strong>Preconditions</strong>: <ul>
353 * <li>At least numberOfParameters observations (with at least numberOfParameters different x values)
354 * must have been added before invoking this method. If this method is
355 * invoked before a model can be estimated, {@code Double,NaN} is
356 * returned.
357 * </li></ul></p>
358 *
359 * @return r-square, a double in the interval [0, 1]
360 */
361 public double getRSquared() {
362 return this.globalFitInfo[ RSQ_IDX];
363 }
364
365 /**
366 * <p>Returns the adjusted R-squared statistic, defined by the formula <pre>
367 * R<sup>2</sup><sub>adj</sub> = 1 - [SSR (n - 1)] / [SSTO (n - p)]
368 * </pre>
369 * where SSR is the sum of squared residuals},
370 * SSTO is the total sum of squares}, n is the number
371 * of observations and p is the number of parameters estimated (including the intercept).</p>
372 *
373 * <p>If the regression is estimated without an intercept term, what is returned is <pre>
374 * <code> 1 - (1 - {@link #getRSquared()} ) * (n / (n - p)) </code>
375 * </pre></p>
376 *
377 * @return adjusted R-Squared statistic
378 */
379 public double getAdjustedRSquared() {
380 return this.globalFitInfo[ ADJRSQ_IDX];
381 }
382
383 /**
384 * Returns true if the regression model has been computed including an intercept.
385 * In this case, the coefficient of the intercept is the first element of the
386 * {@link #getParameterEstimates() parameter estimates}.
387 * @return true if the model has an intercept term
388 */
389 public boolean hasIntercept() {
390 return this.containsConstant;
391 }
392
393 /**
394 * Gets the i-jth element of the variance-covariance matrix.
395 *
396 * @param i first variable index
397 * @param j second variable index
398 * @return the requested variance-covariance matrix entry
399 */
400 private double getVcvElement(int i, int j) {
401 if (this.isSymmetricVCD) {
402 if (this.varCovData.length > 1) {
403 //could be stored in upper or lower triangular
404 if (i == j) {
405 return varCovData[i][i];
406 } else if (i >= varCovData[j].length) {
407 return varCovData[i][j];
408 } else {
409 return varCovData[j][i];
410 }
411 } else {//could be in single array
412 if (i > j) {
413 return varCovData[0][(i + 1) * i / 2 + j];
414 } else {
415 return varCovData[0][(j + 1) * j / 2 + i];
416 }
417 }
418 } else {
419 return this.varCovData[i][j];
420 }
421 }
422 }