/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.math3.stat.regression;

import java.util.Arrays;

import org.apache.commons.math3.exception.util.LocalizedFormats;
import org.apache.commons.math3.util.FastMath;
import org.apache.commons.math3.util.MathArrays;
import org.apache.commons.math3.util.Precision;

/**
 * This class is a concrete implementation of the {@link UpdatingMultipleLinearRegression} interface.
 *
 * <p>The algorithm is described in: <pre>
 * Algorithm AS 274: Least Squares Routines to Supplement Those of Gentleman
 * Author(s): Alan J. Miller
 * Source: Journal of the Royal Statistical Society.
 * Series C (Applied Statistics), Vol. 41, No. 2 (1992), pp. 458-478
 * Published by: Blackwell Publishing for the Royal Statistical Society
 * Stable URL: http://www.jstor.org/stable/2347583</pre></p>
 *
 * <p>This method for multiple regression forms the solution to the OLS problem
 * by updating the QR decomposition as described by Gentleman.</p>
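 *
 * <p>A minimal usage sketch (the data values below are hypothetical, purely
 * for illustration):
 * <pre>{@code
 * MillerUpdatingRegression regression = new MillerUpdatingRegression(2, true);
 * regression.addObservation(new double[]{1.0, 2.0}, 3.0);
 * regression.addObservation(new double[]{2.0, 3.0}, 5.0);
 * regression.addObservation(new double[]{3.0, 5.0}, 8.0);
 * regression.addObservation(new double[]{4.0, 7.0}, 11.0);
 * RegressionResults results = regression.regress();
 * double[] beta = results.getParameterEstimates(); // intercept first
 * }</pre></p>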
 *
 * @version $Id: MillerUpdatingRegression.java 1392358 2012-10-01 14:41:55Z psteitz $
 * @since 3.0
 */
public class MillerUpdatingRegression implements UpdatingMultipleLinearRegression {

    /** number of variables in regression */
    private final int nvars;
    /** diagonals of cross products matrix */
    private final double[] d;
    /** the elements of R'Y */
    private final double[] rhs;
    /** the off diagonal portion of the R matrix */
    private final double[] r;
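    // Layout note: r holds the strict upper triangle of R packed row by row,
    // so (using 0-based indices) element R(i, j) with i < j is stored at
    // r[i * (nvars + nvars - i - 1) / 2 + (j - i - 1)]; this is the index
    // arithmetic used throughout the methods below.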
    /** the tolerance for each of the variables */
    private final double[] tol;
    /** residual sum of squares for all nested regressions */
    private final double[] rss;
    /** order of the regressors */
    private final int[] vorder;
    /** scratch space for tolerance calc */
    private final double[] work_tolset;
    /** number of observations entered */
    private long nobs = 0;
    /** sum of squared errors of largest regression */
    private double sserr = 0.0;
    /** has rss been called? */
    private boolean rss_set = false;
    /** has the tolerance setting method been called */
    private boolean tol_set = false;
    /** flags for variables with linear dependency problems */
    private final boolean[] lindep;
    /** singular x values */
    private final double[] x_sing;
    /** workspace for singularity method */
    private final double[] work_sing;
    /** summation of Y variable */
    private double sumy = 0.0;
    /** summation of squared Y values */
    private double sumsqy = 0.0;
    /** boolean flag whether a regression constant is added */
    private boolean hasIntercept;
    /** zero tolerance */
    private final double epsilon;

    /**
     * Set the default constructor to private access
     * to prevent inadvertent instantiation.
     */
    @SuppressWarnings("unused")
    private MillerUpdatingRegression() {
        this(-1, false, Double.NaN);
    }

    /**
     * This is the augmented constructor for the MillerUpdatingRegression class.
     *
     * @param numberOfVariables number of regressors to expect, not including constant
     * @param includeConstant include a constant automatically
     * @param errorTolerance zero tolerance, how machine zero is determined
     * @throws ModelSpecificationException if {@code numberOfVariables} is less than 1
     */
    public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant, double errorTolerance)
            throws ModelSpecificationException {
        if (numberOfVariables < 1) {
            throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
        }
        if (includeConstant) {
            this.nvars = numberOfVariables + 1;
        } else {
            this.nvars = numberOfVariables;
        }
        this.hasIntercept = includeConstant;
        this.nobs = 0;
        this.d = new double[this.nvars];
        this.rhs = new double[this.nvars];
        this.r = new double[this.nvars * (this.nvars - 1) / 2];
        this.tol = new double[this.nvars];
        this.rss = new double[this.nvars];
        this.vorder = new int[this.nvars];
        this.x_sing = new double[this.nvars];
        this.work_sing = new double[this.nvars];
        this.work_tolset = new double[this.nvars];
        this.lindep = new boolean[this.nvars];
        for (int i = 0; i < this.nvars; i++) {
            vorder[i] = i;
        }
        if (errorTolerance > 0) {
            this.epsilon = errorTolerance;
        } else {
            this.epsilon = -errorTolerance;
        }
    }

    /**
     * Primary constructor for the MillerUpdatingRegression.
     *
     * @param numberOfVariables maximum number of potential regressors
     * @param includeConstant include a constant automatically
     * @throws ModelSpecificationException if {@code numberOfVariables} is less than 1
     */
    public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant)
            throws ModelSpecificationException {
        this(numberOfVariables, includeConstant, Precision.EPSILON);
    }

    /**
     * A getter method which determines whether a constant is included.
     * @return true if the regression includes an intercept, false otherwise
     */
    public boolean hasIntercept() {
        return this.hasIntercept;
    }

    /**
     * Gets the number of observations added to the regression model.
     * @return number of observations
     */
    public long getN() {
        return this.nobs;
    }

    /**
     * Adds an observation to the regression model.
     * @param x the array with regressor values
     * @param y the value of the dependent variable given these regressors
     * @throws ModelSpecificationException if the length of {@code x} does not equal
     * the number of independent variables in the model
     */
    public void addObservation(final double[] x, final double y)
            throws ModelSpecificationException {

        if ((!this.hasIntercept && x.length != nvars) ||
                (this.hasIntercept && x.length + 1 != nvars)) {
            throw new ModelSpecificationException(LocalizedFormats.INVALID_REGRESSION_OBSERVATION,
                    x.length, nvars);
        }
        if (!this.hasIntercept) {
            include(MathArrays.copyOf(x, x.length), 1.0, y);
        } else {
            final double[] tmp = new double[x.length + 1];
            System.arraycopy(x, 0, tmp, 1, x.length);
            tmp[0] = 1.0;
            include(tmp, 1.0, y);
        }
        ++nobs;
    }

    /**
     * Adds multiple observations to the model.
     * @param x observations on the regressors
     * @param y observations on the regressand
     * @throws ModelSpecificationException if {@code x} is not rectangular, does not match
     * the length of {@code y} or does not contain sufficient data to estimate the model
     */
    public void addObservations(double[][] x, double[] y) throws ModelSpecificationException {
        if ((x == null) || (y == null) || (x.length != y.length)) {
            throw new ModelSpecificationException(
                    LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE,
                    (x == null) ? 0 : x.length,
                    (y == null) ? 0 : y.length);
        }
        if (x.length == 0) { // Must be no y data either
            throw new ModelSpecificationException(
                    LocalizedFormats.NO_DATA);
        }
        if (x[0].length + 1 > x.length) {
            throw new ModelSpecificationException(
                    LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
                    x.length, x[0].length);
        }
        for (int i = 0; i < x.length; i++) {
            addObservation(x[i], y[i]);
        }
    }

    /**
     * The include method is where the QR decomposition occurs. This statement forms all
     * intermediate data which will be used for all derivative measures.
     * According to the Miller paper, the x vector is overwritten in the original
     * implementation. In this implementation, the include method is passed a copy of the
     * original data vector, so there is no contamination of the data. Additionally,
     * this method differs slightly from Gentleman's method in that it assumes
     * dense design matrices; the original Gentleman algorithm retains some advantage
     * on sparse matrices.
     *
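     * <p>For each observation with weight {@code w}, each nonzero regressor value
     * {@code xi} triggers one step of Gentleman's scaled planar rotation (this
     * restates the update performed in the loop below):
     * <pre>
     * d'[i]    = d[i] + w * xi * xi
     * w'       = d[i] * w / d'[i]
     * r'[i][k] = (d[i] * r[i][k] + w * xi * x[k]) / d'[i]
     * x'[k]    = x[k] - xi * r[i][k]
     * </pre></p>
     *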
     * @param x observations on the regressors
     * @param wi weight of this observation; a negative weight removes a
     * previously added observation
     * @param yi observation on the regressand
     */
    private void include(final double[] x, final double wi, final double yi) {
        int nextr = 0;
        double w = wi;
        double y = yi;
        double xi;
        double di;
        double wxi;
        double dpi;
        double xk;
        double _w;
        this.rss_set = false;
        sumy = smartAdd(yi, sumy);
        sumsqy = smartAdd(sumsqy, yi * yi);
        for (int i = 0; i < x.length; i++) {
            if (w == 0.0) {
                return;
            }
            xi = x[i];
            if (xi == 0.0) {
                // Nothing to rotate for a zero regressor; skip its row of R.
                nextr += nvars - i - 1;
                continue;
            }
            di = d[i];
            wxi = w * xi;
            _w = w;
            if (di != 0.0) {
                dpi = smartAdd(di, wxi * xi);
                final double tmp = wxi * xi / di;
                if (FastMath.abs(tmp) > Precision.EPSILON) {
                    w = (di * w) / dpi;
                }
            } else {
                dpi = wxi * xi;
                w = 0.0;
            }
            d[i] = dpi;
            // Apply the rotation to the remainder of this row of R ...
            for (int k = i + 1; k < nvars; k++) {
                xk = x[k];
                x[k] = smartAdd(xk, -xi * r[nextr]);
                if (di != 0.0) {
                    r[nextr] = smartAdd(di * r[nextr], (_w * xi) * xk) / dpi;
                } else {
                    r[nextr] = xk / xi;
                }
                ++nextr;
            }
            // ... and to the right-hand side.
            xk = y;
            y = smartAdd(xk, -xi * rhs[i]);
            if (di != 0.0) {
                rhs[i] = smartAdd(di * rhs[i], wxi * xk) / dpi;
            } else {
                rhs[i] = xk / xi;
            }
        }
        sserr = smartAdd(sserr, w * y * y);
    }

    /**
     * Adds two numbers such that contamination from an addend that is
     * negligibly small relative to the other does not corrupt the sum.
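     * <p>For example, {@code smartAdd(1.0, 1.0e-20)} returns {@code 1.0},
     * because the second addend falls below the relative precision of the first.</p>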
     * @param a an addend
     * @param b an addend
     * @return the sum of a and b
     */
    private double smartAdd(double a, double b) {
        final double _a = FastMath.abs(a);
        final double _b = FastMath.abs(b);
        if (_a > _b) {
            final double eps = _a * Precision.EPSILON;
            if (_b > eps) {
                return a + b;
            }
            return a;
        } else {
            final double eps = _b * Precision.EPSILON;
            if (_a > eps) {
                return a + b;
            }
            return b;
        }
    }

    /**
     * As the name suggests, clear wipes the internals and reorders everything in the
     * canonical order.
     */
    public void clear() {
        Arrays.fill(this.d, 0.0);
        Arrays.fill(this.rhs, 0.0);
        Arrays.fill(this.r, 0.0);
        Arrays.fill(this.tol, 0.0);
        Arrays.fill(this.rss, 0.0);
        Arrays.fill(this.work_tolset, 0.0);
        Arrays.fill(this.work_sing, 0.0);
        Arrays.fill(this.x_sing, 0.0);
        Arrays.fill(this.lindep, false);
        for (int i = 0; i < nvars; i++) {
            this.vorder[i] = i;
        }
        this.nobs = 0;
        this.sserr = 0.0;
        this.sumy = 0.0;
        this.sumsqy = 0.0;
        this.rss_set = false;
        this.tol_set = false;
    }

    /**
     * This sets up tolerances for singularity testing.
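     * <p>In effect, {@code tol[col] = eps * (sqrt(d[col]) + sum over row < col
     * of |r(row, col)| * sqrt(d[row]))}, i.e. a column's tolerance accounts for
     * the scaled magnitudes of its entries in R; this restates the loop below.</p>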
     */
    private void tolset() {
        int pos;
        double total;
        final double eps = this.epsilon;
        for (int i = 0; i < nvars; i++) {
            this.work_tolset[i] = FastMath.sqrt(d[i]);
        }
        tol[0] = eps * this.work_tolset[0];
        for (int col = 1; col < nvars; col++) {
            pos = col - 1;
            total = work_tolset[col];
            for (int row = 0; row < col; row++) {
                total += FastMath.abs(r[pos]) * work_tolset[row];
                pos += nvars - row - 2;
            }
            tol[col] = eps * total;
        }
        tol_set = true;
    }

    /**
     * The regcf method conducts the linear regression and extracts the
     * parameter vector. Notice that the algorithm can do subset regression
     * with no alteration.
     *
     * @param nreq how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return an array with the estimated slope coefficients
     * @throws ModelSpecificationException if {@code nreq} is less than 1
     * or greater than the number of independent variables
     */
    private double[] regcf(int nreq) throws ModelSpecificationException {
        int nextr;
        if (nreq < 1) {
            throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS);
        }
        if (nreq > this.nvars) {
            throw new ModelSpecificationException(
                    LocalizedFormats.TOO_MANY_REGRESSORS, nreq, this.nvars);
        }
        if (!this.tol_set) {
            tolset();
        }
        final double[] ret = new double[nreq];
        boolean rankProblem = false;
        for (int i = nreq - 1; i > -1; i--) {
            if (FastMath.sqrt(d[i]) < tol[i]) {
                ret[i] = 0.0;
                d[i] = 0.0;
                rankProblem = true;
            } else {
                ret[i] = rhs[i];
                nextr = i * (nvars + nvars - i - 1) / 2;
                for (int j = i + 1; j < nreq; j++) {
                    ret[i] = smartAdd(ret[i], -r[nextr] * ret[j]);
                    ++nextr;
                }
            }
        }
        if (rankProblem) {
            for (int i = 0; i < nreq; i++) {
                if (this.lindep[i]) {
                    ret[i] = Double.NaN;
                }
            }
        }
        return ret;
    }

    /**
     * The method which checks for singularities and then eliminates the offending
     * columns.
     */
    private void singcheck() {
        int pos;
        for (int i = 0; i < nvars; i++) {
            work_sing[i] = FastMath.sqrt(d[i]);
        }
        for (int col = 0; col < nvars; col++) {
            // Set elements within R to zero if they are less than tol(col) in
            // absolute value after being scaled by the square root of their row
            // multiplier.
            final double temp = tol[col];
            pos = col - 1;
            for (int row = 0; row < col - 1; row++) {
                if (FastMath.abs(r[pos]) * work_sing[row] < temp) {
                    r[pos] = 0.0;
                }
                pos += nvars - row - 2;
            }
            // If the diagonal element is near zero, set it to zero, set the
            // appropriate element of LINDEP, and use INCLUD to augment the
            // projections in the lower rows of the orthogonalization.
            lindep[col] = false;
            if (work_sing[col] < temp) {
                lindep[col] = true;
                if (col < nvars - 1) {
                    Arrays.fill(x_sing, 0.0);
                    int _pi = col * (nvars + nvars - col - 1) / 2;
                    for (int _xi = col + 1; _xi < nvars; _xi++, _pi++) {
                        x_sing[_xi] = r[_pi];
                        r[_pi] = 0.0;
                    }
                    final double y = rhs[col];
                    final double weight = d[col];
                    d[col] = 0.0;
                    rhs[col] = 0.0;
                    this.include(x_sing, weight, y);
                } else {
                    sserr += d[col] * rhs[col] * rhs[col];
                }
            }
        }
    }

    /**
     * Calculates the sum of squared errors for the full regression
     * and all subsets in the following manner: <pre>
     * rss[] = {
     * ResidualSumOfSquares_allNvars,
     * ResidualSumOfSquares_FirstNvars-1,
     * ResidualSumOfSquares_FirstNvars-2,
     * ..., ResidualSumOfSquares_FirstVariable} </pre>
     */
    private void ss() {
        double total = sserr;
        rss[nvars - 1] = sserr;
        for (int i = nvars - 1; i > 0; i--) {
            total += d[i] * rhs[i] * rhs[i];
            rss[i - 1] = total;
        }
        rss_set = true;
    }

    /**
     * Calculates the cov matrix assuming only the first nreq variables are
     * included in the calculation. The returned array contains a symmetric
     * matrix stored in lower triangular form. The matrix will have
     * ( nreq + 1 ) * nreq / 2 elements. For illustration <pre>
     * cov =
     * {
     *  cov_00,
     *  cov_10, cov_11,
     *  cov_20, cov_21, cov_22,
     *  ...
     * } </pre>
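     * <p>Equivalently, the covariance of regressors {@code i} and {@code j}
     * with {@code i <= j} is stored at index {@code j * (j + 1) / 2 + i};
     * this is the layout unpacked by {@code regress}.</p>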
     *
     * @param nreq how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return an array with the variance covariance of the included
     * regressors in lower triangular form
     */
    private double[] cov(int nreq) {
        if (this.nobs <= nreq) {
            return null;
        }
        double rnk = 0.0;
        for (int i = 0; i < nreq; i++) {
            if (!this.lindep[i]) {
                rnk += 1.0;
            }
        }
        final double var = rss[nreq - 1] / (nobs - rnk);
        final double[] rinv = new double[nreq * (nreq - 1) / 2];
        inverse(rinv, nreq);
        final double[] covmat = new double[nreq * (nreq + 1) / 2];
        Arrays.fill(covmat, Double.NaN);
        int pos2;
        int pos1;
        int start = 0;
        double total = 0;
        for (int row = 0; row < nreq; row++) {
            pos2 = start;
            if (!this.lindep[row]) {
                for (int col = row; col < nreq; col++) {
                    if (!this.lindep[col]) {
                        pos1 = start + col - row;
                        if (row == col) {
                            total = 1.0 / d[col];
                        } else {
                            total = rinv[pos1 - 1] / d[col];
                        }
                        for (int k = col + 1; k < nreq; k++) {
                            if (!this.lindep[k]) {
                                total += rinv[pos1] * rinv[pos2] / d[k];
                            }
                            ++pos1;
                            ++pos2;
                        }
                        covmat[(col + 1) * col / 2 + row] = total * var;
                    } else {
                        pos2 += nreq - col - 1;
                    }
                }
            }
            start += nreq - row - 1;
        }
        return covmat;
    }

    /**
     * This internal method calculates the inverse of the upper-triangular portion
     * of the R matrix.
     * @param rinv the storage for the inverse of r
     * @param nreq how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     */
    private void inverse(double[] rinv, int nreq) {
        int pos = nreq * (nreq - 1) / 2 - 1;
        int pos1 = -1;
        int pos2 = -1;
        double total = 0.0;
        Arrays.fill(rinv, Double.NaN);
        for (int row = nreq - 1; row > 0; --row) {
            if (!this.lindep[row]) {
                final int start = (row - 1) * (nvars + nvars - row) / 2;
                for (int col = nreq; col > row; --col) {
                    pos1 = start;
                    pos2 = pos;
                    total = 0.0;
                    for (int k = row; k < col - 1; k++) {
                        pos2 += nreq - k - 1;
                        if (!this.lindep[k]) {
                            total += -r[pos1] * rinv[pos2];
                        }
                        ++pos1;
                    }
                    rinv[pos] = total - r[pos1];
                    --pos;
                }
            } else {
                pos -= nreq - row;
            }
        }
    }

    /**
     * In the original algorithm only the partial correlations of the regressors
     * are returned to the user. In this implementation, we have <pre>
     * corr =
     * {
     *   corrxx - lower triangular
     *   corrxy - bottom row of the matrix
     * }
     * Replaces subroutines PCORR and COR of:
     * ALGORITHM AS274  APPL. STATIST. (1992) VOL.41, NO. 2 </pre>
     *
     * <p>Calculate partial correlations after the variables in rows
     * 1, 2, ..., IN have been forced into the regression.
     * If IN = 1, and the first row of R represents a constant in the
     * model, then the usual simple correlations are returned.</p>
     *
     * <p>If IN = 0, the value returned in array CORMAT for the correlation
     * of variables Xi &amp; Xj is:<pre>
     * sum ( Xi.Xj ) / Sqrt ( sum (Xi^2) . sum (Xj^2) )</pre></p>
     *
     * <p>On return, array CORMAT contains the upper triangle of the matrix of
     * partial correlations stored by rows, excluding the 1's on the diagonal.
     * e.g. if IN = 2, the consecutive elements returned are:
     * (3,4) (3,5) ... (3,ncol), (4,5) (4,6) ... (4,ncol), etc.
     * Array YCORR stores the partial correlations with the Y-variable
     * starting with YCORR(IN+1) = partial correlation with the variable in
     * position (IN+1).</p>
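     *
     * <p>Concretely, in the returned array the correlation of regressors
     * {@code i} and {@code j} (0-based, {@code in <= i < j}) is at index
     * {@code (j - 1 - in) * (j - in) / 2 + i - in}, and the correlation of
     * regressor {@code i} with the regressand is at index
     * {@code (nvars - in) * (nvars - in - 1) / 2 + i - in}; this restates
     * the index arithmetic in the method body.</p>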
     *
     * @param in how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return an array with the partial correlations of the remainder of
     * regressors with each other and the regressand, in lower triangular form
     */
    public double[] getPartialCorrelations(int in) {
        if (in < 0 || in >= nvars) {
            return null;
        }
        final double[] output = new double[(nvars - in + 1) * (nvars - in) / 2];
        int pos;
        int pos1;
        int pos2;
        final int rms_off = -in;
        final int wrk_off = -(in + 1);
        final double[] rms = new double[nvars - in];
        final double[] work = new double[nvars - in - 1];
        double sumxx;
        double sumxy;
        double sumyy;
        final int offXX = (nvars - in) * (nvars - in - 1) / 2;
        final int nvm = nvars - 1;
        final int base_pos = r.length - (nvm - in) * (nvm - in + 1) / 2;
        if (d[in] > 0.0) {
            rms[in + rms_off] = 1.0 / FastMath.sqrt(d[in]);
        }
        for (int col = in + 1; col < nvars; col++) {
            pos = base_pos + col - 1 - in;
            sumxx = d[col];
            for (int row = in; row < col; row++) {
                sumxx += d[row] * r[pos] * r[pos];
                pos += nvars - row - 2;
            }
            if (sumxx > 0.0) {
                rms[col + rms_off] = 1.0 / FastMath.sqrt(sumxx);
            } else {
                rms[col + rms_off] = 0.0;
            }
        }
        sumyy = sserr;
        for (int row = in; row < nvars; row++) {
            sumyy += d[row] * rhs[row] * rhs[row];
        }
        if (sumyy > 0.0) {
            sumyy = 1.0 / FastMath.sqrt(sumyy);
        }
        pos = 0;
        for (int col1 = in; col1 < nvars; col1++) {
            sumxy = 0.0;
            Arrays.fill(work, 0.0);
            pos1 = base_pos + col1 - in - 1;
            for (int row = in; row < col1; row++) {
                pos2 = pos1 + 1;
                for (int col2 = col1 + 1; col2 < nvars; col2++) {
                    work[col2 + wrk_off] += d[row] * r[pos1] * r[pos2];
                    pos2++;
                }
                sumxy += d[row] * r[pos1] * rhs[row];
                pos1 += nvars - row - 2;
            }
            pos2 = pos1 + 1;
            for (int col2 = col1 + 1; col2 < nvars; col2++) {
                work[col2 + wrk_off] += d[col1] * r[pos2];
                ++pos2;
                output[(col2 - 1 - in) * (col2 - in) / 2 + col1 - in] =
                        work[col2 + wrk_off] * rms[col1 + rms_off] * rms[col2 + rms_off];
                ++pos;
            }
            sumxy += d[col1] * rhs[col1];
            output[col1 + rms_off + offXX] = sumxy * rms[col1 + rms_off] * sumyy;
        }

        return output;
    }

    /**
     * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2.
     * Move variable from position FROM to position TO in an
     * orthogonal reduction produced by AS75.1.
     *
     * @param from initial position
     * @param to destination
     */
    private void vmove(int from, int to) {
        double d1;
        double d2;
        double X;
        double d1new;
        double d2new;
        double cbar;
        double sbar;
        double Y;
        int first;
        int inc;
        int m1;
        int m2;
        int mp1;
        int pos;
        if (from == to) {
            return;
        }
        if (!this.rss_set) {
            ss();
        }
        int count = 0;
        if (from < to) {
            first = from;
            inc = 1;
            count = to - from;
        } else {
            first = from - 1;
            inc = -1;
            count = from - to;
        }

        int m = first;
        int idx = 0;
        while (idx < count) {
            m1 = m * (nvars + nvars - m - 1) / 2;
            m2 = m1 + nvars - m - 1;
            mp1 = m + 1;
            // Reset per pair of adjacent variables (mirrors the GOTO 40 of the
            // Fortran original, which re-evaluates the special cases for each m).
            boolean bSkipTo40 = false;

            d1 = d[m];
            d2 = d[mp1];
            // Special cases.
            if (d1 > this.epsilon || d2 > this.epsilon) {
                X = r[m1];
                if (FastMath.abs(X) * FastMath.sqrt(d1) < tol[mp1]) {
                    X = 0.0;
                }
                if (d1 < this.epsilon || FastMath.abs(X) < this.epsilon) {
                    // Row m carries (almost) no information: simply swap rows.
                    d[m] = d2;
                    d[mp1] = d1;
                    r[m1] = 0.0;
                    for (int col = m + 2; col < nvars; col++) {
                        ++m1;
                        X = r[m1];
                        r[m1] = r[m2];
                        r[m2] = X;
                        ++m2;
                    }
                    X = rhs[m];
                    rhs[m] = rhs[mp1];
                    rhs[mp1] = X;
                    bSkipTo40 = true;
                } else if (d2 < this.epsilon) {
                    // Row m+1 is (almost) singular: rescale row m instead.
                    d[m] = d1 * X * X;
                    r[m1] = 1.0 / X;
                    for (int _i = m1 + 1; _i < m1 + nvars - m - 1; _i++) {
                        r[_i] /= X;
                    }
                    rhs[m] = rhs[m] / X;
                    bSkipTo40 = true;
                }
                if (!bSkipTo40) {
                    // General case: planar rotation of rows m and m+1.
                    d1new = d2 + d1 * X * X;
                    cbar = d2 / d1new;
                    sbar = X * d1 / d1new;
                    d2new = d1 * cbar;
                    d[m] = d1new;
                    d[mp1] = d2new;
                    r[m1] = sbar;
                    for (int col = m + 2; col < nvars; col++) {
                        ++m1;
                        Y = r[m1];
                        r[m1] = cbar * r[m2] + sbar * Y;
                        r[m2] = Y - X * r[m2];
                        ++m2;
                    }
                    Y = rhs[m];
                    rhs[m] = cbar * rhs[mp1] + sbar * Y;
                    rhs[mp1] = Y - X * rhs[mp1];
                }
            }
            if (m > 0) {
                pos = m;
                for (int row = 0; row < m; row++) {
                    X = r[pos];
                    r[pos] = r[pos - 1];
                    r[pos - 1] = X;
                    pos += nvars - row - 2;
                }
            }
            // Adjust variable order (VORDER), the tolerances (TOL) and
            // the vector of residual sums of squares (RSS).
            m1 = vorder[m];
            vorder[m] = vorder[mp1];
            vorder[mp1] = m1;
            X = tol[m];
            tol[m] = tol[mp1];
            tol[mp1] = X;
            rss[m] = rss[mp1] + d[mp1] * rhs[mp1] * rhs[mp1];

            m += inc;
            ++idx;
        }
    }

    /**
     * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2
     *
     * <p>Re-order the variables in an orthogonal reduction produced by
     * AS75.1 so that the N variables in LIST start at position POS1,
     * though will not necessarily be in the same order as in LIST.
     * Any variables in VORDER before position POS1 are not moved.
     * Auxiliary routine called: VMOVE.</p>
     *
     * <p>This internal method reorders the regressors.</p>
     *
     * @param list the regressors to move
     * @param pos1 where the list will be placed
     * @return -1 error, 0 everything ok
     */
    private int reorderRegressors(int[] list, int pos1) {
        int next;
        int i;
        int l;
        if (list.length < 1 || list.length > nvars + 1 - pos1) {
            return -1;
        }
        next = pos1;
        i = pos1;
        while (i < nvars) {
            l = vorder[i];
            for (int j = 0; j < list.length; j++) {
                if (l == list[j] && i > next) {
                    this.vmove(i, next);
                    ++next;
                    if (next >= list.length + pos1) {
                        return 0;
                    } else {
                        break;
                    }
                }
            }
            ++i;
        }
        return 0;
    }

    /**
     * Gets the diagonal of the Hat matrix, also known as the leverage matrix.
     *
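     * <p>A usage sketch (the row values are hypothetical; {@code regression}
     * is a model to which observations have already been added):
     * <pre>{@code
     * double hii = regression.getDiagonalOfHatMatrix(new double[]{2.0, 3.0});
     * }</pre></p>
     *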
     * @param row_data the regressor values of a single observation, excluding
     * the constant if the model includes an intercept
     * @return the corresponding diagonal element of the hat matrix
     */
    public double getDiagonalOfHatMatrix(double[] row_data) {
        double[] wk = new double[this.nvars];
        int pos;
        double total;

        if (row_data.length > nvars) {
            return Double.NaN;
        }
        double[] xrow;
        if (this.hasIntercept) {
            xrow = new double[row_data.length + 1];
            xrow[0] = 1.0;
            System.arraycopy(row_data, 0, xrow, 1, row_data.length);
        } else {
            xrow = row_data;
        }
        double hii = 0.0;
        for (int col = 0; col < xrow.length; col++) {
            if (FastMath.sqrt(d[col]) < tol[col]) {
                wk[col] = 0.0;
            } else {
                pos = col - 1;
                total = xrow[col];
                for (int row = 0; row < col; row++) {
                    total = smartAdd(total, -wk[row] * r[pos]);
                    pos += nvars - row - 2;
                }
                wk[col] = total;
                hii = smartAdd(hii, (total * total) / d[col]);
            }
        }
        return hii;
    }

    /**
     * Gets the order of the regressors, useful if some type of reordering
     * has been called. Calling {@code regress(int[])} will trigger a reordering.
     *
     * @return int[] with the current order of the regressors
     */
    public int[] getOrderOfRegressors() {
        return MathArrays.copyOf(vorder);
    }

    /**
     * Conducts a regression on the data in the model, using all regressors.
     *
     * @return RegressionResults the structure holding all regression results
     * @throws ModelSpecificationException if the number of observations is
     * less than or equal to the number of variables
     */
    public RegressionResults regress() throws ModelSpecificationException {
        return regress(this.nvars);
    }

    /**
     * Conducts a regression on the data in the model, using a subset of regressors.
     *
     * @param numberOfRegressors how many of the regressors to include (either in canonical
     * order, or in the current reordered state)
     * @return RegressionResults the structure holding all regression results
     * @throws ModelSpecificationException if the number of observations is
     * less than or equal to the number of regressors requested, or if the number of
     * regressors requested is greater than the number of regressors in the model
     */
    public RegressionResults regress(int numberOfRegressors) throws ModelSpecificationException {
        if (this.nobs <= numberOfRegressors) {
            throw new ModelSpecificationException(
                    LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
                    this.nobs, numberOfRegressors);
        }
        if (numberOfRegressors > this.nvars) {
            throw new ModelSpecificationException(
                    LocalizedFormats.TOO_MANY_REGRESSORS, numberOfRegressors, this.nvars);
        }

        tolset();
        singcheck();

        double[] beta = this.regcf(numberOfRegressors);

        ss();

        double[] cov = this.cov(numberOfRegressors);

        int rnk = 0;
        for (int i = 0; i < this.lindep.length; i++) {
            if (!this.lindep[i]) {
                ++rnk;
            }
        }

        boolean needsReorder = false;
        for (int i = 0; i < numberOfRegressors; i++) {
            if (this.vorder[i] != i) {
                needsReorder = true;
                break;
            }
        }
        if (!needsReorder) {
            return new RegressionResults(
                    beta, new double[][]{cov}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        } else {
            double[] betaNew = new double[beta.length];
            double[] covNew = new double[cov.length];

            int[] newIndices = new int[beta.length];
            for (int i = 0; i < nvars; i++) {
                for (int j = 0; j < numberOfRegressors; j++) {
                    if (this.vorder[j] == i) {
                        betaNew[i] = beta[j];
                        newIndices[i] = j;
                    }
                }
            }

            int idx1 = 0;
            int idx2;
            int _i;
            int _j;
            for (int i = 0; i < beta.length; i++) {
                _i = newIndices[i];
                for (int j = 0; j <= i; j++, idx1++) {
                    _j = newIndices[j];
                    if (_i > _j) {
                        idx2 = _i * (_i + 1) / 2 + _j;
                    } else {
                        idx2 = _j * (_j + 1) / 2 + _i;
                    }
                    covNew[idx1] = cov[idx2];
                }
            }
            return new RegressionResults(
                    betaNew, new double[][]{covNew}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        }
    }

    /**
     * Conducts a regression on the data in the model, using the regressors in
     * the given array. Calling this method will change the internal order of
     * the regressors, and care is required in interpreting the hat matrix.
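     *
     * <p>A sketch of subset regression ({@code regression} is a model with
     * enough observations already added; indices refer to the canonical
     * variable order, with 0 being the intercept when one is present):
     * <pre>{@code
     * RegressionResults subset = regression.regress(new int[]{0, 1});
     * }</pre></p>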
     *
     * @param variablesToInclude array of variables to include in regression
     * @return RegressionResults the structure holding all regression results
     * @throws ModelSpecificationException if the number of observations is
     * less than the number of variables, the number of regressors requested
     * is greater than the number of regressors in the model, or a regressor index in the
     * regressor array does not exist
     */
    public RegressionResults regress(int[] variablesToInclude) throws ModelSpecificationException {
        if (variablesToInclude.length > this.nvars) {
            throw new ModelSpecificationException(
                    LocalizedFormats.TOO_MANY_REGRESSORS, variablesToInclude.length, this.nvars);
        }
        if (this.nobs <= this.nvars) {
            throw new ModelSpecificationException(
                    LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
                    this.nobs, this.nvars);
        }
        Arrays.sort(variablesToInclude);
        int iExclude = 0;
        for (int i = 0; i < variablesToInclude.length; i++) {
            if (variablesToInclude[i] >= this.nvars) {
                throw new ModelSpecificationException(
                        LocalizedFormats.INDEX_LARGER_THAN_MAX, variablesToInclude[i], this.nvars);
            }
            if (i > 0 && variablesToInclude[i] == variablesToInclude[i - 1]) {
                // Flag duplicate indices for exclusion.
                variablesToInclude[i] = -1;
                ++iExclude;
            }
        }
        int[] series;
        if (iExclude > 0) {
            int j = 0;
            series = new int[variablesToInclude.length - iExclude];
            for (int i = 0; i < variablesToInclude.length; i++) {
                if (variablesToInclude[i] > -1) {
                    series[j] = variablesToInclude[i];
                    ++j;
                }
            }
        } else {
            series = variablesToInclude;
        }

        reorderRegressors(series, 0);
        tolset();
        singcheck();

        double[] beta = this.regcf(series.length);

        ss();

        double[] cov = this.cov(series.length);

        int rnk = 0;
        for (int i = 0; i < this.lindep.length; i++) {
            if (!this.lindep[i]) {
                ++rnk;
            }
        }

        boolean needsReorder = false;
        for (int i = 0; i < series.length; i++) {
            if (this.vorder[i] != series[i]) {
                needsReorder = true;
                break;
            }
        }
        if (!needsReorder) {
            return new RegressionResults(
                    beta, new double[][]{cov}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        } else {
            double[] betaNew = new double[beta.length];
            int[] newIndices = new int[beta.length];
            for (int i = 0; i < series.length; i++) {
                for (int j = 0; j < this.vorder.length; j++) {
                    if (this.vorder[j] == series[i]) {
                        betaNew[i] = beta[j];
                        newIndices[i] = j;
                    }
                }
            }
            double[] covNew = new double[cov.length];
            int idx1 = 0;
            int idx2;
            int _i;
            int _j;
            for (int i = 0; i < beta.length; i++) {
                _i = newIndices[i];
                for (int j = 0; j <= i; j++, idx1++) {
                    _j = newIndices[j];
                    if (_i > _j) {
                        idx2 = _i * (_i + 1) / 2 + _j;
                    } else {
                        idx2 = _j * (_j + 1) / 2 + _i;
                    }
                    covNew[idx1] = cov[idx2];
                }
            }
            return new RegressionResults(
                    betaNew, new double[][]{covNew}, true, this.nobs, rnk,
                    this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false);
        }
    }
}