001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.math3.analysis.interpolation;
018
019 import java.io.Serializable;
020 import java.util.Arrays;
021
022 import org.apache.commons.math3.analysis.polynomials.PolynomialSplineFunction;
023 import org.apache.commons.math3.exception.NotPositiveException;
024 import org.apache.commons.math3.exception.OutOfRangeException;
025 import org.apache.commons.math3.exception.DimensionMismatchException;
026 import org.apache.commons.math3.exception.NoDataException;
027 import org.apache.commons.math3.exception.NumberIsTooSmallException;
028 import org.apache.commons.math3.exception.NonMonotonicSequenceException;
029 import org.apache.commons.math3.exception.NotFiniteNumberException;
030 import org.apache.commons.math3.exception.util.LocalizedFormats;
031 import org.apache.commons.math3.util.FastMath;
032 import org.apache.commons.math3.util.MathUtils;
033 import org.apache.commons.math3.util.MathArrays;
034
035 /**
036 * Implements the <a href="http://en.wikipedia.org/wiki/Local_regression">
037 * Local Regression Algorithm</a> (also Loess, Lowess) for interpolation of
038 * real univariate functions.
039 * <p/>
040 * For reference, see
041 * <a href="http://www.math.tau.ac.il/~yekutiel/MA seminar/Cleveland 1979.pdf">
042 * William S. Cleveland - Robust Locally Weighted Regression and Smoothing
043 * Scatterplots</a>
044 * <p/>
045 * This class implements both the loess method and serves as an interpolation
046 * adapter to it, allowing one to build a spline on the obtained loess fit.
047 *
048 * @version $Id: LoessInterpolator.java 1379904 2012-09-01 23:54:52Z erans $
049 * @since 2.0
050 */
051 public class LoessInterpolator
052 implements UnivariateInterpolator, Serializable {
053 /** Default value of the bandwidth parameter. */
054 public static final double DEFAULT_BANDWIDTH = 0.3;
055 /** Default value of the number of robustness iterations. */
056 public static final int DEFAULT_ROBUSTNESS_ITERS = 2;
057 /**
058 * Default value for accuracy.
059 * @since 2.1
060 */
061 public static final double DEFAULT_ACCURACY = 1e-12;
062 /** serializable version identifier. */
063 private static final long serialVersionUID = 5204927143605193821L;
064 /**
065 * The bandwidth parameter: when computing the loess fit at
066 * a particular point, this fraction of source points closest
067 * to the current point is taken into account for computing
068 * a least-squares regression.
069 * <p/>
070 * A sensible value is usually 0.25 to 0.5.
071 */
072 private final double bandwidth;
073 /**
074 * The number of robustness iterations parameter: this many
075 * robustness iterations are done.
076 * <p/>
077 * A sensible value is usually 0 (just the initial fit without any
078 * robustness iterations) to 4.
079 */
080 private final int robustnessIters;
081 /**
082 * If the median residual at a certain robustness iteration
083 * is less than this amount, no more iterations are done.
084 */
085 private final double accuracy;
086
087 /**
088 * Constructs a new {@link LoessInterpolator}
089 * with a bandwidth of {@link #DEFAULT_BANDWIDTH},
090 * {@link #DEFAULT_ROBUSTNESS_ITERS} robustness iterations
091 * and an accuracy of {#link #DEFAULT_ACCURACY}.
092 * See {@link #LoessInterpolator(double, int, double)} for an explanation of
093 * the parameters.
094 */
095 public LoessInterpolator() {
096 this.bandwidth = DEFAULT_BANDWIDTH;
097 this.robustnessIters = DEFAULT_ROBUSTNESS_ITERS;
098 this.accuracy = DEFAULT_ACCURACY;
099 }
100
101 /**
102 * Construct a new {@link LoessInterpolator}
103 * with given bandwidth and number of robustness iterations.
104 * <p>
105 * Calling this constructor is equivalent to calling {link {@link
106 * #LoessInterpolator(double, int, double) LoessInterpolator(bandwidth,
107 * robustnessIters, LoessInterpolator.DEFAULT_ACCURACY)}
108 * </p>
109 *
110 * @param bandwidth when computing the loess fit at
111 * a particular point, this fraction of source points closest
112 * to the current point is taken into account for computing
113 * a least-squares regression.</br>
114 * A sensible value is usually 0.25 to 0.5, the default value is
115 * {@link #DEFAULT_BANDWIDTH}.
116 * @param robustnessIters This many robustness iterations are done.</br>
117 * A sensible value is usually 0 (just the initial fit without any
118 * robustness iterations) to 4, the default value is
119 * {@link #DEFAULT_ROBUSTNESS_ITERS}.
120
121 * @see #LoessInterpolator(double, int, double)
122 */
123 public LoessInterpolator(double bandwidth, int robustnessIters) {
124 this(bandwidth, robustnessIters, DEFAULT_ACCURACY);
125 }
126
127 /**
128 * Construct a new {@link LoessInterpolator}
129 * with given bandwidth, number of robustness iterations and accuracy.
130 *
131 * @param bandwidth when computing the loess fit at
132 * a particular point, this fraction of source points closest
133 * to the current point is taken into account for computing
134 * a least-squares regression.</br>
135 * A sensible value is usually 0.25 to 0.5, the default value is
136 * {@link #DEFAULT_BANDWIDTH}.
137 * @param robustnessIters This many robustness iterations are done.</br>
138 * A sensible value is usually 0 (just the initial fit without any
139 * robustness iterations) to 4, the default value is
140 * {@link #DEFAULT_ROBUSTNESS_ITERS}.
141 * @param accuracy If the median residual at a certain robustness iteration
142 * is less than this amount, no more iterations are done.
143 * @throws OutOfRangeException if bandwidth does not lie in the interval [0,1].
144 * @throws NotPositiveException if {@code robustnessIters} is negative.
145 * @see #LoessInterpolator(double, int)
146 * @since 2.1
147 */
148 public LoessInterpolator(double bandwidth, int robustnessIters, double accuracy)
149 throws OutOfRangeException,
150 NotPositiveException {
151 if (bandwidth < 0 ||
152 bandwidth > 1) {
153 throw new OutOfRangeException(LocalizedFormats.BANDWIDTH, bandwidth, 0, 1);
154 }
155 this.bandwidth = bandwidth;
156 if (robustnessIters < 0) {
157 throw new NotPositiveException(LocalizedFormats.ROBUSTNESS_ITERATIONS, robustnessIters);
158 }
159 this.robustnessIters = robustnessIters;
160 this.accuracy = accuracy;
161 }
162
163 /**
164 * Compute an interpolating function by performing a loess fit
165 * on the data at the original abscissae and then building a cubic spline
166 * with a
167 * {@link org.apache.commons.math3.analysis.interpolation.SplineInterpolator}
168 * on the resulting fit.
169 *
170 * @param xval the arguments for the interpolation points
171 * @param yval the values for the interpolation points
172 * @return A cubic spline built upon a loess fit to the data at the original abscissae
173 * @throws NonMonotonicSequenceException if {@code xval} not sorted in
174 * strictly increasing order.
175 * @throws DimensionMismatchException if {@code xval} and {@code yval} have
176 * different sizes.
177 * @throws NoDataException if {@code xval} or {@code yval} has zero size.
178 * @throws NotFiniteNumberException if any of the arguments and values are
179 * not finite real numbers.
180 * @throws NumberIsTooSmallException if the bandwidth is too small to
181 * accomodate the size of the input data (i.e. the bandwidth must be
182 * larger than 2/n).
183 */
184 public final PolynomialSplineFunction interpolate(final double[] xval,
185 final double[] yval)
186 throws NonMonotonicSequenceException,
187 DimensionMismatchException,
188 NoDataException,
189 NotFiniteNumberException,
190 NumberIsTooSmallException {
191 return new SplineInterpolator().interpolate(xval, smooth(xval, yval));
192 }
193
194 /**
195 * Compute a weighted loess fit on the data at the original abscissae.
196 *
197 * @param xval Arguments for the interpolation points.
198 * @param yval Values for the interpolation points.
199 * @param weights point weights: coefficients by which the robustness weight
200 * of a point is multiplied.
201 * @return the values of the loess fit at corresponding original abscissae.
202 * @throws NonMonotonicSequenceException if {@code xval} not sorted in
203 * strictly increasing order.
204 * @throws DimensionMismatchException if {@code xval} and {@code yval} have
205 * different sizes.
206 * @throws NoDataException if {@code xval} or {@code yval} has zero size.
207 * @throws NotFiniteNumberException if any of the arguments and values are
208 not finite real numbers.
209 * @throws NumberIsTooSmallException if the bandwidth is too small to
210 * accomodate the size of the input data (i.e. the bandwidth must be
211 * larger than 2/n).
212 * @since 2.1
213 */
214 public final double[] smooth(final double[] xval, final double[] yval,
215 final double[] weights)
216 throws NonMonotonicSequenceException,
217 DimensionMismatchException,
218 NoDataException,
219 NotFiniteNumberException,
220 NumberIsTooSmallException {
221 if (xval.length != yval.length) {
222 throw new DimensionMismatchException(xval.length, yval.length);
223 }
224
225 final int n = xval.length;
226
227 if (n == 0) {
228 throw new NoDataException();
229 }
230
231 checkAllFiniteReal(xval);
232 checkAllFiniteReal(yval);
233 checkAllFiniteReal(weights);
234
235 MathArrays.checkOrder(xval);
236
237 if (n == 1) {
238 return new double[]{yval[0]};
239 }
240
241 if (n == 2) {
242 return new double[]{yval[0], yval[1]};
243 }
244
245 int bandwidthInPoints = (int) (bandwidth * n);
246
247 if (bandwidthInPoints < 2) {
248 throw new NumberIsTooSmallException(LocalizedFormats.BANDWIDTH,
249 bandwidthInPoints, 2, true);
250 }
251
252 final double[] res = new double[n];
253
254 final double[] residuals = new double[n];
255 final double[] sortedResiduals = new double[n];
256
257 final double[] robustnessWeights = new double[n];
258
259 // Do an initial fit and 'robustnessIters' robustness iterations.
260 // This is equivalent to doing 'robustnessIters+1' robustness iterations
261 // starting with all robustness weights set to 1.
262 Arrays.fill(robustnessWeights, 1);
263
264 for (int iter = 0; iter <= robustnessIters; ++iter) {
265 final int[] bandwidthInterval = {0, bandwidthInPoints - 1};
266 // At each x, compute a local weighted linear regression
267 for (int i = 0; i < n; ++i) {
268 final double x = xval[i];
269
270 // Find out the interval of source points on which
271 // a regression is to be made.
272 if (i > 0) {
273 updateBandwidthInterval(xval, weights, i, bandwidthInterval);
274 }
275
276 final int ileft = bandwidthInterval[0];
277 final int iright = bandwidthInterval[1];
278
279 // Compute the point of the bandwidth interval that is
280 // farthest from x
281 final int edge;
282 if (xval[i] - xval[ileft] > xval[iright] - xval[i]) {
283 edge = ileft;
284 } else {
285 edge = iright;
286 }
287
288 // Compute a least-squares linear fit weighted by
289 // the product of robustness weights and the tricube
290 // weight function.
291 // See http://en.wikipedia.org/wiki/Linear_regression
292 // (section "Univariate linear case")
293 // and http://en.wikipedia.org/wiki/Weighted_least_squares
294 // (section "Weighted least squares")
295 double sumWeights = 0;
296 double sumX = 0;
297 double sumXSquared = 0;
298 double sumY = 0;
299 double sumXY = 0;
300 double denom = FastMath.abs(1.0 / (xval[edge] - x));
301 for (int k = ileft; k <= iright; ++k) {
302 final double xk = xval[k];
303 final double yk = yval[k];
304 final double dist = (k < i) ? x - xk : xk - x;
305 final double w = tricube(dist * denom) * robustnessWeights[k] * weights[k];
306 final double xkw = xk * w;
307 sumWeights += w;
308 sumX += xkw;
309 sumXSquared += xk * xkw;
310 sumY += yk * w;
311 sumXY += yk * xkw;
312 }
313
314 final double meanX = sumX / sumWeights;
315 final double meanY = sumY / sumWeights;
316 final double meanXY = sumXY / sumWeights;
317 final double meanXSquared = sumXSquared / sumWeights;
318
319 final double beta;
320 if (FastMath.sqrt(FastMath.abs(meanXSquared - meanX * meanX)) < accuracy) {
321 beta = 0;
322 } else {
323 beta = (meanXY - meanX * meanY) / (meanXSquared - meanX * meanX);
324 }
325
326 final double alpha = meanY - beta * meanX;
327
328 res[i] = beta * x + alpha;
329 residuals[i] = FastMath.abs(yval[i] - res[i]);
330 }
331
332 // No need to recompute the robustness weights at the last
333 // iteration, they won't be needed anymore
334 if (iter == robustnessIters) {
335 break;
336 }
337
338 // Recompute the robustness weights.
339
340 // Find the median residual.
341 // An arraycopy and a sort are completely tractable here,
342 // because the preceding loop is a lot more expensive
343 System.arraycopy(residuals, 0, sortedResiduals, 0, n);
344 Arrays.sort(sortedResiduals);
345 final double medianResidual = sortedResiduals[n / 2];
346
347 if (FastMath.abs(medianResidual) < accuracy) {
348 break;
349 }
350
351 for (int i = 0; i < n; ++i) {
352 final double arg = residuals[i] / (6 * medianResidual);
353 if (arg >= 1) {
354 robustnessWeights[i] = 0;
355 } else {
356 final double w = 1 - arg * arg;
357 robustnessWeights[i] = w * w;
358 }
359 }
360 }
361
362 return res;
363 }
364
365 /**
366 * Compute a loess fit on the data at the original abscissae.
367 *
368 * @param xval the arguments for the interpolation points
369 * @param yval the values for the interpolation points
370 * @return values of the loess fit at corresponding original abscissae
371 * @throws NonMonotonicSequenceException if {@code xval} not sorted in
372 * strictly increasing order.
373 * @throws DimensionMismatchException if {@code xval} and {@code yval} have
374 * different sizes.
375 * @throws NoDataException if {@code xval} or {@code yval} has zero size.
376 * @throws NotFiniteNumberException if any of the arguments and values are
377 * not finite real numbers.
378 * @throws NumberIsTooSmallException if the bandwidth is too small to
379 * accomodate the size of the input data (i.e. the bandwidth must be
380 * larger than 2/n).
381 */
382 public final double[] smooth(final double[] xval, final double[] yval)
383 throws NonMonotonicSequenceException,
384 DimensionMismatchException,
385 NoDataException,
386 NotFiniteNumberException,
387 NumberIsTooSmallException {
388 if (xval.length != yval.length) {
389 throw new DimensionMismatchException(xval.length, yval.length);
390 }
391
392 final double[] unitWeights = new double[xval.length];
393 Arrays.fill(unitWeights, 1.0);
394
395 return smooth(xval, yval, unitWeights);
396 }
397
398 /**
399 * Given an index interval into xval that embraces a certain number of
400 * points closest to {@code xval[i-1]}, update the interval so that it
401 * embraces the same number of points closest to {@code xval[i]},
402 * ignoring zero weights.
403 *
404 * @param xval Arguments array.
405 * @param weights Weights array.
406 * @param i Index around which the new interval should be computed.
407 * @param bandwidthInterval a two-element array {left, right} such that:
408 * {@code (left==0 or xval[i] - xval[left-1] > xval[right] - xval[i])}
409 * and
410 * {@code (right==xval.length-1 or xval[right+1] - xval[i] > xval[i] - xval[left])}.
411 * The array will be updated.
412 */
413 private static void updateBandwidthInterval(final double[] xval, final double[] weights,
414 final int i,
415 final int[] bandwidthInterval) {
416 final int left = bandwidthInterval[0];
417 final int right = bandwidthInterval[1];
418
419 // The right edge should be adjusted if the next point to the right
420 // is closer to xval[i] than the leftmost point of the current interval
421 int nextRight = nextNonzero(weights, right);
422 if (nextRight < xval.length && xval[nextRight] - xval[i] < xval[i] - xval[left]) {
423 int nextLeft = nextNonzero(weights, bandwidthInterval[0]);
424 bandwidthInterval[0] = nextLeft;
425 bandwidthInterval[1] = nextRight;
426 }
427 }
428
429 /**
430 * Return the smallest index {@code j} such that
431 * {@code j > i && (j == weights.length || weights[j] != 0)}.
432 *
433 * @param weights Weights array.
434 * @param i Index from which to start search.
435 * @return the smallest compliant index.
436 */
437 private static int nextNonzero(final double[] weights, final int i) {
438 int j = i + 1;
439 while(j < weights.length && weights[j] == 0) {
440 ++j;
441 }
442 return j;
443 }
444
445 /**
446 * Compute the
447 * <a href="http://en.wikipedia.org/wiki/Local_regression#Weight_function">tricube</a>
448 * weight function
449 *
450 * @param x Argument.
451 * @return <code>(1 - |x|<sup>3</sup>)<sup>3</sup></code> for |x| < 1, 0 otherwise.
452 */
453 private static double tricube(final double x) {
454 final double absX = FastMath.abs(x);
455 if (absX >= 1.0) {
456 return 0.0;
457 }
458 final double tmp = 1 - absX * absX * absX;
459 return tmp * tmp * tmp;
460 }
461
462 /**
463 * Check that all elements of an array are finite real numbers.
464 *
465 * @param values Values array.
466 * @throws org.apache.commons.math3.exception.NotFiniteNumberException
467 * if one of the values is not a finite real number.
468 */
469 private static void checkAllFiniteReal(final double[] values) {
470 for (int i = 0; i < values.length; i++) {
471 MathUtils.checkFinite(values[i]);
472 }
473 }
474 }