001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.math3.stat.descriptive;
018
019 import java.io.Serializable;
020 import java.util.Arrays;
021
022 import org.apache.commons.math3.exception.util.LocalizedFormats;
023 import org.apache.commons.math3.exception.DimensionMismatchException;
024 import org.apache.commons.math3.exception.MathIllegalStateException;
025 import org.apache.commons.math3.linear.RealMatrix;
026 import org.apache.commons.math3.stat.descriptive.moment.GeometricMean;
027 import org.apache.commons.math3.stat.descriptive.moment.Mean;
028 import org.apache.commons.math3.stat.descriptive.moment.VectorialCovariance;
029 import org.apache.commons.math3.stat.descriptive.rank.Max;
030 import org.apache.commons.math3.stat.descriptive.rank.Min;
031 import org.apache.commons.math3.stat.descriptive.summary.Sum;
032 import org.apache.commons.math3.stat.descriptive.summary.SumOfLogs;
033 import org.apache.commons.math3.stat.descriptive.summary.SumOfSquares;
034 import org.apache.commons.math3.util.MathUtils;
035 import org.apache.commons.math3.util.MathArrays;
036 import org.apache.commons.math3.util.Precision;
037 import org.apache.commons.math3.util.FastMath;
038
039 /**
040 * <p>Computes summary statistics for a stream of n-tuples added using the
041 * {@link #addValue(double[]) addValue} method. The data values are not stored
042 * in memory, so this class can be used to compute statistics for very large
043 * n-tuple streams.</p>
044 *
045 * <p>The {@link StorelessUnivariateStatistic} instances used to maintain
046 * summary state and compute statistics are configurable via setters.
047 * For example, the default implementation for the mean can be overridden by
048 * calling {@link #setMeanImpl(StorelessUnivariateStatistic[])}. Actual
049 * parameters to these methods must implement the
050 * {@link StorelessUnivariateStatistic} interface and configuration must be
051 * completed before <code>addValue</code> is called. No configuration is
052 * necessary to use the default, commons-math provided implementations.</p>
053 *
054 * <p>To compute statistics for a stream of n-tuples, construct a
055 * MultivariateSummaryStatistics instance with dimension n and then use
056 * {@link #addValue(double[])} to add n-tuples. The <code>getXxx</code>
057 * methods where Xxx is a statistic return an array of <code>double</code>
058 * values, where for <code>i = 0,...,n-1</code> the i<sup>th</sup> array element is the
059 * value of the given statistic for data range consisting of the i<sup>th</sup> element of
060 * each of the input n-tuples. For example, if <code>addValue</code> is called
061 * with actual parameters {0, 1, 2}, then {3, 4, 5} and finally {6, 7, 8},
062 * <code>getSum</code> will return a three-element array with values
063 * {0+3+6, 1+4+7, 2+5+8}</p>
064 *
065 * <p>Note: This class is not thread-safe. Use
066 * {@link SynchronizedMultivariateSummaryStatistics} if concurrent access from multiple
067 * threads is required.</p>
068 *
069 * @since 1.2
070 * @version $Id: MultivariateSummaryStatistics.java 1416643 2012-12-03 19:37:14Z tn $
071 */
072 public class MultivariateSummaryStatistics
073 implements StatisticalMultivariateSummary, Serializable {
074
075 /** Serialization UID */
076 private static final long serialVersionUID = 2271900808994826718L;
077
078 /** Dimension of the data. */
079 private int k;
080
081 /** Count of values that have been added */
082 private long n = 0;
083
084 /** Sum statistic implementation - can be reset by setter. */
085 private StorelessUnivariateStatistic[] sumImpl;
086
087 /** Sum of squares statistic implementation - can be reset by setter. */
088 private StorelessUnivariateStatistic[] sumSqImpl;
089
090 /** Minimum statistic implementation - can be reset by setter. */
091 private StorelessUnivariateStatistic[] minImpl;
092
093 /** Maximum statistic implementation - can be reset by setter. */
094 private StorelessUnivariateStatistic[] maxImpl;
095
096 /** Sum of log statistic implementation - can be reset by setter. */
097 private StorelessUnivariateStatistic[] sumLogImpl;
098
099 /** Geometric mean statistic implementation - can be reset by setter. */
100 private StorelessUnivariateStatistic[] geoMeanImpl;
101
102 /** Mean statistic implementation - can be reset by setter. */
103 private StorelessUnivariateStatistic[] meanImpl;
104
105 /** Covariance statistic implementation - cannot be reset. */
106 private VectorialCovariance covarianceImpl;
107
108 /**
109 * Construct a MultivariateSummaryStatistics instance
110 * @param k dimension of the data
111 * @param isCovarianceBiasCorrected if true, the unbiased sample
112 * covariance is computed, otherwise the biased population covariance
113 * is computed
114 */
115 public MultivariateSummaryStatistics(int k, boolean isCovarianceBiasCorrected) {
116 this.k = k;
117
118 sumImpl = new StorelessUnivariateStatistic[k];
119 sumSqImpl = new StorelessUnivariateStatistic[k];
120 minImpl = new StorelessUnivariateStatistic[k];
121 maxImpl = new StorelessUnivariateStatistic[k];
122 sumLogImpl = new StorelessUnivariateStatistic[k];
123 geoMeanImpl = new StorelessUnivariateStatistic[k];
124 meanImpl = new StorelessUnivariateStatistic[k];
125
126 for (int i = 0; i < k; ++i) {
127 sumImpl[i] = new Sum();
128 sumSqImpl[i] = new SumOfSquares();
129 minImpl[i] = new Min();
130 maxImpl[i] = new Max();
131 sumLogImpl[i] = new SumOfLogs();
132 geoMeanImpl[i] = new GeometricMean();
133 meanImpl[i] = new Mean();
134 }
135
136 covarianceImpl =
137 new VectorialCovariance(k, isCovarianceBiasCorrected);
138
139 }
140
141 /**
142 * Add an n-tuple to the data
143 *
144 * @param value the n-tuple to add
145 * @throws DimensionMismatchException if the length of the array
146 * does not match the one used at construction
147 */
148 public void addValue(double[] value) throws DimensionMismatchException {
149 checkDimension(value.length);
150 for (int i = 0; i < k; ++i) {
151 double v = value[i];
152 sumImpl[i].increment(v);
153 sumSqImpl[i].increment(v);
154 minImpl[i].increment(v);
155 maxImpl[i].increment(v);
156 sumLogImpl[i].increment(v);
157 geoMeanImpl[i].increment(v);
158 meanImpl[i].increment(v);
159 }
160 covarianceImpl.increment(value);
161 n++;
162 }
163
164 /**
165 * Returns the dimension of the data
166 * @return The dimension of the data
167 */
168 public int getDimension() {
169 return k;
170 }
171
172 /**
173 * Returns the number of available values
174 * @return The number of available values
175 */
176 public long getN() {
177 return n;
178 }
179
180 /**
181 * Returns an array of the results of a statistic.
182 * @param stats univariate statistic array
183 * @return results array
184 */
185 private double[] getResults(StorelessUnivariateStatistic[] stats) {
186 double[] results = new double[stats.length];
187 for (int i = 0; i < results.length; ++i) {
188 results[i] = stats[i].getResult();
189 }
190 return results;
191 }
192
193 /**
194 * Returns an array whose i<sup>th</sup> entry is the sum of the
195 * i<sup>th</sup> entries of the arrays that have been added using
196 * {@link #addValue(double[])}
197 *
198 * @return the array of component sums
199 */
200 public double[] getSum() {
201 return getResults(sumImpl);
202 }
203
204 /**
205 * Returns an array whose i<sup>th</sup> entry is the sum of squares of the
206 * i<sup>th</sup> entries of the arrays that have been added using
207 * {@link #addValue(double[])}
208 *
209 * @return the array of component sums of squares
210 */
211 public double[] getSumSq() {
212 return getResults(sumSqImpl);
213 }
214
215 /**
216 * Returns an array whose i<sup>th</sup> entry is the sum of logs of the
217 * i<sup>th</sup> entries of the arrays that have been added using
218 * {@link #addValue(double[])}
219 *
220 * @return the array of component log sums
221 */
222 public double[] getSumLog() {
223 return getResults(sumLogImpl);
224 }
225
226 /**
227 * Returns an array whose i<sup>th</sup> entry is the mean of the
228 * i<sup>th</sup> entries of the arrays that have been added using
229 * {@link #addValue(double[])}
230 *
231 * @return the array of component means
232 */
233 public double[] getMean() {
234 return getResults(meanImpl);
235 }
236
237 /**
238 * Returns an array whose i<sup>th</sup> entry is the standard deviation of the
239 * i<sup>th</sup> entries of the arrays that have been added using
240 * {@link #addValue(double[])}
241 *
242 * @return the array of component standard deviations
243 */
244 public double[] getStandardDeviation() {
245 double[] stdDev = new double[k];
246 if (getN() < 1) {
247 Arrays.fill(stdDev, Double.NaN);
248 } else if (getN() < 2) {
249 Arrays.fill(stdDev, 0.0);
250 } else {
251 RealMatrix matrix = covarianceImpl.getResult();
252 for (int i = 0; i < k; ++i) {
253 stdDev[i] = FastMath.sqrt(matrix.getEntry(i, i));
254 }
255 }
256 return stdDev;
257 }
258
259 /**
260 * Returns the covariance matrix of the values that have been added.
261 *
262 * @return the covariance matrix
263 */
264 public RealMatrix getCovariance() {
265 return covarianceImpl.getResult();
266 }
267
268 /**
269 * Returns an array whose i<sup>th</sup> entry is the maximum of the
270 * i<sup>th</sup> entries of the arrays that have been added using
271 * {@link #addValue(double[])}
272 *
273 * @return the array of component maxima
274 */
275 public double[] getMax() {
276 return getResults(maxImpl);
277 }
278
279 /**
280 * Returns an array whose i<sup>th</sup> entry is the minimum of the
281 * i<sup>th</sup> entries of the arrays that have been added using
282 * {@link #addValue(double[])}
283 *
284 * @return the array of component minima
285 */
286 public double[] getMin() {
287 return getResults(minImpl);
288 }
289
290 /**
291 * Returns an array whose i<sup>th</sup> entry is the geometric mean of the
292 * i<sup>th</sup> entries of the arrays that have been added using
293 * {@link #addValue(double[])}
294 *
295 * @return the array of component geometric means
296 */
297 public double[] getGeometricMean() {
298 return getResults(geoMeanImpl);
299 }
300
301 /**
302 * Generates a text report displaying
303 * summary statistics from values that
304 * have been added.
305 * @return String with line feeds displaying statistics
306 */
307 @Override
308 public String toString() {
309 final String separator = ", ";
310 final String suffix = System.getProperty("line.separator");
311 StringBuilder outBuffer = new StringBuilder();
312 outBuffer.append("MultivariateSummaryStatistics:" + suffix);
313 outBuffer.append("n: " + getN() + suffix);
314 append(outBuffer, getMin(), "min: ", separator, suffix);
315 append(outBuffer, getMax(), "max: ", separator, suffix);
316 append(outBuffer, getMean(), "mean: ", separator, suffix);
317 append(outBuffer, getGeometricMean(), "geometric mean: ", separator, suffix);
318 append(outBuffer, getSumSq(), "sum of squares: ", separator, suffix);
319 append(outBuffer, getSumLog(), "sum of logarithms: ", separator, suffix);
320 append(outBuffer, getStandardDeviation(), "standard deviation: ", separator, suffix);
321 outBuffer.append("covariance: " + getCovariance().toString() + suffix);
322 return outBuffer.toString();
323 }
324
325 /**
326 * Append a text representation of an array to a buffer.
327 * @param buffer buffer to fill
328 * @param data data array
329 * @param prefix text prefix
330 * @param separator elements separator
331 * @param suffix text suffix
332 */
333 private void append(StringBuilder buffer, double[] data,
334 String prefix, String separator, String suffix) {
335 buffer.append(prefix);
336 for (int i = 0; i < data.length; ++i) {
337 if (i > 0) {
338 buffer.append(separator);
339 }
340 buffer.append(data[i]);
341 }
342 buffer.append(suffix);
343 }
344
345 /**
346 * Resets all statistics and storage
347 */
348 public void clear() {
349 this.n = 0;
350 for (int i = 0; i < k; ++i) {
351 minImpl[i].clear();
352 maxImpl[i].clear();
353 sumImpl[i].clear();
354 sumLogImpl[i].clear();
355 sumSqImpl[i].clear();
356 geoMeanImpl[i].clear();
357 meanImpl[i].clear();
358 }
359 covarianceImpl.clear();
360 }
361
362 /**
363 * Returns true iff <code>object</code> is a <code>MultivariateSummaryStatistics</code>
364 * instance and all statistics have the same values as this.
365 * @param object the object to test equality against.
366 * @return true if object equals this
367 */
368 @Override
369 public boolean equals(Object object) {
370 if (object == this ) {
371 return true;
372 }
373 if (object instanceof MultivariateSummaryStatistics == false) {
374 return false;
375 }
376 MultivariateSummaryStatistics stat = (MultivariateSummaryStatistics) object;
377 return MathArrays.equalsIncludingNaN(stat.getGeometricMean(), getGeometricMean()) &&
378 MathArrays.equalsIncludingNaN(stat.getMax(), getMax()) &&
379 MathArrays.equalsIncludingNaN(stat.getMean(), getMean()) &&
380 MathArrays.equalsIncludingNaN(stat.getMin(), getMin()) &&
381 Precision.equalsIncludingNaN(stat.getN(), getN()) &&
382 MathArrays.equalsIncludingNaN(stat.getSum(), getSum()) &&
383 MathArrays.equalsIncludingNaN(stat.getSumSq(), getSumSq()) &&
384 MathArrays.equalsIncludingNaN(stat.getSumLog(), getSumLog()) &&
385 stat.getCovariance().equals( getCovariance());
386 }
387
388 /**
389 * Returns hash code based on values of statistics
390 *
391 * @return hash code
392 */
393 @Override
394 public int hashCode() {
395 int result = 31 + MathUtils.hash(getGeometricMean());
396 result = result * 31 + MathUtils.hash(getGeometricMean());
397 result = result * 31 + MathUtils.hash(getMax());
398 result = result * 31 + MathUtils.hash(getMean());
399 result = result * 31 + MathUtils.hash(getMin());
400 result = result * 31 + MathUtils.hash(getN());
401 result = result * 31 + MathUtils.hash(getSum());
402 result = result * 31 + MathUtils.hash(getSumSq());
403 result = result * 31 + MathUtils.hash(getSumLog());
404 result = result * 31 + getCovariance().hashCode();
405 return result;
406 }
407
408 // Getters and setters for statistics implementations
409 /**
410 * Sets statistics implementations.
411 * @param newImpl new implementations for statistics
412 * @param oldImpl old implementations for statistics
413 * @throws DimensionMismatchException if the array dimension
414 * does not match the one used at construction
415 * @throws MathIllegalStateException if data has already been added
416 * (i.e. if n > 0)
417 */
418 private void setImpl(StorelessUnivariateStatistic[] newImpl,
419 StorelessUnivariateStatistic[] oldImpl) throws MathIllegalStateException,
420 DimensionMismatchException {
421 checkEmpty();
422 checkDimension(newImpl.length);
423 System.arraycopy(newImpl, 0, oldImpl, 0, newImpl.length);
424 }
425
426 /**
427 * Returns the currently configured Sum implementation
428 *
429 * @return the StorelessUnivariateStatistic implementing the sum
430 */
431 public StorelessUnivariateStatistic[] getSumImpl() {
432 return sumImpl.clone();
433 }
434
435 /**
436 * <p>Sets the implementation for the Sum.</p>
437 * <p>This method must be activated before any data has been added - i.e.,
438 * before {@link #addValue(double[]) addValue} has been used to add data;
439 * otherwise an IllegalStateException will be thrown.</p>
440 *
441 * @param sumImpl the StorelessUnivariateStatistic instance to use
442 * for computing the Sum
443 * @throws DimensionMismatchException if the array dimension
444 * does not match the one used at construction
445 * @throws MathIllegalStateException if data has already been added
446 * (i.e if n > 0)
447 */
448 public void setSumImpl(StorelessUnivariateStatistic[] sumImpl)
449 throws MathIllegalStateException, DimensionMismatchException {
450 setImpl(sumImpl, this.sumImpl);
451 }
452
453 /**
454 * Returns the currently configured sum of squares implementation
455 *
456 * @return the StorelessUnivariateStatistic implementing the sum of squares
457 */
458 public StorelessUnivariateStatistic[] getSumsqImpl() {
459 return sumSqImpl.clone();
460 }
461
462 /**
463 * <p>Sets the implementation for the sum of squares.</p>
464 * <p>This method must be activated before any data has been added - i.e.,
465 * before {@link #addValue(double[]) addValue} has been used to add data;
466 * otherwise an IllegalStateException will be thrown.</p>
467 *
468 * @param sumsqImpl the StorelessUnivariateStatistic instance to use
469 * for computing the sum of squares
470 * @throws DimensionMismatchException if the array dimension
471 * does not match the one used at construction
472 * @throws MathIllegalStateException if data has already been added
473 * (i.e if n > 0)
474 */
475 public void setSumsqImpl(StorelessUnivariateStatistic[] sumsqImpl)
476 throws MathIllegalStateException, DimensionMismatchException {
477 setImpl(sumsqImpl, this.sumSqImpl);
478 }
479
480 /**
481 * Returns the currently configured minimum implementation
482 *
483 * @return the StorelessUnivariateStatistic implementing the minimum
484 */
485 public StorelessUnivariateStatistic[] getMinImpl() {
486 return minImpl.clone();
487 }
488
489 /**
490 * <p>Sets the implementation for the minimum.</p>
491 * <p>This method must be activated before any data has been added - i.e.,
492 * before {@link #addValue(double[]) addValue} has been used to add data;
493 * otherwise an IllegalStateException will be thrown.</p>
494 *
495 * @param minImpl the StorelessUnivariateStatistic instance to use
496 * for computing the minimum
497 * @throws DimensionMismatchException if the array dimension
498 * does not match the one used at construction
499 * @throws MathIllegalStateException if data has already been added
500 * (i.e if n > 0)
501 */
502 public void setMinImpl(StorelessUnivariateStatistic[] minImpl)
503 throws MathIllegalStateException, DimensionMismatchException {
504 setImpl(minImpl, this.minImpl);
505 }
506
507 /**
508 * Returns the currently configured maximum implementation
509 *
510 * @return the StorelessUnivariateStatistic implementing the maximum
511 */
512 public StorelessUnivariateStatistic[] getMaxImpl() {
513 return maxImpl.clone();
514 }
515
516 /**
517 * <p>Sets the implementation for the maximum.</p>
518 * <p>This method must be activated before any data has been added - i.e.,
519 * before {@link #addValue(double[]) addValue} has been used to add data;
520 * otherwise an IllegalStateException will be thrown.</p>
521 *
522 * @param maxImpl the StorelessUnivariateStatistic instance to use
523 * for computing the maximum
524 * @throws DimensionMismatchException if the array dimension
525 * does not match the one used at construction
526 * @throws MathIllegalStateException if data has already been added
527 * (i.e if n > 0)
528 */
529 public void setMaxImpl(StorelessUnivariateStatistic[] maxImpl)
530 throws MathIllegalStateException, DimensionMismatchException{
531 setImpl(maxImpl, this.maxImpl);
532 }
533
534 /**
535 * Returns the currently configured sum of logs implementation
536 *
537 * @return the StorelessUnivariateStatistic implementing the log sum
538 */
539 public StorelessUnivariateStatistic[] getSumLogImpl() {
540 return sumLogImpl.clone();
541 }
542
543 /**
544 * <p>Sets the implementation for the sum of logs.</p>
545 * <p>This method must be activated before any data has been added - i.e.,
546 * before {@link #addValue(double[]) addValue} has been used to add data;
547 * otherwise an IllegalStateException will be thrown.</p>
548 *
549 * @param sumLogImpl the StorelessUnivariateStatistic instance to use
550 * for computing the log sum
551 * @throws DimensionMismatchException if the array dimension
552 * does not match the one used at construction
553 * @throws MathIllegalStateException if data has already been added
554 * (i.e if n > 0)
555 */
556 public void setSumLogImpl(StorelessUnivariateStatistic[] sumLogImpl)
557 throws MathIllegalStateException, DimensionMismatchException{
558 setImpl(sumLogImpl, this.sumLogImpl);
559 }
560
561 /**
562 * Returns the currently configured geometric mean implementation
563 *
564 * @return the StorelessUnivariateStatistic implementing the geometric mean
565 */
566 public StorelessUnivariateStatistic[] getGeoMeanImpl() {
567 return geoMeanImpl.clone();
568 }
569
570 /**
571 * <p>Sets the implementation for the geometric mean.</p>
572 * <p>This method must be activated before any data has been added - i.e.,
573 * before {@link #addValue(double[]) addValue} has been used to add data;
574 * otherwise an IllegalStateException will be thrown.</p>
575 *
576 * @param geoMeanImpl the StorelessUnivariateStatistic instance to use
577 * for computing the geometric mean
578 * @throws DimensionMismatchException if the array dimension
579 * does not match the one used at construction
580 * @throws MathIllegalStateException if data has already been added
581 * (i.e if n > 0)
582 */
583 public void setGeoMeanImpl(StorelessUnivariateStatistic[] geoMeanImpl)
584 throws MathIllegalStateException, DimensionMismatchException {
585 setImpl(geoMeanImpl, this.geoMeanImpl);
586 }
587
588 /**
589 * Returns the currently configured mean implementation
590 *
591 * @return the StorelessUnivariateStatistic implementing the mean
592 */
593 public StorelessUnivariateStatistic[] getMeanImpl() {
594 return meanImpl.clone();
595 }
596
597 /**
598 * <p>Sets the implementation for the mean.</p>
599 * <p>This method must be activated before any data has been added - i.e.,
600 * before {@link #addValue(double[]) addValue} has been used to add data;
601 * otherwise an IllegalStateException will be thrown.</p>
602 *
603 * @param meanImpl the StorelessUnivariateStatistic instance to use
604 * for computing the mean
605 * @throws DimensionMismatchException if the array dimension
606 * does not match the one used at construction
607 * @throws MathIllegalStateException if data has already been added
608 * (i.e if n > 0)
609 */
610 public void setMeanImpl(StorelessUnivariateStatistic[] meanImpl)
611 throws MathIllegalStateException, DimensionMismatchException{
612 setImpl(meanImpl, this.meanImpl);
613 }
614
615 /**
616 * Throws MathIllegalStateException if the statistic is not empty.
617 * @throws MathIllegalStateException if n > 0.
618 */
619 private void checkEmpty() throws MathIllegalStateException {
620 if (n > 0) {
621 throw new MathIllegalStateException(
622 LocalizedFormats.VALUES_ADDED_BEFORE_CONFIGURING_STATISTIC, n);
623 }
624 }
625
626 /**
627 * Throws DimensionMismatchException if dimension != k.
628 * @param dimension dimension to check
629 * @throws DimensionMismatchException if dimension != k
630 */
631 private void checkDimension(int dimension) throws DimensionMismatchException {
632 if (dimension != k) {
633 throw new DimensionMismatchException(dimension, k);
634 }
635 }
636 }