001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.math3.stat.descriptive;
019
020 import java.io.Serializable;
021 import java.util.Collection;
022 import java.util.Iterator;
023
024 import org.apache.commons.math3.exception.NullArgumentException;
025
026 /**
027 * <p>
028 * An aggregator for {@code SummaryStatistics} from several data sets or
029 * data set partitions. In its simplest usage mode, the client creates an
030 * instance via the zero-argument constructor, then uses
031 * {@link #createContributingStatistics()} to obtain a {@code SummaryStatistics}
032 * for each individual data set / partition. The per-set statistics objects
033 * are used as normal, and at any time the aggregate statistics for all the
034 * contributors can be obtained from this object.
035 * </p><p>
036 * Clients with specialized requirements can use alternative constructors to
037 * control the statistics implementations and initial values used by the
038 * contributing and the internal aggregate {@code SummaryStatistics} objects.
039 * </p><p>
040 * A static {@link #aggregate(Collection)} method is also included that computes
041 * aggregate statistics directly from a Collection of SummaryStatistics instances.
042 * </p><p>
043 * When {@link #createContributingStatistics()} is used to create SummaryStatistics
044 * instances to be aggregated concurrently, the created instances'
045 * {@link SummaryStatistics#addValue(double)} methods must synchronize on the aggregating
046 * instance maintained by this class. In multithreaded environments, if the functionality
047 * provided by {@link #aggregate(Collection)} is adequate, that method should be used
048 * to avoid unnecessary computation and synchronization delays.</p>
049 *
050 * @since 2.0
051 * @version $Id: AggregateSummaryStatistics.java 1416643 2012-12-03 19:37:14Z tn $
052 *
053 */
054 public class AggregateSummaryStatistics implements StatisticalSummary,
055 Serializable {
056
057
058 /** Serializable version identifier */
059 private static final long serialVersionUID = -8207112444016386906L;
060
061 /**
062 * A SummaryStatistics serving as a prototype for creating SummaryStatistics
063 * contributing to this aggregate
064 */
065 private final SummaryStatistics statisticsPrototype;
066
067 /**
068 * The SummaryStatistics in which aggregate statistics are accumulated.
069 */
070 private final SummaryStatistics statistics;
071
072 /**
073 * Initializes a new AggregateSummaryStatistics with default statistics
074 * implementations.
075 *
076 */
077 public AggregateSummaryStatistics() {
078 // No try-catch or throws NAE because arg is guaranteed non-null
079 this(new SummaryStatistics());
080 }
081
082 /**
083 * Initializes a new AggregateSummaryStatistics with the specified statistics
084 * object as a prototype for contributing statistics and for the internal
085 * aggregate statistics. This provides for customized statistics implementations
086 * to be used by contributing and aggregate statistics.
087 *
088 * @param prototypeStatistics a {@code SummaryStatistics} serving as a
089 * prototype both for the internal aggregate statistics and for
090 * contributing statistics obtained via the
091 * {@code createContributingStatistics()} method. Being a prototype
092 * means that other objects are initialized by copying this object's state.
093 * If {@code null}, a new, default statistics object is used. Any statistic
094 * values in the prototype are propagated to contributing statistics
095 * objects and (once) into these aggregate statistics.
096 * @throws NullArgumentException if prototypeStatistics is null
097 * @see #createContributingStatistics()
098 */
099 public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics) throws NullArgumentException {
100 this(prototypeStatistics,
101 prototypeStatistics == null ? null : new SummaryStatistics(prototypeStatistics));
102 }
103
104 /**
105 * Initializes a new AggregateSummaryStatistics with the specified statistics
106 * object as a prototype for contributing statistics and for the internal
107 * aggregate statistics. This provides for different statistics implementations
108 * to be used by contributing and aggregate statistics and for an initial
109 * state to be supplied for the aggregate statistics.
110 *
111 * @param prototypeStatistics a {@code SummaryStatistics} serving as a
112 * prototype both for the internal aggregate statistics and for
113 * contributing statistics obtained via the
114 * {@code createContributingStatistics()} method. Being a prototype
115 * means that other objects are initialized by copying this object's state.
116 * If {@code null}, a new, default statistics object is used. Any statistic
117 * values in the prototype are propagated to contributing statistics
118 * objects, but not into these aggregate statistics.
119 * @param initialStatistics a {@code SummaryStatistics} to serve as the
120 * internal aggregate statistics object. If {@code null}, a new, default
121 * statistics object is used.
122 * @see #createContributingStatistics()
123 */
124 public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics,
125 SummaryStatistics initialStatistics) {
126 this.statisticsPrototype =
127 (prototypeStatistics == null) ? new SummaryStatistics() : prototypeStatistics;
128 this.statistics =
129 (initialStatistics == null) ? new SummaryStatistics() : initialStatistics;
130 }
131
132 /**
133 * {@inheritDoc}. This version returns the maximum over all the aggregated
134 * data.
135 *
136 * @see StatisticalSummary#getMax()
137 */
138 public double getMax() {
139 synchronized (statistics) {
140 return statistics.getMax();
141 }
142 }
143
144 /**
145 * {@inheritDoc}. This version returns the mean of all the aggregated data.
146 *
147 * @see StatisticalSummary#getMean()
148 */
149 public double getMean() {
150 synchronized (statistics) {
151 return statistics.getMean();
152 }
153 }
154
155 /**
156 * {@inheritDoc}. This version returns the minimum over all the aggregated
157 * data.
158 *
159 * @see StatisticalSummary#getMin()
160 */
161 public double getMin() {
162 synchronized (statistics) {
163 return statistics.getMin();
164 }
165 }
166
167 /**
168 * {@inheritDoc}. This version returns a count of all the aggregated data.
169 *
170 * @see StatisticalSummary#getN()
171 */
172 public long getN() {
173 synchronized (statistics) {
174 return statistics.getN();
175 }
176 }
177
178 /**
179 * {@inheritDoc}. This version returns the standard deviation of all the
180 * aggregated data.
181 *
182 * @see StatisticalSummary#getStandardDeviation()
183 */
184 public double getStandardDeviation() {
185 synchronized (statistics) {
186 return statistics.getStandardDeviation();
187 }
188 }
189
190 /**
191 * {@inheritDoc}. This version returns a sum of all the aggregated data.
192 *
193 * @see StatisticalSummary#getSum()
194 */
195 public double getSum() {
196 synchronized (statistics) {
197 return statistics.getSum();
198 }
199 }
200
201 /**
202 * {@inheritDoc}. This version returns the variance of all the aggregated
203 * data.
204 *
205 * @see StatisticalSummary#getVariance()
206 */
207 public double getVariance() {
208 synchronized (statistics) {
209 return statistics.getVariance();
210 }
211 }
212
213 /**
214 * Returns the sum of the logs of all the aggregated data.
215 *
216 * @return the sum of logs
217 * @see SummaryStatistics#getSumOfLogs()
218 */
219 public double getSumOfLogs() {
220 synchronized (statistics) {
221 return statistics.getSumOfLogs();
222 }
223 }
224
225 /**
226 * Returns the geometric mean of all the aggregated data.
227 *
228 * @return the geometric mean
229 * @see SummaryStatistics#getGeometricMean()
230 */
231 public double getGeometricMean() {
232 synchronized (statistics) {
233 return statistics.getGeometricMean();
234 }
235 }
236
237 /**
238 * Returns the sum of the squares of all the aggregated data.
239 *
240 * @return The sum of squares
241 * @see SummaryStatistics#getSumsq()
242 */
243 public double getSumsq() {
244 synchronized (statistics) {
245 return statistics.getSumsq();
246 }
247 }
248
249 /**
250 * Returns a statistic related to the Second Central Moment. Specifically,
251 * what is returned is the sum of squared deviations from the sample mean
252 * among the all of the aggregated data.
253 *
254 * @return second central moment statistic
255 * @see SummaryStatistics#getSecondMoment()
256 */
257 public double getSecondMoment() {
258 synchronized (statistics) {
259 return statistics.getSecondMoment();
260 }
261 }
262
263 /**
264 * Return a {@link StatisticalSummaryValues} instance reporting current
265 * aggregate statistics.
266 *
267 * @return Current values of aggregate statistics
268 */
269 public StatisticalSummary getSummary() {
270 synchronized (statistics) {
271 return new StatisticalSummaryValues(getMean(), getVariance(), getN(),
272 getMax(), getMin(), getSum());
273 }
274 }
275
276 /**
277 * Creates and returns a {@code SummaryStatistics} whose data will be
278 * aggregated with those of this {@code AggregateSummaryStatistics}.
279 *
280 * @return a {@code SummaryStatistics} whose data will be aggregated with
281 * those of this {@code AggregateSummaryStatistics}. The initial state
282 * is a copy of the configured prototype statistics.
283 */
284 public SummaryStatistics createContributingStatistics() {
285 SummaryStatistics contributingStatistics
286 = new AggregatingSummaryStatistics(statistics);
287
288 // No try - catch or advertising NAE because neither argument will ever be null
289 SummaryStatistics.copy(statisticsPrototype, contributingStatistics);
290
291 return contributingStatistics;
292 }
293
294 /**
295 * Computes aggregate summary statistics. This method can be used to combine statistics
296 * computed over partitions or subsamples - i.e., the StatisticalSummaryValues returned
297 * should contain the same values that would have been obtained by computing a single
298 * StatisticalSummary over the combined dataset.
299 * <p>
300 * Returns null if the collection is empty or null.
301 * </p>
302 *
303 * @param statistics collection of SummaryStatistics to aggregate
304 * @return summary statistics for the combined dataset
305 */
306 public static StatisticalSummaryValues aggregate(Collection<SummaryStatistics> statistics) {
307 if (statistics == null) {
308 return null;
309 }
310 Iterator<SummaryStatistics> iterator = statistics.iterator();
311 if (!iterator.hasNext()) {
312 return null;
313 }
314 SummaryStatistics current = iterator.next();
315 long n = current.getN();
316 double min = current.getMin();
317 double sum = current.getSum();
318 double max = current.getMax();
319 double m2 = current.getSecondMoment();
320 double mean = current.getMean();
321 while (iterator.hasNext()) {
322 current = iterator.next();
323 if (current.getMin() < min || Double.isNaN(min)) {
324 min = current.getMin();
325 }
326 if (current.getMax() > max || Double.isNaN(max)) {
327 max = current.getMax();
328 }
329 sum += current.getSum();
330 final double oldN = n;
331 final double curN = current.getN();
332 n += curN;
333 final double meanDiff = current.getMean() - mean;
334 mean = sum / n;
335 m2 = m2 + current.getSecondMoment() + meanDiff * meanDiff * oldN * curN / n;
336 }
337 final double variance;
338 if (n == 0) {
339 variance = Double.NaN;
340 } else if (n == 1) {
341 variance = 0d;
342 } else {
343 variance = m2 / (n - 1);
344 }
345 return new StatisticalSummaryValues(mean, variance, n, max, min, sum);
346 }
347
348 /**
349 * A SummaryStatistics that also forwards all values added to it to a second
350 * {@code SummaryStatistics} for aggregation.
351 *
352 * @since 2.0
353 */
354 private static class AggregatingSummaryStatistics extends SummaryStatistics {
355
356 /**
357 * The serialization version of this class
358 */
359 private static final long serialVersionUID = 1L;
360
361 /**
362 * An additional SummaryStatistics into which values added to these
363 * statistics (and possibly others) are aggregated
364 */
365 private final SummaryStatistics aggregateStatistics;
366
367 /**
368 * Initializes a new AggregatingSummaryStatistics with the specified
369 * aggregate statistics object
370 *
371 * @param aggregateStatistics a {@code SummaryStatistics} into which
372 * values added to this statistics object should be aggregated
373 */
374 public AggregatingSummaryStatistics(SummaryStatistics aggregateStatistics) {
375 this.aggregateStatistics = aggregateStatistics;
376 }
377
378 /**
379 * {@inheritDoc}. This version adds the provided value to the configured
380 * aggregate after adding it to these statistics.
381 *
382 * @see SummaryStatistics#addValue(double)
383 */
384 @Override
385 public void addValue(double value) {
386 super.addValue(value);
387 synchronized (aggregateStatistics) {
388 aggregateStatistics.addValue(value);
389 }
390 }
391
392 /**
393 * Returns true iff <code>object</code> is a
394 * <code>SummaryStatistics</code> instance and all statistics have the
395 * same values as this.
396 * @param object the object to test equality against.
397 * @return true if object equals this
398 */
399 @Override
400 public boolean equals(Object object) {
401 if (object == this) {
402 return true;
403 }
404 if (object instanceof AggregatingSummaryStatistics == false) {
405 return false;
406 }
407 AggregatingSummaryStatistics stat = (AggregatingSummaryStatistics)object;
408 return super.equals(stat) &&
409 aggregateStatistics.equals(stat.aggregateStatistics);
410 }
411
412 /**
413 * Returns hash code based on values of statistics
414 * @return hash code
415 */
416 @Override
417 public int hashCode() {
418 return 123 + super.hashCode() + aggregateStatistics.hashCode();
419 }
420 }
421 }