001/* =========================================================== 002 * JFreeChart : a free chart library for the Java(tm) platform 003 * =========================================================== 004 * 005 * (C) Copyright 2000-2022, by David Gilbert and Contributors. 006 * 007 * Project Info: http://www.jfree.org/jfreechart/index.html 008 * 009 * This library is free software; you can redistribute it and/or modify it 010 * under the terms of the GNU Lesser General Public License as published by 011 * the Free Software Foundation; either version 2.1 of the License, or 012 * (at your option) any later version. 013 * 014 * This library is distributed in the hope that it will be useful, but 015 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 016 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 017 * License for more details. 018 * 019 * You should have received a copy of the GNU Lesser General Public 020 * License along with this library; if not, write to the Free Software 021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, 022 * USA. 023 * 024 * [Oracle and Java are registered trademarks of Oracle and/or its affiliates. 025 * Other names may be trademarks of their respective owners.] 026 * 027 * --------------- 028 * Statistics.java 029 * --------------- 030 * (C) Copyright 2000-2021, by Matthew Wright and Contributors. 031 * 032 * Original Author: Matthew Wright; 033 * Contributor(s): David Gilbert; 034 * 035 */ 036 037package org.jfree.data.statistics; 038 039import java.util.ArrayList; 040import java.util.Collection; 041import java.util.Collections; 042import java.util.List; 043import org.jfree.chart.internal.Args; 044 045/** 046 * A utility class that provides some common statistical functions. 047 */ 048public abstract class Statistics { 049 050 /** 051 * Returns the mean of an array of numbers. This is equivalent to calling 052 * {@code calculateMean(values, true)}. 053 * 054 * @param values the values ({@code null} not permitted). 055 * 056 * @return The mean. 057 */ 058 public static double calculateMean(Number[] values) { 059 return calculateMean(values, true); 060 } 061 062 /** 063 * Returns the mean of an array of numbers. 064 * 065 * @param values the values ({@code null} not permitted). 066 * @param includeNullAndNaN a flag that controls whether or not 067 * {@code null} and {@code Double.NaN} values are included 068 * in the calculation (if either is present in the array, the result is 069 * {@link Double#NaN}). 070 * 071 * @return The mean. 072 * 073 * @since 1.0.3 074 */ 075 public static double calculateMean(Number[] values, 076 boolean includeNullAndNaN) { 077 078 Args.nullNotPermitted(values, "values"); 079 double sum = 0.0; 080 double current; 081 int counter = 0; 082 for (int i = 0; i < values.length; i++) { 083 // treat nulls the same as NaNs 084 if (values[i] != null) { 085 current = values[i].doubleValue(); 086 } 087 else { 088 current = Double.NaN; 089 } 090 // calculate the sum and count 091 if (includeNullAndNaN || !Double.isNaN(current)) { 092 sum = sum + current; 093 counter++; 094 } 095 } 096 double result = (sum / counter); 097 return result; 098 } 099 100 /** 101 * Returns the mean of a collection of {@code Number} objects. 102 * 103 * @param values the values ({@code null} not permitted). 104 * 105 * @return The mean. 106 */ 107 public static double calculateMean(Collection values) { 108 return calculateMean(values, true); 109 } 110 111 /** 112 * Returns the mean of a collection of {@code Number} objects. 113 * 114 * @param values the values ({@code null} not permitted). 115 * @param includeNullAndNaN a flag that controls whether or not 116 * {@code null} and {@code Double.NaN} values are included 117 * in the calculation (if either is present in the array, the result is 118 * {@link Double#NaN}). 119 * 120 * @return The mean. 121 * 122 * @since 1.0.3 123 */ 124 public static double calculateMean(Collection values, 125 boolean includeNullAndNaN) { 126 127 Args.nullNotPermitted(values, "values"); 128 int count = 0; 129 double total = 0.0; 130 for (Object object : values) { 131 if (object == null) { 132 if (includeNullAndNaN) { 133 return Double.NaN; 134 } 135 } 136 else { 137 if (object instanceof Number) { 138 Number number = (Number) object; 139 double value = number.doubleValue(); 140 if (Double.isNaN(value)) { 141 if (includeNullAndNaN) { 142 return Double.NaN; 143 } 144 } 145 else { 146 total = total + number.doubleValue(); 147 count = count + 1; 148 } 149 } 150 } 151 } 152 return total / count; 153 } 154 155 /** 156 * Calculates the median for a list of values ({@code Number} objects). 157 * The list of values will be copied, and the copy sorted, before 158 * calculating the median. To avoid this step (if your list of values 159 * is already sorted), use the {@link #calculateMedian(List, boolean)} 160 * method. 161 * 162 * @param values the values ({@code null} permitted). 163 * 164 * @return The median. 165 */ 166 public static double calculateMedian(List values) { 167 return calculateMedian(values, true); 168 } 169 170 /** 171 * Calculates the median for a list of values ({@code Number} objects). 172 * If {@code copyAndSort} is {@code false}, the list is assumed 173 * to be presorted in ascending order by value. 174 * 175 * @param values the values ({@code null} permitted). 176 * @param copyAndSort a flag that controls whether the list of values is 177 * copied and sorted. 178 * 179 * @return The median. 180 */ 181 public static double calculateMedian(List values, boolean copyAndSort) { 182 183 double result = Double.NaN; 184 if (values != null) { 185 if (copyAndSort) { 186 int itemCount = values.size(); 187 List copy = new ArrayList(itemCount); 188 for (int i = 0; i < itemCount; i++) { 189 copy.add(i, values.get(i)); 190 } 191 Collections.sort(copy); 192 values = copy; 193 } 194 int count = values.size(); 195 if (count > 0) { 196 if (count % 2 == 1) { 197 if (count > 1) { 198 Number value = (Number) values.get((count - 1) / 2); 199 result = value.doubleValue(); 200 } 201 else { 202 Number value = (Number) values.get(0); 203 result = value.doubleValue(); 204 } 205 } 206 else { 207 Number value1 = (Number) values.get(count / 2 - 1); 208 Number value2 = (Number) values.get(count / 2); 209 result = (value1.doubleValue() + value2.doubleValue()) 210 / 2.0; 211 } 212 } 213 } 214 return result; 215 } 216 217 /** 218 * Calculates the median for a sublist within a list of values 219 * ({@code Number} objects). 220 * 221 * @param values the values, in any order ({@code null} not permitted). 222 * @param start the start index. 223 * @param end the end index. 224 * 225 * @return The median. 226 */ 227 public static double calculateMedian(List values, int start, int end) { 228 return calculateMedian(values, start, end, true); 229 } 230 231 /** 232 * Calculates the median for a sublist within a list of values 233 * ({@code Number} objects). The entire list will be sorted if the 234 * {@code ascending} argument is {@code false}. 235 * 236 * @param values the values ({@code null} not permitted). 237 * @param start the start index. 238 * @param end the end index. 239 * @param copyAndSort a flag that that controls whether the list of values 240 * is copied and sorted. 241 * 242 * @return The median. 243 */ 244 public static double calculateMedian(List values, int start, int end, 245 boolean copyAndSort) { 246 247 double result = Double.NaN; 248 if (copyAndSort) { 249 List working = new ArrayList(end - start + 1); 250 for (int i = start; i <= end; i++) { 251 working.add(values.get(i)); 252 } 253 Collections.sort(working); 254 result = calculateMedian(working, false); 255 } 256 else { 257 int count = end - start + 1; 258 if (count > 0) { 259 if (count % 2 == 1) { 260 if (count > 1) { 261 Number value 262 = (Number) values.get(start + (count - 1) / 2); 263 result = value.doubleValue(); 264 } 265 else { 266 Number value = (Number) values.get(start); 267 result = value.doubleValue(); 268 } 269 } 270 else { 271 Number value1 = (Number) values.get(start + count / 2 - 1); 272 Number value2 = (Number) values.get(start + count / 2); 273 result 274 = (value1.doubleValue() + value2.doubleValue()) / 2.0; 275 } 276 } 277 } 278 return result; 279 280 } 281 282 /** 283 * Returns the standard deviation of a set of numbers. 284 * 285 * @param data the data ({@code null} or zero length array not 286 * permitted). 287 * 288 * @return The standard deviation of a set of numbers. 289 */ 290 public static double getStdDev(Number[] data) { 291 Args.nullNotPermitted(data, "data"); 292 if (data.length == 0) { 293 throw new IllegalArgumentException("Zero length 'data' array."); 294 } 295 double avg = calculateMean(data); 296 double sum = 0.0; 297 298 for (int counter = 0; counter < data.length; counter++) { 299 double diff = data[counter].doubleValue() - avg; 300 sum = sum + diff * diff; 301 } 302 return Math.sqrt(sum / (data.length - 1)); 303 } 304 305 /** 306 * Fits a straight line to a set of (x, y) data, returning the slope and 307 * intercept. 308 * 309 * @param xData the x-data ({@code null} not permitted). 310 * @param yData the y-data ({@code null} not permitted). 311 * 312 * @return A double array with the intercept in [0] and the slope in [1]. 313 */ 314 public static double[] getLinearFit(Number[] xData, Number[] yData) { 315 316 Args.nullNotPermitted(xData, "xData"); 317 Args.nullNotPermitted(yData, "yData"); 318 if (xData.length != yData.length) { 319 throw new IllegalArgumentException( 320 "Statistics.getLinearFit(): array lengths must be equal."); 321 } 322 323 double[] result = new double[2]; 324 // slope 325 result[1] = getSlope(xData, yData); 326 // intercept 327 result[0] = calculateMean(yData) - result[1] * calculateMean(xData); 328 329 return result; 330 331 } 332 333 /** 334 * Finds the slope of a regression line using least squares. 335 * 336 * @param xData the x-values ({@code null} not permitted). 337 * @param yData the y-values ({@code null} not permitted). 338 * 339 * @return The slope. 340 */ 341 public static double getSlope(Number[] xData, Number[] yData) { 342 Args.nullNotPermitted(xData, "xData"); 343 Args.nullNotPermitted(yData, "yData"); 344 if (xData.length != yData.length) { 345 throw new IllegalArgumentException("Array lengths must be equal."); 346 } 347 348 // ********* stat function for linear slope ******** 349 // y = a + bx 350 // a = ybar - b * xbar 351 // sum(x * y) - (sum (x) * sum(y)) / n 352 // b = ------------------------------------ 353 // sum (x^2) - (sum(x)^2 / n 354 // ************************************************* 355 356 // sum of x, x^2, x * y, y 357 double sx = 0.0, sxx = 0.0, sxy = 0.0, sy = 0.0; 358 int counter; 359 for (counter = 0; counter < xData.length; counter++) { 360 sx = sx + xData[counter].doubleValue(); 361 sxx = sxx + Math.pow(xData[counter].doubleValue(), 2); 362 sxy = sxy + yData[counter].doubleValue() 363 * xData[counter].doubleValue(); 364 sy = sy + yData[counter].doubleValue(); 365 } 366 return (sxy - (sx * sy) / counter) / (sxx - (sx * sx) / counter); 367 368 } 369 370 /** 371 * Calculates the correlation between two datasets. Both arrays should 372 * contain the same number of items. Null values are treated as zero. 373 * <P> 374 * Information about the correlation calculation was obtained from: 375 * 376 * http://trochim.human.cornell.edu/kb/statcorr.htm 377 * 378 * @param data1 the first dataset. 379 * @param data2 the second dataset. 380 * 381 * @return The correlation. 382 */ 383 public static double getCorrelation(Number[] data1, Number[] data2) { 384 Args.nullNotPermitted(data1, "data1"); 385 Args.nullNotPermitted(data2, "data2"); 386 if (data1.length != data2.length) { 387 throw new IllegalArgumentException( 388 "'data1' and 'data2' arrays must have same length." 389 ); 390 } 391 int n = data1.length; 392 double sumX = 0.0; 393 double sumY = 0.0; 394 double sumX2 = 0.0; 395 double sumY2 = 0.0; 396 double sumXY = 0.0; 397 for (int i = 0; i < n; i++) { 398 double x = 0.0; 399 if (data1[i] != null) { 400 x = data1[i].doubleValue(); 401 } 402 double y = 0.0; 403 if (data2[i] != null) { 404 y = data2[i].doubleValue(); 405 } 406 sumX = sumX + x; 407 sumY = sumY + y; 408 sumXY = sumXY + (x * y); 409 sumX2 = sumX2 + (x * x); 410 sumY2 = sumY2 + (y * y); 411 } 412 return (n * sumXY - sumX * sumY) / Math.pow((n * sumX2 - sumX * sumX) 413 * (n * sumY2 - sumY * sumY), 0.5); 414 } 415 416 /** 417 * Returns a data set for a moving average on the data set passed in. 418 * 419 * @param xData an array of the x data. 420 * @param yData an array of the y data. 421 * @param period the number of data points to average 422 * 423 * @return A double[][] the length of the data set in the first dimension, 424 * with two doubles for x and y in the second dimension 425 */ 426 public static double[][] getMovingAverage(Number[] xData, Number[] yData, 427 int period) { 428 429 // check arguments... 430 if (xData.length != yData.length) { 431 throw new IllegalArgumentException("Array lengths must be equal."); 432 } 433 434 if (period > xData.length) { 435 throw new IllegalArgumentException( 436 "Period can't be longer than dataset."); 437 } 438 439 double[][] result = new double[xData.length - period][2]; 440 for (int i = 0; i < result.length; i++) { 441 result[i][0] = xData[i + period].doubleValue(); 442 // holds the moving average sum 443 double sum = 0.0; 444 for (int j = 0; j < period; j++) { 445 sum += yData[i + j].doubleValue(); 446 } 447 sum = sum / period; 448 result[i][1] = sum; 449 } 450 return result; 451 452 } 453 454}