001/* ===========================================================
002 * JFreeChart : a free chart library for the Java(tm) platform
003 * ===========================================================
004 *
005 * (C) Copyright 2000-2022, by David Gilbert and Contributors.
006 *
007 * Project Info:  http://www.jfree.org/jfreechart/index.html
008 *
009 * This library is free software; you can redistribute it and/or modify it
010 * under the terms of the GNU Lesser General Public License as published by
011 * the Free Software Foundation; either version 2.1 of the License, or
012 * (at your option) any later version.
013 *
014 * This library is distributed in the hope that it will be useful, but
015 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
016 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
017 * License for more details.
018 *
019 * You should have received a copy of the GNU Lesser General Public
020 * License along with this library; if not, write to the Free Software
021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
022 * USA.
023 *
024 * [Oracle and Java are registered trademarks of Oracle and/or its affiliates. 
025 * Other names may be trademarks of their respective owners.]
026 *
027 * ---------------
028 * Statistics.java
029 * ---------------
030 * (C) Copyright 2000-2021, by Matthew Wright and Contributors.
031 *
032 * Original Author:  Matthew Wright;
033 * Contributor(s):   David Gilbert;
034 *
035 */
036
037package org.jfree.data.statistics;
038
039import java.util.ArrayList;
040import java.util.Collection;
041import java.util.Collections;
042import java.util.List;
043import org.jfree.chart.internal.Args;
044
045/**
046 * A utility class that provides some common statistical functions.
047 */
048public abstract class Statistics {
049
050    /**
051     * Returns the mean of an array of numbers.  This is equivalent to calling
052     * {@code calculateMean(values, true)}.
053     *
054     * @param values  the values ({@code null} not permitted).
055     *
056     * @return The mean.
057     */
058    public static double calculateMean(Number[] values) {
059        return calculateMean(values, true);
060    }
061
062    /**
063     * Returns the mean of an array of numbers.
064     *
065     * @param values  the values ({@code null} not permitted).
066     * @param includeNullAndNaN  a flag that controls whether or not
067     *     {@code null} and {@code Double.NaN} values are included
068     *     in the calculation (if either is present in the array, the result is
069     *     {@link Double#NaN}).
070     *
071     * @return The mean.
072     *
073     * @since 1.0.3
074     */
075    public static double calculateMean(Number[] values,
076            boolean includeNullAndNaN) {
077
078        Args.nullNotPermitted(values, "values");
079        double sum = 0.0;
080        double current;
081        int counter = 0;
082        for (int i = 0; i < values.length; i++) {
083            // treat nulls the same as NaNs
084            if (values[i] != null) {
085                current = values[i].doubleValue();
086            }
087            else {
088                current = Double.NaN;
089            }
090            // calculate the sum and count
091            if (includeNullAndNaN || !Double.isNaN(current)) {
092                sum = sum + current;
093                counter++;
094            }
095        }
096        double result = (sum / counter);
097        return result;
098    }
099
100    /**
101     * Returns the mean of a collection of {@code Number} objects.
102     *
103     * @param values  the values ({@code null} not permitted).
104     *
105     * @return The mean.
106     */
107    public static double calculateMean(Collection values) {
108        return calculateMean(values, true);
109    }
110
111    /**
112     * Returns the mean of a collection of {@code Number} objects.
113     *
114     * @param values  the values ({@code null} not permitted).
115     * @param includeNullAndNaN  a flag that controls whether or not
116     *     {@code null} and {@code Double.NaN} values are included
117     *     in the calculation (if either is present in the array, the result is
118     *     {@link Double#NaN}).
119     *
120     * @return The mean.
121     *
122     * @since 1.0.3
123     */
124    public static double calculateMean(Collection values,
125            boolean includeNullAndNaN) {
126
127        Args.nullNotPermitted(values, "values");
128        int count = 0;
129        double total = 0.0;
130        for (Object object : values) {
131            if (object == null) {
132                if (includeNullAndNaN) {
133                    return Double.NaN;
134                }
135            }
136            else {
137                if (object instanceof Number) {
138                    Number number = (Number) object;
139                    double value = number.doubleValue();
140                    if (Double.isNaN(value)) {
141                        if (includeNullAndNaN) {
142                            return Double.NaN;
143                        }
144                    }
145                    else {
146                        total = total + number.doubleValue();
147                        count = count + 1;
148                    }
149                }
150            }
151        }
152        return total / count;
153    }
154
155    /**
156     * Calculates the median for a list of values ({@code Number} objects).
157     * The list of values will be copied, and the copy sorted, before
158     * calculating the median.  To avoid this step (if your list of values
159     * is already sorted), use the {@link #calculateMedian(List, boolean)}
160     * method.
161     *
162     * @param values  the values ({@code null} permitted).
163     *
164     * @return The median.
165     */
166    public static double calculateMedian(List values) {
167        return calculateMedian(values, true);
168    }
169
170    /**
171     * Calculates the median for a list of values ({@code Number} objects).
172     * If {@code copyAndSort} is {@code false}, the list is assumed
173     * to be presorted in ascending order by value.
174     *
175     * @param values  the values ({@code null} permitted).
176     * @param copyAndSort  a flag that controls whether the list of values is
177     *                     copied and sorted.
178     *
179     * @return The median.
180     */
181    public static double calculateMedian(List values, boolean copyAndSort) {
182
183        double result = Double.NaN;
184        if (values != null) {
185            if (copyAndSort) {
186                int itemCount = values.size();
187                List copy = new ArrayList(itemCount);
188                for (int i = 0; i < itemCount; i++) {
189                    copy.add(i, values.get(i));
190                }
191                Collections.sort(copy);
192                values = copy;
193            }
194            int count = values.size();
195            if (count > 0) {
196                if (count % 2 == 1) {
197                    if (count > 1) {
198                        Number value = (Number) values.get((count - 1) / 2);
199                        result = value.doubleValue();
200                    }
201                    else {
202                        Number value = (Number) values.get(0);
203                        result = value.doubleValue();
204                    }
205                }
206                else {
207                    Number value1 = (Number) values.get(count / 2 - 1);
208                    Number value2 = (Number) values.get(count / 2);
209                    result = (value1.doubleValue() + value2.doubleValue())
210                             / 2.0;
211                }
212            }
213        }
214        return result;
215    }
216
217    /**
218     * Calculates the median for a sublist within a list of values
219     * ({@code Number} objects).
220     *
221     * @param values  the values, in any order ({@code null} not permitted).
222     * @param start  the start index.
223     * @param end  the end index.
224     *
225     * @return The median.
226     */
227    public static double calculateMedian(List values, int start, int end) {
228        return calculateMedian(values, start, end, true);
229    }
230
231    /**
232     * Calculates the median for a sublist within a list of values
233     * ({@code Number} objects).  The entire list will be sorted if the
234     * {@code ascending} argument is {@code false}.
235     *
236     * @param values  the values ({@code null} not permitted).
237     * @param start  the start index.
238     * @param end  the end index.
239     * @param copyAndSort  a flag that that controls whether the list of values
240     *                     is copied and sorted.
241     *
242     * @return The median.
243     */
244    public static double calculateMedian(List values, int start, int end,
245                                         boolean copyAndSort) {
246
247        double result = Double.NaN;
248        if (copyAndSort) {
249            List working = new ArrayList(end - start + 1);
250            for (int i = start; i <= end; i++) {
251                working.add(values.get(i));
252            }
253            Collections.sort(working);
254            result = calculateMedian(working, false);
255        }
256        else {
257            int count = end - start + 1;
258            if (count > 0) {
259                if (count % 2 == 1) {
260                    if (count > 1) {
261                        Number value
262                            = (Number) values.get(start + (count - 1) / 2);
263                        result = value.doubleValue();
264                    }
265                    else {
266                        Number value = (Number) values.get(start);
267                        result = value.doubleValue();
268                    }
269                }
270                else {
271                    Number value1 = (Number) values.get(start + count / 2 - 1);
272                    Number value2 = (Number) values.get(start + count / 2);
273                    result
274                        = (value1.doubleValue() + value2.doubleValue()) / 2.0;
275                }
276            }
277        }
278        return result;
279
280    }
281
282    /**
283     * Returns the standard deviation of a set of numbers.
284     *
285     * @param data  the data ({@code null} or zero length array not
286     *     permitted).
287     *
288     * @return The standard deviation of a set of numbers.
289     */
290    public static double getStdDev(Number[] data) {
291        Args.nullNotPermitted(data, "data");
292        if (data.length == 0) {
293            throw new IllegalArgumentException("Zero length 'data' array.");
294        }
295        double avg = calculateMean(data);
296        double sum = 0.0;
297
298        for (int counter = 0; counter < data.length; counter++) {
299            double diff = data[counter].doubleValue() - avg;
300            sum = sum + diff * diff;
301        }
302        return Math.sqrt(sum / (data.length - 1));
303    }
304
305    /**
306     * Fits a straight line to a set of (x, y) data, returning the slope and
307     * intercept.
308     *
309     * @param xData  the x-data ({@code null} not permitted).
310     * @param yData  the y-data ({@code null} not permitted).
311     *
312     * @return A double array with the intercept in [0] and the slope in [1].
313     */
314    public static double[] getLinearFit(Number[] xData, Number[] yData) {
315
316        Args.nullNotPermitted(xData, "xData");
317        Args.nullNotPermitted(yData, "yData");
318        if (xData.length != yData.length) {
319            throw new IllegalArgumentException(
320                "Statistics.getLinearFit(): array lengths must be equal.");
321        }
322
323        double[] result = new double[2];
324        // slope
325        result[1] = getSlope(xData, yData);
326        // intercept
327        result[0] = calculateMean(yData) - result[1] * calculateMean(xData);
328
329        return result;
330
331    }
332
333    /**
334     * Finds the slope of a regression line using least squares.
335     *
336     * @param xData  the x-values ({@code null} not permitted).
337     * @param yData  the y-values ({@code null} not permitted).
338     *
339     * @return The slope.
340     */
341    public static double getSlope(Number[] xData, Number[] yData) {
342        Args.nullNotPermitted(xData, "xData");
343        Args.nullNotPermitted(yData, "yData");
344        if (xData.length != yData.length) {
345            throw new IllegalArgumentException("Array lengths must be equal.");
346        }
347
348        // ********* stat function for linear slope ********
349        // y = a + bx
350        // a = ybar - b * xbar
351        //     sum(x * y) - (sum (x) * sum(y)) / n
352        // b = ------------------------------------
353        //     sum (x^2) - (sum(x)^2 / n
354        // *************************************************
355
356        // sum of x, x^2, x * y, y
357        double sx = 0.0, sxx = 0.0, sxy = 0.0, sy = 0.0;
358        int counter;
359        for (counter = 0; counter < xData.length; counter++) {
360            sx = sx + xData[counter].doubleValue();
361            sxx = sxx + Math.pow(xData[counter].doubleValue(), 2);
362            sxy = sxy + yData[counter].doubleValue()
363                      * xData[counter].doubleValue();
364            sy = sy + yData[counter].doubleValue();
365        }
366        return (sxy - (sx * sy) / counter) / (sxx - (sx * sx) / counter);
367
368    }
369
370    /**
371     * Calculates the correlation between two datasets.  Both arrays should
372     * contain the same number of items.  Null values are treated as zero.
373     * <P>
374     * Information about the correlation calculation was obtained from:
375     *
376     * http://trochim.human.cornell.edu/kb/statcorr.htm
377     *
378     * @param data1  the first dataset.
379     * @param data2  the second dataset.
380     *
381     * @return The correlation.
382     */
383    public static double getCorrelation(Number[] data1, Number[] data2) {
384        Args.nullNotPermitted(data1, "data1");
385        Args.nullNotPermitted(data2, "data2");
386        if (data1.length != data2.length) {
387            throw new IllegalArgumentException(
388                "'data1' and 'data2' arrays must have same length."
389            );
390        }
391        int n = data1.length;
392        double sumX = 0.0;
393        double sumY = 0.0;
394        double sumX2 = 0.0;
395        double sumY2 = 0.0;
396        double sumXY = 0.0;
397        for (int i = 0; i < n; i++) {
398            double x = 0.0;
399            if (data1[i] != null) {
400                x = data1[i].doubleValue();
401            }
402            double y = 0.0;
403            if (data2[i] != null) {
404                y = data2[i].doubleValue();
405            }
406            sumX = sumX + x;
407            sumY = sumY + y;
408            sumXY = sumXY + (x * y);
409            sumX2 = sumX2 + (x * x);
410            sumY2 = sumY2 + (y * y);
411        }
412        return (n * sumXY - sumX * sumY) / Math.pow((n * sumX2 - sumX * sumX)
413                * (n * sumY2 - sumY * sumY), 0.5);
414    }
415
416    /**
417     * Returns a data set for a moving average on the data set passed in.
418     *
419     * @param xData  an array of the x data.
420     * @param yData  an array of the y data.
421     * @param period  the number of data points to average
422     *
423     * @return A double[][] the length of the data set in the first dimension,
424     *         with two doubles for x and y in the second dimension
425     */
426    public static double[][] getMovingAverage(Number[] xData, Number[] yData,
427            int period) {
428
429        // check arguments...
430        if (xData.length != yData.length) {
431            throw new IllegalArgumentException("Array lengths must be equal.");
432        }
433
434        if (period > xData.length) {
435            throw new IllegalArgumentException(
436                "Period can't be longer than dataset.");
437        }
438
439        double[][] result = new double[xData.length - period][2];
440        for (int i = 0; i < result.length; i++) {
441            result[i][0] = xData[i + period].doubleValue();
442            // holds the moving average sum
443            double sum = 0.0;
444            for (int j = 0; j < period; j++) {
445                sum += yData[i + j].doubleValue();
446            }
447            sum = sum / period;
448            result[i][1] = sum;
449        }
450        return result;
451
452    }
453
454}