001/* ===========================================================
002 * JFreeChart : a free chart library for the Java(tm) platform
003 * ===========================================================
004 *
005 * (C) Copyright 2000-2022, by David Gilbert and Contributors.
006 *
007 * Project Info:  http://www.jfree.org/jfreechart/index.html
008 *
009 * This library is free software; you can redistribute it and/or modify it
010 * under the terms of the GNU Lesser General Public License as published by
011 * the Free Software Foundation; either version 2.1 of the License, or
012 * (at your option) any later version.
013 *
014 * This library is distributed in the hope that it will be useful, but
015 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
016 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
017 * License for more details.
018 *
019 * You should have received a copy of the GNU Lesser General Public
020 * License along with this library; if not, write to the Free Software
021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
022 * USA.
023 *
024 * [Oracle and Java are registered trademarks of Oracle and/or its affiliates. 
025 * Other names may be trademarks of their respective owners.]
026 *
027 * ---------------
028 * Regression.java
029 * ---------------
030 * (C) Copyright 2002-2022, by David Gilbert.
031 *
032 * Original Author:  David Gilbert;
033 * Contributor(s):   Peter Kolb (patch 2795746);
034 *
035 */
036
037package org.jfree.data.statistics;
038
039import org.jfree.chart.internal.Args;
040import org.jfree.data.xy.XYDataset;
041
042/**
043 * A utility class for fitting regression curves to data.
044 */
045public abstract class Regression {
046
047    /**
048     * Returns the parameters 'a' and 'b' for an equation y = a + bx, fitted to
049     * the data using ordinary least squares regression.  The result is
050     * returned as a double[], where result[0] --> a, and result[1] --> b.
051     *
052     * @param data  the data.
053     *
054     * @return The parameters.
055     */
056    public static double[] getOLSRegression(double[][] data) {
057
058        int n = data.length;
059        if (n < 2) {
060            throw new IllegalArgumentException("Not enough data.");
061        }
062
063        double sumX = 0;
064        double sumY = 0;
065        double sumXX = 0;
066        double sumXY = 0;
067        for (int i = 0; i < n; i++) {
068            double x = data[i][0];
069            double y = data[i][1];
070            sumX += x;
071            sumY += y;
072            double xx = x * x;
073            sumXX += xx;
074            double xy = x * y;
075            sumXY += xy;
076        }
077        double sxx = sumXX - (sumX * sumX) / n;
078        double sxy = sumXY - (sumX * sumY) / n;
079        double xbar = sumX / n;
080        double ybar = sumY / n;
081
082        double[] result = new double[2];
083        result[1] = sxy / sxx;
084        result[0] = ybar - result[1] * xbar;
085
086        return result;
087
088    }
089
090    /**
091     * Returns the parameters 'a' and 'b' for an equation y = a + bx, fitted to
092     * the data using ordinary least squares regression. The result is returned
093     * as a double[], where result[0] --&gt; a, and result[1] --&gt; b.
094     *
095     * @param data  the data.
096     * @param series  the series (zero-based index).
097     *
098     * @return The parameters.
099     */
100    public static double[] getOLSRegression(XYDataset data, int series) {
101
102        int n = data.getItemCount(series);
103        if (n < 2) {
104            throw new IllegalArgumentException("Not enough data.");
105        }
106
107        double sumX = 0;
108        double sumY = 0;
109        double sumXX = 0;
110        double sumXY = 0;
111        for (int i = 0; i < n; i++) {
112            double x = data.getXValue(series, i);
113            double y = data.getYValue(series, i);
114            sumX += x;
115            sumY += y;
116            double xx = x * x;
117            sumXX += xx;
118            double xy = x * y;
119            sumXY += xy;
120        }
121        double sxx = sumXX - (sumX * sumX) / n;
122        double sxy = sumXY - (sumX * sumY) / n;
123        double xbar = sumX / n;
124        double ybar = sumY / n;
125
126        double[] result = new double[2];
127        result[1] = sxy / sxx;
128        result[0] = ybar - result[1] * xbar;
129
130        return result;
131
132    }
133
134    /**
135     * Returns the parameters 'a' and 'b' for an equation y = ax^b, fitted to
136     * the data using a power regression equation.  The result is returned as
137     * an array, where double[0] --&gt; a, and double[1] --&gt; b.
138     *
139     * @param data  the data.
140     *
141     * @return The parameters.
142     */
143    public static double[] getPowerRegression(double[][] data) {
144
145        int n = data.length;
146        if (n < 2) {
147            throw new IllegalArgumentException("Not enough data.");
148        }
149
150        double sumX = 0;
151        double sumY = 0;
152        double sumXX = 0;
153        double sumXY = 0;
154        for (int i = 0; i < n; i++) {
155            double x = Math.log(data[i][0]);
156            double y = Math.log(data[i][1]);
157            sumX += x;
158            sumY += y;
159            double xx = x * x;
160            sumXX += xx;
161            double xy = x * y;
162            sumXY += xy;
163        }
164        double sxx = sumXX - (sumX * sumX) / n;
165        double sxy = sumXY - (sumX * sumY) / n;
166        double xbar = sumX / n;
167        double ybar = sumY / n;
168
169        double[] result = new double[2];
170        result[1] = sxy / sxx;
171        result[0] = Math.pow(Math.exp(1.0), ybar - result[1] * xbar);
172
173        return result;
174
175    }
176
177    /**
178     * Returns the parameters 'a' and 'b' for an equation y = ax^b, fitted to
179     * the data using a power regression equation.  The result is returned as
180     * an array, where double[0] --&gt; a, and double[1] --&gt; b.
181     *
182     * @param data  the data.
183     * @param series  the series to fit the regression line against.
184     *
185     * @return The parameters.
186     */
187    public static double[] getPowerRegression(XYDataset data, int series) {
188
189        int n = data.getItemCount(series);
190        if (n < 2) {
191            throw new IllegalArgumentException("Not enough data.");
192        }
193
194        double sumX = 0;
195        double sumY = 0;
196        double sumXX = 0;
197        double sumXY = 0;
198        for (int i = 0; i < n; i++) {
199            double x = Math.log(data.getXValue(series, i));
200            double y = Math.log(data.getYValue(series, i));
201            sumX += x;
202            sumY += y;
203            double xx = x * x;
204            sumXX += xx;
205            double xy = x * y;
206            sumXY += xy;
207        }
208        double sxx = sumXX - (sumX * sumX) / n;
209        double sxy = sumXY - (sumX * sumY) / n;
210        double xbar = sumX / n;
211        double ybar = sumY / n;
212
213        double[] result = new double[2];
214        result[1] = sxy / sxx;
215        result[0] = Math.pow(Math.exp(1.0), ybar - result[1] * xbar);
216
217        return result;
218
219    }
220
221    /**
222     * Returns the parameters 'a0', 'a1', 'a2', ..., 'an' for a polynomial 
223     * function of order n, y = a0 + a1 * x + a2 * x^2 + ... + an * x^n,
224     * fitted to the data using a polynomial regression equation.
225     * The result is returned as an array with a length of n + 2,
226     * where double[0] --&gt; a0, double[1] --&gt; a1, .., double[n] --&gt; an.
227     * and double[n + 1] is the correlation coefficient R2
228     * Reference: J. D. Faires, R. L. Burden, Numerische Methoden (german
229     * edition), pp. 243ff and 327ff.
230     *
231     * @param dataset  the dataset ({@code null} not permitted).
232     * @param series  the series to fit the regression line against (the series
233     *         must have at least order + 1 non-NaN items).
234     * @param order  the order of the function (&gt; 0).
235     *
236     * @return The parameters.
237     *
238     * @since 1.0.14
239     */
240    public static double[] getPolynomialRegression(XYDataset dataset, 
241            int series, int order) {
242        Args.nullNotPermitted(dataset, "dataset");
243        int itemCount = dataset.getItemCount(series);
244        if (itemCount < order + 1) {
245            throw new IllegalArgumentException("Not enough data.");
246        }
247        int validItems = 0;
248        double[][] data = new double[2][itemCount];
249        for(int item = 0; item < itemCount; item++){
250            double x = dataset.getXValue(series, item);
251            double y = dataset.getYValue(series, item);
252            if (!Double.isNaN(x) && !Double.isNaN(y)){
253                data[0][validItems] = x;
254                data[1][validItems] = y;
255                validItems++;
256            }
257        }
258        if (validItems < order + 1) {
259            throw new IllegalArgumentException("Not enough data.");
260        }
261        int equations = order + 1;
262        int coefficients = order + 2;
263        double[] result = new double[equations + 1];
264        double[][] matrix = new double[equations][coefficients];
265        double sumX = 0.0;
266        double sumY = 0.0;
267
268        for(int item = 0; item < validItems; item++){
269            sumX += data[0][item];
270            sumY += data[1][item];
271            for(int eq = 0; eq < equations; eq++){
272                for(int coe = 0; coe < coefficients - 1; coe++){
273                    matrix[eq][coe] += Math.pow(data[0][item],eq + coe);
274                }
275                matrix[eq][coefficients - 1] += data[1][item]
276                        * Math.pow(data[0][item],eq);
277            }
278        }
279        double[][] subMatrix = calculateSubMatrix(matrix);
280        for (int eq = 1; eq < equations; eq++) {
281            matrix[eq][0] = 0;
282            for (int coe = 1; coe < coefficients; coe++) {
283                matrix[eq][coe] = subMatrix[eq - 1][coe - 1];
284            }
285        }
286        for (int eq = equations - 1; eq > -1; eq--) {
287            double value = matrix[eq][coefficients - 1];
288            for (int coe = eq; coe < coefficients -1; coe++) {
289                value -= matrix[eq][coe] * result[coe];
290            }
291            result[eq] = value / matrix[eq][eq];
292        }
293        double meanY = sumY / validItems;
294        double yObsSquare = 0.0;
295        double yRegSquare = 0.0;
296        for (int item = 0; item < validItems; item++) {
297            double yCalc = 0;
298            for (int eq = 0; eq < equations; eq++) {
299                yCalc += result[eq] * Math.pow(data[0][item],eq);
300            }
301            yRegSquare += Math.pow(yCalc - meanY, 2);
302            yObsSquare += Math.pow(data[1][item] - meanY, 2);
303        }
304        double rSquare = yRegSquare / yObsSquare;
305        result[equations] = rSquare;
306        return result;
307    }
308
309    /**
310     * Returns a matrix with the following features: (1) the number of rows
311     * and columns is 1 less than that of the original matrix; (2)the matrix
312     * is triangular, i.e. all elements a (row, column) with column &gt; row are
313     * zero.  This method is used for calculating a polynomial regression.
314     * 
315     * @param matrix  the start matrix.
316     *
317     * @return The new matrix.
318     */
319    private static double[][] calculateSubMatrix(double[][] matrix){
320        int equations = matrix.length;
321        int coefficients = matrix[0].length;
322        double[][] result = new double[equations - 1][coefficients - 1];
323        for (int eq = 1; eq < equations; eq++) {
324            double factor = matrix[0][0] / matrix[eq][0];
325            for (int coe = 1; coe < coefficients; coe++) {
326                result[eq - 1][coe -1] = matrix[0][coe] - matrix[eq][coe]
327                        * factor;
328            }
329        }
330        if (equations == 1) {
331            return result;
332        }
333        // check for zero pivot element
334        if (result[0][0] == 0) {
335            boolean found = false;
336            for (int i = 0; i < result.length; i ++) {
337                if (result[i][0] != 0) {
338                    found = true;
339                    double[] temp = result[0];
340                    System.arraycopy(result[i], 0, result[0], 0, 
341                            result[i].length);
342                    System.arraycopy(temp, 0, result[i], 0, temp.length);
343                    break;
344                }
345            }
346            if (!found) {
347                //System.out.println("Equation has no solution!");
348                return new double[equations - 1][coefficients - 1];
349            }
350        }
351        double[][] subMatrix = calculateSubMatrix(result);
352        for (int eq = 1; eq < equations -  1; eq++) {
353            result[eq][0] = 0;
354            for (int coe = 1; coe < coefficients - 1; coe++) {
355                result[eq][coe] = subMatrix[eq - 1][coe - 1];
356            }
357        }
358        return result;
359    }
360
361}