001/* =========================================================== 002 * JFreeChart : a free chart library for the Java(tm) platform 003 * =========================================================== 004 * 005 * (C) Copyright 2000-2022, by David Gilbert and Contributors. 006 * 007 * Project Info: http://www.jfree.org/jfreechart/index.html 008 * 009 * This library is free software; you can redistribute it and/or modify it 010 * under the terms of the GNU Lesser General Public License as published by 011 * the Free Software Foundation; either version 2.1 of the License, or 012 * (at your option) any later version. 013 * 014 * This library is distributed in the hope that it will be useful, but 015 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 016 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 017 * License for more details. 018 * 019 * You should have received a copy of the GNU Lesser General Public 020 * License along with this library; if not, write to the Free Software 021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, 022 * USA. 023 * 024 * [Oracle and Java are registered trademarks of Oracle and/or its affiliates. 025 * Other names may be trademarks of their respective owners.] 026 * 027 * --------------- 028 * Regression.java 029 * --------------- 030 * (C) Copyright 2002-2022, by David Gilbert. 031 * 032 * Original Author: David Gilbert; 033 * Contributor(s): Peter Kolb (patch 2795746); 034 * 035 */ 036 037package org.jfree.data.statistics; 038 039import org.jfree.chart.internal.Args; 040import org.jfree.data.xy.XYDataset; 041 042/** 043 * A utility class for fitting regression curves to data. 044 */ 045public abstract class Regression { 046 047 /** 048 * Returns the parameters 'a' and 'b' for an equation y = a + bx, fitted to 049 * the data using ordinary least squares regression. The result is 050 * returned as a double[], where result[0] --> a, and result[1] --> b. 051 * 052 * @param data the data. 053 * 054 * @return The parameters. 055 */ 056 public static double[] getOLSRegression(double[][] data) { 057 058 int n = data.length; 059 if (n < 2) { 060 throw new IllegalArgumentException("Not enough data."); 061 } 062 063 double sumX = 0; 064 double sumY = 0; 065 double sumXX = 0; 066 double sumXY = 0; 067 for (int i = 0; i < n; i++) { 068 double x = data[i][0]; 069 double y = data[i][1]; 070 sumX += x; 071 sumY += y; 072 double xx = x * x; 073 sumXX += xx; 074 double xy = x * y; 075 sumXY += xy; 076 } 077 double sxx = sumXX - (sumX * sumX) / n; 078 double sxy = sumXY - (sumX * sumY) / n; 079 double xbar = sumX / n; 080 double ybar = sumY / n; 081 082 double[] result = new double[2]; 083 result[1] = sxy / sxx; 084 result[0] = ybar - result[1] * xbar; 085 086 return result; 087 088 } 089 090 /** 091 * Returns the parameters 'a' and 'b' for an equation y = a + bx, fitted to 092 * the data using ordinary least squares regression. The result is returned 093 * as a double[], where result[0] --> a, and result[1] --> b. 094 * 095 * @param data the data. 096 * @param series the series (zero-based index). 097 * 098 * @return The parameters. 099 */ 100 public static double[] getOLSRegression(XYDataset data, int series) { 101 102 int n = data.getItemCount(series); 103 if (n < 2) { 104 throw new IllegalArgumentException("Not enough data."); 105 } 106 107 double sumX = 0; 108 double sumY = 0; 109 double sumXX = 0; 110 double sumXY = 0; 111 for (int i = 0; i < n; i++) { 112 double x = data.getXValue(series, i); 113 double y = data.getYValue(series, i); 114 sumX += x; 115 sumY += y; 116 double xx = x * x; 117 sumXX += xx; 118 double xy = x * y; 119 sumXY += xy; 120 } 121 double sxx = sumXX - (sumX * sumX) / n; 122 double sxy = sumXY - (sumX * sumY) / n; 123 double xbar = sumX / n; 124 double ybar = sumY / n; 125 126 double[] result = new double[2]; 127 result[1] = sxy / sxx; 128 result[0] = ybar - result[1] * xbar; 129 130 return result; 131 132 } 133 134 /** 135 * Returns the parameters 'a' and 'b' for an equation y = ax^b, fitted to 136 * the data using a power regression equation. The result is returned as 137 * an array, where double[0] --> a, and double[1] --> b. 138 * 139 * @param data the data. 140 * 141 * @return The parameters. 142 */ 143 public static double[] getPowerRegression(double[][] data) { 144 145 int n = data.length; 146 if (n < 2) { 147 throw new IllegalArgumentException("Not enough data."); 148 } 149 150 double sumX = 0; 151 double sumY = 0; 152 double sumXX = 0; 153 double sumXY = 0; 154 for (int i = 0; i < n; i++) { 155 double x = Math.log(data[i][0]); 156 double y = Math.log(data[i][1]); 157 sumX += x; 158 sumY += y; 159 double xx = x * x; 160 sumXX += xx; 161 double xy = x * y; 162 sumXY += xy; 163 } 164 double sxx = sumXX - (sumX * sumX) / n; 165 double sxy = sumXY - (sumX * sumY) / n; 166 double xbar = sumX / n; 167 double ybar = sumY / n; 168 169 double[] result = new double[2]; 170 result[1] = sxy / sxx; 171 result[0] = Math.pow(Math.exp(1.0), ybar - result[1] * xbar); 172 173 return result; 174 175 } 176 177 /** 178 * Returns the parameters 'a' and 'b' for an equation y = ax^b, fitted to 179 * the data using a power regression equation. The result is returned as 180 * an array, where double[0] --> a, and double[1] --> b. 181 * 182 * @param data the data. 183 * @param series the series to fit the regression line against. 184 * 185 * @return The parameters. 186 */ 187 public static double[] getPowerRegression(XYDataset data, int series) { 188 189 int n = data.getItemCount(series); 190 if (n < 2) { 191 throw new IllegalArgumentException("Not enough data."); 192 } 193 194 double sumX = 0; 195 double sumY = 0; 196 double sumXX = 0; 197 double sumXY = 0; 198 for (int i = 0; i < n; i++) { 199 double x = Math.log(data.getXValue(series, i)); 200 double y = Math.log(data.getYValue(series, i)); 201 sumX += x; 202 sumY += y; 203 double xx = x * x; 204 sumXX += xx; 205 double xy = x * y; 206 sumXY += xy; 207 } 208 double sxx = sumXX - (sumX * sumX) / n; 209 double sxy = sumXY - (sumX * sumY) / n; 210 double xbar = sumX / n; 211 double ybar = sumY / n; 212 213 double[] result = new double[2]; 214 result[1] = sxy / sxx; 215 result[0] = Math.pow(Math.exp(1.0), ybar - result[1] * xbar); 216 217 return result; 218 219 } 220 221 /** 222 * Returns the parameters 'a0', 'a1', 'a2', ..., 'an' for a polynomial 223 * function of order n, y = a0 + a1 * x + a2 * x^2 + ... + an * x^n, 224 * fitted to the data using a polynomial regression equation. 225 * The result is returned as an array with a length of n + 2, 226 * where double[0] --> a0, double[1] --> a1, .., double[n] --> an. 227 * and double[n + 1] is the correlation coefficient R2 228 * Reference: J. D. Faires, R. L. Burden, Numerische Methoden (german 229 * edition), pp. 243ff and 327ff. 230 * 231 * @param dataset the dataset ({@code null} not permitted). 232 * @param series the series to fit the regression line against (the series 233 * must have at least order + 1 non-NaN items). 234 * @param order the order of the function (> 0). 235 * 236 * @return The parameters. 237 * 238 * @since 1.0.14 239 */ 240 public static double[] getPolynomialRegression(XYDataset dataset, 241 int series, int order) { 242 Args.nullNotPermitted(dataset, "dataset"); 243 int itemCount = dataset.getItemCount(series); 244 if (itemCount < order + 1) { 245 throw new IllegalArgumentException("Not enough data."); 246 } 247 int validItems = 0; 248 double[][] data = new double[2][itemCount]; 249 for(int item = 0; item < itemCount; item++){ 250 double x = dataset.getXValue(series, item); 251 double y = dataset.getYValue(series, item); 252 if (!Double.isNaN(x) && !Double.isNaN(y)){ 253 data[0][validItems] = x; 254 data[1][validItems] = y; 255 validItems++; 256 } 257 } 258 if (validItems < order + 1) { 259 throw new IllegalArgumentException("Not enough data."); 260 } 261 int equations = order + 1; 262 int coefficients = order + 2; 263 double[] result = new double[equations + 1]; 264 double[][] matrix = new double[equations][coefficients]; 265 double sumX = 0.0; 266 double sumY = 0.0; 267 268 for(int item = 0; item < validItems; item++){ 269 sumX += data[0][item]; 270 sumY += data[1][item]; 271 for(int eq = 0; eq < equations; eq++){ 272 for(int coe = 0; coe < coefficients - 1; coe++){ 273 matrix[eq][coe] += Math.pow(data[0][item],eq + coe); 274 } 275 matrix[eq][coefficients - 1] += data[1][item] 276 * Math.pow(data[0][item],eq); 277 } 278 } 279 double[][] subMatrix = calculateSubMatrix(matrix); 280 for (int eq = 1; eq < equations; eq++) { 281 matrix[eq][0] = 0; 282 for (int coe = 1; coe < coefficients; coe++) { 283 matrix[eq][coe] = subMatrix[eq - 1][coe - 1]; 284 } 285 } 286 for (int eq = equations - 1; eq > -1; eq--) { 287 double value = matrix[eq][coefficients - 1]; 288 for (int coe = eq; coe < coefficients -1; coe++) { 289 value -= matrix[eq][coe] * result[coe]; 290 } 291 result[eq] = value / matrix[eq][eq]; 292 } 293 double meanY = sumY / validItems; 294 double yObsSquare = 0.0; 295 double yRegSquare = 0.0; 296 for (int item = 0; item < validItems; item++) { 297 double yCalc = 0; 298 for (int eq = 0; eq < equations; eq++) { 299 yCalc += result[eq] * Math.pow(data[0][item],eq); 300 } 301 yRegSquare += Math.pow(yCalc - meanY, 2); 302 yObsSquare += Math.pow(data[1][item] - meanY, 2); 303 } 304 double rSquare = yRegSquare / yObsSquare; 305 result[equations] = rSquare; 306 return result; 307 } 308 309 /** 310 * Returns a matrix with the following features: (1) the number of rows 311 * and columns is 1 less than that of the original matrix; (2)the matrix 312 * is triangular, i.e. all elements a (row, column) with column > row are 313 * zero. This method is used for calculating a polynomial regression. 314 * 315 * @param matrix the start matrix. 316 * 317 * @return The new matrix. 318 */ 319 private static double[][] calculateSubMatrix(double[][] matrix){ 320 int equations = matrix.length; 321 int coefficients = matrix[0].length; 322 double[][] result = new double[equations - 1][coefficients - 1]; 323 for (int eq = 1; eq < equations; eq++) { 324 double factor = matrix[0][0] / matrix[eq][0]; 325 for (int coe = 1; coe < coefficients; coe++) { 326 result[eq - 1][coe -1] = matrix[0][coe] - matrix[eq][coe] 327 * factor; 328 } 329 } 330 if (equations == 1) { 331 return result; 332 } 333 // check for zero pivot element 334 if (result[0][0] == 0) { 335 boolean found = false; 336 for (int i = 0; i < result.length; i ++) { 337 if (result[i][0] != 0) { 338 found = true; 339 double[] temp = result[0]; 340 System.arraycopy(result[i], 0, result[0], 0, 341 result[i].length); 342 System.arraycopy(temp, 0, result[i], 0, temp.length); 343 break; 344 } 345 } 346 if (!found) { 347 //System.out.println("Equation has no solution!"); 348 return new double[equations - 1][coefficients - 1]; 349 } 350 } 351 double[][] subMatrix = calculateSubMatrix(result); 352 for (int eq = 1; eq < equations - 1; eq++) { 353 result[eq][0] = 0; 354 for (int coe = 1; coe < coefficients - 1; coe++) { 355 result[eq][coe] = subMatrix[eq - 1][coe - 1]; 356 } 357 } 358 return result; 359 } 360 361}