001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.math3.stat.correlation;
018
019 import org.apache.commons.math3.exception.MathIllegalArgumentException;
020 import org.apache.commons.math3.exception.util.LocalizedFormats;
021 import org.apache.commons.math3.linear.RealMatrix;
022 import org.apache.commons.math3.linear.BlockRealMatrix;
023 import org.apache.commons.math3.stat.descriptive.moment.Mean;
024 import org.apache.commons.math3.stat.descriptive.moment.Variance;
025
026 /**
027 * Computes covariances for pairs of arrays or columns of a matrix.
028 *
029 * <p>The constructors that take <code>RealMatrix</code> or
030 * <code>double[][]</code> arguments generate covariance matrices. The
031 * columns of the input matrices are assumed to represent variable values.</p>
032 *
033 * <p>The constructor argument <code>biasCorrected</code> determines whether or
034 * not computed covariances are bias-corrected.</p>
035 *
036 * <p>Unbiased covariances are given by the formula</p>
037 * <code>cov(X, Y) = Σ[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / (n - 1)</code>
038 * where <code>E(X)</code> is the mean of <code>X</code> and <code>E(Y)</code>
039 * is the mean of the <code>Y</code> values.
040 *
* <p>Non-bias-corrected estimates use <code>n</code> in place of <code>n - 1</code>.</p>
042 *
043 * @version $Id: Covariance.java 1416643 2012-12-03 19:37:14Z tn $
044 * @since 2.0
045 */
046 public class Covariance {
047
048 /** covariance matrix */
049 private final RealMatrix covarianceMatrix;
050
051 /**
052 * Create an empty covariance matrix.
053 */
054 /** Number of observations (length of covariate vectors) */
055 private final int n;
056
057 /**
058 * Create a Covariance with no data
059 */
060 public Covariance() {
061 super();
062 covarianceMatrix = null;
063 n = 0;
064 }
065
066 /**
067 * Create a Covariance matrix from a rectangular array
068 * whose columns represent covariates.
069 *
070 * <p>The <code>biasCorrected</code> parameter determines whether or not
071 * covariance estimates are bias-corrected.</p>
072 *
073 * <p>The input array must be rectangular with at least two columns
074 * and two rows.</p>
075 *
076 * @param data rectangular array with columns representing covariates
077 * @param biasCorrected true means covariances are bias-corrected
078 * @throws MathIllegalArgumentException if the input data array is not
079 * rectangular with at least two rows and two columns.
080 */
081 public Covariance(double[][] data, boolean biasCorrected)
082 throws MathIllegalArgumentException {
083 this(new BlockRealMatrix(data), biasCorrected);
084 }
085
086 /**
087 * Create a Covariance matrix from a rectangular array
088 * whose columns represent covariates.
089 *
090 * <p>The input array must be rectangular with at least two columns
091 * and two rows</p>
092 *
093 * @param data rectangular array with columns representing covariates
094 * @throws MathIllegalArgumentException if the input data array is not
095 * rectangular with at least two rows and two columns.
096 */
097 public Covariance(double[][] data) throws MathIllegalArgumentException {
098 this(data, true);
099 }
100
101 /**
102 * Create a covariance matrix from a matrix whose columns
103 * represent covariates.
104 *
105 * <p>The <code>biasCorrected</code> parameter determines whether or not
106 * covariance estimates are bias-corrected.</p>
107 *
108 * <p>The matrix must have at least two columns and two rows</p>
109 *
110 * @param matrix matrix with columns representing covariates
111 * @param biasCorrected true means covariances are bias-corrected
112 * @throws MathIllegalArgumentException if the input matrix does not have
113 * at least two rows and two columns
114 */
115 public Covariance(RealMatrix matrix, boolean biasCorrected)
116 throws MathIllegalArgumentException {
117 checkSufficientData(matrix);
118 n = matrix.getRowDimension();
119 covarianceMatrix = computeCovarianceMatrix(matrix, biasCorrected);
120 }
121
122 /**
123 * Create a covariance matrix from a matrix whose columns
124 * represent covariates.
125 *
126 * <p>The matrix must have at least two columns and two rows</p>
127 *
128 * @param matrix matrix with columns representing covariates
129 * @throws MathIllegalArgumentException if the input matrix does not have
130 * at least two rows and two columns
131 */
132 public Covariance(RealMatrix matrix) throws MathIllegalArgumentException {
133 this(matrix, true);
134 }
135
136 /**
137 * Returns the covariance matrix
138 *
139 * @return covariance matrix
140 */
141 public RealMatrix getCovarianceMatrix() {
142 return covarianceMatrix;
143 }
144
145 /**
146 * Returns the number of observations (length of covariate vectors)
147 *
148 * @return number of observations
149 */
150 public int getN() {
151 return n;
152 }
153
154 /**
155 * Compute a covariance matrix from a matrix whose columns represent
156 * covariates.
157 * @param matrix input matrix (must have at least two columns and two rows)
158 * @param biasCorrected determines whether or not covariance estimates are bias-corrected
159 * @return covariance matrix
160 * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
161 */
162 protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected)
163 throws MathIllegalArgumentException {
164 int dimension = matrix.getColumnDimension();
165 Variance variance = new Variance(biasCorrected);
166 RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension);
167 for (int i = 0; i < dimension; i++) {
168 for (int j = 0; j < i; j++) {
169 double cov = covariance(matrix.getColumn(i), matrix.getColumn(j), biasCorrected);
170 outMatrix.setEntry(i, j, cov);
171 outMatrix.setEntry(j, i, cov);
172 }
173 outMatrix.setEntry(i, i, variance.evaluate(matrix.getColumn(i)));
174 }
175 return outMatrix;
176 }
177
178 /**
179 * Create a covariance matrix from a matrix whose columns represent
180 * covariates. Covariances are computed using the bias-corrected formula.
181 * @param matrix input matrix (must have at least two columns and two rows)
182 * @return covariance matrix
183 * @throws MathIllegalArgumentException if matrix does not contain sufficient data
184 * @see #Covariance
185 */
186 protected RealMatrix computeCovarianceMatrix(RealMatrix matrix)
187 throws MathIllegalArgumentException {
188 return computeCovarianceMatrix(matrix, true);
189 }
190
191 /**
192 * Compute a covariance matrix from a rectangular array whose columns represent
193 * covariates.
194 * @param data input array (must have at least two columns and two rows)
195 * @param biasCorrected determines whether or not covariance estimates are bias-corrected
196 * @return covariance matrix
197 * @throws MathIllegalArgumentException if the data array does not contain sufficient
198 * data
199 */
200 protected RealMatrix computeCovarianceMatrix(double[][] data, boolean biasCorrected)
201 throws MathIllegalArgumentException {
202 return computeCovarianceMatrix(new BlockRealMatrix(data), biasCorrected);
203 }
204
205 /**
206 * Create a covariance matrix from a rectangular array whose columns represent
207 * covariates. Covariances are computed using the bias-corrected formula.
208 * @param data input array (must have at least two columns and two rows)
209 * @return covariance matrix
210 * @throws MathIllegalArgumentException if the data array does not contain sufficient data
211 * @see #Covariance
212 */
213 protected RealMatrix computeCovarianceMatrix(double[][] data) throws MathIllegalArgumentException {
214 return computeCovarianceMatrix(data, true);
215 }
216
217 /**
218 * Computes the covariance between the two arrays.
219 *
220 * <p>Array lengths must match and the common length must be at least 2.</p>
221 *
222 * @param xArray first data array
223 * @param yArray second data array
224 * @param biasCorrected if true, returned value will be bias-corrected
225 * @return returns the covariance for the two arrays
226 * @throws MathIllegalArgumentException if the arrays lengths do not match or
227 * there is insufficient data
228 */
229 public double covariance(final double[] xArray, final double[] yArray, boolean biasCorrected)
230 throws MathIllegalArgumentException {
231 Mean mean = new Mean();
232 double result = 0d;
233 int length = xArray.length;
234 if (length != yArray.length) {
235 throw new MathIllegalArgumentException(
236 LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, length, yArray.length);
237 } else if (length < 2) {
238 throw new MathIllegalArgumentException(
239 LocalizedFormats.INSUFFICIENT_OBSERVED_POINTS_IN_SAMPLE, length, 2);
240 } else {
241 double xMean = mean.evaluate(xArray);
242 double yMean = mean.evaluate(yArray);
243 for (int i = 0; i < length; i++) {
244 double xDev = xArray[i] - xMean;
245 double yDev = yArray[i] - yMean;
246 result += (xDev * yDev - result) / (i + 1);
247 }
248 }
249 return biasCorrected ? result * ((double) length / (double)(length - 1)) : result;
250 }
251
252 /**
253 * Computes the covariance between the two arrays, using the bias-corrected
254 * formula.
255 *
256 * <p>Array lengths must match and the common length must be at least 2.</p>
257 *
258 * @param xArray first data array
259 * @param yArray second data array
260 * @return returns the covariance for the two arrays
261 * @throws MathIllegalArgumentException if the arrays lengths do not match or
262 * there is insufficient data
263 */
264 public double covariance(final double[] xArray, final double[] yArray)
265 throws MathIllegalArgumentException {
266 return covariance(xArray, yArray, true);
267 }
268
269 /**
270 * Throws MathIllegalArgumentException if the matrix does not have at least
271 * two columns and two rows.
272 * @param matrix matrix to check
273 * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
274 * to compute covariance
275 */
276 private void checkSufficientData(final RealMatrix matrix) throws MathIllegalArgumentException {
277 int nRows = matrix.getRowDimension();
278 int nCols = matrix.getColumnDimension();
279 if (nRows < 2 || nCols < 2) {
280 throw new MathIllegalArgumentException(
281 LocalizedFormats.INSUFFICIENT_ROWS_AND_COLUMNS,
282 nRows, nCols);
283 }
284 }
285 }