001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.math3.stat.correlation;
018
019 import org.apache.commons.math3.exception.DimensionMismatchException;
020 import org.apache.commons.math3.exception.MathUnsupportedOperationException;
021 import org.apache.commons.math3.exception.NumberIsTooSmallException;
022 import org.apache.commons.math3.linear.MatrixUtils;
023 import org.apache.commons.math3.linear.RealMatrix;
024
025 /**
026 * Covariance implementation that does not require input data to be
027 * stored in memory. The size of the covariance matrix is specified in the
028 * constructor. Specific elements of the matrix are incrementally updated with
029 * calls to incrementRow() or increment Covariance().
030 *
031 * <p>This class is based on a paper written by Philippe Pébay:
032 * <a href="http://prod.sandia.gov/techlib/access-control.cgi/2008/086212.pdf">
033 * Formulas for Robust, One-Pass Parallel Computation of Covariances and
034 * Arbitrary-Order Statistical Moments</a>, 2008, Technical Report SAND2008-6212,
035 * Sandia National Laboratories.</p>
036 *
037 * <p>Note: the underlying covariance matrix is symmetric, thus only the
038 * upper triangular part of the matrix is stored and updated each increment.</p>
039 *
040 * @version $Id: StorelessCovariance.java 1410238 2012-11-16 07:58:49Z luc $
041 * @since 3.0
042 */
043 public class StorelessCovariance extends Covariance {
044
045 /** the square covariance matrix (upper triangular part) */
046 private StorelessBivariateCovariance[] covMatrix;
047
048 /** dimension of the square covariance matrix */
049 private int dimension;
050
051 /**
052 * Create a bias corrected covariance matrix with a given dimension.
053 *
054 * @param dim the dimension of the square covariance matrix
055 */
056 public StorelessCovariance(final int dim) {
057 this(dim, true);
058 }
059
060 /**
061 * Create a covariance matrix with a given number of rows and columns and the
062 * indicated bias correction.
063 *
064 * @param dim the dimension of the covariance matrix
065 * @param biasCorrected if <code>true</code> the covariance estimate is corrected
066 * for bias, i.e. n-1 in the denominator, otherwise there is no bias correction,
067 * i.e. n in the denominator.
068 */
069 public StorelessCovariance(final int dim, final boolean biasCorrected) {
070 dimension = dim;
071 covMatrix = new StorelessBivariateCovariance[dimension * (dimension + 1) / 2];
072 initializeMatrix(biasCorrected);
073 }
074
075 /**
076 * Initialize the internal two-dimensional array of
077 * {@link StorelessBivariateCovariance} instances.
078 *
079 * @param biasCorrected if the covariance estimate shall be corrected for bias
080 */
081 private void initializeMatrix(final boolean biasCorrected) {
082 for(int i = 0; i < dimension; i++){
083 for(int j = 0; j < dimension; j++){
084 setElement(i, j, new StorelessBivariateCovariance(biasCorrected));
085 }
086 }
087 }
088
089 /**
090 * Returns the index (i, j) translated into the one-dimensional
091 * array used to store the upper triangular part of the symmetric
092 * covariance matrix.
093 *
094 * @param i the row index
095 * @param j the column index
096 * @return the corresponding index in the matrix array
097 */
098 private int indexOf(final int i, final int j) {
099 return j < i ? i * (i + 1) / 2 + j : j * (j + 1) / 2 + i;
100 }
101
102 /**
103 * Gets the element at index (i, j) from the covariance matrix
104 * @param i the row index
105 * @param j the column index
106 * @return the {@link StorelessBivariateCovariance} element at the given index
107 */
108 private StorelessBivariateCovariance getElement(final int i, final int j) {
109 return covMatrix[indexOf(i, j)];
110 }
111
112 /**
113 * Sets the covariance element at index (i, j) in the covariance matrix
114 * @param i the row index
115 * @param j the column index
116 * @param cov the {@link StorelessBivariateCovariance} element to be set
117 */
118 private void setElement(final int i, final int j,
119 final StorelessBivariateCovariance cov) {
120 covMatrix[indexOf(i, j)] = cov;
121 }
122
123 /**
124 * Get the covariance for an individual element of the covariance matrix.
125 *
126 * @param xIndex row index in the covariance matrix
127 * @param yIndex column index in the covariance matrix
128 * @return the covariance of the given element
129 * @throws NumberIsTooSmallException if the number of observations
130 * in the cell is < 2
131 */
132 public double getCovariance(final int xIndex,
133 final int yIndex)
134 throws NumberIsTooSmallException {
135
136 return getElement(xIndex, yIndex).getResult();
137
138 }
139
140 /**
141 * Increment the covariance matrix with one row of data.
142 *
143 * @param data array representing one row of data.
144 * @throws DimensionMismatchException if the length of <code>rowData</code>
145 * does not match with the covariance matrix
146 */
147 public void increment(final double[] data)
148 throws DimensionMismatchException {
149
150 int length = data.length;
151 if (length != dimension) {
152 throw new DimensionMismatchException(length, dimension);
153 }
154
155 // only update the upper triangular part of the covariance matrix
156 // as only these parts are actually stored
157 for (int i = 0; i < length; i++){
158 for (int j = i; j < length; j++){
159 getElement(i, j).increment(data[i], data[j]);
160 }
161 }
162
163 }
164
165 /**
166 * {@inheritDoc}
167 * @throws NumberIsTooSmallException if the number of observations
168 * in a cell is < 2
169 */
170 @Override
171 public RealMatrix getCovarianceMatrix() throws NumberIsTooSmallException {
172 return MatrixUtils.createRealMatrix(getData());
173 }
174
175 /**
176 * Return the covariance matrix as two-dimensional array.
177 *
178 * @return a two-dimensional double array of covariance values
179 * @throws NumberIsTooSmallException if the number of observations
180 * for a cell is < 2
181 */
182 public double[][] getData() throws NumberIsTooSmallException {
183 final double[][] data = new double[dimension][dimension];
184 for (int i = 0; i < dimension; i++) {
185 for (int j = 0; j < dimension; j++) {
186 data[i][j] = getElement(i, j).getResult();
187 }
188 }
189 return data;
190 }
191
192 /**
193 * This {@link Covariance} method is not supported by a {@link StorelessCovariance},
194 * since the number of bivariate observations does not have to be the same for different
195 * pairs of covariates - i.e., N as defined in {@link Covariance#getN()} is undefined.
196 *
197 * @return nothing as this implementation always throws a
198 * {@link MathUnsupportedOperationException}
199 * @throws MathUnsupportedOperationException in all cases
200 */
201 @Override
202 public int getN()
203 throws MathUnsupportedOperationException {
204 throw new MathUnsupportedOperationException();
205 }
206 }