001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.math3.distribution;
019
020 import org.apache.commons.math3.exception.NotPositiveException;
021 import org.apache.commons.math3.exception.NotStrictlyPositiveException;
022 import org.apache.commons.math3.exception.NumberIsTooLargeException;
023 import org.apache.commons.math3.exception.util.LocalizedFormats;
024 import org.apache.commons.math3.util.FastMath;
025 import org.apache.commons.math3.random.RandomGenerator;
026 import org.apache.commons.math3.random.Well19937c;
027
028 /**
029 * Implementation of the hypergeometric distribution.
030 *
031 * @see <a href="http://en.wikipedia.org/wiki/Hypergeometric_distribution">Hypergeometric distribution (Wikipedia)</a>
032 * @see <a href="http://mathworld.wolfram.com/HypergeometricDistribution.html">Hypergeometric distribution (MathWorld)</a>
033 * @version $Id: HypergeometricDistribution.java 1416643 2012-12-03 19:37:14Z tn $
034 */
035 public class HypergeometricDistribution extends AbstractIntegerDistribution {
036 /** Serializable version identifier. */
037 private static final long serialVersionUID = -436928820673516179L;
038 /** The number of successes in the population. */
039 private final int numberOfSuccesses;
040 /** The population size. */
041 private final int populationSize;
042 /** The sample size. */
043 private final int sampleSize;
044 /** Cached numerical variance */
045 private double numericalVariance = Double.NaN;
046 /** Whether or not the numerical variance has been calculated */
047 private boolean numericalVarianceIsCalculated = false;
048
049 /**
050 * Construct a new hypergeometric distribution with the specified population
051 * size, number of successes in the population, and sample size.
052 *
053 * @param populationSize Population size.
054 * @param numberOfSuccesses Number of successes in the population.
055 * @param sampleSize Sample size.
056 * @throws NotPositiveException if {@code numberOfSuccesses < 0}.
057 * @throws NotStrictlyPositiveException if {@code populationSize <= 0}.
058 * @throws NumberIsTooLargeException if {@code numberOfSuccesses > populationSize},
059 * or {@code sampleSize > populationSize}.
060 */
061 public HypergeometricDistribution(int populationSize, int numberOfSuccesses, int sampleSize)
062 throws NotPositiveException, NotStrictlyPositiveException, NumberIsTooLargeException {
063 this(new Well19937c(), populationSize, numberOfSuccesses, sampleSize);
064 }
065
066 /**
067 * Creates a new hypergeometric distribution.
068 *
069 * @param rng Random number generator.
070 * @param populationSize Population size.
071 * @param numberOfSuccesses Number of successes in the population.
072 * @param sampleSize Sample size.
073 * @throws NotPositiveException if {@code numberOfSuccesses < 0}.
074 * @throws NotStrictlyPositiveException if {@code populationSize <= 0}.
075 * @throws NumberIsTooLargeException if {@code numberOfSuccesses > populationSize},
076 * or {@code sampleSize > populationSize}.
077 * @since 3.1
078 */
079 public HypergeometricDistribution(RandomGenerator rng,
080 int populationSize,
081 int numberOfSuccesses,
082 int sampleSize)
083 throws NotPositiveException, NotStrictlyPositiveException, NumberIsTooLargeException {
084 super(rng);
085
086 if (populationSize <= 0) {
087 throw new NotStrictlyPositiveException(LocalizedFormats.POPULATION_SIZE,
088 populationSize);
089 }
090 if (numberOfSuccesses < 0) {
091 throw new NotPositiveException(LocalizedFormats.NUMBER_OF_SUCCESSES,
092 numberOfSuccesses);
093 }
094 if (sampleSize < 0) {
095 throw new NotPositiveException(LocalizedFormats.NUMBER_OF_SAMPLES,
096 sampleSize);
097 }
098
099 if (numberOfSuccesses > populationSize) {
100 throw new NumberIsTooLargeException(LocalizedFormats.NUMBER_OF_SUCCESS_LARGER_THAN_POPULATION_SIZE,
101 numberOfSuccesses, populationSize, true);
102 }
103 if (sampleSize > populationSize) {
104 throw new NumberIsTooLargeException(LocalizedFormats.SAMPLE_SIZE_LARGER_THAN_POPULATION_SIZE,
105 sampleSize, populationSize, true);
106 }
107
108 this.numberOfSuccesses = numberOfSuccesses;
109 this.populationSize = populationSize;
110 this.sampleSize = sampleSize;
111 }
112
113 /** {@inheritDoc} */
114 public double cumulativeProbability(int x) {
115 double ret;
116
117 int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
118 if (x < domain[0]) {
119 ret = 0.0;
120 } else if (x >= domain[1]) {
121 ret = 1.0;
122 } else {
123 ret = innerCumulativeProbability(domain[0], x, 1);
124 }
125
126 return ret;
127 }
128
129 /**
130 * Return the domain for the given hypergeometric distribution parameters.
131 *
132 * @param n Population size.
133 * @param m Number of successes in the population.
134 * @param k Sample size.
135 * @return a two element array containing the lower and upper bounds of the
136 * hypergeometric distribution.
137 */
138 private int[] getDomain(int n, int m, int k) {
139 return new int[] { getLowerDomain(n, m, k), getUpperDomain(m, k) };
140 }
141
142 /**
143 * Return the lowest domain value for the given hypergeometric distribution
144 * parameters.
145 *
146 * @param n Population size.
147 * @param m Number of successes in the population.
148 * @param k Sample size.
149 * @return the lowest domain value of the hypergeometric distribution.
150 */
151 private int getLowerDomain(int n, int m, int k) {
152 return FastMath.max(0, m - (n - k));
153 }
154
155 /**
156 * Access the number of successes.
157 *
158 * @return the number of successes.
159 */
160 public int getNumberOfSuccesses() {
161 return numberOfSuccesses;
162 }
163
164 /**
165 * Access the population size.
166 *
167 * @return the population size.
168 */
169 public int getPopulationSize() {
170 return populationSize;
171 }
172
173 /**
174 * Access the sample size.
175 *
176 * @return the sample size.
177 */
178 public int getSampleSize() {
179 return sampleSize;
180 }
181
182 /**
183 * Return the highest domain value for the given hypergeometric distribution
184 * parameters.
185 *
186 * @param m Number of successes in the population.
187 * @param k Sample size.
188 * @return the highest domain value of the hypergeometric distribution.
189 */
190 private int getUpperDomain(int m, int k) {
191 return FastMath.min(k, m);
192 }
193
194 /** {@inheritDoc} */
195 public double probability(int x) {
196 double ret;
197
198 int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
199 if (x < domain[0] || x > domain[1]) {
200 ret = 0.0;
201 } else {
202 double p = (double) sampleSize / (double) populationSize;
203 double q = (double) (populationSize - sampleSize) / (double) populationSize;
204 double p1 = SaddlePointExpansion.logBinomialProbability(x,
205 numberOfSuccesses, p, q);
206 double p2 =
207 SaddlePointExpansion.logBinomialProbability(sampleSize - x,
208 populationSize - numberOfSuccesses, p, q);
209 double p3 =
210 SaddlePointExpansion.logBinomialProbability(sampleSize, populationSize, p, q);
211 ret = FastMath.exp(p1 + p2 - p3);
212 }
213
214 return ret;
215 }
216
217 /**
218 * For this distribution, {@code X}, this method returns {@code P(X >= x)}.
219 *
220 * @param x Value at which the CDF is evaluated.
221 * @return the upper tail CDF for this distribution.
222 * @since 1.1
223 */
224 public double upperCumulativeProbability(int x) {
225 double ret;
226
227 final int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
228 if (x <= domain[0]) {
229 ret = 1.0;
230 } else if (x > domain[1]) {
231 ret = 0.0;
232 } else {
233 ret = innerCumulativeProbability(domain[1], x, -1);
234 }
235
236 return ret;
237 }
238
239 /**
240 * For this distribution, {@code X}, this method returns
241 * {@code P(x0 <= X <= x1)}.
242 * This probability is computed by summing the point probabilities for the
243 * values {@code x0, x0 + 1, x0 + 2, ..., x1}, in the order directed by
244 * {@code dx}.
245 *
246 * @param x0 Inclusive lower bound.
247 * @param x1 Inclusive upper bound.
248 * @param dx Direction of summation (1 indicates summing from x0 to x1, and
249 * 0 indicates summing from x1 to x0).
250 * @return {@code P(x0 <= X <= x1)}.
251 */
252 private double innerCumulativeProbability(int x0, int x1, int dx) {
253 double ret = probability(x0);
254 while (x0 != x1) {
255 x0 += dx;
256 ret += probability(x0);
257 }
258 return ret;
259 }
260
261 /**
262 * {@inheritDoc}
263 *
264 * For population size {@code N}, number of successes {@code m}, and sample
265 * size {@code n}, the mean is {@code n * m / N}.
266 */
267 public double getNumericalMean() {
268 return (double) (getSampleSize() * getNumberOfSuccesses()) / (double) getPopulationSize();
269 }
270
271 /**
272 * {@inheritDoc}
273 *
274 * For population size {@code N}, number of successes {@code m}, and sample
275 * size {@code n}, the variance is
276 * {@code [n * m * (N - n) * (N - m)] / [N^2 * (N - 1)]}.
277 */
278 public double getNumericalVariance() {
279 if (!numericalVarianceIsCalculated) {
280 numericalVariance = calculateNumericalVariance();
281 numericalVarianceIsCalculated = true;
282 }
283 return numericalVariance;
284 }
285
286 /**
287 * Used by {@link #getNumericalVariance()}.
288 *
289 * @return the variance of this distribution
290 */
291 protected double calculateNumericalVariance() {
292 final double N = getPopulationSize();
293 final double m = getNumberOfSuccesses();
294 final double n = getSampleSize();
295 return (n * m * (N - n) * (N - m)) / (N * N * (N - 1));
296 }
297
298 /**
299 * {@inheritDoc}
300 *
301 * For population size {@code N}, number of successes {@code m}, and sample
302 * size {@code n}, the lower bound of the support is
303 * {@code max(0, n + m - N)}.
304 *
305 * @return lower bound of the support
306 */
307 public int getSupportLowerBound() {
308 return FastMath.max(0,
309 getSampleSize() + getNumberOfSuccesses() - getPopulationSize());
310 }
311
312 /**
313 * {@inheritDoc}
314 *
315 * For number of successes {@code m} and sample size {@code n}, the upper
316 * bound of the support is {@code min(m, n)}.
317 *
318 * @return upper bound of the support
319 */
320 public int getSupportUpperBound() {
321 return FastMath.min(getNumberOfSuccesses(), getSampleSize());
322 }
323
324 /**
325 * {@inheritDoc}
326 *
327 * The support of this distribution is connected.
328 *
329 * @return {@code true}
330 */
331 public boolean isSupportConnected() {
332 return true;
333 }
334 }