001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.math3.stat.inference;
018
019 import org.apache.commons.math3.distribution.TDistribution;
020 import org.apache.commons.math3.exception.DimensionMismatchException;
021 import org.apache.commons.math3.exception.MathIllegalArgumentException;
022 import org.apache.commons.math3.exception.MaxCountExceededException;
023 import org.apache.commons.math3.exception.NoDataException;
024 import org.apache.commons.math3.exception.NotStrictlyPositiveException;
025 import org.apache.commons.math3.exception.NullArgumentException;
026 import org.apache.commons.math3.exception.NumberIsTooSmallException;
027 import org.apache.commons.math3.exception.OutOfRangeException;
028 import org.apache.commons.math3.exception.util.LocalizedFormats;
029 import org.apache.commons.math3.stat.StatUtils;
030 import org.apache.commons.math3.stat.descriptive.StatisticalSummary;
031 import org.apache.commons.math3.util.FastMath;
032
033 /**
034 * An implementation for Student's t-tests.
035 * <p>
036 * Tests can be:<ul>
037 * <li>One-sample or two-sample</li>
038 * <li>One-sided or two-sided</li>
039 * <li>Paired or unpaired (for two-sample tests)</li>
040 * <li>Homoscedastic (equal variance assumption) or heteroscedastic
041 * (for two sample tests)</li>
042 * <li>Fixed significance level (boolean-valued) or returning p-values.
043 * </li></ul></p>
044 * <p>
 * Test statistics are available for all tests. Methods including "Test" in
 * their names perform tests, all other methods return t-statistics. Among
047 * the "Test" methods, <code>double-</code>valued methods return p-values;
048 * <code>boolean-</code>valued methods perform fixed significance level tests.
049 * Significance levels are always specified as numbers between 0 and 0.5
050 * (e.g. tests at the 95% level use <code>alpha=0.05</code>).</p>
051 * <p>
052 * Input to tests can be either <code>double[]</code> arrays or
053 * {@link StatisticalSummary} instances.</p><p>
054 * Uses commons-math {@link org.apache.commons.math3.distribution.TDistribution}
055 * implementation to estimate exact p-values.</p>
056 *
057 * @version $Id: TTest.java 1416643 2012-12-03 19:37:14Z tn $
058 */
059 public class TTest {
060 /**
061 * Computes a paired, 2-sample t-statistic based on the data in the input
062 * arrays. The t-statistic returned is equivalent to what would be returned by
063 * computing the one-sample t-statistic {@link #t(double, double[])}, with
064 * <code>mu = 0</code> and the sample array consisting of the (signed)
065 * differences between corresponding entries in <code>sample1</code> and
066 * <code>sample2.</code>
067 * <p>
068 * <strong>Preconditions</strong>: <ul>
069 * <li>The input arrays must have the same length and their common length
070 * must be at least 2.
071 * </li></ul></p>
072 *
073 * @param sample1 array of sample data values
074 * @param sample2 array of sample data values
075 * @return t statistic
076 * @throws NullArgumentException if the arrays are <code>null</code>
077 * @throws NoDataException if the arrays are empty
078 * @throws DimensionMismatchException if the length of the arrays is not equal
079 * @throws NumberIsTooSmallException if the length of the arrays is < 2
080 */
081 public double pairedT(final double[] sample1, final double[] sample2)
082 throws NullArgumentException, NoDataException,
083 DimensionMismatchException, NumberIsTooSmallException {
084
085 checkSampleData(sample1);
086 checkSampleData(sample2);
087 double meanDifference = StatUtils.meanDifference(sample1, sample2);
088 return t(meanDifference, 0,
089 StatUtils.varianceDifference(sample1, sample2, meanDifference),
090 sample1.length);
091
092 }
093
094 /**
095 * Returns the <i>observed significance level</i>, or
096 * <i> p-value</i>, associated with a paired, two-sample, two-tailed t-test
097 * based on the data in the input arrays.
098 * <p>
099 * The number returned is the smallest significance level
100 * at which one can reject the null hypothesis that the mean of the paired
101 * differences is 0 in favor of the two-sided alternative that the mean paired
102 * difference is not equal to 0. For a one-sided test, divide the returned
103 * value by 2.</p>
104 * <p>
105 * This test is equivalent to a one-sample t-test computed using
106 * {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample
107 * array consisting of the signed differences between corresponding elements of
108 * <code>sample1</code> and <code>sample2.</code></p>
109 * <p>
110 * <strong>Usage Note:</strong><br>
111 * The validity of the p-value depends on the assumptions of the parametric
112 * t-test procedure, as discussed
113 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
114 * here</a></p>
115 * <p>
116 * <strong>Preconditions</strong>: <ul>
117 * <li>The input array lengths must be the same and their common length must
118 * be at least 2.
119 * </li></ul></p>
120 *
121 * @param sample1 array of sample data values
122 * @param sample2 array of sample data values
123 * @return p-value for t-test
124 * @throws NullArgumentException if the arrays are <code>null</code>
125 * @throws NoDataException if the arrays are empty
126 * @throws DimensionMismatchException if the length of the arrays is not equal
127 * @throws NumberIsTooSmallException if the length of the arrays is < 2
128 * @throws MaxCountExceededException if an error occurs computing the p-value
129 */
130 public double pairedTTest(final double[] sample1, final double[] sample2)
131 throws NullArgumentException, NoDataException, DimensionMismatchException,
132 NumberIsTooSmallException, MaxCountExceededException {
133
134 double meanDifference = StatUtils.meanDifference(sample1, sample2);
135 return tTest(meanDifference, 0,
136 StatUtils.varianceDifference(sample1, sample2, meanDifference),
137 sample1.length);
138
139 }
140
141 /**
142 * Performs a paired t-test evaluating the null hypothesis that the
143 * mean of the paired differences between <code>sample1</code> and
144 * <code>sample2</code> is 0 in favor of the two-sided alternative that the
145 * mean paired difference is not equal to 0, with significance level
146 * <code>alpha</code>.
147 * <p>
148 * Returns <code>true</code> iff the null hypothesis can be rejected with
149 * confidence <code>1 - alpha</code>. To perform a 1-sided test, use
150 * <code>alpha * 2</code></p>
151 * <p>
152 * <strong>Usage Note:</strong><br>
153 * The validity of the test depends on the assumptions of the parametric
154 * t-test procedure, as discussed
155 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
156 * here</a></p>
157 * <p>
158 * <strong>Preconditions</strong>: <ul>
159 * <li>The input array lengths must be the same and their common length
160 * must be at least 2.
161 * </li>
162 * <li> <code> 0 < alpha < 0.5 </code>
163 * </li></ul></p>
164 *
165 * @param sample1 array of sample data values
166 * @param sample2 array of sample data values
167 * @param alpha significance level of the test
168 * @return true if the null hypothesis can be rejected with
169 * confidence 1 - alpha
170 * @throws NullArgumentException if the arrays are <code>null</code>
171 * @throws NoDataException if the arrays are empty
172 * @throws DimensionMismatchException if the length of the arrays is not equal
173 * @throws NumberIsTooSmallException if the length of the arrays is < 2
174 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
175 * @throws MaxCountExceededException if an error occurs computing the p-value
176 */
177 public boolean pairedTTest(final double[] sample1, final double[] sample2,
178 final double alpha)
179 throws NullArgumentException, NoDataException, DimensionMismatchException,
180 NumberIsTooSmallException, OutOfRangeException, MaxCountExceededException {
181
182 checkSignificanceLevel(alpha);
183 return pairedTTest(sample1, sample2) < alpha;
184
185 }
186
187 /**
188 * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
189 * t statistic </a> given observed values and a comparison constant.
190 * <p>
191 * This statistic can be used to perform a one sample t-test for the mean.
192 * </p><p>
193 * <strong>Preconditions</strong>: <ul>
194 * <li>The observed array length must be at least 2.
195 * </li></ul></p>
196 *
197 * @param mu comparison constant
198 * @param observed array of values
199 * @return t statistic
200 * @throws NullArgumentException if <code>observed</code> is <code>null</code>
201 * @throws NumberIsTooSmallException if the length of <code>observed</code> is < 2
202 */
203 public double t(final double mu, final double[] observed)
204 throws NullArgumentException, NumberIsTooSmallException {
205
206 checkSampleData(observed);
207 // No try-catch or advertised exception because args have just been checked
208 return t(StatUtils.mean(observed), mu, StatUtils.variance(observed),
209 observed.length);
210
211 }
212
213 /**
214 * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
215 * t statistic </a> to use in comparing the mean of the dataset described by
216 * <code>sampleStats</code> to <code>mu</code>.
217 * <p>
218 * This statistic can be used to perform a one sample t-test for the mean.
219 * </p><p>
220 * <strong>Preconditions</strong>: <ul>
221 * <li><code>observed.getN() ≥ 2</code>.
222 * </li></ul></p>
223 *
224 * @param mu comparison constant
225 * @param sampleStats DescriptiveStatistics holding sample summary statitstics
226 * @return t statistic
227 * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
228 * @throws NumberIsTooSmallException if the number of samples is < 2
229 */
230 public double t(final double mu, final StatisticalSummary sampleStats)
231 throws NullArgumentException, NumberIsTooSmallException {
232
233 checkSampleData(sampleStats);
234 return t(sampleStats.getMean(), mu, sampleStats.getVariance(),
235 sampleStats.getN());
236
237 }
238
239 /**
240 * Computes a 2-sample t statistic, under the hypothesis of equal
241 * subpopulation variances. To compute a t-statistic without the
242 * equal variances hypothesis, use {@link #t(double[], double[])}.
243 * <p>
244 * This statistic can be used to perform a (homoscedastic) two-sample
245 * t-test to compare sample means.</p>
246 * <p>
247 * The t-statistic is</p>
248 * <p>
249 * <code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
250 * </p><p>
251 * where <strong><code>n1</code></strong> is the size of first sample;
252 * <strong><code> n2</code></strong> is the size of second sample;
253 * <strong><code> m1</code></strong> is the mean of first sample;
254 * <strong><code> m2</code></strong> is the mean of second sample</li>
255 * </ul>
256 * and <strong><code>var</code></strong> is the pooled variance estimate:
257 * </p><p>
258 * <code>var = sqrt(((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1)))</code>
259 * </p><p>
260 * with <strong><code>var1</code></strong> the variance of the first sample and
261 * <strong><code>var2</code></strong> the variance of the second sample.
262 * </p><p>
263 * <strong>Preconditions</strong>: <ul>
264 * <li>The observed array lengths must both be at least 2.
265 * </li></ul></p>
266 *
267 * @param sample1 array of sample data values
268 * @param sample2 array of sample data values
269 * @return t statistic
270 * @throws NullArgumentException if the arrays are <code>null</code>
271 * @throws NumberIsTooSmallException if the length of the arrays is < 2
272 */
273 public double homoscedasticT(final double[] sample1, final double[] sample2)
274 throws NullArgumentException, NumberIsTooSmallException {
275
276 checkSampleData(sample1);
277 checkSampleData(sample2);
278 // No try-catch or advertised exception because args have just been checked
279 return homoscedasticT(StatUtils.mean(sample1), StatUtils.mean(sample2),
280 StatUtils.variance(sample1), StatUtils.variance(sample2),
281 sample1.length, sample2.length);
282
283 }
284
285 /**
286 * Computes a 2-sample t statistic, without the hypothesis of equal
287 * subpopulation variances. To compute a t-statistic assuming equal
288 * variances, use {@link #homoscedasticT(double[], double[])}.
289 * <p>
290 * This statistic can be used to perform a two-sample t-test to compare
291 * sample means.</p>
292 * <p>
293 * The t-statistic is</p>
294 * <p>
295 * <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
296 * </p><p>
297 * where <strong><code>n1</code></strong> is the size of the first sample
298 * <strong><code> n2</code></strong> is the size of the second sample;
299 * <strong><code> m1</code></strong> is the mean of the first sample;
300 * <strong><code> m2</code></strong> is the mean of the second sample;
301 * <strong><code> var1</code></strong> is the variance of the first sample;
302 * <strong><code> var2</code></strong> is the variance of the second sample;
303 * </p><p>
304 * <strong>Preconditions</strong>: <ul>
305 * <li>The observed array lengths must both be at least 2.
306 * </li></ul></p>
307 *
308 * @param sample1 array of sample data values
309 * @param sample2 array of sample data values
310 * @return t statistic
311 * @throws NullArgumentException if the arrays are <code>null</code>
312 * @throws NumberIsTooSmallException if the length of the arrays is < 2
313 */
314 public double t(final double[] sample1, final double[] sample2)
315 throws NullArgumentException, NumberIsTooSmallException {
316
317 checkSampleData(sample1);
318 checkSampleData(sample2);
319 // No try-catch or advertised exception because args have just been checked
320 return t(StatUtils.mean(sample1), StatUtils.mean(sample2),
321 StatUtils.variance(sample1), StatUtils.variance(sample2),
322 sample1.length, sample2.length);
323
324 }
325
326 /**
327 * Computes a 2-sample t statistic </a>, comparing the means of the datasets
328 * described by two {@link StatisticalSummary} instances, without the
329 * assumption of equal subpopulation variances. Use
330 * {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} to
331 * compute a t-statistic under the equal variances assumption.
332 * <p>
333 * This statistic can be used to perform a two-sample t-test to compare
334 * sample means.</p>
335 * <p>
336 * The returned t-statistic is</p>
337 * <p>
338 * <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
339 * </p><p>
340 * where <strong><code>n1</code></strong> is the size of the first sample;
341 * <strong><code> n2</code></strong> is the size of the second sample;
342 * <strong><code> m1</code></strong> is the mean of the first sample;
343 * <strong><code> m2</code></strong> is the mean of the second sample
344 * <strong><code> var1</code></strong> is the variance of the first sample;
345 * <strong><code> var2</code></strong> is the variance of the second sample
346 * </p><p>
347 * <strong>Preconditions</strong>: <ul>
348 * <li>The datasets described by the two Univariates must each contain
349 * at least 2 observations.
350 * </li></ul></p>
351 *
352 * @param sampleStats1 StatisticalSummary describing data from the first sample
353 * @param sampleStats2 StatisticalSummary describing data from the second sample
354 * @return t statistic
355 * @throws NullArgumentException if the sample statistics are <code>null</code>
356 * @throws NumberIsTooSmallException if the number of samples is < 2
357 */
358 public double t(final StatisticalSummary sampleStats1,
359 final StatisticalSummary sampleStats2)
360 throws NullArgumentException, NumberIsTooSmallException {
361
362 checkSampleData(sampleStats1);
363 checkSampleData(sampleStats2);
364 return t(sampleStats1.getMean(), sampleStats2.getMean(),
365 sampleStats1.getVariance(), sampleStats2.getVariance(),
366 sampleStats1.getN(), sampleStats2.getN());
367
368 }
369
370 /**
371 * Computes a 2-sample t statistic, comparing the means of the datasets
372 * described by two {@link StatisticalSummary} instances, under the
373 * assumption of equal subpopulation variances. To compute a t-statistic
374 * without the equal variances assumption, use
375 * {@link #t(StatisticalSummary, StatisticalSummary)}.
376 * <p>
377 * This statistic can be used to perform a (homoscedastic) two-sample
378 * t-test to compare sample means.</p>
379 * <p>
380 * The t-statistic returned is</p>
381 * <p>
382 * <code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
383 * </p><p>
384 * where <strong><code>n1</code></strong> is the size of first sample;
385 * <strong><code> n2</code></strong> is the size of second sample;
386 * <strong><code> m1</code></strong> is the mean of first sample;
387 * <strong><code> m2</code></strong> is the mean of second sample
388 * and <strong><code>var</code></strong> is the pooled variance estimate:
389 * </p><p>
390 * <code>var = sqrt(((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1)))</code>
391 * </p><p>
392 * with <strong><code>var1</code></strong> the variance of the first sample and
393 * <strong><code>var2</code></strong> the variance of the second sample.
394 * </p><p>
395 * <strong>Preconditions</strong>: <ul>
396 * <li>The datasets described by the two Univariates must each contain
397 * at least 2 observations.
398 * </li></ul></p>
399 *
400 * @param sampleStats1 StatisticalSummary describing data from the first sample
401 * @param sampleStats2 StatisticalSummary describing data from the second sample
402 * @return t statistic
403 * @throws NullArgumentException if the sample statistics are <code>null</code>
404 * @throws NumberIsTooSmallException if the number of samples is < 2
405 */
406 public double homoscedasticT(final StatisticalSummary sampleStats1,
407 final StatisticalSummary sampleStats2)
408 throws NullArgumentException, NumberIsTooSmallException {
409
410 checkSampleData(sampleStats1);
411 checkSampleData(sampleStats2);
412 return homoscedasticT(sampleStats1.getMean(), sampleStats2.getMean(),
413 sampleStats1.getVariance(), sampleStats2.getVariance(),
414 sampleStats1.getN(), sampleStats2.getN());
415
416 }
417
418 /**
419 * Returns the <i>observed significance level</i>, or
420 * <i>p-value</i>, associated with a one-sample, two-tailed t-test
421 * comparing the mean of the input array with the constant <code>mu</code>.
422 * <p>
423 * The number returned is the smallest significance level
424 * at which one can reject the null hypothesis that the mean equals
425 * <code>mu</code> in favor of the two-sided alternative that the mean
426 * is different from <code>mu</code>. For a one-sided test, divide the
427 * returned value by 2.</p>
428 * <p>
429 * <strong>Usage Note:</strong><br>
430 * The validity of the test depends on the assumptions of the parametric
431 * t-test procedure, as discussed
432 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
433 * </p><p>
434 * <strong>Preconditions</strong>: <ul>
435 * <li>The observed array length must be at least 2.
436 * </li></ul></p>
437 *
438 * @param mu constant value to compare sample mean against
439 * @param sample array of sample data values
440 * @return p-value
441 * @throws NullArgumentException if the sample array is <code>null</code>
442 * @throws NumberIsTooSmallException if the length of the array is < 2
443 * @throws MaxCountExceededException if an error occurs computing the p-value
444 */
445 public double tTest(final double mu, final double[] sample)
446 throws NullArgumentException, NumberIsTooSmallException,
447 MaxCountExceededException {
448
449 checkSampleData(sample);
450 // No try-catch or advertised exception because args have just been checked
451 return tTest(StatUtils.mean(sample), mu, StatUtils.variance(sample),
452 sample.length);
453
454 }
455
456 /**
457 * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
458 * two-sided t-test</a> evaluating the null hypothesis that the mean of the population from
459 * which <code>sample</code> is drawn equals <code>mu</code>.
460 * <p>
461 * Returns <code>true</code> iff the null hypothesis can be
462 * rejected with confidence <code>1 - alpha</code>. To
463 * perform a 1-sided test, use <code>alpha * 2</code></p>
464 * <p>
465 * <strong>Examples:</strong><br><ol>
466 * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
467 * the 95% level, use <br><code>tTest(mu, sample, 0.05) </code>
468 * </li>
469 * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
470 * at the 99% level, first verify that the measured sample mean is less
471 * than <code>mu</code> and then use
472 * <br><code>tTest(mu, sample, 0.02) </code>
473 * </li></ol></p>
474 * <p>
475 * <strong>Usage Note:</strong><br>
476 * The validity of the test depends on the assumptions of the one-sample
477 * parametric t-test procedure, as discussed
478 * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
479 * </p><p>
480 * <strong>Preconditions</strong>: <ul>
481 * <li>The observed array length must be at least 2.
482 * </li></ul></p>
483 *
484 * @param mu constant value to compare sample mean against
485 * @param sample array of sample data values
486 * @param alpha significance level of the test
487 * @return p-value
488 * @throws NullArgumentException if the sample array is <code>null</code>
489 * @throws NumberIsTooSmallException if the length of the array is < 2
490 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
491 * @throws MaxCountExceededException if an error computing the p-value
492 */
493 public boolean tTest(final double mu, final double[] sample, final double alpha)
494 throws NullArgumentException, NumberIsTooSmallException,
495 OutOfRangeException, MaxCountExceededException {
496
497 checkSignificanceLevel(alpha);
498 return tTest(mu, sample) < alpha;
499
500 }
501
502 /**
503 * Returns the <i>observed significance level</i>, or
504 * <i>p-value</i>, associated with a one-sample, two-tailed t-test
505 * comparing the mean of the dataset described by <code>sampleStats</code>
506 * with the constant <code>mu</code>.
507 * <p>
508 * The number returned is the smallest significance level
509 * at which one can reject the null hypothesis that the mean equals
510 * <code>mu</code> in favor of the two-sided alternative that the mean
511 * is different from <code>mu</code>. For a one-sided test, divide the
512 * returned value by 2.</p>
513 * <p>
514 * <strong>Usage Note:</strong><br>
515 * The validity of the test depends on the assumptions of the parametric
516 * t-test procedure, as discussed
517 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
518 * here</a></p>
519 * <p>
520 * <strong>Preconditions</strong>: <ul>
521 * <li>The sample must contain at least 2 observations.
522 * </li></ul></p>
523 *
524 * @param mu constant value to compare sample mean against
525 * @param sampleStats StatisticalSummary describing sample data
526 * @return p-value
527 * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
528 * @throws NumberIsTooSmallException if the number of samples is < 2
529 * @throws MaxCountExceededException if an error occurs computing the p-value
530 */
531 public double tTest(final double mu, final StatisticalSummary sampleStats)
532 throws NullArgumentException, NumberIsTooSmallException,
533 MaxCountExceededException {
534
535 checkSampleData(sampleStats);
536 return tTest(sampleStats.getMean(), mu, sampleStats.getVariance(),
537 sampleStats.getN());
538
539 }
540
541 /**
542 * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
543 * two-sided t-test</a> evaluating the null hypothesis that the mean of the
544 * population from which the dataset described by <code>stats</code> is
545 * drawn equals <code>mu</code>.
546 * <p>
547 * Returns <code>true</code> iff the null hypothesis can be rejected with
548 * confidence <code>1 - alpha</code>. To perform a 1-sided test, use
549 * <code>alpha * 2.</code></p>
550 * <p>
551 * <strong>Examples:</strong><br><ol>
552 * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
553 * the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code>
554 * </li>
555 * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
556 * at the 99% level, first verify that the measured sample mean is less
557 * than <code>mu</code> and then use
558 * <br><code>tTest(mu, sampleStats, 0.02) </code>
559 * </li></ol></p>
560 * <p>
561 * <strong>Usage Note:</strong><br>
562 * The validity of the test depends on the assumptions of the one-sample
563 * parametric t-test procedure, as discussed
564 * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
565 * </p><p>
566 * <strong>Preconditions</strong>: <ul>
567 * <li>The sample must include at least 2 observations.
568 * </li></ul></p>
569 *
570 * @param mu constant value to compare sample mean against
571 * @param sampleStats StatisticalSummary describing sample data values
572 * @param alpha significance level of the test
573 * @return p-value
574 * @throws NullArgumentException if <code>sampleStats</code> is <code>null</code>
575 * @throws NumberIsTooSmallException if the number of samples is < 2
576 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
577 * @throws MaxCountExceededException if an error occurs computing the p-value
578 */
579 public boolean tTest(final double mu, final StatisticalSummary sampleStats,
580 final double alpha)
581 throws NullArgumentException, NumberIsTooSmallException,
582 OutOfRangeException, MaxCountExceededException {
583
584 checkSignificanceLevel(alpha);
585 return tTest(mu, sampleStats) < alpha;
586
587 }
588
589 /**
590 * Returns the <i>observed significance level</i>, or
591 * <i>p-value</i>, associated with a two-sample, two-tailed t-test
592 * comparing the means of the input arrays.
593 * <p>
594 * The number returned is the smallest significance level
595 * at which one can reject the null hypothesis that the two means are
596 * equal in favor of the two-sided alternative that they are different.
597 * For a one-sided test, divide the returned value by 2.</p>
598 * <p>
599 * The test does not assume that the underlying popuation variances are
600 * equal and it uses approximated degrees of freedom computed from the
601 * sample data to compute the p-value. The t-statistic used is as defined in
602 * {@link #t(double[], double[])} and the Welch-Satterthwaite approximation
603 * to the degrees of freedom is used,
604 * as described
605 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
606 * here.</a> To perform the test under the assumption of equal subpopulation
607 * variances, use {@link #homoscedasticTTest(double[], double[])}.</p>
608 * <p>
609 * <strong>Usage Note:</strong><br>
610 * The validity of the p-value depends on the assumptions of the parametric
611 * t-test procedure, as discussed
612 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
613 * here</a></p>
614 * <p>
615 * <strong>Preconditions</strong>: <ul>
616 * <li>The observed array lengths must both be at least 2.
617 * </li></ul></p>
618 *
619 * @param sample1 array of sample data values
620 * @param sample2 array of sample data values
621 * @return p-value for t-test
622 * @throws NullArgumentException if the arrays are <code>null</code>
623 * @throws NumberIsTooSmallException if the length of the arrays is < 2
624 * @throws MaxCountExceededException if an error occurs computing the p-value
625 */
626 public double tTest(final double[] sample1, final double[] sample2)
627 throws NullArgumentException, NumberIsTooSmallException,
628 MaxCountExceededException {
629
630 checkSampleData(sample1);
631 checkSampleData(sample2);
632 // No try-catch or advertised exception because args have just been checked
633 return tTest(StatUtils.mean(sample1), StatUtils.mean(sample2),
634 StatUtils.variance(sample1), StatUtils.variance(sample2),
635 sample1.length, sample2.length);
636
637 }
638
639 /**
640 * Returns the <i>observed significance level</i>, or
641 * <i>p-value</i>, associated with a two-sample, two-tailed t-test
642 * comparing the means of the input arrays, under the assumption that
643 * the two samples are drawn from subpopulations with equal variances.
644 * To perform the test without the equal variances assumption, use
645 * {@link #tTest(double[], double[])}.</p>
646 * <p>
647 * The number returned is the smallest significance level
648 * at which one can reject the null hypothesis that the two means are
649 * equal in favor of the two-sided alternative that they are different.
650 * For a one-sided test, divide the returned value by 2.</p>
651 * <p>
652 * A pooled variance estimate is used to compute the t-statistic. See
653 * {@link #homoscedasticT(double[], double[])}. The sum of the sample sizes
654 * minus 2 is used as the degrees of freedom.</p>
655 * <p>
656 * <strong>Usage Note:</strong><br>
657 * The validity of the p-value depends on the assumptions of the parametric
658 * t-test procedure, as discussed
659 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
660 * here</a></p>
661 * <p>
662 * <strong>Preconditions</strong>: <ul>
663 * <li>The observed array lengths must both be at least 2.
664 * </li></ul></p>
665 *
666 * @param sample1 array of sample data values
667 * @param sample2 array of sample data values
668 * @return p-value for t-test
669 * @throws NullArgumentException if the arrays are <code>null</code>
670 * @throws NumberIsTooSmallException if the length of the arrays is < 2
671 * @throws MaxCountExceededException if an error occurs computing the p-value
672 */
673 public double homoscedasticTTest(final double[] sample1, final double[] sample2)
674 throws NullArgumentException, NumberIsTooSmallException,
675 MaxCountExceededException {
676
677 checkSampleData(sample1);
678 checkSampleData(sample2);
679 // No try-catch or advertised exception because args have just been checked
680 return homoscedasticTTest(StatUtils.mean(sample1),
681 StatUtils.mean(sample2),
682 StatUtils.variance(sample1),
683 StatUtils.variance(sample2),
684 sample1.length, sample2.length);
685
686 }
687
688 /**
689 * Performs a
690 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
691 * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
692 * and <code>sample2</code> are drawn from populations with the same mean,
693 * with significance level <code>alpha</code>. This test does not assume
694 * that the subpopulation variances are equal. To perform the test assuming
695 * equal variances, use
696 * {@link #homoscedasticTTest(double[], double[], double)}.
697 * <p>
698 * Returns <code>true</code> iff the null hypothesis that the means are
699 * equal can be rejected with confidence <code>1 - alpha</code>. To
700 * perform a 1-sided test, use <code>alpha * 2</code></p>
701 * <p>
702 * See {@link #t(double[], double[])} for the formula used to compute the
703 * t-statistic. Degrees of freedom are approximated using the
704 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
705 * Welch-Satterthwaite approximation.</a></p>
706 * <p>
707 * <strong>Examples:</strong><br><ol>
708 * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
709 * the 95% level, use
710 * <br><code>tTest(sample1, sample2, 0.05). </code>
711 * </li>
712 * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>,
713 * at the 99% level, first verify that the measured mean of <code>sample 1</code>
714 * is less than the mean of <code>sample 2</code> and then use
715 * <br><code>tTest(sample1, sample2, 0.02) </code>
716 * </li></ol></p>
717 * <p>
718 * <strong>Usage Note:</strong><br>
719 * The validity of the test depends on the assumptions of the parametric
720 * t-test procedure, as discussed
721 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
722 * here</a></p>
723 * <p>
724 * <strong>Preconditions</strong>: <ul>
725 * <li>The observed array lengths must both be at least 2.
726 * </li>
727 * <li> <code> 0 < alpha < 0.5 </code>
728 * </li></ul></p>
729 *
730 * @param sample1 array of sample data values
731 * @param sample2 array of sample data values
732 * @param alpha significance level of the test
733 * @return true if the null hypothesis can be rejected with
734 * confidence 1 - alpha
735 * @throws NullArgumentException if the arrays are <code>null</code>
736 * @throws NumberIsTooSmallException if the length of the arrays is < 2
737 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
738 * @throws MaxCountExceededException if an error occurs computing the p-value
739 */
740 public boolean tTest(final double[] sample1, final double[] sample2,
741 final double alpha)
742 throws NullArgumentException, NumberIsTooSmallException,
743 OutOfRangeException, MaxCountExceededException {
744
745 checkSignificanceLevel(alpha);
746 return tTest(sample1, sample2) < alpha;
747
748 }
749
750 /**
751 * Performs a
752 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
753 * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
754 * and <code>sample2</code> are drawn from populations with the same mean,
755 * with significance level <code>alpha</code>, assuming that the
756 * subpopulation variances are equal. Use
757 * {@link #tTest(double[], double[], double)} to perform the test without
758 * the assumption of equal variances.
759 * <p>
760 * Returns <code>true</code> iff the null hypothesis that the means are
761 * equal can be rejected with confidence <code>1 - alpha</code>. To
762 * perform a 1-sided test, use <code>alpha * 2.</code> To perform the test
763 * without the assumption of equal subpopulation variances, use
764 * {@link #tTest(double[], double[], double)}.</p>
765 * <p>
766 * A pooled variance estimate is used to compute the t-statistic. See
767 * {@link #t(double[], double[])} for the formula. The sum of the sample
768 * sizes minus 2 is used as the degrees of freedom.</p>
769 * <p>
770 * <strong>Examples:</strong><br><ol>
771 * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
772 * the 95% level, use <br><code>tTest(sample1, sample2, 0.05). </code>
773 * </li>
774 * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2, </code>
775 * at the 99% level, first verify that the measured mean of
776 * <code>sample 1</code> is less than the mean of <code>sample 2</code>
777 * and then use
778 * <br><code>tTest(sample1, sample2, 0.02) </code>
779 * </li></ol></p>
780 * <p>
781 * <strong>Usage Note:</strong><br>
782 * The validity of the test depends on the assumptions of the parametric
783 * t-test procedure, as discussed
784 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
785 * here</a></p>
786 * <p>
787 * <strong>Preconditions</strong>: <ul>
788 * <li>The observed array lengths must both be at least 2.
789 * </li>
790 * <li> <code> 0 < alpha < 0.5 </code>
791 * </li></ul></p>
792 *
793 * @param sample1 array of sample data values
794 * @param sample2 array of sample data values
795 * @param alpha significance level of the test
796 * @return true if the null hypothesis can be rejected with
797 * confidence 1 - alpha
798 * @throws NullArgumentException if the arrays are <code>null</code>
799 * @throws NumberIsTooSmallException if the length of the arrays is < 2
800 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
801 * @throws MaxCountExceededException if an error occurs computing the p-value
802 */
803 public boolean homoscedasticTTest(final double[] sample1, final double[] sample2,
804 final double alpha)
805 throws NullArgumentException, NumberIsTooSmallException,
806 OutOfRangeException, MaxCountExceededException {
807
808 checkSignificanceLevel(alpha);
809 return homoscedasticTTest(sample1, sample2) < alpha;
810
811 }
812
813 /**
814 * Returns the <i>observed significance level</i>, or
815 * <i>p-value</i>, associated with a two-sample, two-tailed t-test
816 * comparing the means of the datasets described by two StatisticalSummary
817 * instances.
818 * <p>
819 * The number returned is the smallest significance level
820 * at which one can reject the null hypothesis that the two means are
821 * equal in favor of the two-sided alternative that they are different.
822 * For a one-sided test, divide the returned value by 2.</p>
823 * <p>
824 * The test does not assume that the underlying population variances are
825 * equal and it uses approximated degrees of freedom computed from the
826 * sample data to compute the p-value. To perform the test assuming
827 * equal variances, use
828 * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.</p>
829 * <p>
830 * <strong>Usage Note:</strong><br>
831 * The validity of the p-value depends on the assumptions of the parametric
832 * t-test procedure, as discussed
833 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
834 * here</a></p>
835 * <p>
836 * <strong>Preconditions</strong>: <ul>
837 * <li>The datasets described by the two Univariates must each contain
838 * at least 2 observations.
839 * </li></ul></p>
840 *
841 * @param sampleStats1 StatisticalSummary describing data from the first sample
842 * @param sampleStats2 StatisticalSummary describing data from the second sample
843 * @return p-value for t-test
844 * @throws NullArgumentException if the sample statistics are <code>null</code>
845 * @throws NumberIsTooSmallException if the number of samples is < 2
846 * @throws MaxCountExceededException if an error occurs computing the p-value
847 */
848 public double tTest(final StatisticalSummary sampleStats1,
849 final StatisticalSummary sampleStats2)
850 throws NullArgumentException, NumberIsTooSmallException,
851 MaxCountExceededException {
852
853 checkSampleData(sampleStats1);
854 checkSampleData(sampleStats2);
855 return tTest(sampleStats1.getMean(), sampleStats2.getMean(),
856 sampleStats1.getVariance(), sampleStats2.getVariance(),
857 sampleStats1.getN(), sampleStats2.getN());
858
859 }
860
861 /**
862 * Returns the <i>observed significance level</i>, or
863 * <i>p-value</i>, associated with a two-sample, two-tailed t-test
864 * comparing the means of the datasets described by two StatisticalSummary
865 * instances, under the hypothesis of equal subpopulation variances. To
866 * perform a test without the equal variances assumption, use
867 * {@link #tTest(StatisticalSummary, StatisticalSummary)}.
868 * <p>
869 * The number returned is the smallest significance level
870 * at which one can reject the null hypothesis that the two means are
871 * equal in favor of the two-sided alternative that they are different.
872 * For a one-sided test, divide the returned value by 2.</p>
873 * <p>
874 * See {@link #homoscedasticT(double[], double[])} for the formula used to
875 * compute the t-statistic. The sum of the sample sizes minus 2 is used as
876 * the degrees of freedom.</p>
877 * <p>
878 * <strong>Usage Note:</strong><br>
879 * The validity of the p-value depends on the assumptions of the parametric
880 * t-test procedure, as discussed
881 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
882 * </p><p>
883 * <strong>Preconditions</strong>: <ul>
884 * <li>The datasets described by the two Univariates must each contain
885 * at least 2 observations.
886 * </li></ul></p>
887 *
888 * @param sampleStats1 StatisticalSummary describing data from the first sample
889 * @param sampleStats2 StatisticalSummary describing data from the second sample
890 * @return p-value for t-test
891 * @throws NullArgumentException if the sample statistics are <code>null</code>
892 * @throws NumberIsTooSmallException if the number of samples is < 2
893 * @throws MaxCountExceededException if an error occurs computing the p-value
894 */
895 public double homoscedasticTTest(final StatisticalSummary sampleStats1,
896 final StatisticalSummary sampleStats2)
897 throws NullArgumentException, NumberIsTooSmallException,
898 MaxCountExceededException {
899
900 checkSampleData(sampleStats1);
901 checkSampleData(sampleStats2);
902 return homoscedasticTTest(sampleStats1.getMean(),
903 sampleStats2.getMean(),
904 sampleStats1.getVariance(),
905 sampleStats2.getVariance(),
906 sampleStats1.getN(), sampleStats2.getN());
907
908 }
909
910 /**
911 * Performs a
912 * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
913 * two-sided t-test</a> evaluating the null hypothesis that
914 * <code>sampleStats1</code> and <code>sampleStats2</code> describe
915 * datasets drawn from populations with the same mean, with significance
916 * level <code>alpha</code>. This test does not assume that the
917 * subpopulation variances are equal. To perform the test under the equal
918 * variances assumption, use
919 * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.
920 * <p>
921 * Returns <code>true</code> iff the null hypothesis that the means are
922 * equal can be rejected with confidence <code>1 - alpha</code>. To
923 * perform a 1-sided test, use <code>alpha * 2</code></p>
924 * <p>
925 * See {@link #t(double[], double[])} for the formula used to compute the
926 * t-statistic. Degrees of freedom are approximated using the
927 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
928 * Welch-Satterthwaite approximation.</a></p>
929 * <p>
930 * <strong>Examples:</strong><br><ol>
931 * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
932 * the 95%, use
933 * <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code>
934 * </li>
935 * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>
936 * at the 99% level, first verify that the measured mean of
937 * <code>sample 1</code> is less than the mean of <code>sample 2</code>
938 * and then use
939 * <br><code>tTest(sampleStats1, sampleStats2, 0.02) </code>
940 * </li></ol></p>
941 * <p>
942 * <strong>Usage Note:</strong><br>
943 * The validity of the test depends on the assumptions of the parametric
944 * t-test procedure, as discussed
945 * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
946 * here</a></p>
947 * <p>
948 * <strong>Preconditions</strong>: <ul>
949 * <li>The datasets described by the two Univariates must each contain
950 * at least 2 observations.
951 * </li>
952 * <li> <code> 0 < alpha < 0.5 </code>
953 * </li></ul></p>
954 *
955 * @param sampleStats1 StatisticalSummary describing sample data values
956 * @param sampleStats2 StatisticalSummary describing sample data values
957 * @param alpha significance level of the test
958 * @return true if the null hypothesis can be rejected with
959 * confidence 1 - alpha
960 * @throws NullArgumentException if the sample statistics are <code>null</code>
961 * @throws NumberIsTooSmallException if the number of samples is < 2
962 * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
963 * @throws MaxCountExceededException if an error occurs computing the p-value
964 */
965 public boolean tTest(final StatisticalSummary sampleStats1,
966 final StatisticalSummary sampleStats2,
967 final double alpha)
968 throws NullArgumentException, NumberIsTooSmallException,
969 OutOfRangeException, MaxCountExceededException {
970
971 checkSignificanceLevel(alpha);
972 return tTest(sampleStats1, sampleStats2) < alpha;
973
974 }
975
976 //----------------------------------------------- Protected methods
977
978 /**
979 * Computes approximate degrees of freedom for 2-sample t-test.
980 *
981 * @param v1 first sample variance
982 * @param v2 second sample variance
983 * @param n1 first sample n
984 * @param n2 second sample n
985 * @return approximate degrees of freedom
986 */
987 protected double df(double v1, double v2, double n1, double n2) {
988 return (((v1 / n1) + (v2 / n2)) * ((v1 / n1) + (v2 / n2))) /
989 ((v1 * v1) / (n1 * n1 * (n1 - 1d)) + (v2 * v2) /
990 (n2 * n2 * (n2 - 1d)));
991 }
992
993 /**
994 * Computes t test statistic for 1-sample t-test.
995 *
996 * @param m sample mean
997 * @param mu constant to test against
998 * @param v sample variance
999 * @param n sample n
1000 * @return t test statistic
1001 */
1002 protected double t(final double m, final double mu,
1003 final double v, final double n) {
1004 return (m - mu) / FastMath.sqrt(v / n);
1005 }
1006
1007 /**
1008 * Computes t test statistic for 2-sample t-test.
1009 * <p>
1010 * Does not assume that subpopulation variances are equal.</p>
1011 *
1012 * @param m1 first sample mean
1013 * @param m2 second sample mean
1014 * @param v1 first sample variance
1015 * @param v2 second sample variance
1016 * @param n1 first sample n
1017 * @param n2 second sample n
1018 * @return t test statistic
1019 */
1020 protected double t(final double m1, final double m2,
1021 final double v1, final double v2,
1022 final double n1, final double n2) {
1023 return (m1 - m2) / FastMath.sqrt((v1 / n1) + (v2 / n2));
1024 }
1025
1026 /**
1027 * Computes t test statistic for 2-sample t-test under the hypothesis
1028 * of equal subpopulation variances.
1029 *
1030 * @param m1 first sample mean
1031 * @param m2 second sample mean
1032 * @param v1 first sample variance
1033 * @param v2 second sample variance
1034 * @param n1 first sample n
1035 * @param n2 second sample n
1036 * @return t test statistic
1037 */
1038 protected double homoscedasticT(final double m1, final double m2,
1039 final double v1, final double v2,
1040 final double n1, final double n2) {
1041 final double pooledVariance = ((n1 - 1) * v1 + (n2 -1) * v2 ) / (n1 + n2 - 2);
1042 return (m1 - m2) / FastMath.sqrt(pooledVariance * (1d / n1 + 1d / n2));
1043 }
1044
1045 /**
1046 * Computes p-value for 2-sided, 1-sample t-test.
1047 *
1048 * @param m sample mean
1049 * @param mu constant to test against
1050 * @param v sample variance
1051 * @param n sample n
1052 * @return p-value
1053 * @throws MaxCountExceededException if an error occurs computing the p-value
1054 * @throws MathIllegalArgumentException if n is not greater than 1
1055 */
1056 protected double tTest(final double m, final double mu,
1057 final double v, final double n)
1058 throws MaxCountExceededException, MathIllegalArgumentException {
1059
1060 double t = FastMath.abs(t(m, mu, v, n));
1061 TDistribution distribution = new TDistribution(n - 1);
1062 return 2.0 * distribution.cumulativeProbability(-t);
1063
1064 }
1065
1066 /**
1067 * Computes p-value for 2-sided, 2-sample t-test.
1068 * <p>
1069 * Does not assume subpopulation variances are equal. Degrees of freedom
1070 * are estimated from the data.</p>
1071 *
1072 * @param m1 first sample mean
1073 * @param m2 second sample mean
1074 * @param v1 first sample variance
1075 * @param v2 second sample variance
1076 * @param n1 first sample n
1077 * @param n2 second sample n
1078 * @return p-value
1079 * @throws MaxCountExceededException if an error occurs computing the p-value
1080 * @throws NotStrictlyPositiveException if the estimated degrees of freedom is not
1081 * strictly positive
1082 */
1083 protected double tTest(final double m1, final double m2,
1084 final double v1, final double v2,
1085 final double n1, final double n2)
1086 throws MaxCountExceededException, NotStrictlyPositiveException {
1087
1088 final double t = FastMath.abs(t(m1, m2, v1, v2, n1, n2));
1089 final double degreesOfFreedom = df(v1, v2, n1, n2);
1090 TDistribution distribution = new TDistribution(degreesOfFreedom);
1091 return 2.0 * distribution.cumulativeProbability(-t);
1092
1093 }
1094
1095 /**
1096 * Computes p-value for 2-sided, 2-sample t-test, under the assumption
1097 * of equal subpopulation variances.
1098 * <p>
1099 * The sum of the sample sizes minus 2 is used as degrees of freedom.</p>
1100 *
1101 * @param m1 first sample mean
1102 * @param m2 second sample mean
1103 * @param v1 first sample variance
1104 * @param v2 second sample variance
1105 * @param n1 first sample n
1106 * @param n2 second sample n
1107 * @return p-value
1108 * @throws MaxCountExceededException if an error occurs computing the p-value
1109 * @throws NotStrictlyPositiveException if the estimated degrees of freedom is not
1110 * strictly positive
1111 */
1112 protected double homoscedasticTTest(double m1, double m2,
1113 double v1, double v2,
1114 double n1, double n2)
1115 throws MaxCountExceededException, NotStrictlyPositiveException {
1116
1117 final double t = FastMath.abs(homoscedasticT(m1, m2, v1, v2, n1, n2));
1118 final double degreesOfFreedom = n1 + n2 - 2;
1119 TDistribution distribution = new TDistribution(degreesOfFreedom);
1120 return 2.0 * distribution.cumulativeProbability(-t);
1121
1122 }
1123
1124 /**
1125 * Check significance level.
1126 *
1127 * @param alpha significance level
1128 * @throws OutOfRangeException if the significance level is out of bounds.
1129 */
1130 private void checkSignificanceLevel(final double alpha)
1131 throws OutOfRangeException {
1132
1133 if (alpha <= 0 || alpha > 0.5) {
1134 throw new OutOfRangeException(LocalizedFormats.SIGNIFICANCE_LEVEL,
1135 alpha, 0.0, 0.5);
1136 }
1137
1138 }
1139
1140 /**
1141 * Check sample data.
1142 *
1143 * @param data Sample data.
1144 * @throws NullArgumentException if {@code data} is {@code null}.
1145 * @throws NumberIsTooSmallException if there is not enough sample data.
1146 */
1147 private void checkSampleData(final double[] data)
1148 throws NullArgumentException, NumberIsTooSmallException {
1149
1150 if (data == null) {
1151 throw new NullArgumentException();
1152 }
1153 if (data.length < 2) {
1154 throw new NumberIsTooSmallException(
1155 LocalizedFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC,
1156 data.length, 2, true);
1157 }
1158
1159 }
1160
1161 /**
1162 * Check sample data.
1163 *
1164 * @param stat Statistical summary.
1165 * @throws NullArgumentException if {@code data} is {@code null}.
1166 * @throws NumberIsTooSmallException if there is not enough sample data.
1167 */
1168 private void checkSampleData(final StatisticalSummary stat)
1169 throws NullArgumentException, NumberIsTooSmallException {
1170
1171 if (stat == null) {
1172 throw new NullArgumentException();
1173 }
1174 if (stat.getN() < 2) {
1175 throw new NumberIsTooSmallException(
1176 LocalizedFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC,
1177 stat.getN(), 2, true);
1178 }
1179
1180 }
1181
1182 }