Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||||||
TTestImpl |
|
| 2.3448275862068964;2.345 |
1 | /* |
|
2 | * Copyright 2004-2005 The Apache Software Foundation. |
|
3 | * |
|
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
|
5 | * you may not use this file except in compliance with the License. |
|
6 | * You may obtain a copy of the License at |
|
7 | * |
|
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
|
9 | * |
|
10 | * Unless required by applicable law or agreed to in writing, software |
|
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
|
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
13 | * See the License for the specific language governing permissions and |
|
14 | * limitations under the License. |
|
15 | */ |
|
16 | package org.apache.commons.math.stat.inference; |
|
17 | ||
18 | import org.apache.commons.math.MathException; |
|
19 | import org.apache.commons.math.distribution.DistributionFactory; |
|
20 | import org.apache.commons.math.distribution.TDistribution; |
|
21 | import org.apache.commons.math.stat.StatUtils; |
|
22 | import org.apache.commons.math.stat.descriptive.StatisticalSummary; |
|
23 | ||
24 | /** |
|
25 | * Implements t-test statistics defined in the {@link TTest} interface. |
|
26 | * <p> |
|
27 | * Uses commons-math {@link org.apache.commons.math.distribution.TDistribution} |
|
28 | * implementation to estimate exact p-values. |
|
29 | * |
|
30 | * @version $Revision$ $Date: 2005-05-01 22:14:49 -0700 (Sun, 01 May 2005) $ |
|
31 | */ |
|
32 | public class TTestImpl implements TTest { |
|
33 | ||
34 | /** Cached DistributionFactory used to create TDistribution instances */ |
|
35 | 30 | private DistributionFactory distributionFactory = null; |
36 | ||
37 | /** |
|
38 | * Default constructor. |
|
39 | */ |
|
40 | public TTestImpl() { |
|
41 | 30 | super(); |
42 | 30 | } |
43 | ||
44 | /** |
|
45 | * Computes a paired, 2-sample t-statistic based on the data in the input |
|
46 | * arrays. The t-statistic returned is equivalent to what would be returned by |
|
47 | * computing the one-sample t-statistic {@link #t(double, double[])}, with |
|
48 | * <code>mu = 0</code> and the sample array consisting of the (signed) |
|
49 | * differences between corresponding entries in <code>sample1</code> and |
|
50 | * <code>sample2.</code> |
|
51 | * <p> |
|
52 | * <strong>Preconditions</strong>: <ul> |
|
53 | * <li>The input arrays must have the same length and their common length |
|
54 | * must be at least 2. |
|
55 | * </li></ul> |
|
56 | * |
|
57 | * @param sample1 array of sample data values |
|
58 | * @param sample2 array of sample data values |
|
59 | * @return t statistic |
|
60 | * @throws IllegalArgumentException if the precondition is not met |
|
61 | * @throws MathException if the statistic can not be computed due to a |
|
62 | * convergence or other numerical error. |
|
63 | */ |
|
64 | public double pairedT(double[] sample1, double[] sample2) |
|
65 | throws IllegalArgumentException, MathException { |
|
66 | 6 | if ((sample1 == null) || (sample2 == null || |
67 | Math.min(sample1.length, sample2.length) < 2)) { |
|
68 | 0 | throw new IllegalArgumentException("insufficient data for t statistic"); |
69 | } |
|
70 | 6 | double meanDifference = StatUtils.meanDifference(sample1, sample2); |
71 | 6 | return t(meanDifference, 0, |
72 | StatUtils.varianceDifference(sample1, sample2, meanDifference), |
|
73 | (double) sample1.length); |
|
74 | } |
|
75 | ||
76 | /** |
|
77 | * Returns the <i>observed significance level</i>, or |
|
78 | * <i> p-value</i>, associated with a paired, two-sample, two-tailed t-test |
|
79 | * based on the data in the input arrays. |
|
80 | * <p> |
|
81 | * The number returned is the smallest significance level |
|
82 | * at which one can reject the null hypothesis that the mean of the paired |
|
83 | * differences is 0 in favor of the two-sided alternative that the mean paired |
|
84 | * difference is not equal to 0. For a one-sided test, divide the returned |
|
85 | * value by 2. |
|
86 | * <p> |
|
87 | * This test is equivalent to a one-sample t-test computed using |
|
88 | * {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample |
|
89 | * array consisting of the signed differences between corresponding elements of |
|
90 | * <code>sample1</code> and <code>sample2.</code> |
|
91 | * <p> |
|
92 | * <strong>Usage Note:</strong><br> |
|
93 | * The validity of the p-value depends on the assumptions of the parametric |
|
94 | * t-test procedure, as discussed |
|
95 | * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> |
|
96 | * here</a> |
|
97 | * <p> |
|
98 | * <strong>Preconditions</strong>: <ul> |
|
99 | * <li>The input array lengths must be the same and their common length must |
|
100 | * be at least 2. |
|
101 | * </li></ul> |
|
102 | * |
|
103 | * @param sample1 array of sample data values |
|
104 | * @param sample2 array of sample data values |
|
105 | * @return p-value for t-test |
|
106 | * @throws IllegalArgumentException if the precondition is not met |
|
107 | * @throws MathException if an error occurs computing the p-value |
|
108 | */ |
|
109 | public double pairedTTest(double[] sample1, double[] sample2) |
|
110 | throws IllegalArgumentException, MathException { |
|
111 | 24 | double meanDifference = StatUtils.meanDifference(sample1, sample2); |
112 | 24 | return tTest(meanDifference, 0, |
113 | StatUtils.varianceDifference(sample1, sample2, meanDifference), |
|
114 | (double) sample1.length); |
|
115 | } |
|
116 | ||
117 | /** |
|
118 | * Performs a paired t-test evaluating the null hypothesis that the |
|
119 | * mean of the paired differences between <code>sample1</code> and |
|
120 | * <code>sample2</code> is 0 in favor of the two-sided alternative that the |
|
121 | * mean paired difference is not equal to 0, with significance level |
|
122 | * <code>alpha</code>. |
|
123 | * <p> |
|
124 | * Returns <code>true</code> iff the null hypothesis can be rejected with |
|
125 | * confidence <code>1 - alpha</code>. To perform a 1-sided test, use |
|
126 | * <code>alpha * 2</code> |
|
127 | * <p> |
|
128 | * <strong>Usage Note:</strong><br> |
|
129 | * The validity of the test depends on the assumptions of the parametric |
|
130 | * t-test procedure, as discussed |
|
131 | * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> |
|
132 | * here</a> |
|
133 | * <p> |
|
134 | * <strong>Preconditions</strong>: <ul> |
|
135 | * <li>The input array lengths must be the same and their common length |
|
136 | * must be at least 2. |
|
137 | * </li> |
|
138 | * <li> <code> 0 &lt; alpha &lt;= 0.5 </code> |
|
139 | * </li></ul> |
|
140 | * |
|
141 | * @param sample1 array of sample data values |
|
142 | * @param sample2 array of sample data values |
|
143 | * @param alpha significance level of the test |
|
144 | * @return true if the null hypothesis can be rejected with |
|
145 | * confidence 1 - alpha |
|
146 | * @throws IllegalArgumentException if the preconditions are not met |
|
147 | * @throws MathException if an error occurs performing the test |
|
148 | */ |
|
149 | public boolean pairedTTest(double[] sample1, double[] sample2, double alpha) |
|
150 | throws IllegalArgumentException, MathException { |
|
151 | 12 | if ((alpha <= 0) || (alpha > 0.5)) { |
152 | 0 | throw new IllegalArgumentException("bad significance level: " + alpha); |
153 | } |
|
154 | 12 | return (pairedTTest(sample1, sample2) < alpha); |
155 | } |
|
156 | ||
157 | /** |
|
158 | * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula"> |
|
159 | * t statistic </a> given observed values and a comparison constant. |
|
160 | * <p> |
|
161 | * This statistic can be used to perform a one sample t-test for the mean. |
|
162 | * <p> |
|
163 | * <strong>Preconditions</strong>: <ul> |
|
164 | * <li>The observed array length must be at least 2. |
|
165 | * </li></ul> |
|
166 | * |
|
167 | * @param mu comparison constant |
|
168 | * @param observed array of values |
|
169 | * @return t statistic |
|
170 | * @throws IllegalArgumentException if input array length is less than 2 |
|
171 | */ |
|
172 | public double t(double mu, double[] observed) |
|
173 | throws IllegalArgumentException { |
|
174 | 30 | if ((observed == null) || (observed.length < 2)) { |
175 | 18 | throw new IllegalArgumentException("insufficient data for t statistic"); |
176 | } |
|
177 | 12 | return t(StatUtils.mean(observed), mu, StatUtils.variance(observed), |
178 | observed.length); |
|
179 | } |
|
180 | ||
181 | /** |
|
182 | * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula"> |
|
183 | * t statistic </a> to use in comparing the mean of the dataset described by |
|
184 | * <code>sampleStats</code> to <code>mu</code>. |
|
185 | * <p> |
|
186 | * This statistic can be used to perform a one sample t-test for the mean. |
|
187 | * <p> |
|
188 | * <strong>Preconditions</strong>: <ul> |
|
189 | * <li><code>sampleStats.getN() &gt;= 2</code>. |
|
190 | * </li></ul> |
|
191 | * |
|
192 | * @param mu comparison constant |
|
193 | * @param sampleStats StatisticalSummary holding sample summary statistics |
|
194 | * @return t statistic |
|
195 | * @throws IllegalArgumentException if the precondition is not met |
|
196 | */ |
|
197 | public double t(double mu, StatisticalSummary sampleStats) |
|
198 | throws IllegalArgumentException { |
|
199 | 30 | if ((sampleStats == null) || (sampleStats.getN() < 2)) { |
200 | 18 | throw new IllegalArgumentException("insufficient data for t statistic"); |
201 | } |
|
202 | 12 | return t(sampleStats.getMean(), mu, sampleStats.getVariance(), |
203 | sampleStats.getN()); |
|
204 | } |
|
205 | ||
206 | /** |
|
207 | * Computes a 2-sample t statistic, under the hypothesis of equal |
|
208 | * subpopulation variances. To compute a t-statistic without the |
|
209 | * equal variances hypothesis, use {@link #t(double[], double[])}. |
|
210 | * <p> |
|
211 | * This statistic can be used to perform a (homoscedastic) two-sample |
|
212 | * t-test to compare sample means. |
|
213 | * <p> |
|
214 | * The t-statistic is |
|
215 | * <p> |
|
216 | * <code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code> |
|
217 | * <p> |
|
218 | * where <strong><code>n1</code></strong> is the size of first sample; |
|
219 | * <strong><code> n2</code></strong> is the size of second sample; |
|
220 | * <strong><code> m1</code></strong> is the mean of first sample; |
|
221 | * <strong><code> m2</code></strong> is the mean of second sample |
|
222 | * |
|
223 | * and <strong><code>var</code></strong> is the pooled variance estimate: |
|
224 | * <p> |
|
225 | * <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code> |
|
226 | * <p> |
|
227 | * with <strong><code>var1</code></strong> the variance of the first sample and |
|
228 | * <strong><code>var2</code></strong> the variance of the second sample. |
|
229 | * <p> |
|
230 | * <strong>Preconditions</strong>: <ul> |
|
231 | * <li>The observed array lengths must both be at least 2. |
|
232 | * </li></ul> |
|
233 | * |
|
234 | * @param sample1 array of sample data values |
|
235 | * @param sample2 array of sample data values |
|
236 | * @return t statistic |
|
237 | * @throws IllegalArgumentException if the precondition is not met |
|
238 | */ |
|
239 | public double homoscedasticT(double[] sample1, double[] sample2) |
|
240 | throws IllegalArgumentException { |
|
241 | 6 | if ((sample1 == null) || (sample2 == null || |
242 | Math.min(sample1.length, sample2.length) < 2)) { |
|
243 | 0 | throw new IllegalArgumentException("insufficient data for t statistic"); |
244 | } |
|
245 | 6 | return homoscedasticT(StatUtils.mean(sample1), StatUtils.mean(sample2), |
246 | StatUtils.variance(sample1), StatUtils.variance(sample2), |
|
247 | (double) sample1.length, (double) sample2.length); |
|
248 | } |
|
249 | ||
250 | /** |
|
251 | * Computes a 2-sample t statistic, without the hypothesis of equal |
|
252 | * subpopulation variances. To compute a t-statistic assuming equal |
|
253 | * variances, use {@link #homoscedasticT(double[], double[])}. |
|
254 | * <p> |
|
255 | * This statistic can be used to perform a two-sample t-test to compare |
|
256 | * sample means. |
|
257 | * <p> |
|
258 | * The t-statistic is |
|
259 | * <p> |
|
260 | * <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code> |
|
261 | * <p> |
|
262 | * where <strong><code>n1</code></strong> is the size of the first sample |
|
263 | * <strong><code> n2</code></strong> is the size of the second sample; |
|
264 | * <strong><code> m1</code></strong> is the mean of the first sample; |
|
265 | * <strong><code> m2</code></strong> is the mean of the second sample; |
|
266 | * <strong><code> var1</code></strong> is the variance of the first sample; |
|
267 | * <strong><code> var2</code></strong> is the variance of the second sample; |
|
268 | * <p> |
|
269 | * <strong>Preconditions</strong>: <ul> |
|
270 | * <li>The observed array lengths must both be at least 2. |
|
271 | * </li></ul> |
|
272 | * |
|
273 | * @param sample1 array of sample data values |
|
274 | * @param sample2 array of sample data values |
|
275 | * @return t statistic |
|
276 | * @throws IllegalArgumentException if the precondition is not met |
|
277 | */ |
|
278 | public double t(double[] sample1, double[] sample2) |
|
279 | throws IllegalArgumentException { |
|
280 | 18 | if ((sample1 == null) || (sample2 == null || |
281 | Math.min(sample1.length, sample2.length) < 2)) { |
|
282 | 6 | throw new IllegalArgumentException("insufficient data for t statistic"); |
283 | } |
|
284 | 12 | return t(StatUtils.mean(sample1), StatUtils.mean(sample2), |
285 | StatUtils.variance(sample1), StatUtils.variance(sample2), |
|
286 | (double) sample1.length, (double) sample2.length); |
|
287 | } |
|
288 | ||
289 | /** |
|
290 | * Computes a 2-sample t statistic, comparing the means of the datasets |
|
291 | * described by two {@link StatisticalSummary} instances, without the |
|
292 | * assumption of equal subpopulation variances. Use |
|
293 | * {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} to |
|
294 | * compute a t-statistic under the equal variances assumption. |
|
295 | * <p> |
|
296 | * This statistic can be used to perform a two-sample t-test to compare |
|
297 | * sample means. |
|
298 | * <p> |
|
299 | * The returned t-statistic is |
|
300 | * <p> |
|
301 | * <code> t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code> |
|
302 | * <p> |
|
303 | * where <strong><code>n1</code></strong> is the size of the first sample; |
|
304 | * <strong><code> n2</code></strong> is the size of the second sample; |
|
305 | * <strong><code> m1</code></strong> is the mean of the first sample; |
|
306 | * <strong><code> m2</code></strong> is the mean of the second sample |
|
307 | * <strong><code> var1</code></strong> is the variance of the first sample; |
|
308 | * <strong><code> var2</code></strong> is the variance of the second sample |
|
309 | * <p> |
|
310 | * <strong>Preconditions</strong>: <ul> |
|
311 | * <li>The datasets described by the two StatisticalSummary instances must each contain |
|
312 | * at least 2 observations. |
|
313 | * </li></ul> |
|
314 | * |
|
315 | * @param sampleStats1 StatisticalSummary describing data from the first sample |
|
316 | * @param sampleStats2 StatisticalSummary describing data from the second sample |
|
317 | * @return t statistic |
|
318 | * @throws IllegalArgumentException if the precondition is not met |
|
319 | */ |
|
320 | public double t(StatisticalSummary sampleStats1, |
|
321 | StatisticalSummary sampleStats2) |
|
322 | throws IllegalArgumentException { |
|
323 | 12 | if ((sampleStats1 == null) || |
324 | (sampleStats2 == null || |
|
325 | Math.min(sampleStats1.getN(), sampleStats2.getN()) < 2)) { |
|
326 | 6 | throw new IllegalArgumentException("insufficient data for t statistic"); |
327 | } |
|
328 | 6 | return t(sampleStats1.getMean(), sampleStats2.getMean(), |
329 | sampleStats1.getVariance(), sampleStats2.getVariance(), |
|
330 | (double) sampleStats1.getN(), (double) sampleStats2.getN()); |
|
331 | } |
|
332 | ||
333 | /** |
|
334 | * Computes a 2-sample t statistic, comparing the means of the datasets |
|
335 | * described by two {@link StatisticalSummary} instances, under the |
|
336 | * assumption of equal subpopulation variances. To compute a t-statistic |
|
337 | * without the equal variances assumption, use |
|
338 | * {@link #t(StatisticalSummary, StatisticalSummary)}. |
|
339 | * <p> |
|
340 | * This statistic can be used to perform a (homoscedastic) two-sample |
|
341 | * t-test to compare sample means. |
|
342 | * <p> |
|
343 | * The t-statistic returned is |
|
344 | * <p> |
|
345 | * <code> t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code> |
|
346 | * <p> |
|
347 | * where <strong><code>n1</code></strong> is the size of first sample; |
|
348 | * <strong><code> n2</code></strong> is the size of second sample; |
|
349 | * <strong><code> m1</code></strong> is the mean of first sample; |
|
350 | * <strong><code> m2</code></strong> is the mean of second sample |
|
351 | * and <strong><code>var</code></strong> is the pooled variance estimate: |
|
352 | * <p> |
|
353 | * <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code> |
|
354 | * <p> |
|
355 | * with <strong><code>var1</code></strong> the variance of the first sample and |
|
356 | * <strong><code>var2</code></strong> the variance of the second sample. |
|
357 | * <p> |
|
358 | * <strong>Preconditions</strong>: <ul> |
|
359 | * <li>The datasets described by the two StatisticalSummary instances must each contain |
|
360 | * at least 2 observations. |
|
361 | * </li></ul> |
|
362 | * |
|
363 | * @param sampleStats1 StatisticalSummary describing data from the first sample |
|
364 | * @param sampleStats2 StatisticalSummary describing data from the second sample |
|
365 | * @return t statistic |
|
366 | * @throws IllegalArgumentException if the precondition is not met |
|
367 | */ |
|
368 | public double homoscedasticT(StatisticalSummary sampleStats1, |
|
369 | StatisticalSummary sampleStats2) |
|
370 | throws IllegalArgumentException { |
|
371 | 0 | if ((sampleStats1 == null) || |
372 | (sampleStats2 == null || |
|
373 | Math.min(sampleStats1.getN(), sampleStats2.getN()) < 2)) { |
|
374 | 0 | throw new IllegalArgumentException("insufficient data for t statistic"); |
375 | } |
|
376 | 0 | return homoscedasticT(sampleStats1.getMean(), sampleStats2.getMean(), |
377 | sampleStats1.getVariance(), sampleStats2.getVariance(), |
|
378 | (double) sampleStats1.getN(), (double) sampleStats2.getN()); |
|
379 | } |
|
380 | ||
381 | /** |
|
382 | * Returns the <i>observed significance level</i>, or |
|
383 | * <i>p-value</i>, associated with a one-sample, two-tailed t-test |
|
384 | * comparing the mean of the input array with the constant <code>mu</code>. |
|
385 | * <p> |
|
386 | * The number returned is the smallest significance level |
|
387 | * at which one can reject the null hypothesis that the mean equals |
|
388 | * <code>mu</code> in favor of the two-sided alternative that the mean |
|
389 | * is different from <code>mu</code>. For a one-sided test, divide the |
|
390 | * returned value by 2. |
|
391 | * <p> |
|
392 | * <strong>Usage Note:</strong><br> |
|
393 | * The validity of the test depends on the assumptions of the parametric |
|
394 | * t-test procedure, as discussed |
|
395 | * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a> |
|
396 | * <p> |
|
397 | * <strong>Preconditions</strong>: <ul> |
|
398 | * <li>The observed array length must be at least 2. |
|
399 | * </li></ul> |
|
400 | * |
|
401 | * @param mu constant value to compare sample mean against |
|
402 | * @param sample array of sample data values |
|
403 | * @return p-value |
|
404 | * @throws IllegalArgumentException if the precondition is not met |
|
405 | * @throws MathException if an error occurs computing the p-value |
|
406 | */ |
|
407 | public double tTest(double mu, double[] sample) |
|
408 | throws IllegalArgumentException, MathException { |
|
409 | 30 | if ((sample == null) || (sample.length < 2)) { |
410 | 6 | throw new IllegalArgumentException("insufficient data for t statistic"); |
411 | } |
|
412 | 24 | return tTest( StatUtils.mean(sample), mu, StatUtils.variance(sample), |
413 | sample.length); |
|
414 | } |
|
415 | ||
416 | /** |
|
417 | * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm"> |
|
418 | * two-sided t-test</a> evaluating the null hypothesis that the mean of the population from |
|
419 | * which <code>sample</code> is drawn equals <code>mu</code>. |
|
420 | * <p> |
|
421 | * Returns <code>true</code> iff the null hypothesis can be |
|
422 | * rejected with confidence <code>1 - alpha</code>. To |
|
423 | * perform a 1-sided test, use <code>alpha * 2</code> |
|
424 | * <p> |
|
425 | * <strong>Examples:</strong><br><ol> |
|
426 | * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at |
|
427 | * the 95% level, use <br><code>tTest(mu, sample, 0.05) </code> |
|
428 | * </li> |
|
429 | * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code> |
|
430 | * at the 99% level, first verify that the measured sample mean is less |
|
431 | * than <code>mu</code> and then use |
|
432 | * <br><code>tTest(mu, sample, 0.02) </code> |
|
433 | * </li></ol> |
|
434 | * <p> |
|
435 | * <strong>Usage Note:</strong><br> |
|
436 | * The validity of the test depends on the assumptions of the one-sample |
|
437 | * parametric t-test procedure, as discussed |
|
438 | * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a> |
|
439 | * <p> |
|
440 | * <strong>Preconditions</strong>: <ul> |
|
441 | * <li>The observed array length must be at least 2. |
|
442 | * </li></ul> |
|
443 | * |
|
444 | * @param mu constant value to compare sample mean against |
|
445 | * @param sample array of sample data values |
|
446 | * @param alpha significance level of the test |
|
447 | * @return true if the null hypothesis can be rejected with confidence 1 - alpha |
|
448 | * @throws IllegalArgumentException if the precondition is not met |
|
449 | * @throws MathException if an error occurs computing the p-value |
|
450 | */ |
|
451 | public boolean tTest(double mu, double[] sample, double alpha) |
|
452 | throws IllegalArgumentException, MathException { |
|
453 | 18 | if ((alpha <= 0) || (alpha > 0.5)) { |
454 | 6 | throw new IllegalArgumentException("bad significance level: " + alpha); |
455 | } |
|
456 | 12 | return (tTest(mu, sample) < alpha); |
457 | } |
|
458 | ||
459 | /** |
|
460 | * Returns the <i>observed significance level</i>, or |
|
461 | * <i>p-value</i>, associated with a one-sample, two-tailed t-test |
|
462 | * comparing the mean of the dataset described by <code>sampleStats</code> |
|
463 | * with the constant <code>mu</code>. |
|
464 | * <p> |
|
465 | * The number returned is the smallest significance level |
|
466 | * at which one can reject the null hypothesis that the mean equals |
|
467 | * <code>mu</code> in favor of the two-sided alternative that the mean |
|
468 | * is different from <code>mu</code>. For a one-sided test, divide the |
|
469 | * returned value by 2. |
|
470 | * <p> |
|
471 | * <strong>Usage Note:</strong><br> |
|
472 | * The validity of the test depends on the assumptions of the parametric |
|
473 | * t-test procedure, as discussed |
|
474 | * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> |
|
475 | * here</a> |
|
476 | * <p> |
|
477 | * <strong>Preconditions</strong>: <ul> |
|
478 | * <li>The sample must contain at least 2 observations. |
|
479 | * </li></ul> |
|
480 | * |
|
481 | * @param mu constant value to compare sample mean against |
|
482 | * @param sampleStats StatisticalSummary describing sample data |
|
483 | * @return p-value |
|
484 | * @throws IllegalArgumentException if the precondition is not met |
|
485 | * @throws MathException if an error occurs computing the p-value |
|
486 | */ |
|
487 | public double tTest(double mu, StatisticalSummary sampleStats) |
|
488 | throws IllegalArgumentException, MathException { |
|
489 | 30 | if ((sampleStats == null) || (sampleStats.getN() < 2)) { |
490 | 6 | throw new IllegalArgumentException("insufficient data for t statistic"); |
491 | } |
|
492 | 24 | return tTest(sampleStats.getMean(), mu, sampleStats.getVariance(), |
493 | sampleStats.getN()); |
|
494 | } |
|
495 | ||
496 | /** |
|
497 | * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm"> |
|
498 | * two-sided t-test</a> evaluating the null hypothesis that the mean of the |
|
499 | * population from which the dataset described by <code>sampleStats</code> is |
|
500 | * drawn equals <code>mu</code>. |
|
501 | * <p> |
|
502 | * Returns <code>true</code> iff the null hypothesis can be rejected with |
|
503 | * confidence <code>1 - alpha</code>. To perform a 1-sided test, use |
|
504 | * <code>alpha * 2.</code> |
|
505 | * <p> |
|
506 | * <strong>Examples:</strong><br><ol> |
|
507 | * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at |
|
508 | * the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code> |
|
509 | * </li> |
|
510 | * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code> |
|
511 | * at the 99% level, first verify that the measured sample mean is less |
|
512 | * than <code>mu</code> and then use |
|
513 | * <br><code>tTest(mu, sampleStats, 0.02) </code> |
|
514 | * </li></ol> |
|
515 | * <p> |
|
516 | * <strong>Usage Note:</strong><br> |
|
517 | * The validity of the test depends on the assumptions of the one-sample |
|
518 | * parametric t-test procedure, as discussed |
|
519 | * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a> |
|
520 | * <p> |
|
521 | * <strong>Preconditions</strong>: <ul> |
|
522 | * <li>The sample must include at least 2 observations. |
|
523 | * </li></ul> |
|
524 | * |
|
525 | * @param mu constant value to compare sample mean against |
|
526 | * @param sampleStats StatisticalSummary describing sample data values |
|
527 | * @param alpha significance level of the test |
|
528 | * @return true if the null hypothesis can be rejected with confidence 1 - alpha |
|
529 | * @throws IllegalArgumentException if the precondition is not met |
|
530 | * @throws MathException if an error occurs computing the p-value |
|
531 | */ |
|
532 | public boolean tTest( double mu, StatisticalSummary sampleStats, |
|
533 | double alpha) |
|
534 | throws IllegalArgumentException, MathException { |
|
535 | 18 | if ((alpha <= 0) || (alpha > 0.5)) { |
536 | 6 | throw new IllegalArgumentException("bad significance level: " + alpha); |
537 | } |
|
538 | 12 | return (tTest(mu, sampleStats) < alpha); |
539 | } |
|
540 | ||
541 | /** |
|
542 | * Returns the <i>observed significance level</i>, or |
|
543 | * <i>p-value</i>, associated with a two-sample, two-tailed t-test |
|
544 | * comparing the means of the input arrays. |
|
545 | * <p> |
|
546 | * The number returned is the smallest significance level |
|
547 | * at which one can reject the null hypothesis that the two means are |
|
548 | * equal in favor of the two-sided alternative that they are different. |
|
549 | * For a one-sided test, divide the returned value by 2. |
|
550 | * <p> |
|
551 | * The test does not assume that the underlying population variances are |
|
552 | * equal and it uses approximated degrees of freedom computed from the |
|
553 | * sample data to compute the p-value. The t-statistic used is as defined in |
|
554 | * {@link #t(double[], double[])} and the Welch-Satterthwaite approximation |
|
555 | * to the degrees of freedom is used, |
|
556 | * as described |
|
557 | * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm"> |
|
558 | * here.</a> To perform the test under the assumption of equal subpopulation |
|
559 | * variances, use {@link #homoscedasticTTest(double[], double[])}. |
|
560 | * <p> |
|
561 | * <strong>Usage Note:</strong><br> |
|
562 | * The validity of the p-value depends on the assumptions of the parametric |
|
563 | * t-test procedure, as discussed |
|
564 | * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> |
|
565 | * here</a> |
|
566 | * <p> |
|
567 | * <strong>Preconditions</strong>: <ul> |
|
568 | * <li>The observed array lengths must both be at least 2. |
|
569 | * </li></ul> |
|
570 | * |
|
571 | * @param sample1 array of sample data values |
|
572 | * @param sample2 array of sample data values |
|
573 | * @return p-value for t-test |
|
574 | * @throws IllegalArgumentException if the precondition is not met |
|
575 | * @throws MathException if an error occurs computing the p-value |
|
576 | */ |
|
577 | public double tTest(double[] sample1, double[] sample2) |
|
578 | throws IllegalArgumentException, MathException { |
|
579 | 36 | if ((sample1 == null) || (sample2 == null || |
580 | Math.min(sample1.length, sample2.length) < 2)) { |
|
581 | 12 | throw new IllegalArgumentException("insufficient data"); |
582 | } |
|
583 | 24 | return tTest(StatUtils.mean(sample1), StatUtils.mean(sample2), |
584 | StatUtils.variance(sample1), StatUtils.variance(sample2), |
|
585 | (double) sample1.length, (double) sample2.length); |
|
586 | } |
|
587 | ||
588 | /** |
|
589 | * Returns the <i>observed significance level</i>, or |
|
590 | * <i>p-value</i>, associated with a two-sample, two-tailed t-test |
|
591 | * comparing the means of the input arrays, under the assumption that |
|
592 | * the two samples are drawn from subpopulations with equal variances. |
|
593 | * To perform the test without the equal variances assumption, use |
|
594 | * {@link #tTest(double[], double[])}. |
|
595 | * <p> |
|
596 | * The number returned is the smallest significance level |
|
597 | * at which one can reject the null hypothesis that the two means are |
|
598 | * equal in favor of the two-sided alternative that they are different. |
|
599 | * For a one-sided test, divide the returned value by 2. |
|
600 | * <p> |
|
601 | * A pooled variance estimate is used to compute the t-statistic. See |
|
602 | * {@link #homoscedasticT(double[], double[])}. The sum of the sample sizes |
|
603 | * minus 2 is used as the degrees of freedom. |
|
604 | * <p> |
|
605 | * <strong>Usage Note:</strong><br> |
|
606 | * The validity of the p-value depends on the assumptions of the parametric |
|
607 | * t-test procedure, as discussed |
|
608 | * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> |
|
609 | * here</a> |
|
610 | * <p> |
|
611 | * <strong>Preconditions</strong>: <ul> |
|
612 | * <li>The observed array lengths must both be at least 2. |
|
613 | * </li></ul> |
|
614 | * |
|
615 | * @param sample1 array of sample data values |
|
616 | * @param sample2 array of sample data values |
|
617 | * @return p-value for t-test |
|
618 | * @throws IllegalArgumentException if the precondition is not met |
|
619 | * @throws MathException if an error occurs computing the p-value |
|
620 | */ |
|
621 | public double homoscedasticTTest(double[] sample1, double[] sample2) |
|
622 | throws IllegalArgumentException, MathException { |
|
623 | 12 | if ((sample1 == null) || (sample2 == null || |
624 | Math.min(sample1.length, sample2.length) < 2)) { |
|
625 | 0 | throw new IllegalArgumentException("insufficient data"); |
626 | } |
|
627 | 12 | return homoscedasticTTest(StatUtils.mean(sample1), |
628 | StatUtils.mean(sample2), StatUtils.variance(sample1), |
|
629 | StatUtils.variance(sample2), (double) sample1.length, |
|
630 | (double) sample2.length); |
|
631 | } |
|
632 | ||
633 | ||
634 | /** |
|
635 | * Performs a |
|
636 | * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm"> |
|
637 | * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code> |
|
638 | * and <code>sample2</code> are drawn from populations with the same mean, |
|
639 | * with significance level <code>alpha</code>. This test does not assume |
|
640 | * that the subpopulation variances are equal. To perform the test assuming |
|
641 | * equal variances, use |
|
642 | * {@link #homoscedasticTTest(double[], double[], double)}. |
|
643 | * <p> |
|
644 | * Returns <code>true</code> iff the null hypothesis that the means are |
|
645 | * equal can be rejected with confidence <code>1 - alpha</code>. To |
|
646 | * perform a 1-sided test, use <code>alpha * 2</code> |
|
647 | * <p> |
|
648 | * See {@link #t(double[], double[])} for the formula used to compute the |
|
649 | * t-statistic. Degrees of freedom are approximated using the |
|
650 | * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm"> |
|
651 | * Welch-Satterthwaite approximation.</a> |
|
652 | |
|
653 | * <p> |
|
654 | * <strong>Examples:</strong><br><ol> |
|
655 | * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at |
|
656 | * the 95% level, use |
|
657 | * <br><code>tTest(sample1, sample2, 0.05). </code> |
|
658 | * </li> |
|
659 | * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code> at |
|
660 | * the 99% level, first verify that the measured mean of <code>sample 1</code> |
|
661 | * is less than the mean of <code>sample 2</code> and then use |
|
662 | * <br><code>tTest(sample1, sample2, 0.02) </code> |
|
663 | * </li></ol> |
|
664 | * <p> |
|
665 | * <strong>Usage Note:</strong><br> |
|
666 | * The validity of the test depends on the assumptions of the parametric |
|
667 | * t-test procedure, as discussed |
|
668 | * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> |
|
669 | * here</a> |
|
670 | * <p> |
|
671 | * <strong>Preconditions</strong>: <ul> |
|
672 | * <li>The observed array lengths must both be at least 2. |
|
673 | * </li> |
|
674 | * <li> <code> 0 < alpha <= 0.5 </code> |
|
675 | * </li></ul> |
|
676 | * |
|
677 | * @param sample1 array of sample data values |
|
678 | * @param sample2 array of sample data values |
|
679 | * @param alpha significance level of the test |
|
680 | * @return true if the null hypothesis can be rejected with |
|
681 | * confidence 1 - alpha |
|
682 | * @throws IllegalArgumentException if the preconditions are not met |
|
683 | * @throws MathException if an error occurs performing the test |
|
684 | */ |
|
685 | public boolean tTest(double[] sample1, double[] sample2, |
|
686 | double alpha) |
|
687 | throws IllegalArgumentException, MathException { |
|
688 | 24 | if ((alpha <= 0) || (alpha > 0.5)) { |
689 | 6 | throw new IllegalArgumentException("bad significance level: " + alpha); |
690 | } |
|
691 | 18 | return (tTest(sample1, sample2) < alpha); |
692 | } |
|
693 | ||
694 | /** |
|
695 | * Performs a |
|
696 | * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm"> |
|
697 | * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code> |
|
698 | * and <code>sample2</code> are drawn from populations with the same mean, |
|
699 | * with significance level <code>alpha</code>, assuming that the |
|
700 | * subpopulation variances are equal. Use |
|
701 | * {@link #tTest(double[], double[], double)} to perform the test without |
|
702 | * the assumption of equal variances. |
|
703 | * <p> |
|
704 | * Returns <code>true</code> iff the null hypothesis that the means are |
|
705 | * equal can be rejected with confidence <code>1 - alpha</code>. To |
|
706 | * perform a 1-sided test, use <code>alpha * 2.</code> To perform the test |
|
707 | * without the assumption of equal subpopulation variances, use |
|
708 | * {@link #tTest(double[], double[], double)}. |
|
709 | * <p> |
|
710 | * A pooled variance estimate is used to compute the t-statistic. See |
|
711 | * {@link #homoscedasticT(double[], double[])} for the formula. The sum of the sample |
|
712 | * sizes minus 2 is used as the degrees of freedom. |
|
713 | * <p> |
|
714 | * <strong>Examples:</strong><br><ol> |
|
715 | * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at |
|
716 | * the 95% level, use <br><code>homoscedasticTTest(sample1, sample2, 0.05). </code> |
|
717 | * </li> |
|
718 | * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2, </code> |
|
719 | * at the 99% level, first verify that the measured mean of |
|
720 | * <code>sample 1</code> is less than the mean of <code>sample 2</code> |
|
721 | * and then use |
|
722 | * <br><code>homoscedasticTTest(sample1, sample2, 0.02) </code> |
|
723 | * </li></ol> |
|
724 | * <p> |
|
725 | * <strong>Usage Note:</strong><br> |
|
726 | * The validity of the test depends on the assumptions of the parametric |
|
727 | * t-test procedure, as discussed |
|
728 | * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> |
|
729 | * here</a> |
|
730 | * <p> |
|
731 | * <strong>Preconditions</strong>: <ul> |
|
732 | * <li>The observed array lengths must both be at least 2. |
|
733 | * </li> |
|
734 | * <li> <code> 0 < alpha <= 0.5 </code> |
|
735 | * </li></ul> |
|
736 | * |
|
737 | * @param sample1 array of sample data values |
|
738 | * @param sample2 array of sample data values |
|
739 | * @param alpha significance level of the test |
|
740 | * @return true if the null hypothesis can be rejected with |
|
741 | * confidence 1 - alpha |
|
742 | * @throws IllegalArgumentException if the preconditions are not met |
|
743 | * @throws MathException if an error occurs performing the test |
|
744 | */ |
|
745 | public boolean homoscedasticTTest(double[] sample1, double[] sample2, |
|
746 | double alpha) |
|
747 | throws IllegalArgumentException, MathException { |
|
748 | 12 | if ((alpha <= 0) || (alpha > 0.5)) { |
749 | 0 | throw new IllegalArgumentException("bad significance level: " + alpha); |
750 | } |
|
751 | 12 | return (homoscedasticTTest(sample1, sample2) < alpha); |
752 | } |
|
753 | ||
754 | /** |
|
755 | * Returns the <i>observed significance level</i>, or |
|
756 | * <i>p-value</i>, associated with a two-sample, two-tailed t-test |
|
757 | * comparing the means of the datasets described by two StatisticalSummary |
|
758 | * instances. |
|
759 | * <p> |
|
760 | * The number returned is the smallest significance level |
|
761 | * at which one can reject the null hypothesis that the two means are |
|
762 | * equal in favor of the two-sided alternative that they are different. |
|
763 | * For a one-sided test, divide the returned value by 2. |
|
764 | * <p> |
|
765 | * The test does not assume that the underlying population variances are |
|
766 | * equal and it uses approximated degrees of freedom computed from the |
|
767 | * sample data to compute the p-value. To perform the test assuming |
|
768 | * equal variances, use |
|
769 | * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}. |
|
770 | * <p> |
|
771 | * <strong>Usage Note:</strong><br> |
|
772 | * The validity of the p-value depends on the assumptions of the parametric |
|
773 | * t-test procedure, as discussed |
|
774 | * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> |
|
775 | * here</a> |
|
776 | * <p> |
|
777 | * <strong>Preconditions</strong>: <ul> |
|
778 | * <li>The datasets described by the two StatisticalSummary instances must each contain |
|
779 | * at least 2 observations. |
|
780 | * </li></ul> |
|
781 | * |
|
782 | * @param sampleStats1 StatisticalSummary describing data from the first sample |
|
783 | * @param sampleStats2 StatisticalSummary describing data from the second sample |
|
784 | * @return p-value for t-test |
|
785 | * @throws IllegalArgumentException if the precondition is not met |
|
786 | * @throws MathException if an error occurs computing the p-value |
|
787 | */ |
|
788 | public double tTest(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2) |
|
789 | throws IllegalArgumentException, MathException { |
|
790 | 30 | if ((sampleStats1 == null) || (sampleStats2 == null || |
791 | Math.min(sampleStats1.getN(), sampleStats2.getN()) < 2)) { |
|
792 | 12 | throw new IllegalArgumentException("insufficient data for t statistic"); |
793 | } |
|
794 | 18 | return tTest(sampleStats1.getMean(), sampleStats2.getMean(), sampleStats1.getVariance(), |
795 | sampleStats2.getVariance(), (double) sampleStats1.getN(), |
|
796 | (double) sampleStats2.getN()); |
|
797 | } |
|
798 | ||
799 | /** |
|
800 | * Returns the <i>observed significance level</i>, or |
|
801 | * <i>p-value</i>, associated with a two-sample, two-tailed t-test |
|
802 | * comparing the means of the datasets described by two StatisticalSummary |
|
803 | * instances, under the hypothesis of equal subpopulation variances. To |
|
804 | * perform a test without the equal variances assumption, use |
|
805 | * {@link #tTest(StatisticalSummary, StatisticalSummary)}. |
|
806 | * <p> |
|
807 | * The number returned is the smallest significance level |
|
808 | * at which one can reject the null hypothesis that the two means are |
|
809 | * equal in favor of the two-sided alternative that they are different. |
|
810 | * For a one-sided test, divide the returned value by 2. |
|
811 | * <p> |
|
812 | * See {@link #homoscedasticT(double[], double[])} for the formula used to |
|
813 | * compute the t-statistic. The sum of the sample sizes minus 2 is used as |
|
814 | * the degrees of freedom. |
|
815 | * <p> |
|
816 | * <strong>Usage Note:</strong><br> |
|
817 | * The validity of the p-value depends on the assumptions of the parametric |
|
818 | * t-test procedure, as discussed |
|
819 | * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a> |
|
820 | * <p> |
|
821 | * <strong>Preconditions</strong>: <ul> |
|
822 | * <li>The datasets described by the two StatisticalSummary instances must each contain |
|
823 | * at least 2 observations. |
|
824 | * </li></ul> |
|
825 | * |
|
826 | * @param sampleStats1 StatisticalSummary describing data from the first sample |
|
827 | * @param sampleStats2 StatisticalSummary describing data from the second sample |
|
828 | * @return p-value for t-test |
|
829 | * @throws IllegalArgumentException if the precondition is not met |
|
830 | * @throws MathException if an error occurs computing the p-value |
|
831 | */ |
|
832 | public double homoscedasticTTest(StatisticalSummary sampleStats1, |
|
833 | StatisticalSummary sampleStats2) |
|
834 | throws IllegalArgumentException, MathException { |
|
835 | 6 | if ((sampleStats1 == null) || (sampleStats2 == null || |
836 | Math.min(sampleStats1.getN(), sampleStats2.getN()) < 2)) { |
|
837 | 0 | throw new IllegalArgumentException("insufficient data for t statistic"); |
838 | } |
|
839 | 6 | return homoscedasticTTest(sampleStats1.getMean(), |
840 | sampleStats2.getMean(), sampleStats1.getVariance(), |
|
841 | sampleStats2.getVariance(), (double) sampleStats1.getN(), |
|
842 | (double) sampleStats2.getN()); |
|
843 | } |
|
844 | ||
845 | /** |
|
846 | * Performs a |
|
847 | * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm"> |
|
848 | * two-sided t-test</a> evaluating the null hypothesis that |
|
849 | * <code>sampleStats1</code> and <code>sampleStats2</code> describe |
|
850 | * datasets drawn from populations with the same mean, with significance |
|
851 | * level <code>alpha</code>. This test does not assume that the |
|
852 | * subpopulation variances are equal. To perform the test under the equal |
|
853 | * variances assumption, use |
|
854 | * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}. |
|
855 | * <p> |
|
856 | * Returns <code>true</code> iff the null hypothesis that the means are |
|
857 | * equal can be rejected with confidence <code>1 - alpha</code>. To |
|
858 | * perform a 1-sided test, use <code>alpha * 2</code> |
|
859 | * <p> |
|
860 | * See {@link #t(double[], double[])} for the formula used to compute the |
|
861 | * t-statistic. Degrees of freedom are approximated using the |
|
862 | * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm"> |
|
863 | * Welch-Satterthwaite approximation.</a> |
|
864 | * <p> |
|
865 | * <strong>Examples:</strong><br><ol> |
|
866 | * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at |
|
867 | * the 95% level, use |
|
868 | * <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code> |
|
869 | * </li> |
|
870 | * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code> |
|
871 | * at the 99% level, first verify that the measured mean of |
|
872 | * <code>sample 1</code> is less than the mean of <code>sample 2</code> |
|
873 | * and then use |
|
874 | * <br><code>tTest(sampleStats1, sampleStats2, 0.02) </code> |
|
875 | * </li></ol> |
|
876 | * <p> |
|
877 | * <strong>Usage Note:</strong><br> |
|
878 | * The validity of the test depends on the assumptions of the parametric |
|
879 | * t-test procedure, as discussed |
|
880 | * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html"> |
|
881 | * here</a> |
|
882 | * <p> |
|
883 | * <strong>Preconditions</strong>: <ul> |
|
884 | * <li>The datasets described by the two StatisticalSummary instances must each contain |
|
885 | * at least 2 observations. |
|
886 | * </li> |
|
887 | * <li> <code> 0 < alpha <= 0.5 </code> |
|
888 | * </li></ul> |
|
889 | * |
|
890 | * @param sampleStats1 StatisticalSummary describing sample data values |
|
891 | * @param sampleStats2 StatisticalSummary describing sample data values |
|
892 | * @param alpha significance level of the test |
|
893 | * @return true if the null hypothesis can be rejected with |
|
894 | * confidence 1 - alpha |
|
895 | * @throws IllegalArgumentException if the preconditions are not met |
|
896 | * @throws MathException if an error occurs performing the test |
|
897 | */ |
|
898 | public boolean tTest(StatisticalSummary sampleStats1, |
|
899 | StatisticalSummary sampleStats2, double alpha) |
|
900 | throws IllegalArgumentException, MathException { |
|
901 | 24 | if ((alpha <= 0) || (alpha > 0.5)) { |
902 | 6 | throw new IllegalArgumentException("bad significance level: " + alpha); |
903 | } |
|
904 | 18 | return (tTest(sampleStats1, sampleStats2) < alpha); |
905 | } |
|
906 | ||
907 | //----------------------------------------------- Protected methods |
|
908 | ||
909 | /** |
|
910 | * Gets a DistributionFactory to use in creating TDistribution instances. |
|
911 | * @return a distribution factory. |
|
912 | */ |
|
913 | protected DistributionFactory getDistributionFactory() { |
|
914 | 132 | if (distributionFactory == null) { |
915 | 16 | distributionFactory = DistributionFactory.newInstance(); |
916 | } |
|
917 | 132 | return distributionFactory; |
918 | } |
|
919 | ||
920 | /** |
|
921 | * Computes approximate degrees of freedom for 2-sample t-test. |
|
922 | * |
|
923 | * @param v1 first sample variance |
|
924 | * @param v2 second sample variance |
|
925 | * @param n1 first sample n |
|
926 | * @param n2 second sample n |
|
927 | * @return approximate degrees of freedom |
|
928 | */ |
|
929 | protected double df(double v1, double v2, double n1, double n2) { |
|
930 | 42 | return (((v1 / n1) + (v2 / n2)) * ((v1 / n1) + (v2 / n2))) / |
931 | ((v1 * v1) / (n1 * n1 * (n1 - 1d)) + (v2 * v2) / |
|
932 | (n2 * n2 * (n2 - 1d))); |
|
933 | } |
|
934 | ||
935 | /** |
|
936 | * Computes t test statistic for 1-sample t-test. |
|
937 | * |
|
938 | * @param m sample mean |
|
939 | * @param mu constant to test against |
|
940 | * @param v sample variance |
|
941 | * @param n sample n |
|
942 | * @return t test statistic |
|
943 | */ |
|
944 | protected double t(double m, double mu, double v, double n) { |
|
945 | 102 | return (m - mu) / Math.sqrt(v / n); |
946 | } |
|
947 | ||
948 | /** |
|
949 | * Computes t test statistic for 2-sample t-test. |
|
950 | * <p> |
|
951 | * Does not assume that subpopulation variances are equal. |
|
952 | * |
|
953 | * @param m1 first sample mean |
|
954 | * @param m2 second sample mean |
|
955 | * @param v1 first sample variance |
|
956 | * @param v2 second sample variance |
|
957 | * @param n1 first sample n |
|
958 | * @param n2 second sample n |
|
959 | * @return t test statistic |
|
960 | */ |
|
961 | protected double t(double m1, double m2, double v1, double v2, double n1, |
|
962 | double n2) { |
|
963 | 60 | return (m1 - m2) / Math.sqrt((v1 / n1) + (v2 / n2)); |
964 | } |
|
965 | ||
966 | /** |
|
967 | * Computes t test statistic for 2-sample t-test under the hypothesis |
|
968 | * of equal subpopulation variances. |
|
969 | * |
|
970 | * @param m1 first sample mean |
|
971 | * @param m2 second sample mean |
|
972 | * @param v1 first sample variance |
|
973 | * @param v2 second sample variance |
|
974 | * @param n1 first sample n |
|
975 | * @param n2 second sample n |
|
976 | * @return t test statistic |
|
977 | */ |
|
978 | protected double homoscedasticT(double m1, double m2, double v1, |
|
979 | double v2, double n1, double n2) { |
|
980 | 24 | double pooledVariance = ((n1 - 1) * v1 + (n2 -1) * v2 ) / (n1 + n2 - 2); |
981 | 24 | return (m1 - m2) / Math.sqrt(pooledVariance * (1d / n1 + 1d / n2)); |
982 | } |
|
983 | ||
984 | /** |
|
985 | * Computes p-value for 2-sided, 1-sample t-test. |
|
986 | * |
|
987 | * @param m sample mean |
|
988 | * @param mu constant to test against |
|
989 | * @param v sample variance |
|
990 | * @param n sample n |
|
991 | * @return p-value |
|
992 | * @throws MathException if an error occurs computing the p-value |
|
993 | */ |
|
994 | protected double tTest(double m, double mu, double v, double n) |
|
995 | throws MathException { |
|
996 | 72 | double t = Math.abs(t(m, mu, v, n)); |
997 | 72 | TDistribution tDistribution = |
998 | getDistributionFactory().createTDistribution(n - 1); |
|
999 | 72 | return 1.0 - tDistribution.cumulativeProbability(-t, t); |
1000 | } |
|
1001 | ||
1002 | /** |
|
1003 | * Computes p-value for 2-sided, 2-sample t-test. |
|
1004 | * <p> |
|
1005 | * Does not assume subpopulation variances are equal. Degrees of freedom |
|
1006 | * are estimated from the data. |
|
1007 | * |
|
1008 | * @param m1 first sample mean |
|
1009 | * @param m2 second sample mean |
|
1010 | * @param v1 first sample variance |
|
1011 | * @param v2 second sample variance |
|
1012 | * @param n1 first sample n |
|
1013 | * @param n2 second sample n |
|
1014 | * @return p-value |
|
1015 | * @throws MathException if an error occurs computing the p-value |
|
1016 | */ |
|
1017 | protected double tTest(double m1, double m2, double v1, double v2, |
|
1018 | double n1, double n2) |
|
1019 | throws MathException { |
|
1020 | 42 | double t = Math.abs(t(m1, m2, v1, v2, n1, n2)); |
1021 | 42 | double degreesOfFreedom = 0; |
1022 | 42 | degreesOfFreedom= df(v1, v2, n1, n2); |
1023 | 42 | TDistribution tDistribution = |
1024 | getDistributionFactory().createTDistribution(degreesOfFreedom); |
|
1025 | 42 | return 1.0 - tDistribution.cumulativeProbability(-t, t); |
1026 | } |
|
1027 | ||
1028 | /** |
|
1029 | * Computes p-value for 2-sided, 2-sample t-test, under the assumption |
|
1030 | * of equal subpopulation variances. |
|
1031 | * <p> |
|
1032 | * The sum of the sample sizes minus 2 is used as degrees of freedom. |
|
1033 | * |
|
1034 | * @param m1 first sample mean |
|
1035 | * @param m2 second sample mean |
|
1036 | * @param v1 first sample variance |
|
1037 | * @param v2 second sample variance |
|
1038 | * @param n1 first sample n |
|
1039 | * @param n2 second sample n |
|
1040 | * @return p-value |
|
1041 | * @throws MathException if an error occurs computing the p-value |
|
1042 | */ |
|
1043 | protected double homoscedasticTTest(double m1, double m2, double v1, |
|
1044 | double v2, double n1, double n2) |
|
1045 | throws MathException { |
|
1046 | 18 | double t = Math.abs(homoscedasticT(m1, m2, v1, v2, n1, n2)); |
1047 | 18 | double degreesOfFreedom = 0; |
1048 | 18 | degreesOfFreedom = (double) (n1 + n2 - 2); |
1049 | 18 | TDistribution tDistribution = |
1050 | getDistributionFactory().createTDistribution(degreesOfFreedom); |
|
1051 | 18 | return 1.0 - tDistribution.cumulativeProbability(-t, t); |
1052 | } |
|
1053 | } |