Coverage Report - org.apache.commons.math.stat.inference.TTestImpl

Classes in this File Line Coverage Branch Coverage Complexity
TTestImpl
89% 
96% 
2.345

 1  
 /*
 2  
  * Copyright 2004-2005 The Apache Software Foundation.
 3  
  *
 4  
  * Licensed under the Apache License, Version 2.0 (the "License");
 5  
  * you may not use this file except in compliance with the License.
 6  
  * You may obtain a copy of the License at
 7  
  *
 8  
  *      http://www.apache.org/licenses/LICENSE-2.0
 9  
  *
 10  
  * Unless required by applicable law or agreed to in writing, software
 11  
  * distributed under the License is distributed on an "AS IS" BASIS,
 12  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13  
  * See the License for the specific language governing permissions and
 14  
  * limitations under the License.
 15  
  */
 16  
 package org.apache.commons.math.stat.inference;
 17  
 
 18  
 import org.apache.commons.math.MathException;
 19  
 import org.apache.commons.math.distribution.DistributionFactory;
 20  
 import org.apache.commons.math.distribution.TDistribution;
 21  
 import org.apache.commons.math.stat.StatUtils;
 22  
 import org.apache.commons.math.stat.descriptive.StatisticalSummary;
 23  
 
 24  
 /**
 25  
  * Implements t-test statistics defined in the {@link TTest} interface.
 26  
  * <p>
 27  
  * Uses commons-math {@link org.apache.commons.math.distribution.TDistribution}
 28  
  * implementation to estimate exact p-values.
 29  
  *
 30  
  * @version $Revision$ $Date: 2005-05-01 22:14:49 -0700 (Sun, 01 May 2005) $
 31  
  */
 32  
 public class TTestImpl implements TTest  {
 33  
 
 34  
     /** Cached DistributionFactory used to create TDistribution instances */
 35  30
     private DistributionFactory distributionFactory = null;
 36  
     
 37  
     /**
 38  
      * Default constructor.
 39  
      */
 40  
     public TTestImpl() {
 41  30
         super();
 42  30
     }
 43  
     
 44  
     /**
 45  
      * Computes a paired, 2-sample t-statistic based on the data in the input 
 46  
      * arrays.  The t-statistic returned is equivalent to what would be returned by
 47  
      * computing the one-sample t-statistic {@link #t(double, double[])}, with
 48  
      * <code>mu = 0</code> and the sample array consisting of the (signed) 
 49  
      * differences between corresponding entries in <code>sample1</code> and 
 50  
      * <code>sample2.</code>
 51  
      * <p>
 52  
      * <strong>Preconditions</strong>: <ul>
 53  
      * <li>The input arrays must have the same length and their common length
 54  
      * must be at least 2.
 55  
      * </li></ul>
 56  
      *
 57  
      * @param sample1 array of sample data values
 58  
      * @param sample2 array of sample data values
 59  
      * @return t statistic
 60  
      * @throws IllegalArgumentException if the precondition is not met
 61  
      * @throws MathException if the statistic cannot be computed due to a
 62  
      *         convergence or other numerical error.
 63  
      */
 64  
     public double pairedT(double[] sample1, double[] sample2)
 65  
         throws IllegalArgumentException, MathException {
 66  6
         if ((sample1 == null) || (sample2 == null ||
 67  
                 Math.min(sample1.length, sample2.length) < 2)) {
 68  0
             throw new IllegalArgumentException("insufficient data for t statistic");
 69  
         }
 70  6
         double meanDifference = StatUtils.meanDifference(sample1, sample2);
 71  6
         return t(meanDifference, 0,  
 72  
                 StatUtils.varianceDifference(sample1, sample2, meanDifference),
 73  
                 (double) sample1.length);
 74  
     }
 75  
 
 76  
      /**
 77  
      * Returns the <i>observed significance level</i>, or 
 78  
      * <i> p-value</i>, associated with a paired, two-sample, two-tailed t-test 
 79  
      * based on the data in the input arrays.
 80  
      * <p>
 81  
      * The number returned is the smallest significance level
 82  
      * at which one can reject the null hypothesis that the mean of the paired
 83  
      * differences is 0 in favor of the two-sided alternative that the mean paired 
 84  
      * difference is not equal to 0. For a one-sided test, divide the returned 
 85  
      * value by 2.
 86  
      * <p>
 87  
      * This test is equivalent to a one-sample t-test computed using
 88  
      * {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample
 89  
      * array consisting of the signed differences between corresponding elements of 
 90  
      * <code>sample1</code> and <code>sample2.</code>
 91  
      * <p>
 92  
      * <strong>Usage Note:</strong><br>
 93  
      * The validity of the p-value depends on the assumptions of the parametric
 94  
      * t-test procedure, as discussed 
 95  
      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
 96  
      * here</a>
 97  
      * <p>
 98  
      * <strong>Preconditions</strong>: <ul>
 99  
      * <li>The input array lengths must be the same and their common length must
 100  
      * be at least 2.
 101  
      * </li></ul>
 102  
      *
 103  
      * @param sample1 array of sample data values
 104  
      * @param sample2 array of sample data values
 105  
      * @return p-value for t-test
 106  
      * @throws IllegalArgumentException if the precondition is not met
 107  
      * @throws MathException if an error occurs computing the p-value
 108  
      */
 109  
     public double pairedTTest(double[] sample1, double[] sample2)
 110  
         throws IllegalArgumentException, MathException {
 111  24
         double meanDifference = StatUtils.meanDifference(sample1, sample2);
 112  24
         return tTest(meanDifference, 0, 
 113  
                 StatUtils.varianceDifference(sample1, sample2, meanDifference), 
 114  
                 (double) sample1.length);
 115  
     }
 116  
 
 117  
      /**
 118  
      * Performs a paired t-test evaluating the null hypothesis that the 
 119  
      * mean of the paired differences between <code>sample1</code> and
 120  
      * <code>sample2</code> is 0 in favor of the two-sided alternative that the 
 121  
      * mean paired difference is not equal to 0, with significance level 
 122  
      * <code>alpha</code>.
 123  
      * <p>
 124  
      * Returns <code>true</code> iff the null hypothesis can be rejected with 
 125  
      * confidence <code>1 - alpha</code>.  To perform a 1-sided test, use 
 126  
      * <code>alpha * 2</code>
 127  
      * <p>
 128  
      * <strong>Usage Note:</strong><br>
 129  
      * The validity of the test depends on the assumptions of the parametric
 130  
      * t-test procedure, as discussed 
 131  
      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
 132  
      * here</a>
 133  
      * <p>
 134  
      * <strong>Preconditions</strong>: <ul>
 135  
      * <li>The input array lengths must be the same and their common length 
 136  
      * must be at least 2.
 137  
      * </li>
 138  
      * <li> <code> 0 &lt; alpha &lt;= 0.5 </code>
 139  
      * </li></ul>
 140  
      *
 141  
      * @param sample1 array of sample data values
 142  
      * @param sample2 array of sample data values
 143  
      * @param alpha significance level of the test
 144  
      * @return true if the null hypothesis can be rejected with 
 145  
      * confidence 1 - alpha
 146  
      * @throws IllegalArgumentException if the preconditions are not met
 147  
      * @throws MathException if an error occurs performing the test
 148  
      */
 149  
     public boolean pairedTTest(double[] sample1, double[] sample2, double alpha)
 150  
         throws IllegalArgumentException, MathException {
 151  12
         if ((alpha <= 0) || (alpha > 0.5)) {
 152  0
             throw new IllegalArgumentException("bad significance level: " + alpha);
 153  
         }
 154  12
         return (pairedTTest(sample1, sample2) < alpha);
 155  
     }
 156  
 
 157  
     /**
 158  
      * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula"> 
 159  
      * t statistic </a> given observed values and a comparison constant.
 160  
      * <p>
 161  
      * This statistic can be used to perform a one sample t-test for the mean.
 162  
      * <p>
 163  
      * <strong>Preconditions</strong>: <ul>
 164  
      * <li>The observed array length must be at least 2.
 165  
      * </li></ul>
 166  
      *
 167  
      * @param mu comparison constant
 168  
      * @param observed array of values
 169  
      * @return t statistic
 170  
      * @throws IllegalArgumentException if input array length is less than 2
 171  
      */
 172  
     public double t(double mu, double[] observed)
 173  
     throws IllegalArgumentException {
 174  30
         if ((observed == null) || (observed.length < 2)) {
 175  18
             throw new IllegalArgumentException("insufficient data for t statistic");
 176  
         }
 177  12
         return t(StatUtils.mean(observed), mu, StatUtils.variance(observed),
 178  
                 observed.length);
 179  
     }
 180  
 
 181  
     /**
 182  
      * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
 183  
      * t statistic </a> to use in comparing the mean of the dataset described by 
 184  
      * <code>sampleStats</code> to <code>mu</code>.
 185  
      * <p>
 186  
      * This statistic can be used to perform a one sample t-test for the mean.
 187  
      * <p>
 188  
      * <strong>Preconditions</strong>: <ul>
 189  
      * <li><code>sampleStats.getN() &gt;= 2</code>.
 190  
      * </li></ul>
 191  
      *
 192  
      * @param mu comparison constant
 193  
      * @param sampleStats StatisticalSummary holding sample summary statistics
 194  
      * @return t statistic
 195  
      * @throws IllegalArgumentException if the precondition is not met
 196  
      */
 197  
     public double t(double mu, StatisticalSummary sampleStats)
 198  
     throws IllegalArgumentException {
 199  30
         if ((sampleStats == null) || (sampleStats.getN() < 2)) {
 200  18
             throw new IllegalArgumentException("insufficient data for t statistic");
 201  
         }
 202  12
         return t(sampleStats.getMean(), mu, sampleStats.getVariance(),
 203  
                 sampleStats.getN());
 204  
     }
 205  
 
 206  
     /**
 207  
      * Computes a 2-sample t statistic,  under the hypothesis of equal 
 208  
      * subpopulation variances.  To compute a t-statistic without the
 209  
      * equal variances hypothesis, use {@link #t(double[], double[])}.
 210  
      * <p>
 211  
      * This statistic can be used to perform a (homoscedastic) two-sample
 212  
      * t-test to compare sample means.   
 213  
      * <p>
 214  
      * The t-statistic is
 215  
      * <p>
 216  
      * &nbsp;&nbsp;<code>  t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
 217  
      * <p>
 218  
      * where <strong><code>n1</code></strong> is the size of first sample; 
 219  
      * <strong><code> n2</code></strong> is the size of second sample; 
 220  
      * <strong><code> m1</code></strong> is the mean of first sample;  
 221  
      * <strong><code> m2</code></strong> is the mean of second sample
 222  
      *
 223  
      * and <strong><code>var</code></strong> is the pooled variance estimate:
 224  
      * <p>
 225  
      * <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
 226  
      * <p> 
 227  
      * with <strong><code>var1</code></strong> the variance of the first sample and
 228  
      * <strong><code>var2</code></strong> the variance of the second sample.
 229  
      * <p>
 230  
      * <strong>Preconditions</strong>: <ul>
 231  
      * <li>The observed array lengths must both be at least 2.
 232  
      * </li></ul>
 233  
      *
 234  
      * @param sample1 array of sample data values
 235  
      * @param sample2 array of sample data values
 236  
      * @return t statistic
 237  
      * @throws IllegalArgumentException if the precondition is not met
 238  
      */
 239  
     public double homoscedasticT(double[] sample1, double[] sample2)
 240  
     throws IllegalArgumentException {
 241  6
         if ((sample1 == null) || (sample2 == null ||
 242  
                 Math.min(sample1.length, sample2.length) < 2)) {
 243  0
             throw new IllegalArgumentException("insufficient data for t statistic");
 244  
         }
 245  6
         return homoscedasticT(StatUtils.mean(sample1), StatUtils.mean(sample2),
 246  
                 StatUtils.variance(sample1), StatUtils.variance(sample2),
 247  
                 (double) sample1.length, (double) sample2.length);
 248  
     }
 249  
     
 250  
     /**
 251  
      * Computes a 2-sample t statistic, without the hypothesis of equal
 252  
      * subpopulation variances.  To compute a t-statistic assuming equal
 253  
      * variances, use {@link #homoscedasticT(double[], double[])}.
 254  
      * <p>
 255  
      * This statistic can be used to perform a two-sample t-test to compare
 256  
      * sample means.
 257  
      * <p>
 258  
      * The t-statistic is
 259  
      * <p>
 260  
      * &nbsp;&nbsp; <code>  t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
 261  
      * <p>
 262  
      *  where <strong><code>n1</code></strong> is the size of the first sample
 263  
      * <strong><code> n2</code></strong> is the size of the second sample; 
 264  
      * <strong><code> m1</code></strong> is the mean of the first sample;  
 265  
      * <strong><code> m2</code></strong> is the mean of the second sample;
 266  
      * <strong><code> var1</code></strong> is the variance of the first sample;
 267  
      * <strong><code> var2</code></strong> is the variance of the second sample;  
 268  
      * <p>
 269  
      * <strong>Preconditions</strong>: <ul>
 270  
      * <li>The observed array lengths must both be at least 2.
 271  
      * </li></ul>
 272  
      *
 273  
      * @param sample1 array of sample data values
 274  
      * @param sample2 array of sample data values
 275  
      * @return t statistic
 276  
      * @throws IllegalArgumentException if the precondition is not met
 277  
      */
 278  
     public double t(double[] sample1, double[] sample2)
 279  
     throws IllegalArgumentException {
 280  18
         if ((sample1 == null) || (sample2 == null ||
 281  
                 Math.min(sample1.length, sample2.length) < 2)) {
 282  6
             throw new IllegalArgumentException("insufficient data for t statistic");
 283  
         }
 284  12
         return t(StatUtils.mean(sample1), StatUtils.mean(sample2),
 285  
                 StatUtils.variance(sample1), StatUtils.variance(sample2),
 286  
                 (double) sample1.length, (double) sample2.length);
 287  
     }
 288  
 
 289  
     /**
 290  
      * Computes a 2-sample t statistic, comparing the means of the datasets
 291  
      * described by two {@link StatisticalSummary} instances, without the
 292  
      * assumption of equal subpopulation variances.  Use 
 293  
      * {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} to
 294  
      * compute a t-statistic under the equal variances assumption.
 295  
      * <p>
 296  
      * This statistic can be used to perform a two-sample t-test to compare
 297  
      * sample means.
 298  
      * <p>
 299  
      * The returned t-statistic is
 300  
      * <p>
 301  
      * &nbsp;&nbsp; <code>  t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
 302  
      * <p>
 303  
      * where <strong><code>n1</code></strong> is the size of the first sample; 
 304  
      * <strong><code> n2</code></strong> is the size of the second sample; 
 305  
      * <strong><code> m1</code></strong> is the mean of the first sample;  
 306  
      * <strong><code> m2</code></strong> is the mean of the second sample
 307  
      * <strong><code> var1</code></strong> is the variance of the first sample;  
 308  
      * <strong><code> var2</code></strong> is the variance of the second sample
 309  
      * <p>
 310  
      * <strong>Preconditions</strong>: <ul>
 311  
      * <li>The datasets described by the two StatisticalSummary instances must each contain
 312  
      * at least 2 observations.
 313  
      * </li></ul>
 314  
      *
 315  
      * @param sampleStats1 StatisticalSummary describing data from the first sample
 316  
      * @param sampleStats2 StatisticalSummary describing data from the second sample
 317  
      * @return t statistic
 318  
      * @throws IllegalArgumentException if the precondition is not met
 319  
      */
 320  
     public double t(StatisticalSummary sampleStats1, 
 321  
             StatisticalSummary sampleStats2)
 322  
     throws IllegalArgumentException {
 323  12
         if ((sampleStats1 == null) ||
 324  
                 (sampleStats2 == null ||
 325  
                         Math.min(sampleStats1.getN(), sampleStats2.getN()) < 2)) {
 326  6
             throw new IllegalArgumentException("insufficient data for t statistic");
 327  
         }
 328  6
         return t(sampleStats1.getMean(), sampleStats2.getMean(), 
 329  
                 sampleStats1.getVariance(), sampleStats2.getVariance(),
 330  
                 (double) sampleStats1.getN(), (double) sampleStats2.getN());
 331  
     }
 332  
     
 333  
     /**
 334  
      * Computes a 2-sample t statistic, comparing the means of the datasets
 335  
      * described by two {@link StatisticalSummary} instances, under the
 336  
      * assumption of equal subpopulation variances.  To compute a t-statistic
 337  
      * without the equal variances assumption, use 
 338  
      * {@link #t(StatisticalSummary, StatisticalSummary)}.
 339  
      * <p>
 340  
      * This statistic can be used to perform a (homoscedastic) two-sample
 341  
      * t-test to compare sample means.
 342  
      * <p>
 343  
      * The t-statistic returned is
 344  
      * <p>
 345  
      * &nbsp;&nbsp;<code>  t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
 346  
      * <p>
 347  
      * where <strong><code>n1</code></strong> is the size of first sample; 
 348  
      * <strong><code> n2</code></strong> is the size of second sample; 
 349  
      * <strong><code> m1</code></strong> is the mean of first sample;  
 350  
      * <strong><code> m2</code></strong> is the mean of second sample
 351  
      * and <strong><code>var</code></strong> is the pooled variance estimate:
 352  
      * <p>
 353  
      * <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
 354  
      * <p> 
 355  
      * with <strong><code>var1</code></strong> the variance of the first sample and
 356  
      * <strong><code>var2</code></strong> the variance of the second sample.
 357  
      * <p>
 358  
      * <strong>Preconditions</strong>: <ul>
 359  
      * <li>The datasets described by the two StatisticalSummary instances must each contain
 360  
      * at least 2 observations.
 361  
      * </li></ul>
 362  
      *
 363  
      * @param sampleStats1 StatisticalSummary describing data from the first sample
 364  
      * @param sampleStats2 StatisticalSummary describing data from the second sample
 365  
      * @return t statistic
 366  
      * @throws IllegalArgumentException if the precondition is not met
 367  
      */
 368  
     public double homoscedasticT(StatisticalSummary sampleStats1, 
 369  
             StatisticalSummary sampleStats2)
 370  
     throws IllegalArgumentException {
 371  0
         if ((sampleStats1 == null) ||
 372  
                 (sampleStats2 == null ||
 373  
                         Math.min(sampleStats1.getN(), sampleStats2.getN()) < 2)) {
 374  0
             throw new IllegalArgumentException("insufficient data for t statistic");
 375  
         }
 376  0
         return homoscedasticT(sampleStats1.getMean(), sampleStats2.getMean(), 
 377  
                 sampleStats1.getVariance(), sampleStats2.getVariance(), 
 378  
                 (double) sampleStats1.getN(), (double) sampleStats2.getN());
 379  
     }
 380  
 
 381  
      /**
 382  
      * Returns the <i>observed significance level</i>, or 
 383  
      * <i>p-value</i>, associated with a one-sample, two-tailed t-test 
 384  
      * comparing the mean of the input array with the constant <code>mu</code>.
 385  
      * <p>
 386  
      * The number returned is the smallest significance level
 387  
      * at which one can reject the null hypothesis that the mean equals 
 388  
      * <code>mu</code> in favor of the two-sided alternative that the mean
 389  
      * is different from <code>mu</code>. For a one-sided test, divide the 
 390  
      * returned value by 2.
 391  
      * <p>
 392  
      * <strong>Usage Note:</strong><br>
 393  
      * The validity of the test depends on the assumptions of the parametric
 394  
      * t-test procedure, as discussed 
 395  
      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
 396  
      * <p>
 397  
      * <strong>Preconditions</strong>: <ul>
 398  
      * <li>The observed array length must be at least 2.
 399  
      * </li></ul>
 400  
      *
 401  
      * @param mu constant value to compare sample mean against
 402  
      * @param sample array of sample data values
 403  
      * @return p-value
 404  
      * @throws IllegalArgumentException if the precondition is not met
 405  
      * @throws MathException if an error occurs computing the p-value
 406  
      */
 407  
     public double tTest(double mu, double[] sample)
 408  
     throws IllegalArgumentException, MathException {
 409  30
         if ((sample == null) || (sample.length < 2)) {
 410  6
             throw new IllegalArgumentException("insufficient data for t statistic");
 411  
         }
 412  24
         return tTest( StatUtils.mean(sample), mu, StatUtils.variance(sample),
 413  
                 sample.length);
 414  
     }
 415  
 
 416  
     /**
 417  
      * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
 418  
      * two-sided t-test</a> evaluating the null hypothesis that the mean of the population from
 419  
      * which <code>sample</code> is drawn equals <code>mu</code>.
 420  
      * <p>
 421  
      * Returns <code>true</code> iff the null hypothesis can be 
 422  
      * rejected with confidence <code>1 - alpha</code>.  To 
 423  
      * perform a 1-sided test, use <code>alpha * 2</code>
 424  
      * <p>
 425  
      * <strong>Examples:</strong><br><ol>
 426  
      * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
 427  
      * the 95% level, use <br><code>tTest(mu, sample, 0.05) </code>
 428  
      * </li>
 429  
      * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
 430  
      * at the 99% level, first verify that the measured sample mean is less 
 431  
      * than <code>mu</code> and then use 
 432  
      * <br><code>tTest(mu, sample, 0.02) </code>
 433  
      * </li></ol>
 434  
      * <p>
 435  
      * <strong>Usage Note:</strong><br>
 436  
      * The validity of the test depends on the assumptions of the one-sample 
 437  
      * parametric t-test procedure, as discussed 
 438  
      * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
 439  
      * <p>
 440  
      * <strong>Preconditions</strong>: <ul>
 441  
      * <li>The observed array length must be at least 2.
 442  
      * </li></ul>
 443  
      *
 444  
      * @param mu constant value to compare sample mean against
 445  
      * @param sample array of sample data values
 446  
      * @param alpha significance level of the test
 447  
      * @return true if the null hypothesis can be rejected with confidence 1 - alpha
 448  
      * @throws IllegalArgumentException if the precondition is not met
 449  
      * @throws MathException if an error occurs computing the p-value
 450  
      */
 451  
     public boolean tTest(double mu, double[] sample, double alpha)
 452  
     throws IllegalArgumentException, MathException {
 453  18
         if ((alpha <= 0) || (alpha > 0.5)) {
 454  6
             throw new IllegalArgumentException("bad significance level: " + alpha);
 455  
         }
 456  12
         return (tTest(mu, sample) < alpha);
 457  
     }
 458  
 
 459  
     /**
 460  
      * Returns the <i>observed significance level</i>, or 
 461  
      * <i>p-value</i>, associated with a one-sample, two-tailed t-test 
 462  
      * comparing the mean of the dataset described by <code>sampleStats</code>
 463  
      * with the constant <code>mu</code>.
 464  
      * <p>
 465  
      * The number returned is the smallest significance level
 466  
      * at which one can reject the null hypothesis that the mean equals 
 467  
      * <code>mu</code> in favor of the two-sided alternative that the mean
 468  
      * is different from <code>mu</code>. For a one-sided test, divide the 
 469  
      * returned value by 2.
 470  
      * <p>
 471  
      * <strong>Usage Note:</strong><br>
 472  
      * The validity of the test depends on the assumptions of the parametric
 473  
      * t-test procedure, as discussed 
 474  
      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
 475  
      * here</a>
 476  
      * <p>
 477  
      * <strong>Preconditions</strong>: <ul>
 478  
      * <li>The sample must contain at least 2 observations.
 479  
      * </li></ul>
 480  
      *
 481  
      * @param mu constant value to compare sample mean against
 482  
      * @param sampleStats StatisticalSummary describing sample data
 483  
      * @return p-value
 484  
      * @throws IllegalArgumentException if the precondition is not met
 485  
      * @throws MathException if an error occurs computing the p-value
 486  
      */
 487  
     public double tTest(double mu, StatisticalSummary sampleStats)
 488  
     throws IllegalArgumentException, MathException {
 489  30
         if ((sampleStats == null) || (sampleStats.getN() < 2)) {
 490  6
             throw new IllegalArgumentException("insufficient data for t statistic");
 491  
         }
 492  24
         return tTest(sampleStats.getMean(), mu, sampleStats.getVariance(),
 493  
                 sampleStats.getN());
 494  
     }
 495  
 
 496  
      /**
 497  
      * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
 498  
      * two-sided t-test</a> evaluating the null hypothesis that the mean of the
 499  
      * population from which the dataset described by <code>stats</code> is
 500  
      * drawn equals <code>mu</code>.
 501  
      * <p>
 502  
      * Returns <code>true</code> iff the null hypothesis can be rejected with
 503  
      * confidence <code>1 - alpha</code>.  To  perform a 1-sided test, use
 504  
      * <code>alpha * 2.</code>
 505  
      * <p>
 506  
      * <strong>Examples:</strong><br><ol>
 507  
      * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
 508  
      * the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code>
 509  
      * </li>
 510  
      * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
 511  
      * at the 99% level, first verify that the measured sample mean is less 
 512  
      * than <code>mu</code> and then use 
 513  
      * <br><code>tTest(mu, sampleStats, 0.02) </code>
 514  
      * </li></ol>
 515  
      * <p>
 516  
      * <strong>Usage Note:</strong><br>
 517  
      * The validity of the test depends on the assumptions of the one-sample 
 518  
      * parametric t-test procedure, as discussed 
 519  
      * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
 520  
      * <p>
 521  
      * <strong>Preconditions</strong>: <ul>
 522  
      * <li>The sample must include at least 2 observations.
 523  
      * </li></ul>
 524  
      *
 525  
      * @param mu constant value to compare sample mean against
 526  
      * @param sampleStats StatisticalSummary describing sample data values
 527  
      * @param alpha significance level of the test
 528  
      * @return true if the null hypothesis can be rejected with confidence 1 - alpha
 529  
      * @throws IllegalArgumentException if the precondition is not met
 530  
      * @throws MathException if an error occurs computing the p-value
 531  
      */
 532  
     public boolean tTest( double mu, StatisticalSummary sampleStats,
 533  
             double alpha)
 534  
     throws IllegalArgumentException, MathException {
 535  18
         if ((alpha <= 0) || (alpha > 0.5)) {
 536  6
             throw new IllegalArgumentException("bad significance level: " + alpha);
 537  
         }
 538  12
         return (tTest(mu, sampleStats) < alpha);
 539  
     }
 540  
 
 541  
     /**
 542  
      * Returns the <i>observed significance level</i>, or 
 543  
      * <i>p-value</i>, associated with a two-sample, two-tailed t-test 
 544  
      * comparing the means of the input arrays.
 545  
      * <p>
 546  
      * The number returned is the smallest significance level
 547  
      * at which one can reject the null hypothesis that the two means are
 548  
      * equal in favor of the two-sided alternative that they are different. 
 549  
      * For a one-sided test, divide the returned value by 2.
 550  
      * <p>
 551  
      * The test does not assume that the underlying population variances are
 552  
      * equal  and it uses approximated degrees of freedom computed from the 
 553  
      * sample data to compute the p-value.  The t-statistic used is as defined in
 554  
      * {@link #t(double[], double[])} and the Welch-Satterthwaite approximation
 555  
      * to the degrees of freedom is used, 
 556  
      * as described 
 557  
      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
 558  
      * here.</a>  To perform the test under the assumption of equal subpopulation
 559  
      * variances, use {@link #homoscedasticTTest(double[], double[])}. 
 560  
      * <p>
 561  
      * <strong>Usage Note:</strong><br>
 562  
      * The validity of the p-value depends on the assumptions of the parametric
 563  
      * t-test procedure, as discussed 
 564  
      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
 565  
      * here</a>
 566  
      * <p>
 567  
      * <strong>Preconditions</strong>: <ul>
 568  
      * <li>The observed array lengths must both be at least 2.
 569  
      * </li></ul>
 570  
      *
 571  
      * @param sample1 array of sample data values
 572  
      * @param sample2 array of sample data values
 573  
      * @return p-value for t-test
 574  
      * @throws IllegalArgumentException if the precondition is not met
 575  
      * @throws MathException if an error occurs computing the p-value
 576  
      */
 577  
     public double tTest(double[] sample1, double[] sample2)
 578  
     throws IllegalArgumentException, MathException {
 579  36
         if ((sample1 == null) || (sample2 == null ||
 580  
                 Math.min(sample1.length, sample2.length) < 2)) {
 581  12
             throw new IllegalArgumentException("insufficient data");
 582  
         }
 583  24
         return tTest(StatUtils.mean(sample1), StatUtils.mean(sample2),
 584  
                 StatUtils.variance(sample1), StatUtils.variance(sample2),
 585  
                 (double) sample1.length, (double) sample2.length);
 586  
     }
 587  
     
 588  
     /**
 589  
      * Returns the <i>observed significance level</i>, or 
 590  
      * <i>p-value</i>, associated with a two-sample, two-tailed t-test 
 591  
      * comparing the means of the input arrays, under the assumption that
 592  
      * the two samples are drawn from subpopulations with equal variances.
 593  
      * To perform the test without the equal variances assumption, use
 594  
      * {@link #tTest(double[], double[])}.
 595  
      * <p>
 596  
      * The number returned is the smallest significance level
 597  
      * at which one can reject the null hypothesis that the two means are
 598  
      * equal in favor of the two-sided alternative that they are different. 
 599  
      * For a one-sided test, divide the returned value by 2.
 600  
      * <p>
 601  
      * A pooled variance estimate is used to compute the t-statistic.  See
 602  
      * {@link #homoscedasticT(double[], double[])}. The sum of the sample sizes
 603  
      * minus 2 is used as the degrees of freedom.
 604  
      * <p>
 605  
      * <strong>Usage Note:</strong><br>
 606  
      * The validity of the p-value depends on the assumptions of the parametric
 607  
      * t-test procedure, as discussed 
 608  
      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
 609  
      * here</a>
 610  
      * <p>
 611  
      * <strong>Preconditions</strong>: <ul>
 612  
      * <li>The observed array lengths must both be at least 2.
 613  
      * </li></ul>
 614  
      *
 615  
      * @param sample1 array of sample data values
 616  
      * @param sample2 array of sample data values
 617  
      * @return p-value for t-test
 618  
      * @throws IllegalArgumentException if the precondition is not met
 619  
      * @throws MathException if an error occurs computing the p-value
 620  
      */
 621  
     public double homoscedasticTTest(double[] sample1, double[] sample2)
 622  
     throws IllegalArgumentException, MathException {
 623  12
         if ((sample1 == null) || (sample2 == null ||
 624  
                 Math.min(sample1.length, sample2.length) < 2)) {
 625  0
             throw new IllegalArgumentException("insufficient data");
 626  
         }
 627  12
         return homoscedasticTTest(StatUtils.mean(sample1), 
 628  
                 StatUtils.mean(sample2), StatUtils.variance(sample1),
 629  
                 StatUtils.variance(sample2), (double) sample1.length, 
 630  
                 (double) sample2.length);
 631  
     }
 632  
     
 633  
 
 634  
      /**
 635  
      * Performs a 
 636  
      * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
 637  
      * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code> 
 638  
      * and <code>sample2</code> are drawn from populations with the same mean, 
 639  
      * with significance level <code>alpha</code>.  This test does not assume
 640  
      * that the subpopulation variances are equal.  To perform the test assuming
 641  
      * equal variances, use 
 642  
      * {@link #homoscedasticTTest(double[], double[], double)}.
 643  
      * <p>
 644  
      * Returns <code>true</code> iff the null hypothesis that the means are
 645  
      * equal can be rejected with confidence <code>1 - alpha</code>.  To 
 646  
      * perform a 1-sided test, use <code>alpha * 2</code>
 647  
      * <p>
 648  
      * See {@link #t(double[], double[])} for the formula used to compute the
 649  
      * t-statistic.  Degrees of freedom are approximated using the
 650  
      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
 651  
      * Welch-Satterthwaite approximation.</a>
 652  
       
 653  
      * <p>
 654  
      * <strong>Examples:</strong><br><ol>
 655  
      * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
 656  
      * the 95% level,  use 
 657  
      * <br><code>tTest(sample1, sample2, 0.05). </code>
 658  
      * </li>
 659  
      * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code> at
 660  
      * the 99% level, first verify that the measured  mean of <code>sample 1</code>
 661  
      * is less than the mean of <code>sample 2</code> and then use 
 662  
      * <br><code>tTest(sample1, sample2, 0.02) </code>
 663  
      * </li></ol>
 664  
      * <p>
 665  
      * <strong>Usage Note:</strong><br>
 666  
      * The validity of the test depends on the assumptions of the parametric
 667  
      * t-test procedure, as discussed 
 668  
      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
 669  
      * here</a>
 670  
      * <p>
 671  
      * <strong>Preconditions</strong>: <ul>
 672  
      * <li>The observed array lengths must both be at least 2.
 673  
      * </li>
 674  
      * <li> <code> 0 &lt; alpha &lt;= 0.5 </code>
 675  
      * </li></ul>
 676  
      *
 677  
      * @param sample1 array of sample data values
 678  
      * @param sample2 array of sample data values
 679  
      * @param alpha significance level of the test
 680  
      * @return true if the null hypothesis can be rejected with 
 681  
      * confidence 1 - alpha
 682  
      * @throws IllegalArgumentException if the preconditions are not met
 683  
      * @throws MathException if an error occurs performing the test
 684  
      */
 685  
     public boolean tTest(double[] sample1, double[] sample2,
 686  
             double alpha)
 687  
     throws IllegalArgumentException, MathException {
 688  24
         if ((alpha <= 0) || (alpha > 0.5)) {
 689  6
             throw new IllegalArgumentException("bad significance level: " + alpha);
 690  
         }
 691  18
         return (tTest(sample1, sample2) < alpha);
 692  
     }
 693  
     
 694  
     /**
 695  
      * Performs a 
 696  
      * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
 697  
      * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code> 
 698  
      * and <code>sample2</code> are drawn from populations with the same mean, 
 699  
      * with significance level <code>alpha</code>,  assuming that the
 700  
      * subpopulation variances are equal.  Use 
 701  
      * {@link #tTest(double[], double[], double)} to perform the test without
 702  
      * the assumption of equal variances.
 703  
      * <p>
 704  
      * Returns <code>true</code> iff the null hypothesis that the means are
 705  
      * equal can be rejected with confidence <code>1 - alpha</code>.  To 
 706  
      * perform a 1-sided test, use <code>alpha * 2.</code>  To perform the test
 707  
      * without the assumption of equal subpopulation variances, use 
 708  
      * {@link #tTest(double[], double[], double)}.
 709  
      * <p>
 710  
      * A pooled variance estimate is used to compute the t-statistic. See
 711  
      * {@link #homoscedasticT(double[], double[])} for the formula. The sum of the sample
 712  
      * sizes minus 2 is used as the degrees of freedom.
 713  
      * <p>
 714  
      * <strong>Examples:</strong><br><ol>
 715  
      * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
 716  
      * the 95% level, use <br><code>tTest(sample1, sample2, 0.05). </code>
 717  
      * </li>
 718  
      * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2, </code>
 719  
      * at the 99% level, first verify that the measured mean of 
 720  
      * <code>sample 1</code> is less than the mean of <code>sample 2</code>
 721  
      * and then use
 722  
      * <br><code>tTest(sample1, sample2, 0.02) </code>
 723  
      * </li></ol>
 724  
      * <p>
 725  
      * <strong>Usage Note:</strong><br>
 726  
      * The validity of the test depends on the assumptions of the parametric
 727  
      * t-test procedure, as discussed 
 728  
      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
 729  
      * here</a>
 730  
      * <p>
 731  
      * <strong>Preconditions</strong>: <ul>
 732  
      * <li>The observed array lengths must both be at least 2.
 733  
      * </li>
 734  
      * <li> <code> 0 &lt; alpha &lt;= 0.5 </code>
 735  
      * </li></ul>
 736  
      *
 737  
      * @param sample1 array of sample data values
 738  
      * @param sample2 array of sample data values
 739  
      * @param alpha significance level of the test
 740  
      * @return true if the null hypothesis can be rejected with 
 741  
      * confidence 1 - alpha
 742  
      * @throws IllegalArgumentException if the preconditions are not met
 743  
      * @throws MathException if an error occurs performing the test
 744  
      */
 745  
     public boolean homoscedasticTTest(double[] sample1, double[] sample2,
 746  
             double alpha)
 747  
     throws IllegalArgumentException, MathException {
 748  12
         if ((alpha <= 0) || (alpha > 0.5)) {
 749  0
             throw new IllegalArgumentException("bad significance level: " + alpha);
 750  
         }
 751  12
         return (homoscedasticTTest(sample1, sample2) < alpha);
 752  
     }
 753  
 
 754  
      /**
 755  
      * Returns the <i>observed significance level</i>, or 
 756  
      * <i>p-value</i>, associated with a two-sample, two-tailed t-test 
 757  
      * comparing the means of the datasets described by two StatisticalSummary
 758  
      * instances.
 759  
      * <p>
 760  
      * The number returned is the smallest significance level
 761  
      * at which one can reject the null hypothesis that the two means are
 762  
      * equal in favor of the two-sided alternative that they are different. 
 763  
      * For a one-sided test, divide the returned value by 2.
 764  
      * <p>
 765  
      * The test does not assume that the underlying population variances are
 766  
      * equal  and it uses approximated degrees of freedom computed from the 
 767  
      * sample data to compute the p-value.   To perform the test assuming
 768  
      * equal variances, use 
 769  
      * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.
 770  
      * <p>
 771  
      * <strong>Usage Note:</strong><br>
 772  
      * The validity of the p-value depends on the assumptions of the parametric
 773  
      * t-test procedure, as discussed 
 774  
      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
 775  
      * here</a>
 776  
      * <p>
 777  
      * <strong>Preconditions</strong>: <ul>
 778  
      * <li>The datasets described by the two StatisticalSummary instances must each contain
 779  
      * at least 2 observations.
 780  
      * </li></ul>
 781  
      *
 782  
      * @param sampleStats1  StatisticalSummary describing data from the first sample
 783  
      * @param sampleStats2  StatisticalSummary describing data from the second sample
 784  
      * @return p-value for t-test
 785  
      * @throws IllegalArgumentException if the precondition is not met
 786  
      * @throws MathException if an error occurs computing the p-value
 787  
      */
 788  
     public double tTest(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2)
 789  
     throws IllegalArgumentException, MathException {
 790  30
         if ((sampleStats1 == null) || (sampleStats2 == null ||
 791  
                 Math.min(sampleStats1.getN(), sampleStats2.getN()) < 2)) {
 792  12
             throw new IllegalArgumentException("insufficient data for t statistic");
 793  
         }
 794  18
         return tTest(sampleStats1.getMean(), sampleStats2.getMean(), sampleStats1.getVariance(),
 795  
                 sampleStats2.getVariance(), (double) sampleStats1.getN(), 
 796  
                 (double) sampleStats2.getN());
 797  
     }
 798  
     
 799  
     /**
 800  
      * Returns the <i>observed significance level</i>, or 
 801  
      * <i>p-value</i>, associated with a two-sample, two-tailed t-test 
 802  
      * comparing the means of the datasets described by two StatisticalSummary
 803  
      * instances, under the hypothesis of equal subpopulation variances. To
 804  
      * perform a test without the equal variances assumption, use
 805  
      * {@link #tTest(StatisticalSummary, StatisticalSummary)}.
 806  
      * <p>
 807  
      * The number returned is the smallest significance level
 808  
      * at which one can reject the null hypothesis that the two means are
 809  
      * equal in favor of the two-sided alternative that they are different. 
 810  
      * For a one-sided test, divide the returned value by 2.
 811  
      * <p>
 812  
      * See {@link #homoscedasticT(double[], double[])} for the formula used to
 813  
      * compute the t-statistic. The sum of the  sample sizes minus 2 is used as
 814  
      * the degrees of freedom.
 815  
      * <p>
 816  
      * <strong>Usage Note:</strong><br>
 817  
      * The validity of the p-value depends on the assumptions of the parametric
 818  
      * t-test procedure, as discussed 
 819  
      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
 820  
      * <p>
 821  
      * <strong>Preconditions</strong>: <ul>
 822  
      * <li>The datasets described by the two StatisticalSummary instances must each contain
 823  
      * at least 2 observations.
 824  
      * </li></ul>
 825  
      *
 826  
      * @param sampleStats1  StatisticalSummary describing data from the first sample
 827  
      * @param sampleStats2  StatisticalSummary describing data from the second sample
 828  
      * @return p-value for t-test
 829  
      * @throws IllegalArgumentException if the precondition is not met
 830  
      * @throws MathException if an error occurs computing the p-value
 831  
      */
 832  
     public double homoscedasticTTest(StatisticalSummary sampleStats1, 
 833  
             StatisticalSummary sampleStats2)
 834  
     throws IllegalArgumentException, MathException {
 835  6
         if ((sampleStats1 == null) || (sampleStats2 == null ||
 836  
                 Math.min(sampleStats1.getN(), sampleStats2.getN()) < 2)) {
 837  0
             throw new IllegalArgumentException("insufficient data for t statistic");
 838  
         }
 839  6
         return homoscedasticTTest(sampleStats1.getMean(),
 840  
                 sampleStats2.getMean(), sampleStats1.getVariance(),
 841  
                 sampleStats2.getVariance(), (double) sampleStats1.getN(), 
 842  
                 (double) sampleStats2.getN());
 843  
     }
 844  
 
 845  
     /**
 846  
      * Performs a 
 847  
      * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
 848  
      * two-sided t-test</a> evaluating the null hypothesis that 
 849  
      * <code>sampleStats1</code> and <code>sampleStats2</code> describe
 850  
      * datasets drawn from populations with the same mean, with significance
 851  
      * level <code>alpha</code>.   This test does not assume that the
 852  
      * subpopulation variances are equal.  To perform the test under the equal
 853  
      * variances assumption, use
 854  
      * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.
 855  
      * <p>
 856  
      * Returns <code>true</code> iff the null hypothesis that the means are
 857  
      * equal can be rejected with confidence <code>1 - alpha</code>.  To 
 858  
      * perform a 1-sided test, use <code>alpha * 2</code>
 859  
      * <p>
 860  
      * See {@link #t(double[], double[])} for the formula used to compute the
 861  
      * t-statistic.  Degrees of freedom are approximated using the
 862  
      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
 863  
      * Welch-Satterthwaite approximation.</a>
 864  
      * <p>
 865  
      * <strong>Examples:</strong><br><ol>
 866  
      * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
 867  
      * the 95% level, use 
 868  
      * <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code>
 869  
      * </li>
 870  
      * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>
 871  
      * at the 99% level,  first verify that the measured mean of  
 872  
      * <code>sample 1</code> is less than  the mean of <code>sample 2</code>
 873  
      * and then use 
 874  
      * <br><code>tTest(sampleStats1, sampleStats2, 0.02) </code>
 875  
      * </li></ol>
 876  
      * <p>
 877  
      * <strong>Usage Note:</strong><br>
 878  
      * The validity of the test depends on the assumptions of the parametric
 879  
      * t-test procedure, as discussed 
 880  
      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
 881  
      * here</a>
 882  
      * <p>
 883  
      * <strong>Preconditions</strong>: <ul>
 884  
      * <li>The datasets described by the two StatisticalSummary instances must each contain
 885  
      * at least 2 observations.
 886  
      * </li>
 887  
      * <li> <code> 0 &lt; alpha &lt;= 0.5 </code>
 888  
      * </li></ul>
 889  
      *
 890  
      * @param sampleStats1 StatisticalSummary describing sample data values
 891  
      * @param sampleStats2 StatisticalSummary describing sample data values
 892  
      * @param alpha significance level of the test
 893  
      * @return true if the null hypothesis can be rejected with 
 894  
      * confidence 1 - alpha
 895  
      * @throws IllegalArgumentException if the preconditions are not met
 896  
      * @throws MathException if an error occurs performing the test
 897  
      */
 898  
     public boolean tTest(StatisticalSummary sampleStats1,
 899  
             StatisticalSummary sampleStats2, double alpha)
 900  
     throws IllegalArgumentException, MathException {
 901  24
         if ((alpha <= 0) || (alpha > 0.5)) {
 902  6
             throw new IllegalArgumentException("bad significance level: " + alpha);
 903  
         }
 904  18
         return (tTest(sampleStats1, sampleStats2) < alpha);
 905  
     }
 906  
     
 907  
     //----------------------------------------------- Protected methods 
 908  
 
 909  
     /**
 910  
      * Gets a DistributionFactory to use in creating TDistribution instances.
 911  
      * @return a distribution factory.
 912  
      */
 913  
     protected DistributionFactory getDistributionFactory() {
 914  132
         if (distributionFactory == null) {
 915  16
             distributionFactory = DistributionFactory.newInstance();
 916  
         }
 917  132
         return distributionFactory;
 918  
     }
 919  
     
 920  
     /**
 921  
      * Computes approximate degrees of freedom for 2-sample t-test.
 922  
      * 
 923  
      * @param v1 first sample variance
 924  
      * @param v2 second sample variance
 925  
      * @param n1 first sample n
 926  
      * @param n2 second sample n
 927  
      * @return approximate degrees of freedom
 928  
      */
 929  
     protected double df(double v1, double v2, double n1, double n2) {
 930  42
         return (((v1 / n1) + (v2 / n2)) * ((v1 / n1) + (v2 / n2))) /
 931  
         ((v1 * v1) / (n1 * n1 * (n1 - 1d)) + (v2 * v2) /
 932  
                 (n2 * n2 * (n2 - 1d)));
 933  
     }
 934  
 
 935  
     /**
 936  
      * Computes t test statistic for 1-sample t-test.
 937  
      * 
 938  
      * @param m sample mean
 939  
      * @param mu constant to test against
 940  
      * @param v sample variance
 941  
      * @param n sample n
 942  
      * @return t test statistic
 943  
      */
 944  
     protected double t(double m, double mu, double v, double n) {
 945  102
         return (m - mu) / Math.sqrt(v / n);
 946  
     }
 947  
     
 948  
     /**
 949  
      * Computes t test statistic for 2-sample t-test.
 950  
      * <p>
 951  
      * Does not assume that subpopulation variances are equal.
 952  
      * 
 953  
      * @param m1 first sample mean
 954  
      * @param m2 second sample mean
 955  
      * @param v1 first sample variance
 956  
      * @param v2 second sample variance
 957  
      * @param n1 first sample n
 958  
      * @param n2 second sample n
 959  
      * @return t test statistic
 960  
      */
 961  
     protected double t(double m1, double m2,  double v1, double v2, double n1,
 962  
             double n2)  {
 963  60
             return (m1 - m2) / Math.sqrt((v1 / n1) + (v2 / n2));
 964  
     }
 965  
     
 966  
     /**
 967  
      * Computes t test statistic for 2-sample t-test under the hypothesis
 968  
      * of equal subpopulation variances.
 969  
      * 
 970  
      * @param m1 first sample mean
 971  
      * @param m2 second sample mean
 972  
      * @param v1 first sample variance
 973  
      * @param v2 second sample variance
 974  
      * @param n1 first sample n
 975  
      * @param n2 second sample n
 976  
      * @return t test statistic
 977  
      */
 978  
     protected double homoscedasticT(double m1, double m2,  double v1,
 979  
             double v2, double n1, double n2)  {
 980  24
             double pooledVariance = ((n1  - 1) * v1 + (n2 -1) * v2 ) / (n1 + n2 - 2); 
 981  24
             return (m1 - m2) / Math.sqrt(pooledVariance * (1d / n1 + 1d / n2));
 982  
     }
 983  
     
 984  
     /**
 985  
      * Computes p-value for 2-sided, 1-sample t-test.
 986  
      * 
 987  
      * @param m sample mean
 988  
      * @param mu constant to test against
 989  
      * @param v sample variance
 990  
      * @param n sample n
 991  
      * @return p-value
 992  
      * @throws MathException if an error occurs computing the p-value
 993  
      */
 994  
     protected double tTest(double m, double mu, double v, double n)
 995  
     throws MathException {
 996  72
         double t = Math.abs(t(m, mu, v, n));
 997  72
         TDistribution tDistribution = 
 998  
             getDistributionFactory().createTDistribution(n - 1);
 999  72
         return 1.0 - tDistribution.cumulativeProbability(-t, t);
 1000  
     }
 1001  
 
 1002  
     /**
 1003  
      * Computes p-value for 2-sided, 2-sample t-test.
 1004  
      * <p>
 1005  
      * Does not assume subpopulation variances are equal. Degrees of freedom
 1006  
      * are estimated from the data.
 1007  
      * 
 1008  
      * @param m1 first sample mean
 1009  
      * @param m2 second sample mean
 1010  
      * @param v1 first sample variance
 1011  
      * @param v2 second sample variance
 1012  
      * @param n1 first sample n
 1013  
      * @param n2 second sample n
 1014  
      * @return p-value
 1015  
      * @throws MathException if an error occurs computing the p-value
 1016  
      */
 1017  
     protected double tTest(double m1, double m2, double v1, double v2, 
 1018  
             double n1, double n2)
 1019  
     throws MathException {
 1020  42
         double t = Math.abs(t(m1, m2, v1, v2, n1, n2));
 1021  42
         double degreesOfFreedom = 0;
 1022  42
         degreesOfFreedom= df(v1, v2, n1, n2);
 1023  42
         TDistribution tDistribution =
 1024  
             getDistributionFactory().createTDistribution(degreesOfFreedom);
 1025  42
         return 1.0 - tDistribution.cumulativeProbability(-t, t);
 1026  
     }
 1027  
     
 1028  
     /**
 1029  
      * Computes p-value for 2-sided, 2-sample t-test, under the assumption
 1030  
      * of equal subpopulation variances.
 1031  
      * <p>
 1032  
      * The sum of the sample sizes minus 2 is used as degrees of freedom.
 1033  
      * 
 1034  
      * @param m1 first sample mean
 1035  
      * @param m2 second sample mean
 1036  
      * @param v1 first sample variance
 1037  
      * @param v2 second sample variance
 1038  
      * @param n1 first sample n
 1039  
      * @param n2 second sample n
 1040  
      * @return p-value
 1041  
      * @throws MathException if an error occurs computing the p-value
 1042  
      */
 1043  
     protected double homoscedasticTTest(double m1, double m2, double v1,
 1044  
             double v2, double n1, double n2)
 1045  
     throws MathException {
 1046  18
         double t = Math.abs(homoscedasticT(m1, m2, v1, v2, n1, n2));
 1047  18
         double degreesOfFreedom = 0;
 1048  18
             degreesOfFreedom = (double) (n1 + n2 - 2);
 1049  18
         TDistribution tDistribution =
 1050  
             getDistributionFactory().createTDistribution(degreesOfFreedom);
 1051  18
         return 1.0 - tDistribution.cumulativeProbability(-t, t);
 1052  
     }   
 1053  
 }