| Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||||||
| ChiSquareTestImpl |
|
| 3.6923076923076925;3.692 |
| 1 | /* |
|
| 2 | * Copyright 2004 The Apache Software Foundation. |
|
| 3 | * |
|
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
|
| 5 | * you may not use this file except in compliance with the License. |
|
| 6 | * You may obtain a copy of the License at |
|
| 7 | * |
|
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
|
| 9 | * |
|
| 10 | * Unless required by applicable law or agreed to in writing, software |
|
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
|
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
| 13 | * See the License for the specific language governing permissions and |
|
| 14 | * limitations under the License. |
|
| 15 | */ |
|
| 16 | package org.apache.commons.math.stat.inference; |
|
| 17 | ||
| 18 | import org.apache.commons.math.MathException; |
|
| 19 | import org.apache.commons.math.distribution.DistributionFactory; |
|
| 20 | import org.apache.commons.math.distribution.ChiSquaredDistribution; |
|
| 21 | ||
| 22 | /** |
|
| 23 | * Implements Chi-Square test statistics defined in the {@link ChiSquareTest} interface. |
|
| 24 | * |
|
| 25 | * @version $Revision$ $Date: 2005-02-26 05:11:52 -0800 (Sat, 26 Feb 2005) $ |
|
| 26 | */ |
|
| 27 | public class ChiSquareTestImpl implements ChiSquareTest { |
|
| 28 | ||
| 29 | /** Cached DistributionFactory used to create ChiSquaredDistribution instances */ |
|
| 30 | 112 | private DistributionFactory distributionFactory = null; |
| 31 | ||
| 32 | /** |
|
| 33 | * Construct a ChiSquareTestImpl |
|
| 34 | */ |
|
| 35 | public ChiSquareTestImpl() { |
|
| 36 | 112 | super(); |
| 37 | 112 | } |
| 38 | ||
| 39 | /** |
|
| 40 | * @param observed array of observed frequency counts |
|
| 41 | * @param expected array of expected frequency counts |
|
| 42 | * @return chi-square test statistic |
|
| 43 | * @throws IllegalArgumentException if preconditions are not met |
|
| 44 | * or length is less than 2 |
|
| 45 | */ |
|
| 46 | public double chiSquare(double[] expected, long[] observed) |
|
| 47 | throws IllegalArgumentException { |
|
| 48 | 130 | double sumSq = 0.0d; |
| 49 | 130 | double dev = 0.0d; |
| 50 | 130 | if ((expected.length < 2) || (expected.length != observed.length)) { |
| 51 | 12 | throw new IllegalArgumentException( |
| 52 | "observed, expected array lengths incorrect"); |
|
| 53 | } |
|
| 54 | 118 | if (!isPositive(expected) || !isNonNegative(observed)) { |
| 55 | 12 | throw new IllegalArgumentException( |
| 56 | "observed counts must be non-negative and expected counts must be postive"); |
|
| 57 | } |
|
| 58 | 766 | for (int i = 0; i < observed.length; i++) { |
| 59 | 660 | dev = ((double) observed[i] - expected[i]); |
| 60 | 660 | sumSq += dev * dev / expected[i]; |
| 61 | } |
|
| 62 | 106 | return sumSq; |
| 63 | } |
|
| 64 | ||
| 65 | /** |
|
| 66 | * @param observed array of observed frequency counts |
|
| 67 | * @param expected array of exptected frequency counts |
|
| 68 | * @return p-value |
|
| 69 | * @throws IllegalArgumentException if preconditions are not met |
|
| 70 | * @throws MathException if an error occurs computing the p-value |
|
| 71 | */ |
|
| 72 | public double chiSquareTest(double[] expected, long[] observed) |
|
| 73 | throws IllegalArgumentException, MathException { |
|
| 74 | 42 | ChiSquaredDistribution chiSquaredDistribution = |
| 75 | getDistributionFactory().createChiSquareDistribution( |
|
| 76 | (double) expected.length - 1); |
|
| 77 | 42 | return 1 - chiSquaredDistribution.cumulativeProbability( |
| 78 | chiSquare(expected, observed)); |
|
| 79 | } |
|
| 80 | ||
| 81 | /** |
|
| 82 | * @param observed array of observed frequency counts |
|
| 83 | * @param expected array of exptected frequency counts |
|
| 84 | * @param alpha significance level of the test |
|
| 85 | * @return true iff null hypothesis can be rejected with confidence |
|
| 86 | * 1 - alpha |
|
| 87 | * @throws IllegalArgumentException if preconditions are not met |
|
| 88 | * @throws MathException if an error occurs performing the test |
|
| 89 | */ |
|
| 90 | public boolean chiSquareTest(double[] expected, long[] observed, |
|
| 91 | double alpha) throws IllegalArgumentException, MathException { |
|
| 92 | 30 | if ((alpha <= 0) || (alpha > 0.5)) { |
| 93 | 6 | throw new IllegalArgumentException( |
| 94 | "bad significance level: " + alpha); |
|
| 95 | } |
|
| 96 | 24 | return (chiSquareTest(expected, observed) < alpha); |
| 97 | } |
|
| 98 | ||
| 99 | /** |
|
| 100 | * @param counts array representation of 2-way table |
|
| 101 | * @return chi-square test statistic |
|
| 102 | * @throws IllegalArgumentException if preconditions are not met |
|
| 103 | */ |
|
| 104 | public double chiSquare(long[][] counts) throws IllegalArgumentException { |
|
| 105 | ||
| 106 | 78 | checkArray(counts); |
| 107 | 54 | int nRows = counts.length; |
| 108 | 54 | int nCols = counts[0].length; |
| 109 | ||
| 110 | // compute row, column and total sums |
|
| 111 | 54 | double[] rowSum = new double[nRows]; |
| 112 | 54 | double[] colSum = new double[nCols]; |
| 113 | 54 | double total = 0.0d; |
| 114 | 216 | for (int row = 0; row < nRows; row++) { |
| 115 | 594 | for (int col = 0; col < nCols; col++) { |
| 116 | 432 | rowSum[row] += (double) counts[row][col]; |
| 117 | 432 | colSum[col] += (double) counts[row][col]; |
| 118 | 432 | total += (double) counts[row][col]; |
| 119 | } |
|
| 120 | } |
|
| 121 | ||
| 122 | // compute expected counts and chi-square |
|
| 123 | 54 | double sumSq = 0.0d; |
| 124 | 54 | double expected = 0.0d; |
| 125 | 216 | for (int row = 0; row < nRows; row++) { |
| 126 | 594 | for (int col = 0; col < nCols; col++) { |
| 127 | 432 | expected = (rowSum[row] * colSum[col]) / total; |
| 128 | 432 | sumSq += (((double) counts[row][col] - expected) * |
| 129 | ((double) counts[row][col] - expected)) / expected; |
|
| 130 | } |
|
| 131 | } |
|
| 132 | 54 | return sumSq; |
| 133 | } |
|
| 134 | ||
| 135 | /** |
|
| 136 | * @param counts array representation of 2-way table |
|
| 137 | * @return p-value |
|
| 138 | * @throws IllegalArgumentException if preconditions are not met |
|
| 139 | * @throws MathException if an error occurs computing the p-value |
|
| 140 | */ |
|
| 141 | public double chiSquareTest(long[][] counts) |
|
| 142 | throws IllegalArgumentException, MathException { |
|
| 143 | 36 | checkArray(counts); |
| 144 | 36 | double df = ((double) counts.length -1) * ((double) counts[0].length - 1); |
| 145 | 36 | ChiSquaredDistribution chiSquaredDistribution = |
| 146 | getDistributionFactory().createChiSquareDistribution(df); |
|
| 147 | 36 | return 1 - chiSquaredDistribution.cumulativeProbability(chiSquare(counts)); |
| 148 | } |
|
| 149 | ||
| 150 | /** |
|
| 151 | * @param counts array representation of 2-way table |
|
| 152 | * @param alpha significance level of the test |
|
| 153 | * @return true iff null hypothesis can be rejected with confidence |
|
| 154 | * 1 - alpha |
|
| 155 | * @throws IllegalArgumentException if preconditions are not met |
|
| 156 | * @throws MathException if an error occurs performing the test |
|
| 157 | */ |
|
| 158 | public boolean chiSquareTest(long[][] counts, double alpha) |
|
| 159 | throws IllegalArgumentException, MathException { |
|
| 160 | 24 | if ((alpha <= 0) || (alpha > 0.5)) { |
| 161 | 6 | throw new IllegalArgumentException("bad significance level: " + alpha); |
| 162 | } |
|
| 163 | 18 | return (chiSquareTest(counts) < alpha); |
| 164 | } |
|
| 165 | ||
| 166 | /** |
|
| 167 | * Checks to make sure that the input long[][] array is rectangular, |
|
| 168 | * has at least 2 rows and 2 columns, and has all non-negative entries, |
|
| 169 | * throwing IllegalArgumentException if any of these checks fail. |
|
| 170 | * |
|
| 171 | * @param in input 2-way table to check |
|
| 172 | * @throws IllegalArgumentException if the array is not valid |
|
| 173 | */ |
|
| 174 | private void checkArray(long[][] in) throws IllegalArgumentException { |
|
| 175 | ||
| 176 | 114 | if (in.length < 2) { |
| 177 | 6 | throw new IllegalArgumentException("Input table must have at least two rows"); |
| 178 | } |
|
| 179 | ||
| 180 | 108 | if (in[0].length < 2) { |
| 181 | 6 | throw new IllegalArgumentException("Input table must have at least two columns"); |
| 182 | } |
|
| 183 | ||
| 184 | 102 | if (!isRectangular(in)) { |
| 185 | 6 | throw new IllegalArgumentException("Input table must be rectangular"); |
| 186 | } |
|
| 187 | ||
| 188 | 96 | if (!isNonNegative(in)) { |
| 189 | 6 | throw new IllegalArgumentException("All entries in input 2-way table must be non-negative"); |
| 190 | } |
|
| 191 | ||
| 192 | 90 | } |
| 193 | ||
| 194 | //--------------------- Protected methods --------------------------------- |
|
| 195 | /** |
|
| 196 | * Gets a DistributionFactory to use in creating ChiSquaredDistribution instances. |
|
| 197 | * |
|
| 198 | * @return a DistributionFactory |
|
| 199 | */ |
|
| 200 | protected DistributionFactory getDistributionFactory() { |
|
| 201 | 78 | if (distributionFactory == null) { |
| 202 | 16 | distributionFactory = DistributionFactory.newInstance(); |
| 203 | } |
|
| 204 | 78 | return distributionFactory; |
| 205 | } |
|
| 206 | ||
| 207 | //--------------------- Private array methods -- should find a utility home for these |
|
| 208 | ||
| 209 | /** |
|
| 210 | * Returns true iff input array is rectangular. |
|
| 211 | * |
|
| 212 | * @param in array to be tested |
|
| 213 | * @return true if the array is rectangular |
|
| 214 | * @throws NullPointerException if input array is null |
|
| 215 | * @throws ArrayIndexOutOfBoundsException if input array is empty |
|
| 216 | */ |
|
| 217 | private boolean isRectangular(long[][] in) { |
|
| 218 | 300 | for (int i = 1; i < in.length; i++) { |
| 219 | 204 | if (in[i].length != in[0].length) { |
| 220 | 6 | return false; |
| 221 | } |
|
| 222 | } |
|
| 223 | 96 | return true; |
| 224 | } |
|
| 225 | ||
| 226 | /** |
|
| 227 | * Returns true iff all entries of the input array are > 0. |
|
| 228 | * Returns true if the array is non-null, but empty |
|
| 229 | * |
|
| 230 | * @param in array to be tested |
|
| 231 | * @return true if all entries of the array are positive |
|
| 232 | * @throws NullPointerException if input array is null |
|
| 233 | */ |
|
| 234 | private boolean isPositive(double[] in) { |
|
| 235 | 796 | for (int i = 0; i < in.length; i ++) { |
| 236 | 684 | if (in[i] <= 0) { |
| 237 | 6 | return false; |
| 238 | } |
|
| 239 | } |
|
| 240 | 112 | return true; |
| 241 | } |
|
| 242 | ||
| 243 | /** |
|
| 244 | * Returns true iff all entries of the input array are >= 0. |
|
| 245 | * Returns true if the array is non-null, but empty |
|
| 246 | * |
|
| 247 | * @param in array to be tested |
|
| 248 | * @return true if all entries of the array are non-negative |
|
| 249 | * @throws NullPointerException if input array is null |
|
| 250 | */ |
|
| 251 | private boolean isNonNegative(long[] in) { |
|
| 252 | 772 | for (int i = 0; i < in.length; i ++) { |
| 253 | 666 | if (in[i] < 0) { |
| 254 | 6 | return false; |
| 255 | } |
|
| 256 | } |
|
| 257 | 106 | return true; |
| 258 | } |
|
| 259 | ||
| 260 | /** |
|
| 261 | * Returns true iff all entries of (all subarrays of) the input array are >= 0. |
|
| 262 | * Returns true if the array is non-null, but empty |
|
| 263 | * |
|
| 264 | * @param in array to be tested |
|
| 265 | * @return true if all entries of the array are non-negative |
|
| 266 | * @throws NullPointerException if input array is null |
|
| 267 | */ |
|
| 268 | private boolean isNonNegative(long[][] in) { |
|
| 269 | 366 | for (int i = 0; i < in.length; i ++) { |
| 270 | 1002 | for (int j = 0; j < in[i].length; j++) { |
| 271 | 732 | if (in[i][j] < 0) { |
| 272 | 6 | return false; |
| 273 | } |
|
| 274 | } |
|
| 275 | } |
|
| 276 | 90 | return true; |
| 277 | } |
|
| 278 | ||
| 279 | } |