Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||||||
ChiSquareTestImpl |
|
| 3.6923076923076925;3.692 |
1 | /* |
|
2 | * Copyright 2004 The Apache Software Foundation. |
|
3 | * |
|
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
|
5 | * you may not use this file except in compliance with the License. |
|
6 | * You may obtain a copy of the License at |
|
7 | * |
|
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
|
9 | * |
|
10 | * Unless required by applicable law or agreed to in writing, software |
|
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
|
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
13 | * See the License for the specific language governing permissions and |
|
14 | * limitations under the License. |
|
15 | */ |
|
16 | package org.apache.commons.math.stat.inference; |
|
17 | ||
18 | import org.apache.commons.math.MathException; |
|
19 | import org.apache.commons.math.distribution.DistributionFactory; |
|
20 | import org.apache.commons.math.distribution.ChiSquaredDistribution; |
|
21 | ||
22 | /** |
|
23 | * Implements Chi-Square test statistics defined in the {@link ChiSquareTest} interface. |
|
24 | * |
|
25 | * @version $Revision$ $Date: 2005-02-26 05:11:52 -0800 (Sat, 26 Feb 2005) $ |
|
26 | */ |
|
27 | public class ChiSquareTestImpl implements ChiSquareTest { |
|
28 | ||
29 | /** Cached DistributionFactory used to create ChiSquaredDistribution instances */ |
|
30 | 112 | private DistributionFactory distributionFactory = null; |
31 | ||
32 | /** |
|
33 | * Construct a ChiSquareTestImpl |
|
34 | */ |
|
35 | public ChiSquareTestImpl() { |
|
36 | 112 | super(); |
37 | 112 | } |
38 | ||
39 | /** |
|
40 | * @param observed array of observed frequency counts |
|
41 | * @param expected array of expected frequency counts |
|
42 | * @return chi-square test statistic |
|
43 | * @throws IllegalArgumentException if preconditions are not met |
|
44 | * or length is less than 2 |
|
45 | */ |
|
46 | public double chiSquare(double[] expected, long[] observed) |
|
47 | throws IllegalArgumentException { |
|
48 | 130 | double sumSq = 0.0d; |
49 | 130 | double dev = 0.0d; |
50 | 130 | if ((expected.length < 2) || (expected.length != observed.length)) { |
51 | 12 | throw new IllegalArgumentException( |
52 | "observed, expected array lengths incorrect"); |
|
53 | } |
|
54 | 118 | if (!isPositive(expected) || !isNonNegative(observed)) { |
55 | 12 | throw new IllegalArgumentException( |
56 | "observed counts must be non-negative and expected counts must be postive"); |
|
57 | } |
|
58 | 766 | for (int i = 0; i < observed.length; i++) { |
59 | 660 | dev = ((double) observed[i] - expected[i]); |
60 | 660 | sumSq += dev * dev / expected[i]; |
61 | } |
|
62 | 106 | return sumSq; |
63 | } |
|
64 | ||
65 | /** |
|
66 | * @param observed array of observed frequency counts |
|
67 | * @param expected array of exptected frequency counts |
|
68 | * @return p-value |
|
69 | * @throws IllegalArgumentException if preconditions are not met |
|
70 | * @throws MathException if an error occurs computing the p-value |
|
71 | */ |
|
72 | public double chiSquareTest(double[] expected, long[] observed) |
|
73 | throws IllegalArgumentException, MathException { |
|
74 | 42 | ChiSquaredDistribution chiSquaredDistribution = |
75 | getDistributionFactory().createChiSquareDistribution( |
|
76 | (double) expected.length - 1); |
|
77 | 42 | return 1 - chiSquaredDistribution.cumulativeProbability( |
78 | chiSquare(expected, observed)); |
|
79 | } |
|
80 | ||
81 | /** |
|
82 | * @param observed array of observed frequency counts |
|
83 | * @param expected array of exptected frequency counts |
|
84 | * @param alpha significance level of the test |
|
85 | * @return true iff null hypothesis can be rejected with confidence |
|
86 | * 1 - alpha |
|
87 | * @throws IllegalArgumentException if preconditions are not met |
|
88 | * @throws MathException if an error occurs performing the test |
|
89 | */ |
|
90 | public boolean chiSquareTest(double[] expected, long[] observed, |
|
91 | double alpha) throws IllegalArgumentException, MathException { |
|
92 | 30 | if ((alpha <= 0) || (alpha > 0.5)) { |
93 | 6 | throw new IllegalArgumentException( |
94 | "bad significance level: " + alpha); |
|
95 | } |
|
96 | 24 | return (chiSquareTest(expected, observed) < alpha); |
97 | } |
|
98 | ||
99 | /** |
|
100 | * @param counts array representation of 2-way table |
|
101 | * @return chi-square test statistic |
|
102 | * @throws IllegalArgumentException if preconditions are not met |
|
103 | */ |
|
104 | public double chiSquare(long[][] counts) throws IllegalArgumentException { |
|
105 | ||
106 | 78 | checkArray(counts); |
107 | 54 | int nRows = counts.length; |
108 | 54 | int nCols = counts[0].length; |
109 | ||
110 | // compute row, column and total sums |
|
111 | 54 | double[] rowSum = new double[nRows]; |
112 | 54 | double[] colSum = new double[nCols]; |
113 | 54 | double total = 0.0d; |
114 | 216 | for (int row = 0; row < nRows; row++) { |
115 | 594 | for (int col = 0; col < nCols; col++) { |
116 | 432 | rowSum[row] += (double) counts[row][col]; |
117 | 432 | colSum[col] += (double) counts[row][col]; |
118 | 432 | total += (double) counts[row][col]; |
119 | } |
|
120 | } |
|
121 | ||
122 | // compute expected counts and chi-square |
|
123 | 54 | double sumSq = 0.0d; |
124 | 54 | double expected = 0.0d; |
125 | 216 | for (int row = 0; row < nRows; row++) { |
126 | 594 | for (int col = 0; col < nCols; col++) { |
127 | 432 | expected = (rowSum[row] * colSum[col]) / total; |
128 | 432 | sumSq += (((double) counts[row][col] - expected) * |
129 | ((double) counts[row][col] - expected)) / expected; |
|
130 | } |
|
131 | } |
|
132 | 54 | return sumSq; |
133 | } |
|
134 | ||
135 | /** |
|
136 | * @param counts array representation of 2-way table |
|
137 | * @return p-value |
|
138 | * @throws IllegalArgumentException if preconditions are not met |
|
139 | * @throws MathException if an error occurs computing the p-value |
|
140 | */ |
|
141 | public double chiSquareTest(long[][] counts) |
|
142 | throws IllegalArgumentException, MathException { |
|
143 | 36 | checkArray(counts); |
144 | 36 | double df = ((double) counts.length -1) * ((double) counts[0].length - 1); |
145 | 36 | ChiSquaredDistribution chiSquaredDistribution = |
146 | getDistributionFactory().createChiSquareDistribution(df); |
|
147 | 36 | return 1 - chiSquaredDistribution.cumulativeProbability(chiSquare(counts)); |
148 | } |
|
149 | ||
150 | /** |
|
151 | * @param counts array representation of 2-way table |
|
152 | * @param alpha significance level of the test |
|
153 | * @return true iff null hypothesis can be rejected with confidence |
|
154 | * 1 - alpha |
|
155 | * @throws IllegalArgumentException if preconditions are not met |
|
156 | * @throws MathException if an error occurs performing the test |
|
157 | */ |
|
158 | public boolean chiSquareTest(long[][] counts, double alpha) |
|
159 | throws IllegalArgumentException, MathException { |
|
160 | 24 | if ((alpha <= 0) || (alpha > 0.5)) { |
161 | 6 | throw new IllegalArgumentException("bad significance level: " + alpha); |
162 | } |
|
163 | 18 | return (chiSquareTest(counts) < alpha); |
164 | } |
|
165 | ||
166 | /** |
|
167 | * Checks to make sure that the input long[][] array is rectangular, |
|
168 | * has at least 2 rows and 2 columns, and has all non-negative entries, |
|
169 | * throwing IllegalArgumentException if any of these checks fail. |
|
170 | * |
|
171 | * @param in input 2-way table to check |
|
172 | * @throws IllegalArgumentException if the array is not valid |
|
173 | */ |
|
174 | private void checkArray(long[][] in) throws IllegalArgumentException { |
|
175 | ||
176 | 114 | if (in.length < 2) { |
177 | 6 | throw new IllegalArgumentException("Input table must have at least two rows"); |
178 | } |
|
179 | ||
180 | 108 | if (in[0].length < 2) { |
181 | 6 | throw new IllegalArgumentException("Input table must have at least two columns"); |
182 | } |
|
183 | ||
184 | 102 | if (!isRectangular(in)) { |
185 | 6 | throw new IllegalArgumentException("Input table must be rectangular"); |
186 | } |
|
187 | ||
188 | 96 | if (!isNonNegative(in)) { |
189 | 6 | throw new IllegalArgumentException("All entries in input 2-way table must be non-negative"); |
190 | } |
|
191 | ||
192 | 90 | } |
193 | ||
194 | //--------------------- Protected methods --------------------------------- |
|
195 | /** |
|
196 | * Gets a DistributionFactory to use in creating ChiSquaredDistribution instances. |
|
197 | * |
|
198 | * @return a DistributionFactory |
|
199 | */ |
|
200 | protected DistributionFactory getDistributionFactory() { |
|
201 | 78 | if (distributionFactory == null) { |
202 | 16 | distributionFactory = DistributionFactory.newInstance(); |
203 | } |
|
204 | 78 | return distributionFactory; |
205 | } |
|
206 | ||
207 | //--------------------- Private array methods -- should find a utility home for these |
|
208 | ||
209 | /** |
|
210 | * Returns true iff input array is rectangular. |
|
211 | * |
|
212 | * @param in array to be tested |
|
213 | * @return true if the array is rectangular |
|
214 | * @throws NullPointerException if input array is null |
|
215 | * @throws ArrayIndexOutOfBoundsException if input array is empty |
|
216 | */ |
|
217 | private boolean isRectangular(long[][] in) { |
|
218 | 300 | for (int i = 1; i < in.length; i++) { |
219 | 204 | if (in[i].length != in[0].length) { |
220 | 6 | return false; |
221 | } |
|
222 | } |
|
223 | 96 | return true; |
224 | } |
|
225 | ||
226 | /** |
|
227 | * Returns true iff all entries of the input array are > 0. |
|
228 | * Returns true if the array is non-null, but empty |
|
229 | * |
|
230 | * @param in array to be tested |
|
231 | * @return true if all entries of the array are positive |
|
232 | * @throws NullPointerException if input array is null |
|
233 | */ |
|
234 | private boolean isPositive(double[] in) { |
|
235 | 796 | for (int i = 0; i < in.length; i ++) { |
236 | 684 | if (in[i] <= 0) { |
237 | 6 | return false; |
238 | } |
|
239 | } |
|
240 | 112 | return true; |
241 | } |
|
242 | ||
243 | /** |
|
244 | * Returns true iff all entries of the input array are >= 0. |
|
245 | * Returns true if the array is non-null, but empty |
|
246 | * |
|
247 | * @param in array to be tested |
|
248 | * @return true if all entries of the array are non-negative |
|
249 | * @throws NullPointerException if input array is null |
|
250 | */ |
|
251 | private boolean isNonNegative(long[] in) { |
|
252 | 772 | for (int i = 0; i < in.length; i ++) { |
253 | 666 | if (in[i] < 0) { |
254 | 6 | return false; |
255 | } |
|
256 | } |
|
257 | 106 | return true; |
258 | } |
|
259 | ||
260 | /** |
|
261 | * Returns true iff all entries of (all subarrays of) the input array are >= 0. |
|
262 | * Returns true if the array is non-null, but empty |
|
263 | * |
|
264 | * @param in array to be tested |
|
265 | * @return true if all entries of the array are non-negative |
|
266 | * @throws NullPointerException if input array is null |
|
267 | */ |
|
268 | private boolean isNonNegative(long[][] in) { |
|
269 | 366 | for (int i = 0; i < in.length; i ++) { |
270 | 1002 | for (int j = 0; j < in[i].length; j++) { |
271 | 732 | if (in[i][j] < 0) { |
272 | 6 | return false; |
273 | } |
|
274 | } |
|
275 | } |
|
276 | 90 | return true; |
277 | } |
|
278 | ||
279 | } |