Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||||||
HypergeometricDistributionImpl |
|
| 1.9411764705882353;1.941 |
1 | /* |
|
2 | * Copyright 2003-2004 The Apache Software Foundation. |
|
3 | * |
|
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
|
5 | * you may not use this file except in compliance with the License. |
|
6 | * You may obtain a copy of the License at |
|
7 | * |
|
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
|
9 | * |
|
10 | * Unless required by applicable law or agreed to in writing, software |
|
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
|
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
13 | * See the License for the specific language governing permissions and |
|
14 | * limitations under the License. |
|
15 | */ |
|
16 | ||
17 | package org.apache.commons.math.distribution; |
|
18 | ||
19 | import java.io.Serializable; |
|
20 | ||
21 | import org.apache.commons.math.util.MathUtils; |
|
22 | ||
23 | /** |
|
24 | * The default implementation of {@link HypergeometricDistribution}. |
|
25 | * |
|
26 | * @version $Revision$ $Date: 2005-08-26 07:05:45 -0700 (Fri, 26 Aug 2005) $ |
|
27 | */ |
|
28 | public class HypergeometricDistributionImpl extends AbstractIntegerDistribution |
|
29 | implements HypergeometricDistribution, Serializable |
|
30 | { |
|
31 | ||
32 | /** Serializable version identifier */ |
|
33 | static final long serialVersionUID = -436928820673516179L; |
|
34 | ||
35 | /** The number of successes in the population. */ |
|
36 | private int numberOfSuccesses; |
|
37 | ||
38 | /** The population size. */ |
|
39 | private int populationSize; |
|
40 | ||
41 | /** The sample size. */ |
|
42 | private int sampleSize; |
|
43 | ||
44 | /** |
|
45 | * Construct a new hypergeometric distribution with the given the population |
|
46 | * size, the number of successes in the population, and the sample size. |
|
47 | * @param populationSize the population size. |
|
48 | * @param numberOfSuccesses number of successes in the population. |
|
49 | * @param sampleSize the sample size. |
|
50 | */ |
|
51 | public HypergeometricDistributionImpl(int populationSize, |
|
52 | int numberOfSuccesses, int sampleSize) { |
|
53 | 46 | super(); |
54 | 46 | if (numberOfSuccesses > populationSize) { |
55 | 4 | throw new IllegalArgumentException( |
56 | "number of successes must be less than or equal to " + |
|
57 | "population size"); |
|
58 | } |
|
59 | 42 | if (sampleSize > populationSize) { |
60 | 2 | throw new IllegalArgumentException( |
61 | "sample size must be less than or equal to population size"); |
|
62 | } |
|
63 | 40 | setPopulationSize(populationSize); |
64 | 40 | setSampleSize(sampleSize); |
65 | 38 | setNumberOfSuccesses(numberOfSuccesses); |
66 | 36 | } |
67 | ||
68 | /** |
|
69 | * For this disbution, X, this method returns P(X ≤ x). |
|
70 | * @param x the value at which the PDF is evaluated. |
|
71 | * @return PDF for this distribution. |
|
72 | */ |
|
73 | public double cumulativeProbability(int x) { |
|
74 | double ret; |
|
75 | ||
76 | 272 | int n = getPopulationSize(); |
77 | 272 | int m = getNumberOfSuccesses(); |
78 | 272 | int k = getSampleSize(); |
79 | ||
80 | 272 | int[] domain = getDomain(n, m, k); |
81 | 272 | if (x < domain[0]) { |
82 | 32 | ret = 0.0; |
83 | 240 | } else if(x >= domain[1]) { |
84 | 40 | ret = 1.0; |
85 | } else { |
|
86 | 200 | ret = innerCumulativeProbability(domain[0], x, 1, n, m, k); |
87 | } |
|
88 | ||
89 | 272 | return ret; |
90 | } |
|
91 | ||
92 | /** |
|
93 | * Return the domain for the given hypergeometric distribution parameters. |
|
94 | * @param n the population size. |
|
95 | * @param m number of successes in the population. |
|
96 | * @param k the sample size. |
|
97 | * @return a two element array containing the lower and upper bounds of the |
|
98 | * hypergeometric distribution. |
|
99 | */ |
|
100 | private int[] getDomain(int n, int m, int k){ |
|
101 | 462 | return new int[]{ |
102 | getLowerDomain(n, m, k), |
|
103 | getUpperDomain(m, k) |
|
104 | }; |
|
105 | } |
|
106 | ||
107 | /** |
|
108 | * Access the domain value lower bound, based on <code>p</code>, used to |
|
109 | * bracket a PDF root. |
|
110 | * |
|
111 | * @param p the desired probability for the critical value |
|
112 | * @return domain value lower bound, i.e. |
|
113 | * P(X < <i>lower bound</i>) < <code>p</code> |
|
114 | */ |
|
115 | protected int getDomainLowerBound(double p) { |
|
116 | 36 | return getLowerDomain(getPopulationSize(), getNumberOfSuccesses(), |
117 | getSampleSize()); |
|
118 | } |
|
119 | ||
120 | /** |
|
121 | * Access the domain value upper bound, based on <code>p</code>, used to |
|
122 | * bracket a PDF root. |
|
123 | * |
|
124 | * @param p the desired probability for the critical value |
|
125 | * @return domain value upper bound, i.e. |
|
126 | * P(X < <i>upper bound</i>) > <code>p</code> |
|
127 | */ |
|
128 | protected int getDomainUpperBound(double p) { |
|
129 | 36 | return getUpperDomain(getSampleSize(), getNumberOfSuccesses()); |
130 | } |
|
131 | ||
132 | /** |
|
133 | * Return the lowest domain value for the given hypergeometric distribution |
|
134 | * parameters. |
|
135 | * @param n the population size. |
|
136 | * @param m number of successes in the population. |
|
137 | * @param k the sample size. |
|
138 | * @return the lowest domain value of the hypergeometric distribution. |
|
139 | */ |
|
140 | private int getLowerDomain(int n, int m, int k) { |
|
141 | 498 | return Math.max(0, m - (n - k)); |
142 | } |
|
143 | ||
144 | /** |
|
145 | * Access the number of successes. |
|
146 | * @return the number of successes. |
|
147 | */ |
|
148 | public int getNumberOfSuccesses() { |
|
149 | 534 | return numberOfSuccesses; |
150 | } |
|
151 | ||
152 | /** |
|
153 | * Access the population size. |
|
154 | * @return the population size. |
|
155 | */ |
|
156 | public int getPopulationSize() { |
|
157 | 500 | return populationSize; |
158 | } |
|
159 | ||
160 | /** |
|
161 | * Access the sample size. |
|
162 | * @return the sample size. |
|
163 | */ |
|
164 | public int getSampleSize() { |
|
165 | 534 | return sampleSize; |
166 | } |
|
167 | ||
168 | /** |
|
169 | * Return the highest domain value for the given hypergeometric distribution |
|
170 | * parameters. |
|
171 | * @param m number of successes in the population. |
|
172 | * @param k the sample size. |
|
173 | * @return the highest domain value of the hypergeometric distribution. |
|
174 | */ |
|
175 | private int getUpperDomain(int m, int k){ |
|
176 | 498 | return Math.min(k, m); |
177 | } |
|
178 | ||
179 | /** |
|
180 | * For this disbution, X, this method returns P(X = x). |
|
181 | * |
|
182 | * @param x the value at which the PMF is evaluated. |
|
183 | * @return PMF for this distribution. |
|
184 | */ |
|
185 | public double probability(int x) { |
|
186 | double ret; |
|
187 | ||
188 | 118 | int n = getPopulationSize(); |
189 | 118 | int m = getNumberOfSuccesses(); |
190 | 118 | int k = getSampleSize(); |
191 | ||
192 | 118 | int[] domain = getDomain(n, m, k); |
193 | 118 | if(x < domain[0] || x > domain[1]){ |
194 | 28 | ret = 0.0; |
195 | } else { |
|
196 | 90 | ret = probability(n, m, k, x); |
197 | } |
|
198 | ||
199 | 118 | return ret; |
200 | } |
|
201 | ||
202 | /** |
|
203 | * For the disbution, X, defined by the given hypergeometric distribution |
|
204 | * parameters, this method returns P(X = x). |
|
205 | * |
|
206 | * @param n the population size. |
|
207 | * @param m number of successes in the population. |
|
208 | * @param k the sample size. |
|
209 | * @param x the value at which the PMF is evaluated. |
|
210 | * @return PMF for the distribution. |
|
211 | */ |
|
212 | private double probability(int n, int m, int k, int x) { |
|
213 | 5942 | return Math.exp(MathUtils.binomialCoefficientLog(m, x) + |
214 | MathUtils.binomialCoefficientLog(n - m, k - x) - |
|
215 | MathUtils.binomialCoefficientLog(n, k)); |
|
216 | } |
|
217 | ||
218 | /** |
|
219 | * Modify the number of successes. |
|
220 | * @param num the new number of successes. |
|
221 | * @throws IllegalArgumentException if <code>num</code> is negative. |
|
222 | */ |
|
223 | public void setNumberOfSuccesses(int num) { |
|
224 | 38 | if(num < 0){ |
225 | 2 | throw new IllegalArgumentException( |
226 | "number of successes must be non-negative."); |
|
227 | } |
|
228 | 36 | numberOfSuccesses = num; |
229 | 36 | } |
230 | ||
231 | /** |
|
232 | * Modify the population size. |
|
233 | * @param size the new population size. |
|
234 | * @throws IllegalArgumentException if <code>size</code> is not positive. |
|
235 | */ |
|
236 | public void setPopulationSize(int size) { |
|
237 | 44 | if(size <= 0){ |
238 | 2 | throw new IllegalArgumentException( |
239 | "population size must be positive."); |
|
240 | } |
|
241 | 42 | populationSize = size; |
242 | 42 | } |
243 | ||
244 | /** |
|
245 | * Modify the sample size. |
|
246 | * @param size the new sample size. |
|
247 | * @throws IllegalArgumentException if <code>size</code> is negative. |
|
248 | */ |
|
249 | public void setSampleSize(int size) { |
|
250 | 40 | if (size < 0) { |
251 | 2 | throw new IllegalArgumentException( |
252 | "sample size must be non-negative."); |
|
253 | } |
|
254 | 38 | sampleSize = size; |
255 | 38 | } |
256 | ||
257 | /** |
|
258 | * For this disbution, X, this method returns P(X ≥ x). |
|
259 | * @param x the value at which the CDF is evaluated. |
|
260 | * @return upper tail CDF for this distribution. |
|
261 | * @since 1.1 |
|
262 | */ |
|
263 | public double upperCumulativeProbability(int x) { |
|
264 | double ret; |
|
265 | ||
266 | 72 | int n = getPopulationSize(); |
267 | 72 | int m = getNumberOfSuccesses(); |
268 | 72 | int k = getSampleSize(); |
269 | ||
270 | 72 | int[] domain = getDomain(n, m, k); |
271 | 72 | if (x < domain[0]) { |
272 | 0 | ret = 1.0; |
273 | 72 | } else if(x > domain[1]) { |
274 | 0 | ret = 0.0; |
275 | } else { |
|
276 | 72 | ret = innerCumulativeProbability(domain[1], x, -1, n, m, k); |
277 | } |
|
278 | ||
279 | 72 | return ret; |
280 | } |
|
281 | ||
282 | /** |
|
283 | * For this disbution, X, this method returns P(x0 ≤ X ≤ x1). This |
|
284 | * probability is computed by summing the point probabilities for the values |
|
285 | * x0, x0 + 1, x0 + 2, ..., x1, in the order directed by dx. |
|
286 | * @param x0 the inclusive, lower bound |
|
287 | * @param x1 the inclusive, upper bound |
|
288 | * @param dx the direction of summation. 1 indicates summing from x0 to x1. |
|
289 | * 0 indicates summing from x1 to x0. |
|
290 | * @param n the population size. |
|
291 | * @param m number of successes in the population. |
|
292 | * @param k the sample size. |
|
293 | * @return P(x0 ≤ X ≤ x1). |
|
294 | */ |
|
295 | private double innerCumulativeProbability( |
|
296 | int x0, int x1, int dx, int n, int m, int k) |
|
297 | { |
|
298 | 272 | double ret = probability(n, m, k, x0); |
299 | 5852 | while (x0 != x1) { |
300 | 5580 | x0 += dx; |
301 | 5580 | ret += probability(n, m, k, x0); |
302 | } |
|
303 | 272 | return ret; |
304 | } |
|
305 | } |