Coverage Report - org.apache.commons.math.distribution.HypergeometricDistributionImpl

Classes in this File Line Coverage Branch Coverage Complexity
HypergeometricDistributionImpl
97% 
100% 
1.941

 1  
 /*
 2  
  * Copyright 2003-2004 The Apache Software Foundation.
 3  
  *
 4  
  * Licensed under the Apache License, Version 2.0 (the "License");
 5  
  * you may not use this file except in compliance with the License.
 6  
  * You may obtain a copy of the License at
 7  
  *
 8  
  *      http://www.apache.org/licenses/LICENSE-2.0
 9  
  *
 10  
  * Unless required by applicable law or agreed to in writing, software
 11  
  * distributed under the License is distributed on an "AS IS" BASIS,
 12  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13  
  * See the License for the specific language governing permissions and
 14  
  * limitations under the License.
 15  
  */
 16  
 
 17  
 package org.apache.commons.math.distribution;
 18  
 
 19  
 import java.io.Serializable;
 20  
 
 21  
 import org.apache.commons.math.util.MathUtils;
 22  
 
 23  
 /**
 24  
  * The default implementation of {@link HypergeometricDistribution}.
 25  
  *
 26  
  * @version $Revision$ $Date: 2005-08-26 07:05:45 -0700 (Fri, 26 Aug 2005) $
 27  
  */
 28  
 public class HypergeometricDistributionImpl extends AbstractIntegerDistribution
 29  
     implements HypergeometricDistribution, Serializable 
 30  
 {
 31  
 
 32  
     /** Serializable version identifier */
 33  
     static final long serialVersionUID = -436928820673516179L;
 34  
 
 35  
     /** The number of successes in the population. */
 36  
     private int numberOfSuccesses;
 37  
     
 38  
     /** The population size. */
 39  
     private int populationSize;
 40  
     
 41  
     /** The sample size. */
 42  
     private int sampleSize;
 43  
     
 44  
     /**
 45  
      * Construct a new hypergeometric distribution with the given the population
 46  
      * size, the number of successes in the population, and the sample size.
 47  
      * @param populationSize the population size.
 48  
      * @param numberOfSuccesses number of successes in the population.
 49  
      * @param sampleSize the sample size.
 50  
      */
 51  
     public HypergeometricDistributionImpl(int populationSize,
 52  
         int numberOfSuccesses, int sampleSize) {
 53  46
         super();
 54  46
         if (numberOfSuccesses > populationSize) {
 55  4
             throw new IllegalArgumentException(
 56  
                     "number of successes must be less than or equal to " +
 57  
                     "population size");
 58  
         }
 59  42
         if (sampleSize > populationSize) {
 60  2
             throw new IllegalArgumentException(
 61  
             "sample size must be less than or equal to population size");
 62  
         }
 63  40
         setPopulationSize(populationSize);
 64  40
         setSampleSize(sampleSize);
 65  38
         setNumberOfSuccesses(numberOfSuccesses);
 66  36
     }
 67  
 
 68  
     /**
 69  
      * For this disbution, X, this method returns P(X ≤ x).
 70  
      * @param x the value at which the PDF is evaluated.
 71  
      * @return PDF for this distribution. 
 72  
      */
 73  
     public double cumulativeProbability(int x) {
 74  
         double ret;
 75  
         
 76  272
         int n = getPopulationSize();
 77  272
         int m = getNumberOfSuccesses();
 78  272
         int k = getSampleSize();
 79  
 
 80  272
         int[] domain = getDomain(n, m, k);
 81  272
         if (x < domain[0]) {
 82  32
             ret = 0.0;
 83  240
         } else if(x >= domain[1]) {
 84  40
             ret = 1.0;
 85  
         } else {
 86  200
             ret = innerCumulativeProbability(domain[0], x, 1, n, m, k);
 87  
         }
 88  
         
 89  272
         return ret;
 90  
     }
 91  
 
 92  
     /**
 93  
      * Return the domain for the given hypergeometric distribution parameters.
 94  
      * @param n the population size.
 95  
      * @param m number of successes in the population.
 96  
      * @param k the sample size.
 97  
      * @return a two element array containing the lower and upper bounds of the
 98  
      *         hypergeometric distribution.  
 99  
      */
 100  
     private int[] getDomain(int n, int m, int k){
 101  462
         return new int[]{
 102  
             getLowerDomain(n, m, k),
 103  
             getUpperDomain(m, k)
 104  
         };
 105  
     }
 106  
     
 107  
     /**
 108  
      * Access the domain value lower bound, based on <code>p</code>, used to
 109  
      * bracket a PDF root.
 110  
      * 
 111  
      * @param p the desired probability for the critical value
 112  
      * @return domain value lower bound, i.e.
 113  
      *         P(X &lt; <i>lower bound</i>) &lt; <code>p</code> 
 114  
      */
 115  
     protected int getDomainLowerBound(double p) {
 116  36
         return getLowerDomain(getPopulationSize(), getNumberOfSuccesses(),
 117  
             getSampleSize());
 118  
     }
 119  
     
 120  
     /**
 121  
      * Access the domain value upper bound, based on <code>p</code>, used to
 122  
      * bracket a PDF root.
 123  
      * 
 124  
      * @param p the desired probability for the critical value
 125  
      * @return domain value upper bound, i.e.
 126  
      *         P(X &lt; <i>upper bound</i>) &gt; <code>p</code> 
 127  
      */
 128  
     protected int getDomainUpperBound(double p) {
 129  36
         return getUpperDomain(getSampleSize(), getNumberOfSuccesses());
 130  
     }
 131  
 
 132  
     /**
 133  
      * Return the lowest domain value for the given hypergeometric distribution
 134  
      * parameters.
 135  
      * @param n the population size.
 136  
      * @param m number of successes in the population.
 137  
      * @param k the sample size.
 138  
      * @return the lowest domain value of the hypergeometric distribution.  
 139  
      */
 140  
     private int getLowerDomain(int n, int m, int k) {
 141  498
         return Math.max(0, m - (n - k));
 142  
     }
 143  
 
 144  
     /**
 145  
      * Access the number of successes.
 146  
      * @return the number of successes.
 147  
      */
 148  
     public int getNumberOfSuccesses() {
 149  534
         return numberOfSuccesses;
 150  
     }
 151  
 
 152  
     /**
 153  
      * Access the population size.
 154  
      * @return the population size.
 155  
      */
 156  
     public int getPopulationSize() {
 157  500
         return populationSize;
 158  
     }
 159  
 
 160  
     /**
 161  
      * Access the sample size.
 162  
      * @return the sample size.
 163  
      */
 164  
     public int getSampleSize() {
 165  534
         return sampleSize;
 166  
     }
 167  
 
 168  
     /**
 169  
      * Return the highest domain value for the given hypergeometric distribution
 170  
      * parameters.
 171  
      * @param m number of successes in the population.
 172  
      * @param k the sample size.
 173  
      * @return the highest domain value of the hypergeometric distribution.  
 174  
      */
 175  
     private int getUpperDomain(int m, int k){
 176  498
         return Math.min(k, m);
 177  
     }
 178  
 
 179  
     /**
 180  
      * For this disbution, X, this method returns P(X = x).
 181  
      * 
 182  
      * @param x the value at which the PMF is evaluated.
 183  
      * @return PMF for this distribution. 
 184  
      */
 185  
     public double probability(int x) {
 186  
         double ret;
 187  
         
 188  118
         int n = getPopulationSize();
 189  118
         int m = getNumberOfSuccesses();
 190  118
         int k = getSampleSize();
 191  
 
 192  118
         int[] domain = getDomain(n, m, k);
 193  118
         if(x < domain[0] || x > domain[1]){
 194  28
             ret = 0.0;
 195  
         } else {
 196  90
             ret = probability(n, m, k, x);
 197  
         }
 198  
         
 199  118
         return ret;
 200  
     }
 201  
     
 202  
     /**
 203  
      * For the disbution, X, defined by the given hypergeometric distribution
 204  
      * parameters, this method returns P(X = x).
 205  
      * 
 206  
      * @param n the population size.
 207  
      * @param m number of successes in the population.
 208  
      * @param k the sample size.
 209  
      * @param x the value at which the PMF is evaluated.
 210  
      * @return PMF for the distribution. 
 211  
      */
 212  
     private double probability(int n, int m, int k, int x) {
 213  5942
         return Math.exp(MathUtils.binomialCoefficientLog(m, x) +
 214  
             MathUtils.binomialCoefficientLog(n - m, k - x) -
 215  
             MathUtils.binomialCoefficientLog(n, k));
 216  
     }
 217  
 
 218  
     /**
 219  
      * Modify the number of successes.
 220  
      * @param num the new number of successes.
 221  
      * @throws IllegalArgumentException if <code>num</code> is negative.
 222  
      */
 223  
     public void setNumberOfSuccesses(int num) {
 224  38
         if(num < 0){
 225  2
             throw new IllegalArgumentException(
 226  
                 "number of successes must be non-negative.");
 227  
         }
 228  36
         numberOfSuccesses = num;
 229  36
     }
 230  
 
 231  
     /**
 232  
      * Modify the population size.
 233  
      * @param size the new population size.
 234  
      * @throws IllegalArgumentException if <code>size</code> is not positive.
 235  
      */
 236  
     public void setPopulationSize(int size) {
 237  44
         if(size <= 0){
 238  2
             throw new IllegalArgumentException(
 239  
                 "population size must be positive.");
 240  
         }
 241  42
         populationSize = size;
 242  42
     }
 243  
     
 244  
         /**
 245  
      * Modify the sample size.
 246  
      * @param size the new sample size.
 247  
      * @throws IllegalArgumentException if <code>size</code> is negative.
 248  
      */
 249  
     public void setSampleSize(int size) {
 250  40
         if (size < 0) {
 251  2
             throw new IllegalArgumentException(
 252  
                 "sample size must be non-negative.");
 253  
         }    
 254  38
         sampleSize = size;
 255  38
     }
 256  
 
 257  
     /**
 258  
      * For this disbution, X, this method returns P(X &ge; x).
 259  
      * @param x the value at which the CDF is evaluated.
 260  
      * @return upper tail CDF for this distribution.
 261  
      * @since 1.1
 262  
      */
 263  
         public double upperCumulativeProbability(int x) {
 264  
             double ret;
 265  
             
 266  72
         int n = getPopulationSize();
 267  72
         int m = getNumberOfSuccesses();
 268  72
         int k = getSampleSize();
 269  
 
 270  72
         int[] domain = getDomain(n, m, k);
 271  72
         if (x < domain[0]) {
 272  0
             ret = 1.0;
 273  72
         } else if(x > domain[1]) {
 274  0
             ret = 0.0;
 275  
         } else {
 276  72
                 ret = innerCumulativeProbability(domain[1], x, -1, n, m, k);
 277  
         }
 278  
         
 279  72
         return ret;
 280  
     }
 281  
         
 282  
     /**
 283  
      * For this disbution, X, this method returns P(x0 &le; X &le; x1).  This
 284  
      * probability is computed by summing the point probabilities for the values
 285  
      * x0, x0 + 1, x0 + 2, ..., x1, in the order directed by dx. 
 286  
      * @param x0 the inclusive, lower bound
 287  
      * @param x1 the inclusive, upper bound
 288  
      * @param dx the direction of summation. 1 indicates summing from x0 to x1.
 289  
      *           0 indicates summing from x1 to x0.
 290  
      * @param n the population size.
 291  
      * @param m number of successes in the population.
 292  
      * @param k the sample size.
 293  
      * @return P(x0 &le; X &le; x1). 
 294  
      */
 295  
     private double innerCumulativeProbability(
 296  
             int x0, int x1, int dx, int n, int m, int k)
 297  
     {
 298  272
             double ret = probability(n, m, k, x0);
 299  5852
             while (x0 != x1) {
 300  5580
                     x0 += dx;
 301  5580
                     ret += probability(n, m, k, x0);
 302  
             }
 303  272
                 return ret;
 304  
         }
 305  
 }