View Javadoc

1   /*
2    * Copyright 2003-2004 The Apache Software Foundation.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *      http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  package org.apache.commons.math.distribution;
18  
19  import java.io.Serializable;
20  
21  import org.apache.commons.math.util.MathUtils;
22  
23  /**
24   * The default implementation of {@link HypergeometricDistribution}.
25   *
26   * @version $Revision: 348888 $ $Date: 2005-11-24 23:21:25 -0700 (Thu, 24 Nov 2005) $
27   */
28  public class HypergeometricDistributionImpl extends AbstractIntegerDistribution
29      implements HypergeometricDistribution, Serializable 
30  {
31  
32      /** Serializable version identifier */
33      private static final long serialVersionUID = -436928820673516179L;
34  
35      /** The number of successes in the population. */
36      private int numberOfSuccesses;
37      
38      /** The population size. */
39      private int populationSize;
40      
41      /** The sample size. */
42      private int sampleSize;
43      
44      /**
45       * Construct a new hypergeometric distribution with the given the population
46       * size, the number of successes in the population, and the sample size.
47       * @param populationSize the population size.
48       * @param numberOfSuccesses number of successes in the population.
49       * @param sampleSize the sample size.
50       */
51      public HypergeometricDistributionImpl(int populationSize,
52          int numberOfSuccesses, int sampleSize) {
53          super();
54          if (numberOfSuccesses > populationSize) {
55              throw new IllegalArgumentException(
56                  "number of successes must be less than or equal to " +
57                  "population size");
58          }
59          if (sampleSize > populationSize) {
60              throw new IllegalArgumentException(
61              "sample size must be less than or equal to population size");
62          }
63          setPopulationSize(populationSize);
64          setSampleSize(sampleSize);
65          setNumberOfSuccesses(numberOfSuccesses);
66      }
67  
68      /**
69       * For this disbution, X, this method returns P(X ≤ x).
70       * @param x the value at which the PDF is evaluated.
71       * @return PDF for this distribution. 
72       */
73      public double cumulativeProbability(int x) {
74          double ret;
75          
76          int n = getPopulationSize();
77          int m = getNumberOfSuccesses();
78          int k = getSampleSize();
79  
80          int[] domain = getDomain(n, m, k);
81          if (x < domain[0]) {
82              ret = 0.0;
83          } else if(x >= domain[1]) {
84              ret = 1.0;
85          } else {
86              ret = innerCumulativeProbability(domain[0], x, 1, n, m, k);
87          }
88          
89          return ret;
90      }
91  
92      /**
93       * Return the domain for the given hypergeometric distribution parameters.
94       * @param n the population size.
95       * @param m number of successes in the population.
96       * @param k the sample size.
97       * @return a two element array containing the lower and upper bounds of the
98       *         hypergeometric distribution.  
99       */
100     private int[] getDomain(int n, int m, int k){
101         return new int[]{
102             getLowerDomain(n, m, k),
103             getUpperDomain(m, k)
104         };
105     }
106     
107     /**
108      * Access the domain value lower bound, based on <code>p</code>, used to
109      * bracket a PDF root.
110      * 
111      * @param p the desired probability for the critical value
112      * @return domain value lower bound, i.e.
113      *         P(X &lt; <i>lower bound</i>) &lt; <code>p</code> 
114      */
115     protected int getDomainLowerBound(double p) {
116         return getLowerDomain(getPopulationSize(), getNumberOfSuccesses(),
117             getSampleSize());
118     }
119     
120     /**
121      * Access the domain value upper bound, based on <code>p</code>, used to
122      * bracket a PDF root.
123      * 
124      * @param p the desired probability for the critical value
125      * @return domain value upper bound, i.e.
126      *         P(X &lt; <i>upper bound</i>) &gt; <code>p</code> 
127      */
128     protected int getDomainUpperBound(double p) {
129         return getUpperDomain(getSampleSize(), getNumberOfSuccesses());
130     }
131 
132     /**
133      * Return the lowest domain value for the given hypergeometric distribution
134      * parameters.
135      * @param n the population size.
136      * @param m number of successes in the population.
137      * @param k the sample size.
138      * @return the lowest domain value of the hypergeometric distribution.  
139      */
140     private int getLowerDomain(int n, int m, int k) {
141         return Math.max(0, m - (n - k));
142     }
143 
144     /**
145      * Access the number of successes.
146      * @return the number of successes.
147      */
148     public int getNumberOfSuccesses() {
149         return numberOfSuccesses;
150     }
151 
152     /**
153      * Access the population size.
154      * @return the population size.
155      */
156     public int getPopulationSize() {
157         return populationSize;
158     }
159 
160     /**
161      * Access the sample size.
162      * @return the sample size.
163      */
164     public int getSampleSize() {
165         return sampleSize;
166     }
167 
168     /**
169      * Return the highest domain value for the given hypergeometric distribution
170      * parameters.
171      * @param m number of successes in the population.
172      * @param k the sample size.
173      * @return the highest domain value of the hypergeometric distribution.  
174      */
175     private int getUpperDomain(int m, int k){
176         return Math.min(k, m);
177     }
178 
179     /**
180      * For this disbution, X, this method returns P(X = x).
181      * 
182      * @param x the value at which the PMF is evaluated.
183      * @return PMF for this distribution. 
184      */
185     public double probability(int x) {
186         double ret;
187         
188         int n = getPopulationSize();
189         int m = getNumberOfSuccesses();
190         int k = getSampleSize();
191 
192         int[] domain = getDomain(n, m, k);
193         if(x < domain[0] || x > domain[1]){
194             ret = 0.0;
195         } else {
196             ret = probability(n, m, k, x);
197         }
198         
199         return ret;
200     }
201     
202     /**
203      * For the disbution, X, defined by the given hypergeometric distribution
204      * parameters, this method returns P(X = x).
205      * 
206      * @param n the population size.
207      * @param m number of successes in the population.
208      * @param k the sample size.
209      * @param x the value at which the PMF is evaluated.
210      * @return PMF for the distribution. 
211      */
212     private double probability(int n, int m, int k, int x) {
213         return Math.exp(MathUtils.binomialCoefficientLog(m, x) +
214             MathUtils.binomialCoefficientLog(n - m, k - x) -
215             MathUtils.binomialCoefficientLog(n, k));
216     }
217 
218     /**
219      * Modify the number of successes.
220      * @param num the new number of successes.
221      * @throws IllegalArgumentException if <code>num</code> is negative.
222      */
223     public void setNumberOfSuccesses(int num) {
224         if(num < 0){
225             throw new IllegalArgumentException(
226                 "number of successes must be non-negative.");
227         }
228         numberOfSuccesses = num;
229     }
230 
231     /**
232      * Modify the population size.
233      * @param size the new population size.
234      * @throws IllegalArgumentException if <code>size</code> is not positive.
235      */
236     public void setPopulationSize(int size) {
237         if(size <= 0){
238             throw new IllegalArgumentException(
239                 "population size must be positive.");
240         }
241         populationSize = size;
242     }
243     
244     /**
245      * Modify the sample size.
246      * @param size the new sample size.
247      * @throws IllegalArgumentException if <code>size</code> is negative.
248      */
249     public void setSampleSize(int size) {
250         if (size < 0) {
251             throw new IllegalArgumentException(
252                 "sample size must be non-negative.");
253         }    
254         sampleSize = size;
255     }
256 
257     /**
258      * For this disbution, X, this method returns P(X &ge; x).
259      * @param x the value at which the CDF is evaluated.
260      * @return upper tail CDF for this distribution.
261      * @since 1.1
262      */
263     public double upperCumulativeProbability(int x) {
264         double ret;
265         
266         int n = getPopulationSize();
267         int m = getNumberOfSuccesses();
268         int k = getSampleSize();
269 
270         int[] domain = getDomain(n, m, k);
271         if (x < domain[0]) {
272             ret = 1.0;
273         } else if(x > domain[1]) {
274             ret = 0.0;
275         } else {
276             ret = innerCumulativeProbability(domain[1], x, -1, n, m, k);
277         }
278         
279         return ret;
280     }
281     
282     /**
283      * For this disbution, X, this method returns P(x0 &le; X &le; x1).  This
284      * probability is computed by summing the point probabilities for the values
285      * x0, x0 + 1, x0 + 2, ..., x1, in the order directed by dx. 
286      * @param x0 the inclusive, lower bound
287      * @param x1 the inclusive, upper bound
288      * @param dx the direction of summation. 1 indicates summing from x0 to x1.
289      *           0 indicates summing from x1 to x0.
290      * @param n the population size.
291      * @param m number of successes in the population.
292      * @param k the sample size.
293      * @return P(x0 &le; X &le; x1). 
294      */
295     private double innerCumulativeProbability(
296         int x0, int x1, int dx, int n, int m, int k)
297     {
298         double ret = probability(n, m, k, x0);
299         while (x0 != x1) {
300             x0 += dx;
301             ret += probability(n, m, k, x0);
302         }
303         return ret;
304     }
305 }