1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.math.distribution;
18
19 import java.io.Serializable;
20
21 import org.apache.commons.math.util.MathUtils;
22
23 /**
24 * The default implementation of {@link HypergeometricDistribution}.
25 *
26 * @version $Revision: 348888 $ $Date: 2005-11-24 23:21:25 -0700 (Thu, 24 Nov 2005) $
27 */
28 public class HypergeometricDistributionImpl extends AbstractIntegerDistribution
29 implements HypergeometricDistribution, Serializable
30 {
31
32 /** Serializable version identifier */
33 private static final long serialVersionUID = -436928820673516179L;
34
35 /** The number of successes in the population. */
36 private int numberOfSuccesses;
37
38 /** The population size. */
39 private int populationSize;
40
41 /** The sample size. */
42 private int sampleSize;
43
44 /**
45 * Construct a new hypergeometric distribution with the given the population
46 * size, the number of successes in the population, and the sample size.
47 * @param populationSize the population size.
48 * @param numberOfSuccesses number of successes in the population.
49 * @param sampleSize the sample size.
50 */
51 public HypergeometricDistributionImpl(int populationSize,
52 int numberOfSuccesses, int sampleSize) {
53 super();
54 if (numberOfSuccesses > populationSize) {
55 throw new IllegalArgumentException(
56 "number of successes must be less than or equal to " +
57 "population size");
58 }
59 if (sampleSize > populationSize) {
60 throw new IllegalArgumentException(
61 "sample size must be less than or equal to population size");
62 }
63 setPopulationSize(populationSize);
64 setSampleSize(sampleSize);
65 setNumberOfSuccesses(numberOfSuccesses);
66 }
67
68 /**
69 * For this disbution, X, this method returns P(X ≤ x).
70 * @param x the value at which the PDF is evaluated.
71 * @return PDF for this distribution.
72 */
73 public double cumulativeProbability(int x) {
74 double ret;
75
76 int n = getPopulationSize();
77 int m = getNumberOfSuccesses();
78 int k = getSampleSize();
79
80 int[] domain = getDomain(n, m, k);
81 if (x < domain[0]) {
82 ret = 0.0;
83 } else if(x >= domain[1]) {
84 ret = 1.0;
85 } else {
86 ret = innerCumulativeProbability(domain[0], x, 1, n, m, k);
87 }
88
89 return ret;
90 }
91
92 /**
93 * Return the domain for the given hypergeometric distribution parameters.
94 * @param n the population size.
95 * @param m number of successes in the population.
96 * @param k the sample size.
97 * @return a two element array containing the lower and upper bounds of the
98 * hypergeometric distribution.
99 */
100 private int[] getDomain(int n, int m, int k){
101 return new int[]{
102 getLowerDomain(n, m, k),
103 getUpperDomain(m, k)
104 };
105 }
106
107 /**
108 * Access the domain value lower bound, based on <code>p</code>, used to
109 * bracket a PDF root.
110 *
111 * @param p the desired probability for the critical value
112 * @return domain value lower bound, i.e.
113 * P(X < <i>lower bound</i>) < <code>p</code>
114 */
115 protected int getDomainLowerBound(double p) {
116 return getLowerDomain(getPopulationSize(), getNumberOfSuccesses(),
117 getSampleSize());
118 }
119
120 /**
121 * Access the domain value upper bound, based on <code>p</code>, used to
122 * bracket a PDF root.
123 *
124 * @param p the desired probability for the critical value
125 * @return domain value upper bound, i.e.
126 * P(X < <i>upper bound</i>) > <code>p</code>
127 */
128 protected int getDomainUpperBound(double p) {
129 return getUpperDomain(getSampleSize(), getNumberOfSuccesses());
130 }
131
132 /**
133 * Return the lowest domain value for the given hypergeometric distribution
134 * parameters.
135 * @param n the population size.
136 * @param m number of successes in the population.
137 * @param k the sample size.
138 * @return the lowest domain value of the hypergeometric distribution.
139 */
140 private int getLowerDomain(int n, int m, int k) {
141 return Math.max(0, m - (n - k));
142 }
143
144 /**
145 * Access the number of successes.
146 * @return the number of successes.
147 */
148 public int getNumberOfSuccesses() {
149 return numberOfSuccesses;
150 }
151
152 /**
153 * Access the population size.
154 * @return the population size.
155 */
156 public int getPopulationSize() {
157 return populationSize;
158 }
159
160 /**
161 * Access the sample size.
162 * @return the sample size.
163 */
164 public int getSampleSize() {
165 return sampleSize;
166 }
167
168 /**
169 * Return the highest domain value for the given hypergeometric distribution
170 * parameters.
171 * @param m number of successes in the population.
172 * @param k the sample size.
173 * @return the highest domain value of the hypergeometric distribution.
174 */
175 private int getUpperDomain(int m, int k){
176 return Math.min(k, m);
177 }
178
179 /**
180 * For this disbution, X, this method returns P(X = x).
181 *
182 * @param x the value at which the PMF is evaluated.
183 * @return PMF for this distribution.
184 */
185 public double probability(int x) {
186 double ret;
187
188 int n = getPopulationSize();
189 int m = getNumberOfSuccesses();
190 int k = getSampleSize();
191
192 int[] domain = getDomain(n, m, k);
193 if(x < domain[0] || x > domain[1]){
194 ret = 0.0;
195 } else {
196 ret = probability(n, m, k, x);
197 }
198
199 return ret;
200 }
201
202 /**
203 * For the disbution, X, defined by the given hypergeometric distribution
204 * parameters, this method returns P(X = x).
205 *
206 * @param n the population size.
207 * @param m number of successes in the population.
208 * @param k the sample size.
209 * @param x the value at which the PMF is evaluated.
210 * @return PMF for the distribution.
211 */
212 private double probability(int n, int m, int k, int x) {
213 return Math.exp(MathUtils.binomialCoefficientLog(m, x) +
214 MathUtils.binomialCoefficientLog(n - m, k - x) -
215 MathUtils.binomialCoefficientLog(n, k));
216 }
217
218 /**
219 * Modify the number of successes.
220 * @param num the new number of successes.
221 * @throws IllegalArgumentException if <code>num</code> is negative.
222 */
223 public void setNumberOfSuccesses(int num) {
224 if(num < 0){
225 throw new IllegalArgumentException(
226 "number of successes must be non-negative.");
227 }
228 numberOfSuccesses = num;
229 }
230
231 /**
232 * Modify the population size.
233 * @param size the new population size.
234 * @throws IllegalArgumentException if <code>size</code> is not positive.
235 */
236 public void setPopulationSize(int size) {
237 if(size <= 0){
238 throw new IllegalArgumentException(
239 "population size must be positive.");
240 }
241 populationSize = size;
242 }
243
244 /**
245 * Modify the sample size.
246 * @param size the new sample size.
247 * @throws IllegalArgumentException if <code>size</code> is negative.
248 */
249 public void setSampleSize(int size) {
250 if (size < 0) {
251 throw new IllegalArgumentException(
252 "sample size must be non-negative.");
253 }
254 sampleSize = size;
255 }
256
257 /**
258 * For this disbution, X, this method returns P(X ≥ x).
259 * @param x the value at which the CDF is evaluated.
260 * @return upper tail CDF for this distribution.
261 * @since 1.1
262 */
263 public double upperCumulativeProbability(int x) {
264 double ret;
265
266 int n = getPopulationSize();
267 int m = getNumberOfSuccesses();
268 int k = getSampleSize();
269
270 int[] domain = getDomain(n, m, k);
271 if (x < domain[0]) {
272 ret = 1.0;
273 } else if(x > domain[1]) {
274 ret = 0.0;
275 } else {
276 ret = innerCumulativeProbability(domain[1], x, -1, n, m, k);
277 }
278
279 return ret;
280 }
281
282 /**
283 * For this disbution, X, this method returns P(x0 ≤ X ≤ x1). This
284 * probability is computed by summing the point probabilities for the values
285 * x0, x0 + 1, x0 + 2, ..., x1, in the order directed by dx.
286 * @param x0 the inclusive, lower bound
287 * @param x1 the inclusive, upper bound
288 * @param dx the direction of summation. 1 indicates summing from x0 to x1.
289 * 0 indicates summing from x1 to x0.
290 * @param n the population size.
291 * @param m number of successes in the population.
292 * @param k the sample size.
293 * @return P(x0 ≤ X ≤ x1).
294 */
295 private double innerCumulativeProbability(
296 int x0, int x1, int dx, int n, int m, int k)
297 {
298 double ret = probability(n, m, k, x0);
299 while (x0 != x1) {
300 x0 += dx;
301 ret += probability(n, m, k, x0);
302 }
303 return ret;
304 }
305 }