1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package org.apache.commons.math.stat;
17
18 import org.apache.commons.math.stat.descriptive.UnivariateStatistic;
19 import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
20 import org.apache.commons.math.stat.descriptive.moment.Mean;
21 import org.apache.commons.math.stat.descriptive.moment.Variance;
22 import org.apache.commons.math.stat.descriptive.rank.Max;
23 import org.apache.commons.math.stat.descriptive.rank.Min;
24 import org.apache.commons.math.stat.descriptive.rank.Percentile;
25 import org.apache.commons.math.stat.descriptive.summary.Product;
26 import org.apache.commons.math.stat.descriptive.summary.Sum;
27 import org.apache.commons.math.stat.descriptive.summary.SumOfLogs;
28 import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
29
30 /**
31 * StatUtils provides static methods for computing statistics based on data
32 * stored in double[] arrays.
33 *
34 * @version $Revision: 233996 $ $Date: 2005-08-19 21:26:27 -0700 (Fri, 19 Aug 2005) $
35 */
36 public final class StatUtils {
37
38 /** sum */
39 private static UnivariateStatistic sum = new Sum();
40
41 /** sumSq */
42 private static UnivariateStatistic sumSq = new SumOfSquares();
43
44 /** prod */
45 private static UnivariateStatistic prod = new Product();
46
47 /** sumLog */
48 private static UnivariateStatistic sumLog = new SumOfLogs();
49
50 /** min */
51 private static UnivariateStatistic min = new Min();
52
53 /** max */
54 private static UnivariateStatistic max = new Max();
55
56 /** mean */
57 private static UnivariateStatistic mean = new Mean();
58
59 /** variance */
60 private static Variance variance = new Variance();
61
62 /** percentile */
63 private static Percentile percentile = new Percentile();
64
65 /** geometric mean */
66 private static GeometricMean geometricMean = new GeometricMean();
67
68 /**
69 * Private Constructor
70 */
71 private StatUtils() {
72 }
73
74 /**
75 * Returns the sum of the values in the input array, or
76 * <code>Double.NaN</code> if the array is empty.
77 * <p>
78 * Throws <code>IllegalArgumentException</code> if the input array
79 * is null.
80 *
81 * @param values array of values to sum
82 * @return the sum of the values or <code>Double.NaN</code> if the array
83 * is empty
84 * @throws IllegalArgumentException if the array is null
85 */
86 public static double sum(final double[] values) {
87 return sum.evaluate(values);
88 }
89
90 /**
91 * Returns the sum of the entries in the specified portion of
92 * the input array, or <code>Double.NaN</code> if the designated subarray
93 * is empty.
94 * <p>
95 * Throws <code>IllegalArgumentException</code> if the array is null.
96 *
97 * @param values the input array
98 * @param begin index of the first array element to include
99 * @param length the number of elements to include
100 * @return the sum of the values or Double.NaN if length = 0
101 * @throws IllegalArgumentException if the array is null or the array index
102 * parameters are not valid
103 */
104 public static double sum(final double[] values, final int begin,
105 final int length) {
106 return sum.evaluate(values, begin, length);
107 }
108
109 /**
110 * Returns the sum of the squares of the entries in the input array, or
111 * <code>Double.NaN</code> if the array is empty.
112 * <p>
113 * Throws <code>IllegalArgumentException</code> if the array is null.
114 *
115 * @param values input array
116 * @return the sum of the squared values or <code>Double.NaN</code> if the
117 * array is empty
118 * @throws IllegalArgumentException if the array is null
119 */
120 public static double sumSq(final double[] values) {
121 return sumSq.evaluate(values);
122 }
123
124 /**
125 * Returns the sum of the squares of the entries in the specified portion of
126 * the input array, or <code>Double.NaN</code> if the designated subarray
127 * is empty.
128 * <p>
129 * Throws <code>IllegalArgumentException</code> if the array is null.
130 *
131 * @param values the input array
132 * @param begin index of the first array element to include
133 * @param length the number of elements to include
134 * @return the sum of the squares of the values or Double.NaN if length = 0
135 * @throws IllegalArgumentException if the array is null or the array index
136 * parameters are not valid
137 */
138 public static double sumSq(final double[] values, final int begin,
139 final int length) {
140 return sumSq.evaluate(values, begin, length);
141 }
142
143 /**
144 * Returns the product of the entries in the input array, or
145 * <code>Double.NaN</code> if the array is empty.
146 * <p>
147 * Throws <code>IllegalArgumentException</code> if the array is null.
148 *
149 * @param values the input array
150 * @return the product of the values or Double.NaN if the array is empty
151 * @throws IllegalArgumentException if the array is null
152 */
153 public static double product(final double[] values) {
154 return prod.evaluate(values);
155 }
156
157 /**
158 * Returns the product of the entries in the specified portion of
159 * the input array, or <code>Double.NaN</code> if the designated subarray
160 * is empty.
161 * <p>
162 * Throws <code>IllegalArgumentException</code> if the array is null.
163 *
164 * @param values the input array
165 * @param begin index of the first array element to include
166 * @param length the number of elements to include
167 * @return the product of the values or Double.NaN if length = 0
168 * @throws IllegalArgumentException if the array is null or the array index
169 * parameters are not valid
170 */
171 public static double product(final double[] values, final int begin,
172 final int length) {
173 return prod.evaluate(values, begin, length);
174 }
175
176 /**
177 * Returns the sum of the natural logs of the entries in the input array, or
178 * <code>Double.NaN</code> if the array is empty.
179 * <p>
180 * Throws <code>IllegalArgumentException</code> if the array is null.
181 * <p>
182 * See {@link org.apache.commons.math.stat.descriptive.summary.SumOfLogs}.
183 *
184 * @param values the input array
185 * @return the sum of the natural logs of the values or Double.NaN if
186 * the array is empty
187 * @throws IllegalArgumentException if the array is null
188 */
189 public static double sumLog(final double[] values) {
190 return sumLog.evaluate(values);
191 }
192
193 /**
194 * Returns the sum of the natural logs of the entries in the specified portion of
195 * the input array, or <code>Double.NaN</code> if the designated subarray
196 * is empty.
197 * <p>
198 * Throws <code>IllegalArgumentException</code> if the array is null.
199 * <p>
200 * See {@link org.apache.commons.math.stat.descriptive.summary.SumOfLogs}.
201 *
202 * @param values the input array
203 * @param begin index of the first array element to include
204 * @param length the number of elements to include
205 * @return the sum of the natural logs of the values or Double.NaN if
206 * length = 0
207 * @throws IllegalArgumentException if the array is null or the array index
208 * parameters are not valid
209 */
210 public static double sumLog(final double[] values, final int begin,
211 final int length) {
212 return sumLog.evaluate(values, begin, length);
213 }
214
215 /**
216 * Returns the arithmetic mean of the entries in the input array, or
217 * <code>Double.NaN</code> if the array is empty.
218 * <p>
219 * Throws <code>IllegalArgumentException</code> if the array is null.
220 * <p>
221 * See {@link org.apache.commons.math.stat.descriptive.moment.Mean} for
222 * details on the computing algorithm.
223 *
224 * @param values the input array
225 * @return the mean of the values or Double.NaN if the array is empty
226 * @throws IllegalArgumentException if the array is null
227 */
228 public static double mean(final double[] values) {
229 return mean.evaluate(values);
230 }
231
232 /**
233 * Returns the arithmetic mean of the entries in the specified portion of
234 * the input array, or <code>Double.NaN</code> if the designated subarray
235 * is empty.
236 * <p>
237 * Throws <code>IllegalArgumentException</code> if the array is null.
238 * <p>
239 * See {@link org.apache.commons.math.stat.descriptive.moment.Mean} for
240 * details on the computing algorithm.
241 *
242 * @param values the input array
243 * @param begin index of the first array element to include
244 * @param length the number of elements to include
245 * @return the mean of the values or Double.NaN if length = 0
246 * @throws IllegalArgumentException if the array is null or the array index
247 * parameters are not valid
248 */
249 public static double mean(final double[] values, final int begin,
250 final int length) {
251 return mean.evaluate(values, begin, length);
252 }
253
254 /**
255 * Returns the geometric mean of the entries in the input array, or
256 * <code>Double.NaN</code> if the array is empty.
257 * <p>
258 * Throws <code>IllegalArgumentException</code> if the array is null.
259 * <p>
260 * See {@link org.apache.commons.math.stat.descriptive.moment.GeometricMean}
261 * for details on the computing algorithm.
262 *
263 * @param values the input array
264 * @return the geometric mean of the values or Double.NaN if the array is empty
265 * @throws IllegalArgumentException if the array is null
266 */
267 public static double geometricMean(final double[] values) {
268 return geometricMean.evaluate(values);
269 }
270
271 /**
272 * Returns the geometric mean of the entries in the specified portion of
273 * the input array, or <code>Double.NaN</code> if the designated subarray
274 * is empty.
275 * <p>
276 * Throws <code>IllegalArgumentException</code> if the array is null.
277 * <p>
278 * See {@link org.apache.commons.math.stat.descriptive.moment.GeometricMean}
279 * for details on the computing algorithm.
280 *
281 * @param values the input array
282 * @param begin index of the first array element to include
283 * @param length the number of elements to include
284 * @return the geometric mean of the values or Double.NaN if length = 0
285 * @throws IllegalArgumentException if the array is null or the array index
286 * parameters are not valid
287 */
288 public static double geometricMean(final double[] values, final int begin,
289 final int length) {
290 return geometricMean.evaluate(values, begin, length);
291 }
292
293
294 /**
295 * Returns the variance of the entries in the input array, or
296 * <code>Double.NaN</code> if the array is empty.
297 * <p>
298 * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
299 * details on the computing algorithm.
300 * <p>
301 * Returns 0 for a single-value (i.e. length = 1) sample.
302 * <p>
303 * Throws <code>IllegalArgumentException</code> if the array is null.
304 *
305 * @param values the input array
306 * @return the variance of the values or Double.NaN if the array is empty
307 * @throws IllegalArgumentException if the array is null
308 */
309 public static double variance(final double[] values) {
310 return variance.evaluate(values);
311 }
312
313 /**
314 * Returns the variance of the entries in the specified portion of
315 * the input array, or <code>Double.NaN</code> if the designated subarray
316 * is empty.
317 * <p>
318 * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
319 * details on the computing algorithm.
320 * <p>
321 * Returns 0 for a single-value (i.e. length = 1) sample.
322 * <p>
323 * Throws <code>IllegalArgumentException</code> if the array is null or the
324 * array index parameters are not valid.
325 *
326 * @param values the input array
327 * @param begin index of the first array element to include
328 * @param length the number of elements to include
329 * @return the variance of the values or Double.NaN if length = 0
330 * @throws IllegalArgumentException if the array is null or the array index
331 * parameters are not valid
332 */
333 public static double variance(final double[] values, final int begin,
334 final int length) {
335 return variance.evaluate(values, begin, length);
336 }
337
338 /**
339 * Returns the variance of the entries in the specified portion of
340 * the input array, using the precomputed mean value. Returns
341 * <code>Double.NaN</code> if the designated subarray is empty.
342 * <p>
343 * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
344 * details on the computing algorithm.
345 * <p>
346 * The formula used assumes that the supplied mean value is the arithmetic
347 * mean of the sample data, not a known population parameter. This method
348 * is supplied only to save computation when the mean has already been
349 * computed.
350 * <p>
351 * Returns 0 for a single-value (i.e. length = 1) sample.
352 * <p>
353 * Throws <code>IllegalArgumentException</code> if the array is null or the
354 * array index parameters are not valid.
355 *
356 * @param values the input array
357 * @param mean the precomputed mean value
358 * @param begin index of the first array element to include
359 * @param length the number of elements to include
360 * @return the variance of the values or Double.NaN if length = 0
361 * @throws IllegalArgumentException if the array is null or the array index
362 * parameters are not valid
363 */
364 public static double variance(final double[] values, final double mean,
365 final int begin, final int length) {
366 return variance.evaluate(values, mean, begin, length);
367 }
368
369 /**
370 * Returns the variance of the entries in the input array, using the
371 * precomputed mean value. Returns <code>Double.NaN</code> if the array
372 * is empty.
373 * <p>
374 * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
375 * details on the computing algorithm.
376 * <p>
377 * The formula used assumes that the supplied mean value is the arithmetic
378 * mean of the sample data, not a known population parameter. This method
379 * is supplied only to save computation when the mean has already been
380 * computed.
381 * <p>
382 * Returns 0 for a single-value (i.e. length = 1) sample.
383 * <p>
384 * Throws <code>IllegalArgumentException</code> if the array is null.
385 *
386 * @param values the input array
387 * @param mean the precomputed mean value
388 * @return the variance of the values or Double.NaN if the array is empty
389 * @throws IllegalArgumentException if the array is null
390 */
391 public static double variance(final double[] values, final double mean) {
392 return variance.evaluate(values, mean);
393 }
394
395 /**
396 * Returns the maximum of the entries in the input array, or
397 * <code>Double.NaN</code> if the array is empty.
398 * <p>
399 * Throws <code>IllegalArgumentException</code> if the array is null.
400 * <p>
401 * <ul>
402 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
403 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
404 * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>,
405 * the result is <code>Double.POSITIVE_INFINITY.</code></li>
406 * </ul>
407 *
408 * @param values the input array
409 * @return the maximum of the values or Double.NaN if the array is empty
410 * @throws IllegalArgumentException if the array is null
411 */
412 public static double max(final double[] values) {
413 return max.evaluate(values);
414 }
415
416 /**
417 * Returns the maximum of the entries in the specified portion of
418 * the input array, or <code>Double.NaN</code> if the designated subarray
419 * is empty.
420 * <p>
421 * Throws <code>IllegalArgumentException</code> if the array is null or
422 * the array index parameters are not valid.
423 * <p>
424 * <ul>
425 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
426 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
427 * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>,
428 * the result is <code>Double.POSITIVE_INFINITY.</code></li>
429 * </ul>
430 *
431 * @param values the input array
432 * @param begin index of the first array element to include
433 * @param length the number of elements to include
434 * @return the maximum of the values or Double.NaN if length = 0
435 * @throws IllegalArgumentException if the array is null or the array index
436 * parameters are not valid
437 */
438 public static double max(final double[] values, final int begin,
439 final int length) {
440 return max.evaluate(values, begin, length);
441 }
442
443 /**
444 * Returns the minimum of the entries in the input array, or
445 * <code>Double.NaN</code> if the array is empty.
446 * <p>
447 * Throws <code>IllegalArgumentException</code> if the array is null.
448 * <p>
449 * <ul>
450 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
451 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
452 * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>,
453 * the result is <code>Double.NEGATIVE_INFINITY.</code></li>
454 * </ul>
455 *
456 * @param values the input array
457 * @return the minimum of the values or Double.NaN if the array is empty
458 * @throws IllegalArgumentException if the array is null
459 */
460 public static double min(final double[] values) {
461 return min.evaluate(values);
462 }
463
464 /**
465 * Returns the minimum of the entries in the specified portion of
466 * the input array, or <code>Double.NaN</code> if the designated subarray
467 * is empty.
468 * <p>
469 * Throws <code>IllegalArgumentException</code> if the array is null or
470 * the array index parameters are not valid.
471 * <p>
472 * <ul>
473 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
474 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
475 * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>,
476 * the result is <code>Double.NEGATIVE_INFINITY.</code></li>
477 * </ul>
478 *
479 * @param values the input array
480 * @param begin index of the first array element to include
481 * @param length the number of elements to include
482 * @return the minimum of the values or Double.NaN if length = 0
483 * @throws IllegalArgumentException if the array is null or the array index
484 * parameters are not valid
485 */
486 public static double min(final double[] values, final int begin,
487 final int length) {
488 return min.evaluate(values, begin, length);
489 }
490
491 /**
492 * Returns an estimate of the <code>p</code>th percentile of the values
493 * in the <code>values</code> array.
494 * <p>
495 * <ul>
496 * <li>Returns <code>Double.NaN</code> if <code>values</code> has length
497 * <code>0</code></li>
498 * <li>Returns (for any value of <code>p</code>) <code>values[0]</code>
499 * if <code>values</code> has length <code>1</code></li>
500 * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
501 * is null or p is not a valid quantile value (p must be greater than 0
502 * and less than or equal to 100)</li>
503 * </ul>
504 * <p>
505 * See {@link org.apache.commons.math.stat.descriptive.rank.Percentile} for
506 * a description of the percentile estimation algorithm used.
507 *
508 * @param values input array of values
509 * @param p the percentile value to compute
510 * @return the percentile value or Double.NaN if the array is empty
511 * @throws IllegalArgumentException if <code>values</code> is null
512 * or p is invalid
513 */
514 public static double percentile(final double[] values, final double p) {
515 return percentile.evaluate(values,p);
516 }
517
518 /**
519 * Returns an estimate of the <code>p</code>th percentile of the values
520 * in the <code>values</code> array, starting with the element in (0-based)
521 * position <code>begin</code> in the array and including <code>length</code>
522 * values.
523 * <p>
524 * <ul>
525 * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
526 * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code>
527 * if <code>length = 1 </code></li>
528 * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
529 * is null , <code>begin</code> or <code>length</code> is invalid, or
530 * <code>p</code> is not a valid quantile value (p must be greater than 0
531 * and less than or equal to 100)</li>
532 * </ul>
533 * <p>
534 * See {@link org.apache.commons.math.stat.descriptive.rank.Percentile} for
535 * a description of the percentile estimation algorithm used.
536 *
537 * @param values array of input values
538 * @param p the percentile to compute
539 * @param begin the first (0-based) element to include in the computation
540 * @param length the number of array elements to include
541 * @return the percentile value
542 * @throws IllegalArgumentException if the parameters are not valid or the
543 * input array is null
544 */
545 public static double percentile(final double[] values, final int begin,
546 final int length, final double p) {
547 return percentile.evaluate(values, begin, length, p);
548 }
549
550 /**
551 * Returns the sum of the (signed) differences between corresponding elements of the
552 * input arrays -- i.e., sum(sample1[i] - sample2[i]).
553 *
554 * @param sample1 the first array
555 * @param sample2 the second array
556 * @return sum of paired differences
557 * @throws IllegalArgumentException if the arrays do not have the same
558 * (positive) length
559 */
560 public static double sumDifference(final double[] sample1, final double[] sample2)
561 throws IllegalArgumentException {
562 int n = sample1.length;
563 if (n != sample2.length || n < 1) {
564 throw new IllegalArgumentException
565 ("Input arrays must have the same (positive) length.");
566 }
567 double result = 0;
568 for (int i = 0; i < n; i++) {
569 result += sample1[i] - sample2[i];
570 }
571 return result;
572 }
573
574 /**
575 * Returns the mean of the (signed) differences between corresponding elements of the
576 * input arrays -- i.e., sum(sample1[i] - sample2[i]) / sample1.length.
577 *
578 * @param sample1 the first array
579 * @param sample2 the second array
580 * @return mean of paired differences
581 * @throws IllegalArgumentException if the arrays do not have the same
582 * (positive) length
583 */
584 public static double meanDifference(final double[] sample1, final double[] sample2)
585 throws IllegalArgumentException {
586 return sumDifference(sample1, sample2) / (double) sample1.length;
587 }
588
589 /**
590 * Returns the variance of the (signed) differences between corresponding elements of the
591 * input arrays -- i.e., var(sample1[i] - sample2[i]).
592 *
593 * @param sample1 the first array
594 * @param sample2 the second array
595 * @param meanDifference the mean difference between corresponding entries
596 * @see #meanDifference(double[],double[])
597 * @return variance of paired differences
598 * @throws IllegalArgumentException if the arrays do not have the same
599 * length or their common length is less than 2.
600 */
601 public static double varianceDifference(final double[] sample1, final double[] sample2,
602 double meanDifference) throws IllegalArgumentException {
603 double sum1 = 0d;
604 double sum2 = 0d;
605 double diff = 0d;
606 int n = sample1.length;
607 if (n < 2 || n != sample2.length) {
608 throw new IllegalArgumentException("Input array lengths must be equal and at least 2.");
609 }
610 for (int i = 0; i < n; i++) {
611 diff = sample1[i] - sample2[i];
612 sum1 += (diff - meanDifference) *(diff - meanDifference);
613 sum2 += diff - meanDifference;
614 }
615 return (sum1 - (sum2 * sum2 / (double) n)) / (double) (n - 1);
616 }
617
618 }