View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math.stat;
18  
19  import org.apache.commons.math.MathRuntimeException;
20  import org.apache.commons.math.stat.descriptive.UnivariateStatistic;
21  import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
22  import org.apache.commons.math.stat.descriptive.moment.Mean;
23  import org.apache.commons.math.stat.descriptive.moment.Variance;
24  import org.apache.commons.math.stat.descriptive.rank.Max;
25  import org.apache.commons.math.stat.descriptive.rank.Min;
26  import org.apache.commons.math.stat.descriptive.rank.Percentile;
27  import org.apache.commons.math.stat.descriptive.summary.Product;
28  import org.apache.commons.math.stat.descriptive.summary.Sum;
29  import org.apache.commons.math.stat.descriptive.summary.SumOfLogs;
30  import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
31  
32  /**
33   * StatUtils provides static methods for computing statistics based on data
34   * stored in double[] arrays. 
35   * 
36   * @version $Revision: 772119 $ $Date: 2009-05-06 05:43:28 -0400 (Wed, 06 May 2009) $
37   */
38  public final class StatUtils {
39  
40      /** sum */
41      private static final UnivariateStatistic sum = new Sum();
42  
43      /** sumSq */
44      private static final UnivariateStatistic sumSq = new SumOfSquares();
45  
46      /** prod */
47      private static final UnivariateStatistic prod = new Product();
48  
49      /** sumLog */
50      private static final UnivariateStatistic sumLog = new SumOfLogs();
51  
52      /** min */
53      private static final UnivariateStatistic min = new Min();
54  
55      /** max */
56      private static final UnivariateStatistic max = new Max();
57  
58      /** mean */
59      private static final UnivariateStatistic mean = new Mean();
60  
61      /** variance */
62      private static final Variance variance = new Variance();
63  
64      /** percentile */
65      private static final Percentile percentile = new Percentile();
66      
67      /** geometric mean */
68      private static final GeometricMean geometricMean = new GeometricMean();
69  
70      /**
71       * Private Constructor
72       */
73      private StatUtils() {
74      }
75  
76      /**
77       * Returns the sum of the values in the input array, or
78       * <code>Double.NaN</code> if the array is empty.
79       * <p>
80       * Throws <code>IllegalArgumentException</code> if the input array
81       * is null.</p>
82       * 
83       * @param values  array of values to sum
84       * @return the sum of the values or <code>Double.NaN</code> if the array
85       * is empty
86       * @throws IllegalArgumentException if the array is null
87       */
88      public static double sum(final double[] values) {
89          return sum.evaluate(values);
90      }
91  
92      /**
93       * Returns the sum of the entries in the specified portion of
94       * the input array, or <code>Double.NaN</code> if the designated subarray
95       * is empty.
96       * <p>
97       * Throws <code>IllegalArgumentException</code> if the array is null.</p>
98       * 
99       * @param values the input array
100      * @param begin index of the first array element to include
101      * @param length the number of elements to include
102      * @return the sum of the values or Double.NaN if length = 0
103      * @throws IllegalArgumentException if the array is null or the array index
104      *  parameters are not valid
105      */
106     public static double sum(final double[] values, final int begin, 
107             final int length) {
108         return sum.evaluate(values, begin, length);
109     }
110 
111     /**
112      * Returns the sum of the squares of the entries in the input array, or 
113      * <code>Double.NaN</code> if the array is empty.
114      * <p>
115      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
116      * 
117      * @param values  input array
118      * @return the sum of the squared values or <code>Double.NaN</code> if the
119      * array is empty
120      * @throws IllegalArgumentException if the array is null
121      */
122     public static double sumSq(final double[] values) {
123         return sumSq.evaluate(values);
124     }
125 
126     /**
127      * Returns the sum of the squares of the entries in the specified portion of
128      * the input array, or <code>Double.NaN</code> if the designated subarray
129      * is empty.
130      * <p>
131      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
132      * 
133      * @param values the input array
134      * @param begin index of the first array element to include
135      * @param length the number of elements to include
136      * @return the sum of the squares of the values or Double.NaN if length = 0
137      * @throws IllegalArgumentException if the array is null or the array index
138      * parameters are not valid
139      */
140     public static double sumSq(final double[] values, final int begin,
141             final int length) {
142         return sumSq.evaluate(values, begin, length);
143     }
144 
145     /**
146      * Returns the product of the entries in the input array, or 
147      * <code>Double.NaN</code> if the array is empty.
148      * <p>
149      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
150      * 
151      * @param values the input array
152      * @return the product of the values or Double.NaN if the array is empty
153      * @throws IllegalArgumentException if the array is null
154      */
155     public static double product(final double[] values) {
156         return prod.evaluate(values);
157     }
158 
159     /**
160      * Returns the product of the entries in the specified portion of
161      * the input array, or <code>Double.NaN</code> if the designated subarray
162      * is empty.
163      * <p>
164      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
165      * 
166      * @param values the input array
167      * @param begin index of the first array element to include
168      * @param length the number of elements to include
169      * @return the product of the values or Double.NaN if length = 0
170      * @throws IllegalArgumentException if the array is null or the array index
171      * parameters are not valid
172      */
173     public static double product(final double[] values, final int begin,
174             final int length) {
175         return prod.evaluate(values, begin, length);
176     }
177 
178     /**
179      * Returns the sum of the natural logs of the entries in the input array, or 
180      * <code>Double.NaN</code> if the array is empty.
181      * <p>
182      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
183      * <p>
184      * See {@link org.apache.commons.math.stat.descriptive.summary.SumOfLogs}.
185      * </p>
186      * 
187      * @param values the input array
188      * @return the sum of the natural logs of the values or Double.NaN if 
189      * the array is empty
190      * @throws IllegalArgumentException if the array is null
191      */
192     public static double sumLog(final double[] values) {
193         return sumLog.evaluate(values);
194     }
195 
196     /**
197      * Returns the sum of the natural logs of the entries in the specified portion of
198      * the input array, or <code>Double.NaN</code> if the designated subarray
199      * is empty.
200      * <p>
201      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
202      * <p>
203      * See {@link org.apache.commons.math.stat.descriptive.summary.SumOfLogs}.
204      * </p>
205      * 
206      * @param values the input array
207      * @param begin index of the first array element to include
208      * @param length the number of elements to include
209      * @return the sum of the natural logs of the values or Double.NaN if 
210      * length = 0
211      * @throws IllegalArgumentException if the array is null or the array index
212      * parameters are not valid
213      */
214     public static double sumLog(final double[] values, final int begin,
215             final int length) {
216         return sumLog.evaluate(values, begin, length);
217     }
218 
219     /**
220      * Returns the arithmetic mean of the entries in the input array, or 
221      * <code>Double.NaN</code> if the array is empty.
222      * <p>
223      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
224      * <p>
225      * See {@link org.apache.commons.math.stat.descriptive.moment.Mean} for
226      * details on the computing algorithm.</p>
227      * 
228      * @param values the input array
229      * @return the mean of the values or Double.NaN if the array is empty
230      * @throws IllegalArgumentException if the array is null
231      */
232     public static double mean(final double[] values) {
233         return mean.evaluate(values);
234     }
235 
236     /**
237      * Returns the arithmetic mean of the entries in the specified portion of
238      * the input array, or <code>Double.NaN</code> if the designated subarray
239      * is empty.
240      * <p>
241      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
242      * <p>
243      * See {@link org.apache.commons.math.stat.descriptive.moment.Mean} for
244      * details on the computing algorithm.</p>
245      * 
246      * @param values the input array
247      * @param begin index of the first array element to include
248      * @param length the number of elements to include
249      * @return the mean of the values or Double.NaN if length = 0
250      * @throws IllegalArgumentException if the array is null or the array index
251      * parameters are not valid
252      */
253     public static double mean(final double[] values, final int begin,
254             final int length) {
255         return mean.evaluate(values, begin, length);
256     }
257     
258     /**
259      * Returns the geometric mean of the entries in the input array, or 
260      * <code>Double.NaN</code> if the array is empty.
261      * <p>
262      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
263      * <p>
264      * See {@link org.apache.commons.math.stat.descriptive.moment.GeometricMean}
265      * for details on the computing algorithm.</p>
266      * 
267      * @param values the input array
268      * @return the geometric mean of the values or Double.NaN if the array is empty
269      * @throws IllegalArgumentException if the array is null
270      */
271     public static double geometricMean(final double[] values) {
272         return geometricMean.evaluate(values);
273     }
274 
275     /**
276      * Returns the geometric mean of the entries in the specified portion of
277      * the input array, or <code>Double.NaN</code> if the designated subarray
278      * is empty.
279      * <p>
280      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
281      * <p>
282      * See {@link org.apache.commons.math.stat.descriptive.moment.GeometricMean}
283      * for details on the computing algorithm.</p>
284      * 
285      * @param values the input array
286      * @param begin index of the first array element to include
287      * @param length the number of elements to include
288      * @return the geometric mean of the values or Double.NaN if length = 0
289      * @throws IllegalArgumentException if the array is null or the array index
290      * parameters are not valid
291      */
292     public static double geometricMean(final double[] values, final int begin,
293             final int length) {
294         return geometricMean.evaluate(values, begin, length);
295     }
296     
297 
298     /**
299      * Returns the variance of the entries in the input array, or 
300      * <code>Double.NaN</code> if the array is empty.
301      * <p>
302      * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
303      * details on the computing algorithm.</p>
304      * <p>
305      * Returns 0 for a single-value (i.e. length = 1) sample.</p>
306      * <p>
307      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
308      * 
309      * @param values the input array
310      * @return the variance of the values or Double.NaN if the array is empty
311      * @throws IllegalArgumentException if the array is null
312      */
313     public static double variance(final double[] values) {
314         return variance.evaluate(values);
315     }
316 
317     /**
318      * Returns the variance of the entries in the specified portion of
319      * the input array, or <code>Double.NaN</code> if the designated subarray
320      * is empty.
321      * <p>
322      * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
323      * details on the computing algorithm.</p>
324      * <p>
325      * Returns 0 for a single-value (i.e. length = 1) sample.</p>
326      * <p>
327      * Throws <code>IllegalArgumentException</code> if the array is null or the
328      * array index parameters are not valid.</p>
329      * 
330      * @param values the input array
331      * @param begin index of the first array element to include
332      * @param length the number of elements to include
333      * @return the variance of the values or Double.NaN if length = 0
334      * @throws IllegalArgumentException if the array is null or the array index
335      *  parameters are not valid
336      */
337     public static double variance(final double[] values, final int begin,
338             final int length) {
339         return variance.evaluate(values, begin, length);
340     }
341     
342     /**
343      * Returns the variance of the entries in the specified portion of
344      * the input array, using the precomputed mean value.  Returns 
345      * <code>Double.NaN</code> if the designated subarray is empty.
346      * <p>
347      * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
348      * details on the computing algorithm.</p>
349      * <p>
350      * The formula used assumes that the supplied mean value is the arithmetic
351      * mean of the sample data, not a known population parameter.  This method
352      * is supplied only to save computation when the mean has already been
353      * computed.</p>
354      * <p>
355      * Returns 0 for a single-value (i.e. length = 1) sample.</p>
356      * <p>
357      * Throws <code>IllegalArgumentException</code> if the array is null or the
358      * array index parameters are not valid.</p>
359      * 
360      * @param values the input array
361      * @param mean the precomputed mean value
362      * @param begin index of the first array element to include
363      * @param length the number of elements to include
364      * @return the variance of the values or Double.NaN if length = 0
365      * @throws IllegalArgumentException if the array is null or the array index
366      *  parameters are not valid
367      */
368     public static double variance(final double[] values, final double mean, 
369             final int begin, final int length) {
370         return variance.evaluate(values, mean, begin, length);    
371     }
372     
373     /**
374      * Returns the variance of the entries in the input array, using the
375      * precomputed mean value.  Returns <code>Double.NaN</code> if the array
376      * is empty.  
377      * <p>
378      * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
379      * details on the computing algorithm.</p>  
380      * <p>
381      * The formula used assumes that the supplied mean value is the arithmetic
382      * mean of the sample data, not a known population parameter.  This method
383      * is supplied only to save computation when the mean has already been
384      * computed.</p>
385      * <p>
386      * Returns 0 for a single-value (i.e. length = 1) sample.</p>
387      * <p>
388      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
389      * 
390      * @param values the input array
391      * @param mean the precomputed mean value
392      * @return the variance of the values or Double.NaN if the array is empty
393      * @throws IllegalArgumentException if the array is null
394      */
395     public static double variance(final double[] values, final double mean) {
396         return variance.evaluate(values, mean);    
397     }
398 
399     /**
400      * Returns the maximum of the entries in the input array, or 
401      * <code>Double.NaN</code> if the array is empty.
402      * <p>
403      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
404      * <p>
405      * <ul>
406      * <li>The result is <code>NaN</code> iff all values are <code>NaN</code> 
407      * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
408      * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>, 
409      * the result is <code>Double.POSITIVE_INFINITY.</code></li>
410      * </ul></p>
411      * 
412      * @param values the input array
413      * @return the maximum of the values or Double.NaN if the array is empty
414      * @throws IllegalArgumentException if the array is null
415      */
416     public static double max(final double[] values) {
417         return max.evaluate(values);
418     }
419 
420     /**
421      * Returns the maximum of the entries in the specified portion of
422      * the input array, or <code>Double.NaN</code> if the designated subarray
423      * is empty.
424      * <p>
425      * Throws <code>IllegalArgumentException</code> if the array is null or
426      * the array index parameters are not valid.</p>
427      * <p>
428      * <ul>
429      * <li>The result is <code>NaN</code> iff all values are <code>NaN</code> 
430      * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
431      * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>, 
432      * the result is <code>Double.POSITIVE_INFINITY.</code></li>
433      * </ul></p>
434      * 
435      * @param values the input array
436      * @param begin index of the first array element to include
437      * @param length the number of elements to include
438      * @return the maximum of the values or Double.NaN if length = 0
439      * @throws IllegalArgumentException if the array is null or the array index
440      * parameters are not valid
441      */
442     public static double max(final double[] values, final int begin,
443             final int length) {
444         return max.evaluate(values, begin, length);
445     }
446 
447      /**
448      * Returns the minimum of the entries in the input array, or 
449      * <code>Double.NaN</code> if the array is empty.
450      * <p>
451      * Throws <code>IllegalArgumentException</code> if the array is null.</p>
452      * <p>
453      * <ul>
454      * <li>The result is <code>NaN</code> iff all values are <code>NaN</code> 
455      * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
456      * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>, 
457      * the result is <code>Double.NEGATIVE_INFINITY.</code></li>
458      * </ul> </p>
459      * 
460      * @param values the input array
461      * @return the minimum of the values or Double.NaN if the array is empty
462      * @throws IllegalArgumentException if the array is null
463      */
464     public static double min(final double[] values) {
465         return min.evaluate(values);
466     }
467 
468      /**
469      * Returns the minimum of the entries in the specified portion of
470      * the input array, or <code>Double.NaN</code> if the designated subarray
471      * is empty.
472      * <p>
473      * Throws <code>IllegalArgumentException</code> if the array is null or
474      * the array index parameters are not valid.</p>
475      * <p>
476      * <ul>
477      * <li>The result is <code>NaN</code> iff all values are <code>NaN</code> 
478      * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
479      * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>, 
480      * the result is <code>Double.NEGATIVE_INFINITY.</code></li>
481      * </ul></p>
482      * 
483      * @param values the input array
484      * @param begin index of the first array element to include
485      * @param length the number of elements to include
486      * @return the minimum of the values or Double.NaN if length = 0
487      * @throws IllegalArgumentException if the array is null or the array index
488      * parameters are not valid
489      */
490     public static double min(final double[] values, final int begin,
491             final int length) {
492         return min.evaluate(values, begin, length);
493     }
494     
495     /**
496      * Returns an estimate of the <code>p</code>th percentile of the values
497      * in the <code>values</code> array.
498      * <p>
499      * <ul>
500      * <li>Returns <code>Double.NaN</code> if <code>values</code> has length 
501      * <code>0</code></li></p>
502      * <li>Returns (for any value of <code>p</code>) <code>values[0]</code>
503      *  if <code>values</code> has length <code>1</code></li>
504      * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
505      * is null  or p is not a valid quantile value (p must be greater than 0
506      * and less than or equal to 100)</li>
507      * </ul></p>
508      * <p>
509      * See {@link org.apache.commons.math.stat.descriptive.rank.Percentile} for
510      * a description of the percentile estimation algorithm used.</p>
511      * 
512      * @param values input array of values
513      * @param p the percentile value to compute
514      * @return the percentile value or Double.NaN if the array is empty
515      * @throws IllegalArgumentException if <code>values</code> is null 
516      * or p is invalid
517      */
518     public static double percentile(final double[] values, final double p) {
519             return percentile.evaluate(values,p);
520     }
521 
522      /**
523      * Returns an estimate of the <code>p</code>th percentile of the values
524      * in the <code>values</code> array, starting with the element in (0-based)
525      * position <code>begin</code> in the array and including <code>length</code>
526      * values.
527      * <p>
528      * <ul>
529      * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
530      * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code>
531      *  if <code>length = 1 </code></li>
532      * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
533      *  is null , <code>begin</code> or <code>length</code> is invalid, or 
534      * <code>p</code> is not a valid quantile value (p must be greater than 0
535      * and less than or equal to 100)</li>
536      * </ul></p>
537      * <p>
538       * See {@link org.apache.commons.math.stat.descriptive.rank.Percentile} for
539       * a description of the percentile estimation algorithm used.</p>
540      * 
541      * @param values array of input values
542      * @param p  the percentile to compute
543      * @param begin  the first (0-based) element to include in the computation
544      * @param length  the number of array elements to include
545      * @return  the percentile value
546      * @throws IllegalArgumentException if the parameters are not valid or the
547      * input array is null
548      */
549     public static double percentile(final double[] values, final int begin, 
550             final int length, final double p) {
551         return percentile.evaluate(values, begin, length, p);
552     }   
553     
554     /**
555      * Returns the sum of the (signed) differences between corresponding elements of the
556      * input arrays -- i.e., sum(sample1[i] - sample2[i]).
557      * 
558      * @param sample1  the first array
559      * @param sample2  the second array
560      * @return sum of paired differences
561      * @throws IllegalArgumentException if the arrays do not have the same
562      * (positive) length
563      */
564     public static double sumDifference(final double[] sample1, final double[] sample2)
565         throws IllegalArgumentException {
566         int n = sample1.length;
567         if ((n  != sample2.length) || (n < 1)) {
568             throw MathRuntimeException.createIllegalArgumentException(
569                   "input arrays must have the same positive length ({0} and {1})",
570                   n, sample2.length);
571         }
572         double result = 0;
573         for (int i = 0; i < n; i++) {
574             result += sample1[i] - sample2[i];
575         }
576         return result;
577     }
578     
579     /**
580      * Returns the mean of the (signed) differences between corresponding elements of the
581      * input arrays -- i.e., sum(sample1[i] - sample2[i]) / sample1.length.
582      * 
583      * @param sample1  the first array
584      * @param sample2  the second array
585      * @return mean of paired differences
586      * @throws IllegalArgumentException if the arrays do not have the same
587      * (positive) length
588      */
589     public static double meanDifference(final double[] sample1, final double[] sample2)
590     throws IllegalArgumentException {
591         return sumDifference(sample1, sample2) / sample1.length;
592     }
593     
594     /**
595      * Returns the variance of the (signed) differences between corresponding elements of the
596      * input arrays -- i.e., var(sample1[i] - sample2[i]).
597      * 
598      * @param sample1  the first array
599      * @param sample2  the second array
600      * @param meanDifference   the mean difference between corresponding entries 
601      * @see #meanDifference(double[],double[])
602      * @return variance of paired differences
603      * @throws IllegalArgumentException if the arrays do not have the same
604      * length or their common length is less than 2.
605      */
606     public static double varianceDifference(final double[] sample1, final double[] sample2, 
607             double meanDifference)  throws IllegalArgumentException {
608         double sum1 = 0d;
609         double sum2 = 0d;
610         double diff = 0d;
611         int n = sample1.length;
612         if (n < 2 || n != sample2.length) {
613             throw MathRuntimeException.createIllegalArgumentException(
614                   "input arrays must have the same length and at least two elements ({0} and {1})",
615                   n, sample2.length);
616         }
617         for (int i = 0; i < n; i++) {
618             diff = sample1[i] - sample2[i];
619             sum1 += (diff - meanDifference) *(diff - meanDifference);
620             sum2 += diff - meanDifference;
621         }
622         return (sum1 - (sum2 * sum2 / n)) / (n - 1);
623     }      
624     
625 }