View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math.stat.descriptive;
18  
19  import java.io.Serializable;
20  import java.lang.reflect.InvocationTargetException;
21  import java.util.Arrays;
22  
23  import org.apache.commons.math.MathRuntimeException;
24  import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
25  import org.apache.commons.math.stat.descriptive.moment.Kurtosis;
26  import org.apache.commons.math.stat.descriptive.moment.Mean;
27  import org.apache.commons.math.stat.descriptive.moment.Skewness;
28  import org.apache.commons.math.stat.descriptive.moment.Variance;
29  import org.apache.commons.math.stat.descriptive.rank.Max;
30  import org.apache.commons.math.stat.descriptive.rank.Min;
31  import org.apache.commons.math.stat.descriptive.rank.Percentile;
32  import org.apache.commons.math.stat.descriptive.summary.Sum;
33  import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
34  import org.apache.commons.math.util.ResizableDoubleArray;
35  
36  
37  /**
38   * Maintains a dataset of values of a single variable and computes descriptive
39   * statistics based on stored data. The {@link #getWindowSize() windowSize}
40   * property sets a limit on the number of values that can be stored in the 
41   * dataset.  The default value, INFINITE_WINDOW, puts no limit on the size of
42   * the dataset.  This value should be used with caution, as the backing store
43   * will grow without bound in this case.  For very large datasets, 
44   * {@link SummaryStatistics}, which does not store the dataset, should be used
45   * instead of this class. If <code>windowSize</code> is not INFINITE_WINDOW and
46   * more values are added than can be stored in the dataset, new values are
47   * added in a "rolling" manner, with new values replacing the "oldest" values 
48   * in the dataset.
49   * 
50   * <p>Note: this class is not threadsafe.  Use 
51   * {@link SynchronizedDescriptiveStatistics} if concurrent access from multiple
52   * threads is required.</p>
53   *
54   * @version $Revision: 772119 $ $Date: 2009-05-06 05:43:28 -0400 (Wed, 06 May 2009) $
55   */
56  public class DescriptiveStatistics implements StatisticalSummary, Serializable {
57      
58      /** Serialization UID */
59      private static final long serialVersionUID = 4133067267405273064L;
60  
61      /** hold the window size **/
62      protected int windowSize = INFINITE_WINDOW;
63      
64      /** 
65       *  Stored data values
66       */
67      protected ResizableDoubleArray eDA = new ResizableDoubleArray();
68    
69      /** Mean statistic implementation - can be reset by setter. */
70      private UnivariateStatistic meanImpl = new Mean();
71      
72      /** Geometric mean statistic implementation - can be reset by setter. */
73      private UnivariateStatistic geometricMeanImpl = new GeometricMean();
74      
75      /** Kurtosis statistic implementation - can be reset by setter. */
76      private UnivariateStatistic kurtosisImpl = new Kurtosis();
77      
78      /** Maximum statistic implementation - can be reset by setter. */
79      private UnivariateStatistic maxImpl = new Max();
80      
81      /** Minimum statistic implementation - can be reset by setter. */
82      private UnivariateStatistic minImpl = new Min();
83      
84      /** Percentile statistic implementation - can be reset by setter. */
85      private UnivariateStatistic percentileImpl = new Percentile();
86      
87      /** Skewness statistic implementation - can be reset by setter. */
88      private UnivariateStatistic skewnessImpl = new Skewness();
89      
90      /** Variance statistic implementation - can be reset by setter. */
91      private UnivariateStatistic varianceImpl = new Variance();
92      
93      /** Sum of squares statistic implementation - can be reset by setter. */
94      private UnivariateStatistic sumsqImpl = new SumOfSquares();
95      
96      /** Sum statistic implementation - can be reset by setter. */
97      private UnivariateStatistic sumImpl = new Sum();
98      
99      /**
100      * Construct a DescriptiveStatistics instance with an infinite window
101      */
102     public DescriptiveStatistics() {
103     }
104     
105     /**
106      * Construct a DescriptiveStatistics instance with the specified window
107      * 
108      * @param window the window size.
109      */
110     public DescriptiveStatistics(int window) {
111         setWindowSize(window);
112     }
113     
114     /**
115      * Copy constructor.  Construct a new DescriptiveStatistics instance that
116      * is a copy of original.
117      * 
118      * @param original DescriptiveStatistics instance to copy
119      */
120     public DescriptiveStatistics(DescriptiveStatistics original) {
121         copy(original, this);
122     }
123     
124     /**
125      * Represents an infinite window size.  When the {@link #getWindowSize()}
126      * returns this value, there is no limit to the number of data values
127      * that can be stored in the dataset.
128      */
129     public static final int INFINITE_WINDOW = -1;
130 
131     /**
132      * Adds the value to the dataset. If the dataset is at the maximum size
133      * (i.e., the number of stored elements equals the currently configured
134      * windowSize), the first (oldest) element in the dataset is discarded
135      * to make room for the new value.
136      * 
137      * @param v the value to be added 
138      */
139     public void addValue(double v) {
140         if (windowSize != INFINITE_WINDOW) {
141             if (getN() == windowSize) {
142                 eDA.addElementRolling(v);
143             } else if (getN() < windowSize) {
144                 eDA.addElement(v);
145             }
146         } else {
147             eDA.addElement(v);
148         }
149     }
150 
151     /**
152      * Removes the most recent value from the dataset.
153      */
154     public void removeMostRecentValue() {
155         eDA.discardMostRecentElements(1);
156     }
157 
158     /**
159      * Replaces the most recently stored value with the given value.
160      * There must be at least one element stored to call this method.
161      * 
162      * @param v the value to replace the most recent stored value
163      * @return replaced value
164      */
165     public double replaceMostRecentValue(double v) {
166         return eDA.substituteMostRecentElement(v);
167     }
168 
169     /** 
170      * Returns the <a href="http://www.xycoon.com/arithmetic_mean.htm">
171      * arithmetic mean </a> of the available values 
172      * @return The mean or Double.NaN if no values have been added.
173      */
174     public double getMean() {
175         return apply(meanImpl);
176     }
177 
178     /** 
179      * Returns the <a href="http://www.xycoon.com/geometric_mean.htm">
180      * geometric mean </a> of the available values
181      * @return The geometricMean, Double.NaN if no values have been added, 
182      * or if the product of the available values is less than or equal to 0.
183      */
184     public double getGeometricMean() {
185         return apply(geometricMeanImpl);
186     }
187 
188     /** 
189      * Returns the variance of the available values.
190      * @return The variance, Double.NaN if no values have been added 
191      * or 0.0 for a single value set.  
192      */
193     public double getVariance() {
194         return apply(varianceImpl);
195     }
196 
197     /** 
198      * Returns the standard deviation of the available values.
199      * @return The standard deviation, Double.NaN if no values have been added 
200      * or 0.0 for a single value set. 
201      */
202     public double getStandardDeviation() {
203         double stdDev = Double.NaN;
204         if (getN() > 0) {
205             if (getN() > 1) {
206                 stdDev = Math.sqrt(getVariance());
207             } else {
208                 stdDev = 0.0;
209             }
210         }
211         return (stdDev);
212     }
213 
214     /**
215      * Returns the skewness of the available values. Skewness is a 
216      * measure of the asymmetry of a given distribution.
217      * @return The skewness, Double.NaN if no values have been added 
218      * or 0.0 for a value set &lt;=2. 
219      */
220     public double getSkewness() {
221         return apply(skewnessImpl);
222     }
223 
224     /**
225      * Returns the Kurtosis of the available values. Kurtosis is a 
226      * measure of the "peakedness" of a distribution
227      * @return The kurtosis, Double.NaN if no values have been added, or 0.0 
228      * for a value set &lt;=3. 
229      */
230     public double getKurtosis() {
231         return apply(kurtosisImpl);
232     }
233 
234     /** 
235      * Returns the maximum of the available values
236      * @return The max or Double.NaN if no values have been added.
237      */
238     public double getMax() {
239         return apply(maxImpl);
240     }
241 
242     /** 
243     * Returns the minimum of the available values
244     * @return The min or Double.NaN if no values have been added.
245     */
246     public double getMin() {
247         return apply(minImpl);
248     }
249 
250     /** 
251      * Returns the number of available values
252      * @return The number of available values
253      */
254     public long getN() {
255         return eDA.getNumElements();
256     }
257 
258     /**
259      * Returns the sum of the values that have been added to Univariate.
260      * @return The sum or Double.NaN if no values have been added
261      */
262     public double getSum() {
263         return apply(sumImpl);
264     }
265 
266     /**
267      * Returns the sum of the squares of the available values.
268      * @return The sum of the squares or Double.NaN if no 
269      * values have been added.
270      */
271     public double getSumsq() {
272         return apply(sumsqImpl);
273     }
274 
275     /** 
276      * Resets all statistics and storage
277      */
278     public void clear() {
279         eDA.clear();
280     }
281 
282 
283     /**
284      * Returns the maximum number of values that can be stored in the
285      * dataset, or INFINITE_WINDOW (-1) if there is no limit.
286      * 
287      * @return The current window size or -1 if its Infinite.
288      */
289     public int getWindowSize() {
290         return windowSize;
291     }
292 
293     /**
294      * WindowSize controls the number of values which contribute 
295      * to the reported statistics.  For example, if 
296      * windowSize is set to 3 and the values {1,2,3,4,5} 
297      * have been added <strong> in that order</strong> 
298      * then the <i>available values</i> are {3,4,5} and all
299      * reported statistics will be based on these values
300      * @param windowSize sets the size of the window.
301      */
302     public void setWindowSize(int windowSize) {
303         if (windowSize < 1) {
304             if (windowSize != INFINITE_WINDOW) {
305                 throw MathRuntimeException.createIllegalArgumentException(
306                       "window size must be positive ({0})", windowSize);
307             }
308         }
309         
310         this.windowSize = windowSize;
311 
312         // We need to check to see if we need to discard elements
313         // from the front of the array.  If the windowSize is less than 
314         // the current number of elements.
315         if (windowSize != INFINITE_WINDOW && windowSize < eDA.getNumElements()) {
316             eDA.discardFrontElements(eDA.getNumElements() - windowSize);
317         }
318     }
319     
320     /**
321      * Returns the current set of values in an array of double primitives.  
322      * The order of addition is preserved.  The returned array is a fresh
323      * copy of the underlying data -- i.e., it is not a reference to the
324      * stored data.
325      * 
326      * @return returns the current set of numbers in the order in which they 
327      *         were added to this set
328      */
329     public double[] getValues() {
330         return eDA.getElements();
331     }
332 
333     /**
334      * Returns the current set of values in an array of double primitives,  
335      * sorted in ascending order.  The returned array is a fresh
336      * copy of the underlying data -- i.e., it is not a reference to the
337      * stored data.
338      * @return returns the current set of 
339      * numbers sorted in ascending order        
340      */
341     public double[] getSortedValues() {
342         double[] sort = getValues();
343         Arrays.sort(sort);
344         return sort;
345     }
346 
347     /**
348      * Returns the element at the specified index
349      * @param index The Index of the element
350      * @return return the element at the specified index
351      */
352     public double getElement(int index) {
353         return eDA.getElement(index);
354     }
355 
356     /**
357      * Returns an estimate for the pth percentile of the stored values. 
358      * <p>
359      * The implementation provided here follows the first estimation procedure presented
360      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a>
361      * </p><p>
362      * <strong>Preconditions</strong>:<ul>
363      * <li><code>0 &lt; p &le; 100</code> (otherwise an 
364      * <code>IllegalArgumentException</code> is thrown)</li>
365      * <li>at least one value must be stored (returns <code>Double.NaN
366      *     </code> otherwise)</li>
367      * </ul></p>
368      * 
369      * @param p the requested percentile (scaled from 0 - 100)
370      * @return An estimate for the pth percentile of the stored data 
371      * @throws IllegalStateException if percentile implementation has been
372      *  overridden and the supplied implementation does not support setQuantile
373      * values
374      */
375     public double getPercentile(double p) {
376         if (percentileImpl instanceof Percentile) {
377             ((Percentile) percentileImpl).setQuantile(p);
378         } else {
379             try {
380                 percentileImpl.getClass().getMethod("setQuantile", 
381                         new Class[] {Double.TYPE}).invoke(percentileImpl,
382                                 new Object[] {Double.valueOf(p)});
383             } catch (NoSuchMethodException e1) { // Setter guard should prevent
384                 throw MathRuntimeException.createIllegalArgumentException(
385                       "percentile implementation {0} does not support setQuantile",
386                       percentileImpl.getClass().getName());
387             } catch (IllegalAccessException e2) {
388                 throw MathRuntimeException.createIllegalArgumentException(
389                       "cannot access setQuantile method in percentile implementation {0}",
390                       percentileImpl.getClass().getName());
391             } catch (InvocationTargetException e3) {
392                 throw MathRuntimeException.createIllegalArgumentException(e3.getCause()); 
393             }
394         }
395         return apply(percentileImpl);
396     }
397     
398     /**
399      * Generates a text report displaying univariate statistics from values
400      * that have been added.  Each statistic is displayed on a separate
401      * line.
402      * 
403      * @return String with line feeds displaying statistics
404      */
405     @Override
406     public String toString() {
407         StringBuffer outBuffer = new StringBuffer();
408         String endl = "\n";
409         outBuffer.append("DescriptiveStatistics:").append(endl);
410         outBuffer.append("n: ").append(getN()).append(endl);
411         outBuffer.append("min: ").append(getMin()).append(endl);
412         outBuffer.append("max: ").append(getMax()).append(endl);
413         outBuffer.append("mean: ").append(getMean()).append(endl);
414         outBuffer.append("std dev: ").append(getStandardDeviation())
415             .append(endl);
416         outBuffer.append("median: ").append(getPercentile(50)).append(endl);
417         outBuffer.append("skewness: ").append(getSkewness()).append(endl);
418         outBuffer.append("kurtosis: ").append(getKurtosis()).append(endl);
419         return outBuffer.toString();
420     }
421     
422     /**
423      * Apply the given statistic to the data associated with this set of statistics.
424      * @param stat the statistic to apply
425      * @return the computed value of the statistic.
426      */
427     public double apply(UnivariateStatistic stat) {
428         return stat.evaluate(eDA.getInternalValues(), eDA.start(), eDA.getNumElements());
429     }
430 
431     // Implementation getters and setter
432     
433     /**
434      * Returns the currently configured mean implementation.
435      * 
436      * @return the UnivariateStatistic implementing the mean
437      * @since 1.2
438      */
439     public synchronized UnivariateStatistic getMeanImpl() {
440         return meanImpl;
441     }
442 
443     /**
444      * <p>Sets the implementation for the mean.</p>
445      * 
446      * @param meanImpl the UnivariateStatistic instance to use
447      * for computing the mean
448      * @since 1.2
449      */
450     public synchronized void setMeanImpl(UnivariateStatistic meanImpl) {
451         this.meanImpl = meanImpl;
452     }
453 
454     /**
455      * Returns the currently configured geometric mean implementation.
456      * 
457      * @return the UnivariateStatistic implementing the geometric mean
458      * @since 1.2
459      */
460     public synchronized UnivariateStatistic getGeometricMeanImpl() {
461         return geometricMeanImpl;
462     }
463 
464     /**
465      * <p>Sets the implementation for the gemoetric mean.</p>
466      * 
467      * @param geometricMeanImpl the UnivariateStatistic instance to use
468      * for computing the geometric mean
469      * @since 1.2
470      */
471     public synchronized void setGeometricMeanImpl(
472             UnivariateStatistic geometricMeanImpl) {
473         this.geometricMeanImpl = geometricMeanImpl;
474     }
475 
476     /**
477      * Returns the currently configured kurtosis implementation.
478      * 
479      * @return the UnivariateStatistic implementing the kurtosis
480      * @since 1.2
481      */
482     public synchronized UnivariateStatistic getKurtosisImpl() {
483         return kurtosisImpl;
484     }
485 
486     /**
487      * <p>Sets the implementation for the kurtosis.</p>
488      * 
489      * @param kurtosisImpl the UnivariateStatistic instance to use
490      * for computing the kurtosis
491      * @since 1.2
492      */
493     public synchronized void setKurtosisImpl(UnivariateStatistic kurtosisImpl) {
494         this.kurtosisImpl = kurtosisImpl;
495     }
496 
497     /**
498      * Returns the currently configured maximum implementation.
499      * 
500      * @return the UnivariateStatistic implementing the maximum
501      * @since 1.2
502      */
503     public synchronized UnivariateStatistic getMaxImpl() {
504         return maxImpl;
505     }
506 
507     /**
508      * <p>Sets the implementation for the maximum.</p>
509      * 
510      * @param maxImpl the UnivariateStatistic instance to use
511      * for computing the maximum
512      * @since 1.2
513      */
514     public synchronized void setMaxImpl(UnivariateStatistic maxImpl) {
515         this.maxImpl = maxImpl;
516     }
517 
518     /**
519      * Returns the currently configured minimum implementation.
520      * 
521      * @return the UnivariateStatistic implementing the minimum
522      * @since 1.2
523      */
524     public synchronized UnivariateStatistic getMinImpl() {
525         return minImpl;
526     }
527 
528     /**
529      * <p>Sets the implementation for the minimum.</p>
530      * 
531      * @param minImpl the UnivariateStatistic instance to use
532      * for computing the minimum
533      * @since 1.2
534      */
535     public synchronized void setMinImpl(UnivariateStatistic minImpl) {
536         this.minImpl = minImpl;
537     }
538 
539     /**
540      * Returns the currently configured percentile implementation.
541      * 
542      * @return the UnivariateStatistic implementing the percentile
543      * @since 1.2
544      */
545     public synchronized UnivariateStatistic getPercentileImpl() {
546         return percentileImpl;
547     }
548 
549     /**
550      * Sets the implementation to be used by {@link #getPercentile(double)}.
551      * The supplied <code>UnivariateStatistic</code> must provide a
552      * <code>setQuantile(double)</code> method; otherwise 
553      * <code>IllegalArgumentException</code> is thrown.
554      * 
555      * @param percentileImpl the percentileImpl to set
556      * @throws IllegalArgumentException if the supplied implementation does not
557      *  provide a <code>setQuantile</code> method
558      * @since 1.2
559      */
560     public synchronized void setPercentileImpl(
561             UnivariateStatistic percentileImpl) {
562         try {
563             percentileImpl.getClass().getMethod("setQuantile", 
564                     new Class[] {Double.TYPE}).invoke(percentileImpl,
565                             new Object[] {Double.valueOf(50.0d)});
566         } catch (NoSuchMethodException e1) { 
567             throw MathRuntimeException.createIllegalArgumentException(
568                   "percentile implementation {0} does not support setQuantile",
569                   percentileImpl.getClass().getName());
570         } catch (IllegalAccessException e2) {
571             throw MathRuntimeException.createIllegalArgumentException(
572                   "cannot access setQuantile method in percentile implementation {0}",
573                   percentileImpl.getClass().getName());
574         } catch (InvocationTargetException e3) {
575             throw MathRuntimeException.createIllegalArgumentException(e3.getCause()); 
576         }
577         this.percentileImpl = percentileImpl;
578     }
579 
580     /**
581      * Returns the currently configured skewness implementation.
582      * 
583      * @return the UnivariateStatistic implementing the skewness
584      * @since 1.2
585      */
586     public synchronized UnivariateStatistic getSkewnessImpl() {
587         return skewnessImpl;
588     }
589 
590     /**
591      * <p>Sets the implementation for the skewness.</p>
592      * 
593      * @param skewnessImpl the UnivariateStatistic instance to use
594      * for computing the skewness
595      * @since 1.2
596      */
597     public synchronized void setSkewnessImpl(
598             UnivariateStatistic skewnessImpl) {
599         this.skewnessImpl = skewnessImpl;
600     }
601 
602     /**
603      * Returns the currently configured variance implementation.
604      * 
605      * @return the UnivariateStatistic implementing the variance
606      * @since 1.2
607      */
608     public synchronized UnivariateStatistic getVarianceImpl() {
609         return varianceImpl;
610     }
611 
612     /**
613      * <p>Sets the implementation for the variance.</p>
614      * 
615      * @param varianceImpl the UnivariateStatistic instance to use
616      * for computing the variance
617      * @since 1.2
618      */
619     public synchronized void setVarianceImpl(
620             UnivariateStatistic varianceImpl) {
621         this.varianceImpl = varianceImpl;
622     }
623 
624     /**
625      * Returns the currently configured sum of squares implementation.
626      * 
627      * @return the UnivariateStatistic implementing the sum of squares
628      * @since 1.2
629      */
630     public synchronized UnivariateStatistic getSumsqImpl() {
631         return sumsqImpl;
632     }
633 
634     /**
635      * <p>Sets the implementation for the sum of squares.</p>
636      * 
637      * @param sumsqImpl the UnivariateStatistic instance to use
638      * for computing the sum of squares
639      * @since 1.2
640      */
641     public synchronized void setSumsqImpl(UnivariateStatistic sumsqImpl) {
642         this.sumsqImpl = sumsqImpl;
643     }
644 
645     /**
646      * Returns the currently configured sum implementation.
647      * 
648      * @return the UnivariateStatistic implementing the sum
649      * @since 1.2
650      */
651     public synchronized UnivariateStatistic getSumImpl() {
652         return sumImpl;
653     }
654 
655     /**
656      * <p>Sets the implementation for the sum.</p>
657      * 
658      * @param sumImpl the UnivariateStatistic instance to use
659      * for computing the sum
660      * @since 1.2
661      */
662     public synchronized void setSumImpl(UnivariateStatistic sumImpl) {
663         this.sumImpl = sumImpl;
664     }  
665     
666     /**
667      * Returns a copy of this DescriptiveStatistics instance with the same internal state.
668      * 
669      * @return a copy of this
670      */
671     public DescriptiveStatistics copy() {
672         DescriptiveStatistics result = new DescriptiveStatistics();
673         copy(this, result);
674         return result; 
675     }
676      
677     /**
678      * Copies source to dest.
679      * <p>Neither source nor dest can be null.</p>
680      * 
681      * @param source DescriptiveStatistics to copy
682      * @param dest DescriptiveStatistics to copy to
683      * @throws NullPointerException if either source or dest is null
684      */
685     public static void copy(DescriptiveStatistics source, DescriptiveStatistics dest) {
686         // Copy data and window size
687         dest.eDA = source.eDA.copy();
688         dest.windowSize = source.windowSize;
689         
690         // Copy implementations
691         dest.maxImpl = source.maxImpl.copy();
692         dest.meanImpl = source.meanImpl.copy();
693         dest.minImpl = source.minImpl.copy();
694         dest.sumImpl = source.sumImpl.copy();
695         dest.varianceImpl = source.varianceImpl.copy();
696         dest.sumsqImpl = source.sumsqImpl.copy();
697         dest.geometricMeanImpl = source.geometricMeanImpl.copy();
698         dest.kurtosisImpl = source.kurtosisImpl;
699         dest.skewnessImpl = source.skewnessImpl;
700         dest.percentileImpl = source.percentileImpl;
701     }
702 }