001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.math.stat.descriptive;
018    
019    import java.io.Serializable;
020    import java.lang.reflect.InvocationTargetException;
021    import java.util.Arrays;
022    
023    import org.apache.commons.math.MathRuntimeException;
024    import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
025    import org.apache.commons.math.stat.descriptive.moment.Kurtosis;
026    import org.apache.commons.math.stat.descriptive.moment.Mean;
027    import org.apache.commons.math.stat.descriptive.moment.Skewness;
028    import org.apache.commons.math.stat.descriptive.moment.Variance;
029    import org.apache.commons.math.stat.descriptive.rank.Max;
030    import org.apache.commons.math.stat.descriptive.rank.Min;
031    import org.apache.commons.math.stat.descriptive.rank.Percentile;
032    import org.apache.commons.math.stat.descriptive.summary.Sum;
033    import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
034    import org.apache.commons.math.util.ResizableDoubleArray;
035    
036    
037    /**
038     * Maintains a dataset of values of a single variable and computes descriptive
039     * statistics based on stored data. The {@link #getWindowSize() windowSize}
040     * property sets a limit on the number of values that can be stored in the 
041     * dataset.  The default value, INFINITE_WINDOW, puts no limit on the size of
042     * the dataset.  This value should be used with caution, as the backing store
043     * will grow without bound in this case.  For very large datasets, 
044     * {@link SummaryStatistics}, which does not store the dataset, should be used
045     * instead of this class. If <code>windowSize</code> is not INFINITE_WINDOW and
046     * more values are added than can be stored in the dataset, new values are
047     * added in a "rolling" manner, with new values replacing the "oldest" values 
048     * in the dataset.
049     * 
050     * <p>Note: this class is not threadsafe.  Use 
051     * {@link SynchronizedDescriptiveStatistics} if concurrent access from multiple
052     * threads is required.</p>
053     *
054     * @version $Revision: 772119 $ $Date: 2009-05-06 05:43:28 -0400 (Wed, 06 May 2009) $
055     */
056    public class DescriptiveStatistics implements StatisticalSummary, Serializable {
057        
058        /** Serialization UID */
059        private static final long serialVersionUID = 4133067267405273064L;
060    
061        /** hold the window size **/
062        protected int windowSize = INFINITE_WINDOW;
063        
064        /** 
065         *  Stored data values
066         */
067        protected ResizableDoubleArray eDA = new ResizableDoubleArray();
068      
069        /** Mean statistic implementation - can be reset by setter. */
070        private UnivariateStatistic meanImpl = new Mean();
071        
072        /** Geometric mean statistic implementation - can be reset by setter. */
073        private UnivariateStatistic geometricMeanImpl = new GeometricMean();
074        
075        /** Kurtosis statistic implementation - can be reset by setter. */
076        private UnivariateStatistic kurtosisImpl = new Kurtosis();
077        
078        /** Maximum statistic implementation - can be reset by setter. */
079        private UnivariateStatistic maxImpl = new Max();
080        
081        /** Minimum statistic implementation - can be reset by setter. */
082        private UnivariateStatistic minImpl = new Min();
083        
084        /** Percentile statistic implementation - can be reset by setter. */
085        private UnivariateStatistic percentileImpl = new Percentile();
086        
087        /** Skewness statistic implementation - can be reset by setter. */
088        private UnivariateStatistic skewnessImpl = new Skewness();
089        
090        /** Variance statistic implementation - can be reset by setter. */
091        private UnivariateStatistic varianceImpl = new Variance();
092        
093        /** Sum of squares statistic implementation - can be reset by setter. */
094        private UnivariateStatistic sumsqImpl = new SumOfSquares();
095        
096        /** Sum statistic implementation - can be reset by setter. */
097        private UnivariateStatistic sumImpl = new Sum();
098        
099        /**
100         * Construct a DescriptiveStatistics instance with an infinite window
101         */
102        public DescriptiveStatistics() {
103        }
104        
105        /**
106         * Construct a DescriptiveStatistics instance with the specified window
107         * 
108         * @param window the window size.
109         */
110        public DescriptiveStatistics(int window) {
111            setWindowSize(window);
112        }
113        
114        /**
115         * Copy constructor.  Construct a new DescriptiveStatistics instance that
116         * is a copy of original.
117         * 
118         * @param original DescriptiveStatistics instance to copy
119         */
120        public DescriptiveStatistics(DescriptiveStatistics original) {
121            copy(original, this);
122        }
123        
124        /**
125         * Represents an infinite window size.  When the {@link #getWindowSize()}
126         * returns this value, there is no limit to the number of data values
127         * that can be stored in the dataset.
128         */
129        public static final int INFINITE_WINDOW = -1;
130    
131        /**
132         * Adds the value to the dataset. If the dataset is at the maximum size
133         * (i.e., the number of stored elements equals the currently configured
134         * windowSize), the first (oldest) element in the dataset is discarded
135         * to make room for the new value.
136         * 
137         * @param v the value to be added 
138         */
139        public void addValue(double v) {
140            if (windowSize != INFINITE_WINDOW) {
141                if (getN() == windowSize) {
142                    eDA.addElementRolling(v);
143                } else if (getN() < windowSize) {
144                    eDA.addElement(v);
145                }
146            } else {
147                eDA.addElement(v);
148            }
149        }
150    
151        /**
152         * Removes the most recent value from the dataset.
153         */
154        public void removeMostRecentValue() {
155            eDA.discardMostRecentElements(1);
156        }
157    
158        /**
159         * Replaces the most recently stored value with the given value.
160         * There must be at least one element stored to call this method.
161         * 
162         * @param v the value to replace the most recent stored value
163         * @return replaced value
164         */
165        public double replaceMostRecentValue(double v) {
166            return eDA.substituteMostRecentElement(v);
167        }
168    
169        /** 
170         * Returns the <a href="http://www.xycoon.com/arithmetic_mean.htm">
171         * arithmetic mean </a> of the available values 
172         * @return The mean or Double.NaN if no values have been added.
173         */
174        public double getMean() {
175            return apply(meanImpl);
176        }
177    
178        /** 
179         * Returns the <a href="http://www.xycoon.com/geometric_mean.htm">
180         * geometric mean </a> of the available values
181         * @return The geometricMean, Double.NaN if no values have been added, 
182         * or if the product of the available values is less than or equal to 0.
183         */
184        public double getGeometricMean() {
185            return apply(geometricMeanImpl);
186        }
187    
188        /** 
189         * Returns the variance of the available values.
190         * @return The variance, Double.NaN if no values have been added 
191         * or 0.0 for a single value set.  
192         */
193        public double getVariance() {
194            return apply(varianceImpl);
195        }
196    
197        /** 
198         * Returns the standard deviation of the available values.
199         * @return The standard deviation, Double.NaN if no values have been added 
200         * or 0.0 for a single value set. 
201         */
202        public double getStandardDeviation() {
203            double stdDev = Double.NaN;
204            if (getN() > 0) {
205                if (getN() > 1) {
206                    stdDev = Math.sqrt(getVariance());
207                } else {
208                    stdDev = 0.0;
209                }
210            }
211            return (stdDev);
212        }
213    
214        /**
215         * Returns the skewness of the available values. Skewness is a 
216         * measure of the asymmetry of a given distribution.
217         * @return The skewness, Double.NaN if no values have been added 
218         * or 0.0 for a value set &lt;=2. 
219         */
220        public double getSkewness() {
221            return apply(skewnessImpl);
222        }
223    
224        /**
225         * Returns the Kurtosis of the available values. Kurtosis is a 
226         * measure of the "peakedness" of a distribution
227         * @return The kurtosis, Double.NaN if no values have been added, or 0.0 
228         * for a value set &lt;=3. 
229         */
230        public double getKurtosis() {
231            return apply(kurtosisImpl);
232        }
233    
234        /** 
235         * Returns the maximum of the available values
236         * @return The max or Double.NaN if no values have been added.
237         */
238        public double getMax() {
239            return apply(maxImpl);
240        }
241    
242        /** 
243        * Returns the minimum of the available values
244        * @return The min or Double.NaN if no values have been added.
245        */
246        public double getMin() {
247            return apply(minImpl);
248        }
249    
250        /** 
251         * Returns the number of available values
252         * @return The number of available values
253         */
254        public long getN() {
255            return eDA.getNumElements();
256        }
257    
258        /**
259         * Returns the sum of the values that have been added to Univariate.
260         * @return The sum or Double.NaN if no values have been added
261         */
262        public double getSum() {
263            return apply(sumImpl);
264        }
265    
266        /**
267         * Returns the sum of the squares of the available values.
268         * @return The sum of the squares or Double.NaN if no 
269         * values have been added.
270         */
271        public double getSumsq() {
272            return apply(sumsqImpl);
273        }
274    
275        /** 
276         * Resets all statistics and storage
277         */
278        public void clear() {
279            eDA.clear();
280        }
281    
282    
283        /**
284         * Returns the maximum number of values that can be stored in the
285         * dataset, or INFINITE_WINDOW (-1) if there is no limit.
286         * 
287         * @return The current window size or -1 if its Infinite.
288         */
289        public int getWindowSize() {
290            return windowSize;
291        }
292    
293        /**
294         * WindowSize controls the number of values which contribute 
295         * to the reported statistics.  For example, if 
296         * windowSize is set to 3 and the values {1,2,3,4,5} 
297         * have been added <strong> in that order</strong> 
298         * then the <i>available values</i> are {3,4,5} and all
299         * reported statistics will be based on these values
300         * @param windowSize sets the size of the window.
301         */
302        public void setWindowSize(int windowSize) {
303            if (windowSize < 1) {
304                if (windowSize != INFINITE_WINDOW) {
305                    throw MathRuntimeException.createIllegalArgumentException(
306                          "window size must be positive ({0})", windowSize);
307                }
308            }
309            
310            this.windowSize = windowSize;
311    
312            // We need to check to see if we need to discard elements
313            // from the front of the array.  If the windowSize is less than 
314            // the current number of elements.
315            if (windowSize != INFINITE_WINDOW && windowSize < eDA.getNumElements()) {
316                eDA.discardFrontElements(eDA.getNumElements() - windowSize);
317            }
318        }
319        
320        /**
321         * Returns the current set of values in an array of double primitives.  
322         * The order of addition is preserved.  The returned array is a fresh
323         * copy of the underlying data -- i.e., it is not a reference to the
324         * stored data.
325         * 
326         * @return returns the current set of numbers in the order in which they 
327         *         were added to this set
328         */
329        public double[] getValues() {
330            return eDA.getElements();
331        }
332    
333        /**
334         * Returns the current set of values in an array of double primitives,  
335         * sorted in ascending order.  The returned array is a fresh
336         * copy of the underlying data -- i.e., it is not a reference to the
337         * stored data.
338         * @return returns the current set of 
339         * numbers sorted in ascending order        
340         */
341        public double[] getSortedValues() {
342            double[] sort = getValues();
343            Arrays.sort(sort);
344            return sort;
345        }
346    
347        /**
348         * Returns the element at the specified index
349         * @param index The Index of the element
350         * @return return the element at the specified index
351         */
352        public double getElement(int index) {
353            return eDA.getElement(index);
354        }
355    
356        /**
357         * Returns an estimate for the pth percentile of the stored values. 
358         * <p>
359         * The implementation provided here follows the first estimation procedure presented
360         * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a>
361         * </p><p>
362         * <strong>Preconditions</strong>:<ul>
363         * <li><code>0 &lt; p &le; 100</code> (otherwise an 
364         * <code>IllegalArgumentException</code> is thrown)</li>
365         * <li>at least one value must be stored (returns <code>Double.NaN
366         *     </code> otherwise)</li>
367         * </ul></p>
368         * 
369         * @param p the requested percentile (scaled from 0 - 100)
370         * @return An estimate for the pth percentile of the stored data 
371         * @throws IllegalStateException if percentile implementation has been
372         *  overridden and the supplied implementation does not support setQuantile
373         * values
374         */
375        public double getPercentile(double p) {
376            if (percentileImpl instanceof Percentile) {
377                ((Percentile) percentileImpl).setQuantile(p);
378            } else {
379                try {
380                    percentileImpl.getClass().getMethod("setQuantile", 
381                            new Class[] {Double.TYPE}).invoke(percentileImpl,
382                                    new Object[] {Double.valueOf(p)});
383                } catch (NoSuchMethodException e1) { // Setter guard should prevent
384                    throw MathRuntimeException.createIllegalArgumentException(
385                          "percentile implementation {0} does not support setQuantile",
386                          percentileImpl.getClass().getName());
387                } catch (IllegalAccessException e2) {
388                    throw MathRuntimeException.createIllegalArgumentException(
389                          "cannot access setQuantile method in percentile implementation {0}",
390                          percentileImpl.getClass().getName());
391                } catch (InvocationTargetException e3) {
392                    throw MathRuntimeException.createIllegalArgumentException(e3.getCause()); 
393                }
394            }
395            return apply(percentileImpl);
396        }
397        
398        /**
399         * Generates a text report displaying univariate statistics from values
400         * that have been added.  Each statistic is displayed on a separate
401         * line.
402         * 
403         * @return String with line feeds displaying statistics
404         */
405        @Override
406        public String toString() {
407            StringBuffer outBuffer = new StringBuffer();
408            String endl = "\n";
409            outBuffer.append("DescriptiveStatistics:").append(endl);
410            outBuffer.append("n: ").append(getN()).append(endl);
411            outBuffer.append("min: ").append(getMin()).append(endl);
412            outBuffer.append("max: ").append(getMax()).append(endl);
413            outBuffer.append("mean: ").append(getMean()).append(endl);
414            outBuffer.append("std dev: ").append(getStandardDeviation())
415                .append(endl);
416            outBuffer.append("median: ").append(getPercentile(50)).append(endl);
417            outBuffer.append("skewness: ").append(getSkewness()).append(endl);
418            outBuffer.append("kurtosis: ").append(getKurtosis()).append(endl);
419            return outBuffer.toString();
420        }
421        
422        /**
423         * Apply the given statistic to the data associated with this set of statistics.
424         * @param stat the statistic to apply
425         * @return the computed value of the statistic.
426         */
427        public double apply(UnivariateStatistic stat) {
428            return stat.evaluate(eDA.getInternalValues(), eDA.start(), eDA.getNumElements());
429        }
430    
431        // Implementation getters and setter
432        
433        /**
434         * Returns the currently configured mean implementation.
435         * 
436         * @return the UnivariateStatistic implementing the mean
437         * @since 1.2
438         */
439        public synchronized UnivariateStatistic getMeanImpl() {
440            return meanImpl;
441        }
442    
443        /**
444         * <p>Sets the implementation for the mean.</p>
445         * 
446         * @param meanImpl the UnivariateStatistic instance to use
447         * for computing the mean
448         * @since 1.2
449         */
450        public synchronized void setMeanImpl(UnivariateStatistic meanImpl) {
451            this.meanImpl = meanImpl;
452        }
453    
454        /**
455         * Returns the currently configured geometric mean implementation.
456         * 
457         * @return the UnivariateStatistic implementing the geometric mean
458         * @since 1.2
459         */
460        public synchronized UnivariateStatistic getGeometricMeanImpl() {
461            return geometricMeanImpl;
462        }
463    
464        /**
465         * <p>Sets the implementation for the gemoetric mean.</p>
466         * 
467         * @param geometricMeanImpl the UnivariateStatistic instance to use
468         * for computing the geometric mean
469         * @since 1.2
470         */
471        public synchronized void setGeometricMeanImpl(
472                UnivariateStatistic geometricMeanImpl) {
473            this.geometricMeanImpl = geometricMeanImpl;
474        }
475    
476        /**
477         * Returns the currently configured kurtosis implementation.
478         * 
479         * @return the UnivariateStatistic implementing the kurtosis
480         * @since 1.2
481         */
482        public synchronized UnivariateStatistic getKurtosisImpl() {
483            return kurtosisImpl;
484        }
485    
486        /**
487         * <p>Sets the implementation for the kurtosis.</p>
488         * 
489         * @param kurtosisImpl the UnivariateStatistic instance to use
490         * for computing the kurtosis
491         * @since 1.2
492         */
493        public synchronized void setKurtosisImpl(UnivariateStatistic kurtosisImpl) {
494            this.kurtosisImpl = kurtosisImpl;
495        }
496    
497        /**
498         * Returns the currently configured maximum implementation.
499         * 
500         * @return the UnivariateStatistic implementing the maximum
501         * @since 1.2
502         */
503        public synchronized UnivariateStatistic getMaxImpl() {
504            return maxImpl;
505        }
506    
507        /**
508         * <p>Sets the implementation for the maximum.</p>
509         * 
510         * @param maxImpl the UnivariateStatistic instance to use
511         * for computing the maximum
512         * @since 1.2
513         */
514        public synchronized void setMaxImpl(UnivariateStatistic maxImpl) {
515            this.maxImpl = maxImpl;
516        }
517    
518        /**
519         * Returns the currently configured minimum implementation.
520         * 
521         * @return the UnivariateStatistic implementing the minimum
522         * @since 1.2
523         */
524        public synchronized UnivariateStatistic getMinImpl() {
525            return minImpl;
526        }
527    
528        /**
529         * <p>Sets the implementation for the minimum.</p>
530         * 
531         * @param minImpl the UnivariateStatistic instance to use
532         * for computing the minimum
533         * @since 1.2
534         */
535        public synchronized void setMinImpl(UnivariateStatistic minImpl) {
536            this.minImpl = minImpl;
537        }
538    
539        /**
540         * Returns the currently configured percentile implementation.
541         * 
542         * @return the UnivariateStatistic implementing the percentile
543         * @since 1.2
544         */
545        public synchronized UnivariateStatistic getPercentileImpl() {
546            return percentileImpl;
547        }
548    
549        /**
550         * Sets the implementation to be used by {@link #getPercentile(double)}.
551         * The supplied <code>UnivariateStatistic</code> must provide a
552         * <code>setQuantile(double)</code> method; otherwise 
553         * <code>IllegalArgumentException</code> is thrown.
554         * 
555         * @param percentileImpl the percentileImpl to set
556         * @throws IllegalArgumentException if the supplied implementation does not
557         *  provide a <code>setQuantile</code> method
558         * @since 1.2
559         */
560        public synchronized void setPercentileImpl(
561                UnivariateStatistic percentileImpl) {
562            try {
563                percentileImpl.getClass().getMethod("setQuantile", 
564                        new Class[] {Double.TYPE}).invoke(percentileImpl,
565                                new Object[] {Double.valueOf(50.0d)});
566            } catch (NoSuchMethodException e1) { 
567                throw MathRuntimeException.createIllegalArgumentException(
568                      "percentile implementation {0} does not support setQuantile",
569                      percentileImpl.getClass().getName());
570            } catch (IllegalAccessException e2) {
571                throw MathRuntimeException.createIllegalArgumentException(
572                      "cannot access setQuantile method in percentile implementation {0}",
573                      percentileImpl.getClass().getName());
574            } catch (InvocationTargetException e3) {
575                throw MathRuntimeException.createIllegalArgumentException(e3.getCause()); 
576            }
577            this.percentileImpl = percentileImpl;
578        }
579    
580        /**
581         * Returns the currently configured skewness implementation.
582         * 
583         * @return the UnivariateStatistic implementing the skewness
584         * @since 1.2
585         */
586        public synchronized UnivariateStatistic getSkewnessImpl() {
587            return skewnessImpl;
588        }
589    
590        /**
591         * <p>Sets the implementation for the skewness.</p>
592         * 
593         * @param skewnessImpl the UnivariateStatistic instance to use
594         * for computing the skewness
595         * @since 1.2
596         */
597        public synchronized void setSkewnessImpl(
598                UnivariateStatistic skewnessImpl) {
599            this.skewnessImpl = skewnessImpl;
600        }
601    
602        /**
603         * Returns the currently configured variance implementation.
604         * 
605         * @return the UnivariateStatistic implementing the variance
606         * @since 1.2
607         */
608        public synchronized UnivariateStatistic getVarianceImpl() {
609            return varianceImpl;
610        }
611    
612        /**
613         * <p>Sets the implementation for the variance.</p>
614         * 
615         * @param varianceImpl the UnivariateStatistic instance to use
616         * for computing the variance
617         * @since 1.2
618         */
619        public synchronized void setVarianceImpl(
620                UnivariateStatistic varianceImpl) {
621            this.varianceImpl = varianceImpl;
622        }
623    
624        /**
625         * Returns the currently configured sum of squares implementation.
626         * 
627         * @return the UnivariateStatistic implementing the sum of squares
628         * @since 1.2
629         */
630        public synchronized UnivariateStatistic getSumsqImpl() {
631            return sumsqImpl;
632        }
633    
634        /**
635         * <p>Sets the implementation for the sum of squares.</p>
636         * 
637         * @param sumsqImpl the UnivariateStatistic instance to use
638         * for computing the sum of squares
639         * @since 1.2
640         */
641        public synchronized void setSumsqImpl(UnivariateStatistic sumsqImpl) {
642            this.sumsqImpl = sumsqImpl;
643        }
644    
645        /**
646         * Returns the currently configured sum implementation.
647         * 
648         * @return the UnivariateStatistic implementing the sum
649         * @since 1.2
650         */
651        public synchronized UnivariateStatistic getSumImpl() {
652            return sumImpl;
653        }
654    
655        /**
656         * <p>Sets the implementation for the sum.</p>
657         * 
658         * @param sumImpl the UnivariateStatistic instance to use
659         * for computing the sum
660         * @since 1.2
661         */
662        public synchronized void setSumImpl(UnivariateStatistic sumImpl) {
663            this.sumImpl = sumImpl;
664        }  
665        
666        /**
667         * Returns a copy of this DescriptiveStatistics instance with the same internal state.
668         * 
669         * @return a copy of this
670         */
671        public DescriptiveStatistics copy() {
672            DescriptiveStatistics result = new DescriptiveStatistics();
673            copy(this, result);
674            return result; 
675        }
676         
677        /**
678         * Copies source to dest.
679         * <p>Neither source nor dest can be null.</p>
680         * 
681         * @param source DescriptiveStatistics to copy
682         * @param dest DescriptiveStatistics to copy to
683         * @throws NullPointerException if either source or dest is null
684         */
685        public static void copy(DescriptiveStatistics source, DescriptiveStatistics dest) {
686            // Copy data and window size
687            dest.eDA = source.eDA.copy();
688            dest.windowSize = source.windowSize;
689            
690            // Copy implementations
691            dest.maxImpl = source.maxImpl.copy();
692            dest.meanImpl = source.meanImpl.copy();
693            dest.minImpl = source.minImpl.copy();
694            dest.sumImpl = source.sumImpl.copy();
695            dest.varianceImpl = source.varianceImpl.copy();
696            dest.sumsqImpl = source.sumsqImpl.copy();
697            dest.geometricMeanImpl = source.geometricMeanImpl.copy();
698            dest.kurtosisImpl = source.kurtosisImpl;
699            dest.skewnessImpl = source.skewnessImpl;
700            dest.percentileImpl = source.percentileImpl;
701        }
702    }