1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.math.stat.descriptive; 18 19 import java.io.Serializable; 20 import java.lang.reflect.InvocationTargetException; 21 import java.util.Arrays; 22 23 import org.apache.commons.math.MathRuntimeException; 24 import org.apache.commons.math.stat.descriptive.moment.GeometricMean; 25 import org.apache.commons.math.stat.descriptive.moment.Kurtosis; 26 import org.apache.commons.math.stat.descriptive.moment.Mean; 27 import org.apache.commons.math.stat.descriptive.moment.Skewness; 28 import org.apache.commons.math.stat.descriptive.moment.Variance; 29 import org.apache.commons.math.stat.descriptive.rank.Max; 30 import org.apache.commons.math.stat.descriptive.rank.Min; 31 import org.apache.commons.math.stat.descriptive.rank.Percentile; 32 import org.apache.commons.math.stat.descriptive.summary.Sum; 33 import org.apache.commons.math.stat.descriptive.summary.SumOfSquares; 34 import org.apache.commons.math.util.ResizableDoubleArray; 35 36 37 /** 38 * Maintains a dataset of values of a single variable and computes descriptive 39 * statistics based on stored data. The {@link #getWindowSize() windowSize} 40 * property sets a limit on the number of values that can be stored in the 41 * dataset. The default value, INFINITE_WINDOW, puts no limit on the size of 42 * the dataset. This value should be used with caution, as the backing store 43 * will grow without bound in this case. For very large datasets, 44 * {@link SummaryStatistics}, which does not store the dataset, should be used 45 * instead of this class. If <code>windowSize</code> is not INFINITE_WINDOW and 46 * more values are added than can be stored in the dataset, new values are 47 * added in a "rolling" manner, with new values replacing the "oldest" values 48 * in the dataset. 49 * 50 * <p>Note: this class is not threadsafe. Use 51 * {@link SynchronizedDescriptiveStatistics} if concurrent access from multiple 52 * threads is required.</p> 53 * 54 * @version $Revision: 772119 $ $Date: 2009-05-06 05:43:28 -0400 (Wed, 06 May 2009) $ 55 */ 56 public class DescriptiveStatistics implements StatisticalSummary, Serializable { 57 58 /** Serialization UID */ 59 private static final long serialVersionUID = 4133067267405273064L; 60 61 /** hold the window size **/ 62 protected int windowSize = INFINITE_WINDOW; 63 64 /** 65 * Stored data values 66 */ 67 protected ResizableDoubleArray eDA = new ResizableDoubleArray(); 68 69 /** Mean statistic implementation - can be reset by setter. */ 70 private UnivariateStatistic meanImpl = new Mean(); 71 72 /** Geometric mean statistic implementation - can be reset by setter. */ 73 private UnivariateStatistic geometricMeanImpl = new GeometricMean(); 74 75 /** Kurtosis statistic implementation - can be reset by setter. */ 76 private UnivariateStatistic kurtosisImpl = new Kurtosis(); 77 78 /** Maximum statistic implementation - can be reset by setter. */ 79 private UnivariateStatistic maxImpl = new Max(); 80 81 /** Minimum statistic implementation - can be reset by setter. */ 82 private UnivariateStatistic minImpl = new Min(); 83 84 /** Percentile statistic implementation - can be reset by setter. */ 85 private UnivariateStatistic percentileImpl = new Percentile(); 86 87 /** Skewness statistic implementation - can be reset by setter. */ 88 private UnivariateStatistic skewnessImpl = new Skewness(); 89 90 /** Variance statistic implementation - can be reset by setter. */ 91 private UnivariateStatistic varianceImpl = new Variance(); 92 93 /** Sum of squares statistic implementation - can be reset by setter. */ 94 private UnivariateStatistic sumsqImpl = new SumOfSquares(); 95 96 /** Sum statistic implementation - can be reset by setter. */ 97 private UnivariateStatistic sumImpl = new Sum(); 98 99 /** 100 * Construct a DescriptiveStatistics instance with an infinite window 101 */ 102 public DescriptiveStatistics() { 103 } 104 105 /** 106 * Construct a DescriptiveStatistics instance with the specified window 107 * 108 * @param window the window size. 109 */ 110 public DescriptiveStatistics(int window) { 111 setWindowSize(window); 112 } 113 114 /** 115 * Copy constructor. Construct a new DescriptiveStatistics instance that 116 * is a copy of original. 117 * 118 * @param original DescriptiveStatistics instance to copy 119 */ 120 public DescriptiveStatistics(DescriptiveStatistics original) { 121 copy(original, this); 122 } 123 124 /** 125 * Represents an infinite window size. When the {@link #getWindowSize()} 126 * returns this value, there is no limit to the number of data values 127 * that can be stored in the dataset. 128 */ 129 public static final int INFINITE_WINDOW = -1; 130 131 /** 132 * Adds the value to the dataset. If the dataset is at the maximum size 133 * (i.e., the number of stored elements equals the currently configured 134 * windowSize), the first (oldest) element in the dataset is discarded 135 * to make room for the new value. 136 * 137 * @param v the value to be added 138 */ 139 public void addValue(double v) { 140 if (windowSize != INFINITE_WINDOW) { 141 if (getN() == windowSize) { 142 eDA.addElementRolling(v); 143 } else if (getN() < windowSize) { 144 eDA.addElement(v); 145 } 146 } else { 147 eDA.addElement(v); 148 } 149 } 150 151 /** 152 * Removes the most recent value from the dataset. 153 */ 154 public void removeMostRecentValue() { 155 eDA.discardMostRecentElements(1); 156 } 157 158 /** 159 * Replaces the most recently stored value with the given value. 160 * There must be at least one element stored to call this method. 161 * 162 * @param v the value to replace the most recent stored value 163 * @return replaced value 164 */ 165 public double replaceMostRecentValue(double v) { 166 return eDA.substituteMostRecentElement(v); 167 } 168 169 /** 170 * Returns the <a href="http://www.xycoon.com/arithmetic_mean.htm"> 171 * arithmetic mean </a> of the available values 172 * @return The mean or Double.NaN if no values have been added. 173 */ 174 public double getMean() { 175 return apply(meanImpl); 176 } 177 178 /** 179 * Returns the <a href="http://www.xycoon.com/geometric_mean.htm"> 180 * geometric mean </a> of the available values 181 * @return The geometricMean, Double.NaN if no values have been added, 182 * or if the product of the available values is less than or equal to 0. 183 */ 184 public double getGeometricMean() { 185 return apply(geometricMeanImpl); 186 } 187 188 /** 189 * Returns the variance of the available values. 190 * @return The variance, Double.NaN if no values have been added 191 * or 0.0 for a single value set. 192 */ 193 public double getVariance() { 194 return apply(varianceImpl); 195 } 196 197 /** 198 * Returns the standard deviation of the available values. 199 * @return The standard deviation, Double.NaN if no values have been added 200 * or 0.0 for a single value set. 201 */ 202 public double getStandardDeviation() { 203 double stdDev = Double.NaN; 204 if (getN() > 0) { 205 if (getN() > 1) { 206 stdDev = Math.sqrt(getVariance()); 207 } else { 208 stdDev = 0.0; 209 } 210 } 211 return (stdDev); 212 } 213 214 /** 215 * Returns the skewness of the available values. Skewness is a 216 * measure of the asymmetry of a given distribution. 217 * @return The skewness, Double.NaN if no values have been added 218 * or 0.0 for a value set <=2. 219 */ 220 public double getSkewness() { 221 return apply(skewnessImpl); 222 } 223 224 /** 225 * Returns the Kurtosis of the available values. Kurtosis is a 226 * measure of the "peakedness" of a distribution 227 * @return The kurtosis, Double.NaN if no values have been added, or 0.0 228 * for a value set <=3. 229 */ 230 public double getKurtosis() { 231 return apply(kurtosisImpl); 232 } 233 234 /** 235 * Returns the maximum of the available values 236 * @return The max or Double.NaN if no values have been added. 237 */ 238 public double getMax() { 239 return apply(maxImpl); 240 } 241 242 /** 243 * Returns the minimum of the available values 244 * @return The min or Double.NaN if no values have been added. 245 */ 246 public double getMin() { 247 return apply(minImpl); 248 } 249 250 /** 251 * Returns the number of available values 252 * @return The number of available values 253 */ 254 public long getN() { 255 return eDA.getNumElements(); 256 } 257 258 /** 259 * Returns the sum of the values that have been added to Univariate. 260 * @return The sum or Double.NaN if no values have been added 261 */ 262 public double getSum() { 263 return apply(sumImpl); 264 } 265 266 /** 267 * Returns the sum of the squares of the available values. 268 * @return The sum of the squares or Double.NaN if no 269 * values have been added. 270 */ 271 public double getSumsq() { 272 return apply(sumsqImpl); 273 } 274 275 /** 276 * Resets all statistics and storage 277 */ 278 public void clear() { 279 eDA.clear(); 280 } 281 282 283 /** 284 * Returns the maximum number of values that can be stored in the 285 * dataset, or INFINITE_WINDOW (-1) if there is no limit. 286 * 287 * @return The current window size or -1 if its Infinite. 288 */ 289 public int getWindowSize() { 290 return windowSize; 291 } 292 293 /** 294 * WindowSize controls the number of values which contribute 295 * to the reported statistics. For example, if 296 * windowSize is set to 3 and the values {1,2,3,4,5} 297 * have been added <strong> in that order</strong> 298 * then the <i>available values</i> are {3,4,5} and all 299 * reported statistics will be based on these values 300 * @param windowSize sets the size of the window. 301 */ 302 public void setWindowSize(int windowSize) { 303 if (windowSize < 1) { 304 if (windowSize != INFINITE_WINDOW) { 305 throw MathRuntimeException.createIllegalArgumentException( 306 "window size must be positive ({0})", windowSize); 307 } 308 } 309 310 this.windowSize = windowSize; 311 312 // We need to check to see if we need to discard elements 313 // from the front of the array. If the windowSize is less than 314 // the current number of elements. 315 if (windowSize != INFINITE_WINDOW && windowSize < eDA.getNumElements()) { 316 eDA.discardFrontElements(eDA.getNumElements() - windowSize); 317 } 318 } 319 320 /** 321 * Returns the current set of values in an array of double primitives. 322 * The order of addition is preserved. The returned array is a fresh 323 * copy of the underlying data -- i.e., it is not a reference to the 324 * stored data. 325 * 326 * @return returns the current set of numbers in the order in which they 327 * were added to this set 328 */ 329 public double[] getValues() { 330 return eDA.getElements(); 331 } 332 333 /** 334 * Returns the current set of values in an array of double primitives, 335 * sorted in ascending order. The returned array is a fresh 336 * copy of the underlying data -- i.e., it is not a reference to the 337 * stored data. 338 * @return returns the current set of 339 * numbers sorted in ascending order 340 */ 341 public double[] getSortedValues() { 342 double[] sort = getValues(); 343 Arrays.sort(sort); 344 return sort; 345 } 346 347 /** 348 * Returns the element at the specified index 349 * @param index The Index of the element 350 * @return return the element at the specified index 351 */ 352 public double getElement(int index) { 353 return eDA.getElement(index); 354 } 355 356 /** 357 * Returns an estimate for the pth percentile of the stored values. 358 * <p> 359 * The implementation provided here follows the first estimation procedure presented 360 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a> 361 * </p><p> 362 * <strong>Preconditions</strong>:<ul> 363 * <li><code>0 < p ≤ 100</code> (otherwise an 364 * <code>IllegalArgumentException</code> is thrown)</li> 365 * <li>at least one value must be stored (returns <code>Double.NaN 366 * </code> otherwise)</li> 367 * </ul></p> 368 * 369 * @param p the requested percentile (scaled from 0 - 100) 370 * @return An estimate for the pth percentile of the stored data 371 * @throws IllegalStateException if percentile implementation has been 372 * overridden and the supplied implementation does not support setQuantile 373 * values 374 */ 375 public double getPercentile(double p) { 376 if (percentileImpl instanceof Percentile) { 377 ((Percentile) percentileImpl).setQuantile(p); 378 } else { 379 try { 380 percentileImpl.getClass().getMethod("setQuantile", 381 new Class[] {Double.TYPE}).invoke(percentileImpl, 382 new Object[] {Double.valueOf(p)}); 383 } catch (NoSuchMethodException e1) { // Setter guard should prevent 384 throw MathRuntimeException.createIllegalArgumentException( 385 "percentile implementation {0} does not support setQuantile", 386 percentileImpl.getClass().getName()); 387 } catch (IllegalAccessException e2) { 388 throw MathRuntimeException.createIllegalArgumentException( 389 "cannot access setQuantile method in percentile implementation {0}", 390 percentileImpl.getClass().getName()); 391 } catch (InvocationTargetException e3) { 392 throw MathRuntimeException.createIllegalArgumentException(e3.getCause()); 393 } 394 } 395 return apply(percentileImpl); 396 } 397 398 /** 399 * Generates a text report displaying univariate statistics from values 400 * that have been added. Each statistic is displayed on a separate 401 * line. 402 * 403 * @return String with line feeds displaying statistics 404 */ 405 @Override 406 public String toString() { 407 StringBuffer outBuffer = new StringBuffer(); 408 String endl = "\n"; 409 outBuffer.append("DescriptiveStatistics:").append(endl); 410 outBuffer.append("n: ").append(getN()).append(endl); 411 outBuffer.append("min: ").append(getMin()).append(endl); 412 outBuffer.append("max: ").append(getMax()).append(endl); 413 outBuffer.append("mean: ").append(getMean()).append(endl); 414 outBuffer.append("std dev: ").append(getStandardDeviation()) 415 .append(endl); 416 outBuffer.append("median: ").append(getPercentile(50)).append(endl); 417 outBuffer.append("skewness: ").append(getSkewness()).append(endl); 418 outBuffer.append("kurtosis: ").append(getKurtosis()).append(endl); 419 return outBuffer.toString(); 420 } 421 422 /** 423 * Apply the given statistic to the data associated with this set of statistics. 424 * @param stat the statistic to apply 425 * @return the computed value of the statistic. 426 */ 427 public double apply(UnivariateStatistic stat) { 428 return stat.evaluate(eDA.getInternalValues(), eDA.start(), eDA.getNumElements()); 429 } 430 431 // Implementation getters and setter 432 433 /** 434 * Returns the currently configured mean implementation. 435 * 436 * @return the UnivariateStatistic implementing the mean 437 * @since 1.2 438 */ 439 public synchronized UnivariateStatistic getMeanImpl() { 440 return meanImpl; 441 } 442 443 /** 444 * <p>Sets the implementation for the mean.</p> 445 * 446 * @param meanImpl the UnivariateStatistic instance to use 447 * for computing the mean 448 * @since 1.2 449 */ 450 public synchronized void setMeanImpl(UnivariateStatistic meanImpl) { 451 this.meanImpl = meanImpl; 452 } 453 454 /** 455 * Returns the currently configured geometric mean implementation. 456 * 457 * @return the UnivariateStatistic implementing the geometric mean 458 * @since 1.2 459 */ 460 public synchronized UnivariateStatistic getGeometricMeanImpl() { 461 return geometricMeanImpl; 462 } 463 464 /** 465 * <p>Sets the implementation for the gemoetric mean.</p> 466 * 467 * @param geometricMeanImpl the UnivariateStatistic instance to use 468 * for computing the geometric mean 469 * @since 1.2 470 */ 471 public synchronized void setGeometricMeanImpl( 472 UnivariateStatistic geometricMeanImpl) { 473 this.geometricMeanImpl = geometricMeanImpl; 474 } 475 476 /** 477 * Returns the currently configured kurtosis implementation. 478 * 479 * @return the UnivariateStatistic implementing the kurtosis 480 * @since 1.2 481 */ 482 public synchronized UnivariateStatistic getKurtosisImpl() { 483 return kurtosisImpl; 484 } 485 486 /** 487 * <p>Sets the implementation for the kurtosis.</p> 488 * 489 * @param kurtosisImpl the UnivariateStatistic instance to use 490 * for computing the kurtosis 491 * @since 1.2 492 */ 493 public synchronized void setKurtosisImpl(UnivariateStatistic kurtosisImpl) { 494 this.kurtosisImpl = kurtosisImpl; 495 } 496 497 /** 498 * Returns the currently configured maximum implementation. 499 * 500 * @return the UnivariateStatistic implementing the maximum 501 * @since 1.2 502 */ 503 public synchronized UnivariateStatistic getMaxImpl() { 504 return maxImpl; 505 } 506 507 /** 508 * <p>Sets the implementation for the maximum.</p> 509 * 510 * @param maxImpl the UnivariateStatistic instance to use 511 * for computing the maximum 512 * @since 1.2 513 */ 514 public synchronized void setMaxImpl(UnivariateStatistic maxImpl) { 515 this.maxImpl = maxImpl; 516 } 517 518 /** 519 * Returns the currently configured minimum implementation. 520 * 521 * @return the UnivariateStatistic implementing the minimum 522 * @since 1.2 523 */ 524 public synchronized UnivariateStatistic getMinImpl() { 525 return minImpl; 526 } 527 528 /** 529 * <p>Sets the implementation for the minimum.</p> 530 * 531 * @param minImpl the UnivariateStatistic instance to use 532 * for computing the minimum 533 * @since 1.2 534 */ 535 public synchronized void setMinImpl(UnivariateStatistic minImpl) { 536 this.minImpl = minImpl; 537 } 538 539 /** 540 * Returns the currently configured percentile implementation. 541 * 542 * @return the UnivariateStatistic implementing the percentile 543 * @since 1.2 544 */ 545 public synchronized UnivariateStatistic getPercentileImpl() { 546 return percentileImpl; 547 } 548 549 /** 550 * Sets the implementation to be used by {@link #getPercentile(double)}. 551 * The supplied <code>UnivariateStatistic</code> must provide a 552 * <code>setQuantile(double)</code> method; otherwise 553 * <code>IllegalArgumentException</code> is thrown. 554 * 555 * @param percentileImpl the percentileImpl to set 556 * @throws IllegalArgumentException if the supplied implementation does not 557 * provide a <code>setQuantile</code> method 558 * @since 1.2 559 */ 560 public synchronized void setPercentileImpl( 561 UnivariateStatistic percentileImpl) { 562 try { 563 percentileImpl.getClass().getMethod("setQuantile", 564 new Class[] {Double.TYPE}).invoke(percentileImpl, 565 new Object[] {Double.valueOf(50.0d)}); 566 } catch (NoSuchMethodException e1) { 567 throw MathRuntimeException.createIllegalArgumentException( 568 "percentile implementation {0} does not support setQuantile", 569 percentileImpl.getClass().getName()); 570 } catch (IllegalAccessException e2) { 571 throw MathRuntimeException.createIllegalArgumentException( 572 "cannot access setQuantile method in percentile implementation {0}", 573 percentileImpl.getClass().getName()); 574 } catch (InvocationTargetException e3) { 575 throw MathRuntimeException.createIllegalArgumentException(e3.getCause()); 576 } 577 this.percentileImpl = percentileImpl; 578 } 579 580 /** 581 * Returns the currently configured skewness implementation. 582 * 583 * @return the UnivariateStatistic implementing the skewness 584 * @since 1.2 585 */ 586 public synchronized UnivariateStatistic getSkewnessImpl() { 587 return skewnessImpl; 588 } 589 590 /** 591 * <p>Sets the implementation for the skewness.</p> 592 * 593 * @param skewnessImpl the UnivariateStatistic instance to use 594 * for computing the skewness 595 * @since 1.2 596 */ 597 public synchronized void setSkewnessImpl( 598 UnivariateStatistic skewnessImpl) { 599 this.skewnessImpl = skewnessImpl; 600 } 601 602 /** 603 * Returns the currently configured variance implementation. 604 * 605 * @return the UnivariateStatistic implementing the variance 606 * @since 1.2 607 */ 608 public synchronized UnivariateStatistic getVarianceImpl() { 609 return varianceImpl; 610 } 611 612 /** 613 * <p>Sets the implementation for the variance.</p> 614 * 615 * @param varianceImpl the UnivariateStatistic instance to use 616 * for computing the variance 617 * @since 1.2 618 */ 619 public synchronized void setVarianceImpl( 620 UnivariateStatistic varianceImpl) { 621 this.varianceImpl = varianceImpl; 622 } 623 624 /** 625 * Returns the currently configured sum of squares implementation. 626 * 627 * @return the UnivariateStatistic implementing the sum of squares 628 * @since 1.2 629 */ 630 public synchronized UnivariateStatistic getSumsqImpl() { 631 return sumsqImpl; 632 } 633 634 /** 635 * <p>Sets the implementation for the sum of squares.</p> 636 * 637 * @param sumsqImpl the UnivariateStatistic instance to use 638 * for computing the sum of squares 639 * @since 1.2 640 */ 641 public synchronized void setSumsqImpl(UnivariateStatistic sumsqImpl) { 642 this.sumsqImpl = sumsqImpl; 643 } 644 645 /** 646 * Returns the currently configured sum implementation. 647 * 648 * @return the UnivariateStatistic implementing the sum 649 * @since 1.2 650 */ 651 public synchronized UnivariateStatistic getSumImpl() { 652 return sumImpl; 653 } 654 655 /** 656 * <p>Sets the implementation for the sum.</p> 657 * 658 * @param sumImpl the UnivariateStatistic instance to use 659 * for computing the sum 660 * @since 1.2 661 */ 662 public synchronized void setSumImpl(UnivariateStatistic sumImpl) { 663 this.sumImpl = sumImpl; 664 } 665 666 /** 667 * Returns a copy of this DescriptiveStatistics instance with the same internal state. 668 * 669 * @return a copy of this 670 */ 671 public DescriptiveStatistics copy() { 672 DescriptiveStatistics result = new DescriptiveStatistics(); 673 copy(this, result); 674 return result; 675 } 676 677 /** 678 * Copies source to dest. 679 * <p>Neither source nor dest can be null.</p> 680 * 681 * @param source DescriptiveStatistics to copy 682 * @param dest DescriptiveStatistics to copy to 683 * @throws NullPointerException if either source or dest is null 684 */ 685 public static void copy(DescriptiveStatistics source, DescriptiveStatistics dest) { 686 // Copy data and window size 687 dest.eDA = source.eDA.copy(); 688 dest.windowSize = source.windowSize; 689 690 // Copy implementations 691 dest.maxImpl = source.maxImpl.copy(); 692 dest.meanImpl = source.meanImpl.copy(); 693 dest.minImpl = source.minImpl.copy(); 694 dest.sumImpl = source.sumImpl.copy(); 695 dest.varianceImpl = source.varianceImpl.copy(); 696 dest.sumsqImpl = source.sumsqImpl.copy(); 697 dest.geometricMeanImpl = source.geometricMeanImpl.copy(); 698 dest.kurtosisImpl = source.kurtosisImpl; 699 dest.skewnessImpl = source.skewnessImpl; 700 dest.percentileImpl = source.percentileImpl; 701 } 702 }