1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package org.apache.commons.math.stat.descriptive;
17
18 import java.io.Serializable;
19 import java.util.Arrays;
20
21 import org.apache.commons.discovery.tools.DiscoverClass;
22 import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
23 import org.apache.commons.math.stat.descriptive.moment.Kurtosis;
24 import org.apache.commons.math.stat.descriptive.moment.Mean;
25 import org.apache.commons.math.stat.descriptive.moment.Skewness;
26 import org.apache.commons.math.stat.descriptive.moment.Variance;
27 import org.apache.commons.math.stat.descriptive.rank.Max;
28 import org.apache.commons.math.stat.descriptive.rank.Min;
29 import org.apache.commons.math.stat.descriptive.rank.Percentile;
30 import org.apache.commons.math.stat.descriptive.summary.Sum;
31 import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
32
33
34 /**
35 * Abstract factory class for univariate statistical summaries.
36 *
37 * @version $Revision: 348519 $ $Date: 2005-11-23 12:12:18 -0700 (Wed, 23 Nov 2005) $
38 */
39 public abstract class DescriptiveStatistics implements StatisticalSummary, Serializable {
40
41 /** Serialization UID */
42 private static final long serialVersionUID = 5188298269533339922L;
43
44 /**
45 * Create an instance of a <code>DescriptiveStatistics</code>
46 * @param cls the type of <code>DescriptiveStatistics</code> object to
47 * create.
48 * @return a new factory.
49 * @throws InstantiationException is thrown if the object can not be
50 * created.
51 * @throws IllegalAccessException is thrown if the type's default
52 * constructor is not accessible.
53 */
54 public static DescriptiveStatistics newInstance(Class cls) throws InstantiationException, IllegalAccessException {
55 return (DescriptiveStatistics)cls.newInstance();
56 }
57
58 /**
59 * Create an instance of a <code>DescriptiveStatistics</code>
60 * @return a new factory.
61 */
62 public static DescriptiveStatistics newInstance() {
63 DescriptiveStatistics factory = null;
64 try {
65 DiscoverClass dc = new DiscoverClass();
66 factory = (DescriptiveStatistics) dc.newInstance(
67 DescriptiveStatistics.class,
68 "org.apache.commons.math.stat.descriptive.DescriptiveStatisticsImpl");
69 } catch(Throwable t) {
70 return new DescriptiveStatisticsImpl();
71 }
72 return factory;
73 }
74
75 /**
76 * This constant signals that a Univariate implementation
77 * takes into account the contributions of an infinite number of
78 * elements. In other words, if getWindow returns this
79 * constant, there is, in effect, no "window".
80 */
81 public static final int INFINITE_WINDOW = -1;
82
83 /**
84 * Adds the value to the set of numbers
85 * @param v the value to be added
86 */
87 public abstract void addValue(double v);
88
89 /**
90 * Returns the <a href="http://www.xycoon.com/arithmetic_mean.htm">
91 * arithmetic mean </a> of the available values
92 * @return The mean or Double.NaN if no values have been added.
93 */
94 public double getMean() {
95 return apply(new Mean());
96 }
97
98 /**
99 * Returns the <a href="http://www.xycoon.com/geometric_mean.htm">
100 * geometric mean </a> of the available values
101 * @return The geometricMean, Double.NaN if no values have been added,
102 * or if the productof the available values is less than or equal to 0.
103 */
104 public double getGeometricMean() {
105 return apply(new GeometricMean());
106 }
107
108 /**
109 * Returns the variance of the available values.
110 * @return The variance, Double.NaN if no values have been added
111 * or 0.0 for a single value set.
112 */
113 public double getVariance() {
114 return apply(new Variance());
115 }
116
117 /**
118 * Returns the standard deviation of the available values.
119 * @return The standard deviation, Double.NaN if no values have been added
120 * or 0.0 for a single value set.
121 */
122 public double getStandardDeviation() {
123 double stdDev = Double.NaN;
124 if (getN() > 0) {
125 if (getN() > 1) {
126 stdDev = Math.sqrt(getVariance());
127 } else {
128 stdDev = 0.0;
129 }
130 }
131 return (stdDev);
132 }
133
134 /**
135 * Returns the skewness of the available values. Skewness is a
136 * measure of the assymetry of a given distribution.
137 * @return The skewness, Double.NaN if no values have been added
138 * or 0.0 for a value set <=2.
139 */
140 public double getSkewness() {
141 return apply(new Skewness());
142 }
143
144 /**
145 * Returns the Kurtosis of the available values. Kurtosis is a
146 * measure of the "peakedness" of a distribution
147 * @return The kurtosis, Double.NaN if no values have been added, or 0.0
148 * for a value set <=3.
149 */
150 public double getKurtosis() {
151 return apply(new Kurtosis());
152 }
153
154 /**
155 * Returns the maximum of the available values
156 * @return The max or Double.NaN if no values have been added.
157 */
158 public double getMax() {
159 return apply(new Max());
160 }
161
162 /**
163 * Returns the minimum of the available values
164 * @return The min or Double.NaN if no values have been added.
165 */
166 public double getMin() {
167 return apply(new Min());
168 }
169
170 /**
171 * Returns the number of available values
172 * @return The number of available values
173 */
174 public abstract long getN();
175
176 /**
177 * Returns the sum of the values that have been added to Univariate.
178 * @return The sum or Double.NaN if no values have been added
179 */
180 public double getSum() {
181 return apply(new Sum());
182 }
183
184 /**
185 * Returns the sum of the squares of the available values.
186 * @return The sum of the squares or Double.NaN if no
187 * values have been added.
188 */
189 public double getSumsq() {
190 return apply(new SumOfSquares());
191 }
192
193 /**
194 * Resets all statistics and storage
195 */
196 public abstract void clear();
197
198 /**
199 * Univariate has the ability to return only measures for the
200 * last N elements added to the set of values.
201 * @return The current window size or -1 if its Infinite.
202 */
203
204 public abstract int getWindowSize();
205
206 /**
207 * WindowSize controls the number of values which contribute
208 * to the values returned by Univariate. For example, if
209 * windowSize is set to 3 and the values {1,2,3,4,5}
210 * have been added <strong> in that order</strong>
211 * then the <i>available values</i> are {3,4,5} and all
212 * reported statistics will be based on these values
213 * @param windowSize sets the size of the window.
214 */
215 public abstract void setWindowSize(int windowSize);
216
217 /**
218 * Returns the current set of values in an array of double primitives.
219 * The order of addition is preserved. The returned array is a fresh
220 * copy of the underlying data -- i.e., it is not a reference to the
221 * stored data.
222 *
223 * @return returns the current set of numbers in the order in which they
224 * were added to this set
225 */
226 public abstract double[] getValues();
227
228 /**
229 * Returns the current set of values in an array of double primitives,
230 * sorted in ascending order. The returned array is a fresh
231 * copy of the underlying data -- i.e., it is not a reference to the
232 * stored data.
233 * @return returns the current set of
234 * numbers sorted in ascending order
235 */
236 public double[] getSortedValues() {
237 double[] sort = getValues();
238 Arrays.sort(sort);
239 return sort;
240 }
241
242 /**
243 * Returns the element at the specified index
244 * @param index The Index of the element
245 * @return return the element at the specified index
246 */
247 public abstract double getElement(int index);
248
249 /**
250 * Returns an estimate for the pth percentile of the stored values.
251 * <p>
252 * The implementation provided here follows the first estimation procedure presented
253 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a>
254 * <p>
255 * <strong>Preconditions</strong>:<ul>
256 * <li><code>0 < p < 100</code> (otherwise an
257 * <code>IllegalArgumentException</code> is thrown)</li>
258 * <li>at least one value must be stored (returns <code>Double.NaN
259 * </code> otherwise)</li>
260 * </ul>
261 *
262 * @param p the requested percentile (scaled from 0 - 100)
263 * @return An estimate for the pth percentile of the stored data
264 * values
265 */
266 public double getPercentile(double p) {
267 return apply(new Percentile(p));
268 }
269
270 /**
271 * Generates a text report displaying univariate statistics from values
272 * that have been added. Each statistic is displayed on a separate
273 * line.
274 *
275 * @return String with line feeds displaying statistics
276 */
277 public String toString() {
278 StringBuffer outBuffer = new StringBuffer();
279 outBuffer.append("DescriptiveStatistics:\n");
280 outBuffer.append("n: " + getN() + "\n");
281 outBuffer.append("min: " + getMin() + "\n");
282 outBuffer.append("max: " + getMax() + "\n");
283 outBuffer.append("mean: " + getMean() + "\n");
284 outBuffer.append("std dev: " + getStandardDeviation() + "\n");
285 outBuffer.append("median: " + getPercentile(50) + "\n");
286 outBuffer.append("skewness: " + getSkewness() + "\n");
287 outBuffer.append("kurtosis: " + getKurtosis() + "\n");
288 return outBuffer.toString();
289 }
290
291 /**
292 * Apply the given statistic to the data associated with this set of statistics.
293 * @param stat the statistic to apply
294 * @return the computed value of the statistic.
295 */
296 public abstract double apply(UnivariateStatistic stat);
297
298 }