1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package org.apache.commons.math.stat.descriptive.rank;
17
18 import java.io.Serializable;
19 import java.util.Arrays;
20 import org.apache.commons.math.stat.descriptive.AbstractUnivariateStatistic;
21
22 /**
23 * Provides percentile computation.
24 * <p>
25 * There are several commonly used methods for estimating percentiles (a.k.a.
26 * quantiles) based on sample data. For large samples, the different methods
27 * agree closely, but when sample sizes are small, different methods will give
28 * significantly different results. The algorithm implemented here works as follows:
29 * <ol>
30 * <li>Let <code>n</code> be the length of the (sorted) array and
31 * <code>0 < p <= 100</code> be the desired percentile.</li>
32 * <li>If <code> n = 1 </code> return the unique array element (regardless of
33 * the value of <code>p</code>); otherwise </li>
34 * <li>Compute the estimated percentile position
35 * <code> pos = p * (n + 1) / 100</code> and the difference, <code>d</code>
36 * between <code>pos</code> and <code>floor(pos)</code> (i.e. the fractional
37 * part of <code>pos</code>). If <code>pos >= n</code> return the largest
38 * element in the array; otherwise</li>
39 * <li>Let <code>lower</code> be the element in position
40 * <code>floor(pos)</code> in the array and let <code>upper</code> be the
41 * next element in the array. Return <code>lower + d * (upper - lower)</code>
42 * </li>
43 * </ol>
44 * <p>
45 * To compute percentiles, the data must be (totally) ordered. Input arrays
46 * are copied and then sorted using {@link java.util.Arrays#sort(double[])}.
47 * The ordering used by <code>Arrays.sort(double[])</code> is the one determined
48 * by {@link java.lang.Double#compareTo(Double)}. This ordering makes
49 * <code>Double.NaN</code> larger than any other value (including
50 * <code>Double.POSITIVE_INFINITY</code>). Therefore, for example, the median
51 * (50th percentile) of
52 * <code>{0, 1, 2, 3, 4, Double.NaN}</code> evaluates to <code>2.5.</code>
53 * <p>
54 * Since percentile estimation usually involves interpolation between array
55 * elements, arrays containing <code>NaN</code> or infinite values will often
56 * result in <code>NaN<code> or infinite values returned.
57 * <p>
58 * <strong>Note that this implementation is not synchronized.</strong> If
59 * multiple threads access an instance of this class concurrently, and at least
60 * one of the threads invokes the <code>increment()</code> or
61 * <code>clear()</code> method, it must be synchronized externally.
62 *
63 * @version $Revision: 348519 $ $Date: 2005-11-23 12:12:18 -0700 (Wed, 23 Nov 2005) $
64 */
65 public class Percentile extends AbstractUnivariateStatistic implements Serializable {
66
67 /** Serializable version identifier */
68 private static final long serialVersionUID = -8091216485095130416L;
69
70 /** Determines what percentile is computed when evaluate() is activated
71 * with no quantile argument */
72 private double quantile = 0.0;
73
74 /**
75 * Constructs a Percentile with a default quantile
76 * value of 50.0.
77 */
78 public Percentile() {
79 this(50.0);
80 }
81
82 /**
83 * Constructs a Percentile with the specific quantile value.
84 * @param p the quantile
85 * @throws IllegalArgumentException if p is not greater than 0 and less
86 * than or equal to 100
87 */
88 public Percentile(final double p) {
89 setQuantile(p);
90 }
91
92 /**
93 * Returns an estimate of the <code>p</code>th percentile of the values
94 * in the <code>values</code> array.
95 * <p>
96 * Calls to this method do not modify the internal <code>quantile</code>
97 * state of this statistic.
98 * <p>
99 * <ul>
100 * <li>Returns <code>Double.NaN</code> if <code>values</code> has length
101 * <code>0</code></li>
102 * <li>Returns (for any value of <code>p</code>) <code>values[0]</code>
103 * if <code>values</code> has length <code>1</code></li>
104 * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
105 * is null or p is not a valid quantile value (p must be greater than 0
106 * and less than or equal to 100) </li>
107 * </ul>
108 * <p>
109 * See {@link Percentile} for a description of the percentile estimation
110 * algorithm used.
111 *
112 * @param values input array of values
113 * @param p the percentile value to compute
114 * @return the percentile value or Double.NaN if the array is empty
115 * @throws IllegalArgumentException if <code>values</code> is null
116 * or p is invalid
117 */
118 public double evaluate(final double[] values, final double p) {
119 test(values, 0, 0);
120 return evaluate(values, 0, values.length, p);
121 }
122
123 /**
124 * Returns an estimate of the <code>quantile</code>th percentile of the
125 * designated values in the <code>values</code> array. The quantile
126 * estimated is determined by the <code>quantile</code> property.
127 * <p>
128 * <ul>
129 * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
130 * <li>Returns (for any value of <code>quantile</code>)
131 * <code>values[begin]</code> if <code>length = 1 </code></li>
132 * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
133 * is null, or <code>start</code> or <code>length</code>
134 * is invalid</li>
135 * </ul>
136 * <p>
137 * See {@link Percentile} for a description of the percentile estimation
138 * algorithm used.
139 *
140 * @param values the input array
141 * @param start index of the first array element to include
142 * @param length the number of elements to include
143 * @return the percentile value
144 * @throws IllegalArgumentException if the parameters are not valid
145 *
146 */
147 public double evaluate( final double[] values, final int start, final int length) {
148 return evaluate(values, start, length, quantile);
149 }
150
151 /**
152 * Returns an estimate of the <code>p</code>th percentile of the values
153 * in the <code>values</code> array, starting with the element in (0-based)
154 * position <code>begin</code> in the array and including <code>length</code>
155 * values.
156 * <p>
157 * Calls to this method do not modify the internal <code>quantile</code>
158 * state of this statistic.
159 * <p>
160 * <ul>
161 * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
162 * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code>
163 * if <code>length = 1 </code></li>
164 * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
165 * is null , <code>begin</code> or <code>length</code> is invalid, or
166 * <code>p</code> is not a valid quantile value (p must be greater than 0
167 * and less than or equal to 100)</li>
168 * </ul>
169 * <p>
170 * See {@link Percentile} for a description of the percentile estimation
171 * algorithm used.
172 *
173 * @param values array of input values
174 * @param p the percentile to compute
175 * @param begin the first (0-based) element to include in the computation
176 * @param length the number of array elements to include
177 * @return the percentile value
178 * @throws IllegalArgumentException if the parameters are not valid or the
179 * input array is null
180 */
181 public double evaluate(final double[] values, final int begin,
182 final int length, final double p) {
183
184 test(values, begin, length);
185
186 if ((p > 100) || (p <= 0)) {
187 throw new IllegalArgumentException("invalid quantile value: " + p);
188 }
189 if (length == 0) {
190 return Double.NaN;
191 }
192 if (length == 1) {
193 return values[begin];
194 }
195 double n = (double) length;
196 double pos = p * (n + 1) / 100;
197 double fpos = Math.floor(pos);
198 int intPos = (int) fpos;
199 double dif = pos - fpos;
200 double[] sorted = new double[length];
201 System.arraycopy(values, begin, sorted, 0, length);
202 Arrays.sort(sorted);
203
204 if (pos < 1) {
205 return sorted[0];
206 }
207 if (pos >= n) {
208 return sorted[length - 1];
209 }
210 double lower = sorted[intPos - 1];
211 double upper = sorted[intPos];
212 return lower + dif * (upper - lower);
213 }
214
215 /**
216 * Returns the value of the quantile field (determines what percentile is
217 * computed when evaluate() is called with no quantile argument).
218 *
219 * @return quantile
220 */
221 public double getQuantile() {
222 return quantile;
223 }
224
225 /**
226 * Sets the value of the quantile field (determines what percentile is
227 * computed when evaluate() is called with no quantile argument).
228 *
229 * @param p a value between 0 < p <= 100
230 * @throws IllegalArgumentException if p is not greater than 0 and less
231 * than or equal to 100
232 */
233 public void setQuantile(final double p) {
234 if (p <= 0 || p > 100) {
235 throw new IllegalArgumentException("Illegal quantile value: " + p);
236 }
237 quantile = p;
238 }
239
240 }