Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||||||
Percentile |
|
| 2.7142857142857144;2.714 |
1 | /* |
|
2 | * Copyright 2003-2004 The Apache Software Foundation. |
|
3 | * |
|
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
|
5 | * you may not use this file except in compliance with the License. |
|
6 | * You may obtain a copy of the License at |
|
7 | * |
|
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
|
9 | * |
|
10 | * Unless required by applicable law or agreed to in writing, software |
|
11 | * distributed under the License is distributed on an "AS IS" BASIS, |
|
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
13 | * See the License for the specific language governing permissions and |
|
14 | * limitations under the License. |
|
15 | */ |
|
16 | package org.apache.commons.math.stat.descriptive.rank; |
|
17 | ||
18 | import java.io.Serializable; |
|
19 | import java.util.Arrays; |
|
20 | import org.apache.commons.math.stat.descriptive.AbstractUnivariateStatistic; |
|
21 | ||
22 | /** |
|
23 | * Provides percentile computation. |
|
24 | * <p> |
|
25 | * There are several commonly used methods for estimating percentiles (a.k.a. |
|
26 | * quantiles) based on sample data. For large samples, the different methods |
|
27 | * agree closely, but when sample sizes are small, different methods will give |
|
28 | * significantly different results. The algorithm implemented here works as follows: |
|
29 | * <ol> |
|
30 | * <li>Let <code>n</code> be the length of the (sorted) array and |
|
31 | * <code>0 < p <= 100</code> be the desired percentile.</li> |
|
32 | * <li>If <code> n = 1 </code> return the unique array element (regardless of |
|
33 | * the value of <code>p</code>); otherwise </li> |
|
34 | * <li>Compute the estimated percentile position |
|
35 | * <code> pos = p * (n + 1) / 100</code> and the difference, <code>d</code> |
|
36 | * between <code>pos</code> and <code>floor(pos)</code> (i.e. the fractional |
|
37 | * part of <code>pos</code>). If <code>pos >= n</code> return the largest |
|
38 | * element in the array; otherwise</li> |
|
39 | * <li>Let <code>lower</code> be the element in position |
|
40 | * <code>floor(pos)</code> in the array and let <code>upper</code> be the |
|
41 | * next element in the array. Return <code>lower + d * (upper - lower)</code> |
|
42 | * </li> |
|
43 | * </ol> |
|
44 | * <p> |
|
45 | * To compute percentiles, the data must be (totally) ordered. Input arrays |
|
46 | * are copied and then sorted using {@link java.util.Arrays#sort(double[])}. |
|
47 | * The ordering used by <code>Arrays.sort(double[])</code> is the one determined |
|
48 | * by {@link java.lang.Double#compareTo(Double)}. This ordering makes |
|
49 | * <code>Double.NaN</code> larger than any other value (including |
|
50 | * <code>Double.POSITIVE_INFINITY</code>). Therefore, for example, the median |
|
51 | * (50th percentile) of |
|
52 | * <code>{0, 1, 2, 3, 4, Double.NaN}</code> evaluates to <code>2.5.</code> |
|
53 | * <p> |
|
54 | * Since percentile estimation usually involves interpolation between array |
|
55 | * elements, arrays containing <code>NaN</code> or infinite values will often |
|
56 | * result in <code>NaN<code> or infinite values returned. |
|
57 | * <p> |
|
58 | * <strong>Note that this implementation is not synchronized.</strong> If |
|
59 | * multiple threads access an instance of this class concurrently, and at least |
|
60 | * one of the threads invokes the <code>increment()</code> or |
|
61 | * <code>clear()</code> method, it must be synchronized externally. |
|
62 | * |
|
63 | * @version $Revision$ $Date: 2005-02-26 05:11:52 -0800 (Sat, 26 Feb 2005) $ |
|
64 | */ |
|
65 | public class Percentile extends AbstractUnivariateStatistic implements Serializable { |
|
66 | ||
67 | /** Serializable version identifier */ |
|
68 | static final long serialVersionUID = -8091216485095130416L; |
|
69 | ||
70 | /** Determines what percentile is computed when evaluate() is activated |
|
71 | * with no quantile argument */ |
|
72 | 488 | private double quantile = 0.0; |
73 | ||
74 | /** |
|
75 | * Constructs a Percentile with a default quantile |
|
76 | * value of 50.0. |
|
77 | */ |
|
78 | public Percentile() { |
|
79 | 10 | this(50.0); |
80 | 10 | } |
81 | ||
82 | /** |
|
83 | * Constructs a Percentile with the specific quantile value. |
|
84 | * @param p the quantile |
|
85 | * @throws IllegalArgumentException if p is not greater than 0 and less |
|
86 | * than or equal to 100 |
|
87 | */ |
|
88 | 488 | public Percentile(final double p) { |
89 | 488 | setQuantile(p); |
90 | 478 | } |
91 | ||
92 | /** |
|
93 | * Returns an estimate of the <code>p</code>th percentile of the values |
|
94 | * in the <code>values</code> array. |
|
95 | * <p> |
|
96 | * Calls to this method do not modify the internal <code>quantile</code> |
|
97 | * state of this statistic. |
|
98 | * <p> |
|
99 | * <ul> |
|
100 | * <li>Returns <code>Double.NaN</code> if <code>values</code> has length |
|
101 | * <code>0</code></li> |
|
102 | * <li>Returns (for any value of <code>p</code>) <code>values[0]</code> |
|
103 | * if <code>values</code> has length <code>1</code></li> |
|
104 | * <li>Throws <code>IllegalArgumentException</code> if <code>values</code> |
|
105 | * is null or p is not a valid quantile value (p must be greater than 0 |
|
106 | * and less than or equal to 100) </li> |
|
107 | * </ul> |
|
108 | * <p> |
|
109 | * See {@link Percentile} for a description of the percentile estimation |
|
110 | * algorithm used. |
|
111 | * |
|
112 | * @param values input array of values |
|
113 | * @param p the percentile value to compute |
|
114 | * @return the percentile value or Double.NaN if the array is empty |
|
115 | * @throws IllegalArgumentException if <code>values</code> is null |
|
116 | * or p is invalid |
|
117 | */ |
|
118 | public double evaluate(final double[] values, final double p) { |
|
119 | 8 | test(values, 0, 0); |
120 | 6 | return evaluate(values, 0, values.length, p); |
121 | } |
|
122 | ||
123 | /** |
|
124 | * Returns an estimate of the <code>quantile</code>th percentile of the |
|
125 | * designated values in the <code>values</code> array. The quantile |
|
126 | * estimated is determined by the <code>quantile</code> property. |
|
127 | * <p> |
|
128 | * <ul> |
|
129 | * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li> |
|
130 | * <li>Returns (for any value of <code>quantile</code>) |
|
131 | * <code>values[begin]</code> if <code>length = 1 </code></li> |
|
132 | * <li>Throws <code>IllegalArgumentException</code> if <code>values</code> |
|
133 | * is null, or <code>start</code> or <code>length</code> |
|
134 | * is invalid</li> |
|
135 | * </ul> |
|
136 | * <p> |
|
137 | * See {@link Percentile} for a description of the percentile estimation |
|
138 | * algorithm used. |
|
139 | * |
|
140 | * @param values the input array |
|
141 | * @param start index of the first array element to include |
|
142 | * @param length the number of elements to include |
|
143 | * @return the percentile value |
|
144 | * @throws IllegalArgumentException if the parameters are not valid |
|
145 | * |
|
146 | */ |
|
147 | public double evaluate( final double[] values, final int start, final int length) { |
|
148 | 484 | return evaluate(values, start, length, quantile); |
149 | } |
|
150 | ||
151 | /** |
|
152 | * Returns an estimate of the <code>p</code>th percentile of the values |
|
153 | * in the <code>values</code> array, starting with the element in (0-based) |
|
154 | * position <code>begin</code> in the array and including <code>length</code> |
|
155 | * values. |
|
156 | * <p> |
|
157 | * Calls to this method do not modify the internal <code>quantile</code> |
|
158 | * state of this statistic. |
|
159 | * <p> |
|
160 | * <ul> |
|
161 | * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li> |
|
162 | * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code> |
|
163 | * if <code>length = 1 </code></li> |
|
164 | * <li>Throws <code>IllegalArgumentException</code> if <code>values</code> |
|
165 | * is null , <code>begin</code> or <code>length</code> is invalid, or |
|
166 | * <code>p</code> is not a valid quantile value (p must be greater than 0 |
|
167 | * and less than or equal to 100)</li> |
|
168 | * </ul> |
|
169 | * <p> |
|
170 | * See {@link Percentile} for a description of the percentile estimation |
|
171 | * algorithm used. |
|
172 | * |
|
173 | * @param values array of input values |
|
174 | * @param p the percentile to compute |
|
175 | * @param begin the first (0-based) element to include in the computation |
|
176 | * @param length the number of array elements to include |
|
177 | * @return the percentile value |
|
178 | * @throws IllegalArgumentException if the parameters are not valid or the |
|
179 | * input array is null |
|
180 | */ |
|
181 | public double evaluate(final double[] values, final int begin, |
|
182 | final int length, final double p) { |
|
183 | ||
184 | 508 | test(values, begin, length); |
185 | ||
186 | 506 | if ((p > 100) || (p <= 0)) { |
187 | 4 | throw new IllegalArgumentException("invalid quantile value: " + p); |
188 | } |
|
189 | 502 | double n = (double) length; |
190 | 502 | if (n == 0) { |
191 | 16 | return Double.NaN; |
192 | } |
|
193 | 486 | if (n == 1) { |
194 | 24 | return values[begin]; // always return single value for n = 1 |
195 | } |
|
196 | 462 | double pos = p * (n + 1) / 100; |
197 | 462 | double fpos = Math.floor(pos); |
198 | 462 | int intPos = (int) fpos; |
199 | 462 | double dif = pos - fpos; |
200 | 462 | double[] sorted = new double[length]; |
201 | 462 | System.arraycopy(values, begin, sorted, 0, length); |
202 | 462 | Arrays.sort(sorted); |
203 | ||
204 | 462 | if (pos < 1) { |
205 | 4 | return sorted[0]; |
206 | } |
|
207 | 458 | if (pos >= n) { |
208 | 8 | return sorted[length - 1]; |
209 | } |
|
210 | 450 | double lower = sorted[intPos - 1]; |
211 | 450 | double upper = sorted[intPos]; |
212 | 450 | return lower + dif * (upper - lower); |
213 | } |
|
214 | ||
215 | /** |
|
216 | * Returns the value of the quantile field (determines what percentile is |
|
217 | * computed when evaluate() is called with no quantile argument). |
|
218 | * |
|
219 | * @return quantile |
|
220 | */ |
|
221 | public double getQuantile() { |
|
222 | 2 | return quantile; |
223 | } |
|
224 | ||
225 | /** |
|
226 | * Sets the value of the quantile field (determines what percentile is |
|
227 | * computed when evaluate() is called with no quantile argument). |
|
228 | * |
|
229 | * @param p a value between 0 < p <= 100 |
|
230 | * @throws IllegalArgumentException if p is not greater than 0 and less |
|
231 | * than or equal to 100 |
|
232 | */ |
|
233 | public void setQuantile(final double p) { |
|
234 | 498 | if (p <= 0 || p > 100) { |
235 | 12 | throw new IllegalArgumentException("Illegal quantile value: " + p); |
236 | } |
|
237 | 486 | quantile = p; |
238 | 486 | } |
239 | ||
240 | } |