| Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||||||
| Percentile |
|
| 2.7142857142857144;2.714 |
| 1 | /* |
|
| 2 | * Copyright 2003-2004 The Apache Software Foundation. |
|
| 3 | * |
|
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
|
| 5 | * you may not use this file except in compliance with the License. |
|
| 6 | * You may obtain a copy of the License at |
|
| 7 | * |
|
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
|
| 9 | * |
|
| 10 | * Unless required by applicable law or agreed to in writing, software |
|
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
|
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
| 13 | * See the License for the specific language governing permissions and |
|
| 14 | * limitations under the License. |
|
| 15 | */ |
|
| 16 | package org.apache.commons.math.stat.descriptive.rank; |
|
| 17 | ||
| 18 | import java.io.Serializable; |
|
| 19 | import java.util.Arrays; |
|
| 20 | import org.apache.commons.math.stat.descriptive.AbstractUnivariateStatistic; |
|
| 21 | ||
| 22 | /** |
|
| 23 | * Provides percentile computation. |
|
| 24 | * <p> |
|
| 25 | * There are several commonly used methods for estimating percentiles (a.k.a. |
|
| 26 | * quantiles) based on sample data. For large samples, the different methods |
|
| 27 | * agree closely, but when sample sizes are small, different methods will give |
|
| 28 | * significantly different results. The algorithm implemented here works as follows: |
|
| 29 | * <ol> |
|
| 30 | * <li>Let <code>n</code> be the length of the (sorted) array and |
|
| 31 | * <code>0 < p <= 100</code> be the desired percentile.</li> |
|
| 32 | * <li>If <code> n = 1 </code> return the unique array element (regardless of |
|
| 33 | * the value of <code>p</code>); otherwise </li> |
|
| 34 | * <li>Compute the estimated percentile position |
|
| 35 | * <code> pos = p * (n + 1) / 100</code> and the difference, <code>d</code> |
|
| 36 | * between <code>pos</code> and <code>floor(pos)</code> (i.e. the fractional |
|
| 37 | * part of <code>pos</code>). If <code>pos < 1</code> return the smallest element in the array; else if <code>pos >= n</code> return the largest |
|
| 38 | * element in the array; otherwise</li> |
|
| 39 | * <li>Let <code>lower</code> be the element in position |
|
| 40 | * <code>floor(pos)</code> in the array and let <code>upper</code> be the |
|
| 41 | * next element in the array. Return <code>lower + d * (upper - lower)</code> |
|
| 42 | * </li> |
|
| 43 | * </ol> |
|
| 44 | * <p> |
|
| 45 | * To compute percentiles, the data must be (totally) ordered. Input arrays |
|
| 46 | * are copied and then sorted using {@link java.util.Arrays#sort(double[])}. |
|
| 47 | * The ordering used by <code>Arrays.sort(double[])</code> is the one determined |
|
| 48 | * by {@link java.lang.Double#compareTo(Double)}. This ordering makes |
|
| 49 | * <code>Double.NaN</code> larger than any other value (including |
|
| 50 | * <code>Double.POSITIVE_INFINITY</code>). Therefore, for example, the median |
|
| 51 | * (50th percentile) of |
|
| 52 | * <code>{0, 1, 2, 3, 4, Double.NaN}</code> evaluates to <code>2.5</code>. |
|
| 53 | * <p> |
|
| 54 | * Since percentile estimation usually involves interpolation between array |
|
| 55 | * elements, arrays containing <code>NaN</code> or infinite values will often |
|
| 56 | * result in <code>NaN</code> or infinite values returned. |
|
| 57 | * <p> |
|
| 58 | * <strong>Note that this implementation is not synchronized.</strong> If |
|
| 59 | * multiple threads access an instance of this class concurrently, and at least |
|
| 60 | * one of the threads invokes the <code>increment()</code> or |
|
| 61 | * <code>clear()</code> method, it must be synchronized externally. |
|
| 62 | * |
|
| 63 | * @version $Revision$ $Date: 2005-02-26 05:11:52 -0800 (Sat, 26 Feb 2005) $ |
|
| 64 | */ |
|
| 65 | public class Percentile extends AbstractUnivariateStatistic implements Serializable { |
|
| 66 | ||
| 67 | /** Serializable version identifier */ |
|
| 68 | static final long serialVersionUID = -8091216485095130416L; |
|
| 69 | ||
| 70 | /** Determines what percentile is computed when evaluate() is called |
|
| 71 | * with no quantile argument */ |
|
| 72 | 488 | private double quantile = 0.0; |
| 73 | ||
| 74 | /** |
|
| 75 | * Constructs a Percentile with a default quantile |
|
| 76 | * value of 50.0. |
|
| 77 | */ |
|
| 78 | public Percentile() { |
|
| 79 | 10 | this(50.0); |
| 80 | 10 | } |
| 81 | ||
| 82 | /** |
|
| 83 | * Constructs a Percentile with the specific quantile value. |
|
| 84 | * @param p the quantile |
|
| 85 | * @throws IllegalArgumentException if p is not greater than 0 and less |
|
| 86 | * than or equal to 100 |
|
| 87 | */ |
|
| 88 | 488 | public Percentile(final double p) { |
| 89 | 488 | setQuantile(p); |
| 90 | 478 | } |
| 91 | ||
| 92 | /** |
|
| 93 | * Returns an estimate of the <code>p</code>th percentile of the values |
|
| 94 | * in the <code>values</code> array. |
|
| 95 | * <p> |
|
| 96 | * Calls to this method do not modify the internal <code>quantile</code> |
|
| 97 | * state of this statistic. |
|
| 98 | * <p> |
|
| 99 | * <ul> |
|
| 100 | * <li>Returns <code>Double.NaN</code> if <code>values</code> has length |
|
| 101 | * <code>0</code></li> |
|
| 102 | * <li>Returns (for any value of <code>p</code>) <code>values[0]</code> |
|
| 103 | * if <code>values</code> has length <code>1</code></li> |
|
| 104 | * <li>Throws <code>IllegalArgumentException</code> if <code>values</code> |
|
| 105 | * is null or p is not a valid quantile value (p must be greater than 0 |
|
| 106 | * and less than or equal to 100) </li> |
|
| 107 | * </ul> |
|
| 108 | * <p> |
|
| 109 | * See {@link Percentile} for a description of the percentile estimation |
|
| 110 | * algorithm used. |
|
| 111 | * |
|
| 112 | * @param values input array of values |
|
| 113 | * @param p the percentile value to compute |
|
| 114 | * @return the percentile value or Double.NaN if the array is empty |
|
| 115 | * @throws IllegalArgumentException if <code>values</code> is null |
|
| 116 | * or p is invalid |
|
| 117 | */ |
|
| 118 | public double evaluate(final double[] values, final double p) { |
|
| 119 | 8 | test(values, 0, 0); |
| 120 | 6 | return evaluate(values, 0, values.length, p); |
| 121 | } |
|
| 122 | ||
| 123 | /** |
|
| 124 | * Returns an estimate of the <code>quantile</code>th percentile of the |
|
| 125 | * designated values in the <code>values</code> array. The quantile |
|
| 126 | * estimated is determined by the <code>quantile</code> property. |
|
| 127 | * <p> |
|
| 128 | * <ul> |
|
| 129 | * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li> |
|
| 130 | * <li>Returns (for any value of <code>quantile</code>) |
|
| 131 | * <code>values[start]</code> if <code>length = 1</code></li> |
|
| 132 | * <li>Throws <code>IllegalArgumentException</code> if <code>values</code> |
|
| 133 | * is null, or <code>start</code> or <code>length</code> |
|
| 134 | * is invalid</li> |
|
| 135 | * </ul> |
|
| 136 | * <p> |
|
| 137 | * See {@link Percentile} for a description of the percentile estimation |
|
| 138 | * algorithm used. |
|
| 139 | * |
|
| 140 | * @param values the input array |
|
| 141 | * @param start index of the first array element to include |
|
| 142 | * @param length the number of elements to include |
|
| 143 | * @return the percentile value |
|
| 144 | * @throws IllegalArgumentException if the parameters are not valid |
|
| 145 | * |
|
| 146 | */ |
|
| 147 | public double evaluate( final double[] values, final int start, final int length) { |
|
| 148 | 484 | return evaluate(values, start, length, quantile); |
| 149 | } |
|
| 150 | ||
| 151 | /** |
|
| 152 | * Returns an estimate of the <code>p</code>th percentile of the values |
|
| 153 | * in the <code>values</code> array, starting with the element in (0-based) |
|
| 154 | * position <code>begin</code> in the array and including <code>length</code> |
|
| 155 | * values. |
|
| 156 | * <p> |
|
| 157 | * Calls to this method do not modify the internal <code>quantile</code> |
|
| 158 | * state of this statistic. |
|
| 159 | * <p> |
|
| 160 | * <ul> |
|
| 161 | * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li> |
|
| 162 | * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code> |
|
| 163 | * if <code>length = 1 </code></li> |
|
| 164 | * <li>Throws <code>IllegalArgumentException</code> if <code>values</code> |
|
| 165 | * is null, <code>begin</code> or <code>length</code> is invalid, or |
|
| 166 | * <code>p</code> is not a valid quantile value (p must be greater than 0 |
|
| 167 | * and less than or equal to 100)</li> |
|
| 168 | * </ul> |
|
| 169 | * <p> |
|
| 170 | * See {@link Percentile} for a description of the percentile estimation |
|
| 171 | * algorithm used. |
|
| 172 | * |
|
| 173 | * @param values array of input values |
|
| 174 | * @param p the percentile to compute |
|
| 175 | * @param begin the first (0-based) element to include in the computation |
|
| 176 | * @param length the number of array elements to include |
|
| 177 | * @return the percentile value |
|
| 178 | * @throws IllegalArgumentException if the parameters are not valid or the |
|
| 179 | * input array is null |
|
| 180 | */ |
|
| 181 | public double evaluate(final double[] values, final int begin, |
|
| 182 | final int length, final double p) { |
|
| 183 | ||
| 184 | 508 | test(values, begin, length); |
| 185 | ||
| 186 | 506 | if ((p > 100) || (p <= 0)) { |
| 187 | 4 | throw new IllegalArgumentException("invalid quantile value: " + p); |
| 188 | } |
|
| 189 | 502 | double n = (double) length; |
| 190 | 502 | if (n == 0) { |
| 191 | 16 | return Double.NaN; |
| 192 | } |
|
| 193 | 486 | if (n == 1) { |
| 194 | 24 | return values[begin]; // always return single value for n = 1 |
| 195 | } |
|
| 196 | 462 | double pos = p * (n + 1) / 100; |
| 197 | 462 | double fpos = Math.floor(pos); |
| 198 | 462 | int intPos = (int) fpos; |
| 199 | 462 | double dif = pos - fpos; |
| 200 | 462 | double[] sorted = new double[length]; |
| 201 | 462 | System.arraycopy(values, begin, sorted, 0, length); |
| 202 | 462 | Arrays.sort(sorted); |
| 203 | ||
| 204 | 462 | if (pos < 1) { |
| 205 | 4 | return sorted[0]; |
| 206 | } |
|
| 207 | 458 | if (pos >= n) { |
| 208 | 8 | return sorted[length - 1]; |
| 209 | } |
|
| 210 | 450 | double lower = sorted[intPos - 1]; |
| 211 | 450 | double upper = sorted[intPos]; |
| 212 | 450 | return lower + dif * (upper - lower); |
| 213 | } |
|
| 214 | ||
| 215 | /** |
|
| 216 | * Returns the value of the quantile field (determines what percentile is |
|
| 217 | * computed when evaluate() is called with no quantile argument). |
|
| 218 | * |
|
| 219 | * @return quantile |
|
| 220 | */ |
|
| 221 | public double getQuantile() { |
|
| 222 | 2 | return quantile; |
| 223 | } |
|
| 224 | ||
| 225 | /** |
|
| 226 | * Sets the value of the quantile field (determines what percentile is |
|
| 227 | * computed when evaluate() is called with no quantile argument). |
|
| 228 | * |
|
| 229 | * @param p the quantile to set; must satisfy 0 < p <= 100 |
|
| 230 | * @throws IllegalArgumentException if p is not greater than 0 and less |
|
| 231 | * than or equal to 100 |
|
| 232 | */ |
|
| 233 | public void setQuantile(final double p) { |
|
| 234 | 498 | if (p <= 0 || p > 100) { |
| 235 | 12 | throw new IllegalArgumentException("Illegal quantile value: " + p); |
| 236 | } |
|
| 237 | 486 | quantile = p; |
| 238 | 486 | } |
| 239 | ||
| 240 | } |