| Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||||||
| SimpleRegression |
|
| 1.5416666666666667;1.542 |
| 1 | /* |
|
| 2 | * Copyright 2003-2004 The Apache Software Foundation. |
|
| 3 | * |
|
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
|
| 5 | * you may not use this file except in compliance with the License. |
|
| 6 | * You may obtain a copy of the License at |
|
| 7 | * |
|
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
|
| 9 | * |
|
| 10 | * Unless required by applicable law or agreed to in writing, software |
|
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
|
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
| 13 | * See the License for the specific language governing permissions and |
|
| 14 | * limitations under the License. |
|
| 15 | */ |
|
| 16 | ||
| 17 | package org.apache.commons.math.stat.regression; |
|
| 18 | import java.io.Serializable; |
|
| 19 | ||
| 20 | import org.apache.commons.math.MathException; |
|
| 21 | import org.apache.commons.math.distribution.DistributionFactory; |
|
| 22 | import org.apache.commons.math.distribution.TDistribution; |
|
| 23 | ||
| 24 | /** |
|
| 25 | * Estimates an ordinary least squares regression model |
|
| 26 | * with one independent variable. |
|
| 27 | * <p> |
|
| 28 | * <code> y = intercept + slope * x </code> |
|
| 29 | * <p> |
|
| 30 | * Standard errors for <code>intercept</code> and <code>slope</code> are |
|
| 31 | * available as well as ANOVA, r-square and Pearson's r statistics. |
|
| 32 | * <p> |
|
| 33 | * Observations (x,y pairs) can be added to the model one at a time or they |
|
| 34 | * can be provided in a 2-dimensional array. The observations are not stored |
|
| 35 | * in memory, so there is no limit to the number of observations that can be |
|
| 36 | * added to the model. |
|
| 37 | * <p> |
|
| 38 | * <strong>Usage Notes</strong>: <ul> |
|
| 39 | * <li> When there are fewer than two observations in the model, or when |
|
| 40 | * there is no variation in the x values (i.e. all x values are the same) |
|
| 41 | * all statistics return <code>NaN</code>. At least two observations with |
|
| 42 | * different x coordinates are requred to estimate a bivariate regression |
|
| 43 | * model. |
|
| 44 | * </li> |
|
| 45 | * <li> getters for the statistics always compute values based on the current |
|
| 46 | * set of observations -- i.e., you can get statistics, then add more data |
|
| 47 | * and get updated statistics without using a new instance. There is no |
|
| 48 | * "compute" method that updates all statistics. Each of the getters performs |
|
| 49 | * the necessary computations to return the requested statistic.</li> |
|
| 50 | * </ul> |
|
| 51 | * |
|
| 52 | * @version $Revision$ $Date: 2005-02-26 05:11:52 -0800 (Sat, 26 Feb 2005) $ |
|
| 53 | */ |
|
| 54 | public class SimpleRegression implements Serializable { |
|
| 55 | ||
| 56 | /** Serializable version identifier */ |
|
| 57 | static final long serialVersionUID = -3004689053607543335L; |
|
| 58 | ||
| 59 | /** sum of x values */ |
|
| 60 | 20 | private double sumX = 0d; |
| 61 | ||
| 62 | /** total variation in x (sum of squared deviations from xbar) */ |
|
| 63 | 20 | private double sumXX = 0d; |
| 64 | ||
| 65 | /** sum of y values */ |
|
| 66 | 20 | private double sumY = 0d; |
| 67 | ||
| 68 | /** total variation in y (sum of squared deviations from ybar) */ |
|
| 69 | 20 | private double sumYY = 0d; |
| 70 | ||
| 71 | /** sum of products */ |
|
| 72 | 20 | private double sumXY = 0d; |
| 73 | ||
| 74 | /** number of observations */ |
|
| 75 | 20 | private long n = 0; |
| 76 | ||
| 77 | /** mean of accumulated x values, used in updating formulas */ |
|
| 78 | 20 | private double xbar = 0; |
| 79 | ||
| 80 | /** mean of accumulated y values, used in updating formulas */ |
|
| 81 | 20 | private double ybar = 0; |
| 82 | ||
| 83 | // ---------------------Public methods-------------------------------------- |
|
| 84 | ||
| 85 | /** |
|
| 86 | * Create an empty SimpleRegression instance |
|
| 87 | */ |
|
| 88 | public SimpleRegression() { |
|
| 89 | 20 | super(); |
| 90 | 20 | } |
| 91 | ||
| 92 | /** |
|
| 93 | * Adds the observation (x,y) to the regression data set. |
|
| 94 | * <p> |
|
| 95 | * Uses updating formulas for means and sums of squares defined in |
|
| 96 | * "Algorithms for Computing the Sample Variance: Analysis and |
|
| 97 | * Recommendations", Chan, T.F., Golub, G.H., and LeVeque, R.J. |
|
| 98 | * 1983, American Statistician, vol. 37, pp. 242-247, referenced in |
|
| 99 | * Weisberg, S. "Applied Linear Regression". 2nd Ed. 1985 |
|
| 100 | * |
|
| 101 | * |
|
| 102 | * @param x independent variable value |
|
| 103 | * @param y dependent variable value |
|
| 104 | */ |
|
| 105 | public void addData(double x, double y) { |
|
| 106 | 886 | if (n == 0) { |
| 107 | 22 | xbar = x; |
| 108 | 22 | ybar = y; |
| 109 | } else { |
|
| 110 | 864 | double dx = x - xbar; |
| 111 | 864 | double dy = y - ybar; |
| 112 | 864 | sumXX += dx * dx * (double) n / (double) (n + 1.0); |
| 113 | 864 | sumYY += dy * dy * (double) n / (double) (n + 1.0); |
| 114 | 864 | sumXY += dx * dy * (double) n / (double) (n + 1.0); |
| 115 | 864 | xbar += dx / (double) (n + 1.0); |
| 116 | 864 | ybar += dy / (double) (n + 1.0); |
| 117 | } |
|
| 118 | 886 | sumX += x; |
| 119 | 886 | sumY += y; |
| 120 | 886 | n++; |
| 121 | 886 | } |
| 122 | ||
| 123 | /** |
|
| 124 | * Adds the observations represented by the elements in |
|
| 125 | * <code>data</code>. |
|
| 126 | * <p> |
|
| 127 | * <code>(data[0][0],data[0][1])</code> will be the first observation, then |
|
| 128 | * <code>(data[1][0],data[1][1])</code>, etc. |
|
| 129 | * <p> |
|
| 130 | * This method does not replace data that has already been added. The |
|
| 131 | * observations represented by <code>data</code> are added to the existing |
|
| 132 | * dataset. |
|
| 133 | * <p> |
|
| 134 | * To replace all data, use <code>clear()</code> before adding the new |
|
| 135 | * data. |
|
| 136 | * |
|
| 137 | * @param data array of observations to be added |
|
| 138 | */ |
|
| 139 | public void addData(double[][] data) { |
|
| 140 | 216 | for (int i = 0; i < data.length; i++) { |
| 141 | 204 | addData(data[i][0], data[i][1]); |
| 142 | } |
|
| 143 | 12 | } |
| 144 | ||
| 145 | /** |
|
| 146 | * Clears all data from the model. |
|
| 147 | */ |
|
| 148 | public void clear() { |
|
| 149 | 2 | sumX = 0d; |
| 150 | 2 | sumXX = 0d; |
| 151 | 2 | sumY = 0d; |
| 152 | 2 | sumYY = 0d; |
| 153 | 2 | sumXY = 0d; |
| 154 | 2 | n = 0; |
| 155 | 2 | } |
| 156 | ||
| 157 | /** |
|
| 158 | * Returns the number of observations that have been added to the model. |
|
| 159 | * |
|
| 160 | * @return n number of observations that have been added. |
|
| 161 | */ |
|
| 162 | public long getN() { |
|
| 163 | 10 | return n; |
| 164 | } |
|
| 165 | ||
| 166 | /** |
|
| 167 | * Returns the "predicted" <code>y</code> value associated with the |
|
| 168 | * supplied <code>x</code> value, based on the data that has been |
|
| 169 | * added to the model when this method is activated. |
|
| 170 | * <p> |
|
| 171 | * <code> predict(x) = intercept + slope * x </code> |
|
| 172 | * <p> |
|
| 173 | * <strong>Preconditions</strong>: <ul> |
|
| 174 | * <li>At least two observations (with at least two different x values) |
|
| 175 | * must have been added before invoking this method. If this method is |
|
| 176 | * invoked before a model can be estimated, <code>Double,NaN</code> is |
|
| 177 | * returned. |
|
| 178 | * </li></ul> |
|
| 179 | * |
|
| 180 | * @param x input <code>x</code> value |
|
| 181 | * @return predicted <code>y</code> value |
|
| 182 | */ |
|
| 183 | public double predict(double x) { |
|
| 184 | 10 | double b1 = getSlope(); |
| 185 | 10 | return getIntercept(b1) + b1 * x; |
| 186 | } |
|
| 187 | ||
| 188 | /** |
|
| 189 | * Returns the intercept of the estimated regression line. |
|
| 190 | * <p> |
|
| 191 | * The least squares estimate of the intercept is computed using the |
|
| 192 | * <a href="http://www.xycoon.com/estimation4.htm">normal equations</a>. |
|
| 193 | * The intercept is sometimes denoted b0. |
|
| 194 | * <p> |
|
| 195 | * <strong>Preconditions</strong>: <ul> |
|
| 196 | * <li>At least two observations (with at least two different x values) |
|
| 197 | * must have been added before invoking this method. If this method is |
|
| 198 | * invoked before a model can be estimated, <code>Double,NaN</code> is |
|
| 199 | * returned. |
|
| 200 | * </li></ul> |
|
| 201 | * |
|
| 202 | * @return the intercept of the regression line |
|
| 203 | */ |
|
| 204 | public double getIntercept() { |
|
| 205 | 8 | return getIntercept(getSlope()); |
| 206 | } |
|
| 207 | ||
| 208 | /** |
|
| 209 | * Returns the slope of the estimated regression line. |
|
| 210 | * <p> |
|
| 211 | * The least squares estimate of the slope is computed using the |
|
| 212 | * <a href="http://www.xycoon.com/estimation4.htm">normal equations</a>. |
|
| 213 | * The slope is sometimes denoted b1. |
|
| 214 | * <p> |
|
| 215 | * <strong>Preconditions</strong>: <ul> |
|
| 216 | * <li>At least two observations (with at least two different x values) |
|
| 217 | * must have been added before invoking this method. If this method is |
|
| 218 | * invoked before a model can be estimated, <code>Double.NaN</code> is |
|
| 219 | * returned. |
|
| 220 | * </li></ul> |
|
| 221 | * |
|
| 222 | * @return the slope of the regression line |
|
| 223 | */ |
|
| 224 | public double getSlope() { |
|
| 225 | 118 | if (n < 2) { |
| 226 | 14 | return Double.NaN; //not enough data |
| 227 | } |
|
| 228 | 104 | if (Math.abs(sumXX) < 10 * Double.MIN_VALUE) { |
| 229 | 14 | return Double.NaN; //not enough variation in x |
| 230 | } |
|
| 231 | 90 | return sumXY / sumXX; |
| 232 | } |
|
| 233 | ||
| 234 | /** |
|
| 235 | * Returns the <a href="http://www.xycoon.com/SumOfSquares.htm"> |
|
| 236 | * sum of squared errors</a> (SSE) associated with the regression |
|
| 237 | * model. |
|
| 238 | * <p> |
|
| 239 | * <strong>Preconditions</strong>: <ul> |
|
| 240 | * <li>At least two observations (with at least two different x values) |
|
| 241 | * must have been added before invoking this method. If this method is |
|
| 242 | * invoked before a model can be estimated, <code>Double,NaN</code> is |
|
| 243 | * returned. |
|
| 244 | * </li></ul> |
|
| 245 | * |
|
| 246 | * @return sum of squared errors associated with the regression model |
|
| 247 | */ |
|
| 248 | public double getSumSquaredErrors() { |
|
| 249 | 48 | return getSumSquaredErrors(getSlope()); |
| 250 | } |
|
| 251 | ||
| 252 | /** |
|
| 253 | * Returns the sum of squared deviations of the y values about their mean. |
|
| 254 | * <p> |
|
| 255 | * This is defined as SSTO |
|
| 256 | * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a>. |
|
| 257 | * <p> |
|
| 258 | * If <code>n < 2</code>, this returns <code>Double.NaN</code>. |
|
| 259 | * |
|
| 260 | * @return sum of squared deviations of y values |
|
| 261 | */ |
|
| 262 | public double getTotalSumSquares() { |
|
| 263 | 26 | if (n < 2) { |
| 264 | 6 | return Double.NaN; |
| 265 | } |
|
| 266 | 20 | return sumYY; |
| 267 | } |
|
| 268 | ||
| 269 | /** |
|
| 270 | * Returns the sum of squared deviations of the predicted y values about |
|
| 271 | * their mean (which equals the mean of y). |
|
| 272 | * <p> |
|
| 273 | * This is usually abbreviated SSR or SSM. It is defined as SSM |
|
| 274 | * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a> |
|
| 275 | * <p> |
|
| 276 | * <strong>Preconditions</strong>: <ul> |
|
| 277 | * <li>At least two observations (with at least two different x values) |
|
| 278 | * must have been added before invoking this method. If this method is |
|
| 279 | * invoked before a model can be estimated, <code>Double.NaN</code> is |
|
| 280 | * returned. |
|
| 281 | * </li></ul> |
|
| 282 | * |
|
| 283 | * @return sum of squared deviations of predicted y values |
|
| 284 | */ |
|
| 285 | public double getRegressionSumSquares() { |
|
| 286 | 8 | return getRegressionSumSquares(getSlope()); |
| 287 | } |
|
| 288 | ||
| 289 | /** |
|
| 290 | * Returns the sum of squared errors divided by the degrees of freedom, |
|
| 291 | * usually abbreviated MSE. |
|
| 292 | * <p> |
|
| 293 | * If there are fewer than <strong>three</strong> data pairs in the model, |
|
| 294 | * or if there is no variation in <code>x</code>, this returns |
|
| 295 | * <code>Double.NaN</code>. |
|
| 296 | * |
|
| 297 | * @return sum of squared deviations of y values |
|
| 298 | */ |
|
| 299 | public double getMeanSquareError() { |
|
| 300 | 58 | if (n < 3) { |
| 301 | 18 | return Double.NaN; |
| 302 | } |
|
| 303 | 40 | return getSumSquaredErrors() / (double) (n - 2); |
| 304 | } |
|
| 305 | ||
| 306 | /** |
|
| 307 | * Returns <a href="http://mathworld.wolfram.com/CorrelationCoefficient.html"> |
|
| 308 | * Pearson's product moment correlation coefficient</a>, |
|
| 309 | * usually denoted r. |
|
| 310 | * <p> |
|
| 311 | * <strong>Preconditions</strong>: <ul> |
|
| 312 | * <li>At least two observations (with at least two different x values) |
|
| 313 | * must have been added before invoking this method. If this method is |
|
| 314 | * invoked before a model can be estimated, <code>Double,NaN</code> is |
|
| 315 | * returned. |
|
| 316 | * </li></ul> |
|
| 317 | * |
|
| 318 | * @return Pearson's r |
|
| 319 | */ |
|
| 320 | public double getR() { |
|
| 321 | 8 | double b1 = getSlope(); |
| 322 | 8 | double result = Math.sqrt(getRSquare(b1)); |
| 323 | 8 | if (b1 < 0) { |
| 324 | 2 | result = -result; |
| 325 | } |
|
| 326 | 8 | return result; |
| 327 | } |
|
| 328 | ||
| 329 | /** |
|
| 330 | * Returns the <a href="http://www.xycoon.com/coefficient1.htm"> |
|
| 331 | * coefficient of determination</a>, |
|
| 332 | * usually denoted r-square. |
|
| 333 | * <p> |
|
| 334 | * <strong>Preconditions</strong>: <ul> |
|
| 335 | * <li>At least two observations (with at least two different x values) |
|
| 336 | * must have been added before invoking this method. If this method is |
|
| 337 | * invoked before a model can be estimated, <code>Double,NaN</code> is |
|
| 338 | * returned. |
|
| 339 | * </li></ul> |
|
| 340 | * |
|
| 341 | * @return r-square |
|
| 342 | */ |
|
| 343 | public double getRSquare() { |
|
| 344 | 12 | return getRSquare(getSlope()); |
| 345 | } |
|
| 346 | ||
| 347 | /** |
|
| 348 | * Returns the <a href="http://www.xycoon.com/standarderrorb0.htm"> |
|
| 349 | * standard error of the intercept estimate</a>, |
|
| 350 | * usually denoted s(b0). |
|
| 351 | * <p> |
|
| 352 | * If there are fewer that <strong>three</strong> observations in the |
|
| 353 | * model, or if there is no variation in x, this returns |
|
| 354 | * <code>Double.NaN</code>. |
|
| 355 | * |
|
| 356 | * @return standard error associated with intercept estimate |
|
| 357 | */ |
|
| 358 | public double getInterceptStdErr() { |
|
| 359 | 14 | return Math.sqrt( |
| 360 | getMeanSquareError() * ((1d / (double) n) + (xbar * xbar) / sumXX)); |
|
| 361 | } |
|
| 362 | ||
| 363 | /** |
|
| 364 | * Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard |
|
| 365 | * error of the slope estimate</a>, |
|
| 366 | * usually denoted s(b1). |
|
| 367 | * <p> |
|
| 368 | * If there are fewer that <strong>three</strong> data pairs in the model, |
|
| 369 | * or if there is no variation in x, this returns <code>Double.NaN</code>. |
|
| 370 | * |
|
| 371 | * @return standard error associated with slope estimate |
|
| 372 | */ |
|
| 373 | public double getSlopeStdErr() { |
|
| 374 | 34 | return Math.sqrt(getMeanSquareError() / sumXX); |
| 375 | } |
|
| 376 | ||
| 377 | /** |
|
| 378 | * Returns the half-width of a 95% confidence interval for the slope |
|
| 379 | * estimate. |
|
| 380 | * <p> |
|
| 381 | * The 95% confidence interval is |
|
| 382 | * <p> |
|
| 383 | * <code>(getSlope() - getSlopeConfidenceInterval(), |
|
| 384 | * getSlope() + getSlopeConfidenceInterval())</code> |
|
| 385 | * <p> |
|
| 386 | * If there are fewer that <strong>three</strong> observations in the |
|
| 387 | * model, or if there is no variation in x, this returns |
|
| 388 | * <code>Double.NaN</code>. |
|
| 389 | * <p> |
|
| 390 | * <strong>Usage Note</strong>:<br> |
|
| 391 | * The validity of this statistic depends on the assumption that the |
|
| 392 | * observations included in the model are drawn from a |
|
| 393 | * <a href="http://mathworld.wolfram.com/BivariateNormalDistribution.html"> |
|
| 394 | * Bivariate Normal Distribution</a>. |
|
| 395 | * |
|
| 396 | * @return half-width of 95% confidence interval for the slope estimate |
|
| 397 | * |
|
| 398 | * @throws MathException if the confidence interval can not be computed. |
|
| 399 | */ |
|
| 400 | public double getSlopeConfidenceInterval() throws MathException { |
|
| 401 | 6 | return getSlopeConfidenceInterval(0.05d); |
| 402 | } |
|
| 403 | ||
| 404 | /** |
|
| 405 | * Returns the half-width of a (100-100*alpha)% confidence interval for |
|
| 406 | * the slope estimate. |
|
| 407 | * <p> |
|
| 408 | * The (100-100*alpha)% confidence interval is |
|
| 409 | * <p> |
|
| 410 | * <code>(getSlope() - getSlopeConfidenceInterval(), |
|
| 411 | * getSlope() + getSlopeConfidenceInterval())</code> |
|
| 412 | * <p> |
|
| 413 | * To request, for example, a 99% confidence interval, use |
|
| 414 | * <code>alpha = .01</code> |
|
| 415 | * <p> |
|
| 416 | * <strong>Usage Note</strong>:<br> |
|
| 417 | * The validity of this statistic depends on the assumption that the |
|
| 418 | * observations included in the model are drawn from a |
|
| 419 | * <a href="http://mathworld.wolfram.com/BivariateNormalDistribution.html"> |
|
| 420 | * Bivariate Normal Distribution</a>. |
|
| 421 | * <p> |
|
| 422 | * <strong> Preconditions:</strong><ul> |
|
| 423 | * <li>If there are fewer that <strong>three</strong> observations in the |
|
| 424 | * model, or if there is no variation in x, this returns |
|
| 425 | * <code>Double.NaN</code>. |
|
| 426 | * </li> |
|
| 427 | * <li><code>(0 < alpha < 1)</code>; otherwise an |
|
| 428 | * <code>IllegalArgumentException</code> is thrown. |
|
| 429 | * </li></ul> |
|
| 430 | * |
|
| 431 | * @param alpha the desired significance level |
|
| 432 | * @return half-width of 95% confidence interval for the slope estimate |
|
| 433 | * @throws MathException if the confidence interval can not be computed. |
|
| 434 | */ |
|
| 435 | public double getSlopeConfidenceInterval(double alpha) |
|
| 436 | throws MathException { |
|
| 437 | 10 | if (alpha >= 1 || alpha <= 0) { |
| 438 | 2 | throw new IllegalArgumentException(); |
| 439 | } |
|
| 440 | 8 | return getSlopeStdErr() * |
| 441 | getTDistribution().inverseCumulativeProbability(1d - alpha / 2d); |
|
| 442 | } |
|
| 443 | ||
| 444 | /** |
|
| 445 | * Returns the significance level of the slope (equiv) correlation. |
|
| 446 | * <p> |
|
| 447 | * Specifically, the returned value is the smallest <code>alpha</code> |
|
| 448 | * such that the slope confidence interval with significance level |
|
| 449 | * equal to <code>alpha</code> does not include <code>0</code>. |
|
| 450 | * On regression output, this is often denoted <code>Prob(|t| > 0)</code> |
|
| 451 | * <p> |
|
| 452 | * <strong>Usage Note</strong>:<br> |
|
| 453 | * The validity of this statistic depends on the assumption that the |
|
| 454 | * observations included in the model are drawn from a |
|
| 455 | * <a href="http://mathworld.wolfram.com/BivariateNormalDistribution.html"> |
|
| 456 | * Bivariate Normal Distribution</a>. |
|
| 457 | * <p> |
|
| 458 | * If there are fewer that <strong>three</strong> observations in the |
|
| 459 | * model, or if there is no variation in x, this returns |
|
| 460 | * <code>Double.NaN</code>. |
|
| 461 | * |
|
| 462 | * @return significance level for slope/correlation |
|
| 463 | * @throws MathException if the significance level can not be computed. |
|
| 464 | */ |
|
| 465 | public double getSignificance() throws MathException { |
|
| 466 | 12 | return 2d* (1.0 - getTDistribution().cumulativeProbability( |
| 467 | Math.abs(getSlope()) / getSlopeStdErr())); |
|
| 468 | } |
|
| 469 | ||
| 470 | // ---------------------Private methods----------------------------------- |
|
| 471 | ||
| 472 | /** |
|
| 473 | * Returns the intercept of the estimated regression line, given the slope. |
|
| 474 | * <p> |
|
| 475 | * Will return <code>NaN</code> if slope is <code>NaN</code>. |
|
| 476 | * |
|
| 477 | * @param slope current slope |
|
| 478 | * @return the intercept of the regression line |
|
| 479 | */ |
|
| 480 | private double getIntercept(double slope) { |
|
| 481 | 18 | return (sumY - slope * sumX) / ((double) n); |
| 482 | } |
|
| 483 | ||
| 484 | /** |
|
| 485 | * Returns the sum of squared errors associated with the regression |
|
| 486 | * model, using the slope of the regression line. |
|
| 487 | * <p> |
|
| 488 | * Returns NaN if the slope is NaN. |
|
| 489 | * |
|
| 490 | * @param b1 current slope |
|
| 491 | * @return sum of squared errors associated with the regression model |
|
| 492 | */ |
|
| 493 | private double getSumSquaredErrors(double b1) { |
|
| 494 | 68 | return sumYY - sumXY * sumXY / sumXX; |
| 495 | } |
|
| 496 | ||
| 497 | /** |
|
| 498 | * Computes r-square from the slope. |
|
| 499 | * <p> |
|
| 500 | * will return NaN if slope is Nan. |
|
| 501 | * |
|
| 502 | * @param b1 current slope |
|
| 503 | * @return r-square |
|
| 504 | */ |
|
| 505 | private double getRSquare(double b1) { |
|
| 506 | 20 | double ssto = getTotalSumSquares(); |
| 507 | 20 | return (ssto - getSumSquaredErrors(b1)) / ssto; |
| 508 | } |
|
| 509 | ||
| 510 | /** |
|
| 511 | * Computes SSR from b1. |
|
| 512 | * |
|
| 513 | * @param slope regression slope estimate |
|
| 514 | * @return sum of squared deviations of predicted y values |
|
| 515 | */ |
|
| 516 | private double getRegressionSumSquares(double slope) { |
|
| 517 | 8 | return slope * slope * sumXX; |
| 518 | } |
|
| 519 | ||
| 520 | /** |
|
| 521 | * Uses distribution framework to get a t distribution instance |
|
| 522 | * with df = n - 2 |
|
| 523 | * |
|
| 524 | * @return t distribution with df = n - 2 |
|
| 525 | */ |
|
| 526 | private TDistribution getTDistribution() { |
|
| 527 | 20 | return DistributionFactory.newInstance().createTDistribution(n - 2); |
| 528 | } |
|
| 529 | } |