/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.math.stat.regression;

import java.util.Random;

import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;

/**
 * Test cases for the {@link SimpleRegression} class.
 *
 * @version $Revision: 764749 $ $Date: 2009-04-14 07:51:40 -0400 (Tue, 14 Apr 2009) $
 */
public final class SimpleRegressionTest extends TestCase {

    /*
     * NIST "Norris" reference data set from
     * http://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Norris.dat
     * Strangely, order is {y,x}
     */
    private final double[][] data = { { 0.1, 0.2 }, {338.8, 337.4 }, {118.1, 118.2 },
            {888.0, 884.6 }, {9.2, 10.1 }, {228.1, 226.5 }, {668.5, 666.3 }, {998.5, 996.3 },
            {449.1, 448.6 }, {778.9, 777.0 }, {559.2, 558.2 }, {0.3, 0.4 }, {0.1, 0.6 }, {778.1, 775.5 },
            {668.8, 666.9 }, {339.3, 338.0 }, {448.9, 447.5 }, {10.8, 11.6 }, {557.7, 556.0 },
            {228.3, 228.1 }, {998.0, 995.8 }, {888.8, 887.6 }, {119.6, 120.2 }, {0.3, 0.3 },
            {0.6, 0.3 }, {557.6, 556.8 }, {339.3, 339.1 }, {888.0, 887.2 }, {998.5, 999.0 },
            {778.9, 779.0 }, {10.2, 11.1 }, {117.6, 118.3 }, {228.9, 229.2 }, {668.4, 669.1 },
            {449.2, 448.9 }, {0.2, 0.5 }
    };

    /*
     * Correlation example from
     * http://www.xycoon.com/correlation.htm
     */
    private final double[][] corrData = { { 101.0, 99.2 }, {100.1, 99.0 }, {100.0, 100.0 },
            {90.6, 111.6 }, {86.5, 122.2 }, {89.7, 117.6 }, {90.6, 121.1 }, {82.8, 136.0 },
            {70.1, 154.2 }, {65.4, 153.6 }, {61.3, 158.5 }, {62.5, 140.6 }, {63.6, 136.2 },
            {52.6, 168.0 }, {59.7, 154.3 }, {59.5, 149.0 }, {61.3, 165.5 }
    };

    /*
     * From Moore and McCabe, "Introduction to the Practice of Statistics"
     * Example 10.3
     */
    private final double[][] infData = { { 15.6, 5.2 }, {26.8, 6.1 }, {37.8, 8.7 }, {36.4, 8.5 },
            {35.5, 8.8 }, {18.6, 4.9 }, {15.3, 4.5 }, {7.9, 2.5 }, {0.0, 1.1 }
    };

    /*
     * Points to remove in the remove tests. These are taken from infData, so
     * they must be declared after it (instance initializers run in textual order).
     */
    private final double[][] removeSingle = {infData[1]};
    private final double[][] removeMultiple = { infData[1], infData[2] };
    private final double removeX = infData[0][0];
    private final double removeY = infData[0][1];

    /*
     * Data with bad linear fit
     */
    private final double[][] infData2 = { { 1, 1 }, {2, 0 }, {3, 5 }, {4, 2 },
            {5, -1 }, {6, 12 }
    };

    /**
     * Creates a test case with the given name.
     *
     * @param name name of the test case, passed through to {@link TestCase}
     */
    public SimpleRegressionTest(String name) {
        super(name);
    }

    /**
     * Builds the suite containing every test method of this class.
     *
     * @return the suite for this class
     */
    public static Test suite() {
        TestSuite suite = new TestSuite(SimpleRegressionTest.class);
        // NOTE(review): the suite name does not match the class name; presumably a
        // leftover from an earlier name of the class under test. Kept as-is in case
        // reports or tooling key on it -- confirm before renaming.
        suite.setName("BivariateRegression Tests");
        return suite;
    }

    /**
     * Fits the NIST "Norris" data set and compares the regression statistics
     * with the certified values published alongside the data.
     */
    public void testNorris() {
        SimpleRegression regression = new SimpleRegression();
        for (int i = 0; i < data.length; i++) {
            // The data set is stored as {y, x}, hence the swapped indices.
            regression.addData(data[i][1], data[i][0]);
        }
        // Tests against certified values from
        // http://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Norris.dat
        assertEquals("slope", 1.00211681802045, regression.getSlope(), 10E-12);
        assertEquals("slope std err", 0.429796848199937E-03,
                regression.getSlopeStdErr(), 10E-12);
        assertEquals("number of observations", 36, regression.getN());
        assertEquals("intercept", -0.262323073774029,
                regression.getIntercept(), 10E-12);
        assertEquals("std err intercept", 0.232818234301152,
                regression.getInterceptStdErr(), 10E-12);
        assertEquals("r-square", 0.999993745883712,
                regression.getRSquare(), 10E-12);
        assertEquals("SSR", 4255954.13232369,
                regression.getRegressionSumSquares(), 10E-9);
        assertEquals("MSE", 0.782864662630069,
                regression.getMeanSquareError(), 10E-10);
        assertEquals("SSE", 26.6173985294224,
                regression.getSumSquaredErrors(), 10E-9);
        // ------------ End certified data tests

        assertEquals("predict(0)", -0.262323073774029,
                regression.predict(0), 10E-12);
        assertEquals("predict(1)", 1.00211681802045 - 0.262323073774029,
                regression.predict(1), 10E-12);
    }

    /**
     * Checks the observation count, r-square and r for the correlation example data.
     */
    public void testCorr() {
        SimpleRegression regression = new SimpleRegression();
        regression.addData(corrData);
        assertEquals("number of observations", 17, regression.getN());
        assertEquals("r-square", .896123, regression.getRSquare(), 10E-6);
        assertEquals("r", -0.94663767742, regression.getR(), 1E-10);
    }

    /**
     * Checks which statistics are NaN as observations are added: with no data,
     * with two observations sharing the same x, with two observations having
     * distinct x, and after a third observation has been added.
     */
    public void testNaNs() {
        SimpleRegression regression = new SimpleRegression();
        // No data at all: nothing is defined, including SSTO.
        assertFitStatisticsAreNaN(regression);
        assertTrue("SSTO not NaN", Double.isNaN(regression.getTotalSumSquares()));

        regression.addData(1, 2);
        regression.addData(1, 3);

        // No x variation, so these should still blow...
        assertFitStatisticsAreNaN(regression);

        // but SSTO should be OK
        assertFalse("SSTO NaN", Double.isNaN(regression.getTotalSumSquares()));

        regression = new SimpleRegression();

        regression.addData(1, 2);
        regression.addData(3, 3);

        // All should be OK except MSE, s(b0), s(b1) which need one more df
        assertFalse("intercept NaN", Double.isNaN(regression.getIntercept()));
        assertFalse("slope NaN", Double.isNaN(regression.getSlope()));
        assertTrue("slope std err not NaN", Double.isNaN(regression.getSlopeStdErr()));
        assertTrue("intercept std err not NaN", Double.isNaN(regression.getInterceptStdErr()));
        assertTrue("MSE not NaN", Double.isNaN(regression.getMeanSquareError()));
        assertFalse("r NaN", Double.isNaN(regression.getR()));
        assertFalse("r-square NaN", Double.isNaN(regression.getRSquare()));
        assertFalse("SSR NaN", Double.isNaN(regression.getRegressionSumSquares()));
        assertFalse("SSE NaN", Double.isNaN(regression.getSumSquaredErrors()));
        assertFalse("SSTO NaN", Double.isNaN(regression.getTotalSumSquares()));
        assertFalse("predict NaN", Double.isNaN(regression.predict(0)));

        regression.addData(1, 4);

        // MSE, s(b0), s(b1) should all be OK now
        assertFalse("MSE NaN", Double.isNaN(regression.getMeanSquareError()));
        assertFalse("slope std err NaN", Double.isNaN(regression.getSlopeStdErr()));
        assertFalse("intercept std err NaN", Double.isNaN(regression.getInterceptStdErr()));
    }

    /**
     * Checks that {@code clear()} resets the observation count to zero and that
     * the instance can be refilled afterwards.
     */
    public void testClear() {
        SimpleRegression regression = new SimpleRegression();
        regression.addData(corrData);
        assertEquals("number of observations", 17, regression.getN());
        regression.clear();
        assertEquals("number of observations", 0, regression.getN());
        regression.addData(corrData);
        assertEquals("r-square", .896123, regression.getRSquare(), 10E-6);
        regression.addData(data);
        // 17 correlation observations + 36 Norris observations
        assertEquals("number of observations", 53, regression.getN());
    }

    /**
     * Checks the inference statistics (standard errors, significance, slope
     * confidence interval half-width) for two data sets, then checks the
     * handling of the {@code alpha} argument of the confidence interval.
     *
     * @throws Exception if the regression fails to compute an inference statistic
     */
    public void testInference() throws Exception {
        //---------- verified against R, version 1.8.1 -----
        // infData
        SimpleRegression goodFit = new SimpleRegression();
        goodFit.addData(infData);
        assertInfDataInference(goodFit);

        // infData2
        SimpleRegression poorFit = new SimpleRegression();
        poorFit.addData(infData2);
        assertEquals("slope std err", 1.07260253,
                poorFit.getSlopeStdErr(), 1E-8);
        assertEquals("std err intercept", 4.17718672,
                poorFit.getInterceptStdErr(), 1E-8);
        assertEquals("significance", 0.261829133982,
                poorFit.getSignificance(), 1E-11);
        assertEquals("slope conf interval half-width", 2.97802204827,
                poorFit.getSlopeConfidenceInterval(), 1E-8);
        //------------- End R-verified tests -------------------------------

        //FIXME: get a real example to test against with alpha = .01
        assertTrue("tighter means wider",
                poorFit.getSlopeConfidenceInterval() < poorFit.getSlopeConfidenceInterval(0.01));

        try {
            poorFit.getSlopeConfidenceInterval(1);
            fail("expecting IllegalArgumentException for alpha = 1");
        } catch (IllegalArgumentException expected) {
            // expected: alpha = 1 must be rejected
        }
    }

    /**
     * Fits points lying exactly on a line with positive slope and checks the
     * significance, the sign of the slope and that SSE is not negative.
     *
     * @throws Exception if the significance cannot be computed
     */
    public void testPerfect() throws Exception {
        SimpleRegression regression = new SimpleRegression();
        int n = 100;
        for (int i = 0; i < n; i++) {
            regression.addData(((double) i) / (n - 1), i);
        }
        assertEquals(0.0, regression.getSignificance(), 1.0e-5);
        assertTrue(regression.getSlope() > 0.0);
        assertTrue(regression.getSumSquaredErrors() >= 0.0);
    }

    /**
     * Fits points lying exactly on a line with negative slope and checks the
     * significance and the sign of the slope.
     *
     * @throws Exception if the significance cannot be computed
     */
    public void testPerfectNegative() throws Exception {
        SimpleRegression regression = new SimpleRegression();
        int n = 100;
        for (int i = 0; i < n; i++) {
            regression.addData(-((double) i) / (n - 1), i);
        }

        assertEquals(0.0, regression.getSignificance(), 1.0e-5);
        assertTrue(regression.getSlope() < 0.0);
    }

    /**
     * Fits evenly spaced first coordinates against pseudo-random second
     * coordinates (fixed seed) and checks that the significance lies strictly
     * between 0 and 1.
     *
     * @throws Exception if the significance cannot be computed
     */
    public void testRandom() throws Exception {
        SimpleRegression regression = new SimpleRegression();
        Random random = new Random(1);
        int n = 100;
        for (int i = 0; i < n; i++) {
            regression.addData(((double) i) / (n - 1), random.nextDouble());
        }

        assertTrue(0.0 < regression.getSignificance()
                && regression.getSignificance() < 1.0);
    }

    /**
     * Regression test for Jira MATH-85 = Bugzilla 39432: the sum of squared
     * errors must not be negative for this data.
     */
    public void testSSENonNegative() {
        double[] y = { 8915.102, 8919.302, 8923.502 };
        double[] x = { 1.107178495E2, 1.107264895E2, 1.107351295E2 };
        SimpleRegression reg = new SimpleRegression();
        for (int i = 0; i < x.length; i++) {
            reg.addData(x[i], y[i]);
        }
        assertTrue(reg.getSumSquaredErrors() >= 0.0);
    }

    /**
     * Removes a single (x, y) observation, adds it back and checks that the
     * inference statistics match those of the untouched data set.
     *
     * @throws Exception if the regression fails to compute an inference statistic
     */
    public void testRemoveXY() throws Exception {
        // Create regression with inference data then remove to test
        SimpleRegression regression = new SimpleRegression();
        regression.addData(infData);
        regression.removeData(removeX, removeY);
        regression.addData(removeX, removeY);
        // Use the inference assertions to make sure that everything worked
        assertInfDataInference(regression);
    }

    /**
     * Removes a one-row array of observations, adds it back and checks that the
     * inference statistics match those of the untouched data set.
     *
     * @throws Exception if the regression fails to compute an inference statistic
     */
    public void testRemoveSingle() throws Exception {
        // Create regression with inference data then remove to test
        SimpleRegression regression = new SimpleRegression();
        regression.addData(infData);
        regression.removeData(removeSingle);
        regression.addData(removeSingle);
        // Use the inference assertions to make sure that everything worked
        assertInfDataInference(regression);
    }

    /**
     * Removes a two-row array of observations, adds it back and checks that the
     * inference statistics match those of the untouched data set.
     *
     * @throws Exception if the regression fails to compute an inference statistic
     */
    public void testRemoveMultiple() throws Exception {
        // Create regression with inference data then remove to test
        SimpleRegression regression = new SimpleRegression();
        regression.addData(infData);
        regression.removeData(removeMultiple);
        regression.addData(removeMultiple);
        // Use the inference assertions to make sure that everything worked
        assertInfDataInference(regression);
    }

    /**
     * Remove observation when empty: the count must stay at zero.
     */
    public void testRemoveObsFromEmpty() {
        SimpleRegression regression = new SimpleRegression();
        regression.removeData(removeX, removeY);
        assertEquals("number of observations", 0, regression.getN());
    }

    /**
     * Remove single observation to empty.
     */
    public void testRemoveObsFromSingle() {
        SimpleRegression regression = new SimpleRegression();
        regression.addData(removeX, removeY);
        regression.removeData(removeX, removeY);
        assertEquals("number of observations", 0, regression.getN());
    }

    /**
     * Remove multiple observations to empty.
     */
    public void testRemoveMultipleToEmpty() {
        SimpleRegression regression = new SimpleRegression();
        regression.addData(removeMultiple);
        regression.removeData(removeMultiple);
        assertEquals("number of observations", 0, regression.getN());
    }

    /**
     * Remove multiple observations past empty (i.e. size of array &gt; n):
     * the count must end at zero.
     */
    public void testRemoveMultiplePastEmpty() {
        SimpleRegression regression = new SimpleRegression();
        regression.addData(removeX, removeY);
        regression.removeData(removeMultiple);
        assertEquals("number of observations", 0, regression.getN());
    }

    /**
     * Asserts the inference statistics expected for a regression holding
     * exactly the {@code infData} observations.
     *
     * @param regression regression expected to contain the {@code infData} observations
     * @throws Exception if the regression fails to compute an inference statistic
     */
    private void assertInfDataInference(SimpleRegression regression) throws Exception {
        assertEquals("slope std err", 0.011448491,
                regression.getSlopeStdErr(), 1E-10);
        assertEquals("std err intercept", 0.286036932,
                regression.getInterceptStdErr(), 1E-8);
        assertEquals("significance", 4.596e-07,
                regression.getSignificance(), 1E-8);
        assertEquals("slope conf interval half-width", 0.0270713794287,
                regression.getSlopeConfidenceInterval(), 1E-8);
    }

    /**
     * Asserts that every fit statistic other than SSTO is NaN: intercept, slope,
     * their standard errors, MSE, r, r-square, SSR, SSE and {@code predict(0)}.
     *
     * @param regression regression expected to have no usable fit
     */
    private void assertFitStatisticsAreNaN(SimpleRegression regression) {
        assertTrue("intercept not NaN", Double.isNaN(regression.getIntercept()));
        assertTrue("slope not NaN", Double.isNaN(regression.getSlope()));
        assertTrue("slope std err not NaN", Double.isNaN(regression.getSlopeStdErr()));
        assertTrue("intercept std err not NaN", Double.isNaN(regression.getInterceptStdErr()));
        assertTrue("MSE not NaN", Double.isNaN(regression.getMeanSquareError()));
        assertTrue("r not NaN", Double.isNaN(regression.getR()));
        assertTrue("r-square not NaN", Double.isNaN(regression.getRSquare()));
        assertTrue("SSR not NaN", Double.isNaN(regression.getRegressionSumSquares()));
        assertTrue("SSE not NaN", Double.isNaN(regression.getSumSquaredErrors()));
        assertTrue("predict not NaN", Double.isNaN(regression.predict(0)));
    }
}