1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math.stat.correlation;
18  
19  import org.apache.commons.math.TestUtils;
20  import org.apache.commons.math.distribution.TDistribution;
21  import org.apache.commons.math.distribution.TDistributionImpl;
22  import org.apache.commons.math.linear.RealMatrix;
23  import org.apache.commons.math.linear.BlockRealMatrix;
24  
25  import junit.framework.TestCase;
26  
27  public class PearsonsCorrelationTest extends TestCase {
28      
29      protected final double[] longleyData = new double[] {
30              60323,83.0,234289,2356,1590,107608,1947,
31              61122,88.5,259426,2325,1456,108632,1948,
32              60171,88.2,258054,3682,1616,109773,1949,
33              61187,89.5,284599,3351,1650,110929,1950,
34              63221,96.2,328975,2099,3099,112075,1951,
35              63639,98.1,346999,1932,3594,113270,1952,
36              64989,99.0,365385,1870,3547,115094,1953,
37              63761,100.0,363112,3578,3350,116219,1954,
38              66019,101.2,397469,2904,3048,117388,1955,
39              67857,104.6,419180,2822,2857,118734,1956,
40              68169,108.4,442769,2936,2798,120445,1957,
41              66513,110.8,444546,4681,2637,121950,1958,
42              68655,112.6,482704,3813,2552,123366,1959,
43              69564,114.2,502601,3931,2514,125368,1960,
44              69331,115.7,518173,4806,2572,127852,1961,
45              70551,116.9,554894,4007,2827,130081,1962
46          };
47      
48      protected final double[] swissData = new double[] {
49              80.2,17.0,15,12,9.96,
50              83.1,45.1,6,9,84.84,
51              92.5,39.7,5,5,93.40,
52              85.8,36.5,12,7,33.77,
53              76.9,43.5,17,15,5.16,
54              76.1,35.3,9,7,90.57,
55              83.8,70.2,16,7,92.85,
56              92.4,67.8,14,8,97.16,
57              82.4,53.3,12,7,97.67,
58              82.9,45.2,16,13,91.38,
59              87.1,64.5,14,6,98.61,
60              64.1,62.0,21,12,8.52,
61              66.9,67.5,14,7,2.27,
62              68.9,60.7,19,12,4.43,
63              61.7,69.3,22,5,2.82,
64              68.3,72.6,18,2,24.20,
65              71.7,34.0,17,8,3.30,
66              55.7,19.4,26,28,12.11,
67              54.3,15.2,31,20,2.15,
68              65.1,73.0,19,9,2.84,
69              65.5,59.8,22,10,5.23,
70              65.0,55.1,14,3,4.52,
71              56.6,50.9,22,12,15.14,
72              57.4,54.1,20,6,4.20,
73              72.5,71.2,12,1,2.40,
74              74.2,58.1,14,8,5.23,
75              72.0,63.5,6,3,2.56,
76              60.5,60.8,16,10,7.72,
77              58.3,26.8,25,19,18.46,
78              65.4,49.5,15,8,6.10,
79              75.5,85.9,3,2,99.71,
80              69.3,84.9,7,6,99.68,
81              77.3,89.7,5,2,100.00,
82              70.5,78.2,12,6,98.96,
83              79.4,64.9,7,3,98.22,
84              65.0,75.9,9,9,99.06,
85              92.2,84.6,3,3,99.46,
86              79.3,63.1,13,13,96.83,
87              70.4,38.4,26,12,5.62,
88              65.7,7.7,29,11,13.79,
89              72.7,16.7,22,13,11.22,
90              64.4,17.6,35,32,16.92,
91              77.6,37.6,15,7,4.97,
92              67.6,18.7,25,7,8.65,
93              35.0,1.2,37,53,42.34,
94              44.7,46.6,16,29,50.43,
95              42.8,27.7,22,29,58.33
96          };
97   
98      
99      /**
100      * Test Longley dataset against R.
101      */
102     public void testLongly() throws Exception {  
103         RealMatrix matrix = createRealMatrix(longleyData, 16, 7);
104         PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix); 
105         RealMatrix correlationMatrix = corrInstance.getCorrelationMatrix();
106         double[] rData = new double[] {
107                 1.000000000000000, 0.9708985250610560, 0.9835516111796693, 0.5024980838759942,
108                 0.4573073999764817, 0.960390571594376, 0.9713294591921188,
109                 0.970898525061056, 1.0000000000000000, 0.9915891780247822, 0.6206333925590966,
110                 0.4647441876006747, 0.979163432977498, 0.9911491900672053,
111                 0.983551611179669, 0.9915891780247822, 1.0000000000000000, 0.6042609398895580,
112                 0.4464367918926265, 0.991090069458478, 0.9952734837647849,
113                 0.502498083875994, 0.6206333925590966, 0.6042609398895580, 1.0000000000000000,
114                 -0.1774206295018783, 0.686551516365312, 0.6682566045621746,
115                 0.457307399976482, 0.4647441876006747, 0.4464367918926265, -0.1774206295018783,
116                 1.0000000000000000, 0.364416267189032, 0.4172451498349454,
117                 0.960390571594376, 0.9791634329774981, 0.9910900694584777, 0.6865515163653120,
118                 0.3644162671890320, 1.000000000000000, 0.9939528462329257,
119                 0.971329459192119, 0.9911491900672053, 0.9952734837647849, 0.6682566045621746,
120                 0.4172451498349454, 0.993952846232926, 1.0000000000000000
121         }; 
122         TestUtils.assertEquals("correlation matrix", createRealMatrix(rData, 7, 7), correlationMatrix, 10E-15);
123         
124         double[] rPvalues = new double[] {
125                 4.38904690369668e-10,
126                 8.36353208910623e-12, 7.8159700933611e-14,
127                 0.0472894097790304, 0.01030636128354301, 0.01316878049026582, 
128                 0.0749178049642416, 0.06971758330341182, 0.0830166169296545, 0.510948586323452,
129                 3.693245043123738e-09, 4.327782576751815e-11, 1.167954621905665e-13, 0.00331028281967516, 0.1652293725106684, 
130                 3.95834476307755e-10, 1.114663916723657e-13, 1.332267629550188e-15, 0.00466039138541463, 0.1078477071581498, 7.771561172376096e-15
131         };
132         RealMatrix rPMatrix = createLowerTriangularRealMatrix(rPvalues, 7);
133         fillUpper(rPMatrix, 0d);
134         TestUtils.assertEquals("correlation p values", rPMatrix, corrInstance.getCorrelationPValues(), 10E-15);
135     }
136     
137     /**
138      * Test R Swiss fertility dataset against R.
139      */
140     public void testSwissFertility() throws Exception {
141          RealMatrix matrix = createRealMatrix(swissData, 47, 5);
142          PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix); 
143          RealMatrix correlationMatrix = corrInstance.getCorrelationMatrix();
144          double[] rData = new double[] {
145                1.0000000000000000, 0.3530791836199747, -0.6458827064572875, -0.6637888570350691,  0.4636847006517939,
146                  0.3530791836199747, 1.0000000000000000,-0.6865422086171366, -0.6395225189483201, 0.4010950530487398,
147                 -0.6458827064572875, -0.6865422086171366, 1.0000000000000000, 0.6984152962884830, -0.5727418060641666,
148                 -0.6637888570350691, -0.6395225189483201, 0.6984152962884830, 1.0000000000000000, -0.1538589170909148,
149                  0.4636847006517939, 0.4010950530487398, -0.5727418060641666, -0.1538589170909148, 1.0000000000000000
150          };
151          TestUtils.assertEquals("correlation matrix", createRealMatrix(rData, 5, 5), correlationMatrix, 10E-15);
152          
153          double[] rPvalues = new double[] {
154                  0.01491720061472623,
155                  9.45043734069043e-07, 9.95151527133974e-08,
156                  3.658616965962355e-07, 1.304590105694471e-06, 4.811397236181847e-08,
157                  0.001028523190118147, 0.005204433539191644, 2.588307925380906e-05, 0.301807756132683
158          };
159          RealMatrix rPMatrix = createLowerTriangularRealMatrix(rPvalues, 5);
160          fillUpper(rPMatrix, 0d);
161          TestUtils.assertEquals("correlation p values", rPMatrix, corrInstance.getCorrelationPValues(), 10E-15);
162     }
163     
164     /**
165      * Constant column
166      */
167     public void testConstant() {
168         double[] noVariance = new double[] {1, 1, 1, 1};
169         double[] values = new double[] {1, 2, 3, 4};
170         assertTrue(Double.isNaN(new PearsonsCorrelation().correlation(noVariance, values)));
171     }
172     
173     
174     /**
175      * Insufficient data
176      */
177      
178     public void testInsufficientData() {
179         double[] one = new double[] {1};
180         double[] two = new double[] {2};
181         try {
182             new PearsonsCorrelation().correlation(one, two);
183             fail("Expecting IllegalArgumentException");
184         } catch (IllegalArgumentException ex) {
185             // Expected
186         }
187         RealMatrix matrix = new BlockRealMatrix(new double[][] {{0},{1}});
188         try {
189             new PearsonsCorrelation(matrix);
190             fail("Expecting IllegalArgumentException");
191         } catch (IllegalArgumentException ex) {
192             // Expected
193         }
194     }
195     
196     /**
197      * Verify that direct t-tests using standard error estimates are consistent
198      * with reported p-values
199      */
200     public void testStdErrorConsistency() throws Exception {
201         TDistribution tDistribution = new TDistributionImpl(45);
202         RealMatrix matrix = createRealMatrix(swissData, 47, 5);
203         PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix); 
204         RealMatrix rValues = corrInstance.getCorrelationMatrix();
205         RealMatrix pValues = corrInstance.getCorrelationPValues();
206         RealMatrix stdErrors = corrInstance.getCorrelationStandardErrors();
207         for (int i = 0; i < 5; i++) {
208             for (int j = 0; j < i; j++) {
209                 double t = Math.abs(rValues.getEntry(i, j)) / stdErrors.getEntry(i, j);
210                 double p = 2 * (1 - tDistribution.cumulativeProbability(t));
211                 assertEquals(p, pValues.getEntry(i, j), 10E-15);
212             }
213         }
214     }
215     
216     /**
217      * Verify that creating correlation from covariance gives same results as
218      * direct computation from the original matrix
219      */
220     public void testCovarianceConsistency() throws Exception {
221         RealMatrix matrix = createRealMatrix(longleyData, 16, 7);
222         PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix); 
223         Covariance covInstance = new Covariance(matrix);
224         PearsonsCorrelation corrFromCovInstance = new PearsonsCorrelation(covInstance);
225         TestUtils.assertEquals("correlation values", corrInstance.getCorrelationMatrix(),
226                 corrFromCovInstance.getCorrelationMatrix(), 10E-15);
227         TestUtils.assertEquals("p values", corrInstance.getCorrelationPValues(),
228                 corrFromCovInstance.getCorrelationPValues(), 10E-15);
229         TestUtils.assertEquals("standard errors", corrInstance.getCorrelationStandardErrors(),
230                 corrFromCovInstance.getCorrelationStandardErrors(), 10E-15);
231         
232         PearsonsCorrelation corrFromCovInstance2 = 
233             new PearsonsCorrelation(covInstance.getCovarianceMatrix(), 16);
234         TestUtils.assertEquals("correlation values", corrInstance.getCorrelationMatrix(),
235                 corrFromCovInstance2.getCorrelationMatrix(), 10E-15);
236         TestUtils.assertEquals("p values", corrInstance.getCorrelationPValues(),
237                 corrFromCovInstance2.getCorrelationPValues(), 10E-15);
238         TestUtils.assertEquals("standard errors", corrInstance.getCorrelationStandardErrors(),
239                 corrFromCovInstance2.getCorrelationStandardErrors(), 10E-15);
240     }
241     
242      
243     public void testConsistency() {
244         RealMatrix matrix = createRealMatrix(longleyData, 16, 7);
245         PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix); 
246         double[][] data = matrix.getData();
247         double[] x = matrix.getColumn(0);
248         double[] y = matrix.getColumn(1);
249         assertEquals(new PearsonsCorrelation().correlation(x, y), 
250                 corrInstance.getCorrelationMatrix().getEntry(0, 1), Double.MIN_VALUE);
251         TestUtils.assertEquals("Correlation matrix", corrInstance.getCorrelationMatrix(),
252                 new PearsonsCorrelation().computeCorrelationMatrix(data), Double.MIN_VALUE);
253     }
254     
255     protected RealMatrix createRealMatrix(double[] data, int nRows, int nCols) {
256         double[][] matrixData = new double[nRows][nCols];
257         int ptr = 0;
258         for (int i = 0; i < nRows; i++) {
259             System.arraycopy(data, ptr, matrixData[i], 0, nCols);
260             ptr += nCols;
261         }
262         return new BlockRealMatrix(matrixData); 
263     }
264     
265     protected RealMatrix createLowerTriangularRealMatrix(double[] data, int dimension) {
266         int ptr = 0;
267         RealMatrix result = new BlockRealMatrix(dimension, dimension);
268         for (int i = 1; i < dimension; i++) {
269             for (int j = 0; j < i; j++) {
270                 result.setEntry(i, j, data[ptr]);
271                 ptr++;
272             }
273         }
274         return result;
275     }
276     
277     protected void fillUpper(RealMatrix matrix, double diagonalValue) {
278         int dimension = matrix.getColumnDimension();
279         for (int i = 0; i < dimension; i++) {
280             matrix.setEntry(i, i, diagonalValue);
281             for (int j = i+1; j < dimension; j++) {
282                 matrix.setEntry(i, j, matrix.getEntry(j, i));
283             }
284         }  
285     }
286 }