001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.math.stat.correlation;
018    
019    import org.apache.commons.math.TestUtils;
020    import org.apache.commons.math.distribution.TDistribution;
021    import org.apache.commons.math.distribution.TDistributionImpl;
022    import org.apache.commons.math.linear.RealMatrix;
023    import org.apache.commons.math.linear.BlockRealMatrix;
024    
025    import junit.framework.TestCase;
026    
027    public class PearsonsCorrelationTest extends TestCase {
028        
029        protected final double[] longleyData = new double[] {
030                60323,83.0,234289,2356,1590,107608,1947,
031                61122,88.5,259426,2325,1456,108632,1948,
032                60171,88.2,258054,3682,1616,109773,1949,
033                61187,89.5,284599,3351,1650,110929,1950,
034                63221,96.2,328975,2099,3099,112075,1951,
035                63639,98.1,346999,1932,3594,113270,1952,
036                64989,99.0,365385,1870,3547,115094,1953,
037                63761,100.0,363112,3578,3350,116219,1954,
038                66019,101.2,397469,2904,3048,117388,1955,
039                67857,104.6,419180,2822,2857,118734,1956,
040                68169,108.4,442769,2936,2798,120445,1957,
041                66513,110.8,444546,4681,2637,121950,1958,
042                68655,112.6,482704,3813,2552,123366,1959,
043                69564,114.2,502601,3931,2514,125368,1960,
044                69331,115.7,518173,4806,2572,127852,1961,
045                70551,116.9,554894,4007,2827,130081,1962
046            };
047        
048        protected final double[] swissData = new double[] {
049                80.2,17.0,15,12,9.96,
050                83.1,45.1,6,9,84.84,
051                92.5,39.7,5,5,93.40,
052                85.8,36.5,12,7,33.77,
053                76.9,43.5,17,15,5.16,
054                76.1,35.3,9,7,90.57,
055                83.8,70.2,16,7,92.85,
056                92.4,67.8,14,8,97.16,
057                82.4,53.3,12,7,97.67,
058                82.9,45.2,16,13,91.38,
059                87.1,64.5,14,6,98.61,
060                64.1,62.0,21,12,8.52,
061                66.9,67.5,14,7,2.27,
062                68.9,60.7,19,12,4.43,
063                61.7,69.3,22,5,2.82,
064                68.3,72.6,18,2,24.20,
065                71.7,34.0,17,8,3.30,
066                55.7,19.4,26,28,12.11,
067                54.3,15.2,31,20,2.15,
068                65.1,73.0,19,9,2.84,
069                65.5,59.8,22,10,5.23,
070                65.0,55.1,14,3,4.52,
071                56.6,50.9,22,12,15.14,
072                57.4,54.1,20,6,4.20,
073                72.5,71.2,12,1,2.40,
074                74.2,58.1,14,8,5.23,
075                72.0,63.5,6,3,2.56,
076                60.5,60.8,16,10,7.72,
077                58.3,26.8,25,19,18.46,
078                65.4,49.5,15,8,6.10,
079                75.5,85.9,3,2,99.71,
080                69.3,84.9,7,6,99.68,
081                77.3,89.7,5,2,100.00,
082                70.5,78.2,12,6,98.96,
083                79.4,64.9,7,3,98.22,
084                65.0,75.9,9,9,99.06,
085                92.2,84.6,3,3,99.46,
086                79.3,63.1,13,13,96.83,
087                70.4,38.4,26,12,5.62,
088                65.7,7.7,29,11,13.79,
089                72.7,16.7,22,13,11.22,
090                64.4,17.6,35,32,16.92,
091                77.6,37.6,15,7,4.97,
092                67.6,18.7,25,7,8.65,
093                35.0,1.2,37,53,42.34,
094                44.7,46.6,16,29,50.43,
095                42.8,27.7,22,29,58.33
096            };
097     
098        
099        /**
100         * Test Longley dataset against R.
101         */
102        public void testLongly() throws Exception {  
103            RealMatrix matrix = createRealMatrix(longleyData, 16, 7);
104            PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix); 
105            RealMatrix correlationMatrix = corrInstance.getCorrelationMatrix();
106            double[] rData = new double[] {
107                    1.000000000000000, 0.9708985250610560, 0.9835516111796693, 0.5024980838759942,
108                    0.4573073999764817, 0.960390571594376, 0.9713294591921188,
109                    0.970898525061056, 1.0000000000000000, 0.9915891780247822, 0.6206333925590966,
110                    0.4647441876006747, 0.979163432977498, 0.9911491900672053,
111                    0.983551611179669, 0.9915891780247822, 1.0000000000000000, 0.6042609398895580,
112                    0.4464367918926265, 0.991090069458478, 0.9952734837647849,
113                    0.502498083875994, 0.6206333925590966, 0.6042609398895580, 1.0000000000000000,
114                    -0.1774206295018783, 0.686551516365312, 0.6682566045621746,
115                    0.457307399976482, 0.4647441876006747, 0.4464367918926265, -0.1774206295018783,
116                    1.0000000000000000, 0.364416267189032, 0.4172451498349454,
117                    0.960390571594376, 0.9791634329774981, 0.9910900694584777, 0.6865515163653120,
118                    0.3644162671890320, 1.000000000000000, 0.9939528462329257,
119                    0.971329459192119, 0.9911491900672053, 0.9952734837647849, 0.6682566045621746,
120                    0.4172451498349454, 0.993952846232926, 1.0000000000000000
121            }; 
122            TestUtils.assertEquals("correlation matrix", createRealMatrix(rData, 7, 7), correlationMatrix, 10E-15);
123            
124            double[] rPvalues = new double[] {
125                    4.38904690369668e-10,
126                    8.36353208910623e-12, 7.8159700933611e-14,
127                    0.0472894097790304, 0.01030636128354301, 0.01316878049026582, 
128                    0.0749178049642416, 0.06971758330341182, 0.0830166169296545, 0.510948586323452,
129                    3.693245043123738e-09, 4.327782576751815e-11, 1.167954621905665e-13, 0.00331028281967516, 0.1652293725106684, 
130                    3.95834476307755e-10, 1.114663916723657e-13, 1.332267629550188e-15, 0.00466039138541463, 0.1078477071581498, 7.771561172376096e-15
131            };
132            RealMatrix rPMatrix = createLowerTriangularRealMatrix(rPvalues, 7);
133            fillUpper(rPMatrix, 0d);
134            TestUtils.assertEquals("correlation p values", rPMatrix, corrInstance.getCorrelationPValues(), 10E-15);
135        }
136        
137        /**
138         * Test R Swiss fertility dataset against R.
139         */
140        public void testSwissFertility() throws Exception {
141             RealMatrix matrix = createRealMatrix(swissData, 47, 5);
142             PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix); 
143             RealMatrix correlationMatrix = corrInstance.getCorrelationMatrix();
144             double[] rData = new double[] {
145                   1.0000000000000000, 0.3530791836199747, -0.6458827064572875, -0.6637888570350691,  0.4636847006517939,
146                     0.3530791836199747, 1.0000000000000000,-0.6865422086171366, -0.6395225189483201, 0.4010950530487398,
147                    -0.6458827064572875, -0.6865422086171366, 1.0000000000000000, 0.6984152962884830, -0.5727418060641666,
148                    -0.6637888570350691, -0.6395225189483201, 0.6984152962884830, 1.0000000000000000, -0.1538589170909148,
149                     0.4636847006517939, 0.4010950530487398, -0.5727418060641666, -0.1538589170909148, 1.0000000000000000
150             };
151             TestUtils.assertEquals("correlation matrix", createRealMatrix(rData, 5, 5), correlationMatrix, 10E-15);
152             
153             double[] rPvalues = new double[] {
154                     0.01491720061472623,
155                     9.45043734069043e-07, 9.95151527133974e-08,
156                     3.658616965962355e-07, 1.304590105694471e-06, 4.811397236181847e-08,
157                     0.001028523190118147, 0.005204433539191644, 2.588307925380906e-05, 0.301807756132683
158             };
159             RealMatrix rPMatrix = createLowerTriangularRealMatrix(rPvalues, 5);
160             fillUpper(rPMatrix, 0d);
161             TestUtils.assertEquals("correlation p values", rPMatrix, corrInstance.getCorrelationPValues(), 10E-15);
162        }
163        
164        /**
165         * Constant column
166         */
167        public void testConstant() {
168            double[] noVariance = new double[] {1, 1, 1, 1};
169            double[] values = new double[] {1, 2, 3, 4};
170            assertTrue(Double.isNaN(new PearsonsCorrelation().correlation(noVariance, values)));
171        }
172        
173        
174        /**
175         * Insufficient data
176         */
177         
178        public void testInsufficientData() {
179            double[] one = new double[] {1};
180            double[] two = new double[] {2};
181            try {
182                new PearsonsCorrelation().correlation(one, two);
183                fail("Expecting IllegalArgumentException");
184            } catch (IllegalArgumentException ex) {
185                // Expected
186            }
187            RealMatrix matrix = new BlockRealMatrix(new double[][] {{0},{1}});
188            try {
189                new PearsonsCorrelation(matrix);
190                fail("Expecting IllegalArgumentException");
191            } catch (IllegalArgumentException ex) {
192                // Expected
193            }
194        }
195        
196        /**
197         * Verify that direct t-tests using standard error estimates are consistent
198         * with reported p-values
199         */
200        public void testStdErrorConsistency() throws Exception {
201            TDistribution tDistribution = new TDistributionImpl(45);
202            RealMatrix matrix = createRealMatrix(swissData, 47, 5);
203            PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix); 
204            RealMatrix rValues = corrInstance.getCorrelationMatrix();
205            RealMatrix pValues = corrInstance.getCorrelationPValues();
206            RealMatrix stdErrors = corrInstance.getCorrelationStandardErrors();
207            for (int i = 0; i < 5; i++) {
208                for (int j = 0; j < i; j++) {
209                    double t = Math.abs(rValues.getEntry(i, j)) / stdErrors.getEntry(i, j);
210                    double p = 2 * (1 - tDistribution.cumulativeProbability(t));
211                    assertEquals(p, pValues.getEntry(i, j), 10E-15);
212                }
213            }
214        }
215        
216        /**
217         * Verify that creating correlation from covariance gives same results as
218         * direct computation from the original matrix
219         */
220        public void testCovarianceConsistency() throws Exception {
221            RealMatrix matrix = createRealMatrix(longleyData, 16, 7);
222            PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix); 
223            Covariance covInstance = new Covariance(matrix);
224            PearsonsCorrelation corrFromCovInstance = new PearsonsCorrelation(covInstance);
225            TestUtils.assertEquals("correlation values", corrInstance.getCorrelationMatrix(),
226                    corrFromCovInstance.getCorrelationMatrix(), 10E-15);
227            TestUtils.assertEquals("p values", corrInstance.getCorrelationPValues(),
228                    corrFromCovInstance.getCorrelationPValues(), 10E-15);
229            TestUtils.assertEquals("standard errors", corrInstance.getCorrelationStandardErrors(),
230                    corrFromCovInstance.getCorrelationStandardErrors(), 10E-15);
231            
232            PearsonsCorrelation corrFromCovInstance2 = 
233                new PearsonsCorrelation(covInstance.getCovarianceMatrix(), 16);
234            TestUtils.assertEquals("correlation values", corrInstance.getCorrelationMatrix(),
235                    corrFromCovInstance2.getCorrelationMatrix(), 10E-15);
236            TestUtils.assertEquals("p values", corrInstance.getCorrelationPValues(),
237                    corrFromCovInstance2.getCorrelationPValues(), 10E-15);
238            TestUtils.assertEquals("standard errors", corrInstance.getCorrelationStandardErrors(),
239                    corrFromCovInstance2.getCorrelationStandardErrors(), 10E-15);
240        }
241        
242         
243        public void testConsistency() {
244            RealMatrix matrix = createRealMatrix(longleyData, 16, 7);
245            PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix); 
246            double[][] data = matrix.getData();
247            double[] x = matrix.getColumn(0);
248            double[] y = matrix.getColumn(1);
249            assertEquals(new PearsonsCorrelation().correlation(x, y), 
250                    corrInstance.getCorrelationMatrix().getEntry(0, 1), Double.MIN_VALUE);
251            TestUtils.assertEquals("Correlation matrix", corrInstance.getCorrelationMatrix(),
252                    new PearsonsCorrelation().computeCorrelationMatrix(data), Double.MIN_VALUE);
253        }
254        
255        protected RealMatrix createRealMatrix(double[] data, int nRows, int nCols) {
256            double[][] matrixData = new double[nRows][nCols];
257            int ptr = 0;
258            for (int i = 0; i < nRows; i++) {
259                System.arraycopy(data, ptr, matrixData[i], 0, nCols);
260                ptr += nCols;
261            }
262            return new BlockRealMatrix(matrixData); 
263        }
264        
265        protected RealMatrix createLowerTriangularRealMatrix(double[] data, int dimension) {
266            int ptr = 0;
267            RealMatrix result = new BlockRealMatrix(dimension, dimension);
268            for (int i = 1; i < dimension; i++) {
269                for (int j = 0; j < i; j++) {
270                    result.setEntry(i, j, data[ptr]);
271                    ptr++;
272                }
273            }
274            return result;
275        }
276        
277        protected void fillUpper(RealMatrix matrix, double diagonalValue) {
278            int dimension = matrix.getColumnDimension();
279            for (int i = 0; i < dimension; i++) {
280                matrix.setEntry(i, i, diagonalValue);
281                for (int j = i+1; j < dimension; j++) {
282                    matrix.setEntry(i, j, matrix.getEntry(j, i));
283                }
284            }  
285        }
286    }