001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.math.stat.inference;
018    
019    import org.apache.commons.math.MathException;
020    import org.apache.commons.math.MathRuntimeException;
021    import org.apache.commons.math.distribution.ChiSquaredDistribution;
022    import org.apache.commons.math.distribution.ChiSquaredDistributionImpl;
023    
024    /**
025     * Implements Chi-Square test statistics defined in the
026     * {@link UnknownDistributionChiSquareTest} interface.
027     *
028     * @version $Revision: 775470 $ $Date: 2009-05-16 10:29:07 -0400 (Sat, 16 May 2009) $
029     */
030    public class ChiSquareTestImpl implements UnknownDistributionChiSquareTest {
031    
032        /** Distribution used to compute inference statistics. */
033        private ChiSquaredDistribution distribution;
034      
035        /**
036         * Construct a ChiSquareTestImpl 
037         */
038        public ChiSquareTestImpl() {
039            this(new ChiSquaredDistributionImpl(1.0));
040        }
041    
042        /**
043         * Create a test instance using the given distribution for computing
044         * inference statistics.
045         * @param x distribution used to compute inference statistics.
046         * @since 1.2
047         */
048        public ChiSquareTestImpl(ChiSquaredDistribution x) {
049            super();
050            setDistribution(x);
051        }
052         /**
053         * {@inheritDoc}
054         * <p><strong>Note: </strong>This implementation rescales the 
055         * <code>expected</code> array if necessary to ensure that the sum of the
056         * expected and observed counts are equal.</p>
057         * 
058         * @param observed array of observed frequency counts
059         * @param expected array of expected frequency counts
060         * @return chi-square test statistic
061         * @throws IllegalArgumentException if preconditions are not met
062         * or length is less than 2
063         */
064        public double chiSquare(double[] expected, long[] observed)
065            throws IllegalArgumentException {
066            if (expected.length < 2) {
067                throw MathRuntimeException.createIllegalArgumentException(
068                      "expected array length = {0}, must be at least 2",
069                      expected.length);
070            }
071            if (expected.length != observed.length) {
072                throw MathRuntimeException.createIllegalArgumentException(
073                      "dimension mismatch {0} != {1}", expected.length, observed.length);
074            }
075            checkPositive(expected);
076            checkNonNegative(observed);
077            double sumExpected = 0d;
078            double sumObserved = 0d;
079            for (int i = 0; i < observed.length; i++) {
080                sumExpected += expected[i];
081                sumObserved += observed[i];
082            }
083            double ratio = 1.0d;
084            boolean rescale = false;
085            if (Math.abs(sumExpected - sumObserved) > 10E-6) {
086                ratio = sumObserved / sumExpected;
087                rescale = true;
088            }
089            double sumSq = 0.0d;
090            double dev = 0.0d;
091            for (int i = 0; i < observed.length; i++) {
092                if (rescale) {
093                    dev = (observed[i] - ratio * expected[i]);
094                    sumSq += dev * dev / (ratio * expected[i]);
095                } else {
096                    dev = (observed[i] - expected[i]);
097                    sumSq += dev * dev / expected[i];
098                }
099            }
100            return sumSq;
101        }
102    
103        /**
104         * {@inheritDoc}
105         * <p><strong>Note: </strong>This implementation rescales the 
106         * <code>expected</code> array if necessary to ensure that the sum of the
107         * expected and observed counts are equal.</p>
108         * 
109         * @param observed array of observed frequency counts
110         * @param expected array of expected frequency counts
111         * @return p-value
112         * @throws IllegalArgumentException if preconditions are not met
113         * @throws MathException if an error occurs computing the p-value
114         */
115        public double chiSquareTest(double[] expected, long[] observed)
116            throws IllegalArgumentException, MathException {
117            distribution.setDegreesOfFreedom(expected.length - 1.0);
118            return 1.0 - distribution.cumulativeProbability(
119                chiSquare(expected, observed));
120        }
121    
122        /**
123         * {@inheritDoc}
124         * <p><strong>Note: </strong>This implementation rescales the 
125         * <code>expected</code> array if necessary to ensure that the sum of the
126         * expected and observed counts are equal.</p>
127         * 
128         * @param observed array of observed frequency counts
129         * @param expected array of expected frequency counts
130         * @param alpha significance level of the test
131         * @return true iff null hypothesis can be rejected with confidence
132         * 1 - alpha
133         * @throws IllegalArgumentException if preconditions are not met
134         * @throws MathException if an error occurs performing the test
135         */
136        public boolean chiSquareTest(double[] expected, long[] observed, 
137                double alpha) throws IllegalArgumentException, MathException {
138            if ((alpha <= 0) || (alpha > 0.5)) {
139                throw MathRuntimeException.createIllegalArgumentException(
140                      "out of bounds significance level {0}, must be between {1} and {2}",
141                      alpha, 0, 0.5);
142            }
143            return (chiSquareTest(expected, observed) < alpha);
144        }
145        
146        /**
147         * @param counts array representation of 2-way table
148         * @return chi-square test statistic
149         * @throws IllegalArgumentException if preconditions are not met
150         */
151        public double chiSquare(long[][] counts) throws IllegalArgumentException {
152            
153            checkArray(counts);
154            int nRows = counts.length;
155            int nCols = counts[0].length;
156            
157            // compute row, column and total sums
158            double[] rowSum = new double[nRows];
159            double[] colSum = new double[nCols];
160            double total = 0.0d;
161            for (int row = 0; row < nRows; row++) {
162                for (int col = 0; col < nCols; col++) {
163                    rowSum[row] += counts[row][col];
164                    colSum[col] += counts[row][col];
165                    total += counts[row][col];
166                }
167            }
168            
169            // compute expected counts and chi-square
170            double sumSq = 0.0d;
171            double expected = 0.0d;
172            for (int row = 0; row < nRows; row++) {
173                for (int col = 0; col < nCols; col++) {
174                    expected = (rowSum[row] * colSum[col]) / total;
175                    sumSq += ((counts[row][col] - expected) * 
176                            (counts[row][col] - expected)) / expected; 
177                }
178            } 
179            return sumSq;
180        }
181    
182        /**
183         * @param counts array representation of 2-way table
184         * @return p-value
185         * @throws IllegalArgumentException if preconditions are not met
186         * @throws MathException if an error occurs computing the p-value
187         */
188        public double chiSquareTest(long[][] counts)
189        throws IllegalArgumentException, MathException {
190            checkArray(counts);
191            double df = ((double) counts.length -1) * ((double) counts[0].length - 1);
192            distribution.setDegreesOfFreedom(df);
193            return 1 - distribution.cumulativeProbability(chiSquare(counts));
194        }
195    
196        /**
197         * @param counts array representation of 2-way table
198         * @param alpha significance level of the test
199         * @return true iff null hypothesis can be rejected with confidence
200         * 1 - alpha
201         * @throws IllegalArgumentException if preconditions are not met
202         * @throws MathException if an error occurs performing the test
203         */
204        public boolean chiSquareTest(long[][] counts, double alpha)
205        throws IllegalArgumentException, MathException {
206            if ((alpha <= 0) || (alpha > 0.5)) {
207                throw MathRuntimeException.createIllegalArgumentException(
208                      "out of bounds significance level {0}, must be between {1} and {2}",
209                      alpha, 0.0, 0.5);
210            }
211            return (chiSquareTest(counts) < alpha);
212        }
213        
214        /**
215         * @param observed1 array of observed frequency counts of the first data set
216         * @param observed2 array of observed frequency counts of the second data set
217         * @return chi-square test statistic
218         * @throws IllegalArgumentException if preconditions are not met
219         * @since 1.2
220         */
221        public double chiSquareDataSetsComparison(long[] observed1, long[] observed2)
222            throws IllegalArgumentException {
223            
224            // Make sure lengths are same
225            if (observed1.length < 2) {
226                throw MathRuntimeException.createIllegalArgumentException(
227                      "observed array length = {0}, must be at least 2",
228                      observed1.length);
229            }
230            if (observed1.length != observed2.length) {
231                throw MathRuntimeException.createIllegalArgumentException(
232                      "dimension mismatch {0} != {1}",
233                      observed1.length, observed2.length);
234            }
235    
236            // Ensure non-negative counts
237            checkNonNegative(observed1);
238            checkNonNegative(observed2);
239    
240            // Compute and compare count sums
241            long countSum1 = 0;
242            long countSum2 = 0;
243            boolean unequalCounts = false;
244            double weight = 0.0;
245            for (int i = 0; i < observed1.length; i++) {
246                countSum1 += observed1[i];
247                countSum2 += observed2[i];   
248            }
249            // Ensure neither sample is uniformly 0
250            if (countSum1 == 0) {
251                throw MathRuntimeException.createIllegalArgumentException(
252                      "observed counts are all 0 in first observed array"); 
253            }
254            if (countSum2 == 0) {
255                throw MathRuntimeException.createIllegalArgumentException(
256                      "observed counts are all 0 in second observed array"); 
257            }
258            // Compare and compute weight only if different
259            unequalCounts = (countSum1 != countSum2);
260            if (unequalCounts) {
261                weight = Math.sqrt((double) countSum1 / (double) countSum2);
262            }
263            // Compute ChiSquare statistic
264            double sumSq = 0.0d;
265            double dev = 0.0d;
266            double obs1 = 0.0d;
267            double obs2 = 0.0d;
268            for (int i = 0; i < observed1.length; i++) {
269                if (observed1[i] == 0 && observed2[i] == 0) {
270                    throw MathRuntimeException.createIllegalArgumentException(
271                          "observed counts are both zero for entry {0}", i);
272                } else {
273                    obs1 = observed1[i];
274                    obs2 = observed2[i];
275                    if (unequalCounts) { // apply weights
276                        dev = obs1/weight - obs2 * weight;
277                    } else {
278                        dev = obs1 - obs2;
279                    }
280                    sumSq += (dev * dev) / (obs1 + obs2);
281                }
282            }
283            return sumSq;
284        }
285    
286        /**
287         * @param observed1 array of observed frequency counts of the first data set
288         * @param observed2 array of observed frequency counts of the second data set
289         * @return p-value
290         * @throws IllegalArgumentException if preconditions are not met
291         * @throws MathException if an error occurs computing the p-value
292         * @since 1.2
293         */
294        public double chiSquareTestDataSetsComparison(long[] observed1, long[] observed2)
295            throws IllegalArgumentException, MathException {
296            distribution.setDegreesOfFreedom((double) observed1.length - 1);
297            return 1 - distribution.cumulativeProbability(
298                    chiSquareDataSetsComparison(observed1, observed2));
299        }
300    
301        /**
302         * @param observed1 array of observed frequency counts of the first data set
303         * @param observed2 array of observed frequency counts of the second data set
304         * @param alpha significance level of the test
305         * @return true iff null hypothesis can be rejected with confidence
306         * 1 - alpha
307         * @throws IllegalArgumentException if preconditions are not met
308         * @throws MathException if an error occurs performing the test
309         * @since 1.2
310         */
311        public boolean chiSquareTestDataSetsComparison(long[] observed1, long[] observed2,
312                double alpha) throws IllegalArgumentException, MathException {
313            if ((alpha <= 0) || (alpha > 0.5)) {
314                throw MathRuntimeException.createIllegalArgumentException(
315                      "out of bounds significance level {0}, must be between {1} and {2}",
316                      alpha, 0.0, 0.5);
317            }
318            return (chiSquareTestDataSetsComparison(observed1, observed2) < alpha);
319        }
320    
321        /**
322         * Checks to make sure that the input long[][] array is rectangular,
323         * has at least 2 rows and 2 columns, and has all non-negative entries,
324         * throwing IllegalArgumentException if any of these checks fail.
325         * 
326         * @param in input 2-way table to check
327         * @throws IllegalArgumentException if the array is not valid
328         */
329        private void checkArray(long[][] in) throws IllegalArgumentException {
330            
331            if (in.length < 2) {
332                throw MathRuntimeException.createIllegalArgumentException(
333                      "invalid row dimension: {0} (must be at least 2)",
334                      in.length);
335            }
336            
337            if (in[0].length < 2) {
338                throw MathRuntimeException.createIllegalArgumentException(
339                      "invalid column dimension: {0} (must be at least 2)",
340                      in[0].length);
341            }    
342            
343            checkRectangular(in);
344            checkNonNegative(in);
345            
346        }
347        
348        //---------------------  Private array methods -- should find a utility home for these
349        
350        /**
351         * Throws IllegalArgumentException if the input array is not rectangular.
352         * 
353         * @param in array to be tested
354         * @throws NullPointerException if input array is null
355         * @throws IllegalArgumentException if input array is not rectangular
356         */
357        private void checkRectangular(long[][] in) {
358            for (int i = 1; i < in.length; i++) {
359                if (in[i].length != in[0].length) {
360                    throw MathRuntimeException.createIllegalArgumentException(
361                          "some rows have length {0} while others have length {1}",
362                          in[i].length, in[0].length);
363                }
364            }  
365        }
366        
367        /**
368         * Check all entries of the input array are > 0.
369         * 
370         * @param in array to be tested
371         * @exception IllegalArgumentException if one entry is not positive
372         */
373        private void checkPositive(double[] in) throws IllegalArgumentException {
374            for (int i = 0; i < in.length; i++) {
375                if (in[i] <= 0) {
376                    throw MathRuntimeException.createIllegalArgumentException(
377                          "element {0} is not positive: {1}",
378                          i, in[i]);
379                }
380            }
381        }
382        
383        /**
384         * Check all entries of the input array are >= 0.
385         * 
386         * @param in array to be tested
387         * @exception IllegalArgumentException if one entry is negative
388         */
389        private void checkNonNegative(long[] in) throws IllegalArgumentException {
390            for (int i = 0; i < in.length; i++) {
391                if (in[i] < 0) {
392                    throw MathRuntimeException.createIllegalArgumentException(
393                          "element {0} is negative: {1}",
394                          i, in[i]);
395                }
396            }
397        }
398        
399        /**
400         * Check all entries of the input array are >= 0.
401         * 
402         * @param in array to be tested
403         * @exception IllegalArgumentException if one entry is negative
404         */
405        private void checkNonNegative(long[][] in) throws IllegalArgumentException {
406            for (int i = 0; i < in.length; i ++) {
407                for (int j = 0; j < in[i].length; j++) {
408                    if (in[i][j] < 0) {
409                        throw MathRuntimeException.createIllegalArgumentException(
410                              "element ({0}, {1}) is negative: {2}",
411                              i, j, in[i][j]);
412                    }
413                }
414            }
415        }
416     
417        /**
418         * Modify the distribution used to compute inference statistics.
419         * 
420         * @param value
421         *            the new distribution
422         * @since 1.2
423         */
424        public void setDistribution(ChiSquaredDistribution value) {
425            distribution = value;
426        }
427    }