View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math.stat.inference;
18  
19  import java.util.Collection;
20  
21  import org.apache.commons.math.MathException;
22  import org.apache.commons.math.MathRuntimeException;
23  import org.apache.commons.math.distribution.FDistribution;
24  import org.apache.commons.math.distribution.FDistributionImpl;
25  import org.apache.commons.math.stat.descriptive.summary.Sum;
26  import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
27  
28  
29  /**
30   * Implements one-way ANOVA statistics defined in the {@link OneWayAnova}
31   * interface.
32   * 
33   * <p>Uses the 
34   * {@link org.apache.commons.math.distribution.FDistribution
35   *  commons-math F Distribution implementation} to estimate exact p-values.</p>
36   *
37   * <p>This implementation is based on a description at 
38   * http://faculty.vassar.edu/lowry/ch13pt1.html</p>
39   * <pre>
40   * Abbreviations: bg = between groups,
41   *                wg = within groups,
42   *                ss = sum squared deviations
43   * </pre>
44   *
45   * @since 1.2
46   * @version $Revision: 773189 $ $Date: 2009-05-09 05:57:04 -0400 (Sat, 09 May 2009) $
47   */
48  public class OneWayAnovaImpl implements OneWayAnova  {
49  
50      /**
51       * Default constructor.
52       */
53      public OneWayAnovaImpl() {
54      }
55      
56      /**
57       * {@inheritDoc}<p>
58       * This implementation computes the F statistic using the definitional 
59       * formula<pre>
60       *   F = msbg/mswg</pre>
61       * where<pre>
62       *  msbg = between group mean square
63       *  mswg = within group mean square</pre>
64       * are as defined <a href="http://faculty.vassar.edu/lowry/ch13pt1.html">
65       * here</a></p>
66       */
67      public double anovaFValue(Collection<double[]> categoryData)
68          throws IllegalArgumentException, MathException {
69          AnovaStats a = anovaStats(categoryData);
70          return a.F;
71      }
72  
73      /**
74       * {@inheritDoc}<p>
75       * This implementation uses the
76       * {@link org.apache.commons.math.distribution.FDistribution
77       * commons-math F Distribution implementation} to estimate the exact
78       * p-value, using the formula<pre>
79       *   p = 1 - cumulativeProbability(F)</pre>
80       * where <code>F</code> is the F value and <code>cumulativeProbability</code>
81       * is the commons-math implementation of the F distribution.</p>
82       */
83      public double anovaPValue(Collection<double[]> categoryData)
84          throws IllegalArgumentException, MathException {
85          AnovaStats a = anovaStats(categoryData);
86          FDistribution fdist = new FDistributionImpl(a.dfbg, a.dfwg);
87          return 1.0 - fdist.cumulativeProbability(a.F);
88      }
89  
90      /**
91       * {@inheritDoc}<p>
92       * This implementation uses the
93       * {@link org.apache.commons.math.distribution.FDistribution
94       * commons-math F Distribution implementation} to estimate the exact
95       * p-value, using the formula<pre>
96       *   p = 1 - cumulativeProbability(F)</pre>
97       * where <code>F</code> is the F value and <code>cumulativeProbability</code>
98       * is the commons-math implementation of the F distribution.</p>
99       * <p>True is returned iff the estimated p-value is less than alpha.</p>
100      */
101     public boolean anovaTest(Collection<double[]> categoryData, double alpha)
102         throws IllegalArgumentException, MathException {
103         if ((alpha <= 0) || (alpha > 0.5)) {
104             throw MathRuntimeException.createIllegalArgumentException(
105                   "out of bounds significance level {0}, must be between {1} and {2}",
106                   alpha, 0, 0.5);
107         }
108         return (anovaPValue(categoryData) < alpha);
109     }
110 
111 
112     /**
113      * This method actually does the calculations (except P-value).
114      * 
115      * @param categoryData <code>Collection</code> of <code>double[]</code>
116      * arrays each containing data for one category
117      * @return computed AnovaStats
118      * @throws IllegalArgumentException if categoryData does not meet
119      * preconditions specified in the interface definition
120      * @throws MathException if an error occurs computing the Anova stats
121      */
122     private AnovaStats anovaStats(Collection<double[]> categoryData)
123         throws IllegalArgumentException, MathException {
124 
125         // check if we have enough categories
126         if (categoryData.size() < 2) {
127             throw MathRuntimeException.createIllegalArgumentException(
128                   "two or more categories required, got {0}",
129                   categoryData.size());
130         }
131         
132         // check if each category has enough data and all is double[]
133         for (double[] array : categoryData) {
134             if (array.length <= 1) {
135                 throw MathRuntimeException.createIllegalArgumentException(
136                       "two or more values required in each category, one has {0}",
137                       array.length);
138             }
139         }
140 
141         int dfwg = 0;
142         double sswg = 0;
143         Sum totsum = new Sum();
144         SumOfSquares totsumsq = new SumOfSquares();
145         int totnum = 0;
146         
147         for (double[] data : categoryData) {
148 
149             Sum sum = new Sum();
150             SumOfSquares sumsq = new SumOfSquares();
151             int num = 0;
152 
153             for (int i = 0; i < data.length; i++) {
154                 double val = data[i];
155 
156                 // within category
157                 num++;
158                 sum.increment(val);
159                 sumsq.increment(val);
160 
161                 // for all categories
162                 totnum++;
163                 totsum.increment(val);
164                 totsumsq.increment(val);
165             }
166             dfwg += num - 1;
167             double ss = sumsq.getResult() - sum.getResult() * sum.getResult() / num;
168             sswg += ss;
169         }
170         double sst = totsumsq.getResult() - totsum.getResult() * 
171             totsum.getResult()/totnum;
172         double ssbg = sst - sswg;
173         int dfbg = categoryData.size() - 1;
174         double msbg = ssbg/dfbg;
175         double mswg = sswg/dfwg;
176         double F = msbg/mswg;
177 
178         return new AnovaStats(dfbg, dfwg, F);
179     }
180 
181     /** 
182         Convenience class to pass dfbg,dfwg,F values around within AnovaImpl.
183         No get/set methods provided.
184     */
185     private static class AnovaStats {
186 
187         /** Degrees of freedom in numerator (between groups). */
188         private int dfbg;
189 
190         /** Degrees of freedom in denominator (within groups). */
191         private int dfwg;
192 
193         /** Statistic. */
194         private double F;
195 
196         /**
197          * Constructor
198          * @param dfbg degrees of freedom in numerator (between groups)
199          * @param dfwg degrees of freedom in denominator (within groups)
200          * @param F statistic
201          */
202         AnovaStats(int dfbg, int dfwg, double F) {
203             this.dfbg = dfbg;
204             this.dfwg = dfwg;
205             this.F = F;
206         }
207     }
208 
209 }