001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.math.stat.inference;
018    
019    import org.apache.commons.math.MathException;
020    import org.apache.commons.math.MathRuntimeException;
021    import org.apache.commons.math.distribution.TDistribution;
022    import org.apache.commons.math.distribution.TDistributionImpl;
023    import org.apache.commons.math.stat.StatUtils;
024    import org.apache.commons.math.stat.descriptive.StatisticalSummary;
025    
026    /**
027     * Implements t-test statistics defined in the {@link TTest} interface.
028     * <p>
029     * Uses commons-math {@link org.apache.commons.math.distribution.TDistribution}
030     * implementation to estimate exact p-values.</p>
031     *
032     * @version $Revision: 773189 $ $Date: 2009-05-09 05:57:04 -0400 (Sat, 09 May 2009) $
033     */
034    public class TTestImpl implements TTest  {
035    
036        /** T distribution used to compute inference statistics (p-values). */
037        private TDistribution distribution;
038        
039        /**
040         * Creates a test instance backed by <code>new TDistributionImpl(1.0)</code>.
041         */
042        public TTestImpl() {
043            this(new TDistributionImpl(1.0d));
044        }
045        
046        /**
047         * Builds a test instance whose inference statistics are computed with
048         * the supplied distribution.
049         *
050         * @param t distribution handed to <code>setDistribution</code>
051         * @since 1.2
052         */
053        public TTestImpl(TDistribution t) {
054            setDistribution(t);
055        }
056        
057        /**
058         * Computes a paired, 2-sample t-statistic from the input arrays.
059         * <p>
060         * The result equals the one-sample t-statistic
061         * {@link #t(double, double[])} evaluated with <code>mu = 0</code> on the
062         * array of signed differences between corresponding entries of
063         * <code>sample1</code> and <code>sample2</code>.</p>
064         * <p>
065         * <strong>Preconditions</strong>: <ul>
066         * <li>Both arrays must have the same length, and that common length
067         * must be at least 2.</li></ul></p>
068         *
069         * @param sample1 array of sample data values
070         * @param sample2 array of sample data values
071         * @return t statistic
072         * @throws IllegalArgumentException if the precondition is not met
073         * @throws MathException if the statistic cannot be computed due to a
074         *         convergence or other numerical error
075         */
076        public double pairedT(double[] sample1, double[] sample2)
077            throws IllegalArgumentException, MathException {
078            checkSampleData(sample1);
079            checkSampleData(sample2);
080            final double meanDiff = StatUtils.meanDifference(sample1, sample2);
081            final double varDiff =
082                StatUtils.varianceDifference(sample1, sample2, meanDiff);
083            // One-sample statistic of the paired differences against mu = 0.
084            return t(meanDiff, 0, varDiff, sample1.length);
085        }
086    
087        /**
088         * Returns the <i>observed significance level</i>, or <i>p-value</i>, of a
089         * paired, two-sample, two-tailed t-test on the data in the input arrays.
090         * <p>
091         * The number returned is the smallest significance level at which one can
092         * reject the null hypothesis that the mean of the paired differences is 0
093         * in favor of the two-sided alternative that the mean paired difference is
094         * not equal to 0. For a one-sided test, divide the returned value by 2.</p>
095         * <p>
096         * This test is equivalent to a one-sample t-test computed using
097         * {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample
098         * array consisting of the signed differences between corresponding
099         * elements of <code>sample1</code> and <code>sample2</code>.</p>
100         * <p>
101         * <strong>Usage Note:</strong><br>
102         * The validity of the p-value depends on the assumptions of the parametric
103         * t-test procedure, as discussed
104         * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
105         * here</a></p>
106         * <p>
107         * <strong>Preconditions</strong>: <ul>
108         * <li>The input array lengths must be the same and their common length
109         * must be at least 2.</li></ul></p>
110         *
111         * @param sample1 array of sample data values
112         * @param sample2 array of sample data values
113         * @return p-value for t-test
114         * @throws IllegalArgumentException if the precondition is not met
115         * @throws MathException if an error occurs computing the p-value
116         */
117        public double pairedTTest(double[] sample1, double[] sample2)
118            throws IllegalArgumentException, MathException {
119            // Same up-front validation as pairedT, so bad input fails consistently.
120            checkSampleData(sample1);
121            checkSampleData(sample2);
122            double meanDifference = StatUtils.meanDifference(sample1, sample2);
123            return tTest(meanDifference, 0,
124                    StatUtils.varianceDifference(sample1, sample2, meanDifference),
125                    sample1.length);
126        }
127    
128        /**
129         * Performs a paired t-test of the null hypothesis that the mean of the
130         * paired differences between <code>sample1</code> and <code>sample2</code>
131         * is 0, against the two-sided alternative that the mean paired difference
132         * is not equal to 0, at significance level <code>alpha</code>.
133         * <p>
134         * Returns <code>true</code> iff the null hypothesis can be rejected with
135         * confidence <code>1 - alpha</code>.  To perform a 1-sided test, use
136         * <code>alpha * 2</code></p>
137         * <p>
138         * <strong>Usage Note:</strong><br>
139         * The validity of the test depends on the assumptions of the parametric
140         * t-test procedure, as discussed
141         * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
142         * here</a></p>
143         * <p>
144         * <strong>Preconditions</strong>: <ul>
145         * <li>The input array lengths must be the same and their common length
146         * must be at least 2.
147         * </li>
148         * <li> <code> 0 &lt; alpha &lt; 0.5 </code>
149         * </li></ul></p>
150         *
151         * @param sample1 array of sample data values
152         * @param sample2 array of sample data values
153         * @param alpha significance level of the test
154         * @return true if the null hypothesis can be rejected with
155         *         confidence 1 - alpha
156         * @throws IllegalArgumentException if the preconditions are not met
157         * @throws MathException if an error occurs performing the test
158         */
159        public boolean pairedTTest(double[] sample1, double[] sample2, double alpha)
160            throws IllegalArgumentException, MathException {
161            checkSignificanceLevel(alpha);
162            final double pValue = pairedTTest(sample1, sample2);
163            return pValue < alpha;
164        }
165    
166        /**
167         * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
168         * t statistic </a> from an array of observations and a comparison constant.
169         * <p>
170         * The result can be used to carry out a one-sample t-test for the mean.
171         * </p><p>
172         * <strong>Preconditions</strong>: <ul>
173         * <li>The observed array length must be at least 2.</li></ul></p>
174         *
175         * @param mu comparison constant
176         * @param observed array of values
177         * @return t statistic
178         * @throws IllegalArgumentException if input array length is less than 2
179         */
180        public double t(double mu, double[] observed)
181        throws IllegalArgumentException {
182            checkSampleData(observed);
183            final double sampleMean = StatUtils.mean(observed);
184            final double sampleVariance = StatUtils.variance(observed);
185            return t(sampleMean, mu, sampleVariance, observed.length);
186        }
187    
188        /**
189         * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
190         * t statistic </a> for comparing the mean of the dataset summarized by
191         * <code>sampleStats</code> with <code>mu</code>.
192         * <p>
193         * The result can be used to carry out a one-sample t-test for the mean.
194         * </p><p>
195         * <strong>Preconditions</strong>: <ul>
196         * <li><code>sampleStats.getN() &gt;= 2</code>.</li></ul></p>
197         *
198         * @param mu comparison constant
199         * @param sampleStats StatisticalSummary holding sample summary statistics
200         * @return t statistic
201         * @throws IllegalArgumentException if the precondition is not met
202         */
203        public double t(double mu, StatisticalSummary sampleStats)
204        throws IllegalArgumentException {
205            checkSampleData(sampleStats);
206            final double sampleMean = sampleStats.getMean();
207            final double sampleVariance = sampleStats.getVariance();
208            return t(sampleMean, mu, sampleVariance, sampleStats.getN());
209        }
210    
211        /**
212         * Computes a 2-sample t statistic under the hypothesis of equal
213         * subpopulation variances.  To compute a t-statistic without the
214         * equal variances hypothesis, use {@link #t(double[], double[])}.
215         * <p>
216         * This statistic can be used to perform a (homoscedastic) two-sample
217         * t-test to compare sample means.</p>
218         * <p>
219         * The t-statistic is</p><p>
220         * &nbsp;&nbsp;<code>  t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
221         * </p><p>
222         * where <strong><code>n1</code></strong> is the size of the first sample;
223         * <strong><code> n2</code></strong> is the size of the second sample;
224         * <strong><code> m1</code></strong> is the mean of the first sample;
225         * <strong><code> m2</code></strong> is the mean of the second sample;
226         * and <strong><code>var</code></strong> is the pooled variance estimate:
227         * </p><p>
228         * <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
229         * </p><p>
230         * with <strong><code>var1</code></strong> the variance of the first sample and
231         * <strong><code>var2</code></strong> the variance of the second sample.
232         * </p><p>
233         * <strong>Preconditions</strong>: <ul>
234         * <li>The observed array lengths must both be at least 2.</li></ul></p>
235         *
236         * @param sample1 array of sample data values
237         * @param sample2 array of sample data values
238         * @return t statistic
239         * @throws IllegalArgumentException if the precondition is not met
240         */
241        public double homoscedasticT(double[] sample1, double[] sample2)
242        throws IllegalArgumentException {
243            checkSampleData(sample1);
244            checkSampleData(sample2);
245            final double mean1 = StatUtils.mean(sample1);
246            final double mean2 = StatUtils.mean(sample2);
247            final double variance1 = StatUtils.variance(sample1);
248            final double variance2 = StatUtils.variance(sample2);
249            return homoscedasticT(mean1, mean2, variance1, variance2,
250                    sample1.length, sample2.length);
251        }
252        
253        /**
254         * Computes a 2-sample t statistic without the hypothesis of equal
255         * subpopulation variances.  To compute a t-statistic assuming equal
256         * variances, use {@link #homoscedasticT(double[], double[])}.
257         * <p>
258         * This statistic can be used to perform a two-sample t-test to compare
259         * sample means.</p>
260         * <p>
261         * The t-statistic is</p><p>
262         * &nbsp;&nbsp; <code>  t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
263         * </p><p>
264         * where <strong><code>n1</code></strong> is the size of the first sample;
265         * <strong><code> n2</code></strong> is the size of the second sample;
266         * <strong><code> m1</code></strong> is the mean of the first sample;
267         * <strong><code> m2</code></strong> is the mean of the second sample;
268         * <strong><code> var1</code></strong> is the variance of the first sample;
269         * <strong><code> var2</code></strong> is the variance of the second sample.
270         * </p><p>
271         * <strong>Preconditions</strong>: <ul>
272         * <li>The observed array lengths must both be at least 2.</li></ul></p>
273         *
274         * @param sample1 array of sample data values
275         * @param sample2 array of sample data values
276         * @return t statistic
277         * @throws IllegalArgumentException if the precondition is not met
278         */
279        public double t(double[] sample1, double[] sample2)
280        throws IllegalArgumentException {
281            checkSampleData(sample1);
282            checkSampleData(sample2);
283            final double mean1 = StatUtils.mean(sample1);
284            final double mean2 = StatUtils.mean(sample2);
285            final double variance1 = StatUtils.variance(sample1);
286            final double variance2 = StatUtils.variance(sample2);
287            return t(mean1, mean2, variance1, variance2, sample1.length, sample2.length);
288        }
289    
290        /**
291         * Computes a 2-sample t statistic, comparing the means of the datasets
292         * described by two {@link StatisticalSummary} instances, without the
293         * assumption of equal subpopulation variances.  Use
294         * {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} to
295         * compute a t-statistic under the equal variances assumption.
296         * <p>
297         * This statistic can be used to perform a two-sample t-test to compare
298         * sample means.</p><p>
299         * The returned t-statistic is</p><p>
300         * &nbsp;&nbsp; <code>  t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
301         * </p><p>
302         * where <strong><code>n1</code></strong> is the size of the first sample;
303         * <strong><code> n2</code></strong> is the size of the second sample;
304         * <strong><code> m1</code></strong> is the mean of the first sample;
305         * <strong><code> m2</code></strong> is the mean of the second sample;
306         * <strong><code> var1</code></strong> is the variance of the first sample;
307         * <strong><code> var2</code></strong> is the variance of the second sample.
308         * </p><p>
309         * <strong>Preconditions</strong>: <ul>
310         * <li>The datasets described by the two summaries must each contain
311         * at least 2 observations.</li></ul></p>
312         *
313         * @param sampleStats1 StatisticalSummary describing data from the first sample
314         * @param sampleStats2 StatisticalSummary describing data from the second sample
315         * @return t statistic
316         * @throws IllegalArgumentException if the precondition is not met
317         */
318        public double t(StatisticalSummary sampleStats1,
319                        StatisticalSummary sampleStats2)
320        throws IllegalArgumentException {
321            checkSampleData(sampleStats1);
322            checkSampleData(sampleStats2);
323            final double mean1 = sampleStats1.getMean();
324            final double mean2 = sampleStats2.getMean();
325            final double variance1 = sampleStats1.getVariance();
326            final double variance2 = sampleStats2.getVariance();
327            return t(mean1, mean2, variance1, variance2,
328                    sampleStats1.getN(), sampleStats2.getN());
329        }
330        
331        /**
332         * Computes a 2-sample t statistic, comparing the means of the datasets
333         * described by two {@link StatisticalSummary} instances, under the
334         * assumption of equal subpopulation variances.  To compute a t-statistic
335         * without the equal variances assumption, use
336         * {@link #t(StatisticalSummary, StatisticalSummary)}.
337         * <p>
338         * This statistic can be used to perform a (homoscedastic) two-sample
339         * t-test to compare sample means.</p><p>
340         * The t-statistic returned is</p><p>
341         * &nbsp;&nbsp;<code>  t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
342         * </p><p>
343         * where <strong><code>n1</code></strong> is the size of the first sample;
344         * <strong><code> n2</code></strong> is the size of the second sample;
345         * <strong><code> m1</code></strong> is the mean of the first sample;
346         * <strong><code> m2</code></strong> is the mean of the second sample;
347         * and <strong><code>var</code></strong> is the pooled variance estimate:
348         * </p><p>
349         * <code>var = ((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1))</code>
350         * </p><p>
351         * with <strong><code>var1</code></strong> the variance of the first sample and
352         * <strong><code>var2</code></strong> the variance of the second sample.
353         * </p><p>
354         * <strong>Preconditions</strong>: <ul>
355         * <li>The datasets described by the two summaries must each contain
356         * at least 2 observations.</li></ul></p>
357         *
358         * @param sampleStats1 StatisticalSummary describing data from the first sample
359         * @param sampleStats2 StatisticalSummary describing data from the second sample
360         * @return t statistic
361         * @throws IllegalArgumentException if the precondition is not met
362         */
363        public double homoscedasticT(StatisticalSummary sampleStats1,
364                StatisticalSummary sampleStats2)
365        throws IllegalArgumentException {
366            checkSampleData(sampleStats1);
367            checkSampleData(sampleStats2);
368            final double mean1 = sampleStats1.getMean();
369            final double mean2 = sampleStats2.getMean();
370            final double variance1 = sampleStats1.getVariance();
371            final double variance2 = sampleStats2.getVariance();
372            return homoscedasticT(mean1, mean2, variance1, variance2,
373                    sampleStats1.getN(), sampleStats2.getN());
374        }
375    
376        /**
377         * Returns the <i>observed significance level</i>, or
378         * <i>p-value</i>, of a one-sample, two-tailed t-test that compares
379         * the mean of the input array with the constant <code>mu</code>.
380         * <p>
381         * The number returned is the smallest significance level
382         * at which one can reject the null hypothesis that the mean equals
383         * <code>mu</code> in favor of the two-sided alternative that the mean
384         * is different from <code>mu</code>. For a one-sided test, divide the
385         * returned value by 2.</p>
386         * <p>
387         * <strong>Usage Note:</strong><br>
388         * The validity of the test depends on the assumptions of the parametric
389         * t-test procedure, as discussed
390         * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
391         * </p><p>
392         * <strong>Preconditions</strong>: <ul>
393         * <li>The observed array length must be at least 2.</li></ul></p>
394         *
395         * @param mu constant value to compare sample mean against
396         * @param sample array of sample data values
397         * @return p-value
398         * @throws IllegalArgumentException if the precondition is not met
399         * @throws MathException if an error occurs computing the p-value
400         */
401        public double tTest(double mu, double[] sample)
402        throws IllegalArgumentException, MathException {
403            checkSampleData(sample);
404            final double sampleMean = StatUtils.mean(sample);
405            final double sampleVariance = StatUtils.variance(sample);
406            return tTest(sampleMean, mu, sampleVariance, sample.length);
407        }
408    
409        /**
410         * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
411         * two-sided t-test</a> evaluating the null hypothesis that the mean of the population from
412         * which <code>sample</code> is drawn equals <code>mu</code>.
413         * <p>
414         * Returns <code>true</code> iff the null hypothesis can be rejected with
415         * confidence <code>1 - alpha</code>.  To perform a 1-sided test, use
416         * <code>alpha * 2</code>.</p><p>
417         * <strong>Examples:</strong><br><ol>
418         * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
419         * the 95% level, use <br><code>tTest(mu, sample, 0.05) </code>
420         * </li>
421         * <li>To test the (one-sided) hypothesis <code> sample mean &lt; mu </code>
422         * at the 99% level, first verify that the measured sample mean is less
423         * than <code>mu</code> and then use
424         * <br><code>tTest(mu, sample, 0.02) </code>
425         * </li></ol></p>
426         * <p>
427         * <strong>Usage Note:</strong><br>
428         * The validity of the test depends on the assumptions of the one-sample
429         * parametric t-test procedure, as discussed
430         * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
431         * </p><p>
432         * <strong>Preconditions</strong>: <ul>
433         * <li>The observed array length must be at least 2.</li></ul></p>
434         *
435         * @param mu constant value to compare sample mean against
436         * @param sample array of sample data values
437         * @param alpha significance level of the test
438         * @return true if the null hypothesis can be rejected with
439         *         confidence 1 - alpha
440         * @throws IllegalArgumentException if the precondition is not met
441         * @throws MathException if an error occurs computing the p-value
442         */
443        public boolean tTest(double mu, double[] sample, double alpha)
444        throws IllegalArgumentException, MathException {
445            checkSignificanceLevel(alpha);
446            final double pValue = tTest(mu, sample);
447            return pValue < alpha;
448        }
449    
450        /**
451         * Returns the <i>observed significance level</i>, or
452         * <i>p-value</i>, of a one-sample, two-tailed t-test that compares
453         * the mean of the dataset described by <code>sampleStats</code>
454         * with the constant <code>mu</code>.
455         * <p>
456         * The number returned is the smallest significance level
457         * at which one can reject the null hypothesis that the mean equals
458         * <code>mu</code> in favor of the two-sided alternative that the mean
459         * is different from <code>mu</code>. For a one-sided test, divide the
460         * returned value by 2.</p>
461         * <p>
462         * <strong>Usage Note:</strong><br>
463         * The validity of the test depends on the assumptions of the parametric
464         * t-test procedure, as discussed
465         * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
466         * here</a></p>
467         * <p>
468         * <strong>Preconditions</strong>: <ul>
469         * <li>The sample must contain at least 2 observations.</li></ul></p>
470         *
471         * @param mu constant value to compare sample mean against
472         * @param sampleStats StatisticalSummary describing sample data
473         * @return p-value
474         * @throws IllegalArgumentException if the precondition is not met
475         * @throws MathException if an error occurs computing the p-value
476         */
477        public double tTest(double mu, StatisticalSummary sampleStats)
478        throws IllegalArgumentException, MathException {
479            checkSampleData(sampleStats);
480            final double sampleMean = sampleStats.getMean();
481            final double sampleVariance = sampleStats.getVariance();
482            return tTest(sampleMean, mu, sampleVariance, sampleStats.getN());
483        }
484    
485         /**
486         * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
487         * two-sided t-test</a> evaluating the null hypothesis that the mean of the
488         * population from which the dataset described by <code>stats</code> is
489         * drawn equals <code>mu</code>.
490         * <p>
491         * Returns <code>true</code> iff the null hypothesis can be rejected with
492         * confidence <code>1 - alpha</code>.  To  perform a 1-sided test, use
493         * <code>alpha * 2.</code></p>
494         * <p>
495         * <strong>Examples:</strong><br><ol>
496         * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
497         * the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code>
498         * </li>
499         * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
500         * at the 99% level, first verify that the measured sample mean is less 
501         * than <code>mu</code> and then use 
502         * <br><code>tTest(mu, sampleStats, 0.02) </code>
503         * </li></ol></p>
504         * <p>
505         * <strong>Usage Note:</strong><br>
506         * The validity of the test depends on the assumptions of the one-sample 
507         * parametric t-test procedure, as discussed 
508         * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
509         * </p><p>
510         * <strong>Preconditions</strong>: <ul>
511         * <li>The sample must include at least 2 observations.
512         * </li></ul></p>
513         *
514         * @param mu constant value to compare sample mean against
515         * @param sampleStats StatisticalSummary describing sample data values
516         * @param alpha significance level of the test
517         * @return p-value
518         * @throws IllegalArgumentException if the precondition is not met
519         * @throws MathException if an error occurs computing the p-value
520         */
521        public boolean tTest( double mu, StatisticalSummary sampleStats,
522                double alpha)
523        throws IllegalArgumentException, MathException {
524            checkSignificanceLevel(alpha);
525            return (tTest(mu, sampleStats) < alpha);
526        }
527    
528        /**
529         * Returns the <i>observed significance level</i>, or <i>p-value</i>, of a
530         * two-sample, two-tailed t-test comparing the means of the input arrays.
531         * <p>
532         * The number returned is the smallest significance level
533         * at which one can reject the null hypothesis that the two means are
534         * equal in favor of the two-sided alternative that they are different.
535         * For a one-sided test, divide the returned value by 2.</p>
536         * <p>
537         * The test does not assume that the underlying population variances are
538         * equal and it uses approximated degrees of freedom computed from the
539         * sample data to compute the p-value.  The t-statistic used is as defined in
540         * {@link #t(double[], double[])} and the Welch-Satterthwaite approximation
541         * to the degrees of freedom is used, as described
542         * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
543         * here.</a>  To perform the test under the assumption of equal subpopulation
544         * variances, use {@link #homoscedasticTTest(double[], double[])}.</p>
545         * <p>
546         * <strong>Usage Note:</strong><br>
547         * The validity of the p-value depends on the assumptions of the parametric
548         * t-test procedure, as discussed
549         * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
550         * here</a></p>
551         * <p>
552         * <strong>Preconditions</strong>: <ul>
553         * <li>The observed array lengths must both be at least 2.</li></ul></p>
554         *
555         * @param sample1 array of sample data values
556         * @param sample2 array of sample data values
557         * @return p-value for t-test
558         * @throws IllegalArgumentException if the precondition is not met
559         * @throws MathException if an error occurs computing the p-value
560         */
561        public double tTest(double[] sample1, double[] sample2)
562        throws IllegalArgumentException, MathException {
563            checkSampleData(sample1);
564            checkSampleData(sample2);
565            final double mean1 = StatUtils.mean(sample1);
566            final double mean2 = StatUtils.mean(sample2);
567            final double variance1 = StatUtils.variance(sample1);
568            final double variance2 = StatUtils.variance(sample2);
569            return tTest(mean1, mean2, variance1, variance2,
570                    sample1.length, sample2.length);
571        }
572        
573        /**
574         * Returns the <i>observed significance level</i>, or <i>p-value</i>, of a
575         * two-sample, two-tailed t-test comparing the means of the input arrays,
576         * under the assumption that the two samples are drawn from subpopulations
577         * with equal variances.  To perform the test without the equal variances
578         * assumption, use {@link #tTest(double[], double[])}.
579         * <p>
580         * The number returned is the smallest significance level
581         * at which one can reject the null hypothesis that the two means are
582         * equal in favor of the two-sided alternative that they are different.
583         * For a one-sided test, divide the returned value by 2.</p>
584         * <p>
585         * A pooled variance estimate is used to compute the t-statistic.  See
586         * {@link #homoscedasticT(double[], double[])}. The sum of the sample sizes
587         * minus 2 is used as the degrees of freedom.</p>
588         * <p>
589         * <strong>Usage Note:</strong><br>
590         * The validity of the p-value depends on the assumptions of the parametric
591         * t-test procedure, as discussed
592         * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
593         * here</a></p>
594         * <p>
595         * <strong>Preconditions</strong>: <ul>
596         * <li>The observed array lengths must both be at least 2.</li></ul></p>
597         *
598         * @param sample1 array of sample data values
599         * @param sample2 array of sample data values
600         * @return p-value for t-test
601         * @throws IllegalArgumentException if the precondition is not met
602         * @throws MathException if an error occurs computing the p-value
603         */
604        public double homoscedasticTTest(double[] sample1, double[] sample2)
605        throws IllegalArgumentException, MathException {
606            checkSampleData(sample1);
607            checkSampleData(sample2);
608            final double mean1 = StatUtils.mean(sample1);
609            final double mean2 = StatUtils.mean(sample2);
610            final double variance1 = StatUtils.variance(sample1);
611            final double variance2 = StatUtils.variance(sample2);
612            return homoscedasticTTest(mean1, mean2, variance1, variance2,
613                    sample1.length, sample2.length);
614        }
615        
616    
617         /**
618         * Performs a 
619         * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
620         * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code> 
621         * and <code>sample2</code> are drawn from populations with the same mean, 
622         * with significance level <code>alpha</code>.  This test does not assume
623         * that the subpopulation variances are equal.  To perform the test assuming
624         * equal variances, use 
625         * {@link #homoscedasticTTest(double[], double[], double)}.
626         * <p>
627         * Returns <code>true</code> iff the null hypothesis that the means are
     * equal can be rejected with confidence <code>1 - alpha</code>.  To
     * perform a 1-sided test, use <code>alpha * 2</code>.</p>
630         * <p>
631         * See {@link #t(double[], double[])} for the formula used to compute the
632         * t-statistic.  Degrees of freedom are approximated using the
633         * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
     * Welch-Satterthwaite approximation.</a></p>
     *
636         * <p>
637         * <strong>Examples:</strong><br><ol>
638         * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
639         * the 95% level,  use 
640         * <br><code>tTest(sample1, sample2, 0.05). </code>
641         * </li>
     * <li>To test the (one-sided) hypothesis <code> mean 1 &lt; mean 2 </code> at
643         * the 99% level, first verify that the measured  mean of <code>sample 1</code>
644         * is less than the mean of <code>sample 2</code> and then use 
645         * <br><code>tTest(sample1, sample2, 0.02) </code>
646         * </li></ol></p>
647         * <p>
648         * <strong>Usage Note:</strong><br>
649         * The validity of the test depends on the assumptions of the parametric
650         * t-test procedure, as discussed 
651         * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
652         * here</a></p>
653         * <p>
654         * <strong>Preconditions</strong>: <ul>
655         * <li>The observed array lengths must both be at least 2.
656         * </li>
     * <li> <code> 0 &lt; alpha &lt; 0.5 </code>
658         * </li></ul></p>
659         *
660         * @param sample1 array of sample data values
661         * @param sample2 array of sample data values
662         * @param alpha significance level of the test
663         * @return true if the null hypothesis can be rejected with 
664         * confidence 1 - alpha
665         * @throws IllegalArgumentException if the preconditions are not met
666         * @throws MathException if an error occurs performing the test
667         */
668        public boolean tTest(double[] sample1, double[] sample2,
669                double alpha)
670        throws IllegalArgumentException, MathException {
671            checkSignificanceLevel(alpha);
672            return (tTest(sample1, sample2) < alpha);
673        }
674        
675        /**
676         * Performs a 
677         * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
678         * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code> 
679         * and <code>sample2</code> are drawn from populations with the same mean, 
680         * with significance level <code>alpha</code>,  assuming that the
681         * subpopulation variances are equal.  Use 
682         * {@link #tTest(double[], double[], double)} to perform the test without
683         * the assumption of equal variances.
684         * <p>
685         * Returns <code>true</code> iff the null hypothesis that the means are
686         * equal can be rejected with confidence <code>1 - alpha</code>.  To 
687         * perform a 1-sided test, use <code>alpha * 2.</code>  To perform the test
688         * without the assumption of equal subpopulation variances, use 
689         * {@link #tTest(double[], double[], double)}.</p>
690         * <p>
     * A pooled variance estimate is used to compute the t-statistic. See
     * {@link #homoscedasticT(double[], double[])} for the formula. The sum of the sample
693         * sizes minus 2 is used as the degrees of freedom.</p>
694         * <p>
     * <strong>Examples:</strong><br><ol>
     * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
     * the 95% level, use <br><code>homoscedasticTTest(sample1, sample2, 0.05). </code>
     * </li>
     * <li>To test the (one-sided) hypothesis <code> mean 1 &lt; mean 2, </code>
     * at the 99% level, first verify that the measured mean of
     * <code>sample 1</code> is less than the mean of <code>sample 2</code>
     * and then use
     * <br><code>homoscedasticTTest(sample1, sample2, 0.02) </code>
     * </li></ol></p>
705         * <p>
706         * <strong>Usage Note:</strong><br>
707         * The validity of the test depends on the assumptions of the parametric
708         * t-test procedure, as discussed 
709         * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
710         * here</a></p>
711         * <p>
712         * <strong>Preconditions</strong>: <ul>
713         * <li>The observed array lengths must both be at least 2.
714         * </li>
     * <li> <code> 0 &lt; alpha &lt; 0.5 </code>
716         * </li></ul></p>
717         *
718         * @param sample1 array of sample data values
719         * @param sample2 array of sample data values
720         * @param alpha significance level of the test
721         * @return true if the null hypothesis can be rejected with 
722         * confidence 1 - alpha
723         * @throws IllegalArgumentException if the preconditions are not met
724         * @throws MathException if an error occurs performing the test
725         */
726        public boolean homoscedasticTTest(double[] sample1, double[] sample2,
727                double alpha)
728        throws IllegalArgumentException, MathException {
729            checkSignificanceLevel(alpha);
730            return (homoscedasticTTest(sample1, sample2) < alpha);
731        }
732    
733         /**
734         * Returns the <i>observed significance level</i>, or 
735         * <i>p-value</i>, associated with a two-sample, two-tailed t-test 
736         * comparing the means of the datasets described by two StatisticalSummary
737         * instances.
738         * <p>
739         * The number returned is the smallest significance level
740         * at which one can reject the null hypothesis that the two means are
741         * equal in favor of the two-sided alternative that they are different. 
742         * For a one-sided test, divide the returned value by 2.</p>
743         * <p>
     * The test does not assume that the underlying population variances are
745         * equal  and it uses approximated degrees of freedom computed from the 
746         * sample data to compute the p-value.   To perform the test assuming
747         * equal variances, use 
748         * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.</p>
749         * <p>
750         * <strong>Usage Note:</strong><br>
751         * The validity of the p-value depends on the assumptions of the parametric
752         * t-test procedure, as discussed 
753         * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
754         * here</a></p>
755         * <p>
756         * <strong>Preconditions</strong>: <ul>
     * <li>The datasets described by the two StatisticalSummary instances must
     * each contain at least 2 observations.
759         * </li></ul></p>
760         *
761         * @param sampleStats1  StatisticalSummary describing data from the first sample
762         * @param sampleStats2  StatisticalSummary describing data from the second sample
763         * @return p-value for t-test
764         * @throws IllegalArgumentException if the precondition is not met
765         * @throws MathException if an error occurs computing the p-value
766         */
767        public double tTest(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2)
768        throws IllegalArgumentException, MathException {
769            checkSampleData(sampleStats1);
770            checkSampleData(sampleStats2);
771            return tTest(sampleStats1.getMean(), sampleStats2.getMean(), sampleStats1.getVariance(),
772                    sampleStats2.getVariance(), sampleStats1.getN(), 
773                    sampleStats2.getN());
774        }
775        
776        /**
777         * Returns the <i>observed significance level</i>, or 
778         * <i>p-value</i>, associated with a two-sample, two-tailed t-test 
779         * comparing the means of the datasets described by two StatisticalSummary
780         * instances, under the hypothesis of equal subpopulation variances. To
781         * perform a test without the equal variances assumption, use
782         * {@link #tTest(StatisticalSummary, StatisticalSummary)}.
783         * <p>
784         * The number returned is the smallest significance level
785         * at which one can reject the null hypothesis that the two means are
786         * equal in favor of the two-sided alternative that they are different. 
787         * For a one-sided test, divide the returned value by 2.</p>
788         * <p>
789         * See {@link #homoscedasticT(double[], double[])} for the formula used to
790         * compute the t-statistic. The sum of the  sample sizes minus 2 is used as
791         * the degrees of freedom.</p>
792         * <p>
793         * <strong>Usage Note:</strong><br>
794         * The validity of the p-value depends on the assumptions of the parametric
795         * t-test procedure, as discussed 
796         * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
797         * </p><p>
798         * <strong>Preconditions</strong>: <ul>
     * <li>The datasets described by the two StatisticalSummary instances must
     * each contain at least 2 observations.
801         * </li></ul></p>
802         *
803         * @param sampleStats1  StatisticalSummary describing data from the first sample
804         * @param sampleStats2  StatisticalSummary describing data from the second sample
805         * @return p-value for t-test
806         * @throws IllegalArgumentException if the precondition is not met
807         * @throws MathException if an error occurs computing the p-value
808         */
809        public double homoscedasticTTest(StatisticalSummary sampleStats1, 
810                                         StatisticalSummary sampleStats2)
811        throws IllegalArgumentException, MathException {
812            checkSampleData(sampleStats1);
813            checkSampleData(sampleStats2);
814            return homoscedasticTTest(sampleStats1.getMean(),
815                    sampleStats2.getMean(), sampleStats1.getVariance(),
816                    sampleStats2.getVariance(), sampleStats1.getN(), 
817                    sampleStats2.getN());
818        }
819    
820        /**
821         * Performs a 
822         * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
823         * two-sided t-test</a> evaluating the null hypothesis that 
824         * <code>sampleStats1</code> and <code>sampleStats2</code> describe
825         * datasets drawn from populations with the same mean, with significance
826         * level <code>alpha</code>.   This test does not assume that the
827         * subpopulation variances are equal.  To perform the test under the equal
828         * variances assumption, use
829         * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.
830         * <p>
831         * Returns <code>true</code> iff the null hypothesis that the means are
832         * equal can be rejected with confidence <code>1 - alpha</code>.  To 
833         * perform a 1-sided test, use <code>alpha * 2</code></p>
834         * <p>
835         * See {@link #t(double[], double[])} for the formula used to compute the
836         * t-statistic.  Degrees of freedom are approximated using the
837         * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
838         * Welch-Satterthwaite approximation.</a></p>
839         * <p>
840         * <strong>Examples:</strong><br><ol>
     * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
     * the 95% level, use
843         * <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code>
844         * </li>
     * <li>To test the (one-sided) hypothesis <code> mean 1 &lt; mean 2 </code>
846         * at the 99% level,  first verify that the measured mean of  
847         * <code>sample 1</code> is less than  the mean of <code>sample 2</code>
848         * and then use 
849         * <br><code>tTest(sampleStats1, sampleStats2, 0.02) </code>
850         * </li></ol></p>
851         * <p>
852         * <strong>Usage Note:</strong><br>
853         * The validity of the test depends on the assumptions of the parametric
854         * t-test procedure, as discussed 
855         * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
856         * here</a></p>
857         * <p>
858         * <strong>Preconditions</strong>: <ul>
     * <li>The datasets described by the two StatisticalSummary instances must
     * each contain at least 2 observations.
     * </li>
     * <li> <code> 0 &lt; alpha &lt; 0.5 </code>
863         * </li></ul></p>
864         *
865         * @param sampleStats1 StatisticalSummary describing sample data values
866         * @param sampleStats2 StatisticalSummary describing sample data values
867         * @param alpha significance level of the test
868         * @return true if the null hypothesis can be rejected with 
869         * confidence 1 - alpha
870         * @throws IllegalArgumentException if the preconditions are not met
871         * @throws MathException if an error occurs performing the test
872         */
873        public boolean tTest(StatisticalSummary sampleStats1,
874                StatisticalSummary sampleStats2, double alpha)
875        throws IllegalArgumentException, MathException {
876            checkSignificanceLevel(alpha);
877            return (tTest(sampleStats1, sampleStats2) < alpha);
878        }
879        
880        //----------------------------------------------- Protected methods 
881    
882        /**
883         * Computes approximate degrees of freedom for 2-sample t-test.
884         * 
885         * @param v1 first sample variance
886         * @param v2 second sample variance
887         * @param n1 first sample n
888         * @param n2 second sample n
889         * @return approximate degrees of freedom
890         */
891        protected double df(double v1, double v2, double n1, double n2) {
892            return (((v1 / n1) + (v2 / n2)) * ((v1 / n1) + (v2 / n2))) /
893            ((v1 * v1) / (n1 * n1 * (n1 - 1d)) + (v2 * v2) /
894                    (n2 * n2 * (n2 - 1d)));
895        }
896    
897        /**
898         * Computes t test statistic for 1-sample t-test.
899         * 
900         * @param m sample mean
901         * @param mu constant to test against
902         * @param v sample variance
903         * @param n sample n
904         * @return t test statistic
905         */
906        protected double t(double m, double mu, double v, double n) {
907            return (m - mu) / Math.sqrt(v / n);
908        }
909        
910        /**
911         * Computes t test statistic for 2-sample t-test.
912         * <p>
913         * Does not assume that subpopulation variances are equal.</p>
914         * 
915         * @param m1 first sample mean
916         * @param m2 second sample mean
917         * @param v1 first sample variance
918         * @param v2 second sample variance
919         * @param n1 first sample n
920         * @param n2 second sample n
921         * @return t test statistic
922         */
923        protected double t(double m1, double m2,  double v1, double v2, double n1,
924                double n2)  {
925                return (m1 - m2) / Math.sqrt((v1 / n1) + (v2 / n2));
926        }
927        
928        /**
929         * Computes t test statistic for 2-sample t-test under the hypothesis
930         * of equal subpopulation variances.
931         * 
932         * @param m1 first sample mean
933         * @param m2 second sample mean
934         * @param v1 first sample variance
935         * @param v2 second sample variance
936         * @param n1 first sample n
937         * @param n2 second sample n
938         * @return t test statistic
939         */
940        protected double homoscedasticT(double m1, double m2,  double v1,
941                double v2, double n1, double n2)  {
942                double pooledVariance = ((n1  - 1) * v1 + (n2 -1) * v2 ) / (n1 + n2 - 2); 
943                return (m1 - m2) / Math.sqrt(pooledVariance * (1d / n1 + 1d / n2));
944        }
945        
946        /**
947         * Computes p-value for 2-sided, 1-sample t-test.
948         * 
949         * @param m sample mean
950         * @param mu constant to test against
951         * @param v sample variance
952         * @param n sample n
953         * @return p-value
954         * @throws MathException if an error occurs computing the p-value
955         */
956        protected double tTest(double m, double mu, double v, double n)
957        throws MathException {
958            double t = Math.abs(t(m, mu, v, n));
959            distribution.setDegreesOfFreedom(n - 1);
960            return 2.0 * distribution.cumulativeProbability(-t);
961        }
962    
963        /**
964         * Computes p-value for 2-sided, 2-sample t-test.
965         * <p>
966         * Does not assume subpopulation variances are equal. Degrees of freedom
967         * are estimated from the data.</p>
968         * 
969         * @param m1 first sample mean
970         * @param m2 second sample mean
971         * @param v1 first sample variance
972         * @param v2 second sample variance
973         * @param n1 first sample n
974         * @param n2 second sample n
975         * @return p-value
976         * @throws MathException if an error occurs computing the p-value
977         */
978        protected double tTest(double m1, double m2, double v1, double v2, 
979                double n1, double n2)
980        throws MathException {
981            double t = Math.abs(t(m1, m2, v1, v2, n1, n2));
982            double degreesOfFreedom = 0;
983            degreesOfFreedom = df(v1, v2, n1, n2);
984            distribution.setDegreesOfFreedom(degreesOfFreedom);
985            return 2.0 * distribution.cumulativeProbability(-t);
986        }
987        
988        /**
989         * Computes p-value for 2-sided, 2-sample t-test, under the assumption
990         * of equal subpopulation variances.
991         * <p>
992         * The sum of the sample sizes minus 2 is used as degrees of freedom.</p>
993         * 
994         * @param m1 first sample mean
995         * @param m2 second sample mean
996         * @param v1 first sample variance
997         * @param v2 second sample variance
998         * @param n1 first sample n
999         * @param n2 second sample n
1000         * @return p-value
1001         * @throws MathException if an error occurs computing the p-value
1002         */
1003        protected double homoscedasticTTest(double m1, double m2, double v1,
1004                double v2, double n1, double n2)
1005        throws MathException {
1006            double t = Math.abs(homoscedasticT(m1, m2, v1, v2, n1, n2));
1007            double degreesOfFreedom = n1 + n2 - 2;
1008            distribution.setDegreesOfFreedom(degreesOfFreedom);
1009            return 2.0 * distribution.cumulativeProbability(-t);
1010        }
1011        
1012        /**
1013         * Modify the distribution used to compute inference statistics.
1014         * @param value the new distribution
1015         * @since 1.2
1016         */
1017        public void setDistribution(TDistribution value) {
1018            distribution = value;
1019        }
1020    
1021        /** Check significance level.
1022         * @param alpha significance level
1023         * @exception IllegalArgumentException if significance level is out of bounds
1024         */
1025        private void checkSignificanceLevel(final double alpha)
1026            throws IllegalArgumentException {
1027            if ((alpha <= 0) || (alpha > 0.5)) {
1028                throw MathRuntimeException.createIllegalArgumentException(
1029                      "out of bounds significance level {0}, must be between {1} and {2}",
1030                      alpha, 0.0, 0.5);
1031            }
1032        }
1033    
1034        /** Check sample data.
1035         * @param data sample data
1036         * @exception IllegalArgumentException if there is not enough sample data
1037         */
1038        private void checkSampleData(final double[] data)
1039            throws IllegalArgumentException {
1040            if ((data == null) || (data.length < 2)) {
1041                throw MathRuntimeException.createIllegalArgumentException(
1042                      "insufficient data for t statistic, needs at least 2, got {0}",
1043                      (data == null) ? 0 : data.length);
1044            }
1045        }
1046    
1047        /** Check sample data.
1048         * @param stat statistical summary
1049         * @exception IllegalArgumentException if there is not enough sample data
1050         */
1051        private void checkSampleData(final StatisticalSummary stat)
1052            throws IllegalArgumentException {
1053            if ((stat == null) || (stat.getN() < 2)) {
1054                throw MathRuntimeException.createIllegalArgumentException(
1055                      "insufficient data for t statistic, needs at least 2, got {0}",
1056                      (stat == null) ? 0 : stat.getN());
1057            }
1058        }
1059    
1060    }