001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     * 
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     * 
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.math.stat.descriptive;
019    
020    import junit.framework.Test;
021    import junit.framework.TestCase;
022    import junit.framework.TestSuite;
023    
024    import java.util.Collection;
025    import java.util.ArrayList;
026    
027    import org.apache.commons.math.random.RandomData;
028    import org.apache.commons.math.random.RandomDataImpl;
029    import org.apache.commons.math.TestUtils;
030    
031    
032    /**
033     * Test cases for {@link AggregateSummaryStatistics}
034     *
035     */
036    public class AggregateSummaryStatisticsTest extends TestCase {
037        
038        /**
039         * Creates and returns a {@code Test} representing all the test cases in this
040         * class
041         *
042         * @return a {@code Test} representing all the test cases in this class
043         */
044        public static Test suite() {
045            TestSuite suite = new TestSuite(AggregateSummaryStatisticsTest.class);
046            suite.setName("AggregateSummaryStatistics tests");
047            return suite;
048        }
049        
050        /**
051         * Tests the standard aggregation behavior
052         */
053        public void testAggregation() {
054            AggregateSummaryStatistics aggregate = new AggregateSummaryStatistics();
055            SummaryStatistics setOneStats = aggregate.createContributingStatistics();
056            SummaryStatistics setTwoStats = aggregate.createContributingStatistics();
057            
058            assertNotNull("The set one contributing stats are null", setOneStats);
059            assertNotNull("The set two contributing stats are null", setTwoStats);
060            assertNotSame("Contributing stats objects are the same", setOneStats, setTwoStats);
061            
062            setOneStats.addValue(2);
063            setOneStats.addValue(3);
064            setOneStats.addValue(5);
065            setOneStats.addValue(7);
066            setOneStats.addValue(11);
067            assertEquals("Wrong number of set one values", 5, setOneStats.getN());
068            assertEquals("Wrong sum of set one values", 28.0, setOneStats.getSum());
069            
070            setTwoStats.addValue(2);
071            setTwoStats.addValue(4);
072            setTwoStats.addValue(8);
073            assertEquals("Wrong number of set two values", 3, setTwoStats.getN());
074            assertEquals("Wrong sum of set two values", 14.0, setTwoStats.getSum());
075            
076            assertEquals("Wrong number of aggregate values", 8, aggregate.getN());
077            assertEquals("Wrong aggregate sum", 42.0, aggregate.getSum());
078        }
079        
080        /**
081         * Verify that aggregating over a partition gives the same results
082         * as direct computation.
083         * 
084         *  1) Randomly generate a dataset of 10-100 values
085         *     from [-100, 100]
086         *  2) Divide the dataset it into 2-5 partitions
087         *  3) Create an AggregateSummaryStatistic and ContributingStatistics
088         *     for each partition 
089         *  4) Compare results from the AggregateSummaryStatistic with values
090         *     returned by a single SummaryStatistics instance that is provided 
091         *     the full dataset
092         */
093        public void testAggregationConsistency() throws Exception {
094            
095            // Generate a random sample and random partition
096            double[] totalSample = generateSample();
097            double[][] subSamples = generatePartition(totalSample);
098            int nSamples = subSamples.length;
099            
100            // Create aggregator and total stats for comparison
101            AggregateSummaryStatistics aggregate = new AggregateSummaryStatistics();
102            SummaryStatistics totalStats = new SummaryStatistics();
103            
104            // Create array of component stats
105            SummaryStatistics componentStats[] = new SummaryStatistics[nSamples];
106            
107            for (int i = 0; i < nSamples; i++) {
108                
109                // Make componentStats[i] a contributing statistic to aggregate
110                componentStats[i] = aggregate.createContributingStatistics();
111                
112                // Add values from subsample
113                for (int j = 0; j < subSamples[i].length; j++) {
114                    componentStats[i].addValue(subSamples[i][j]);
115                }
116            }
117            
118            // Compute totalStats directly
119            for (int i = 0; i < totalSample.length; i++) {
120                totalStats.addValue(totalSample[i]);
121            }
122            
123            /*
124             * Compare statistics in totalStats with aggregate.
125             * Note that guaranteed success of this comparison depends on the
126             * fact that <aggregate> gets values in exactly the same order
127             * as <totalStats>. 
128             *  
129             */
130            assertEquals(totalStats.getSummary(), aggregate.getSummary());  
131            
132        }
133        
134        /**
135         * Test aggregate function by randomly generating a dataset of 10-100 values
136         * from [-100, 100], dividing it into 2-5 partitions, computing stats for each
137         * partition and comparing the result of aggregate(...) applied to the collection
138         * of per-partition SummaryStatistics with a single SummaryStatistics computed
139         * over the full sample.
140         * 
141         * @throws Exception
142         */
143        public void testAggregate() throws Exception {
144            
145            // Generate a random sample and random partition
146            double[] totalSample = generateSample();
147            double[][] subSamples = generatePartition(totalSample);
148            int nSamples = subSamples.length;
149           
150            // Compute combined stats directly
151            SummaryStatistics totalStats = new SummaryStatistics();
152            for (int i = 0; i < totalSample.length; i++) {
153                totalStats.addValue(totalSample[i]);
154            }
155            
156            // Now compute subsample stats individually and aggregate
157            SummaryStatistics[] subSampleStats = new SummaryStatistics[nSamples];
158            for (int i = 0; i < nSamples; i++) {
159                subSampleStats[i] = new SummaryStatistics();
160            }
161            Collection<SummaryStatistics> aggregate = new ArrayList<SummaryStatistics>();
162            for (int i = 0; i < nSamples; i++) {
163                for (int j = 0; j < subSamples[i].length; j++) { 
164                    subSampleStats[i].addValue(subSamples[i][j]);
165                }
166                aggregate.add(subSampleStats[i]);
167            }
168            
169            // Compare values
170            StatisticalSummary aggregatedStats = AggregateSummaryStatistics.aggregate(aggregate);
171            assertEquals(totalStats.getSummary(), aggregatedStats, 10E-12);
172        }
173        
174        
175        public void testAggregateDegenerate() throws Exception {
176            double[] totalSample = {1, 2, 3, 4, 5};
177            double[][] subSamples = {{1}, {2}, {3}, {4}, {5}};
178            
179            // Compute combined stats directly
180            SummaryStatistics totalStats = new SummaryStatistics();
181            for (int i = 0; i < totalSample.length; i++) {
182                totalStats.addValue(totalSample[i]);
183            }
184            
185            // Now compute subsample stats individually and aggregate
186            SummaryStatistics[] subSampleStats = new SummaryStatistics[5];
187            for (int i = 0; i < 5; i++) {
188                subSampleStats[i] = new SummaryStatistics();
189            }
190            Collection<SummaryStatistics> aggregate = new ArrayList<SummaryStatistics>();
191            for (int i = 0; i < 5; i++) {
192                for (int j = 0; j < subSamples[i].length; j++) { 
193                    subSampleStats[i].addValue(subSamples[i][j]);
194                }
195                aggregate.add(subSampleStats[i]);
196            }
197            
198            // Compare values
199            StatisticalSummaryValues aggregatedStats = AggregateSummaryStatistics.aggregate(aggregate);
200            assertEquals(totalStats.getSummary(), aggregatedStats, 10E-12);
201        }
202        
203        public void testAggregateSpecialValues() throws Exception {
204            double[] totalSample = {Double.POSITIVE_INFINITY, 2, 3, Double.NaN, 5};
205            double[][] subSamples = {{Double.POSITIVE_INFINITY, 2}, {3}, {Double.NaN}, {5}};
206            
207            // Compute combined stats directly
208            SummaryStatistics totalStats = new SummaryStatistics();
209            for (int i = 0; i < totalSample.length; i++) {
210                totalStats.addValue(totalSample[i]);
211            }
212            
213            // Now compute subsample stats individually and aggregate
214            SummaryStatistics[] subSampleStats = new SummaryStatistics[5];
215            for (int i = 0; i < 4; i++) {
216                subSampleStats[i] = new SummaryStatistics();
217            }
218            Collection<SummaryStatistics> aggregate = new ArrayList<SummaryStatistics>();
219            for (int i = 0; i < 4; i++) {
220                for (int j = 0; j < subSamples[i].length; j++) { 
221                    subSampleStats[i].addValue(subSamples[i][j]);
222                }
223                aggregate.add(subSampleStats[i]);
224            }
225            
226            // Compare values
227            StatisticalSummaryValues aggregatedStats = AggregateSummaryStatistics.aggregate(aggregate);
228            assertEquals(totalStats.getSummary(), aggregatedStats, 10E-12);
229            
230        }
231        
232        /**
233         * Verifies that a StatisticalSummary and a StatisticalSummaryValues are equal up
234         * to delta, with NaNs, infinities returned in the same spots. For max, min, n, values
235         * have to agree exactly, delta is used only for sum, mean, variance, std dev.
236         */
237        protected static void assertEquals(StatisticalSummary expected, StatisticalSummary observed, double delta) {
238            TestUtils.assertEquals(expected.getMax(), observed.getMax(), 0);
239            TestUtils.assertEquals(expected.getMin(), observed.getMin(), 0);
240            assertEquals(expected.getN(), observed.getN());
241            TestUtils.assertEquals(expected.getSum(), observed.getSum(), delta);
242            TestUtils.assertEquals(expected.getMean(), observed.getMean(), delta);
243            TestUtils.assertEquals(expected.getStandardDeviation(), observed.getStandardDeviation(), delta);
244            TestUtils.assertEquals(expected.getVariance(), observed.getVariance(), delta);
245        }
246    
247        
248        /**
249         * Generates a random sample of double values.
250         * Sample size is random, between 10 and 100 and values are 
251         * uniformly distributed over [-100, 100].
252         * 
253         * @return array of random double values
254         */
255        private double[] generateSample() {
256            final RandomData randomData = new RandomDataImpl();
257            final int sampleSize = randomData.nextInt(10,100);
258            double[] out = new double[sampleSize];
259            for (int i = 0; i < out.length; i++) {
260                out[i] = randomData.nextUniform(-100, 100);
261            }
262            return out;     
263        }
264        
265        /**
266         * Generates a partition of <sample> into up to 5 sequentially selected
267         * subsamples with randomly selected partition points.
268         * 
269         * @param sample array to partition
270         * @return rectangular array with rows = subsamples
271         */
272        private double[][] generatePartition(double[] sample) {
273            final int length = sample.length;
274            final double[][] out = new double[5][];
275            final RandomData randomData = new RandomDataImpl();
276            int cur = 0;
277            int offset = 0;
278            int sampleCount = 0;
279            for (int i = 0; i < 5; i++) {
280                if (cur == length || offset == length) {
281                    break;
282                }
283                final int next = (i == 4 || cur == length - 1) ? length - 1 : randomData.nextInt(cur, length - 1);
284                final int subLength = next - cur + 1;
285                out[i] = new double[subLength];
286                System.arraycopy(sample, offset, out[i], 0, subLength);
287                cur = next + 1;
288                sampleCount++;
289                offset += subLength;
290            }
291            if (sampleCount < 5) {
292                double[][] out2 = new double[sampleCount][];
293                for (int j = 0; j < sampleCount; j++) {
294                    final int curSize = out[j].length;
295                    out2[j] = new double[curSize];
296                    System.arraycopy(out[j], 0, out2[j], 0, curSize);
297                }
298                return out2;
299            } else {
300                return out;
301            }
302        }
303        
304    }