001    /* ===========================================================
002     * JFreeChart : a free chart library for the Java(tm) platform
003     * ===========================================================
004     *
005     * (C) Copyright 2000-2008, by Object Refinery Limited and Contributors.
006     *
007     * Project Info:  http://www.jfree.org/jfreechart/index.html
008     *
009     * This library is free software; you can redistribute it and/or modify it
010     * under the terms of the GNU Lesser General Public License as published by
011     * the Free Software Foundation; either version 2.1 of the License, or
012     * (at your option) any later version.
013     *
014     * This library is distributed in the hope that it will be useful, but
015     * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
016     * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
017     * License for more details.
018     *
019     * You should have received a copy of the GNU Lesser General Public
020     * License along with this library; if not, write to the Free Software
021     * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
022     * USA.
023     *
024     * [Java is a trademark or registered trademark of Sun Microsystems, Inc.
025     * in the United States and other countries.]
026     *
027     * ---------------
028     * Statistics.java
029     * ---------------
030     * (C) Copyright 2000-2008, by Matthew Wright and Contributors.
031     *
032     * Original Author:  Matthew Wright;
033     * Contributor(s):   David Gilbert (for Object Refinery Limited);
034     *
035     * Changes (from 08-Nov-2001)
036     * --------------------------
037     * 08-Nov-2001 : Added standard header and tidied Javadoc comments (DG);
038     *               Moved from JFreeChart to package com.jrefinery.data.* in
039     *               JCommon class library (DG);
040     * 24-Jun-2002 : Removed unnecessary local variable (DG);
041     * 07-Oct-2002 : Fixed errors reported by Checkstyle (DG);
042     * 26-May-2004 : Moved calculateMean() method from BoxAndWhiskerCalculator (DG);
043     * 02-Jun-2004 : Fixed bug in calculateMedian() method (DG);
044     * 11-Jan-2005 : Removed deprecated code in preparation for the 1.0.0
045     *               release (DG);
046     *
047     */
048    
049    package org.jfree.data.statistics;
050    
051    import java.util.ArrayList;
052    import java.util.Collection;
053    import java.util.Collections;
054    import java.util.Iterator;
055    import java.util.List;
056    
/**
 * A utility class that provides some common statistical functions.  All
 * methods are static and the class holds no state.
 */
public abstract class Statistics {

    /**
     * Returns the mean of an array of numbers.  This is equivalent to calling
     * <code>calculateMean(values, true)</code>.
     *
     * @param values  the values (<code>null</code> not permitted).
     *
     * @return The mean (<code>Double.NaN</code> if the array is empty or
     *     contains a <code>null</code> or <code>Double.NaN</code> item).
     *
     * @throws IllegalArgumentException if <code>values</code> is
     *     <code>null</code>.
     */
    public static double calculateMean(Number[] values) {
        return calculateMean(values, true);
    }

    /**
     * Returns the mean of an array of numbers.
     *
     * @param values  the values (<code>null</code> not permitted).
     * @param includeNullAndNaN  a flag that controls whether or not
     *     <code>null</code> and <code>Double.NaN</code> values are included
     *     in the calculation (if either is present in the array, the result is
     *     {@link Double#NaN}).
     *
     * @return The mean (<code>Double.NaN</code> if no items were counted).
     *
     * @throws IllegalArgumentException if <code>values</code> is
     *     <code>null</code>.
     *
     * @since 1.0.3
     */
    public static double calculateMean(Number[] values,
            boolean includeNullAndNaN) {

        if (values == null) {
            throw new IllegalArgumentException("Null 'values' argument.");
        }
        double sum = 0.0;
        int counter = 0;
        for (int i = 0; i < values.length; i++) {
            // treat nulls the same as NaNs
            double current = (values[i] != null)
                    ? values[i].doubleValue() : Double.NaN;
            // when included, a NaN item poisons the sum (and so the result)
            if (includeNullAndNaN || !Double.isNaN(current)) {
                sum = sum + current;
                counter++;
            }
        }
        // if nothing was counted this is 0.0 / 0, i.e. NaN
        return sum / counter;
    }

    /**
     * Returns the mean of a collection of <code>Number</code> objects.  This
     * is equivalent to calling <code>calculateMean(values, true)</code>.
     *
     * @param values  the values (<code>null</code> not permitted).
     *
     * @return The mean.
     *
     * @throws IllegalArgumentException if <code>values</code> is
     *     <code>null</code>.
     */
    public static double calculateMean(Collection values) {
        return calculateMean(values, true);
    }

    /**
     * Returns the mean of a collection of <code>Number</code> objects.  Items
     * in the collection that are not instances of <code>Number</code> are
     * skipped.
     *
     * @param values  the values (<code>null</code> not permitted).
     * @param includeNullAndNaN  a flag that controls whether or not
     *     <code>null</code> and <code>Double.NaN</code> values are included
     *     in the calculation (if either is present in the collection, the
     *     result is {@link Double#NaN}).
     *
     * @return The mean (<code>Double.NaN</code> if no items were counted).
     *
     * @throws IllegalArgumentException if <code>values</code> is
     *     <code>null</code>.
     *
     * @since 1.0.3
     */
    public static double calculateMean(Collection values,
            boolean includeNullAndNaN) {

        if (values == null) {
            throw new IllegalArgumentException("Null 'values' argument.");
        }
        int count = 0;
        double total = 0.0;
        Iterator iterator = values.iterator();
        while (iterator.hasNext()) {
            Object object = iterator.next();
            if (object == null) {
                if (includeNullAndNaN) {
                    return Double.NaN;
                }
            }
            else if (object instanceof Number) {
                double value = ((Number) object).doubleValue();
                if (Double.isNaN(value)) {
                    if (includeNullAndNaN) {
                        return Double.NaN;
                    }
                }
                else {
                    total = total + value;
                    count = count + 1;
                }
            }
        }
        // if nothing was counted this is 0.0 / 0, i.e. NaN
        return total / count;
    }

    /**
     * Calculates the median for a list of values (<code>Number</code> objects).
     * The list of values will be copied, and the copy sorted, before
     * calculating the median.  To avoid this step (if your list of values
     * is already sorted), use the {@link #calculateMedian(List, boolean)}
     * method.
     *
     * @param values  the values (<code>null</code> permitted).
     *
     * @return The median (<code>Double.NaN</code> if the list is
     *     <code>null</code> or empty).
     */
    public static double calculateMedian(List values) {
        return calculateMedian(values, true);
    }

    /**
     * Calculates the median for a list of values (<code>Number</code> objects).
     * If <code>copyAndSort</code> is <code>false</code>, the list is assumed
     * to be presorted in ascending order by value.
     *
     * @param values  the values (<code>null</code> permitted).
     * @param copyAndSort  a flag that controls whether the list of values is
     *                     copied and sorted.
     *
     * @return The median (<code>Double.NaN</code> if the list is
     *     <code>null</code> or empty).
     */
    public static double calculateMedian(List values, boolean copyAndSort) {

        if (values == null) {
            return Double.NaN;
        }
        List sorted = values;
        if (copyAndSort) {
            // sort a copy so that the caller's list is left untouched
            sorted = new ArrayList(values);
            Collections.sort(sorted);
        }
        return medianOfSortedRange(sorted, 0, sorted.size());
    }

    /**
     * Calculates the median for a sublist within a list of values
     * (<code>Number</code> objects).  The sublist is copied and the copy
     * sorted before the median is calculated.
     *
     * @param values  the values, in any order (<code>null</code> not
     *                permitted).
     * @param start  the start index (inclusive).
     * @param end  the end index (inclusive).
     *
     * @return The median (<code>Double.NaN</code> if the range is empty).
     *
     * @throws IllegalArgumentException if <code>values</code> is
     *     <code>null</code>.
     */
    public static double calculateMedian(List values, int start, int end) {
        return calculateMedian(values, start, end, true);
    }

    /**
     * Calculates the median for a sublist within a list of values
     * (<code>Number</code> objects).  If <code>copyAndSort</code> is
     * <code>true</code>, the items in the range are copied and the copy is
     * sorted; otherwise the items in the range are assumed to be presorted
     * in ascending order by value.
     *
     * @param values  the values (<code>null</code> not permitted).
     * @param start  the start index (inclusive).
     * @param end  the end index (inclusive).
     * @param copyAndSort  a flag that controls whether the items in the
     *                     range are copied and sorted.
     *
     * @return The median (<code>Double.NaN</code> if the range is empty,
     *     that is, if <code>end</code> is less than <code>start</code>).
     *
     * @throws IllegalArgumentException if <code>values</code> is
     *     <code>null</code>.
     */
    public static double calculateMedian(List values, int start, int end,
                                         boolean copyAndSort) {

        if (values == null) {
            throw new IllegalArgumentException("Null 'values' argument.");
        }
        int count = end - start + 1;
        if (count <= 0) {
            // empty range: same answer whether or not sorting was requested
            return Double.NaN;
        }
        if (!copyAndSort) {
            return medianOfSortedRange(values, start, count);
        }
        List working = new ArrayList(count);
        for (int i = start; i <= end; i++) {
            working.add(values.get(i));
        }
        Collections.sort(working);
        return medianOfSortedRange(working, 0, count);

    }

    /**
     * Returns the median of <code>count</code> consecutive items, starting at
     * index <code>start</code>, in a list that is already sorted in ascending
     * order over that range.
     *
     * @param values  the sorted values (<code>null</code> not permitted).
     * @param start  the index of the first item in the range.
     * @param count  the number of items in the range.
     *
     * @return The median, or <code>Double.NaN</code> if <code>count</code>
     *     is zero or negative.
     */
    private static double medianOfSortedRange(List values, int start,
            int count) {
        if (count <= 0) {
            return Double.NaN;
        }
        int middle = start + count / 2;
        if (count % 2 == 1) {
            // odd number of items: the middle item is the median
            return ((Number) values.get(middle)).doubleValue();
        }
        // even number of items: average the two items around the middle
        Number lower = (Number) values.get(middle - 1);
        Number upper = (Number) values.get(middle);
        return (lower.doubleValue() + upper.doubleValue()) / 2.0;
    }

    /**
     * Returns the standard deviation of a set of numbers.  The sum of the
     * squared deviations from the mean is divided by
     * <code>data.length - 1</code>, so an array containing a single item
     * gives <code>Double.NaN</code>.
     *
     * @param data  the data (<code>null</code> or zero length array not
     *     permitted).
     *
     * @return The standard deviation of a set of numbers.
     *
     * @throws IllegalArgumentException if <code>data</code> is
     *     <code>null</code> or has zero length.
     * @throws NullPointerException if <code>data</code> contains a
     *     <code>null</code> item.
     */
    public static double getStdDev(Number[] data) {
        if (data == null) {
            throw new IllegalArgumentException("Null 'data' array.");
        }
        if (data.length == 0) {
            throw new IllegalArgumentException("Zero length 'data' array.");
        }
        double avg = calculateMean(data);
        double sum = 0.0;

        for (int counter = 0; counter < data.length; counter++) {
            double diff = data[counter].doubleValue() - avg;
            sum = sum + diff * diff;
        }
        return Math.sqrt(sum / (data.length - 1));
    }

    /**
     * Fits a straight line to a set of (x, y) data, returning the slope and
     * intercept.
     *
     * @param xData  the x-data (<code>null</code> not permitted).
     * @param yData  the y-data (<code>null</code> not permitted).
     *
     * @return A double array with the intercept in [0] and the slope in [1].
     *
     * @throws IllegalArgumentException if either argument is
     *     <code>null</code> or the arrays have different lengths.
     */
    public static double[] getLinearFit(Number[] xData, Number[] yData) {

        if (xData == null) {
            throw new IllegalArgumentException("Null 'xData' argument.");
        }
        if (yData == null) {
            throw new IllegalArgumentException("Null 'yData' argument.");
        }
        if (xData.length != yData.length) {
            throw new IllegalArgumentException(
                "Statistics.getLinearFit(): array lengths must be equal.");
        }

        double[] result = new double[2];
        // slope
        result[1] = getSlope(xData, yData);
        // intercept = ybar - slope * xbar
        result[0] = calculateMean(yData) - result[1] * calculateMean(xData);

        return result;

    }

    /**
     * Finds the slope of a regression line using least squares.
     *
     * @param xData  the x-values (<code>null</code> not permitted).
     * @param yData  the y-values (<code>null</code> not permitted).
     *
     * @return The slope.
     *
     * @throws IllegalArgumentException if either argument is
     *     <code>null</code> or the arrays have different lengths.
     */
    public static double getSlope(Number[] xData, Number[] yData) {

        if (xData == null) {
            throw new IllegalArgumentException("Null 'xData' argument.");
        }
        if (yData == null) {
            throw new IllegalArgumentException("Null 'yData' argument.");
        }
        if (xData.length != yData.length) {
            throw new IllegalArgumentException("Array lengths must be equal.");
        }

        // ********* stat function for linear slope ********
        // y = a + bx
        // a = ybar - b * xbar
        //     sum(x * y) - (sum(x) * sum(y)) / n
        // b = -----------------------------------
        //     sum(x^2) - (sum(x))^2 / n
        // *************************************************

        // sum of x, x^2, x * y, y
        double sx = 0.0, sxx = 0.0, sxy = 0.0, sy = 0.0;
        int n = xData.length;
        for (int i = 0; i < n; i++) {
            double x = xData[i].doubleValue();
            double y = yData[i].doubleValue();
            sx = sx + x;
            sxx = sxx + x * x;
            sxy = sxy + y * x;
            sy = sy + y;
        }
        return (sxy - (sx * sy) / n) / (sxx - (sx * sx) / n);

    }

    /**
     * Calculates the correlation between two datasets.  Both arrays should
     * contain the same number of items.  Null values are treated as zero.
     * <P>
     * Information about the correlation calculation was obtained from:
     *
     * http://trochim.human.cornell.edu/kb/statcorr.htm
     *
     * @param data1  the first dataset (<code>null</code> not permitted).
     * @param data2  the second dataset (<code>null</code> not permitted).
     *
     * @return The correlation.
     *
     * @throws IllegalArgumentException if either argument is
     *     <code>null</code> or the arrays have different lengths.
     */
    public static double getCorrelation(Number[] data1, Number[] data2) {
        if (data1 == null) {
            throw new IllegalArgumentException("Null 'data1' argument.");
        }
        if (data2 == null) {
            throw new IllegalArgumentException("Null 'data2' argument.");
        }
        if (data1.length != data2.length) {
            throw new IllegalArgumentException(
                "'data1' and 'data2' arrays must have same length."
            );
        }
        int n = data1.length;
        double sumX = 0.0;
        double sumY = 0.0;
        double sumX2 = 0.0;
        double sumY2 = 0.0;
        double sumXY = 0.0;
        for (int i = 0; i < n; i++) {
            // null items contribute zero to every sum
            double x = (data1[i] != null) ? data1[i].doubleValue() : 0.0;
            double y = (data2[i] != null) ? data2[i].doubleValue() : 0.0;
            sumX = sumX + x;
            sumY = sumY + y;
            sumXY = sumXY + (x * y);
            sumX2 = sumX2 + (x * x);
            sumY2 = sumY2 + (y * y);
        }
        return (n * sumXY - sumX * sumY)
                / Math.sqrt((n * sumX2 - sumX * sumX)
                        * (n * sumY2 - sumY * sumY));
    }

    /**
     * Returns a data set for a moving average on the data set passed in.
     * Row <code>i</code> of the result holds the x-value
     * <code>xData[i + period]</code> together with the mean of the
     * <code>period</code> y-values from <code>yData[i]</code> to
     * <code>yData[i + period - 1]</code>.
     *
     * @param xData  an array of the x data (<code>null</code> not permitted).
     * @param yData  an array of the y data (<code>null</code> not permitted).
     * @param period  the number of data points to average (must be greater
     *     than zero and no longer than the data set).
     *
     * @return A double[][] with <code>xData.length - period</code> rows in
     *         the first dimension, and two doubles for x and y in the second
     *         dimension.
     *
     * @throws IllegalArgumentException if either array is <code>null</code>,
     *     the arrays have different lengths, or <code>period</code> is less
     *     than one or greater than the length of the data set.
     */
    public static double[][] getMovingAverage(Number[] xData,
                                              Number[] yData,
                                              int period) {

        // check arguments...
        if (xData == null) {
            throw new IllegalArgumentException("Null 'xData' argument.");
        }
        if (yData == null) {
            throw new IllegalArgumentException("Null 'yData' argument.");
        }
        if (xData.length != yData.length) {
            throw new IllegalArgumentException("Array lengths must be equal.");
        }
        if (period < 1) {
            // a zero period would divide by zero, a negative one would index
            // outside the arrays
            throw new IllegalArgumentException(
                "Period must be greater than zero."
            );
        }
        if (period > xData.length) {
            throw new IllegalArgumentException(
                "Period can't be longer than dataset."
            );
        }

        double[][] result = new double[xData.length - period][2];
        for (int i = 0; i < result.length; i++) {
            result[i][0] = xData[i + period].doubleValue();
            // holds the moving average sum
            double sum = 0.0;
            for (int j = 0; j < period; j++) {
                sum += yData[i + j].doubleValue();
            }
            result[i][1] = sum / period;
        }
        return result;

    }

}