001 /* =========================================================== 002 * JFreeChart : a free chart library for the Java(tm) platform 003 * =========================================================== 004 * 005 * (C) Copyright 2000-2008, by Object Refinery Limited and Contributors. 006 * 007 * Project Info: http://www.jfree.org/jfreechart/index.html 008 * 009 * This library is free software; you can redistribute it and/or modify it 010 * under the terms of the GNU Lesser General Public License as published by 011 * the Free Software Foundation; either version 2.1 of the License, or 012 * (at your option) any later version. 013 * 014 * This library is distributed in the hope that it will be useful, but 015 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 016 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 017 * License for more details. 018 * 019 * You should have received a copy of the GNU Lesser General Public 020 * License along with this library; if not, write to the Free Software 021 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, 022 * USA. 023 * 024 * [Java is a trademark or registered trademark of Sun Microsystems, Inc. 025 * in the United States and other countries.] 026 * 027 * --------------- 028 * Statistics.java 029 * --------------- 030 * (C) Copyright 2000-2008, by Matthew Wright and Contributors. 031 * 032 * Original Author: Matthew Wright; 033 * Contributor(s): David Gilbert (for Object Refinery Limited); 034 * 035 * Changes (from 08-Nov-2001) 036 * -------------------------- 037 * 08-Nov-2001 : Added standard header and tidied Javadoc comments (DG); 038 * Moved from JFreeChart to package com.jrefinery.data.* in 039 * JCommon class library (DG); 040 * 24-Jun-2002 : Removed unnecessary local variable (DG); 041 * 07-Oct-2002 : Fixed errors reported by Checkstyle (DG); 042 * 26-May-2004 : Moved calculateMean() method from BoxAndWhiskerCalculator (DG); 043 * 02-Jun-2004 : Fixed bug in calculateMedian() method (DG); 044 * 11-Jan-2005 : Removed deprecated code in preparation for the 1.0.0 045 * release (DG); 046 * 047 */ 048 049 package org.jfree.data.statistics; 050 051 import java.util.ArrayList; 052 import java.util.Collection; 053 import java.util.Collections; 054 import java.util.Iterator; 055 import java.util.List; 056 057 /** 058 * A utility class that provides some common statistical functions. 059 */ 060 public abstract class Statistics { 061 062 /** 063 * Returns the mean of an array of numbers. This is equivalent to calling 064 * <code>calculateMean(values, true)</code>. 065 * 066 * @param values the values (<code>null</code> not permitted). 067 * 068 * @return The mean. 069 */ 070 public static double calculateMean(Number[] values) { 071 return calculateMean(values, true); 072 } 073 074 /** 075 * Returns the mean of an array of numbers. 076 * 077 * @param values the values (<code>null</code> not permitted). 078 * @param includeNullAndNaN a flag that controls whether or not 079 * <code>null</code> and <code>Double.NaN</code> values are included 080 * in the calculation (if either is present in the array, the result is 081 * {@link Double#NaN}). 082 * 083 * @return The mean. 084 * 085 * @since 1.0.3 086 */ 087 public static double calculateMean(Number[] values, 088 boolean includeNullAndNaN) { 089 090 if (values == null) { 091 throw new IllegalArgumentException("Null 'values' argument."); 092 } 093 double sum = 0.0; 094 double current; 095 int counter = 0; 096 for (int i = 0; i < values.length; i++) { 097 // treat nulls the same as NaNs 098 if (values[i] != null) { 099 current = values[i].doubleValue(); 100 } 101 else { 102 current = Double.NaN; 103 } 104 // calculate the sum and count 105 if (includeNullAndNaN || !Double.isNaN(current)) { 106 sum = sum + current; 107 counter++; 108 } 109 } 110 double result = (sum / counter); 111 return result; 112 } 113 114 /** 115 * Returns the mean of a collection of <code>Number</code> objects. 116 * 117 * @param values the values (<code>null</code> not permitted). 118 * 119 * @return The mean. 120 */ 121 public static double calculateMean(Collection values) { 122 return calculateMean(values, true); 123 } 124 125 /** 126 * Returns the mean of a collection of <code>Number</code> objects. 127 * 128 * @param values the values (<code>null</code> not permitted). 129 * @param includeNullAndNaN a flag that controls whether or not 130 * <code>null</code> and <code>Double.NaN</code> values are included 131 * in the calculation (if either is present in the array, the result is 132 * {@link Double#NaN}). 133 * 134 * @return The mean. 135 * 136 * @since 1.0.3 137 */ 138 public static double calculateMean(Collection values, 139 boolean includeNullAndNaN) { 140 141 if (values == null) { 142 throw new IllegalArgumentException("Null 'values' argument."); 143 } 144 int count = 0; 145 double total = 0.0; 146 Iterator iterator = values.iterator(); 147 while (iterator.hasNext()) { 148 Object object = iterator.next(); 149 if (object == null) { 150 if (includeNullAndNaN) { 151 return Double.NaN; 152 } 153 } 154 else { 155 if (object instanceof Number) { 156 Number number = (Number) object; 157 double value = number.doubleValue(); 158 if (Double.isNaN(value)) { 159 if (includeNullAndNaN) { 160 return Double.NaN; 161 } 162 } 163 else { 164 total = total + number.doubleValue(); 165 count = count + 1; 166 } 167 } 168 } 169 } 170 return total / count; 171 } 172 173 /** 174 * Calculates the median for a list of values (<code>Number</code> objects). 175 * The list of values will be copied, and the copy sorted, before 176 * calculating the median. To avoid this step (if your list of values 177 * is already sorted), use the {@link #calculateMedian(List, boolean)} 178 * method. 179 * 180 * @param values the values (<code>null</code> permitted). 181 * 182 * @return The median. 183 */ 184 public static double calculateMedian(List values) { 185 return calculateMedian(values, true); 186 } 187 188 /** 189 * Calculates the median for a list of values (<code>Number</code> objects). 190 * If <code>copyAndSort</code> is <code>false</code>, the list is assumed 191 * to be presorted in ascending order by value. 192 * 193 * @param values the values (<code>null</code> permitted). 194 * @param copyAndSort a flag that controls whether the list of values is 195 * copied and sorted. 196 * 197 * @return The median. 198 */ 199 public static double calculateMedian(List values, boolean copyAndSort) { 200 201 double result = Double.NaN; 202 if (values != null) { 203 if (copyAndSort) { 204 int itemCount = values.size(); 205 List copy = new ArrayList(itemCount); 206 for (int i = 0; i < itemCount; i++) { 207 copy.add(i, values.get(i)); 208 } 209 Collections.sort(copy); 210 values = copy; 211 } 212 int count = values.size(); 213 if (count > 0) { 214 if (count % 2 == 1) { 215 if (count > 1) { 216 Number value = (Number) values.get((count - 1) / 2); 217 result = value.doubleValue(); 218 } 219 else { 220 Number value = (Number) values.get(0); 221 result = value.doubleValue(); 222 } 223 } 224 else { 225 Number value1 = (Number) values.get(count / 2 - 1); 226 Number value2 = (Number) values.get(count / 2); 227 result = (value1.doubleValue() + value2.doubleValue()) 228 / 2.0; 229 } 230 } 231 } 232 return result; 233 } 234 235 /** 236 * Calculates the median for a sublist within a list of values 237 * (<code>Number</code> objects). 238 * 239 * @param values the values, in any order (<code>null</code> not 240 * permitted). 241 * @param start the start index. 242 * @param end the end index. 243 * 244 * @return The median. 245 */ 246 public static double calculateMedian(List values, int start, int end) { 247 return calculateMedian(values, start, end, true); 248 } 249 250 /** 251 * Calculates the median for a sublist within a list of values 252 * (<code>Number</code> objects). The entire list will be sorted if the 253 * <code>ascending</code< argument is <code>false</code>. 254 * 255 * @param values the values (<code>null</code> not permitted). 256 * @param start the start index. 257 * @param end the end index. 258 * @param copyAndSort a flag that that controls whether the list of values 259 * is copied and sorted. 260 * 261 * @return The median. 262 */ 263 public static double calculateMedian(List values, int start, int end, 264 boolean copyAndSort) { 265 266 double result = Double.NaN; 267 if (copyAndSort) { 268 List working = new ArrayList(end - start + 1); 269 for (int i = start; i <= end; i++) { 270 working.add(values.get(i)); 271 } 272 Collections.sort(working); 273 result = calculateMedian(working, false); 274 } 275 else { 276 int count = end - start + 1; 277 if (count > 0) { 278 if (count % 2 == 1) { 279 if (count > 1) { 280 Number value 281 = (Number) values.get(start + (count - 1) / 2); 282 result = value.doubleValue(); 283 } 284 else { 285 Number value = (Number) values.get(start); 286 result = value.doubleValue(); 287 } 288 } 289 else { 290 Number value1 = (Number) values.get(start + count / 2 - 1); 291 Number value2 = (Number) values.get(start + count / 2); 292 result 293 = (value1.doubleValue() + value2.doubleValue()) / 2.0; 294 } 295 } 296 } 297 return result; 298 299 } 300 301 /** 302 * Returns the standard deviation of a set of numbers. 303 * 304 * @param data the data (<code>null</code> or zero length array not 305 * permitted). 306 * 307 * @return The standard deviation of a set of numbers. 308 */ 309 public static double getStdDev(Number[] data) { 310 if (data == null) { 311 throw new IllegalArgumentException("Null 'data' array."); 312 } 313 if (data.length == 0) { 314 throw new IllegalArgumentException("Zero length 'data' array."); 315 } 316 double avg = calculateMean(data); 317 double sum = 0.0; 318 319 for (int counter = 0; counter < data.length; counter++) { 320 double diff = data[counter].doubleValue() - avg; 321 sum = sum + diff * diff; 322 } 323 return Math.sqrt(sum / (data.length - 1)); 324 } 325 326 /** 327 * Fits a straight line to a set of (x, y) data, returning the slope and 328 * intercept. 329 * 330 * @param xData the x-data (<code>null</code> not permitted). 331 * @param yData the y-data (<code>null</code> not permitted). 332 * 333 * @return A double array with the intercept in [0] and the slope in [1]. 334 */ 335 public static double[] getLinearFit(Number[] xData, Number[] yData) { 336 337 if (xData == null) { 338 throw new IllegalArgumentException("Null 'xData' argument."); 339 } 340 if (yData == null) { 341 throw new IllegalArgumentException("Null 'yData' argument."); 342 } 343 if (xData.length != yData.length) { 344 throw new IllegalArgumentException( 345 "Statistics.getLinearFit(): array lengths must be equal."); 346 } 347 348 double[] result = new double[2]; 349 // slope 350 result[1] = getSlope(xData, yData); 351 // intercept 352 result[0] = calculateMean(yData) - result[1] * calculateMean(xData); 353 354 return result; 355 356 } 357 358 /** 359 * Finds the slope of a regression line using least squares. 360 * 361 * @param xData the x-values (<code>null</code> not permitted). 362 * @param yData the y-values (<code>null</code> not permitted). 363 * 364 * @return The slope. 365 */ 366 public static double getSlope(Number[] xData, Number[] yData) { 367 368 if (xData == null) { 369 throw new IllegalArgumentException("Null 'xData' argument."); 370 } 371 if (yData == null) { 372 throw new IllegalArgumentException("Null 'yData' argument."); 373 } 374 if (xData.length != yData.length) { 375 throw new IllegalArgumentException("Array lengths must be equal."); 376 } 377 378 // ********* stat function for linear slope ******** 379 // y = a + bx 380 // a = ybar - b * xbar 381 // sum(x * y) - (sum (x) * sum(y)) / n 382 // b = ------------------------------------ 383 // sum (x^2) - (sum(x)^2 / n 384 // ************************************************* 385 386 // sum of x, x^2, x * y, y 387 double sx = 0.0, sxx = 0.0, sxy = 0.0, sy = 0.0; 388 int counter; 389 for (counter = 0; counter < xData.length; counter++) { 390 sx = sx + xData[counter].doubleValue(); 391 sxx = sxx + Math.pow(xData[counter].doubleValue(), 2); 392 sxy = sxy + yData[counter].doubleValue() 393 * xData[counter].doubleValue(); 394 sy = sy + yData[counter].doubleValue(); 395 } 396 return (sxy - (sx * sy) / counter) / (sxx - (sx * sx) / counter); 397 398 } 399 400 /** 401 * Calculates the correlation between two datasets. Both arrays should 402 * contain the same number of items. Null values are treated as zero. 403 * <P> 404 * Information about the correlation calculation was obtained from: 405 * 406 * http://trochim.human.cornell.edu/kb/statcorr.htm 407 * 408 * @param data1 the first dataset. 409 * @param data2 the second dataset. 410 * 411 * @return The correlation. 412 */ 413 public static double getCorrelation(Number[] data1, Number[] data2) { 414 if (data1 == null) { 415 throw new IllegalArgumentException("Null 'data1' argument."); 416 } 417 if (data2 == null) { 418 throw new IllegalArgumentException("Null 'data2' argument."); 419 } 420 if (data1.length != data2.length) { 421 throw new IllegalArgumentException( 422 "'data1' and 'data2' arrays must have same length." 423 ); 424 } 425 int n = data1.length; 426 double sumX = 0.0; 427 double sumY = 0.0; 428 double sumX2 = 0.0; 429 double sumY2 = 0.0; 430 double sumXY = 0.0; 431 for (int i = 0; i < n; i++) { 432 double x = 0.0; 433 if (data1[i] != null) { 434 x = data1[i].doubleValue(); 435 } 436 double y = 0.0; 437 if (data2[i] != null) { 438 y = data2[i].doubleValue(); 439 } 440 sumX = sumX + x; 441 sumY = sumY + y; 442 sumXY = sumXY + (x * y); 443 sumX2 = sumX2 + (x * x); 444 sumY2 = sumY2 + (y * y); 445 } 446 return (n * sumXY - sumX * sumY) / Math.pow((n * sumX2 - sumX * sumX) 447 * (n * sumY2 - sumY * sumY), 0.5); 448 } 449 450 /** 451 * Returns a data set for a moving average on the data set passed in. 452 * 453 * @param xData an array of the x data. 454 * @param yData an array of the y data. 455 * @param period the number of data points to average 456 * 457 * @return A double[][] the length of the data set in the first dimension, 458 * with two doubles for x and y in the second dimension 459 */ 460 public static double[][] getMovingAverage(Number[] xData, 461 Number[] yData, 462 int period) { 463 464 // check arguments... 465 if (xData.length != yData.length) { 466 throw new IllegalArgumentException("Array lengths must be equal."); 467 } 468 469 if (period > xData.length) { 470 throw new IllegalArgumentException( 471 "Period can't be longer than dataset." 472 ); 473 } 474 475 double[][] result = new double[xData.length - period][2]; 476 for (int i = 0; i < result.length; i++) { 477 result[i][0] = xData[i + period].doubleValue(); 478 // holds the moving average sum 479 double sum = 0.0; 480 for (int j = 0; j < period; j++) { 481 sum += yData[i + j].doubleValue(); 482 } 483 sum = sum / period; 484 result[i][1] = sum; 485 } 486 return result; 487 488 } 489 490 }