/* ===========================================================
 * JFreeChart : a free chart library for the Java(tm) platform
 * ===========================================================
 *
 * (C) Copyright 2000-2011, by Object Refinery Limited and Contributors.
 *
 * Project Info:  http://www.jfree.org/jfreechart/index.html
 *
 * This library is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
 * License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301,
 * USA.
 *
 * [Oracle and Java are registered trademarks of Oracle and/or its affiliates.
 * Other names may be trademarks of their respective owners.]
 *
 * ---------------
 * Regression.java
 * ---------------
 * (C) Copyright 2002-2009, by Object Refinery Limited.
 *
 * Original Author:  David Gilbert (for Object Refinery Limited);
 * Contributor(s):   Peter Kolb (patch 2795746);
 *
 * Changes
 * -------
 * 30-Sep-2002 : Version 1 (DG);
 * 18-Aug-2003 : Added 'abstract' (DG);
 * 15-Jul-2004 : Switched getX() with getXValue() and getY() with
 *               getYValue() (DG);
 * 29-May-2009 : Added support for polynomial regression, see patch 2795746
 *               by Peter Kolb (DG);
 *
 */

package org.jfree.data.statistics;

import org.jfree.data.xy.XYDataset;

/**
 * A utility class for fitting regression curves to data.
 */
public abstract class Regression {

    /**
     * Returns the parameters 'a' and 'b' for an equation y = a + bx, fitted to
     * the data using ordinary least squares regression.  The result is
     * returned as a double[], where result[0] --> a, and result[1] --> b.
     *
     * @param data  the data.
     *
     * @return The parameters.
     */
    public static double[] getOLSRegression(double[][] data) {

        int n = data.length;
        if (n < 2) {
            throw new IllegalArgumentException("Not enough data.");
        }

        double sumX = 0;
        double sumY = 0;
        double sumXX = 0;
        double sumXY = 0;
        for (int i = 0; i < n; i++) {
            double x = data[i][0];
            double y = data[i][1];
            sumX += x;
            sumY += y;
            double xx = x * x;
            sumXX += xx;
            double xy = x * y;
            sumXY += xy;
        }
        double sxx = sumXX - (sumX * sumX) / n;
        double sxy = sumXY - (sumX * sumY) / n;
        double xbar = sumX / n;
        double ybar = sumY / n;

        double[] result = new double[2];
        result[1] = sxy / sxx;
        result[0] = ybar - result[1] * xbar;

        return result;

    }
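
    // Illustrative usage sketch (not part of the library): fit y = a + b * x
    // to points that lie exactly on y = 1 + 2x.  The intercept is returned in
    // result[0] and the slope in result[1].
    //
    //     double[][] data = {{1.0, 3.0}, {2.0, 5.0}, {3.0, 7.0}};
    //     double[] fit = Regression.getOLSRegression(data);
    //     // fit[0] is approximately 1.0 (a), fit[1] approximately 2.0 (b)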

    /**
     * Returns the parameters 'a' and 'b' for an equation y = a + bx, fitted to
     * the data using ordinary least squares regression. The result is returned
     * as a double[], where result[0] --> a, and result[1] --> b.
     *
     * @param data  the data.
     * @param series  the series (zero-based index).
     *
     * @return The parameters.
     */
    public static double[] getOLSRegression(XYDataset data, int series) {

        int n = data.getItemCount(series);
        if (n < 2) {
            throw new IllegalArgumentException("Not enough data.");
        }

        double sumX = 0;
        double sumY = 0;
        double sumXX = 0;
        double sumXY = 0;
        for (int i = 0; i < n; i++) {
            double x = data.getXValue(series, i);
            double y = data.getYValue(series, i);
            sumX += x;
            sumY += y;
            double xx = x * x;
            sumXX += xx;
            double xy = x * y;
            sumXY += xy;
        }
        double sxx = sumXX - (sumX * sumX) / n;
        double sxy = sumXY - (sumX * sumY) / n;
        double xbar = sumX / n;
        double ybar = sumY / n;

        double[] result = new double[2];
        result[1] = sxy / sxx;
        result[0] = ybar - result[1] * xbar;

        return result;

    }
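
    // Illustrative sketch of the dataset-based variant, assuming the standard
    // org.jfree.data.xy.XYSeries and XYSeriesCollection classes are on the
    // classpath:
    //
    //     XYSeries series = new XYSeries("S1");
    //     series.add(1.0, 3.0);
    //     series.add(2.0, 5.0);
    //     series.add(3.0, 7.0);
    //     XYSeriesCollection dataset = new XYSeriesCollection(series);
    //     double[] fit = Regression.getOLSRegression(dataset, 0);
    //     // fit[0] is the intercept a, fit[1] is the slope b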

    /**
     * Returns the parameters 'a' and 'b' for an equation y = ax^b, fitted to
     * the data using a power regression equation.  The result is returned as
     * an array, where double[0] --> a, and double[1] --> b.
     *
     * @param data  the data.
     *
     * @return The parameters.
     */
    public static double[] getPowerRegression(double[][] data) {

        int n = data.length;
        if (n < 2) {
            throw new IllegalArgumentException("Not enough data.");
        }

        double sumX = 0;
        double sumY = 0;
        double sumXX = 0;
        double sumXY = 0;
        for (int i = 0; i < n; i++) {
            double x = Math.log(data[i][0]);
            double y = Math.log(data[i][1]);
            sumX += x;
            sumY += y;
            double xx = x * x;
            sumXX += xx;
            double xy = x * y;
            sumXY += xy;
        }
        double sxx = sumXX - (sumX * sumX) / n;
        double sxy = sumXY - (sumX * sumY) / n;
        double xbar = sumX / n;
        double ybar = sumY / n;

        double[] result = new double[2];
        result[1] = sxy / sxx;
        result[0] = Math.pow(Math.exp(1.0), ybar - result[1] * xbar);

        return result;

    }
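
    // Illustrative sketch (not part of the library): the power fit works on
    // log-transformed values, so all x and y values are assumed to be
    // positive.  Points lying exactly on y = 2 * x^3:
    //
    //     double[][] data = {{1.0, 2.0}, {2.0, 16.0}, {3.0, 54.0}};
    //     double[] fit = Regression.getPowerRegression(data);
    //     // fit[0] is approximately 2.0 (a), fit[1] approximately 3.0 (b)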

    /**
     * Returns the parameters 'a' and 'b' for an equation y = ax^b, fitted to
     * the data using a power regression equation.  The result is returned as
     * an array, where double[0] --> a, and double[1] --> b.
     *
     * @param data  the data.
     * @param series  the series to fit the regression line against.
     *
     * @return The parameters.
     */
    public static double[] getPowerRegression(XYDataset data, int series) {

        int n = data.getItemCount(series);
        if (n < 2) {
            throw new IllegalArgumentException("Not enough data.");
        }

        double sumX = 0;
        double sumY = 0;
        double sumXX = 0;
        double sumXY = 0;
        for (int i = 0; i < n; i++) {
            double x = Math.log(data.getXValue(series, i));
            double y = Math.log(data.getYValue(series, i));
            sumX += x;
            sumY += y;
            double xx = x * x;
            sumXX += xx;
            double xy = x * y;
            sumXY += xy;
        }
        double sxx = sumXX - (sumX * sumX) / n;
        double sxy = sumXY - (sumX * sumY) / n;
        double xbar = sumX / n;
        double ybar = sumY / n;

        double[] result = new double[2];
        result[1] = sxy / sxx;
        result[0] = Math.pow(Math.exp(1.0), ybar - result[1] * xbar);

        return result;

    }
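
    // Illustrative sketch only: once fitted, a prediction can be computed for
    // any positive x value.  Here 'dataset' and 'x' are assumed to be defined
    // by the caller.
    //
    //     double[] fit = Regression.getPowerRegression(dataset, 0);
    //     double yHat = fit[0] * Math.pow(x, fit[1]);  // predicted y = a * x^b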

    /**
     * Returns the parameters 'a0', 'a1', 'a2', ..., 'an' for a polynomial
     * function of order n, y = a0 + a1 * x + a2 * x^2 + ... + an * x^n,
     * fitted to the data using a polynomial regression equation.
     * The result is returned as an array with a length of n + 2,
     * where double[0] --> a0, double[1] --> a1, ..., double[n] --> an,
     * and double[n + 1] is the coefficient of determination R^2.
     * Reference: J. D. Faires, R. L. Burden, Numerische Methoden (German
     * edition), pp. 243ff and 327ff.
     *
     * @param dataset  the dataset (<code>null</code> not permitted).
     * @param series  the series to fit the regression line against (the series
     *         must have at least order + 1 non-NaN items).
     * @param order  the order of the function (> 0).
     *
     * @return The parameters.
     *
     * @since 1.0.14
     */
    public static double[] getPolynomialRegression(XYDataset dataset,
            int series, int order) {
        if (dataset == null) {
            throw new IllegalArgumentException("Null 'dataset' argument.");
        }
        int itemCount = dataset.getItemCount(series);
        if (itemCount < order + 1) {
            throw new IllegalArgumentException("Not enough data.");
        }
        // copy the non-NaN items into a working array
        int validItems = 0;
        double[][] data = new double[2][itemCount];
        for (int item = 0; item < itemCount; item++) {
            double x = dataset.getXValue(series, item);
            double y = dataset.getYValue(series, item);
            if (!Double.isNaN(x) && !Double.isNaN(y)) {
                data[0][validItems] = x;
                data[1][validItems] = y;
                validItems++;
            }
        }
        if (validItems < order + 1) {
            throw new IllegalArgumentException("Not enough data.");
        }
        int equations = order + 1;
        int coefficients = order + 2;
        double[] result = new double[equations + 1];
        double[][] matrix = new double[equations][coefficients];
        double sumX = 0.0;
        double sumY = 0.0;

        // build the augmented matrix of the normal equations
        for (int item = 0; item < validItems; item++) {
            sumX += data[0][item];
            sumY += data[1][item];
            for (int eq = 0; eq < equations; eq++) {
                for (int coe = 0; coe < coefficients - 1; coe++) {
                    matrix[eq][coe] += Math.pow(data[0][item], eq + coe);
                }
                matrix[eq][coefficients - 1] += data[1][item]
                        * Math.pow(data[0][item], eq);
            }
        }
        // forward elimination, then back substitution to recover the
        // polynomial coefficients
        double[][] subMatrix = calculateSubMatrix(matrix);
        for (int eq = 1; eq < equations; eq++) {
            matrix[eq][0] = 0;
            for (int coe = 1; coe < coefficients; coe++) {
                matrix[eq][coe] = subMatrix[eq - 1][coe - 1];
            }
        }
        for (int eq = equations - 1; eq > -1; eq--) {
            double value = matrix[eq][coefficients - 1];
            for (int coe = eq; coe < coefficients - 1; coe++) {
                value -= matrix[eq][coe] * result[coe];
            }
            result[eq] = value / matrix[eq][eq];
        }
        // coefficient of determination R^2 = SSR / SST
        double meanY = sumY / validItems;
        double yObsSquare = 0.0;
        double yRegSquare = 0.0;
        for (int item = 0; item < validItems; item++) {
            double yCalc = 0;
            for (int eq = 0; eq < equations; eq++) {
                yCalc += result[eq] * Math.pow(data[0][item], eq);
            }
            yRegSquare += Math.pow(yCalc - meanY, 2);
            yObsSquare += Math.pow(data[1][item] - meanY, 2);
        }
        double rSquare = yRegSquare / yObsSquare;
        result[equations] = rSquare;
        return result;
    }
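
    // Illustrative sketch (not part of the library): fit a second-order
    // polynomial to points lying exactly on y = 1 + 2x + 3x^2, assuming the
    // org.jfree.data.xy.XYSeries and XYSeriesCollection classes.  The returned
    // array has length order + 2; the last entry is R^2.
    //
    //     XYSeries series = new XYSeries("S1");
    //     series.add(0.0, 1.0);
    //     series.add(1.0, 6.0);
    //     series.add(2.0, 17.0);
    //     series.add(-1.0, 2.0);
    //     XYSeriesCollection dataset = new XYSeriesCollection(series);
    //     double[] fit = Regression.getPolynomialRegression(dataset, 0, 2);
    //     // fit[0] ~ 1.0, fit[1] ~ 2.0, fit[2] ~ 3.0, fit[3] ~ 1.0 (R^2)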

    /**
     * Returns a matrix with the following features: (1) the number of rows
     * and columns is 1 less than that of the original matrix; (2) the matrix
     * is triangular, i.e. all elements a (row, column) with column > row are
     * zero.  This method is used for calculating a polynomial regression.
     *
     * @param matrix  the start matrix.
     *
     * @return The new matrix.
     */
    private static double[][] calculateSubMatrix(double[][] matrix) {
        int equations = matrix.length;
        int coefficients = matrix[0].length;
        double[][] result = new double[equations - 1][coefficients - 1];
        // eliminate the first column by combining each row with the first row
        for (int eq = 1; eq < equations; eq++) {
            double factor = matrix[0][0] / matrix[eq][0];
            for (int coe = 1; coe < coefficients; coe++) {
                result[eq - 1][coe - 1] = matrix[0][coe] - matrix[eq][coe]
                        * factor;
            }
        }
        if (equations == 1) {
            return result;
        }
        // check for zero pivot element
        if (result[0][0] == 0) {
            boolean found = false;
            for (int i = 0; i < result.length; i++) {
                if (result[i][0] != 0) {
                    // swap row 0 with the first row that has a non-zero pivot
                    found = true;
                    double[] temp = result[0];
                    result[0] = result[i];
                    result[i] = temp;
                    break;
                }
            }
            if (!found) {
                System.out.println("Equation has no solution!");
                return new double[equations - 1][coefficients - 1];
            }
        }
        // recurse on the reduced system and copy the triangular part back
        double[][] subMatrix = calculateSubMatrix(result);
        for (int eq = 1; eq < equations - 1; eq++) {
            result[eq][0] = 0;
            for (int coe = 1; coe < coefficients - 1; coe++) {
                result[eq][coe] = subMatrix[eq - 1][coe - 1];
            }
        }
        return result;
    }
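
    // Worked example of one elimination step (illustrative only): for the
    // augmented matrix {{2, 1, 3}, {4, 5, 11}}, i.e. 2a + b = 3 and
    // 4a + 5b = 11, the factor for the second row is 2 / 4 = 0.5, so the
    // reduced matrix is {{1 - 5 * 0.5, 3 - 11 * 0.5}} = {{-1.5, -2.5}}.
    // The reduced row -1.5 * b = -2.5 gives b = 5/3, and back substitution
    // in the caller then yields a = 2/3.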

}