//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 1.1
 *  @date    Wed Feb 20 17:39:57 EST 2013
 *  @see     LICENSE (MIT style license file).
 */

package scalation.analytics

import scalation.linalgebra.{MatrixD, QRDecomp, VectorD}
import scalation.math.IntWithExp._
import scalation.math.DoubleWithExp._
import scalation.plot.Plot
import scalation.util.Error

//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `PolyRegression` class supports polynomial regression.  In this case,
 *  't' is expanded to [1, t, t^2 ... t^k].  Fit the parameter vector 'b' in the
 *  regression equation
 *
 *      y = b dot x + e = b0 + b1 * t + b2 * t^2 + ... + bk * t^k + e
 *
 *  where 'e' represents the residuals (the part not explained by the model).
 *  Use Least-Squares (minimizing the residuals) to fit the parameter vector
 *
 *      b = x_pinv * y
 *
 *  where 'x_pinv' is the pseudo-inverse.  By default, QR Decomposition (more
 *  robust) is used to compute 'x_pinv', with Gaussian Elimination as an option
 *  (set 'useQR' to false).
 *  @see see.stanford.edu/materials/lsoeldsee263/05-ls.pdf
 *  @param t      the input vector
 *  @param y      the response vector
 *  @param order  the order of the polynomial
 *  @param useQR  use QR Decomposition for pseudo-inverse, else Gaussian Elimination
 */
class PolyRegression (t: VectorD, y: VectorD, order: Int, useQR: Boolean = true)
      extends Predictor with Error
{
    if (t.dim != y.dim) flaw ("constructor", "dimensions of t and y are incompatible")
    if (t.dim <= order) flaw ("constructor", "not enough data points for the given order")

    val x = new MatrixD (t.dim, order + 1)               // design matrix
    for (i <- 0 until t.dim) x(i) = expand (t(i))
    val rg = new Regression (x, y, useQR)                // regular multiple regression

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Expand the scalar 'x' into a vector of powers of 'x': [1, x, x^2 ... x^k].
     *  @param x  the scalar to expand
     */
    def expand (x: Double): VectorD =
    {
        val v = new VectorD (order + 1)
        for (k <- 0 to order) v(k) = x ~^ k
        v
    } // expand

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Train the predictor by fitting the parameter vector (b-vector) in the
     *  regression equation
     *      y = b dot x + e = [b0, ... bk] dot [1, t, t^2 ... t^k] + e
     *  using the least squares method.
     */
    def train () { rg.train }

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Retrain the predictor by fitting the parameter vector (b-vector) in the
     *  multiple regression equation
     *      yy = b dot x + e = [b0, ... bk] dot [1, t, t^2 ... t^k] + e
     *  using the least squares method.
     *  @param yy  the new response vector
     */
    def train (yy: VectorD) { rg.train (yy) }

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the fit (parameter vector b, quality of fit rSquared).
     */
    def fit: Tuple4 [VectorD, Double, Double, Double] = rg.fit

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Predict the value of y = f(z) by evaluating the formula y = b dot expand (z),
     *  e.g., (b0, b1, b2) dot (1, z, z^2).
     *  @param z  the new scalar to predict
     */
    def predict (z: Double): Double = rg.predict (expand (z))

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Predict the value of y = f(z) by evaluating the formula y = b dot z,
     *  e.g., (b0, b1, b2) dot (1, z1, z2).
     *  @param z  the new vector to predict
     */
    def predict (z: VectorD): Double = rg.predict (z)

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Predict the value of y = f(z) by evaluating the formula y = b dot zi for
     *  each row zi of matrix z.
     *  @param z  the new matrix to predict
     */
    def predict (z: MatrixD): VectorD = rg.predict (z)

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Perform backward elimination to remove the least predictive variable
     *  from the model, returning the variable to eliminate, the new parameter
     *  vector, the new R-squared value and the new F statistic.
     */
    def backElim (): Tuple4 [Int, VectorD, Double, Double] = rg.backElim ()
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the Variance Inflation Factor (VIF) for each variable to test
     *  for multi-collinearity by regressing xj against the rest of the variables.
     *  A VIF over 10 indicates that over 90% of the variance of xj can be predicted
     *  from the other variables, so xj is a candidate for removal from the model.
     */
    def vif: VectorD = rg.vif

} // PolyRegression class
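//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `PolyRegressionSketch` object is an illustrative sketch (NOT part of the
 *  original ScalaTion API) showing what 'b = x_pinv * y' computes when the
 *  pseudo-inverse is formed via the normal equations: b = (x.t * x)^-1 * (x.t * y),
 *  i.e., the Gaussian Elimination route mentioned in the class doc.  It uses only
 *  plain Scala arrays, so it assumes nothing about `MatrixD`'s methods; the real
 *  class delegates this work to `Regression` (QR Decomposition by default).
 */
object PolyRegressionSketch extends App
{
    // fit y = b0 + b1*t through points (0,1), (1,3), (2,5): expect b = (1, 2)
    val tt = Array (0.0, 1.0, 2.0)
    val yy = Array (1.0, 3.0, 5.0)
    val k  = 1                                           // polynomial order

    // design matrix: row i = [1, t_i, ..., t_i^k], as built by 'expand'
    val xx = Array.tabulate (tt.length, k + 1) ((i, j) => math.pow (tt(i), j))

    // normal equations a * b = c where a = x.t * x and c = x.t * y
    val a = Array.tabulate (k + 1, k + 1) ((p, q) =>
        (0 until tt.length).map (i => xx(i)(p) * xx(i)(q)).sum)
    val c = Array.tabulate (k + 1) (p =>
        (0 until tt.length).map (i => xx(i)(p) * yy(i)).sum)

    // solve by Gaussian Elimination (no pivoting: adequate for this tiny example)
    for (p <- 0 to k; q <- p + 1 to k) {
        val f = a(q)(p) / a(p)(p)
        for (r <- p to k) a(q)(r) -= f * a(p)(r)         // eliminate column p from row q
        c(q) -= f * c(p)
    } // for
    val b = new Array [Double] (k + 1)
    for (p <- k to 0 by -1) {                            // back substitution
        b(p) = (c(p) - (p + 1 to k).map (q => a(p)(q) * b(q)).sum) / a(p)(p)
    } // for

    println ("b = " + b.mkString (", "))                 // expect b = 1.0, 2.0
} // PolyRegressionSketch object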

//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `PolyRegressionTest` object tests the `PolyRegression` class using the
 *  following regression equation.
 *
 *      y = b dot x = b0 + b1*t + b2*t^2.
 *
 */
object PolyRegressionTest extends App
{
    import scalation.random.Normal

    val noise = Normal (0.0, 500.0)
    val t     = VectorD.range (0, 100)
    val y     = new VectorD (t.dim)
    for (i <- 0 until 100) y(i) = 10.0 - 10.0 * i + i~^2 + noise.gen

    println ("t = " + t)
    println ("y = " + y)

    val order = 8
    val prg   = new PolyRegression (t, y, order)
    prg.train ()
    println ("fit = " + prg.fit)

    val z  = 10.5                                        // predict y for one point
    val yp = prg.predict (z)
    println ("predict (" + z + ") = " + yp)

    val yyp = prg.predict (prg.x)                        // predict y for several points
    println ("predict ( ) = " + yyp)

    new Plot (t, y, yyp)

} // PolyRegressionTest object
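//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `PolyRegressionTest2` object is an illustrative sketch (NOT part of the
 *  original test suite) that fits polynomials of increasing order to the same
 *  quadratic data and prints the quality of fit for each.  It assumes the second
 *  element of the `fit` tuple is rSquared, as the `fit` doc comment suggests;
 *  since the data is quadratic, rSquared should improve little beyond order 2.
 */
object PolyRegressionTest2 extends App
{
    import scalation.random.Normal

    val noise = Normal (0.0, 500.0)
    val t     = VectorD.range (0, 100)
    val y     = new VectorD (t.dim)
    for (i <- 0 until 100) y(i) = 10.0 - 10.0 * i + i~^2 + noise.gen

    for (order <- 1 to 4) {
        val prg = new PolyRegression (t, y, order)
        prg.train ()
        println ("order " + order + ": rSquared = " + prg.fit._2)
    } // for
} // PolyRegressionTest2 object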