//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 1.6
 *  @date    Sat Jan 31 20:59:02 EST 2015
 *  @see     LICENSE (MIT style license file).
 *
 *  @title   Model: Ridge Regression (L2 Shrinkage/Regularization)
 *
 *  @see math.stackexchange.com/questions/299481/qr-factorization-for-ridge-regression
 *       Ridge Regression using SVD
 *  @see Hastie, T., Tibshirani, R., & Friedman, J. (2009). The Elements of Statistical Learning
 *
 *  Since regularization reduces near singularity, Cholesky is used as the default.
 */

package scalation.analytics

import scala.collection.mutable.Set
import scala.math.{log, sqrt}
import scala.util.control.Breaks.{break, breakable}

import scalation.linalgebra._
import scalation.math.noDouble
import scalation.minima.GoldenSectionLS
import scalation.plot.{Plot, PlotM}
import scalation.stat.Statistic
import scalation.util.banner

import Fit._
import RegTechnique._

//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `RidgeRegression` class supports multiple linear ridge regression.
 *  In this case, 'x' is multi-dimensional [x_1, ... x_k].  Ridge regression puts
 *  a penalty on the L2 norm of the parameters 'b' to reduce the chance of them
 *  taking on large values that may lead to less robust models.  Both the input
 *  matrix 'x' and the response vector 'y' are centered (zero mean).  Fit the
 *  parameter vector 'b' in the regression equation
 *  <p>
 *      y  =  b dot x + e  =  b_1 * x_1 + ... b_k * x_k + e
 *  <p>
 *  where 'e' represents the residuals (the part not explained by the model).
 *  Use Least-Squares (minimizing the residuals) to solve for the parameter
 *  vector 'b' using the regularized Normal Equations:
 *  <p>
 *      b  =  fac.solve (.)   with regularization   x.t * x + λ * I
 *  <p>
 *  Five factorization techniques are provided:
 *  <p>
 *      'QR'        // QR Factorization: slower, more stable
 *      'Cholesky'  // Cholesky Factorization: faster, less stable (default, reasonable choice)
 *      'SVD'       // Singular Value Decomposition: slowest, most robust
 *      'LU'        // LU Factorization: similar, but better than Inverse
 *      'Inverse'   // Inverse/Gaussian Elimination: classical textbook technique
 *  <p>
 *  @see statweb.stanford.edu/~tibs/ElemStatLearn/
 *  @param x          the centered data/input m-by-n matrix NOT augmented with a first column of ones
 *  @param y          the centered response/output m-vector
 *  @param fname_     the feature/variable names
 *  @param hparam     the shrinkage hyper-parameter, lambda (0 => OLS) in the penalty term 'lambda * b dot b'
 *  @param technique  the technique used to solve for b in (x.t*x + lambda*I)*b = x.t*y
 */
class RidgeRegression (x: MatriD, y: VectoD, fname_ : Strings = null,
                       hparam: HyperParameter = RidgeRegression.hp,
                       technique: RegTechnique = Cholesky)
      extends PredictorMat (x, y, fname_, hparam)
{
    resetDF (x.dim2, x.dim1 - x.dim2 - 1)          // no intercept => correct Degrees of Freedom (DoF)
                                                   // as lambda gets larger, need effective DoF

    private val DEBUG  = true                      // debug flag
    private var lambda = if (hparam ("lambda") <= 0.0) findLambda._1 else hparam ("lambda")

    type Fac_QR = Fac_QR_H [MatriD]                // change as needed

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the value of the shrinkage parameter 'lambda'.
     */
    def lambda_ : Double = lambda

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Create a solver for the Modified Normal Equations using the selected
     *  factorization technique.
     *  @param xx  the data/input matrix
     */
    private def solver (xx: MatriD = x): Factorization =
    {
        val xtx  = xx.t * xx                                      // precompute X.t * X
        val ey   = MatrixD.eye (xx.dim1, xx.dim2)                 // identity matrix
        val xtx_ = xtx.copy ().asInstanceOf [MatriD]              // copy xtx (X.t * X)
        for (i <- xtx_.range1) xtx_(i, i) += lambda               // add lambda to the diagonal

        technique match {                                         // select the factorization technique
        case QR       => val x_ = xx ++ (ey * sqrt (lambda))
                         new Fac_QR (x_)   // , false)            // QR Factorization
        case Cholesky => new Fac_Cholesky (xtx_)                  // Cholesky Factorization
//      case SVD      => new SVD (xx)                             // Singular Value Decomposition - FIX
        case LU       => new Fac_LU (xtx_)                        // LU Factorization
        case _        => new Fac_Inv (xtx_)                       // Inverse Factorization
        } // match
    } // solver

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Train the predictor by fitting the parameter vector (b-vector) in the
     *  multiple regression equation
     *  <p>
     *      yy  =  b dot x + e  =  [b_1, ... b_k] dot [x_1, ... x_k] + e
     *  <p>
     *  using the least squares method.
     *  @param x_  the data/input matrix
     *  @param y_  the response/output vector
     */
    def train (x_ : MatriD, y_ : VectoD): RidgeRegression =
    {
        val fac = solver (x_)                                     // create selected factorization technique
        fac.factor ()                                             // factor the matrix, either X or X.t * X

        b = technique match {                                     // solve for coefficient vector b
        case QR       => fac.solve (y_ ++ new VectorD (y_.dim))   // least squares on [X; sqrt(lambda)*I] b = [y; 0]
        case Cholesky => fac.solve (x_.t * y_)                    // L * L.t * b = X.t * y
//      case SVD      => fac.solve (y_)                           // FIX - give formula
        case LU       => fac.solve (x_.t * y_)                    // b = (X.t * X) \ X.t * y
        case _        => fac.solve (x_.t * y_)                    // b = (X.t * X)^-1 * X.t * y
        } // match
        this
    } // train

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Find an optimal value for the shrinkage parameter 'λ' using Cross Validation
     *  to minimize 'sse_cv'.  The search starts with the low default value for 'λ',
     *  doubles it with each iteration, and returns the minimum 'λ' and its
     *  corresponding cross-validated 'sse'.
     */
    def findLambda: (Double, Double) =
    {
        var l      = lambda
        var l_best = l
        var sse    = Double.MaxValue
        for (i <- 0 to 20) {
            RidgeRegression.hp("lambda") = l
            val rrg   = new RidgeRegression (x, y)
            val stats = rrg.crossValidate ()
            val sse2  = stats(Fit.index_sse).mean
            banner (s"RidgeRegression with lambda = ${rrg.lambda_} has sse = $sse2")
            if (sse2 < sse) { sse = sse2; l_best = l }
            if (DEBUG) showQofStatTable (stats)
            l *= 2
        } // for
        (l_best, sse)
    } // findLambda

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Find an optimal value for the shrinkage parameter 'λ' using Training to
     *  minimize 'sse'.
     *  FIX - try other QoF measures, e.g., sse_cv
     *  @param xx  the data/input matrix (full or test)
     *  @param yy  the response/output vector (full or test)
     */
    def findLambda2 (xx: MatriD = x, yy: VectoD = y): Double =
    {
        def f_sse (λ: Double): Double =
        {
            lambda = λ
            train (xx, yy)
            e = yy - xx * b
            val sse = e dot e
            if (sse.isNaN) throw new ArithmeticException ("sse is NaN")
            if (DEBUG) println (s"findLambda2: for lambda = $λ, sse = $sse")
            sse
        } // f_sse

        val gs = new GoldenSectionLS (f_sse _)
        gs.search ()
    } // findLambda2

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Build a sub-model that is restricted to the given columns of the data matrix.
     *  @param x_cols  the columns that the new model is restricted to
     */
    def buildModel (x_cols: MatriD): RidgeRegression =
    {
        new RidgeRegression (x_cols, y, null, hparam, technique)
    } // buildModel

} // RidgeRegression class
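//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
// A minimal sketch (`RidgeNormalEquationsSketch` is not part of the original
// API; the data and lambda value are made-up) that makes the Modified Normal
// Equations solved by 'solver'/'train' concrete for the default Cholesky path:
// (X.t * X + lambda * I) b = X.t * y.  Only `MatrixD`, `VectorD` and
// `Fac_Cholesky` from the scalation.linalgebra import above are used.

object RidgeNormalEquationsSketch extends App
{
    val x = new MatrixD ((4, 2), 1.0, 2.0,                        // made-up 4-by-2 data matrix
                                 2.0, 1.0,
                                 3.0, 4.0,
                                 4.0, 3.0)
    val y      = VectorD (3.0, 3.0, 7.0, 7.0)                     // made-up response vector
    val lambda = 0.5                                              // assumed shrinkage value

    val xtx = x.t * x                                             // X.t * X
    for (i <- xtx.range1) xtx(i, i) += lambda                     // add lambda to the diagonal
    val fac = new Fac_Cholesky (xtx)                              // factor X.t*X + lambda*I (class default)
    fac.factor ()
    val b = fac.solve (x.t * y)                                   // b = (X.t*X + lambda*I)^-1 * X.t*y
    println (s"ridge coefficients b = $b")
} // RidgeNormalEquationsSketch object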
//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `RidgeRegression` companion object defines hyper-parameters and provides
 *  factory functions for the `RidgeRegression` class.
 */
object RidgeRegression extends ModelFactory
{
    /** Base hyper-parameter specification for `RidgeRegression`
     */
    val hp = new HyperParameter; hp += ("lambda", 0.01, 0.01)     // L2 regularization/shrinkage parameter

    val drp = (null, hp, Cholesky)                                // default remaining parameters

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Create a Ridge Regression from a combined data matrix.
     *  @param xy         the combined data/input m-by-n matrix and response m-vector,
     *                    NOT augmented with a first column of ones (this factory centers both)
     *  @param fname      the feature/variable names
     *  @param hparam     the shrinkage hyper-parameter (0 => OLS) in the penalty term 'lambda * b dot b'
     *  @param technique  the technique used to solve for b in (x.t*x + lambda*I)*b = x.t*y
     */
    def apply (xy: MatriD, fname: Strings = null,
               hparam: HyperParameter = hp,
               technique: RegTechnique = Cholesky): RidgeRegression =
    {
        val n = xy.dim2
        if (n < 2) {
            flaw ("apply", s"dim2 = $n of the 'xy' matrix must be at least 2")
            null
        } else {
            val (x, y) = pullResponse (xy)
            val mu_x = x.mean                                     // columnwise mean of x
            val mu_y = y.mean                                     // mean of y
            val x_c  = x - mu_x                                   // centered x (columnwise)
            val y_c  = y - mu_y                                   // centered y
            new RidgeRegression (x_c, y_c, fname, hparam, technique)
        } // if
    } // apply

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Create a Ridge Regression from a data matrix and response vector.
     *  @param x          the data/input m-by-n matrix, NOT augmented with a first
     *                    column of ones (this factory centers it)
     *  @param y          the response/output vector (this factory centers it)
     *  @param fname      the feature/variable names
     *  @param hparam     the shrinkage hyper-parameter (0 => OLS) in the penalty term 'lambda * b dot b'
     *  @param technique  the technique used to solve for b in (x.t*x + lambda*I)*b = x.t*y
     */
    def apply (x: MatriD, y: VectoD, fname: Strings, hparam: HyperParameter,
               technique: RegTechnique): RidgeRegression =
    {
        val hp2 = if (hparam == null) hp else hparam
        val n   = x.dim2
        if (n < 1) {
            flaw ("apply", s"dim2 = $n of the 'x' matrix must be at least 1")
            null
        } else {
            val mu_x = x.mean                                     // columnwise mean of x
            val mu_y = y.mean                                     // mean of y
            val x_c  = x - mu_x                                   // centered x (columnwise)
            val y_c  = y - mu_y                                   // centered y
            new RidgeRegression (x_c, y_c, fname, hp2, technique)
        } // if
    } // apply

} // RidgeRegression object
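//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
// A minimal sketch (made-up data; `RidgeInterceptSketch` is not part of the
// original API) of why the test objects below add 'mu_y' back after predicting
// with centered inputs: for a model fit on centered data, the intercept of the
// equivalent uncentered model is b_0 = mu_y - (b dot mu_x), so
// predict (z - mu_x) + mu_y equals b_0 + (b dot z).  It assumes the 'parameter'
// accessor inherited via `PredictorMat` returns the fitted coefficient vector b.

object RidgeInterceptSketch extends App
{
    val x = new MatrixD ((4, 2), 1.0, 1.0,                        // made-up 4-by-2 data matrix
                                 1.0, 2.0,
                                 2.0, 1.0,
                                 2.0, 2.0)
    val y = VectorD (6.0, 8.0, 7.0, 9.0)                          // made-up response vector
    val z = VectorD (2.0, 3.0)                                    // new instance to predict

    val mu_x = x.mean                                             // columnwise mean of x
    val mu_y = y.mean                                             // mean of y
    val rrg  = new RidgeRegression (x - mu_x, y - mu_y)           // fit on centered data
    rrg.analyze ()                                                // train and evaluate

    val b  = rrg.parameter                                        // fitted coefficients (assumed accessor)
    val b0 = mu_y - (b dot mu_x)                                  // recover the implicit intercept
    println (s"via centering: ${rrg.predict (z - mu_x) + mu_y}")
    println (s"via intercept: ${b0 + (b dot z)}")                 // should match the line above
} // RidgeInterceptSketch object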
//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `RidgeRegressionTest` object tests the `RidgeRegression` class using the
 *  following regression equation.
 *  <p>
 *      y  =  b dot x  =  b_1 * x_1 + b_2 * x_2.
 *  <p>
 *  It compares `RidgeRegression` with `Regression`.
 *  @see http://statmaster.sdu.dk/courses/st111/module03/index.html
 *  > runMain scalation.analytics.RidgeRegressionTest
 */
object RidgeRegressionTest extends App
{
    // 5 data points:             x_0    x_1
    val x = new MatrixD ((5, 2), 36.0,  66.0,                     // 5-by-2 data matrix
                                 37.0,  68.0,
                                 47.0,  64.0,
                                 32.0,  53.0,
                                  1.0, 101.0)
    val y = VectorD (745.0, 895.0, 442.0, 440.0, 1598.0)          // 5-dim response vector

    println (s"x = $x \ny = $y")

    banner ("Regression")
    val ox = VectorD.one (y.dim) +^: x                            // prepend a column of all 1's
    val rg = new Regression (ox, y)                               // create a Regression model
    println (rg.analyze ().report)                                // analyze and report

    banner ("RidgeRegression")
    val mu_x = x.mean                                             // columnwise mean of x
    val mu_y = y.mean                                             // mean of y
    val x_c  = x - mu_x                                           // centered x (columnwise)
    val y_c  = y - mu_y                                           // centered y
    val rrg  = new RidgeRegression (x_c, y_c)                     // create a Ridge Regression model
    println (rrg.analyze ().report)                               // analyze and report

    banner ("Make Predictions")
    val z   = VectorD (20.0, 80.0)                                // new instance to predict
    val _1z = VectorD.++ (1.0, z)                                 // prepend 1 to z
    val z_c = z - mu_x                                            // center z
    println (s"rg.predict ($z) = ${rg.predict (_1z)}")            // predict using _1z
    println (s"rrg.predict ($z) = ${rrg.predict (z_c) + mu_y}")   // predict using z_c and add y's mean

    banner ("Compare Summaries")
    println (rg.summary)
    println (rrg.summary)

} // RidgeRegressionTest object
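//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
// A minimal sketch (made-up data; `RidgeQRSketch` is not part of the original
// API) of the QR path in 'solver'/'train': ridge regression is recast as
// ordinary least squares on an augmented system,
// min || [X; sqrt(lambda)*I] b - [y; 0] ||, mirroring the
// 'xx ++ (ey * sqrt (lambda))' and 'y_ ++ new VectorD (y_.dim)' lines above.
// @see math.stackexchange.com/questions/299481/qr-factorization-for-ridge-regression

object RidgeQRSketch extends App
{
    val x = new MatrixD ((4, 2), 1.0, 2.0,                        // made-up 4-by-2 data matrix
                                 2.0, 1.0,
                                 3.0, 4.0,
                                 4.0, 3.0)
    val y      = VectorD (3.0, 3.0, 7.0, 7.0)                     // made-up response vector
    val lambda = 0.5                                              // assumed shrinkage value

    val ey  = MatrixD.eye (x.dim1, x.dim2)                        // identity matrix, as in 'solver'
    val x_  = x ++ (ey * sqrt (lambda))                           // stack sqrt(lambda) * I under X
    val y_  = y ++ new VectorD (y.dim)                            // pad y with zeros, as in 'train'
    val fac = new Fac_QR_H [MatriD] (x_)                          // QR factorize the augmented matrix
    fac.factor ()
    println (s"ridge coefficients b = ${fac.solve (y_)}")         // least-squares solve = ridge solution
} // RidgeQRSketch object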
//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `RidgeRegressionTest2` object tests the `RidgeRegression` class using the
 *  following regression equation.
 *  <p>
 *      y  =  b dot x  =  b_1 * x_1 + b_2 * x_2.
 *  <p>
 *  Try a non-default value for the 'lambda' hyper-parameter.
 *  > runMain scalation.analytics.RidgeRegressionTest2
 */
object RidgeRegressionTest2 extends App
{
    import RidgeRegression.hp

    println (s"hp = $hp")
    val hp2 = hp.updateReturn ("lambda", 1.0)                     // try different values
    println (s"hp2 = $hp2")

    // 5 data points:             x_0    x_1
    val x = new MatrixD ((5, 2), 36.0,  66.0,                     // 5-by-2 data matrix
                                 37.0,  68.0,
                                 47.0,  64.0,
                                 32.0,  53.0,
                                  1.0, 101.0)
    val y = VectorD (745.0, 895.0, 442.0, 440.0, 1598.0)
    val z = VectorD (20.0, 80.0)

    println ("x = " + x + "\ny = " + y + "\nz = " + z)

    // compute centered (zero mean) versions of x and y
    val mu_x = x.mean                                             // columnwise mean of x
    val mu_y = y.mean                                             // mean of y
    val x_c  = x - mu_x                                           // centered x (columnwise)
    val y_c  = y - mu_y                                           // centered y
    println (s"x_c = $x_c \ny_c = $y_c")

    banner ("RidgeRegression")
    val rrg = new RidgeRegression (x_c, y_c, hparam = hp2)
    println (rrg.analyze ().report)
    println (rrg.summary)

    val z_c = z - mu_x                                            // center z first
    val yp  = rrg.predict (z_c) + mu_y                            // predict z_c and add y's mean
    println (s"predict ($z) = $yp")

    banner ("Optimize lambda")
    println (s"findLambda2 = ${rrg.findLambda2 ()}")

} // RidgeRegressionTest2 object
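//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
// A minimal sketch (made-up objective; `GoldenSectionSketch` is not part of the
// original API) of how 'findLambda2' uses `GoldenSectionLS`: the optimizer
// takes a scalar function (there, lambda => sse after retraining) and
// 'search ()' returns an approximate minimizer.

object GoldenSectionSketch extends App
{
    def f (lambda: Double): Double = (lambda - 3.0) * (lambda - 3.0) + 1.0   // convex stand-in for sse

    val gs = new GoldenSectionLS (f _)
    println (s"approximate minimizer = ${gs.search ()}")          // expect a value near 3
} // GoldenSectionSketch object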
//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `RidgeRegressionTest3` object tests the `RidgeRegression` class using the
 *  following regression equation.
 *  <p>
 *      y  =  b dot x  =  b_1 * x_1 + b_2 * x_2.
 *  <p>
 *  Test regression, forward selection and backward elimination.
 *  > runMain scalation.analytics.RidgeRegressionTest3
 */
object RidgeRegressionTest3 extends App
{
    // 5 data points:             x_0    x_1
    val x = new MatrixD ((5, 2), 36.0,  66.0,                     // 5-by-2 data matrix
                                 37.0,  68.0,
                                 47.0,  64.0,
                                 32.0,  53.0,
                                  1.0, 101.0)
    val y = VectorD (745.0, 895.0, 442.0, 440.0, 1598.0)
    val z = VectorD (20.0, 80.0)

    println ("x = " + x + "\ny = " + y + "\nz = " + z)

    // compute centered (zero mean) versions of x and y
    val mu_x = x.mean                                             // columnwise mean of x
    val mu_y = y.mean                                             // mean of y
    val x_c  = x - mu_x                                           // centered x (columnwise)
    val y_c  = y - mu_y                                           // centered y
    println (s"x_c = $x_c \ny_c = $y_c")

    banner ("RidgeRegression")
    val rrg = new RidgeRegression (x_c, y_c)
    println (rrg.analyze ().report)
    println (rrg.summary)

    banner ("Forward Selection Test")
    rrg.forwardSelAll (cross = false)

    banner ("Backward Elimination Test")
    rrg.backwardElimAll (cross = false)

} // RidgeRegressionTest3 object
//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `RidgeRegressionTest4` object tests the `RidgeRegression` class using the
 *  following regression equation.
 *  <p>
 *      y  =  b dot x  =  b_1 * x_1 + b_2 * x_2.
 *  <p>
 *  > runMain scalation.analytics.RidgeRegressionTest4
 */
object RidgeRegressionTest4 extends App
{
    // 4 data points:             x_1  x_2   y
    val xy = new MatrixD ((4, 3), 1.0, 1.0, 6.0,                  // 4-by-3 combined matrix
                                  1.0, 2.0, 8.0,
                                  2.0, 1.0, 7.0,
                                  2.0, 2.0, 9.0)
    val (x, y) = pullResponse (xy)                                // divides into data matrix and response vector
    val z = VectorD (2.0, 3.0)

    println ("x = " + x)
    println ("y = " + y)

    val rrg = RidgeRegression (xy)                                // factory function does centering
    println (rrg.analyze ().report)
    println (rrg.summary)

    val z_c = z - x.mean                                          // first center z
    val yp  = rrg.predict (z_c) + y.mean                          // predict z_c and add y's mean
    println (s"predict ($z) = $yp")

} // RidgeRegressionTest4 object
//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `RidgeRegressionTest5` object tests the multi-collinearity method in the
 *  `RidgeRegression` class using the following regression equation.
 *  <p>
 *      y  =  b dot x  =  b_1 * x_1 + b_2 * x_2 + b_3 * x_3 + b_4 * x_4
 *  <p>
 *  @see online.stat.psu.edu/online/development/stat501/12multicollinearity/05multico_vif.html
 *  @see online.stat.psu.edu/online/development/stat501/data/bloodpress.txt
 *  > runMain scalation.analytics.RidgeRegressionTest5
 */
object RidgeRegressionTest5 extends App
{
    import ExampleBPressure._

    var rrg = new RidgeRegression (x, y)                          // no intercept
    println (rrg.analyze ().report)
    println (rrg.summary)

} // RidgeRegressionTest5 object

//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `RidgeRegressionTest6` object tests the `RidgeRegression` class using the
 *  AutoMPG dataset.  It illustrates using the `Relation` class for reading the
 *  data from a .csv file "auto-mpg.csv".  Assumes no missing values.
 *  It also combines feature selection with cross-validation and plots
 *  R^2, R^2 bar and R^2 cv vs. the number of variables in the model.
 *  > runMain scalation.analytics.RidgeRegressionTest6
 */
object RidgeRegressionTest6 extends App
{
    import scalation.columnar_db.Relation

    banner ("auto_mpg relation")
    val auto_tab = Relation (BASE_DIR + "auto-mpg.csv", "auto_mpg", null, -1)
    auto_tab.show ()

    banner ("auto_mpg (x, y) dataset")
    val (x, y) = auto_tab.toMatriDD (1 to 6, 0)
    println (s"x = $x")
    println (s"y = $y")

    banner ("auto_mpg regression")
    val rrg = RidgeRegression (x, y, null, RidgeRegression.hp, Cholesky)
    println (rrg.analyze ().report)
    println (rrg.summary)
    val n = x.dim2                                                // number of variables

    banner ("Forward Selection Test")
    val (cols, rSq) = rrg.forwardSelAll ()                        // R^2, R^2 bar, R^2 cv
    val k = cols.size
    val t = VectorD.range (1, k)                                  // index: number of variables in the model
    new PlotM (t, rSq.t, Array ("R^2", "R^2 bar", "R^2 cv"),
               "R^2 vs n for RidgeRegression", lines = true)
    println (s"rSq = $rSq")

} // RidgeRegressionTest6 object

//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `RidgeRegressionTest7` object tests the `RidgeRegression` class using the
 *  AutoMPG dataset.  It illustrates using the `Relation` class for reading the
 *  data from a .csv file "auto-mpg.csv".  Assumes no missing values.
 *  It also uses the 'findLambda' method to search for a shrinkage parameter
 *  that roughly minimizes 'sse_cv'.
 *  > runMain scalation.analytics.RidgeRegressionTest7
 */
object RidgeRegressionTest7 extends App
{
    import scalation.columnar_db.Relation

    banner ("auto_mpg relation")
    val auto_tab = Relation (BASE_DIR + "auto-mpg.csv", "auto_mpg", null, -1)
    auto_tab.show ()

    banner ("auto_mpg (x, y) dataset")
    val (x, y) = auto_tab.toMatriDD (1 to 6, 0)

    banner ("auto_mpg Ridge Regression")
    val rrg = RidgeRegression (x, y, null, RidgeRegression.hp, Cholesky)
    println (rrg.analyze ().report)
    println (rrg.summary)

    println (s"best (lambda, sse) = ${rrg.findLambda}")

} // RidgeRegressionTest7 object
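//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
// A minimal sketch (made-up data; `RidgeShrinkageSketch` is not part of the
// original API) of the L2 shrinkage effect itself: as 'lambda' is doubled, as
// in 'findLambda', the fitted coefficients shrink toward zero.  It assumes the
// 'parameter' accessor inherited via `PredictorMat` returns the fitted
// coefficient vector b.

object RidgeShrinkageSketch extends App
{
    val x = new MatrixD ((4, 2), 1.0, 1.0,                        // made-up 4-by-2 data matrix
                                 1.0, 2.0,
                                 2.0, 1.0,
                                 2.0, 2.0)
    val y = VectorD (6.0, 8.0, 7.0, 9.0)                          // made-up response vector

    val mu_x = x.mean                                             // columnwise mean of x
    val mu_y = y.mean                                             // mean of y

    var l = 0.01                                                  // start from the default lambda
    for (i <- 0 to 10) {
        RidgeRegression.hp ("lambda") = l                         // set the shrinkage hyper-parameter
        val rrg = new RidgeRegression (x - mu_x, y - mu_y)
        rrg.analyze ()                                            // train and evaluate
        println (s"lambda = $l, b = ${rrg.parameter}")            // coefficients shrink as lambda grows
        l *= 2                                                    // double lambda, as in 'findLambda'
    } // for
} // RidgeShrinkageSketch object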