//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 1.3
 *  @date    Tue Apr 18 14:24:14 EDT 2017
 *  @see     LICENSE (MIT style license file).
 *
 *  This version optimizes λ
 */

// U N D E R   D E V E L O P M E N T

package scalation.analytics

import scala.collection.immutable.ListMap
import scala.math.{abs, log, sqrt}

import scalation.linalgebra._
import scalation.minima._
import scalation.plot.Plot
import scalation.random.PermutedVecI
import scalation.random.RNGStream.ranStream
import scalation.util.Error
import scalation.util.Unicode.sub

//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `LassoRegression2` class supports L1-regularized (Lasso) multiple linear
 *  regression.  In this case, 'x' is multi-dimensional [1, x_1, ... x_k].
 *  Fit the parameter vector 'b' in the regression equation
 *
 *      y  =  b dot x + e  =  b_0 + b_1 * x_1 + ... b_k * x_k + e
 *
 *  where 'e' represents the residuals (the part not explained by the model).
 *  Rather than minimizing the residuals alone (Least-Squares), fit 'b' by
 *  minimizing the L1-penalized objective
 *
 *      min_b  ||y - x * b||_2^2 + λ * ||b||_1
 *
 *  using the ADMM algorithm provided by `scalation.minima.LassoAdmm`.
 *  @see see.stanford.edu/materials/lsoeldsee263/05-ls.pdf
 *  @param x   the input/design m-by-n matrix augmented with a first column of ones
 *  @param y   the response vector
 *  @param λ0  the initial value for the regularization weight (a negative value
 *             means λ is to be optimized by cross-validation)
 */
class LassoRegression2 [MatT <: MatriD, VecT <: VectoD] (x: MatT, y: VecT, λ0: Double = -1.0)
      extends Predictor with Error
{
    if (y != null && x.dim1 != y.dim) flaw ("constructor", "dimensions of x and y are incompatible")
//  if (x.dim1 <= x.dim2) flaw ("constructor", "not enough data rows in matrix to use regression")

    private val DEBUG  = false                             // debug flag
    private val k      = x.dim2 - 1                        // number of variables (k = n-1)
    private val m      = x.dim1.toDouble                   // number of data points (rows)
    private val n      = x.dim2.toDouble                   // number of terms/columns (n = k+1)
    private val df     = (m - k - 1).toInt                 // degrees of freedom
    private val r_df   = (m - 1.0) / df                    // ratio of degrees of freedom
    private var rBarSq = -1.0                              // adjusted R-squared
    private var fStat  = -1.0                              // F statistic (quality of fit)
    private var aic    = -1.0                              // Akaike Information Criterion (AIC)
    private var bic    = -1.0                              // Bayesian Information Criterion (BIC)

    private var stdErr: VectoD = null                      // standard error of coefficients for each x_j
    private var t: VectoD      = null                      // t statistics for each x_j
    private var p: VectoD      = null                      // p values for each x_j
    private var cvSSE: Double  = -1.0                      // cross-validation sse
    private var cvRMSE: Double = -1.0                      // cross-validation rmse

    e = new VectorD (x.dim1)                               // initialize the residual vector

    private val useCV = true                               // flag for using cross-validation (rather than the training sse) in 'gcv'
    private var λ     = -1.0                               // shrinkage parameter
    private val nx    = 10                                 // number of folds for cross-validation
    private var foldIndices: Array [VectoI] = null         // row indices of the testing set for each fold
    private val xtxFolds = Array.ofDim [MatriD] (nx)       // cached (x.t * x + ρI).inv for each fold
    private val xtyFolds = Array.ofDim [VectoD] (nx)       // cached x.t * y for each fold

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Create training and testing folds for cross-validation.
     *  (x.t * x + ρI).inv and x.t * y are computed for each training fold and
     *  cached for performance, so repeated calls to 'crossValidateRand' (one per
     *  candidate λ) need not refactor the same matrices.
     */
    def createTrainFolds () =
    {
        val permutedVec = PermutedVecI (VectorI.range (0, x.dim1), ranStream)
        val randOrder   = permutedVec.igen
        foldIndices     = randOrder.split (nx)

//      for (l <- (0 until nx).par) {                      // parallel version
        for (l <- 0 until nx) {                            // l-th fold
            val v  = x.asInstanceOf [MatrixD] ()           // get array inside of matrix x
            val trainIndices = (x.range1 diff foldIndices(l)()).toArray
            val v2 = Array.ofDim [Array [Double]] (x.dim1 - foldIndices(l).dim)
            for (i <- trainIndices.indices) v2(i) = v(trainIndices(i))
            val x_l = new MatrixD (v2)                     // training rows for fold l
            val y_l = y.select (trainIndices)              // corresponding responses

            val xt     = x_l.t
            val xtx_ρI = xt * x_l
            for (i <- xtx_ρI.range1) xtx_ρI(i, i) += LassoAdmm.ρ   // x.t * x + ρI
            xtxFolds(l) = xtx_ρI.inverse                   // cache inverse of x.t * x + ρI
            xtyFolds(l) = xt * y_l                         // cache x.t * y
        } // for
    } // createTrainFolds
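    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Illustrative sketch only (not called by this class): given the cached
     *  'xtxρIinv' = (x.t * x + ρI).inv and 'xty' = x.t * y for a fold, the ADMM
     *  x-update reduces to a single matrix-vector product,
     *
     *      b  =  (x.t * x + ρI).inv * (x.t * y + (z - u) * ρ)
     *
     *  which is why caching these per-fold matrices pays off when many candidate
     *  λ values are tried.  `LassoAdmm.solveCached` performs the full iteration;
     *  this helper only shows the core step.
     *  @param xtxρIinv  the cached inverse of x.t * x + ρI for the fold
     *  @param xty       the cached x.t * y for the fold
     *  @param z         the current ADMM auxiliary variable
     *  @param u         the current ADMM scaled dual variable
     *  @param ρ         the ADMM penalty parameter
     */
    private def admmXUpdate (xtxρIinv: MatriD, xty: VectoD, z: VectoD, u: VectoD, ρ: Double): VectoD =
        xtxρIinv * (xty + (z - u) * ρ)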
    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Train the predictor by fitting the parameter vector (b-vector) in the
     *  multiple regression equation for the response passed into the class 'y'.
     */
//  def train () { train (y) }

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a set of data vectors 'x's and their corresponding responses 'yy's,
     *  train the prediction function 'yy = f(x)' by fitting its parameters.
     *  The 'x' values must be provided by the implementing class.
     *  @param yy  the response vector
     */
    override def train (yy: VectoD = y): Unit =
    {
        λ = if (λ0 < 0) gcv (yy) else λ0                   // optimize λ, unless given
        crossValidateRand (λ)                              // compute cvSSE/cvRMSE for the chosen λ
        train (x, yy, λ)                                   // train on the full data set
        LassoAdmm.reset
        e = yy - x * b                                     // residual/error vector
        diagnose (yy)
    } // train

    def eval (yy: VectorD) { /* FIX - to be implemented */ }

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Train the predictor by fitting the parameter vector (b-vector) in the
     *  multiple regression equation
     *
     *      y  =  b dot x + e  =  [b_0, ... b_k] dot [1, x_1, ... x_k] + e
     *
     *  regularized by the sum of magnitudes of the coefficients.
     *  @see pdfs.semanticscholar.org/969f/077a3a56105a926a3b0c67077a57f3da3ddf.pdf
     *  @see `scalation.minima.LassoAdmm`
     *  @param xx  the data/input matrix to train on
     *  @param yy  the response vector
     *  @param λ   the weight on the L1 penalty
     */
    private def train (xx: MatriD, yy: VectoD, λ: Double): Double =
    {
        b = LassoAdmm.solve (xx.asInstanceOf [MatrixD], yy, λ)
        val e = yy - xx * b                                // local residual vector
        e dot e                                            // return the sse
    } // train
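    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Illustrative sketch only (not called by this class): the soft-thresholding
     *  operator at the heart of the ADMM z-update for Lasso,
     *
     *      S_κ(v)_i  =  sign(v_i) * max(|v_i| - κ, 0)
     *
     *  It is what makes the L1 penalty drive small coefficients exactly to zero;
     *  `LassoAdmm` contains the production implementation.
     *  @param v  the vector to shrink (as a plain array)
     *  @param κ  the shrinkage threshold
     */
    private def softThreshold (v: Array [Double], κ: Double): Array [Double] =
        v.map ((vi: Double) => math.signum (vi) * math.max (abs (vi) - κ, 0.0))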
    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a set of data vectors 'x's and their corresponding responses 'y's,
     *  passed into the implementing class, train the prediction function 'y = f(x)'
     *  by fitting its parameters, skipping the testing region.
     *  @param k  the k-th testing fold
     *  @param λ  the weight on the L1 penalty
     */
    private def train (k: Int, λ: Double): VectoD =
    {
        LassoAdmm.solveCached (xtxFolds(k), xtyFolds(k), λ)
    } // train

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Test the model on the 'itest' rows, storing the residuals in 'e'.
     *  @param itest  the row indices of the testing fold
     *  @param b      the parameter vector to use for prediction
     */
    private def test (itest: VectoI, b: VectoD) =
    {
        for (i <- itest) e(i) = y(i) - predict (x(i), b)
    } // test

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Use Generalized Cross-Validation to find an optimal value for the shrinkage
     *  parameter λ, by minimizing the (cross-validation) sse with a Golden Section
     *  line search.
     *  @param yy  the response vector to work with
     */
    private def gcv (yy: VectoD): Double =
    {
        if (useCV) createTrainFolds ()                     // initialize folds for cross-validation

        def f_sse (λ: Double): Double =
        {
            val sse = if (useCV) crossValidateRand (λ)
                      else       train (x, yy, λ)
            if (sse.isNaN) throw new ArithmeticException ("sse is NaN")
            sse
        } // f_sse

        val gs = new GoldenSectionLS (f_sse _)
        gs.search ()
    } // gcv

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Test the accuracy of the predicted results by cross-validation, returning
     *  the sse.
     *  @param λ   the weight on the L1 penalty
     *  @param nx  number of crosses and cross-validations (defaults to 10x);
     *             must agree with the class-level 'nx' used to build the folds
     */
    def crossValidateRand (λ: Double, nx: Int = 10): Double =
    {
        if (foldIndices == null) createTrainFolds ()       // make sure the folds/caches exist
//      for (l <- (0 until nx).par) {                      // parallel version
        for (l <- 0 until nx) {
            val b = train (l, λ)                           // train, skipping the l-th fold
            test (foldIndices(l), b)                       // record residuals on the l-th fold
        } // for
//      diagnose (y)
        cvSSE  = e dot e
        cvRMSE = sqrt (cvSSE / e.dim)
        cvSSE
    } // crossValidateRand

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute diagnostics for the regression model.
     *  FIX - correct the calculations
     *  @param yy  the response vector
     */
    protected override def diagnose (yy: VectoD)
    {
        super.diagnose (yy)
        rBarSq = 1.0 - (1.0 - rSq) * r_df                  // R-bar-squared (adjusted R-squared)
        fStat  = ((sst - sse) * df) / (sse * k)            // F statistic (msr / mse)
        aic    = m * log (sse) - m * log (m) + 2.0 * (k + 1)   // Akaike Information Criterion (AIC)
        bic    = aic + (k + 1) * (log (m) - 2.0)           // Bayesian Information Criterion (BIC)

//      val facCho = new Fac_Cholesky (x.t * x)            // create a Cholesky factorization
//      val l_inv  = facCho.factor1 ().inverse             // take inverse of l from Cholesky factorization
//      val varEst = sse / df                              // variance estimate
//      val varCov = l_inv.t * l_inv * varEst              // variance-covariance matrix
//
//      stdErr = varCov.getDiag ().map (sqrt (_))          // standard error of coefficients
//      t      = b / stdErr                                // Student's T statistic
//      p      = t.map ((x: Double) => 2.0 * studentTCDF (-abs (x), df))   // p values
    } // diagnose

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the quality of fit.
     */
    override def fit: VectoD = super.fit.asInstanceOf [VectorD] ++ VectorD (rBarSq, fStat, aic, bic)

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the labels for the fit.
     */
    override def fitLabels: Seq [String] = super.fitLabels ++ Seq ("rBarSq", "fStat", "aic", "bic")

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Predict the value of y = f(z) by evaluating the formula y = b dot z,
     *  e.g., (b_0, b_1, b_2) dot (1, z_1, z_2).
     *  @param z  the new vector to predict
     */
    def predict (z: VectoD): Double = predict (z, b)

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Predict the value of y = f(z) by evaluating the formula y = b dot z.
     *  @param z  the new vector to predict
     *  @param b  the coefficient vector
     */
    def predict (z: VectoD, b: VectoD): Double = b dot z

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Predict the value of y = f(z) by evaluating the formula y = b dot z for
     *  each row of matrix z.
     *  @param z  the new matrix to predict
     */
    def predict (z: MatT): VectoD = z * b

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Print results and diagnostics for each predictor 'x_j' and the overall
     *  quality of fit.  The stdErr/t/p columns are printed only when available
     *  (their computation in 'diagnose' is currently disabled).
     */
    def report ()
    {
        println ("Coefficients:")
        println ("        |   Estimate |     StdErr |  t value |  Pr(>|t|)")
        for (j <- 0 until b.dim) {
            if (stdErr != null)
                println ("%7s | %10.6f | %10.6f | %8.4f | %9.5f".format ("x" + sub(j), b(j), stdErr(j), t(j), p(j)))
            else
                println ("%7s | %10.6f".format ("x" + sub(j), b(j)))
        } // for
        println ()
        println ("SSE:             %.4f".format (sse))
        println ("CV-SSE:          %.4f".format (cvSSE))
        println ("Residual stdErr: %.4f on %d degrees of freedom".format (sqrt (sse / df), df))
        println ("R-Squared:       %.4f, Adjusted rSquared: %.4f".format (rSq, rBarSq))
        println ("F-Statistic:     %.4f on %d and %d DF".format (fStat, k, df))
        println ("AIC:             %.4f".format (aic))
        println ("BIC:             %.4f".format (bic))
        println ("λ:               %.10f".format (λ))
        println ("-" * 80)
    } // report

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the quality of fit metrics as a map of labeled values.
     */
    override def metrics: Map [String, Any] =
    {
        ListMap ("Degrees of Freedom"  -> df,
                 "Effective DF"        -> (m - b.count (abs (_) < 1E-6) - 1),
                 "Residual stdErr"     -> "%.4f".format (sqrt (sse / df)),
                 "R-Squared"           -> "%.4f".format (rSq),
                 "Adjusted R-Squared"  -> "%.4f".format (rBarSq),
//               "Predicted R-Squared" -> "%.4f".format (rSquaredPred),
                 "F-Statistic"         -> "%.4f".format (fStat),
                 "SSE"                 -> "%.4f".format (sse),
                 "RMSE"                -> "%.4f".format (rmse),
//               "CVE"                 -> "%.4f".format (cve),
//               "PRESS"               -> "%.4f".format (press),
//               "MAE"                 -> "%.4f".format (mae),
                 "AIC"                 -> "%.4f".format (aic),
                 "BIC"                 -> "%.4f".format (bic),
                 "CV RRSE"             -> "%.4f".format (cvRMSE / sqrt (((y - y.mean) ~^ 2).mean)),
                 "CV RMSE"             -> "%.4f".format (cvRMSE),
                 "CV R-Squared"        -> "%.4f".format (rSq),     // FIX - currently reuses the training rSq
                 "λ"                   -> "%.4f".format (λ))
    } // metrics

} // LassoRegression2 class
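//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `LassoObjective` object is an illustrative sketch (the name is ours, not
 *  part of the ScalaTion API): it evaluates the Lasso objective
 *
 *      f(b)  =  ||y - x * b||_2^2 + λ * ||b||_1
 *
 *  that `LassoRegression2`/`LassoAdmm` minimize (up to the conventional 1/2
 *  scaling of the squared-error term), which is handy for checking a fitted
 *  parameter vector by hand.
 */
object LassoObjective
{
    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the value of the Lasso objective at 'b'.
     *  @param x  the input/design matrix
     *  @param y  the response vector
     *  @param b  the candidate parameter vector
     *  @param λ  the weight on the L1 penalty
     */
    def apply (x: MatriD, y: VectoD, b: VectoD, λ: Double): Double =
    {
        val e  = y - x * b                                 // residual vector
        var l1 = 0.0                                       // L1 norm of b
        for (j <- 0 until b.dim) l1 += abs (b(j))
        (e dot e) + λ * l1                                 // sse + weighted L1 penalty
    } // apply

} // LassoObjective object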

//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `LassoRegression2Test` object tests the `LassoRegression2` class using
 *  the following regression equation.
 *
 *      y  =  b dot x  =  b_0 + b_1*x_1 + b_2*x_2
 *
 *  Test the regression fit, the report and predictions.
 *  @see statmaster.sdu.dk/courses/st111/module03/index.html
 *  > runMain scalation.analytics.LassoRegression2Test
 */
object LassoRegression2Test extends App
{
    // 5 data points:        constant term, x_1 coordinate, x_2 coordinate
    val x = new MatrixD ((5, 3), 1.0, 36.0,  66.0,         // 5-by-3 matrix
                                 1.0, 37.0,  68.0,
                                 1.0, 47.0,  64.0,
                                 1.0, 32.0,  53.0,
                                 1.0,  1.0, 101.0)
    val y = VectorD (745.0, 895.0, 442.0, 440.0, 1598.0)
    val z = VectorD (1.0, 20.0, 80.0)

//  println ("model: y = b_0 + b_1*x_1 + b_2*x_2")
    println ("model: y = b₀ + b₁*x₁ + b₂*x₂")
    println ("x = " + x)
    println ("y = " + y)

    val rg = new LassoRegression2 (x, y)
    rg.train ()
    println ("b = " + rg.coefficient)
    rg.report ()
    println ("fit = " + rg.fit)

    val yp = rg.predict (z)                                // predict y for one point
    println ("predict (" + z + ") = " + yp)

    val yyp = rg.predict (x)                               // predict y for several points
    println ("predict (" + x + ") = " + yyp)

    new Plot (x.col(1), y, yyp)
    new Plot (x.col(2), y, yyp)

} // LassoRegression2Test object

//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `LassoRegression2Test2` object tests the `LassoRegression2` class using
 *  the following regression equation, this time passing the regularization
 *  weight λ0 explicitly (a negative value, so λ is still optimized).
 *
 *      y  =  b dot x  =  b_0 + b_1*x_1 + b_2*x_2
 *
 *  @see statmaster.sdu.dk/courses/st111/module03/index.html
 *  > runMain scalation.analytics.LassoRegression2Test2
 */
object LassoRegression2Test2 extends App
{
    // 5 data points:        constant term, x_1 coordinate, x_2 coordinate
    val x = new MatrixD ((5, 3), 1.0, 36.0,  66.0,         // 5-by-3 matrix
                                 1.0, 37.0,  68.0,
                                 1.0, 47.0,  64.0,
                                 1.0, 32.0,  53.0,
                                 1.0,  1.0, 101.0)
    val y = VectorD (745.0, 895.0, 442.0, 440.0, 1598.0)
    val z = VectorD (1.0, 20.0, 80.0)

//  println ("model: y = b_0 + b_1*x_1 + b_2*x_2")
    println ("model: y = b₀ + b₁*x₁ + b₂*x₂")
    println ("x = " + x)
    println ("y = " + y)

    val rg = new LassoRegression2 (x, y, -1)
    rg.train ()
    println ("b = " + rg.coefficient)
    rg.report ()
    println ("fit = " + rg.fit)

    val yp = rg.predict (z)                                // predict y for one point
    println ("predict (" + z + ") = " + yp)

    val yyp = rg.predict (x)                               // predict y for several points
    println ("predict (" + x + ") = " + yyp)

    new Plot (x.col(1), y, yyp)
    new Plot (x.col(2), y, yyp)

} // LassoRegression2Test2 object
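
//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `LassoRegression2Test3` object is an illustrative sketch (added for
 *  exposition, not part of the original test suite): it reuses the data set from
 *  the tests above and fixes λ0 at several increasing values to show how a larger
 *  L1 weight shrinks the coefficients toward zero.  Only the public API defined
 *  in this file is used.
 *  > runMain scalation.analytics.LassoRegression2Test3
 */
object LassoRegression2Test3 extends App
{
    val x = new MatrixD ((5, 3), 1.0, 36.0,  66.0,         // 5-by-3 matrix
                                 1.0, 37.0,  68.0,
                                 1.0, 47.0,  64.0,
                                 1.0, 32.0,  53.0,
                                 1.0,  1.0, 101.0)
    val y = VectorD (745.0, 895.0, 442.0, 440.0, 1598.0)

    for (λ0 <- Seq (0.01, 1.0, 100.0)) {                   // fixed λ values (no 'gcv' search)
        val rg = new LassoRegression2 (x, y, λ0)
        rg.train ()                                        // train with the given λ0
        println (s"λ = $λ0 => b = ${rg.coefficient}")
    } // for

} // LassoRegression2Test3 object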