//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** @author John Miller, Michael Cotterell * @version 1.6 * @date Wed Feb 20 17:39:57 EST 2013 * @see LICENSE (MIT style license file). * * @title Model: Weighted Least Squares Regression */ package scalation.analytics import scala.math.{abs, log, pow, sqrt} import scalation.linalgebra._ import scalation.math.noDouble import scalation.plot.Plot import scalation.random.CDF.studentTCDF import scalation.stat.Statistic import scalation.util.{banner, time} import scalation.util.Unicode.sub import RegTechnique._ import Regression_WLS._ //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `Regression_WLS` class supports weighted multiple linear regression. * In this case, 'xx' is multi-dimensional [1, xx_1, ... xx_k]. * Weights are set to the inverse of a variable's variance, so they can compensate * for such variability (heteroscedasticity). Fit the parameter vector 'b' in * the regression equation *

* yy = b dot xx + e = b_0 + b_1 * xx_1 + ... b_k * xx_k + e *

* where 'e' represents the residuals (the part not explained by the model). * Use Weighted Least-Squares (minimizing the residuals) to fit the parameter vector *

* b = fac.solve (.) *

* The data matrix 'xx' is reweighted 'x = rootW * xx' and the response vector 'yy' * is reweighted 'y = rootW * yy' where 'rootW' is the square root of the weights. * @see en.wikipedia.org/wiki/Least_squares#Weighted_least_squares * These are then passed to Ordinary Least Squares (OLS) Regression. * Five factorization techniques are provided: *

* 'QR' // QR Factorization: slower, more stable (default) * 'Cholesky' // Cholesky Factorization: faster, less stable (reasonable choice) * 'SVD' // Singular Value Decomposition: slowest, most robust * 'LU' // LU Factorization: better than Inverse * 'Inverse' // Inverse/Gaussian Elimination, classical textbook technique *

* @see www.markirwin.net/stat149/Lecture/Lecture3.pdf * @param xx the data/input m-by-n matrix (augment with a first column of ones to include intercept in model) * @param yy the response/output m vector * @param fname_ the feature/variable names * @param technique the technique used to solve for b in x.t*w*x*b = x.t*w*y * @param w the weight vector (if null, computed in companion object) */ class Regression_WLS (xx: MatriD, yy: VectoD, fname_ : Strings = null, technique: RegTechnique = QR, private var w: VectoD = null) extends Regression ({ setWeights (xx, yy, technique, w); reweightX (xx, w) }, reweightY (yy, w), fname_, null, technique) // extends Regression ({ setWeights0 (xx, yy, technique, w); reweightX (xx, w) }, // reweightY (yy, w), fname_, null, technique) { if (w == null) w = Regression_WLS.weights //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return the weight vector. */ def weights: VectoD = w //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Compute diagostics for the regression model. Must override due to weights. * @param e the m-dimensional error/residual vector (yy - yp) * @param yy the actual response vector to use (test/full) * @param yp the predicted response vector (test/full), (defaults to null) * @param w the weights on the instances (defaults to null) * @param ym the mean of the actual response vector to use (train/full) */ override def diagnose (e: VectoD, yy: VectoD, yp: VectoD, w_ : VectoD, ym: Double = noDouble): Unit = { val yp = xx * b super.diagnose (e, yy, yp, w, ym) } // diagnose } // Regression_WLS class //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `Regression_WLS` companion object provides methods for setting weights * and testing. */ object Regression_WLS { private var w: VectoD = null // weights for current model private var rootW: VectoD = null // square root of weights //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return the weight vector for the current model. */ def weights: VectoD = w //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Estimate weights for the variables according to the reciprocal actual * rad's. Save the weight vector 'w' and root weight vector 'rootW' * for the current model in companion object variables. * @see 'setWeights' that used predicted rad * @param x the input/data m-by-n matrix * @param y the response vector * @param technique the technique used to solve for b in x.t*w*x*b = x.t*w*y * @param w the weight vector (if null, computed it) */ def setWeights0 (x: MatriD, y: VectoD, technique: RegTechnique = QR, w0: VectoD = null): Unit = { if (w0 == null) { val n = x.dim2 val ols_y = new Regression (x, y, null, null, technique) // run OLS on data ols_y.train ().eval () val e = ols_y.residual // deviations/errors val r = e.map ((a: Double) => sqrt (abs (a))) // root absolute deviations (rad's) // w = r.recip * n // set weight vector for WLS to reciprocal of r w = r.recip // set weight vector for WLS to reciprocal of r } else { w = w0 // set weights using custom values } // if rootW = w.map (sqrt _) // set root of weight vector } // setWeights0 //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Estimate weights for the variables according to the reciprocal predicted * rad's. Save the weight vector 'w' and root weight vector 'rootW' * for the current model in companion object variables. * @param x the input/data m-by-n matrix * @param y the response vector * @param technique the technique used to solve for b in x.t*w*x*b = x.t*w*y * @param w the weight vector (if null, computed it) */ def setWeights (x: MatriD, y: VectoD, technique: RegTechnique = QR, w0: VectoD = null): Unit = { if (w0 == null) { val n = x.dim2 val ols_y = new Regression (x, y, null, null, technique) // run OLS on data ols_y.train ().eval () val e = ols_y.residual // deviations/errors val r = e.map ((a: Double) => sqrt (abs (a))) // root absolute deviations (rad's) val ols_r = new Regression (x, r, null, null, technique) // run OLS on rad ols_r.train ().eval () val rp = ols_r.predict (x) // predicted rad // w = rp.recip * n // set weight vector for WLS to reciprocal of rp w = rp.recip // set weight vector for WLS to reciprocal of rp } else { w = w0 // set weights using custom values } // if rootW = w.map (sqrt _) // set root of weight vector } // setWeights //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Reweight the data matrix 'x' by multiplying by the root weight 'rtW'. * @param x the input/data m-by-n matrix * @param rW the root weight vector (rtW: either rootW or rW) */ def reweightX (x: MatriD, rW: VectoD): MatriD = { val rtW = if (rW == null) rootW else rW // root weight vector rtW **: x // vector (as if diagonal matrix) * matrix (right associative) } // reweightX //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Reweight the response vector matrix 'y' by multiplying by the root weight 'rtW'. * @param y the response vector * @param rW the root weight vector (rtW: either rootW or rW) */ def reweightY (y: VectoD, rW: VectoD): VectoD = { val rtW = if (rW == null) rootW else rW // root weight vector rtW * y // vector * vector (element by element) } // reweightY //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Test various regression techniques. * @param x the data matrix * @param y the response vector * @param z a vector to predict * @param w the root weights */ def test (x: MatriD, y: VectoD, z: VectoD, w: VectoD = null): Unit = { for (tec <- techniques) { // use 'tec' Factorization banner (s"Fit the parameter vector b using $tec") val rg = new Regression_WLS (x, y, null, tec) rg.train ().eval () println ("w = " + rg.weights) println ("e = " + rg.residual) println (rg.report) println (rg.summary) val yp = rg.predict (z) // predict y for one point println ("predict (" + z + ") = " + yp) val yyp = rg.predict (x) // predict y for several points println ("predict (" + x + ") = " + yyp) // new Plot (y, yyp, null, tec.toString) } // for } // test } // Regression_WLS object //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `Regression_WLSTest` object tests `Regression_WLS` class using the following * regression equation. *

* y = b dot x = b_0 + b_1*x_1 + b_2*x_2. *

* @see statmaster.sdu.dk/courses/st111/module03/index.html * > runMain scalation.analytics.Regression_WLSTest */ object Regression_WLSTest extends App { // 5 data points: constant term, x_1 coordinate, x_2 coordinate val x = new MatrixD ((5, 3), 1.0, 36.0, 66.0, // 5-by-3 matrix 1.0, 37.0, 68.0, 1.0, 47.0, 64.0, 1.0, 32.0, 53.0, 1.0, 1.0, 101.0) val y = VectorD (745.0, 895.0, 442.0, 440.0, 1598.0) val z = VectorD (1.0, 20.0, 80.0) // println ("model: y = b_0 + b_1*x_1 + b_2*x_2") println ("model: y = b₀ + b₁*x₁ + b₂*x₂") println ("x = " + x) println ("y = " + y) banner ("weights set internally") test (x, y, z) // weights set internally // val w0 = VectorD (0.106085, 0.0997944, 0.0831033, 0.160486, 0.171810) val w0 = VectorD (0.318254, 0.299383, 0.249310, 0.481457, 0.515431) banner ("custom weights") test (x, y, z, w0) // custom weights explicitly given } // Regression_WLSTest object