//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** @author John Miller * @version 1.3 * @date Mon Jan 5 14:00:12 EST 2015 * @see LICENSE (MIT style license file). */ package scalation.analytics.par import scalation.linalgebra.VectorI import scalation.linalgebra.par.{MatrixD, VectorD} import scalation.linalgebra.par.VectorD.one import scalation.math.FunctionS2S import scalation.analytics.RegTechnique._ //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** A General Linear Model 'GLM' can be developed using the `GLM` trait and object * (see below). The implementation currently supports univariate models with * multivariate models (where each response is a vector) planned for the future. * This version uses parallel processing to speed up execution. * It provides factory methods for the following special types of GLMs: * `Regression` - multiple linear regression, * `RidgeRegression` - robust multiple linear regression, * `TranRegression` - transformed (e.g., log) multiple linear regression, * `PolyRegression` - polynomial regression, * `TrigRegression` - trigonometric regression * `ResponseSurface` - response surface regression, * `ANCOVA` - GLM form of ANalysis of COVAriance. * The following special types are excluded since they do not utilize large matrices. * `SimpleRegression` - simple linear regression, * `ANOVA` - GLM form of ANalysis Of VAriance, */ trait GLM { protected var add_1 = true // by default, prepend a column of all ones to the design matrix protected var technique = QR // by default, use QR Factorization for // the regression technique used to solve for b in x.t*x*b = x.t*y //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Explicitly set the add_1 flag (column of all ones corresponding to b_0). * @param _add_1 the value to set the add_1 flag to */ def setAdd_1 (_add_1: Boolean = true) { add_1 = _add_1 } //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Explicitly set the regression technique to use. * @param _technique the value to set technique to */ def setTechnique (_technique: RegTechnique = QR) { technique = _technique } //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Build a Multiple Linear Regression model. * @param x the input/design m-by-n matrix * @param y the response m-vector */ def apply (x: MatrixD, y: VectorD): Regression = { if (add_1) new Regression (one (x.dim1) +^: x, y, technique) else new Regression (x, y, technique) } // apply //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Build a Multiple Linear Regression model. * @param xy the combined input/design m-by-n matrix and response m-vector */ def apply (xy: MatrixD): Regression = { if (add_1) new Regression (one (xy.dim1) +^: xy.sliceCol (0, xy.dim2-1), xy.col (xy.dim2-1), technique) else new Regression (xy.sliceCol (0, xy.dim2-1), xy.col (xy.dim2-1), technique) } // apply //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Build a Multiple Linear Robust Regression model. * @param x the centered input/design m-by-n matrix NOT augmented with a first column of ones * @param y the centered response vector * @param lambda the shrinkage parameter (0 => OLS) in the penalty term 'lambda * b dot b' */ def apply (x: MatrixD, y: VectorD, lambda: Double): RidgeRegression = { new RidgeRegression (x, y, lambda, technique) } // apply //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Build a Multiple Linear Robust Regression model. * @param xY the combined centered input/design m-by-n matrix and response vector * @param lambda the shrinkage parameter (0 => OLS) in the penalty term 'lambda * b dot b' */ def apply (xy: MatrixD, lambda: Double): RidgeRegression = { new RidgeRegression (xy.sliceCol (0, xy.dim2-1), xy.col (xy.dim2-1), lambda, technique) } // apply //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Build a Transformed Multiple Linear Regression model. * @param x the input/design m-by-n matrix * @param y the response m-vector * @param transform the transformation function (e.g., log) */ def apply (x: MatrixD, y: VectorD, transform: FunctionS2S): TranRegression = { if (add_1) new TranRegression (one (x.dim1) +^: x, y, transform, technique) else new TranRegression (x, y, transform, technique) } // apply //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Build a Transformed Multiple Linear Regression model. * @param xy the combined input/design m-by-n matrix and response m-vector * @param transform the transformation function */ def apply (xy: MatrixD, transform: FunctionS2S): TranRegression = { if (add_1) new TranRegression (one (xy.dim1) +^: xy.sliceCol (0, xy.dim2-1), xy.col (xy.dim2-1), transform, technique) else new TranRegression (xy.sliceCol (0, xy.dim2-1), xy.col (xy.dim2-1), transform, technique) } // apply //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Build a Polynomial Regression model. It makes a matrix using 'expand'. * @param t the input vector: t_i expands to x_i = [1, t_i, t_i^2, ... t_i^k] * @param y the response vector * @param k the order of the polynomial */ def apply (t: VectorD, y: VectorD, k: Int): PolyRegression = { new PolyRegression (t, y, k, technique) } // apply //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Build a Polynomial Regression model. * @param ty the combined input vector and response vector * @param k the order of the polynomial */ def apply (ty: MatrixD, k: Int): PolyRegression = { new PolyRegression (ty.col (0), ty.col (1), k, technique) } // apply //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Build a Trigonometric Regression model. It makes a matrix using 'expand'. * @param t the input vector: t_i expands to x_i * @param y the response vector * @param k the maximum multiplier in the trig function 'kwt' * @param w the base displacement angle in radians */ def apply (t: VectorD, y: VectorD, k: Int, w: Double): TrigRegression = { new TrigRegression (t, y, k, w, technique) } // apply //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Build a Trigonometric Regression model. * @param ty the combined input vector and response vector * @param k the maximum multiplier in the trig function 'kwt' * @param w the base displacement angle in radians */ def apply (ty: MatrixD, k: Int, w: Double): TrigRegression = { new TrigRegression (ty.col (0), ty.col (1), k, w, technique) } // apply //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Build a Response Surface model. * @param x_ the input vectors/points * @param y the response vector * @param cubic the order of the surface (false for quadratic, true for cubic) */ def apply (x_ : MatrixD, y: VectorD, cubic: Boolean): ResponseSurface = { new ResponseSurface (x_, y, cubic) } // apply //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Build an ANalysis of COVAriance (ANCOVA) model. * @param x_ the data/design matrix of continuous variables * @param t the treatment/categorical variable vector * @param y the response vector * @param levels the number of treatment levels (1, ... levels) */ def apply (x_ : MatrixD, t: VectorI, y: VectorD, levels: Int): ANCOVA = { new ANCOVA (x_, t, y, levels, technique) } // apply } // GLM trait //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `GLM` object makes the `GLM` trait's methods directly available. * This approach (using traits and objects) allows the methods to also be inherited. */ object GLM extends GLM //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `GLMTest` object tests the `GLM` object using the following regression * equation. *

* y = b dot x = b_0 + b_1*x_1 + b_2*x_2 + b_3*d_1 + b_4*d_2 *

*/ object GLMTest extends App { // 5 data points: constant term, x_1 coordinate, x_2 coordinate val x = new MatrixD ((5, 3), 1.0, 36.0, 66.0, // 5-by-3 matrix 1.0, 37.0, 68.0, 1.0, 47.0, 64.0, 1.0, 32.0, 53.0, 1.0, 1.0, 101.0) val t = VectorI (1, 1, 2, 2, 3) // treatements levels val y = VectorD (745.0, 895.0, 442.0, 440.0, 1598.0) // response vector val z = VectorD (1.0, 20.0, 80.0, 1.0) println ("x = " + x) println ("t = " + t) println ("y = " + y) val levels = 3 /* val glm = GLM ( /* TBD */ ) glm.train () println ("fit = " + glm.fit) val yp = glm.predict (z) println ("predict (" + z + ") = " + yp) */ } // GLMTest object