//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 1.6
 *  @date    Wed Feb 20 17:39:57 EST 2013
 *  @see     LICENSE (MIT style license file).
 *
 *  @title   Model: Polynomial Regression (for one variable)
 *
 *  For (low order) polynomial regression using multiple variables:
 *  @see `QuadRegression`, `QuadXRegression`, `CubicRegression`, `CubicXRegression`
 *  @see https://en.wikipedia.org/wiki/Legendre_polynomials
 */

package scalation.analytics

import scalation.linalgebra.{MatriD, MatrixD, VectoD, VectorD}
import scalation.math.double_exp
import scalation.plot.Plot
import scalation.util.banner

import Fit.showQofStatTable
import MatrixTransform._
import RegTechnique._

//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `PolyORegression` class supports orthogonal polynomial regression.
 *  In this case, 't' is expanded to an orthogonalization of '[1, t, t^2 ... t^k]'.
 *  Fit the parameter vector 'b' in the regression equation
 *
 *      y = b dot x + e = b_0 + b_1 * t + b_2 * t^2 ... b_k * t^k + e
 *
 *  where 'e' represents the residuals (the part not explained by the model).
 *  Use Least-Squares (minimizing the residuals) to solve for the parameter
 *  vector 'b' using the Normal Equations:
 *
 *      x.t * x * b = x.t * y
 *      b = fac.solve (.)
 *
 *  @see www.ams.sunysb.edu/~zhu/ams57213/Team3.pptx
 *  @param t          the initial data/input m-by-1 matrix: t_i expands to x_i = [1, t_i, t_i^2, ... t_i^k]
 *  @param y          the response/output vector
 *  @param ord        the order (k) of the polynomial (max degree)
 *  @param fname_     the feature/variable names
 *  @param hparam     the hyper-parameters
 *  @param technique  the technique used to solve for b in x.t*x*b = x.t*y
 */
class PolyORegression (t: MatriD, y: VectoD, ord: Int, fname_ : Strings = null,
                       hparam: HyperParameter = null,
                       technique: RegTechnique = Cholesky)
      extends Regression (PolyORegression.allForms (t, ord), y, fname_, hparam, technique)
         with ExpandableForms
{
    private val DEBUG = false                                 // debug flag
    private val n0    = 1                                     // number of terms/columns originally
    private val nt    = PolyORegression.numTerms (ord)        // number of terms/columns after expansion
    private val a     = PolyORegression.getA                  // multipliers for orthogonal polynomials

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Expand the vector 'z' into a vector that includes additional terms,
     *  i.e., add polynomial terms.
     *  @param z  the un-expanded vector
     */
    def expand (z: VectoD): VectoD = PolyORegression.forms (z, n0, nt)

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Follow the same transformations used to orthogonalize the data/input
     *  matrix 'x', on vector 'v', so its elements are correctly mapped.
     *  @param v  the vector to be transformed based on the orthogonalization procedure
     */
    def orthoVector (v: VectoD): VectoD =
    {
        val u = new VectorD (v.dim)
        u(0)  = v(0)
        for (j <- 1 until v.dim) {                            // element to set
            u(j) = v(j)
            for (k <- 0 until j) u(j) -= u(k) * a(j, k)       // apply orthogonal multiplier
        } // for
        if (DEBUG) println (s"v = $v \nu = $u")
        u
    } // orthoVector

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given the scalar 'z', expand it and predict the response value.
* @param z the un-expanded scalar */ def predict (z: Double): Double = predict_ex (VectorD (z)) //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Given the vector 'z', expand it and predict the response value. * @param z the un-expanded vector */ def predict_ex (z: VectoD): Double = predict (orthoVector (expand (z))) } // PolyORegression class //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `PolyORegression` companion object provides factory functions and functions * for creating functional forms. */ object PolyORegression extends ModelFactory { val drp = (null, null, Cholesky) // default remaining parameters private val DEBUG = false // debug flag private var a: MatriD = null // multipliers for orthogonal polynomials //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Get the multipliers for orthogonal polynomials, matrix 'a'. * FIX - collecting the 'a' matrix this way may fail for parallel processing */ def getA: MatriD = a //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Create a `PolyORegression` object from a combined data-response matrix. 
* @param xy the initial combined data-response matrix (before polynomial term expansion) * @param ord the order (k) of the polynomial (max degree) * @param fname_ the feature/variable names * @param hparam the hyper-parameters * @param technique the technique used to solve for b in x.t*x*b = x.t*y */ def apply (xy: MatriD, ord: Int, fname: Strings = null, hparam: HyperParameter = null, technique: RegTechnique = Cholesky): PolyORegression = { val n = xy.dim2 if (n < 2) { flaw ("apply", s"dim2 = $n of the 'xy' matrix must be at least 2") null } else { val (x, y) = pullResponse (xy) new PolyORegression (x, y, ord, fname, hparam, technique) } // if } // apply //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Create a `PolyORegression` object from a combined data-response matrix. * @param t the initial data/input vector: t_i expands to x_i = [1, t_i, t_i^2, ... t_i^k] * @param y the response/ouput vector * @param ord the order (k) of the polynomial (max degree) * @param fname_ the feature/variable names * @param hparam the hyper-parameters * @param technique the technique used to solve for b in x.t*x*b = x.t*y */ def apply (t: VectoD, y: VectoD, ord: Int, fname: Strings, hparam: HyperParameter, technique: RegTechnique): PolyORegression = { new PolyORegression (MatrixD (Seq (t)), y, ord, fname, hparam, technique) } // apply //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Create a `PolyORegression` object from a data matrix and a response vector. * This factory function provides data rescaling. 
* @see `ModelFactory` * @param x the initial data/input matrix (before polynomial term expansion) * @param y the response/output m-vector * @param ord the order (k) of the polynomial (max degree) * @param fname the feature/variable names (use null for default) * @param hparam the hyper-parameters (use null for default) * @param technique the technique used to solve for b in x.t*x*b = x.t*y (use Cholesky for default) */ def apply (x: MatriD, y: VectoD, ord: Int, fname: Strings, hparam: HyperParameter, technique: RegTechnique): PolyORegression = { val n = x.dim2 if (n < 1) { flaw ("apply", s"dim2 = $n of the 'x' matrix must be at least 1") null } else if (rescale) { // normalize the x matrix val (mu_x, sig_x) = (x.mean, stddev (x)) val xn = normalize (x, (mu_x, sig_x)) new PolyORegression (xn, y, ord, fname, hparam, technique) } else { new PolyORegression (x, y, ord, fname, hparam, technique) } // if } // apply //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Given a 1-vector/point 'v', compute the values for all of its polynomial * forms/terms, returning them as a vector. * @param v the vector/point (i-th row of t) for creating forms/terms * @param k number of features/predictor variables (not counting intercept) = 1 * @param nt the number of terms */ override def forms (v: VectoD, k: Int, nt: Int): VectoD = { val t = v(0) VectorD (for (j <- 0 until nt) yield t~^j) } // forms //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Create all forms/terms for each row/point placing them in a new matrix. 
* @param x the original un-expanded input/data matrix * @param ord the order (max degree) of the polynomial */ def allForms (x: MatriD, ord: Int): MatriD = { val k = 1 val nt = numTerms (ord) println (s"allForms: create expanded data matrix with nt = $nt columns from k = $k columns") val xe = new MatrixD (x.dim1, nt) for (i <- x.range1) xe(i) = forms (x(i), k, nt) // vector with values for all forms/terms val za = orthogonalize (xe) a = za._2 // save multipliers if (DEBUG) println (s"expanded data matrix za._1 = ${za._1}") za._1 // orthogonalized expanded matrix } // allForms //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Orthogonalize the data/input matrix 'x' using Gram-Schmidt Orthogonalization, * returning the a new orthogonal matrix 'z' and the orthogonalization multipliers 'a'. * This will eliminate the multi-collinearity problem. * @param x the matrix to orthogonalize */ def orthogonalize (x: MatriD): (MatriD, MatriD) = { val z = new MatrixD (x.dim1, x.dim2) val a = new MatrixD (x.dim2, x.dim2) z.setCol (0, x.col(0)) for (j <- 1 until x.dim2) { // column to set z.setCol (j, x.col(j)) for (k <- 0 until j) { // make orthogonal to prior columns a(j, k) = (z.col(j) dot z.col(k)) / z.col(k).normSq z.setCol (j, z.col(j) - z.col(k) * a(j, k)) } // for } // for if (DEBUG) println (s"x = $x \nz = $z") (z, a) } // orthogonalize } // PolyORegression object //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `PolyORegressionTest` object tests `PolyORegression` class using the following * regression equation. *

* y = b dot x = b_0 + b_1*t + b_2*t^2 + ... b_k*t_k *

* Note, the 'order' at which R-Squared drops is QR(7), Cholesky(14), SVD(6), Inverse(13). * > runMain scalation.analytics.PolyORegressionTest */ object PolyORegressionTest extends App { import scalation.random.Normal val noise = Normal (0.0, 100.0) val t = VectorD.range (0, 100) val y = new VectorD (t.dim) for (i <- 0 until 100) y(i) = 10.0 - 10.0 * i + i~^2 + i * noise.gen println ("t = " + t) println ("y = " + y) val order = 4 val technique = Cholesky // others: QR, SVD, LU or Inverse val prg = PolyORegression (t, y, order, null, null, technique) prg.train ().eval () println (prg.report) banner ("test for collinearity") println ("corr = " + prg.corrMatrix ()) println ("vif = " + prg.vif ()) banner ("test predictions") val yp = t.map (prg.predict (_)) println (s" y = $y \n yp = $yp") new Plot (t, y, yp, "PolyORegression") val z = 10.5 // predict y for one point val yp2 = prg.predict (z) println ("predict (" + z + ") = " + yp2) banner ("test cross-validation") val stats = prg.crossValidate () showQofStatTable (stats) } // PolyORegressionTest object //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `PolyORegressionTest2` object tests `PolyORegression` class using the following * regression equation. *

* y = b dot x = b_0 + b_1*t + b_2*t^2 + ... b_k*t_k *

* > runMain scalation.analytics.PolyORegressionTest2 */ object PolyORegressionTest2 extends App { import scalation.random.Normal val noise = Normal (0.0, 100.0) val t = VectorD.range (0, 100) val y = new VectorD (t.dim) for (i <- 0 until 100) y(i) = 10.0 - 10.0 * i + i~^2 + i * noise.gen println ("t = " + t) println ("y = " + y) val order = 6 val technique = Cholesky // others: QR, SVD, LU or Inverse val prg = PolyORegression (t, y, order, null, null, technique) prg.train ().eval () println (prg.report) banner ("test for collinearity") println ("corr = " + prg.corrMatrix ()) println ("vif = " + prg.vif ()) banner ("test predictions") val yp = t.map (prg.predict (_)) println (s" y = $y \n yp = $yp") new Plot (t, y, yp, "PolyORegression") val z = 10.5 // predict y for one point val yp2 = prg.predict (z) println ("predict (" + z + ") = " + yp2) banner ("test cross-validation") val stats = prg.crossValidate () showQofStatTable (stats) } // PolyORegressionTest2 object