//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 1.6
 *  @date    Wed Feb 20 17:39:57 EST 2013
 *  @see     LICENSE (MIT style license file).
 *
 *  @title   Model: Polynomial Regression (for one variable)
 *
 *  For (low order) polynomial regression using multiple variables:
 *  @see `QuadRegression`, `QuadXRegression`, `CubicRegression`, `CubicXRegression`
 */

package scalation.analytics

import scalation.linalgebra.{MatriD, MatrixD, VectoD, VectorD}
import scalation.math.double_exp
import scalation.plot.Plot
import scalation.util.banner

import Fit.showQofStatTable
import MatrixTransform._
import RegTechnique._

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `PolyRegression` class supports polynomial regression.  In this case,
 *  't' is expanded to '[1, t, t^2 ... t^k]'.  Fit the parameter vector 'b' in the
 *  regression equation
 *
 *      y = b dot x + e = b_0 + b_1 * t + b_2 * t^2 ... b_k * t^k + e
 *
 *  where 'e' represents the residuals (the part not explained by the model).
 *  Use Least-Squares (minimizing the residuals) to solve for the parameter vector 'b'
 *  using the Normal Equations:
 *
 *      x.t * x * b  =  x.t * y
 *      b  =  fac.solve (.)
 *
 *  @see www.ams.sunysb.edu/~zhu/ams57213/Team3.pptx
 *  @param t          the initial data/input m-by-1 matrix: t_i expands to x_i = [1, t_i, t_i^2, ... t_i^k]
 *  @param y          the response/output vector
 *  @param ord        the order (k) of the polynomial (max degree)
 *  @param fname_     the feature/variable names
 *  @param hparam     the hyper-parameters
 *  @param technique  the technique used to solve for b in x.t*x*b = x.t*y
 */
class PolyRegression (t: MatriD, y: VectoD, ord: Int, fname_ : Strings = null,
                      hparam: HyperParameter = null, technique: RegTechnique = Cholesky)
      extends Regression (PolyRegression.allForms (t, ord), y, fname_, hparam, technique)
         with ExpandableForms
{
    private val n0 = 1                                         // number of terms/columns originally
    private val nt = PolyRegression.numTerms (ord)             // number of terms/columns after expansion

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Expand the vector 'z' into a vector that includes additional terms,
     *  i.e., add polynomial terms.
     *  @param z  the un-expanded vector
     */
    def expand (z: VectoD): VectoD = PolyRegression.forms (z, n0, nt)

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given the scalar 'z', expand it and predict the response value.
     *  @param z  the un-expanded scalar
     */
    def predict (z: Double): Double = predict_ex (VectorD (z))

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given the vector 'z', expand it and predict the response value.
     *  @param z  the un-expanded vector
     */
    def predict_ex (z: VectoD): Double = predict (expand (z))

} // PolyRegression class
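
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
//  Added illustration (not part of the original code): the prediction path for a new,
//  un-expanded point.  Assuming 'numTerms (ord) = ord + 1' for this single-variable case,
//  a quadratic model (ord = 2) expands a raw value z and then applies the fitted
//  parameter vector b, e.g.,
//
//      expand (VectorD (2.5))  =  VectorD (1.0, 2.5, 6.25)          // [1, z, z^2]
//      predict (2.5)           =  b dot VectorD (1.0, 2.5, 6.25)    // via predict_ex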

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `PolyRegression` companion object provides factory functions and functions
 *  for creating functional forms.
 */
object PolyRegression extends ModelFactory
{
    val drp = (null, null, Cholesky)                           // default remaining parameters

    private val DEBUG = false                                  // debug flag

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Create a `PolyRegression` object from a combined data-response matrix.
     *  @param xy         the initial combined data-response matrix (before polynomial term expansion)
     *  @param ord        the order (k) of the polynomial (max degree)
     *  @param fname      the feature/variable names
     *  @param hparam     the hyper-parameters
     *  @param technique  the technique used to solve for b in x.t*x*b = x.t*y
     */
    def apply (xy: MatriD, ord: Int, fname: Strings = null,
               hparam: HyperParameter = null, technique: RegTechnique = Cholesky): PolyRegression =
    {
        val n = xy.dim2
        if (n < 2) {
            flaw ("apply", s"dim2 = $n of the 'xy' matrix must be at least 2")
            null
        } else {
            val (x, y) = pullResponse (xy)
            new PolyRegression (x, y, ord, fname, hparam, technique)
        } // if
    } // apply

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Create a `PolyRegression` object from a data vector and a response vector.
     *  @param t          the initial data/input vector: t_i expands to x_i = [1, t_i, t_i^2, ... t_i^k]
     *  @param y          the response/output vector
     *  @param ord        the order (k) of the polynomial (max degree)
     *  @param fname      the feature/variable names
     *  @param hparam     the hyper-parameters
     *  @param technique  the technique used to solve for b in x.t*x*b = x.t*y
     */
    def apply (t: VectoD, y: VectoD, ord: Int, fname: Strings,
               hparam: HyperParameter, technique: RegTechnique): PolyRegression =
    {
        new PolyRegression (MatrixD (Seq (t)), y, ord, fname, hparam, technique)
    } // apply

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Create a `PolyRegression` object from a data matrix and a response vector.
     *  This factory function provides data rescaling.
     *  @see `ModelFactory`
     *  @param x          the initial data/input matrix (before polynomial term expansion)
     *  @param y          the response/output m-vector
     *  @param ord        the order (k) of the polynomial (max degree)
     *  @param fname      the feature/variable names (use null for default)
     *  @param hparam     the hyper-parameters (use null for default)
     *  @param technique  the technique used to solve for b in x.t*x*b = x.t*y (use Cholesky for default)
     */
    def apply (x: MatriD, y: VectoD, ord: Int, fname: Strings,
               hparam: HyperParameter, technique: RegTechnique): PolyRegression =
    {
        val n = x.dim2
        if (n < 1) {
            flaw ("apply", s"dim2 = $n of the 'x' matrix must be at least 1")
            null
        } else if (rescale) {                                  // normalize the x matrix
            val (mu_x, sig_x) = (x.mean, stddev (x))
            val xn = normalize (x, (mu_x, sig_x))
            new PolyRegression (xn, y, ord, fname, hparam, technique)
        } else {
            new PolyRegression (x, y, ord, fname, hparam, technique)
        } // if
    } // apply

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a 1-vector/point 'v', compute the values for all of its polynomial
     *  forms/terms, returning them as a vector.
     *  @param v   the vector/point (i-th row of t) for creating forms/terms
     *  @param k   the number of features/predictor variables (not counting intercept) = 1
     *  @param nt  the number of terms
     */
    override def forms (v: VectoD, k: Int, nt: Int): VectoD =
    {
        val t = v(0)
        VectorD (for (j <- 0 until nt) yield t~^j)
    } // forms

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Create all forms/terms for each row/point placing them in a new matrix.
     *  @param x    the original un-expanded input/data matrix
     *  @param ord  the order (max degree) of the polynomial
     */
    def allForms (x: MatriD, ord: Int): MatriD =
    {
        val k  = 1
        val nt = numTerms (ord)
        println (s"allForms: create expanded data matrix with nt = $nt columns from k = $k columns")
        val xe = new MatrixD (x.dim1, nt)
        for (i <- x.range1) xe(i) = forms (x(i), k, nt)        // vector with values for all forms/terms
        if (DEBUG) println (s"expanded data matrix xe = $xe")
        xe                                                     // expanded matrix
    } // allForms

} // PolyRegression object
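
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
//  Added worked example (the values are made up; it is assumed that 'numTerms (ord) = ord + 1'
//  for this single-variable case).  With ord = 3 (so nt = 4), 'forms' maps a raw value to
//  [1, t, t^2, t^3], e.g.,
//
//      forms (VectorD (2.0), 1, 4)  =  VectorD (1.0, 2.0, 4.0, 8.0)
//
//  and 'allForms' applies this row by row, expanding a 3-by-1 matrix with column (1, 2, 3)
//  into the 3-by-4 matrix
//
//      1   1   1   1
//      1   2   4   8
//      1   3   9  27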

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `PolyRegressionTest` object tests the `PolyRegression` class using the following
 *  regression equation.
 *
 *      y = b dot x = b_0 + b_1*t + b_2*t^2 + ... b_k*t^k
 *
 *  Note, the 'order' at which R-Squared drops is QR(7), Cholesky(14), SVD(6), Inverse(13);
 *  a sweep over the order is sketched in `PolyRegressionTest3` below.
 *  > runMain scalation.analytics.PolyRegressionTest
 */
object PolyRegressionTest extends App
{
    import scalation.random.Normal

    val noise = Normal (0.0, 100.0)
    val t = VectorD.range (0, 100)
    val y = new VectorD (t.dim)
    for (i <- 0 until 100) y(i) = 10.0 - 10.0 * i + i~^2 + i * noise.gen

    println ("t = " + t)
    println ("y = " + y)

    val order     = 4
    val technique = Cholesky                          // others: QR, SVD, LU or Inverse
    val prg = PolyRegression (t, y, order, null, null, technique)
    prg.train ().eval ()
    println (prg.report)

    banner ("test for collinearity")
    println ("corr = " + prg.corrMatrix ())
    println ("vif = " + prg.vif ())

    banner ("test predictions")
    val yp = t.map (prg.predict (_))
    println (s" y = $y \n yp = $yp")
    new Plot (t, y, yp, "PolyRegression")

    val z   = 10.5                                    // predict y for one point
    val yp2 = prg.predict (z)
    println ("predict (" + z + ") = " + yp2)

    banner ("test cross-validation")
    val stats = prg.crossValidate ()
    showQofStatTable (stats)

} // PolyRegressionTest object

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `PolyRegressionTest2` object tests the `PolyRegression` class using the following
 *  regression equation.
 *
 *      y = b dot x = b_0 + b_1*t + b_2*t^2 + ... b_k*t^k
 *
 *  > runMain scalation.analytics.PolyRegressionTest2
 */
object PolyRegressionTest2 extends App
{
    import scalation.random.Normal

    val noise = Normal (0.0, 100.0)
    val t = VectorD.range (0, 100)
    val y = new VectorD (t.dim)
    for (i <- 0 until 100) y(i) = 10.0 - 10.0 * i + i~^2 + i * noise.gen

    println ("t = " + t)
    println ("y = " + y)

    val order     = 6
    val technique = Cholesky                          // others: QR, SVD, LU or Inverse
    val prg = PolyRegression (t, y, order, null, null, technique)
    prg.train ().eval ()
    println (prg.report)

    banner ("test for collinearity")
    println ("corr = " + prg.corrMatrix ())
    println ("vif = " + prg.vif ())

    banner ("test predictions")
    val yp = t.map (prg.predict (_))
    println (s" y = $y \n yp = $yp")
    new Plot (t, y, yp, "PolyRegression")

    val z   = 10.5                                    // predict y for one point
    val yp2 = prg.predict (z)
    println ("predict (" + z + ") = " + yp2)

    banner ("test cross-validation")
    val stats = prg.crossValidate ()
    showQofStatTable (stats)

} // PolyRegressionTest2 object
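
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `PolyRegressionTest3` object is an added sketch (not part of the original tests;
 *  the object name and the range of orders are choices made here for illustration).  It
 *  sweeps the polynomial order for one factorization technique on the same synthetic data
 *  as `PolyRegressionTest`, so the note there about the order at which R-Squared drops can
 *  be checked empirically by switching 'technique' among QR, Cholesky, SVD, LU and Inverse.
 *  > runMain scalation.analytics.PolyRegressionTest3
 */
object PolyRegressionTest3 extends App
{
    import scalation.random.Normal

    val noise = Normal (0.0, 100.0)
    val t = VectorD.range (0, 100)
    val y = new VectorD (t.dim)
    for (i <- 0 until 100) y(i) = 10.0 - 10.0 * i + i~^2 + i * noise.gen

    val technique = Cholesky                          // switch to QR, SVD, LU or Inverse

    for (order <- 2 to 15) {
        banner (s"PolyRegression of order $order using $technique")
        val prg = PolyRegression (t, y, order, null, null, technique)
        prg.train ().eval ()
        println (prg.report)
    } // for

} // PolyRegressionTest3 object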