//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller, Mustafa Nural
 *  @version 1.5
 *  @date    Sat Jan 20 16:05:52 EST 2018
 *  @see     LICENSE (MIT style license file).
 */

package scalation.analytics

import scala.collection.mutable.Set

import scalation.linalgebra.{MatriD, MatrixD, VectoD, VectorD}
import scalation.linalgebra.VectorD.one
import scalation.util.banner

import RegTechnique._

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `QuadRegression` class uses multiple regression to fit a quadratic
 *  surface to the data.  For example in 2D, the quadratic regression equation is
 *
 *      y  =  b dot x + e  =  [b_0, ... b_k] dot [1, x_0, x_1, x_0^2, x_1^2] + e
 *
 *  Has no interaction/cross-terms and adds a constant term for the intercept
 *  (the initial data matrix must not include an intercept, i.e., a column of ones).
 *  The expanded data matrix is built by `QuadRegression.allForms`.
 *  @see scalation.metamodel.QuadraticFit
 *  @param x_         the input vectors/points (initial data matrix)
 *  @param y          the response vector
 *  @param fname_     the feature/variable names
 *  @param technique  the technique used to solve for b in x.t*x*b = x.t*y
 */
class QuadRegression (x_ : MatriD, y: VectoD, fname_ : Strings = null, technique: RegTechnique = QR)
      extends Regression (QuadRegression.allForms (x_), y, fname_, null, technique)
{
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Perform 'k'-fold cross-validation.
     *  NOTE(review): each fold's model is built with the default feature names
     *  and technique, not the ones given to this instance - confirm this is intended.
     *  @param k      the number of folds
     *  @param rando  whether to use randomized cross-validation
     */
    override def crossVal (k: Int = 10, rando: Boolean = true): Unit =
    {
        crossValidate ((x: MatriD, y: VectoD) => new QuadRegression (x, y), k, rando)
    } // crossVal

} // QuadRegression class


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `QuadRegression` companion object provides methods for creating
 *  functional forms.
 */
object QuadRegression
{
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** The number of quadratic, linear and constant forms/terms (1, 3, 5, 7, ...)
     *  when there are no cross-terms:  one constant, 'n' linear and 'n' quadratic.
     *  @param n  number of features/predictor variables
     */
    def numTerms (n: Int): Int = 2 * n + 1

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Create all forms/terms for each point placing them in a new matrix.
     *  Throws an `IllegalArgumentException` when the matrix has fewer rows
     *  than there are terms.
     *  @param x  the input data matrix
     */
    def allForms (x: MatriD): MatriD =
    {
        val n  = x.dim2                                            // number of predictors
        val nt = numTerms (n)                                      // number of terms per point
        if (x.dim1 < nt) throw new IllegalArgumentException ("not enough data rows in matrix to use regression")
        val xa = new MatrixD (x.dim1, nt)
        for (i <- x.range1) xa(i) = qForms (x(i), nt, n)           // vector values for all quadratic forms
        xa
    } // allForms

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a vector/point 'p', compute the values for all of its quadratic,
     *  linear and constant forms/terms, returning them as a vector.
     *  No interaction/cross terms.  Layout: intercept, then the linear terms,
     *  then the quadratic terms.
     *  for 1D: p = (x_0)      => 'VectorD (1, x_0, x_0^2)'
     *  for 2D: p = (x_0, x_1) => 'VectorD (1, x_0, x_1, x_0^2, x_1^2)'
     *  @param p   the source vector/point for creating forms/terms
     *  @param nt  the number of terms
     *  @param n   the number of predictors
     */
    def qForms (p: VectoD, nt: Int, n: Int): VectoD =
    {
        val z = new VectorD (nt)                                   // vector of all forms/terms
        z(0) = 1.0                                                 // intercept
        for (i <- 1 to n) z(i) = p(i-1)                            // linear terms
        for (i <- n+1 until nt) {                                  // quadratic terms
            val v = p(i-n-1)                                       // slot n+1 holds p(0)^2, n+2 holds p(1)^2, ...
            z(i) = v * v
        } // for
        z
    } // qForms

} // QuadRegression object


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `QuadRegressionTest` object is used to test the `QuadRegression` class.
*  It fits a quadratic model to a 20-point, two-predictor data set, prints the
 *  estimated parameters and fit, and then exercises forward selection and
 *  backward elimination.
 *  > runMain scalation.analytics.QuadRegressionTest
 */
object QuadRegressionTest extends App
{
    //                            x1     x2
    val x = new MatrixD ((20, 2), 47.0,  85.4,
                                  49.0,  94.2,
                                  49.0,  95.3,
                                  50.0,  94.7,
                                  51.0,  89.4,
                                  48.0,  99.5,
                                  49.0,  99.8,
                                  47.0,  90.9,
                                  49.0,  89.2,
                                  48.0,  92.7,
                                  47.0,  94.4,
                                  49.0,  94.1,
                                  50.0,  91.6,
                                  45.0,  87.1,
                                  52.0, 101.3,
                                  46.0,  94.5,
                                  46.0,  87.0,
                                  46.0,  94.5,
                                  48.0,  90.5,
                                  56.0,  95.7)
    //  response BP
    val y = VectorD (105.0, 115.0, 116.0, 117.0, 112.0, 121.0, 121.0, 110.0, 110.0, 114.0,
                     114.0, 115.0, 114.0, 106.0, 125.0, 114.0, 106.0, 113.0, 110.0, 122.0)

    val rsr = new QuadRegression (x, y)
    rsr.train ().eval ()

    // take the term count from the data so it cannot drift from the matrix width
    val nTerms = QuadRegression.numTerms (x.dim2)
    println ("nTerms = " + nTerms)
    println ("parameter = " + rsr.parameter)
    println ("fitMap = " + rsr.fitMap)

    banner ("Forward Selection Test")
    val fcols = Set (0)                                            // start with the intercept column only
    for (_ <- 1 until nTerms) {
        val (x_j, b_j, fit_j) = rsr.forwardSel (fcols)             // add most predictive variable
        println (s"forward model: add x_j = $x_j with b = $b_j \n fit = $fit_j")
        fcols += x_j
    } // for

    banner ("Backward Elimination Test")
    val bcols = Set (0) ++ Array.range (1, nTerms)                 // start with every column
    for (_ <- 1 until nTerms) {
        val (x_j, b_j, fit_j) = rsr.backwardElim (bcols)           // eliminate least predictive variable
        println (s"backward model: remove x_j = $x_j with b = $b_j \n fit = $fit_j")
        bcols -= x_j
    } // for

} // QuadRegressionTest object