//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 1.6
 *  @date    Thu Jul 4 14:21:05 EDT 2019
 *  @see     LICENSE (MIT style license file).
 *
 *  @title   Model: Quadratic Regression for Time Series
 */

package scalation.analytics
package forecaster

import scala.collection.mutable.Set

import scalation.linalgebra.{MatriD, MatrixD, VectoD, VectorD}
import scalation.linalgebra.VectorD.one
import scalation.math.double_exp
import scalation.plot.PlotM
import scalation.stat.Statistic
import scalation.util.banner

import MatrixTransform._
import RegTechnique._

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `QuadRegression4TS` class uses multiple regression to fit the lagged data.
 *  Lag columns ranging from 'lag1' (inclusive) to 'lag2' (exclusive) are added
 *  (via `Regression4TS.allForms`) before delegating the problem to the
 *  `QuadRegression` superclass.  A constant term for intercept can be added
 *  (@see 'allForms' method), so the initial data matrix must not already include
 *  an intercept (column of ones).
 *  @param x_         the initial data/input matrix (before lag term expansion)
 *  @param y          the response/output vector
 *  @param fname_     the feature/variable names
 *  @param hparam     the hyper-parameters; "lag1" and "lag2" are read from it
 *  @param technique  the technique used to solve for b in x.t*x*b = x.t*y
 */
class QuadRegression4TS (x_ : MatriD, y: VectoD, fname_ : Strings = null,
                         hparam: HyperParameter = Regression4TS.hp,
                         technique: RegTechnique = QR)
      extends QuadRegression (Regression4TS.allForms (x_, hparam ("lag1").toInt, hparam ("lag2").toInt),
                              y, fname_, hparam, technique)
      with ForecasterMat
{
    private val DEBUG = false                                   // debug flag

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the model name, which embeds the current value of 'n'
     *  ('n' is not defined in this file).
     */
    override def modelName: String = s"QuadRegression4TS($n)"

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the error (difference between actual and forecasted) and useful
     *  diagnostics for the test dataset.  Stores the residual in 'e', passes it
     *  to 'diagnose' and returns this model so calls can be chained.
     *  @param ym  the mean of the actual response/output vector (full/training)
     *  @param yy  the actual response/output vector (full/testing)
     *  @param yf  the forecasted response/output vector (full/testing)
     */
    override def eval (ym: Double, yy: VectoD, yf: VectoD): QuadRegression4TS =
    {
        if (DEBUG) {
            println (s"eval: ym = $ym")
            println (s"eval: yy.dim = ${yy.dim}")
            println (s"eval: yf.dim = ${yf.dim}")
        } // if
        e = yy - yf                                             // compute residual/error vector e
        diagnose (e, yy, yf, null, ym)                          // compute diagnostics
        this
    } // eval

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Produce a forecast for 'h' steps ahead into the future by predicting from
     *  row 'max (0, t+1-h)' of the expanded matrix.
     *  Note: the forecasts for time 't = 0, ... , h-1' will be duplicates, since
     *  they are all clamped to row 0.
     *  @param xe  the relevant expanded data matrix
     *  @param t   the time for the forecast
     *  @param h   the forecasting horizon, number of steps ahead to produce forecast
     */
    def forecast (xe: MatriD, t: Int, h: Int = 1): Double =
    {
        val tt = math.max (0, t+1-h)                            // time @ row h-1 back in matrix
        predict (xe(tt))
    } // forecast

} // QuadRegression4TS class


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `QuadRegression4TS` companion object provides factory functions and functions
 *  for creating functional forms.
 */
object QuadRegression4TS extends ModelFactory
{
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Create a `QuadRegression4TS` object from a combined data-response matrix.
     *  The matrix is split by 'pullResponse'; no rescaling is applied here.
     *  Reports a flaw and returns null when 'xy' has fewer than 2 columns.
     *  @param xy         the initial combined data-response matrix (before quadratic term expansion)
     *  @param fname      the feature/variable names
     *  @param hparam     the hyper-parameters
     *  @param technique  the technique used to solve for b in x.t*x*b = x.t*y
     */
    def apply (xy: MatriD, fname: Strings = null, hparam: HyperParameter = Regression4TS.hp,
               technique: RegTechnique = QR): QuadRegression4TS =
    {
        val n = xy.dim2
        if (n < 2) {
            flaw ("apply", s"dim2 = $n of the 'xy' matrix must be at least 2")
            null
        } else {
            val (x, y) = pullResponse (xy)
            new QuadRegression4TS (x, y, fname, hparam, technique)
        } // if
    } // apply

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Create a `QuadRegression4TS` object from a data matrix and a response vector.
     *  This factory function provides data rescaling: when 'rescale' is set, the
     *  model is built on the normalized matrix rather than on 'x' itself.
     *  Reports a flaw and returns null when 'x' has no columns.
     *  @see `ModelFactory`
     *  @param x          the initial data/input matrix (before quadratic term expansion)
     *  @param y          the response/output m-vector
     *  @param fname      the feature/variable names (use null for default)
     *  @param hparam     the hyper-parameters (use null for default, i.e., `Regression4TS.hp`)
     *  @param technique  the technique used to solve for b in x.t*x*b = x.t*y (use QR for default)
     */
    def apply (x: MatriD, y: VectoD, fname: Strings, hparam: HyperParameter,
               technique: RegTechnique): QuadRegression4TS =
    {
        val n  = x.dim2
        // the class header reads hparam ("lag1"), so a null must be replaced up front
        val hp = if (hparam == null) Regression4TS.hp else hparam

        if (n < 1) {
            flaw ("apply", s"dim2 = $n of the 'x' matrix must be at least 1")
            null
        } else if (rescale) {                                   // normalize the x matrix
            val xx = x.copy ()                                  // work on a copy, leave caller's x alone
            // NOTE(review): relies on 'normalize' returning the normalized matrix;
            // previously its result was dropped and the raw copy was used instead
            val xn = normalize (xx, (xx.mean, stddev (xx)))
            new QuadRegression4TS (xn, y, fname, hp, technique)
        } else {
            new QuadRegression4TS (x, y, fname, hp, technique)
        } // if
    } // apply

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the number of terms, computed as '2 * k + 1'.  Per the original
     *  note, the terms include the current value and the lag one value, when
     *  there are no cross-terms.  The formula carries the author's FIX marker,
     *  i.e., it is flagged as provisional.
     *  @param k  number of features/predictor variables (not counting intercept)
     */
    override def numTerms (k: Int): Int = 2 * k + 1            // FIX

} // QuadRegression4TS object


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `QuadRegression4TSTest` object is used to test the `QuadRegression4TS` class.
 *  It fits the model to the 'x01' and 'y' data imported from `ExampleBPressure`,
 *  prints the report and summary, and runs forward selection and backward elimination.
 *  > runMain scalation.analytics.forecaster.QuadRegression4TSTest
 */
object QuadRegression4TSTest extends App
{
    import ExampleBPressure.{x01 => x, y}

    val rg4 = new QuadRegression4TS (x, y)
    rg4.train ().eval ()
    val nTerms = QuadRegression4TS.numTerms (2)

    println (s"x = ${rg4.getX}")
    println (s"y = $y")

    println (s"nTerms = $nTerms")
    println (rg4.report)
    println (rg4.summary)

    banner ("Forward Selection Test")
    rg4.forwardSelAll (cross = false)

    banner ("Backward Elimination Test")
    rg4.backwardElimAll (cross = false)

} // QuadRegression4TSTest object


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `QuadRegression4TSTest2` object is used to test the `QuadRegression4TS` class.
 *  The 'x' matrix is one dimensional.
*  It compares a plain `Regression` (with an intercept column prepended) against
 *  `QuadRegression4TS` on data generated as 'i*i + i' plus noise, then plots both fits.
 *  > runMain scalation.analytics.forecaster.QuadRegression4TSTest2
 */
object QuadRegression4TSTest2 extends App
{
    import scalation.random.Normal
    import scalation.plot.Plot

    val (m, n) = (400, 1)                                       // rows and columns of x
    val noise  = new Normal (0, 10 * m * m)                     // noise generator
    val x      = new MatrixD (m, n)
    val y      = new VectorD (m)
    val t      = VectorD.range (0, m)                           // time/instance index

    // fill the single predictor column and the noisy response, row by row
    x.range1.foreach { i =>
        x(i, 0) = i + 1
        y(i)    = i*i + i + noise.gen
    } // foreach

    banner ("Regression")
    val ox = VectorD.one (y.dim) +^: x                          // prepend a column of ones
    val rg = new Regression (ox, y)
    rg.train ().eval ()
    println (rg.report)
    println (rg.summary)
    val yp = rg.predict ()
    val e  = rg.residual

    banner ("QuadRegression4TS")
    val rg4 = new QuadRegression4TS (x, y)
    rg4.train ().eval ()
    println (rg4.report)
    println (rg4.summary)
    val yp4 = rg4.predict ()
    val e4  = rg4.residual

    // plots: raw data, the two fits over t, then the two residual vectors over x
    val x0 = x.col(0)
    new Plot (x0, y, null, "y vs x")
    new Plot (t, y, yp, "y and yp vs t")
    new Plot (t, y, yp4, "y and yp4 vs t")
    new Plot (x0, e, null, "e vs x")
    new Plot (x0, e4, null, "e4 vs x")

} // QuadRegression4TSTest2 object


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `QuadRegression4TSTest3` object is used to test the `QuadRegression4TS` class.
 *  The 'x' matrix is two dimensional.
*  The rows of 'x' enumerate an s-by-s grid of (i, j) pairs; the response is
 *  'x0^2 + 2 * x1' plus noise.  A multi-collinearity check is printed at the end.
 *  > runMain scalation.analytics.forecaster.QuadRegression4TSTest3
 */
object QuadRegression4TSTest3 extends App
{
    import scalation.random.Normal
    import scalation.stat.StatVector.corr

    val s      = 20                                             // grid side length
    val grid   = 1 to s
    val (m, n) = (s*s, 2)                                       // rows and columns of x
    val noise  = new Normal (0, 10 * s * s)                     // noise generator
    val x      = new MatrixD (m, n)
    val y      = new VectorD (m)

    // visit the grid in row-major order; cell (i, j) lands in row (i-1)*s + (j-1)
    for (i <- grid; j <- grid) {
        val row = (i - 1) * s + (j - 1)
        x(row) = VectorD (i, j)
        y(row) = x(row, 0)~^2 + 2 * x(row, 1) + noise.gen
    } // for

    banner ("Regression")
    val ox = VectorD.one (y.dim) +^: x                          // prepend a column of ones
    val rg = new Regression (ox, y)
    rg.train ().eval ()
    println (rg.report)
    println (rg.summary)

    banner ("QuadRegression4TS")
    val rg4 = new QuadRegression4TS (x, y)
    rg4.train ().eval ()
    println (rg4.report)
    println (rg4.summary)

    banner ("Multi-collinearity Check")
    val x4 = rg4.getX                                           // matrix actually used by the model
    println (corr (x4.asInstanceOf [MatrixD]))
    println (s"vif = ${rg4.vif ()}")

} // QuadRegression4TSTest3 object


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `QuadRegression4TSTest4` object tests the `QuadRegression4TS` class using the AutoMPG
 *  dataset.  It illustrates using the `Relation` class for reading the data
 *  from a .csv file "auto-mpg.csv".  Assumes no missing values.
 *  It also combines feature selection with cross-validation and plots
 *  R^2, R^2 Bar and R^2 cv vs. the instance index.
*  > runMain scalation.analytics.forecaster.QuadRegression4TSTest4
 */
object QuadRegression4TSTest4 extends App
{
    import scalation.columnar_db.Relation

    banner ("auto_mpg relation")
    val auto_tab = Relation (BASE_DIR + "auto-mpg.csv", "auto_mpg", null, -1)
    auto_tab.show ()

    banner ("auto_mpg (x, y) dataset")
    val (x, y) = auto_tab.toMatriDD (1 to 6, 0)                 // columns 1 to 6 as x, column 0 as y
    println (s"x = $x")
    println (s"y = $y")

    banner ("auto_mpg regression")
    val rg4 = new QuadRegression4TS (x, y)
    rg4.train ().eval ()
    val n  = x.dim2                                             // number of variables
    val nt = QuadRegression4TS.numTerms (n)                     // number of terms
    println (s"n = $n, nt = $nt")
    println (rg4.report)
    println (rg4.summary)

    banner ("Forward Selection Test")
    val (cols, rSq) = rg4.forwardSelAll ()                      // R^2, R^2 Bar, R^2 cv
    val t = VectorD.range (1, cols.size)                        // instance index
    // NOTE(review): rSq is handed to PlotM exactly as returned; confirm that its
    // orientation (rows vs. columns) is the one PlotM expects for the length of t
    new PlotM (t, rSq, lines = true)
    println (s"rSq = $rSq")

} // QuadRegression4TSTest4 object


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `QuadRegression4TSTest5` object tests `QuadRegression4TS` class using the following
 *  regression equation.
 *

*      y  =  b dot x  =  b_0 + b_1*x_1 + b_2*x_2.
*

*  The same small dataset is fitted with `SimpleRegression` (columns 0, 2, 3 of the
 *  matrix with a ones column prepended), `Regression` and `QuadRegression4TS`,
 *  printing the report, summary and predictions of each.
 *  > runMain scalation.analytics.forecaster.QuadRegression4TSTest5
 */
object QuadRegression4TSTest5 extends App
{
    // 9 data points:             x1 x2  y
    val xy = new MatrixD ((9, 3), 2, 1, 0.4,                    // 9-by-3 matrix
                                  3, 1, 0.5,
                                  4, 1, 0.6,
                                  2, 2, 1.0,
                                  3, 2, 1.1,
                                  4, 2, 1.2,
                                  2, 3, 2.0,
                                  3, 3, 2.1,
                                  4, 3, 2.2)

    // the '+' between the b1 and b2 terms was missing from the printed model
    println ("model: y = b0 + b1*x1 + b2*x1^2 + b3*x2 + b4*x2^2")
    println (s"xy = $xy")

    val oxy = VectorD.one (xy.dim1) +^: xy                      // prepend a column of ones
    val xy_ = oxy.selectCols (Array (0, 2, 3))                  // ones, x2 and y columns
    println (s"xy_ = $xy_")

    banner ("SimpleRegression")
    val srg = SimpleRegression (xy_)
    srg.analyze ()
    println (srg.report)
    println (srg.summary)
    println (s"predict = ${srg.predict ()}")

    banner ("Regression")
    val rg = Regression (oxy)
    rg.analyze ()
    println (rg.report)
    println (rg.summary)
    println (s"predict = ${rg.predict ()}")

    banner ("QuadRegression4TS")
    val rg4 = QuadRegression4TS (xy)
    rg4.analyze ()
    println (rg4.report)
    println (rg4.summary)
    println (s"predict = ${rg4.predict ()}")

} // QuadRegression4TSTest5 object