//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** @author John Miller * @version 1.6 * @date Thu Jul 4 14:21:05 EDT 2019 * @see LICENSE (MIT style license file). * * @title Model: Regression for Time Series */ package scalation.analytics package forecaster import scala.collection.mutable.Set import scalation.linalgebra.{MatriD, MatrixD, VectoD, VectorD} import scalation.linalgebra.VectorD.one import scalation.math.{double_exp, noDouble} import scalation.plot.PlotM import scalation.stat.Statistic import scalation.util.banner //import ImputeBackward.impute import ImputeMean.impute import MatrixTransform._ import RegTechnique._ //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `Regression4TS` class uses multiple regression to fit the lagged data. * Lag columns ranging from 'lag1' (inclusive) to 'lag2' (inclusive) are added before * delegating the problem to the `Regression` class. A constant term for intercept * can be added (@see 'allForms' method) but must not include intercept (column of ones) * in initial data matrix. * @param x_ the initial data/input matrix (before lag term expansion) * @param y the response/output vector * @param fname_ the feature/variable names * @param hparam the hyper-parameters * @param technique the technique used to solve for b in x.t*x*b = x.t*y * @param addOne whether to add a column of all ones to the data matrix (for intercept) */ class Regression4TS (x_ : MatriD, y: VectoD, fname_ : Strings = null, hparam: HyperParameter = Regression4TS.hp, technique: RegTechnique = QR, addOne: Boolean = true) extends Regression (Regression4TS.allForms (x_, hparam ("lag1").toInt, hparam ("lag2").toInt, addOne), // extends Regression (Regression4TS.allForms_y (x_, y, hparam ("lag1").toInt, hparam ("lag2").toInt, addOne), y, fname_, hparam, technique) with ForecasterMat { private val DEBUG = false // debug flag //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return the model name including its current hyper-parameter. */ override def modelName: String = s"Regression4TS($n)" //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Compute the error (difference between actual and forecasted) and useful * diagnostics for the test dataset. * @param ym the mean of the actual response/output vector (full/training) * @param yy the actual response/output vector (full/testing) * @param yf the forecasted response/output vector (full/testing) */ override def eval (ym: Double, yy: VectoD, yf: VectoD): Regression4TS = { if (DEBUG) { println (s"eval: ym = $ym") println (s"eval: yy.dim = ${yy.dim}") println (s"eval: yf.dim = ${yf.dim}") } // if e = yy - yf // compute residual/error vector e diagnose (e, yy, yf, null, ym) // compute diagnostics this } // eval //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Produce a forecast for 'h' steps ahead into the future. * Note: the forecasts for time 't = 0, ... , h-1' will be duplicates. * @param xe the relevant expanded data matrix * @param t the time for the forecast * @param h the forecasting horizon, number of steps ahead to produce forecast */ def forecast (xe: MatriD, t: Int, h: Int = 1): Double = { val tt = math.max (0, t+1-h) // time @ row h-1 back in matrix predict (xe(tt)) } // forecast } // Regression4TS class //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `Regression4TS` companion object provides factory functions and functions * for creating functional forms. */ object Regression4TS extends ModelFactory { /** Base hyper-parameter specification for `Regression4TS` */ val hp = new HyperParameter hp += ("lag1", 1, 1) // first lag included (inclusive) hp += ("lag2", 2, 2) // last lag included (inclusive) // hp += ("lambda", 0.01, 0.01) // regularization/shrinkage parameter private val DEBUG = true // debug flag private val DEBUG2 = false // verbose debug flag //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Create a `Regression4TS` object from a combined data-response matrix. * @param xy the initial combined data-response matrix (before quadratic term expansion) * @param fname_ the feature/variable names * @param hparam the hyper-parameters * @param technique the technique used to solve for b in x.t*x*b = x.t*y * @param addOne whether to add a column of all ones to the data matrix */ def apply (xy: MatriD, fname: Strings = null, hparam: HyperParameter = hp, technique: RegTechnique = QR, addOne: Boolean = true): Regression4TS = { val n = xy.dim2 if (n < 2) { flaw ("apply", s"dim2 = $n of the 'xy' matrix must be at least 2") null } else { val (x, y) = pullResponse (xy) new Regression4TS (x, y, fname, hparam, technique, addOne) } // if } // apply //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Create a `Regression4TS` object from a data matrix and a response vector. * This factory function provides data rescaling. * @see `ModelFactory` * @param x the initial data/input matrix (before quadratic term expansion) * @param y the response/output m-vector * @param fname the feature/variable names (use null for default) * @param hparam the hyper-parameters (use null for default) * @param technique the technique used to solve for b in x.t*x*b = x.t*y (use OR for default) * @param addOne whether to add a column of all ones to the data matrix */ def apply (x: MatriD, y: VectoD, fname: Strings, hparam: HyperParameter, technique: RegTechnique, addOne: Boolean): Regression4TS = { val n = x.dim2 if (n < 1) { flaw ("apply", s"dim2 = $n of the 'x' matrix must be at least 1") null } else if (rescale) { // normalize the x matrix val xx = x.copy () val (mu_xx, sig_xx) = (xx.mean, stddev (xx)) normalize (xx, (mu_xx, sig_xx)) new Regression4TS (xx, y, fname, hparam, technique, addOne) } else { new Regression4TS (x, y, fname, hparam, technique, addOne) } // if } // apply //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The number of terms include current value and lag one value. * when there are no cross-terms. * @param k number of features/predictor variables (not counting intercept) */ override def numTerms (k: Int) = 2 * k + 1 // FIX //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Create all forms/terms by concatenating columnwise the data matrix with * its first lag. * @param x the original un-expanded input/data matrix */ override def allForms (x: MatriD): MatriD = { x.slice (1, x.dim1) ++^ x.slice (0, x.dim1 - 1) } // allForms //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Create all forms/terms by concatenating columnwise the data matrix with * lags 'lag1' to 'lag2', where 'lag1 = 0' is current values. * Note 'allForms (x) = allForms (x, 0, 2)' * @param x the original un-expanded input/data matrix * @param lag1 the first lag included (inclusive) * @param lag2 the last lag included (inclusive) * @param addOne whether to add a column of all ones to the matrix (intercept) */ def allForms (x: MatriD, lag1: Int, lag2: Int, addOne: Boolean = false): MatriD = { val nl = lag2 - lag1 + 1 var z = new MatrixD (x.dim1, nl * x.dim2) if (DEBUG) println (s"allForms: nl = $nl, z.dim = ${z.dim1}, z.dim2 = ${z.dim2}") for (i <- x.range1) { for (j <- x.range2; l <- lag1 to lag2) { z(i, j + (l - lag1) * x.dim2) = if (i - l >= 0) { x(i - l, j) } else { val missVal = impute (x.col (j))._2 if (missVal == noDouble) flaw ("allForms", s"for ($i, $j) missVal = $missVal") missVal } // if } // for } // for if (addOne) z = VectorD.one (x.dim1) +^: z // add first column of all ones if (DEBUG2) println (s"allForms: lag expanded x = $z") z } // allForms //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Create all forms/terms by concatenating columnwise the response vector * lags 'lag1' to 'lag2', where 'lag1 = 0' is current values. * @param y the original un-expanded output/response vector * @param lag1 the first lag included (inclusive) required to be > 0 * @param lag2 the last lag included (inclusive) * @param addOne whether to add a column of all ones to the matrix (intercept) */ def allForms_y (y: VectoD, lag1: Int, lag2: Int, addOne: Boolean = false): MatriD = { if (lag1 == 0) flaw ("allForms_y", "lag1 == 0 => using current response y to forecast itself") val nl = lag2 - lag1 + 1 var z = new MatrixD (y.dim, nl) for (i <- y.range) { for (l <- lag1 to lag2) { z(i, l - lag1) = if (i - l >= 0) y(i - l) else impute (y, i)._2 } // for } // for if (addOne) z = VectorD.one (y.dim) +^: z // add first column of all ones z } // allForms_y //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Create all forms/terms by concatenating columnwise the response vector * lags 'lag1' to 'lag2', where 'lag1 = 0' is current values. * This method is used for the direct forecasting method, where the model * is explictly trained for an 'h'-step ahead forecast. * @see arxiv.org/pdf/1108.3259.pdf * @param y the original un-expanded output/response vector * @param lag1 the first lag included (inclusive) required to be > 0 * @param lag2 the last lag included (inclusive) * @param h the forecasting horizon for the model to be trained on. * @param addOne whether to add a column of all ones to the matrix (intercept) */ def allForms_y_dir (y: VectoD, lag1: Int, lag2: Int, h: Int, addOne: Boolean = false): MatriD = { null // FIX - to be coded } // allForms_y_dir //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Create all forms/terms by concatenating columnwise the data matrix with * lags 'lag1' to 'lag2', where 'lag1 = 0' is current values. * Include lags for the response variable as well. * @param x the original un-expanded input/data matrix * @param y the original un-expanded output/response vector * @param lag1 the first lag included (inclusive) * @param lag2 the last lag included (inclusive) * @param addOne whether to add a column of all ones to the matrix (intercept) */ def allForms_xy (x: MatriD, y: VectoD, lag1: Int, lag2: Int, addOne: Boolean = false): MatriD = { allForms (x, lag1, lag2, addOne) ++^ allForms_y (y, math.max (1, lag1), lag2, false) } // allForms_xy } // Regression4TS object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `Regression4TSTest` object is used to test the `Regression4TS` class. * > runMain scalation.analytics.forecaster.Regression4TSTest */ object Regression4TSTest extends App { import ExampleBPressure.{x01 => x, y} val rg4 = new Regression4TS (x, y) rg4.analyze () val nTerms = Regression4TS.numTerms (2) println (s"x = ${rg4.getX}") println (s"y = $y") println (s"nTerms = $nTerms") println (rg4.report) println (rg4.summary) banner ("Forward Selection Test") rg4.forwardSelAll (cross = false) banner ("Backward Elimination Test") rg4.backwardElimAll (cross = false) } // Regression4TSTest object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `Regression4TSTest2` object is used to test the `Regression4TS` class. * The 'x' matrix in one dimensional. * > runMain scalation.analytics.forecaster.Regression4TSTest2 */ object Regression4TSTest2 extends App { import scalation.random.Normal import scalation.plot.Plot val (m, n) = (400, 1) val noise = new Normal (0, 10 * m * m) val x = new MatrixD (m, n) val y = new VectorD (m) val t = VectorD.range (0, m) for (i <- x.range1) { x(i, 0) = i + 1 y(i) = i*i + i + noise.gen } // for banner ("Regression") val ox = VectorD.one (y.dim) +^: x val rg = new Regression (ox, y) rg.analyze () println (rg.report) println (rg.summary) val yp = rg.predict () val e = rg.residual banner ("Regression4TS") val hp2 = Regression4TS.hp.updateReturn (("lag1", 1.0), ("lag2", 2.0)) val rg4 = new Regression4TS (x, y, hparam = hp2) rg4.analyze () println (rg4.report) println (rg4.summary) val yp4 = rg4.predict () val e4 = rg4.residual val x0 = x.col(0) new Plot (x0, y, null, "y vs x") new Plot (t, y, yp, "y and yp vs t") new Plot (t, y, yp4, "y and yp4 vs t") new Plot (x0, e, null, "e vs x") new Plot (x0, e4, null, "e4 vs x") } // Regression4TSTest2 object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `Regression4TSTest3` object is used to test the `Regression4TS` class. * The 'x' matrix in two dimensional. * > runMain scalation.analytics.forecaster.Regression4TSTest3 */ object Regression4TSTest3 extends App { import scalation.random.Normal import scalation.stat.StatVector.corr val s = 20 val grid = 1 to s val (m, n) = (s*s, 2) val noise = new Normal (0, 10 * s * s) val x = new MatrixD (m, n) val y = new VectorD (m) var k = 0 for (i <- grid; j <- grid) { x(k) = VectorD (i, j) y(k) = x(k, 0)~^2 + 2 * x(k, 1) + noise.gen k += 1 } // for banner ("Regression") val ox = VectorD.one (y.dim) +^: x val rg = new Regression (ox, y) rg.analyze () println (rg.report) println (rg.summary) banner ("Regression4TS") val rg4 = new Regression4TS (x, y) rg4.analyze () println (rg4.report) println (rg4.summary) banner ("Multi-collinearity Check") val x4 = rg4.getX println (corr (x4.asInstanceOf [MatrixD])) println (s"vif = ${rg4.vif ()}") } // Regression4TSTest3 object //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `Regression4TSTest4` object tests the `Regression4TS` class using the AutoMPG * dataset. It illustrates using the `Relation` class for reading the data * from a .csv file "auto-mpg.csv". Assumes no missing values. * It also combines feature selection with cross-validation and plots * R^2, R^2 Bar and R^2 cv vs. the instance index. * > runMain scalation.analytics.forecaster.Regression4TSTest4 */ object Regression4TSTest4 extends App { import scalation.columnar_db.Relation banner ("auto_mpg relation") val auto_tab = Relation (BASE_DIR + "auto-mpg.csv", "auto_mpg", null, -1) auto_tab.show () banner ("auto_mpg (x, y) dataset") val (x, y) = auto_tab.toMatriDD (1 to 6, 0) println (s"x = $x") println (s"y = $y") banner ("auto_mpg regression") val rg4 = new Regression4TS (x, y) rg4.analyze () val n = x.dim2 // number of variables val nt = Regression4TS.numTerms (n) // number of terms println (s"n = $n, nt = $nt") println (rg4.report) println (rg4.summary) banner ("Forward Selection Test") val (cols, rSq) = rg4.forwardSelAll () // R^2, R^2 Bar, R^2 cv val k = cols.size val t = VectorD.range (1, k) // instance index new PlotM (t, rSq, lines = true) println (s"rSq = $rSq") /* banner ("Forward Selection Test") val qx = rg4.getX // get matrix with all columns val rg = new Regression (qx, y) // regression on all columns val rSq = new MatrixD (qx.dim2-1, 3) // R^2, R^2 Bar, R^2 cv val fcols = Set (0) // start with x_0 in model for (l <- 1 until nt) { val (x_j, b_j, fit_j) = rg.forwardSel (fcols) // add most predictive variable println (s"forward model: add x_j = $x_j with b = $b_j \n fit = $fit_j") if (x_j == -1) { println (s"the 'forwardSel' could not find a variable to add: x_j = $x_j") } else { fcols += x_j // add variable x_j val x_cols = qx.selectCols (fcols.toArray) // qx projected onto cols_j columns rSq(l-1) = Fit.qofVector (fit_j, rg.crossVal (x_cols)) // use main model, 'rg' // val rg_j = new Regression (x_cols, y) // new model, regress with x_j added // rSq(l-1) = Fit.qofVector (fit_j, rg_j.crossVal ()) // use new model, rg_j } // if } // for val k = fcols.size for (l <- k until nt) rSq(l-1) = rSq(l-2) println (s"rSq = $rSq") val t = VectorD.range (1, k) // instance index new PlotM (t, rSq.t, lines = true) */ } // Regression4TSTest4 object //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `Regression4TSTest5` object tests `Regression4TS` class using the following * regression equation. *

* y = b dot x = b_0 + b_1*x1 + b_2*x_2. *

* > runMain scalation.analytics.forecaster.Regression4TSTest5 */ object Regression4TSTest5 extends App { // 4 data points: x1 x2 y val xy = new MatrixD ((9, 3), 2, 1, 0.4, // 4-by-3 matrix 3, 1, 0.5, 4, 1, 0.6, 2, 2, 1.0, 3, 2, 1.1, 4, 2, 1.2, 2, 3, 2.0, 3, 3, 2.1, 4, 3, 2.2) println ("model: y = b0 + b1*x1 b2*x1^2 + b3*x2 + b4*x2^2") println (s"xy = $xy") val oxy = VectorD.one (xy.dim1) +^: xy val xy_ = oxy.selectCols (Array (0, 2, 3)) println (s"xy_ = $xy_") banner ("SimpleRegression") val srg = SimpleRegression (xy_) srg.analyze () println (srg.report) println (srg.summary) println (s"predict = ${srg.predict ()}") banner ("Regression") val rg = Regression (oxy) rg.analyze () println (rg.report) println (rg.summary) println (s"predict = ${rg.predict ()}") banner ("Regression4TS") val rg4 = Regression4TS (xy) rg4.analyze () println (rg4.report) println (rg4.summary) println (s"predict = ${rg4.predict ()}") } // Regression4TSTest5 object