//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** @author John Miller * @version 1.6 * @date Sat Jun 13 13:37:06 EDT 2020 * @see LICENSE (MIT style license file). * * @title Model: Quadratic X Regression for Time Series */ package scalation.analytics package forecaster import scala.collection.mutable.Set import scalation.linalgebra.{FunctionV_2V, MatriD, MatrixD, VectoD, VectorD} import scalation.linalgebra.VectorD.one import scalation.math.{double_exp, noDouble} import scalation.plot.{Plot, PlotM} import scalation.random.{Normal, Random} import scalation.stat.Statistic import scalation.util.banner //import ImputeBackward.impute import ImputeMean.impute import MatrixTransform._ import RegTechnique._ //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `QuadXRegression4TSy` class uses a neural network to fit the lagged data. * Lag columns ranging from 'lag1' (inclusive) to 'lag2' (inclusive) are added before * delegating the problem to the `QuadXRegression` class. A constant term for intercept * can be added (@see 'allForms' method). * @param x_ the input/data m-by-n matrix consisting of multiple lags of y * @param y_ the output/response m vector (training data consisting of m output values) * the initial output/response vector (before lag term expansion) * @param fname_ the feature/variable names (if null, use x_j's) * @param hparam the hyper-parameters for the model/network * @param technique the technique used to solve for b in x.t*x*b = x.t*y */ class QuadXRegression4TSy (x_ : MatriD = null, y_ : VectoD, fname_ : Strings = null, hparam: HyperParameter = Regression4TS.hp, technique: RegTechnique = QR, itran: FunctionV_2V = null) extends QuadXRegression (x_, y_, fname_, hparam, technique) with ForecasterMat { private val DEBUG = true // debug flag //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return the model name including its current hyper-parameter. */ override def modelName: String = s"QuadXRegression4TSy($n)" //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Compute the error (difference between actual and forecasted) and useful * diagnostics for the test dataset. * @param ym the mean of the actual response/output vector (full/training) * @param yy the actual response/output vector (full/testing) * @param yf the forecasted response/output vector (full/testing) */ override def eval (ym: Double, yy: VectoD, yf: VectoD): QuadXRegression4TSy = { if (DEBUG) { println (s"eval: ym = $ym") println (s"eval: yy.dim = ${yy.dim}") println (s"eval: yf.dim = ${yf.dim}") } // if if (e == null) e = new VectorD (yy.dim) e = yy - yf // compute residual/error vector e, i.e., ee(0) diagnose (e, yy, yf, null, ym) // compute diagnostics this } // eval //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Forecast values for all time points using 1 through 'h'-steps ahead forecasts. * Return a vector of forecasts for all time points. * @param h the forecasting horizon, number of steps ahead to produce forecasts */ override def forecastAll (xe: MatriD = getX, h: Int = 1): VectoD = { val yf = new VectorD (m) for (t <- 0 until m) yf(t) = forecast (xe, t, h) if (DEBUG) println (s"yf = $yf") yf } // forecastAll //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Produce a forecast for 'h' steps ahead into the future. * Note: the forecasts for time 't = 0, ... , h-1' will be duplicates. * @param xe the relevant expanded data matrix * @param t the time for the forecast * @param h the forecasting horizon, number of steps ahead to produce forecast */ def forecast (xe: MatriD, t: Int, h: Int = 1): Double = { val tt = math.max (0, t+1-h) // time @ row h-1 back in matrix predict (xe(tt)) } // forecast } // QuadXRegression4TSy class import Regression4TS.hp //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `QuadXRegression4TSy` companion object provides factory functions and functions * for creating functional forms. */ object QuadXRegression4TSy extends ModelFactory { private val DEBUG = true // debug flag //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Create a `QuadXRegression4TSy` object from a response vector. * This factory function provides data rescaling. * @see `ModelFactory` * @param y the output/response m-vector (before expansion) * @param fname the feature/variable names (if null, use x_j's) * @param hparam the hyper-parameters for the model/network * @param technique the technique used to solve for b in x.t*x*b = x.t*y */ def apply (y: VectoD, fname: Strings, hparam: HyperParameter, technique: RegTechnique = QR): QuadXRegression4TSy = { val t = VectorD.range (1, y.dim + 1) val hp2 = if (hparam == null) hp else hparam val x = Regression4TS.allForms_y (y, hp2("lag1").toInt, hp2("lag2").toInt, addOne = false) :^+ t if (rescale) { // normalize the x matrix val xx = x.copy val (mu_xx, sig_xx) = (xx.mean, stddev (xx)) normalize (xx, (mu_xx, sig_xx)) new QuadXRegression4TSy (xx, y, fname, hp2, technique) } else { new QuadXRegression4TSy (x, y, fname, hp2, technique) } // if } // apply //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The number of terms include current value and lag one value. * when there are no cross-terms. * @param k number of features/predictor variables (not counting intercept) */ override def numTerms (k: Int) = 2 * k + 1 // FIX } // QuadXRegression4TSy object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `QuadXRegression4TSyTest` object is used to test the `QuadXRegression4TSy` class. * > runMain scalation.analytics.forecaster.QuadXRegression4TSyTest */ object QuadXRegression4TSyTest extends App { val ran = Random () val n = 100 val y = VectorD (for (i <- 0 until n) yield i + 10.0 * ran.gen) banner ("Build AR(2) Model") val ar = new AR (y) // time series data ar.train (null, y).eval () // train for AR(1) model println (ar.report) new Plot (null, y, ar.predictAll (), "Plot of y, AR(1) vs. t", true) banner ("Select model based on ACF and PACF") ar.plotFunc2 (ar.acF, "ACF") ar.plotFunc2 (ar.pacF, "PACF") banner ("QuadXRegression4TSy") val mxl = 3.0 // maximum lag val hp2 = Regression4TS.hp.updateReturn (("lag1", 1.0), ("lag2", mxl)) val nn = QuadXRegression4TSy (y, null, hp2) nn.train() nn.eval() println (nn.report) val yp = nn.predict () new Plot (null, y, yp, "Plot of y, yp_nn vs. t", true) } // QuadXRegression4TSyTest object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `QuadXRegression4TSyTest2` object is used to test the `QuadXRegression4TSy` class. * The order 'p' hyper-parameter should be re-assigned (try 1, 2, 3, ...). * > runMain scalation.analytics.forecaster.QuadXRegression4TSyTest2 */ object QuadXRegression4TSyTest2 extends App { val noise = Normal (0.0, 1.0) val n = 30 val t = VectorD.range (0, n) val y = VectorD (for (i <- 0 until n) yield i + noise.gen) val p = 2 // try different values for p banner ("Build AR($p) Model") ARMA.hp("p") = p // reassign hyper-parameter p val ar = new AR (y) // time series data ar.train (null, y) // train for AR(p) model ar.eval () // evaluate the QoF println (ar.report) new Plot (t, y, ar.predictAll (), s"Plot of y, AR($p) vs. t", true) banner ("Make Forecasts") val steps = 10 // number of steps for the forecasts val ar_f = ar.forecast (steps) println (s"$steps-step ahead forecasts using AR($p) model = $ar_f") val tf = VectorD.range (n, n + steps) new Plot (tf, ar_f, null, s"Plot ar($p) forecasts vs. t", true) banner ("QuadXRegression4TSy") val mxl = 4.0 // maximum lag val hp2 = Regression4TS.hp.updateReturn (("lag1", 1.0), ("lag2", mxl)) val nn = QuadXRegression4TSy (y, null, hparam = hp2) nn.train2() nn.eval() println (nn.report) // val yp = nn.predict () val yp = nn.forecastAll () new Plot (null, y, yp, "Plot of y, yp_nn vs. t", true) } // QuadXRegression4TSyTest2 object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `QuadXRegression4TSyTest3` object is used to test the `QuadXRegression4TSy` class. * Forecasting lake levels. * @see cran.r-project.org/web/packages/fpp/fpp.pdf * > runMain scalation.analytics.forecaster.QuadXRegression4TSyTest3 */ object QuadXRegression4TSyTest3 extends App { import ForecasterVec.y for (p <- 1 to 5) { banner (s"Build AR($p) Model") ARMA.hp("p") = p // reassign hyper-parameter p val ar = new AR (y) // time series data ar.train (null, y).eval () // train for AR(1) model println (ar.report) new Plot (null, y, ar.predictAll (), s"Plot of y, yp AR($p) vs. t", true) banner (s"Rolling Validation for AR($p) Model") val stats = SimpleRollingValidation.crossValidate2 (ar, kt_ = 1) // val stats = RollingValidation.crossValidate2 (ar, kt_ = 2) // Fit.showQofStatTable (stats) banner (s"QuadXRegression4TSy($p)") val hp2 = Regression4TS.hp.updateReturn (("lag1", 1.0), ("lag2", p.toDouble)) val qr = QuadXRegression4TSy (y, null, hparam = hp2) qr.train ().eval () qr.eval () println (qr.report) // val yp = qr.predict () val yp = qr.forecastAll () new Plot (null, y, yp, s"Plot of y, yp QuadXRegression4TSy($p) vs. t", true) banner (s"Rolling Validation for QuadXRegression4TSy($p)") val stats2 = SimpleRollingValidation.crossValidate (qr, kt_ = 1) // val stats2 = RollingValidation.crossValidate (qr, kt_ = 2) // Fit.showQofStatTable (stats2) } // for } // QuadXRegression4TSyTest3 object