//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** @author John Miller, Hao Peng * @version 1.6 * @date Sat Dec 8 14:32:12 EST 2018 * @see LICENSE (MIT style license file). * * @title Model Framework: Forecasters with Vector Input */ package scalation.analytics package forecaster import scala.collection.mutable.Set import scala.math.{abs, min, sqrt} import scala.util.control.Breaks.{break, breakable} import scalation.linalgebra.{MatriD, MatrixD, VectoD, VectorD, VectorI} import scalation.math.double_exp import scalation.plot.{Plot, PlotM} import scalation.util.banner import Fit._ import ForecasterVec._ import RollingValidation.trSize //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ForecasterVec` abstract class provides a common framework for several * forecasters. Note, the 'train' method must be called first followed by 'eval'. * @param y the response vector (time-series data) * @param n the number of parameters * @param hparam the hyper-parameters */ abstract class ForecasterVec (y: VectoD, n: Int, hparam: HyperParameter = null) extends Fit (y, n, n, y.dim - n) with Predictor { private val DEBUG = false // debug flag protected val m = y.dim // size of the response vector protected val ml = min (m, MAX_LAGS) // maximum lag to consider protected var stats: Stats = null // the training/full dataset statistics protected var psi: MatriD = null // pass in auto-covariance and max lags to Durbin-Levinson protected var pacf: VectoD = null // Partial Auto-Correlation Function (PACF) protected var z: VectoD = null // work with mean zero time series protected var e: VectoD = null // residual/error vector [e_0, e_1, ... e_m-1] protected var yf: MatriD = null // forecasts for all time points t & horizons h //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return the 'used' data matrix 'x' (for such models, it's null). */ def getX: MatriD = null //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return the 'used' response vector 'y'. Mainly for derived classes where 'y' is * transformed, e.g., `TranRegression`, `Regression4TS`. */ def getY: VectoD = y //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return the autocorrelation. Must call 'train' first. */ def acF: VectoD = stats.acr //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return the partial autocorrelation. Must call 'train' first. */ def pacF: VectoD = pacf //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Given a time-series 'y_', train the forecasting function 'y = f(y_)', where * 'f(y_)' is a function of the lagged values of 'y_', by fitting its parameters. * @param x_null the training/full data/input matrix (ignored, pass 'null') * @param y_ the training/full response/output vector */ def train (x_null: MatriD, y_ : VectoD): ForecasterVec = { stats = Stats (y_, ml) if (DEBUG) println (s"train: $stats") psi = durbinLevinson (stats.acv, ml) // pass in auto-covariance and max lags to Durbin-Levinson pacf = psi.getDiag () // Partial Auto-Correlation Function (PACF) z = y_ - stats.mu // zero-center the training response vector e = new VectorD (y_.dim) // make space for error results this } // train //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Compute the error (difference between actual and predicted) and useful * diagnostics for the dataset. * @param x_e the test/full data/input matrix (ignored, pass null) * @param y_e the test/full actual response/output vector */ def eval (x_e: MatriD, y_e: VectoD): ForecasterVec = eval (y_e) //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Compute the error (difference between actual and predicted) and useful * diagnostics for the dataset. * @param y_e the test/full actual response/output vector */ def eval (y_e: VectoD = y): ForecasterVec = { val yp = predictAll () e = y_e - yp diagnose (e, y_e, yp) // , null, stats.mu) this } // eval //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Compute the error (difference between actual and predicted) and useful * diagnostics for the dataset. * @param y_e the test/full actual response/output vector * @param yf the vector of forecasts */ def evalf (y_e: VectoD, yf: VectoD): ForecasterVec = { e = y_e - yf diagnose (e, y_e, yf) // , null, stats.mu) this } // evalf //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Analyze a dataset using this model using ordinary training with the * 'train' method. * @param x_ the training/full the data/input matrix (ignore) * @param y_ the training/full the response/output vector * @param x_e the test/full data/input matrix (ignore) * @param y_e the test/full response/output vector */ def analyze (x_ : MatriD = null, y_ : VectoD = y, x_e: MatriD = null, y_e: VectoD = y): ForecasterVec = { train (x_, y_) // train the model on the training dataset eval (x_e, y_e) // evaluate using the testing dataset this } // analyze //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return the hyper-parameters. */ def hparameter: HyperParameter = hparam //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return a basic report on the trained model. */ def report: String = { s""" REPORT modelName = $modelName hparameter hp = $hparameter parameter b = $parameter fitMap qof = $fitMap """ } // report //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return a detailed summary of the trained model. * @param b the symbol(s) used for the parameters * @param modelEq the model equation as a string */ def summary (b: String, modelEq: String): String = { s""" SUMMARY modelName = $modelName hparameter hp \t= $hparameter parameter $b \t= $parameter fitMap qof \t= $fitMap stats \t= $stats modelEq \t= $modelEq """ } // summary //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return the vector of residuals/errors. */ def residual: VectoD = e //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return the horizon 1 forecast beyond the end of the time-series. * @param y_null the actual response/output vector to use (ignored) */ def predict (y_null: VectoD = null): Double = forecast (y.dim-1)(0) //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Predict the value of 'y = f(z)' for each row of matrix 'z'. * @param z the new matrix to predict */ def predict (z: MatriD): VectoD = { throw new UnsupportedOperationException ("predict: on a matrix is not supported") } // predict //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return the vector of predicted values for all zero-centered data. */ def predictAllz (): VectoD //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return the vector of predicted values for all original data. * Undo initial zeroing of the data 'y - mu'. */ def predictAll (): VectoD = predictAllz () + stats.mu //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Produce a vector of size 'h', of 1 through 'h'-steps ahead forecasts for the * model. *

* forecast the following time points: t, t+1, ..., t-1+h. *

* Note, invoke 'forecastAll' to create the 'yf' matrix. * @param yf the y-forecast matrix for all time and horizons * @param t the time point from which to make forecasts * @param h the forecasting horizon, number of steps ahead to produce forecasts */ def forecast (yf: MatriD, t: Int, h: Int): VectoD = { if (yf == null) flaw ("forecast", "must call 'forecastAll' first to get 'yf'") if (t > m) flaw ("forecast", "no forecasts with starting t > m are provided") if (h >= yf.dim2) flaw ("forecast", "no forecasts beyond horizon h = ${yf.dim2-1}") VectorD (for (k <- 1 to h if t-1+k < m) yield yf(t-1+k, k)) } // forecast //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Produce a vector of size 'h', of 1 through 'h'-steps ahead forecasts for the * model. *

* forecast the following time points: t, t+1, ..., t-1+h. *

* Note, invoke 'forecastAll' first to create the 'yf' matrix. * @param t the time point from which to make forecasts * @param h the forecasting horizon, number of steps ahead to produce forecasts */ def forecast (t: Int, h: Int = 1): VectoD = forecast (yf, t, h) //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Forecast values for all time points using 1 through 'h'-steps ahead forecasts. * The 'h'-th row of matrix is the horizon 'h' forecast (where 'h = 0' is actual data). * @param h the forecasting horizon, number of steps ahead to produce forecasts, must be > 0 */ def forecastAll (h: Int): MatriD //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Forecast values for all time points using 1 through 'h'-steps ahead forecasts. * The 'h'-th row of matrix is the horizon 'h' forecast (where 'h = 0' is actual data). * @param h the forecasting horizon, number of steps ahead to produce forecasts, must be > 0 * @param p the order of the model (e.g, p in AR, q in MA) * or number of values to use in making forecasts, must be > 0 */ def forecastAll (h: Int, p: Int): MatriD = { println (s"forecastAll: p = $p") if (h < 1 || p < 1) flaw ("forecastAll", "h = $h and p = $p must be at least one") val yf = new MatrixD (h+1, m) // forecasts for all horizons h & time points t yf(0) = y // first row is actual values for (t <- p until m) { val ft = forecast (t, h) // forecasts at time point t, horizons 1 to h for (k <- 1 to h if t+k-1 < m) { yf(k, t+k-1) = ft(k-1) // place the forecasted values diagonally down into yf // (to align with the appropriate forecasting horizon/step) } // for } // for // fill in the blank values in the first few columns where no forecasts can be produced by copying values from previous rows for (k <- 1 to h; t <- 0 until p) yf(k, t) = y(t) // copy observed values from y for (k <- 2 to h; t <- p until p+k-1) yf(k, t) = yf(k-1, t) // copy forecasted values if (DEBUG) for (k <- 1 to h) println (s"for k = $k, sse = ${(y - yf(k)).normSq}") yf // return matrix of forecasted values } // forecastAll //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Produce h-steps ahead forecast on the testing data during cross validation. * Likely to need overriding. * @param y the current response vector * @param t the time point/index to be forecast * @param h the forecasting horizon, number of steps ahead to produce forecast */ def forecastX (y: VectoD, t: Int, h: Int = 1): Double = forecast (t, h).last //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Perform forward selection to find the most predictive variable to add the * existing model, returning the variable to add and the new model. * May be called repeatedly. * Note, all lags up and including 'p|q' define the model. * @see `Fit` for index of QoF measures. * @param cols the lags/columns currently included in the existing model (currently ignored) * @param index_q index of Quality of Fit (QoF) to use for comparing quality */ def forwardSel (cols: Set [Int], index_q: Int = index_rSqBar): (Int, ForecasterVec) //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Perform forward selection to find the most predictive lags/variables to have * in the model, returning the variables added and the new Quality of Fit (QoF) * measures for all steps. * @see `Fit` for index of QoF measures. * @param index_q index of Quality of Fit (QoF) to use for comparing quality * @param cross whether to include the cross-validation QoF measure (currently ignored) * def forwardSelAll (index_q: Int = index_rSq, cross: Boolean = false): (Set [Int], MatriD) = { val rSq = new MatrixD (MAX_LAGS, 3) // R^2, R^2 Bar, R^2 cv val cols = Set (1) // start with lag1 in model println (s"forwardSelAll (l = 0): cols = $cols") breakable { for (l <- 2 until MAX_LAGS) { val (j, mod_j) = forwardSel (cols, index_q) // add most predictive variable if (j == -1) break () cols += j // add variable x_j val fit_j = mod_j.fit rSq(l) = Fit.qofVector (fit_j, null) // use new model, mod_j, no cross if (true) { // (DEBUG) { val k = cols.size - 1 println (s"==> forwardSelAll (l = $l): add (#$k) variable $j, cols = $cols, qof = ${fit_j(index_q)}") } // if }} // breakable for (cols, rSq.slice (0, cols.size-1)) } // forwardSelAll */ //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Plot a function, e.g., Auto-Correlation Function 'ACF', Partial Auto-Correlation * Function 'PACF'. * @param fVec the vector given function values * @param name the name of the function * @param show whether to show the fVec values */ def plotFunc (fVec: VectoD, name: String, show: Boolean = true): Unit = { val lag_axis = VectorD.range (0, ml+1) val zero = new VectorD (ml+1) new Plot (lag_axis, fVec, zero, "Plot of " + name, true) if (show) println (s"$name: fVec = $fVec") } // plotFunc //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Plot a function, e.g., Auto-Correlation Function 'ACF', Partial Auto-Correlation * Function 'PACF' with confidence bound. * @param fVec the vector given function values * @param name the name of the function * @param show whether to show the fVec values */ def plotFunc2 (fVec: VectoD, name: String, show: Boolean = true): Unit = { val lag_axis = VectorD.range (0, ml+1) val zero = new VectorD (ml+1) val bound = VectorD (for (k <- 0 to ml) yield 1.96 / sqrt (m - k)) val mat = MatrixD (Seq (fVec, zero, bound, -bound)) new PlotM (lag_axis, mat.t, Array ("fVec", "zero", "bound"), "PlotM of " + name, true) if (show) println (s"$name: fVec = $fVec") } // plotFunc2 } // ForecasterVec abstract class //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ForecasterVec` companion object provides functions useful for forecasting * models. It also contains a sample dataset. */ object ForecasterVec { import scalation.stat.vectorD2StatVector val MAX_LAGS = 29 // maximum amount of lag supported private val DEBUG = false // debug flag //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Point out the differences between two vectors/time-series. * @param u the first vector/time-series * @param v the second vector/time-series * @param scale the scale factor to set the tolerance 'tol' * @param allow flag indicating whether allow (via assert) any differences */ def differ (u: VectoD, v: VectoD, scale: Double = 1E-9, allow: Boolean = true): Int = { if (u.dim != v.dim) flaw ("differ", s"requires u.dim = ${u.dim} = v.dim = ${v.dim}") val tol = u.mean * scale var cnt = 0 // for (t <- u.range if u(t) !=~ v(t)) { // machine epsilon for (t <- u.range if abs (u(t) - v(t)) > tol) { // application tolerance cnt += 1 println (s"differ at t = $t: ${u(t)} \t ${v(t)}") } // for banner (s"differ (u, v): found $cnt points that differ") if (! allow) assert (cnt == 0) cnt } // differ //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Apply the Durbin-Levinson Algorithm to iteratively compute the 'psi' matrix. * The last row of the matrix gives 'AR' coefficients. * @see www.stat.tamu.edu/~suhasini/teaching673/time_series.pdf, p. 247 * @param g the auto-covariance vector (gamma) * @param ml the maximum number of lags */ def durbinLevinson (g: VectoD, ml: Int): MatriD = { val psi_ = new MatrixD (ml+1, ml+1) // psi matrix (ml = max lags) val r = new VectorD (ml+1); r(0) = g(0) for (k <- 1 to ml) { // range up to max lags var sum = 0.0 for (j <- 1 until k) sum += psi_(k-1, j) * g(k-j) val a = (g(k) - sum) / r(k-1) psi_(k, k) = a for (j <- 1 until k) psi_(k, j) = psi_(k-1, j) - a * psi_(k-1, k-j) r(k) = r(k-1) * (1.0 - a * a) } // for psi_ } // durbinLevinson //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Test (in sample) forecasting model that extends `ForecasterVec`. * returning the matrix of all forecasts over horizon and time. * @param y the time series data * @param pq the order of the model (number of response values to use in forecasts) * @param mod the model to test * @param h the forecasting horizon */ def testInSamp (y: VectoD, pq: Int, mod: ForecasterVec, h: Int = 1): MatriD = { val modName = mod.modelName banner (s"testInSamp: $modName}, pq = $pq, h = $h") println (mod.report) val yp = mod.predictAll () // one-step ahead val yfa = mod.forecastAll (h) // h-steps ahead val yf = yfa.col (h) // only for horizon h new Plot (null, y, yp, s"predictAll: Plot of y vs. yp for h = 1, $modName vs. t", true) new Plot (null, y, yf, s"forecastAll: Plot of y vs. yf for h = $h, $modName vs. t", true) if (h == 1) differ (yp, yf, allow = false) yfa // return all forecast } // testInSamp //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Test forecasting model that extends `ForecasterVec`. * @param modName the name of the model to test * @param p the order of the model (number of response values to use in forecasts) * @param mod the model to test * @param kt the retraining frequency * @param h the forecasting horizon */ def test (modName: String, p: Int, mod: ForecasterVec, kt: Int = 5, h: Int = 1): Unit = { banner (s"test $modName") if (modName == "ARIMA") mod.train (null, y).asInstanceOf [ARIMA].eval_ () else mod.train (null, y).eval () val yf = mod.forecastAll (h, p) // forecast for all times and horizons val yf_h = yf(h) if (y.dim != yf_h.dim) flaw ("test", s"y.dim = ${y.dim} != yf_h.dim = ${yf_h.dim}") mod.evalf (y, yf_h) println (mod.report) new Plot (null, y, yf_h, s"$modName in sample horizon $h: y and yf", true) val stats = SimpleRollingValidation.crossValidate2 (mod, kt, h) // val stats = RollingValidation.crossValidate2 (mod, kt, h) // Fit.showQofStatTable (stats) } // test // Example: Forecasting lake levels // @see cran.r-project.org/web/packages/fpp/fpp.pdf val t = VectorD.range (0, 98) val y = VectorD (580.38, 581.86, 580.97, 580.80, 579.79, 580.39, 580.42, 580.82, 581.40, 581.32, 581.44, 581.68, 581.17, 580.53, 580.01, 579.91, 579.14, 579.16, 579.55, 579.67, 578.44, 578.24, 579.10, 579.09, 579.35, 578.82, 579.32, 579.01, 579.00, 579.80, 579.83, 579.72, 579.89, 580.01, 579.37, 578.69, 578.19, 578.67, 579.55, 578.92, 578.09, 579.37, 580.13, 580.14, 579.51, 579.24, 578.66, 578.86, 578.05, 577.79, 576.75, 576.75, 577.82, 578.64, 580.58, 579.48, 577.38, 576.90, 576.94, 576.24, 576.84, 576.85, 576.90, 577.79, 578.18, 577.51, 577.23, 578.42, 579.61, 579.05, 579.26, 579.22, 579.38, 579.10, 577.95, 578.12, 579.75, 580.85, 580.41, 579.96, 579.61, 578.76, 578.18, 577.21, 577.13, 579.10, 578.25, 577.91, 576.89, 575.96, 576.80, 577.68, 578.38, 578.52, 579.74, 579.31, 579.89, 579.96) } // ForecasterVec object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ForecasterVecTest` object tests the `ForecasterVec` companion object. * > runMain scalation.analytics.forecaster.ForecasterVecTest */ object ForecasterVecTest extends App { import ForecasterVec._ val stats = Stats (y, MAX_LAGS) println (stats) val zero = new VectorD (stats.acr.dim) new Plot (null, stats.acr, zero, "ACF vs. k", true) } // ForecasterVecTest object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ForecasterVecTest2` object tests the `ForecasterVec` companion object * and models/classes that extend `ForecasterVec`. * > runMain scalation.analytics.forecaster.ForecasterVecTest2 */ object ForecasterVecTest2 extends App { for (h <- 1 to 2) { banner (s"Test with forecasting horizon h = $h") test ("NullModel", 1, new NullModel (y), h = h) test ("RandomWalk", 1, new RandomWalk (y), h = h) test ("SimpleExpSmoothing", 1, new SimpleExpSmoothing (y), h = h) test ("MovingAverage", 1, new MovingAverage (y), h = h) for (p <- 1 to 3) { ARMA.hp("p") = p.toDouble test (s"AR($p)", p, new AR (y), h = h) } // for /* for (p <- 1 to 3) { val mod = new ARIMA (y, 0, p); mod.setPQ (p, 0) test (s"ARIMA($p, 0, 0)", p, mod, h = h) } // for */ } // for } // ForecasterVecTest2 object