//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 2.0
 *  @date    Sun May 29 13:45:56 EDT 2022
 *  @see     LICENSE (MIT style license file).
 *
 *  @note    Model Framework: Rolling Validation for Forecasters
 */

package scalation
package modeling
package forecasting

import scala.math.max

import scalation.mathstat._

//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `RollingValidation` object provides rolling-validation, where a full
 *  dataset is divided into a training set followed by a testing set.
 *  Retraining is done as the algorithm rolls through the testing set making
 *  out-of-sample predictions/forecasts to keep the parameters from becoming stale.
 *  For example, with TR_RATIO = 0.5 and m = 1000 it works as follows:
 *      tr(ain) 0 to 499, te(st) 500 to 999
 *  Re-training occurs according to the retraining cycle rc, e.g., rc = 10
 *  implies that retraining would occur after every 10 forecasts or 50 times
 *  for this example.
 */
object RollingValidation:

    private val debug    = debugf ("RollingValidation", true)           // debug function
    private val TR_RATIO = 0.5                                          // ratio training set to full datasets

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Calculate the size (number of instances) for a training dataset.
     *  The product m * TR_RATIO is truncated to an integer.
     *  @param m  the size of the full dataset
     */
    def trSize (m: Int): Int = (m * TR_RATIO).toInt

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Align the actual response vector for comparison with the predicted/forecasted
     *  response vector, returning a time vector and sliced response vector.
     *  The time vector is `VectorD.range (tr_size, y.dim)` and the sliced response
     *  is `y(tr_size until y.dim)`, i.e., the testing portion of y.
     *  @param tr_size  the size of the initial training set
     *  @param y        the actual response for the full dataset (to be sliced)
     */
    def align (tr_size: Int, y: VectorD): (VectorD, VectorD) =
        (VectorD.range (tr_size, y.dim), y(tr_size until y.dim))
    end align

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Use rolling-validation to compute test Quality of Fit (QoF) measures
     *  by dividing the dataset into a TRAINING SET (tr) and a TESTING SET (te)
     *  as follows:  [ <-- tr_size --> | <-- te_size --> ]
     *  This version calls predict for one-step ahead out-of-sample forecasts.
     *  The results are plotted and the QoF map is printed.
     *  @param mod  the forecasting model being used (e.g., `ARIMA`)
     *  @param rc   the retraining cycle (number of forecasts until retraining occurs),
     *              must be positive
     *  @throws IllegalArgumentException  if rc is not positive
     */
    def rollValidate (mod: Forecaster & Fit, rc: Int): Unit =
        // fail fast with a clear message rather than an ArithmeticException from `i % rc`
        require (rc > 0, s"rollValidate: retraining cycle rc = $rc must be positive")

        val y       = mod.getY                                          // get (expanded) response/output vector
        val tr_size = trSize (y.dim)                                    // size of initial training set
        val te_size = y.dim - tr_size                                   // size of testing set
        debug ("rollValidate", s"train: tr_size = $tr_size; test: te_size = $te_size, rc = $rc")

        val yp = new VectorD (te_size)                                  // y-predicted over testing set
        for i <- 0 until te_size do                                     // iterate through testing set
            val t = tr_size + i                                         // next time point to forecast
            if i % rc == 0 then mod.train (null, y(0 until t))          // retrain on all values before time t
            yp(i) = mod.predict (t-1, y)                                // predict the next value
        end for

        val (t, yy) = align (tr_size, y)                                // align vectors
        val df = max (0, mod.parameter.size - 1)                        // degrees of freedom for model
        mod.resetDF (df, te_size - df)                                  // reset degrees of freedom
        new Plot (t, yy, yp, "Plot yy, yp vs. t", lines = true)
        println (FitM.fitMap (mod.diagnose (yy, yp), qoF_names))
    end rollValidate

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Use rolling-validation to compute test Quality of Fit (QoF) measures
     *  by dividing the dataset into a TRAINING SET (tr) and a TESTING SET (te)
     *  as follows:  [ <-- tr_size --> | <-- te_size --> ]
     *  This version calls forecast for h-steps ahead out-of-sample forecasts.
     *  For each horizon k = 1 to h, the corresponding column of the forecasting
     *  matrix is plotted against the actual values and its QoF map is printed.
     *  @param mod  the forecasting model being used (e.g., `ARIMA`)
     *  @param rc   the retraining cycle (number of forecasts until retraining occurs),
     *              must be positive
     *  @param h    the forecasting horizon (h-steps ahead)
     *  @throws IllegalArgumentException  if rc is not positive
     */
    def rollValidate (mod: Forecaster & Fit, rc: Int, h: Int): Unit =
        // fail fast with a clear message rather than an ArithmeticException from `i % rc`
        require (rc > 0, s"rollValidate: retraining cycle rc = $rc must be positive")

        val y       = mod.getY                                          // get (expanded) response/output vector
        val yf      = mod.forecastAll (y, h)                            // get in-sample forecasting matrix
        val tr_size = trSize (y.dim)                                    // size of initial training set
        val te_size = y.dim - tr_size                                   // size of testing set
        debug ("rollValidate", s"train: tr_size = $tr_size; test: te_size = $te_size, rc = $rc")

        val yp = new VectorD (te_size)                                  // y-predicted over testing set (only for h=1)
        for i <- 0 until te_size do                                     // iterate through testing set
            val t = tr_size + i                                         // next time point to forecast
            if i % rc == 0 then mod.train (null, y(0 until t))          // retrain on all values before time t
            yp(i)  = mod.predict (t-1, y)                               // predict the next value (only for h=1)
            val yd = mod.forecast (t-1, yf, y, h)                       // forecast the next h-values
                                                                        // yf is updated down its diagonals
            assert (yp(i) =~ yd(0))                                     // make sure h=1 forecasts agree with predictions
        end for                                                         // yf is updated down its diagonals

        val (t, yy) = align (tr_size, y)                                // align vectors
        val df = max (0, mod.parameter.size - 1)                        // degrees of freedom for model
        mod.resetDF (df, te_size - df)                                  // reset degrees of freedom
        new Plot (t, yy, yp, "Plot yy, yp vs. t", lines = true)

        for k <- 1 to h do                                              // assess each horizon separately
            val yfh = yf(tr_size until y.dim, k)                        // column k of yf over the testing rows
            new Plot (t, yy, yfh, s"Plot yy, yfh vs. t (h = $k)", lines = true)
            banner (s"rollValidate: for horizon h = $k:")
            println (FitM.fitMap (mod.diagnose (yy, yfh), qoF_names))
        end for
    end rollValidate

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Test assessment and validation for the given forecasting model:
     *  (1) in-sample assessment on full dataset
     *  (2) out-of-sample validation using rolling validation with predict (one-step)
     *  (3) out-of-sample validation using rolling validation with forecast (h-steps)
     *  @param mod  the forecasting model to test (e.g., `ARIMA`)
     *  @param rc   the retraining cycle (number of forecasts until retraining occurs)
     *  @param h    the forecasting horizon (h-steps ahead)
     */
    def testValidate (mod: Forecaster & Fit, rc: Int, h: Int): Unit =
        banner (s"testValidate: in-sample on full dataset for ${mod.modelName}")
        mod.trainNtest ()()                                             // result is not needed here, so it is not bound

        banner (s"testValidate: out-of-sample predict rolling validation for ${mod.modelName}")
        rollValidate (mod, rc)

        banner (s"testValidate: out-of-sample forecast (h=1..$h) rolling validation for ${mod.modelName}")
        rollValidate (mod, rc, h)
    end testValidate

end RollingValidation

/* FIX - Ideas for reducing the size of yf matrix

        val cp = mod.cap                                                // maximum lag (how far into the past)
        val st = te_size - cp                                           // size of shift from original y
        val yf = new MatrixD (te_size+cp+h, h+2)                        // extend before and after
        val yf = new MatrixD (y.dim+h, h+2)                             // extend before and after
        for t <- 0 until te_size + cp do yf(t, 0) = y(st+t)             // first column is the time-step (e.g., logical day)
        for t <- yf.indices do yf(t, h+1) = te_size + t                 // last column is time (logical day)
        for t <- yf.indices do yf(t, h+1) = t                           // last column is time (logical day)
*/


//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `rollingValidationTest` main function is used to test the rollValidate method
 *  in the `RollingValidation` object.
* > runMain scalation.modeling.forecasting.rollingValidationTest */ @main def rollingValidationTest (): Unit = import scalation.random.Normal val m = 1200 // number of instances val y = new VectorD (m) // response/output vector val e = Normal (0, 100) // noise y(0) = 50.0 for i <- 1 until y.dim do y(i) = 0.8 * y(i-1) + e.gen val p = 3 // order of the model val h = 2 // forecasting horizon, try changing val rc = 2 // retrain cycle println (s"y.min = ${y.min}, y.max = ${y.max}") banner (s"AR($p) full dataset results at forecasting horizon h = $h") SARIMAX.hp("p") = p // val mod = new AR (y) // create an AR(p) model val mod = new ARMA (y) // create an ARMA(p, 0) model val (yp, qof) = mod.trainNtest ()() // train-test model on full dataset banner (s"AR($p) one-step ahead rolling validation results") RollingValidation.rollValidate (mod, rc) banner (s"AR($p) $h-steps rolling validation results") RollingValidation.rollValidate (mod, rc, h) end rollingValidationTest //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `rollingValidationTest2` main function is used to test the rollValidate method * in the `RollingValidation` object. 
* > runMain scalation.modeling.forecasting.rollingValidationTest2 */ @main def rollingValidationTest2 (): Unit = import Example_LakeLevels.y val p = 3 // order of the model val h = 2 // forecasting horizon, try changing val rc = 2 // retrain cycle println (s"y.min = ${y.min}, y.max = ${y.max}") banner (s"AR($p) full dataset results at forecasting horizon h = $h") SARIMAX.hp("p") = p // val mod = new AR (y) // create an AR(p) model val mod = new ARMA (y) // create an ARMA(p, 0) model val (yp, qof) = mod.trainNtest ()() // train-test model on full dataset val t = VectorD.range (49 until 97) // note original y must be shifted new Plot (t, y(50 until 98), yp(49 until 97), "y, yp vs t 2nd half", lines = true) banner (s"AR($p) one-step ahead rolling validation results") RollingValidation.rollValidate (mod, rc) banner (s"AR($p) $h-steps rolling validation results") RollingValidation.rollValidate (mod, rc, h) end rollingValidationTest2 //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `rollingValidationTest3` main function is used to test the rollValidate method * in the `RollingValidation` object. * Compares baseline models on in-sample and out-of-sample assessment. * > runMain scalation.modeling.forecasting.rollingValidationTest3 */ @main def rollingValidationTest3 (): Unit = import Example_LakeLevels.y val h = 2 // forecasting horizon, try changing val rc = 2 // retrain cycle RollingValidation.testValidate (new RandomWalk (y), rc, h) RollingValidation.testValidate (new NullModel (y), rc, h) RollingValidation.testValidate (new TrendModel (y), rc, h) end rollingValidationTest3 //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `rollingValidationTest4` main function is used to test the rollValidate method * in the `RollingValidation` object. * Random Walk is used to make structure of the yf matrix clear. 
* > runMain scalation.modeling.forecasting.rollingValidationTest4 */ @main def rollingValidationTest4 (): Unit = val y = VectorD.range (1, 25) val h = 2 // forecasting horizon, try changing banner (s"RW full dataset results at forecasting horizon h = $h") val mod = new RandomWalk (y) // create an RW model mod.train (null, y) // train the model on full dataset val (yp, qof) = mod.test (null, y) // test the model on full dataset println (mod.report (qof)) // report on Quality of Fit (QoF) println (s"yp = $yp") // print prediction matrix val yf = mod.forecastAll (y, h) // produce all forecasts up horizon h println (s"yf = $yf") // print forecast matrix val rc = 2 // retrain cycle banner ("RW one-step ahead rolling validation results") RollingValidation.rollValidate (mod, rc) banner (s"RW $h-steps rolling validation results") RollingValidation.rollValidate (mod, rc, h) end rollingValidationTest4