//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 1.6
 *  @date    Wed Jun 17 12:08:35 EDT 2020
 *  @see     LICENSE (MIT style license file).
 *
 *  @title   Model Framework: Rolling Validation for Forecasters
 */

package scalation.analytics
package forecaster

import scalation.linalgebra.{MatriD, MatrixD, VectoD, VectorD}
import scalation.plot.Plot
import scalation.stat.Statistic
import scalation.util.banner

import Fit._

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `SimpleRollingValidation` object provides '1'-fold rolling validations, e.g.,
 *  for 'm = 1200', 'k = 1' and 'kt = 5':
 *
 *      1: tr(ain)   0 until  600, te(st)  600 until 1200
 *
 *  In rolling validation for this case, the training dataset has 600 instances
 *  and the testing dataset has 600 instances.  Re-training occurs before every
 *  'kt = 5' forecasts are made.
 */
object SimpleRollingValidation
{
    private val DEBUG    = true                                  // debug flag
    private val DEBUG2   = false                                 // verbose debug flag
    private val TR_RATIO = 0.5                                   // minimum ratio of training dataset to full dataset

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Calculate the size (number of instances) for a training dataset.
     *  @param m  the size of the full dataset
     */
    def trSize (m: Int): Int = (m * TR_RATIO).toInt

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Use rolling '1'-fold cross-validation to compute test Quality of Fit (QoF) measures
     *  by dividing the dataset into a testing dataset and a training dataset.
     *  The testing dataset is defined by a range of indices (test start until start + 'te_size')
     *  and the 'tr_size' instances before this range form the training dataset.
     *  -------------------------------------------------------------------------
     *  This version is for models that have an 'x' component and a 'y' component, e.g., `Regression4TS`.
     *  @see `PredictorMat with ForecasterMat` for the types of models
     *  @see analytics.package.scala for the 'chopr' and 'shift_r' methods
     *  @param model  the forecasting model being used (e.g., `QuadRegression4TS`)
     *  @param kt_    the frequency of re-training (number of forecasts to make before re-training) (defaults to 5)
     *  @param h      the forecasting horizon, number of steps ahead to produce forecasts (defaults to 1)
     */
    def crossValidate (model: PredictorMat with ForecasterMat, kt_ : Int = 5, h: Int = 1): Array [Statistic] =
    {
        val x       = model.getX                                 // get the (opt. expanded) data/input matrix
        val y       = model.getY                                 // get the (opt. expanded) response/output vector
        val m       = y.dim                                      // number of instances in full dataset
        val tr_size = trSize (m)                                 // size of each training dataset
        val te_size = m - tr_size                                // size of each testing dataset
        val kt      = if (kt_ < 0) te_size else kt_              // given size or size of testing dataset
        if (DEBUG) println (s"crossValidate: m = $m, tr_size = $tr_size, te_size = $te_size, kt = $kt, h = $h")
        if (kt < h) flaw ("crossValidate", s"kt = $kt must be at least h = $h")

        val stats = qofStatTable                                 // table of statistics for QoF measures
        var te    = tr_size                                      // start of initial testing region
        banner (s"crossValidate: iteration 0: test start te = $te")

        val (x_e, y_e, x_r, y_r) = chopr (x, y, te, te_size, tr_size)   // chop out testing and training regions

        var xy = (x_r, y_r)                                      // initial training dataset (matrix, vector)
        var ym = xy._2.mean                                      // mean of actual training response
        val yf = new VectorD (y_e.dim)                           // vector to hold forecasts
        var rt = 0                                               // re-training counter

        for (i <- y_e.range) {                                   // iterate thru testing instances
            if (i % kt == 0) {                                   // trigger re-training every kt-th iteration
                rt += 1
                if (i > 0) {
                    xy = shift_r (xy, (x_e.slice (i-kt, i), y_e.slice (i-kt, i)))   // update training dataset by shifting
//                  ym = xy._2.mean                              // update training mean
                } // if
                model.train (xy._1, xy._2)                       // periodically re-train model on updated training dataset
                if (DEBUG2) println (s"crossValidate: rt = $rt, parameter = ${model.parameter}")
            } // if
//          yf(i) = model.predict (x_e(i))                       // save i-th forecasted value for h = 1
            yf(i) = model.forecast (x_e, i, h)                   // save i-th forecasted value
        } // for

        // FIX - what should the mean be: ym (from tr) or ym2 (from te)?
//      val ym2 = y_e.mean
        model.eval (ym, y_e, yf)                                 // evaluate model on testing dataset
        val qof = model.fit                                      // get Quality of Fit (QoF) measures
        tallyQof (stats, qof)
        if (DEBUG) println (s"number of re-trainings rt = $rt \nqof = " + qof)
        if (DEBUG) println (model.report + "\n" + model.summary)
        if (DEBUG) new Plot (null, y_e, yf, s"crossValidate (h = $h): ${model.modelName} fold 0",
                             lines = true)                       // plot actual test response against forecasted test response
        stats                                                    // return the statistics table
    } // crossValidate

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Use rolling '1'-fold cross-validation to compute test Quality of Fit (QoF) measures
     *  by dividing the dataset into a testing dataset and a training dataset.
     *  The testing dataset is defined by a range of indices (test start until start + 'te_size')
     *  and the 'tr_size' instances before this range form the training dataset.
     *  -------------------------------------------------------------------------
     *  This version is for models that have no 'x' component, only a 'y' component, e.g., `AR`.
     *  @see `ForecasterVec` for the types of models
     *  @see analytics.package.scala for the 'chopr' and 'shift_r' methods
     *  @param model  the forecasting model being used (e.g., `ARIMA`)
     *  @param kt_    the frequency of re-training (number of forecasts to make before re-training) (defaults to 5)
     *  @param h      the forecasting horizon, number of steps ahead to produce forecasts (defaults to 1)
     */
    def crossValidate2 (model: ForecasterVec, kt_ : Int = 5, h: Int = 1): Array [Statistic] =
    {
        val y       = model.getY                                 // get the (opt. expanded) response/output vector
        val m       = y.dim                                      // number of instances in full dataset
        val tr_size = trSize (m)                                 // size of each training dataset
        val te_size = m - tr_size                                // size of each testing dataset
        val kt      = if (kt_ < 0) te_size else kt_              // given size or size of testing dataset
        if (DEBUG) println (s"crossValidate2: m = $m, tr_size = $tr_size, te_size = $te_size, kt = $kt, h = $h")
        if (kt < h) flaw ("crossValidate2", s"kt = $kt must be at least h = $h")

        val stats = qofStatTable                                 // table of statistics for QoF measures
        var te    = tr_size                                      // start of initial testing region
        banner (s"crossValidate2: iteration 0: test start te = $te")

        val (y_e, y_r) = chopr (y, te, te_size, tr_size)         // chop out testing and training regions

        var yy = y_r                                             // initial training dataset (vector)
        var ym = yy.mean                                         // mean of actual training response
        val yf = new VectorD (y_e.dim)                           // vector to hold forecasts
        var rt = 0                                               // re-training counter

//      for (i <- y_e.range) {                                   // iterate thru testing instances
        for (i <- 0 until yf.dim-h+1) {                          // iterate thru testing instances
            if (i % kt == 0) {                                   // trigger re-training every kt-th iteration
                rt += 1
                if (i > 0) {
                    yy = shift_r (yy, y_e.slice (i-kt, i))       // update training dataset by shifting
//                  ym = yy.mean                                 // update training mean
                } // if
                model.train (null, yy)                           // periodically re-train model on updated training dataset
                if (DEBUG2) println (s"crossValidate2: rt = $rt, parameter = ${model.parameter}")
            } // if
            // use time t = tr_size + i to adjust the index with respect to the original y
            yf(i+h-1) = model.forecastX (y, tr_size + i, h, i % kt)   // save i-th forecasted value
        } // for
        for (i <- 0 until h-1) yf(i) = y_e(i)                    // when h > 1, fill in initial blanks in yf with actual y values

        // FIX - what should the mean be: ym (from tr) or ym2 (from te)?
//      val ym2 = y_e.mean
//      model.eval (ym, y_e, yf)                                 // evaluate model on testing dataset
//      val e = y_e - yf                                         // must create local e since the original e may be required for MA models
//      model.diagnose (e, y_e, yf)
        model.evalf (y_e, yf)
        val qof = model.fit                                      // get Quality of Fit (QoF) measures
        tallyQof (stats, qof)
        if (DEBUG) println (s"number of re-trainings rt = $rt \nqof = " + qof)
        if (DEBUG) println (model.report)
        if (DEBUG) new Plot (null, y_e, yf, s"crossValidate2 (h = $h): ${model.modelName} fold 0",
                             lines = true)                       // plot actual test response against forecasted test response
        stats                                                    // return the statistics table
    } // crossValidate2

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Use rolling '1'-fold cross-validation to compute test Quality of Fit (QoF) measures
     *  by dividing the dataset into a testing dataset and a training dataset.
     *  The testing dataset is defined by a range of indices (test start until start + 'te_size')
     *  and the 'tr_size' instances before this range form the training dataset.
     *  -------------------------------------------------------------------------
     *  This version is for models that have an 'x' component and a 'y' component, e.g., `NeuralNet_3L1_4TS`.
     *  @see `PredictorMat2 with ForecasterMat` for the types of models
     *  @see analytics.package.scala for the 'chopr' and 'shift_r' methods
     *  @param model  the forecasting model being used (e.g., `NeuralNet_3L1_4TS`)
     *  @param kt_    the frequency of re-training (number of forecasts to make before re-training) (defaults to 50)
     *  @param h      the forecasting horizon, number of steps ahead to produce forecasts (defaults to 1)
     */
    def crossValidate3 (model: PredictorMat2 with ForecasterMat, kt_ : Int = 50, h: Int = 1): Array [Statistic] =
    {
        val x       = model.getX                                 // get the (opt. expanded) data/input matrix
        val y       = model.getY                                 // get the (opt. expanded) response/output vector
        val m       = y.dim                                      // number of instances in full dataset
        val tr_size = trSize (m)                                 // size of each training dataset
        val te_size = m - tr_size                                // size of each testing dataset
        val kt      = if (kt_ < 0) te_size else kt_              // given size or size of testing dataset
        if (DEBUG) println (s"crossValidate3: m = $m, tr_size = $tr_size, te_size = $te_size, kt = $kt, h = $h")
        if (kt < h) flaw ("crossValidate3", s"kt = $kt must be at least h = $h")

        val stats = qofStatTable                                 // table of statistics for QoF measures
        var te    = tr_size                                      // start of initial testing region
        banner (s"crossValidate3: iteration 0: test start te = $te")

        val (x_e, y_e, x_r, y_r) = chopr (x, y, te, te_size, tr_size)   // chop out testing and training regions

        var xy = (x_r, y_r)                                      // initial training dataset (matrix, vector)
        var ym = xy._2.mean                                      // mean of actual training response
        val yf = new VectorD (y_e.dim)                           // vector to hold forecasts
        var rt = 0                                               // re-training counter

        for (i <- y_e.range) {                                   // iterate thru testing instances
            if (i % kt == 0) {                                   // trigger re-training every kt-th iteration
                rt += 1
                if (i > 0) {
                    xy = shift_r (xy, (x_e.slice (i-kt, i), y_e.slice (i-kt, i)))   // update training dataset by shifting
//                  ym = xy._2.mean                              // update training mean
                } // if
                model.train (xy._1, xy._2)                       // periodically re-train model on updated training dataset
                if (DEBUG2) println (s"crossValidate3: rt = $rt, parameter = ${model.parameter}")
            } // if
//          yf(i) = model.predict (x_e(i))                       // save i-th forecasted value for h = 1
            yf(i) = model.forecast (x_e, i, h)                   // save i-th forecasted value
        } // for

        // FIX - what should the mean be: ym (from tr) or ym2 (from te)?
//      val ym2 = y_e.mean
        model.eval (ym, y_e, yf)                                 // evaluate model on testing dataset
        val qof = model.fitA(0).fit                              // get Quality of Fit (QoF) measures
        tallyQof (stats, qof)
        if (DEBUG) println (s"number of re-trainings rt = $rt \nqof = " + qof)
//      if (DEBUG) println (model.report + "\n" + model.summary)
        if (DEBUG) new Plot (null, y_e, yf, s"crossValidate3 (h = $h): ${model.modelName} fold 0",
                             lines = true)                       // plot actual test response against forecasted test response
        stats                                                    // return the statistics table
    } // crossValidate3

} // SimpleRollingValidation object
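
//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `SimpleRollingValidationDemo` object is an added illustration (a sketch, not
 *  part of the original API) of the index bookkeeping shared by the 'crossValidate*'
 *  methods above: the 50/50 split produced by 'trSize' and the 'i % kt == 0'
 *  re-training trigger.  It uses plain Scala collections and only mimics the effect
 *  of 'shift_r' on index ranges rather than calling it, so treat it as conceptual.
 *  > runMain scalation.analytics.forecaster.SimpleRollingValidationDemo
 */
object SimpleRollingValidationDemo extends App
{
    val m       = 1200                                           // number of instances (as in the object's ScalaDoc)
    val kt      = 5                                              // re-train before every kt forecasts
    val tr_size = SimpleRollingValidation.trSize (m)             // 600 training instances: indices 0 until 600
    val te_size = m - tr_size                                    // 600 testing instances:  indices 600 until 1200

    println (s"train 0 until $tr_size, test $tr_size until $m")

    var window = (0 until tr_size).toVector                      // indices covered by the current (sliding) training window
    var rt     = 0                                               // re-training counter

    for (i <- 0 until te_size) {                                 // iterate thru testing instances
        if (i % kt == 0) {                                       // same trigger used by crossValidate/2/3
            rt += 1
            if (i > 0) window = window.drop (kt) ++
                (tr_size + i - kt until tr_size + i)             // slide the window right by kt (what shift_r does to the data)
        } // if
    } // for

    println (s"re-trainings rt = $rt (expected te_size / kt = ${te_size / kt})")
    println (s"final training window covers ${window.head} until ${window.last + 1}")

} // SimpleRollingValidationDemo object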

//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `SimpleRollingValidationTest` object is used to test the 'crossValidate' method
 *  in the `SimpleRollingValidation` object.
 *  > runMain scalation.analytics.forecaster.SimpleRollingValidationTest
 */
object SimpleRollingValidationTest extends App
{
    import scalation.random.Normal
    import scalation.math.double_exp

    val m = 1200                                                 // number of instances
    val x = new MatrixD (m, 2)                                   // data/input matrix
    val y = new VectorD (m)                                      // response/output vector
    val e = Normal (0, 20000000)                                 // noise

    for (i <- y.range) {
        val j = i + 1
        x(i, 0) = 0.0000001 * (j - m/2)~^3 * - 5 * j
        x(i, 1) = 10 * j - 0.0001 * j~^2
        y(i)    = 10.0 + 3 * x(i, 0) + 2 * x(i, 1) + e.gen
    } // for

    val h = 1                                                    // forecasting horizon, try changing
    val model = new Regression4TS (x, y)

    banner (s"Regression4TS full dataset results at forecasting horizon h = $h")
    model.train (x, y).eval ()

    banner (s"Regression4TS rolling validation results at forecasting horizon h = $h")
    showQofStatTable (SimpleRollingValidation.crossValidate (model, h = h))

} // SimpleRollingValidationTest object

//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `SimpleRollingValidationTest2` object is used to test the 'crossValidate2' method
 *  in the `SimpleRollingValidation` object.
 *  > runMain scalation.analytics.forecaster.SimpleRollingValidationTest2
 */
object SimpleRollingValidationTest2 extends App
{
    import scalation.random.Normal
    import scalation.math.double_exp

    val m = 1200                                                 // number of instances
    val y = new VectorD (m)                                      // response/output vector
    val e = Normal (0, 100)                                      // noise

    y(0) = 50.0
    for (i <- 1 until y.dim) {
        y(i) = 0.8 * y(i-1) + e.gen
    } // for
    println (y.min (), y.max ())

    val h = 2                                                    // forecasting horizon, try changing
    ARMA.hp("p") = 2
    val model = new AR (y)

    banner (s"AR full dataset results at forecasting horizon h = $h")
    model.train (null, y).eval ()
    println (model.report)

    banner (s"AR rolling validation results at forecasting horizon h = $h")
    showQofStatTable (SimpleRollingValidation.crossValidate2 (model, h = h))

} // SimpleRollingValidationTest2 object
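
//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `SimpleRollingValidationTest3` object sketches how the 'crossValidate3' method
 *  in the `SimpleRollingValidation` object might be exercised.  It reuses the synthetic
 *  data of `SimpleRollingValidationTest` and assumes that `NeuralNet_3L1_4TS` can be
 *  constructed as 'new NeuralNet_3L1_4TS (x, y)' with default hyper-parameters,
 *  mirroring `Regression4TS` above -- check the actual constructor before running.
 *  > runMain scalation.analytics.forecaster.SimpleRollingValidationTest3
 */
object SimpleRollingValidationTest3 extends App
{
    import scalation.random.Normal
    import scalation.math.double_exp

    val m = 1200                                                 // number of instances
    val x = new MatrixD (m, 2)                                   // data/input matrix
    val y = new VectorD (m)                                      // response/output vector
    val e = Normal (0, 20000000)                                 // noise

    for (i <- y.range) {
        val j = i + 1
        x(i, 0) = 0.0000001 * (j - m/2)~^3 * - 5 * j
        x(i, 1) = 10 * j - 0.0001 * j~^2
        y(i)    = 10.0 + 3 * x(i, 0) + 2 * x(i, 1) + e.gen
    } // for

    val h = 1                                                    // forecasting horizon, try changing
    val model = new NeuralNet_3L1_4TS (x, y)                     // assumed constructor, mirroring Regression4TS above

    banner (s"NeuralNet_3L1_4TS rolling validation results at forecasting horizon h = $h")
    showQofStatTable (SimpleRollingValidation.crossValidate3 (model, h = h))

} // SimpleRollingValidationTest3 object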