//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** @author Hao Peng, John Miller, Michael Cotterell * @version 1.6 * @date Sat Jun 13 01:27:00 EST 2017 * @see LICENSE (MIT style license file). * * @title Model: Auto-Regressive * * @see http://en.wikipedia.org/wiki/Autoregressive%E2%80%93moving-average_model * @see http://www.emu.edu.tr/mbalcilar/teaching2007/econ604/lecture_notes.htm * @see http://www.stat.berkeley.edu/~bartlett/courses/153-fall2010 * @see http://www.princeton.edu/~apapanic/ORFE_405,_Financial_Time_Series_%28Fall_2011%29_files/slides12-13.pdf */ package scalation.analytics package forecaster import scala.collection.mutable.Set import scala.math.{max, min} import scalation.linalgebra.{MatriD, MatrixD, VectoD, VectorD} import scalation.math.noDouble import scalation.plot.Plot import scalation.random.{Normal, Uniform} import scalation.stat.{Statistic, vectorD2StatVector} import scalation.util.banner import Fit._ import ForecasterVec.{MAX_LAGS, testInSamp} import RollingValidation.trSize // FIX - don't use actual y values for first p predictions - compare with ARIMA //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `AR` class provides basic time series analysis capabilities for Auto- * Regressive 'AR'. In an 'AR(p)' model, 'p' refers to the order of the * Auto-Regressive components of the model. `AR` models are often used for forecasting. * Given time series data stored in vector 'y', its next value 'y_t = y(t)' * may be predicted based on prior values of 'y' and its noise: *

* y_t = δ + Σ(φ_k y_t-k) *

* where 'δ' is a constant, 'φ' is the autoregressive coefficient vector, * and 'e' is the noise vector. *---------------------------------------------------------------------------------- * @param y the response vector (time-series data) * @param hparam the hyper-parameters */ class AR (y: VectoD, hparam: HyperParameter = ARMA.hp) extends ForecasterVec (y, hparam("p").toInt, hparam) { private val DEBUG = false // debug flag private var p = hparam("p").toInt // p-th order Auto-Regressive model private var φ: VectoD = null // AR(p) parameters/coefficients private var δ: Double = 0.0 // drift/constant term private var trained = false // has trained been called? if (p > MAX_LAGS) flaw ("AR", s"p = $p must not be greater than MAX_LAGS = $MAX_LAGS") //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return the model name including its current hyper-parameter, e.g., AR(2). */ override def modelName: String = s"AR($p)" //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Train/fit an `AR` model to the times-series data in vector 'y_'. * Estimate the coefficient vector 'φ' for a 'p'th order Auto-Regressive 'AR(p)' * model. *

* z_t = φ_0 * z_t-1 + ... + φ_p-1 * z_t-p + e_t *

* Uses the Durbin-Levinson Algorithm to determine the coefficients. * The 'φ' vector is 'p'th row of 'psi' matrix (ignoring the first (0th) column). * @param x_null the data/input matrix (ignored) * @param y_ the response vector (currently only works for y) - FIX */ override def train (x_null: MatriD, y_ : VectoD): AR = { super.train (null, y_) trained = true retrain (p) this } // train //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Retrain/fit an `AR` model to the times-series data using another order 'p_' * Estimate the coefficient vector 'φ' for a 'p'th order Auto-Regressive 'AR(p)' * model. * The 'φ' vector is 'p'th row of 'psi' matrix (ignoring the first (0th) column). * @param p_ another order */ def retrain (p_ : Int): AR = { if (! trained) flaw ("retrain", "train must be called before retrain") p = p_ resetDF (p, m - p) φ = psi(p).slice (1, p+1) // AR(p) coefficients: φ_0, ..., φ_p-1 if (DEBUG) println (s"parameters for AR($p): φ = $φ") δ = stats.mu * (1 - φ.sum) this } // retrain //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return the parameter vector. */ def parameter: VectoD = φ //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return a detailed summary of the trained model. */ def summary: String = { super.summary ("φ", s"$δ + $φ dot [y_(t-1), ... y_(t-p)]") } // summary //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Predict values for all time points using 1-step ahead forecasts. * Return a vector that is the predictions (zero-centered) of a 'p'th order * Auto-Regressive 'AR(p)' model. * @see 'predictAll' in `ForecasterVec` for uncentered results */ def predictAllz (): VectoD = { val zp = new VectorD (m) // forecasts for all time points t for (t <- 0 until p) zp(t) = z(t) // copy first p actual values into zp for (t <- p until m) { var sum = 0.0 for (j <- 0 until p) sum += φ(j) * zp(max (0, t-1-j)) zp(t) = sum } // for zp // return vector of predicted values } // predictAllz override def predictAll (): VectoD = forecastAll (1).col (1) //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Forecast values for all 'm' time points and all horizons (1 through 'h'-steps ahead). * Record these in the 'yf' matrix, where *

* yf(t, k) = k-steps ahead forecast for y_t *

* Note, 'yf.col(0)' is set to 'y' (the actual time-series values). * @param h the maximum forecasting horizon, number of steps ahead to produce forecasts */ def forecastAll (h: Int): MatriD = { yf = new MatrixD (m, h+1) // forecasts for all time points t & horizons to h yf.setCol (0, y) // first column is actual values, horizon 0 for (k <- 1 to h) { val c = min (k, p) // cut point from actual to forecasted values for (t <- 0 until c) yf(t, k) = y(t) // copy first c actual values for (t <- c until m) { // forecast the rest var sum = δ for (j <- 0 until p) sum += φ(j) * yf(max (0, t-1-j), max (0, k-1-j)) yf(t, k) = sum } // for if (DEBUG) println (s"forecastAll: yf.col ($k) = ${yf.col (k)}") } // for yf // return matrix of forecasted values } // forecastAll //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Produce h-steps ahead forecast on the testing data during cross validation. * @param y the current response vector * @param t the time point to be forecast * @param h the forecasting horizon, number of steps ahead to produce forecast */ override def forecastX (y: VectoD = y, t: Int = y.dim, h: Int = 1): Double = { if (t > m) flaw ("forecast", "no forecasts with starting t > m are provided") val zf = new VectorD (p+h) // Must calculate the z values by doing y - mu on the fly because these values are beyond the bounds of the z vector for (l <- 0 until p) zf(l) = y(max (0, t-p+l)) - stats.mu // copy first p values into zf. for (k <- 1 to h) { // advance the forecasting horizon var sum = 0.0 for (j <- 0 until p) sum += φ(j) * zf(p-2+k-j) zf(p-1+k) = sum } // for zf.last + stats.mu // return the last forecast } // forecastX //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Perform forward selection to find the most predictive variables to add the * existing model, returning the variables to add and the new model. * Note, all lags up and including 'p' define the model. * FIX - select subsets of the lags, e.g., Set (1, 2, 5) * @see `Fit` for index of QoF measures. * @param cols the lags/columns currently included in the existing model (ignored) * @param index_q index of Quality of Fit (QoF) to use for comparing quality */ def forwardSel (cols: Set [Int] = null, index_q: Int = index_rSq): (Int, AR) = { val rSq = new MatrixD (MAX_LAGS, 3) // R^2, R^2 Bar, R^2 cv var p_mx = -1 // best column, so far var mod_mx = null.asInstanceOf [AR] // best model, so far var fit_mx = noDouble // best fit, so far for (p <- 1 to MAX_LAGS) { retrain (p).eval () // retrain model, evaluate QoF val fit_p = fit(index_q) // new fit if (fit_p > fit_mx) { p_mx = p; mod_mx = this; fit_mx = fit_p } rSq(p-1) = VectorD (fit_p, -1.0, -1.0) // use new model, mod_p, no cross } // for if (p_mx == -1) { flaw ("forwardSel", "could not add additional lags") } // if println (s"forwardSel: add $p_mx giving qof = $fit_mx") println (s"rSq = $rSq") new Plot (null, rSq.col(0), lines = true) (p_mx, mod_mx) } // forwardSel } // AR class //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `AR` companion object provides factory methods for the `AR` class. */ object AR { //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Create an `AR` object. * @param y the response vector (time series data) * @param hparam the hyper-parameters */ def apply (y: VectoD, hparam: HyperParameter = ARMA.hp): AR = { new AR (y, hparam) } // apply } // AR object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ARTest` object is used to test the `AR` class on simulated data with * uniformly distributed errors. * > runMain scalation.analytics.forecaster.ARTest */ object ARTest extends App { val m = 100 val noise = Uniform (-5, 5) val y = VectorD (for (i <- 0 until m) yield i + noise.gen) banner ("Build AR(1) Model") val ar = new AR (y) // create model for time series data ar.train (null, y).eval () // train the model and evaluate testInSamp (y, 1, ar, 1) // test in-sample banner ("Select model based on ACF and PACF") ar.plotFunc2 (ar.acF, "ACF") ar.plotFunc2 (ar.pacF, "PACF") banner ("Build AR(2) Model") ar.retrain (2).eval () // retrain for AR(2) model testInSamp (y, 2, ar, 1) // test in-sample } // ARTest object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ARTest2` object is used to test the `AR` class on simulated data with * normally distributed errors. * The order 'p' hyper-parameter should be re-assigned (try 1, 2, 3, ...). * > runMain scalation.analytics.forecaster.ARTest2 */ object ARTest2 extends App { val m = 30 val noise = Normal (0, 2) val y = VectorD (for (i <- 0 until m) yield i + noise.gen) for (p <- 1 to 5) { banner (s"Build AR($p) Model") ARMA.hp("p") = p // reassign hyper-parameter p val ar = new AR (y) // create model for time series data ar.train (null, y).eval () // train the model and evaluate testInSamp (y, p, ar, 1) // test in-sample banner ("Make Forecasts") val steps = 10 // number of steps for the forecasts val yf = ar.forecast (m, steps) println (s"$steps-step ahead forecasts using AR($p) model = $yf") new Plot (null, y ++ yf, null, s"Plot ar($p) forecasts vs. t", true) } // for } // ARTest2 object import ARMA.hp //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ARTest3` object is used to test the `AR` class on real data: * Forecasting lake levels. * @see cran.r-project.org/web/packages/fpp/fpp.pdf * > runMain scalation.analytics.forecaster.ARTest3 */ object ARTest3 extends App { import ForecasterVec.y var ar: AR = null for (h <- 1 to 2) { // forecasting horizon for (p <- 1 to 4) { // autoregressive hyper-parameter hp("p") = p // set p hyper-parameter banner (s"Test3: AR($p) with h = $h") ar = new AR (y) // create model for time series data ar.train (null, y).eval () // train the model and evaluate val yf = ar.forecastAll (h) val yf2 = testInSamp (y, p, ar, h) // in-sample test } // for } // for banner ("Select model based on ACF and PACF") ar.plotFunc2 (ar.acF, "ACF") ar.plotFunc2 (ar.pacF, "PACF") banner ("Rolling Validation") val stats = SimpleRollingValidation.crossValidate2 (ar, kt_ = 5) // val stats = RollingValidation.crossValidate2 (ar, kt_ = 5) Fit.showQofStatTable (stats) } // ARTest3 object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ARTest4` object is used to test the `AR` class on real data: * Forecasting traffic flow. * The order hyper-parameter 'p' should be reassigned (try 1, 2, 3, ...). * > runMain scalation.analytics.forecaster.ARTest4 */ object ARTest4 extends App { val path = BASE_DIR + "travelTime.csv" val data = MatrixD (path) val (t, y) = (data.col(0), data.col(1)) var ar: AR = null for (h <- 1 to 5) { // forecasting horizon for (p <- 1 to 5) { // autoregressive hyper-parameter hp("p") = p // set p hyper-parameter banner (s"Build AR($p) model") ARMA.hp("p") = p // reassign hyper-parameter p ar = new AR (y) // create model for time series data ar.train (null, y).eval () // train the model and evluate testInSamp (y, p, ar, h) // test in-sample banner ("Make Forecasts") val yf = ar.forecast (h) println (s"$h steps ahead forecasts using AR($p) model = $yf") banner ("Rolling Validation") val stats = SimpleRollingValidation.crossValidate2 (ar, kt_ = 5, h) // val stats = RollingValidation.crossValidate2 (ar, kt_ = 5, h) Fit.showQofStatTable (stats) } // for } // for } // ARTest4 object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ARTest5` object is used to test the `AR` class on simulated data with normally * distributed errors and the response exhibits a quadratic trend. * Note: should have p <= n / 10 for reliable QoF results from ScalaTion (FIX). * > runMain scalation.analytics.forecaster.ARTest5 */ object ARTest5 extends App { val m = 50 val steps = 2 // number of steps for the forecasts val sig2 = 10000.0 val noise = Normal (0.0, sig2) val t = VectorD.range (0, m) val y = VectorD (for (i <- 0 until m) yield 45 * (i-1) - (i-2) * (i-2) + noise.gen) banner ("Build AR(1) model") val ar = new AR (y) // create model for time series data ar.train (null, y).eval () // train the model and evaluate println (ar.report) new Plot (t, y, ar.predictAll (), s"Plot of y, AR(1) vs. t", true) for (p <- 2 to 5) { banner (s"Build AR($p) model") ar.retrain (p).eval () // retrain for AR(p) model testInSamp (y, p, ar, 1) // test in-sample banner ("Make Forecasts") val yf = ar.forecast (steps) println (s"$steps-step ahead forecasts using AR($p) model = $yf") } // for banner ("Select model based on ACF and PACF") ar.plotFunc2 (ar.acF, "ACF") ar.plotFunc2 (ar.pacF, "PACF") } // ARTest5 object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ARTest6` object is used to test the `AR` class on real data: * Forecasting lake levels. Performs cross-validation. * @see cran.r-project.org/web/packages/fpp/fpp.pdf * > runMain scalation.analytics.forecaster.ARTest6 */ object ARTest6 extends App { import ForecasterVec.{t, y} banner ("Response Vector y") println (s"y = $y") banner ("Build AR(1) Model") val ar = new AR (y) // create model for time series data ar.train (null, y).eval () // train the model and evaluate testInSamp (y, 1, ar, 1) // test in-sample banner ("Cross-Validation") SimpleRollingValidation.crossValidate2 (ar) // perform cross-validation // RollingValidation.crossValidate2 (ar) // perform cross-validation - crashes FIX } // ARTest6 object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ARTest7` object is used to test the `AR` class on real data: * Generate an AR(2) process that results in rho1 being 0. * @see cran.r-project.org/web/packages/fpp/fpp.pdf * > runMain scalation.analytics.forecaster.ARTest7 */ object ARTest7 extends App { banner ("Demontrate rho1 = 0 and rho2 > 0") val m = 100 val y = new VectorD (m) val noise = Normal (0, 1) val phi = VectorD (0.072, 0.8) y(0) = noise.gen; y(1) = noise.gen for (t <- 2 until m) y(t) = phi(0) * y(t-1) + phi(1) * y(t-2) + noise.gen banner ("Build AR(1) Model") val ar = new AR (y) // create model for time series data ar.train (null, y).eval () // train the model and evaluate testInSamp (y, 1, ar, 1) // test in-sample banner ("Build AR(2) Model") ar.retrain (2).eval () // retrain for AR(2) model testInSamp (y, 2, ar, 1) // test in-sample banner ("Plot the ACF and PACF") ar.plotFunc2 (ar.acF, "ACF") ar.plotFunc2 (ar.pacF, "PACF") } // ARTest7 object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ARTest8` object is used to test the `AR` class on real data: * Forecasting lake levels. Select the best number of lags. * @see cran.r-project.org/web/packages/fpp/fpp.pdf * > runMain scalation.analytics.forecaster.ARTest8 */ object ARTest8 extends App { import ForecasterVec.y hp("p") = 1 // set p hyper-parameter val ar = new AR (y) // create model for time series data ar.train (null, y).eval () // train the model and evaluate val res = ar.forwardSel () println (s"forwardSel: $res") } // ARTest8 object