//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** @author John Miller * @version 1.6 * @date Sat Jun 13 01:27:00 EST 2017 * @see LICENSE (MIT style license file). * * @title Model: Auto-Regressive with p = 2 * * @see http://en.wikipedia.org/wiki/Autoregressive%E2%80%93moving-average_model * @see http://www.emu.edu.tr/mbalcilar/teaching2007/econ604/lecture_notes.htm * @see http://www.stat.berkeley.edu/~bartlett/courses/153-fall2010 * @see http://www.princeton.edu/~apapanic/ORFE_405,_Financial_Time_Series_%28Fall_2011%29_files/slides12-13.pdf */ package scalation.analytics package forecaster import scala.collection.mutable.Set import scala.math.{max, min} import scalation.linalgebra.{MatriD, MatrixD, VectoD, VectorD} import scalation.math.noDouble import scalation.plot.Plot import scalation.random.{Normal, Uniform} import scalation.stat.{Statistic, vectorD2StatVector} import scalation.util.banner import Fit._ import ForecasterVec.{MAX_LAGS, testInSamp} import RollingValidation.trSize // FIX - don't use actual y values for first p predictions - compare with ARIMA //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `AR_2` class provides basic time series analysis capabilities for Auto- * Regressive 'AR'. In an 'AR_2' model, 1 refers to the order of the * Auto-Regressive components of the model. `AR_2` models are often used for forecasting. * Given time series data stored in vector 'y', its next value 'y_t = y(t)' * may be predicted based on prior values of 'y' and its noise: *

* y_t = c + Σ(φ_k y_t-k) *

* where 'c' is a constant, 'φ' is the autoregressive coefficient vector, * and 'e' is the noise vector. *---------------------------------------------------------------------------------- * @param y the response vector (time-series data) */ class AR_2 (y: VectoD) extends ForecasterVec (y, 1, null) with NoFeatureSelectionF { private val DEBUG = false // debug flag private val φ = new VectorD (2) // AR_2 parameters/coefficients private var δ: Double = 0.0 // drift/constant term //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return the model name including its current hyper-parameter, i.e., AR_2. */ override def modelName: String = "AR_2" //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Train/fit an `AR_2` model to the times-series data in vector 'y_'. * Estimate the coefficient vector 'φ' for a 1-st order Auto-Regressive 'AR_2' * model. *

* z_t = φ_0 * z_t-1 + e_t *

* @param x_null the data/input matrix (ignored) * @param y_ the response vector (currently only works for y) - FIX */ override def train (x_null: MatriD, y_ : VectoD): AR_2 = { super.train (null, y_) val (r1, r2) = (acF(1), acF(2)) val mult = 1.0 / (1 - r1 * r1) φ(0) = r1 * (1 - r2) * mult φ(1) = (r2 - r1 * r1) * mult δ = stats.mu * (1 - φ.sum) this } // train //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return the parameter vector. */ def parameter: VectoD = φ //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return a detailed summary of the trained model. */ def summary: String = { super.summary ("φ", s"$δ + $φ dot [y_(t-1), y_(t-2)]") } // summary //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Predict values for all time points using 1-step ahead forecasts. * Return a vector that is the predictions (zero-centered) of a 1-st order * Auto-Regressive 'AR_2' model. * @see 'predictAll' in `ForecasterVec` for uncentered results */ def predictAllz (): VectoD = { val zp = new VectorD (m) // forecasts for all time points t zp(0) = z(0) // copy first actual value into zp for (t <- 1 until m) { zp(t) = φ(0) * z(t-1) + φ(1) * z(max (0, t-2)) // estimate the rest values for zp } // for zp // return vector of predicted values } // predictAllz //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Forecast values for all 'm' time points and all horizons (1 through 'h'-steps ahead). * Record these in the 'yf' matrix, where *

* yf(t, k) = k-steps ahead forecast for y_t *

* Note, 'yf.col(0)' is set to 'y' (the actual time-series values). * @param h the maximum forecasting horizon, number of steps ahead to produce forecasts */ def forecastAll (h: Int): MatriD = { yf = new MatrixD (m, h+1) // forecasts for all time points t & horizons to h yf.setCol (0, y) // first column is actual values, horizon 0 for (k <- 1 to h) { val c = min (k, 2) // cut point from actual to forecasted values for (t <- 0 until c) yf(t, k) = y(t) // copy first c actual values for (t <- c until m) { // forecast the rest yf(t, k) = δ + φ(0) * yf(t-1, k-1) + φ(1) * yf(max (0, t-2), max (0, k-2)) } // for if (DEBUG) println (s"forecastAll: yf.col ($k) = ${yf.col (k)}") } // for yf // return matrix of forecasted values } // forecastAll //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Produce h-steps ahead forecast on the testing data during cross validation. * Note, must calculate the z values by doing y - mu on the fly because these * values are beyond the bounds of the z vector. * @param y the current response vector * @param t the time point to be forecast * @param h the forecasting horizon, number of steps ahead to produce forecast */ override def forecastX (y: VectoD = y, t: Int = y.dim, h: Int = 1): Double = { if (t > m) flaw ("forecastX", "no forecasts with starting t > m are provided") val zf = new VectorD (2+h) zf(0) = y(max (0, t-2)) - stats.mu // copy first value into zf. zf(1) = y(max (0, t-1)) - stats.mu // copy second value into zf. for (k <- 1 to h) zf(k+1) = φ(0) * zf(k) + φ(1) * zf(k-1) // forecast the rest zf.last + stats.mu // return the last forecast } // forecastX } // AR_2 class //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `AR_2` companion object provides factory methods for the `AR_2` class. */ object AR_2 { //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Create an `AR_2` object. * @param y the response vector (time series data) */ def apply (y: VectoD): AR_2 = { new AR_2 (y) } // apply } // AR_2 object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `AR_2Test` object is used to test the `AR_2` class on simulated data with * uniformly distributed errors. * > runMain scalation.analytics.forecaster.AR_2Test */ object AR_2Test extends App { val m = 100 val noise = Uniform (-5, 5) val y = VectorD (for (i <- 0 until m) yield i + noise.gen) banner ("Build AR_2 Model") val ar = new AR_2 (y) // create model for time series data ar.train (null, y).eval () // train model and evaluate testInSamp (y, 2, ar, 1) // in-sample test banner ("Select model based on ACF and PACF") ar.plotFunc2 (ar.acF, "ACF") ar.plotFunc2 (ar.pacF, "PACF") } // AR_2Test object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `AR_2Test2` object is used to test the `AR_2` class on simulated data with * normally distributed errors. * > runMain scalation.analytics.forecaster.AR_2Test2 */ object AR_2Test2 extends App { val m = 30 val noise = Normal (0, 2) val y = VectorD (for (i <- 0 until m) yield i + noise.gen) banner (s"Build AR_2 Model") val ar = new AR_2 (y) // create model for time series data ar.train (null, y).eval () // train the model and evaluate testInSamp (y, 2, ar, 1) // in-sample test banner ("Make Forecasts") val steps = 10 // number of steps for the forecasts val yf = ar.forecast (m, steps) println (s"$steps-step ahead forecasts using AR_2 model = $yf") val tf = VectorD.range (0, m + steps) new Plot (tf, y ++ yf, null, s"Plot AR_2 forecasts vs. t", true) } // AR_2Test2 object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `AR_2Test3` object is used to test the `AR_2` class on real data: * Forecasting lake levels. * @see cran.r-project.org/web/packages/fpp/fpp.pdf * > runMain scalation.analytics.forecaster.AR_2Test3 */ object AR_2Test3 extends App { import ForecasterVec.y var ar: AR_2 = null for (h <- 1 to 2) { // forecasting horizon banner (s"Test3: AR_2 with h = $h") ar = new AR_2 (y) // create mode for time series data ar.train (null, y).eval () // train the model and evaluate val yf = ar.forecastAll (h) val yf2 = testInSamp (y, 2, ar, h) // in-sample test println (ar.summary) } // for banner ("Select model based on ACF and PACF") ar.plotFunc2 (ar.acF, "ACF") ar.plotFunc2 (ar.pacF, "PACF") banner ("Rolling Validation") val stats = SimpleRollingValidation.crossValidate2 (ar, kt_ = 5) // val stats = RollingValidation.crossValidate2 (ar, kt_ = 5) Fit.showQofStatTable (stats) } // AR_2Test3 object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `AR_2Test4` object is used to test the `AR_2` class on real data: * Forecasting traffic flow. * > runMain scalation.analytics.forecaster.AR_2Test4 */ object AR_2Test4 extends App { val path = BASE_DIR + "travelTime.csv" val data = MatrixD (path) val (t, y) = (data.col(0), data.col(1)) var ar: AR_2 = null for (h <- 1 to 5) { // forecasting horizon banner (s"Build AR_2 model") ar = new AR_2 (y) // create model for time series data ar.train (null, y).eval () // train the model and evaluate testInSamp (y, 2, ar, h) // in-sample test banner ("Make Forecasts") val yf = ar.forecast (h) println (s"$h steps ahead forecasts using AR_2 model = $yf") banner ("Rolling Validation") val stats = SimpleRollingValidation.crossValidate2 (ar, kt_ = 5, h) // val stats = RollingValidation.crossValidate2 (ar, kt_ = 5, h) Fit.showQofStatTable (stats) } // for } // AR_2Test4 object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `AR_2Test5` object is used to test the `AR_2` class on simulated data with normally * distributed errors and the response exhibits a quadratic trend. * > runMain scalation.analytics.forecaster.AR_2Test5 */ object AR_2Test5 extends App { val m = 50 val steps = 2 // number of steps for the forecasts val sig2 = 10000.0 val noise = Normal (0.0, sig2) val y = VectorD (for (i <- 0 until m) yield 45 * (i-1) - (i-2) * (i-2) + noise.gen) banner ("Build AR_2 model") val ar = new AR_2 (y) // create model for time series data ar.train (null, y).eval () // train the model and evaluate testInSamp (y, 2, ar, 1) // in-sample test banner ("Select model based on ACF and PACF") ar.plotFunc2 (ar.acF, "ACF") ar.plotFunc2 (ar.pacF, "PACF") } // AR_2Test5 object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `AR_2Test6` object is used to test the `AR_2` class on real data: * Forecasting lake levels. Performs cross-validation. * @see cran.r-project.org/web/packages/fpp/fpp.pdf * > runMain scalation.analytics.forecaster.AR_2Test6 */ object AR_2Test6 extends App { import ForecasterVec.{t, y} banner ("Response Vector y") println (s"y = $y") banner ("Build AR_2 Model") val ar = new AR_2 (y) // time series data ar.train (null, y).eval () // train the model and evaluate testInSamp (y, 2, ar, 1) // in-sample test banner ("Cross-Validation") SimpleRollingValidation.crossValidate2 (ar) // perform cross-validation // RollingValidation.crossValidate2 (ar) // perform cross-validation - crashes FIX } // AR_2Test6 object