//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** @author Hao Peng, John Miller * @version 1.6 * @date Sat Jun 13 01:27:00 EST 2017 * @see LICENSE (MIT style license file). * * @see http://en.wikipedia.org/wiki/Autoregressive%E2%80%93moving-average_model * @see http://www.emu.edu.tr/mbalcilar/teaching2007/econ604/lecture_notes.htm * @see http://www.stat.berkeley.edu/~bartlett/courses/153-fall2010 * @see http://www.princeton.edu/~apapanic/ORFE_405,_Financial_Time_Series_%28Fall_2011%29_files/slides12-13.pdf * * @title Auto-Regressive, Integrated, Moving Average (ARIMA) Model */ package scalation.analytics package forecaster import scala.math.{max, sqrt} import scalation.linalgebra._ import scalation.math.double_exp import scalation.minima.QuasiNewton import scalation.plot.Plot import scalation.random.{Normal, Uniform} import scalation.util.banner import ForecasterVec.testInSamp //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Companion object for class `ARIMA`. Includes features related to differencing * and automated order selection. * @see www.jstatsoft.org/article/view/v027i03/v27i03.pdf */ object ARIMA { //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return the 'd'th difference of the time-series for 'd' in {0, 1, 2, 3}. * A new vector is returned even when there is no difference taken ('d = 0'), * to ensure the original is preserved. * @param y the original time-series to be differenced * @param d the order of simple differencing */ def difference (y: VectoD, d: Int): VectoD = { d match { case 0 => y.asInstanceOf [VectorD].copy () // FIX - put copy in VectoD case 1 => VectorD (for (i <- 0 until y.dim-1) yield y(i+1) - y(i)) case 2 => VectorD (for (i <- 0 until y.dim-2) yield y(i+2) - 2*y(i+1) + y(i)) case 3 => VectorD (for (i <- 0 until y.dim-3) yield y(i+3) - 3*y(i+2) + 3*y(i+1) - y(i)) case _ => flaw ("difference", "ARIMA does not support differencing higher than order 3"); null } // match } // difference //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Transform the fitted values on the training data of a differenced time series back * to the original scale. Undo trend differencing only. * @see stats.stackexchange.com/questions/32634/difference-time-series-before-arima-or-within-arima * @param yp the vector of predicted/fitted values * @param y the original time-series vector * @param d the order of simple differencing */ def transformBack (yp: VectoD, y: VectoD, d: Int): VectoD = { d match { case 0 => yp case 1 => val tb = new VectorD (y.dim) tb(0) = y(0) for (i <- 0 until y.dim-1) tb(i+1) = yp(i) + y(i) tb case 2 => val tb = new VectorD (y.dim) tb(0) = y(0); tb(1) = y(1) for (i <- 0 until y.dim-2) tb(i+2) = yp(i) + 2*y(i+1) - y(i) tb case 3 => val tb = new VectorD (y.dim) tb(0) = y(0); tb(1) = y(1); tb(2) = y(2) for (i <- 0 until y.dim-3) tb(i+3) = yp(i) + 3*y(i+2) - 3*y(i+1) + y(i) tb case _ => flaw ("transformBack", "ARIMA does not support differencing higher than order 3"); null } // match } // transformBack //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Transform the forecasted values of a differenced time series back to the original * for all horizons scale. * @see stats.stackexchange.com/questions/32634/difference-time-series-before-arima-or-within-arima * @param ypa the matrix of all multi-horizon forecasted values * @param y the original time-series vector * @param d the order of simple differencing */ def transformBack_allH (ypa: MatriD, y: VectoD, d: Int): MatriD = { val tb = new MatrixD (ypa.dim1, ypa.dim2) tb.setCol (0, y) for (k <- 1 until ypa.dim2) tb.setCol (k, transformBack (ypa.col(k), y, d)) tb } // transformBack_allH //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Transform the forecast values of a differenced time series back to the * original scale. * @see stats.stackexchange.com/questions/32634/difference-time-series-before-arima-or-within-arima * @param yf the vector of forecasted values * @param y the original time series * @param d the order of simple differencing * @param t the time point being forecasted (@see the 'forecast' method) */ def transformBackF (yf: VectoD, y: VectoD, d: Int, t: Int): VectoD = { d match { case 0 => yf case 1 => val tb = y.slice (t - 1, t) ++ yf for (i <- 1 until tb.dim) tb(i) += tb(i-1) tb.slice (1) case 2 => val tb = y.slice (t - 2, t) ++ yf for (i <- 2 until tb.dim) tb(i) += (2*tb(i-1) - tb(i-2)) tb.slice (2) case 3 => val tb = y.slice (t - 3, t) ++ yf for (i <- 3 until tb.dim) tb(i) += (3*tb(i-1) - 3*tb(i-2) + tb(i-3)) tb.slice (3) case _ => flaw ("transformBackF", "ARIMA does not support differencing higher than order 3"); null } // match } // transformBackF } // ARIMA object import ARIMA._ //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ARIMA` class provides basic time series analysis capabilities for Auto- * Regressive 'AR' Integrated 'I' Moving-Average 'MA' models. In an * 'ARIMA(p, d, q)' model, 'p' and 'q' refer to the order of the Auto-Regressive * and Moving-Average components of the model; 'd' refers to the order of * differencing. Given time series data stored in vector 'y', its next value 'y_t = y(t)' * may be predicted based on prior values of 'y' and its noise: *

* y_t = δ + Σ(φ_i y_t-i) + Σ(θ_i e_t-i) + e_t *

* where 'δ' is a constant, 'φ' is the auto-regressive coefficient vector, * 'θ' is the moving-average coefficient vector, and 'e' is the noise vector. *------------------------------------------------------------------------------ * If 'd > 0', then the time series must be differenced first before applying * the above model. *------------------------------------------------------------------------------ * @param y the original input vector (time series data) * @param d the order of Integration/simple differencing */ class ARIMA (y: VectoD, d: Int) extends ARMA (y) { private val DEBUG = true // debug flag differenced = d > 0 // flag indicating whether differencing will be applied init (y) // initialize vectors and parameters //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Initialize variables based on the working time-series 'v'. * Set/change the working time series. May be used to set the time series * to a different time window in order to produce newer forecast. * @param v the working vector/time-series */ override def init (v: VectoD) { mu = v.mean // sample mean z = difference (v, d) // take the d-th difference of the time series zp = new VectorD (z.dim) // predicted values prior to undifferencing/uncentering e = new VectorD (z.dim) // vector of errors/residuals sig2 = z.variance // sample variance } // init //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return the model name including current hyper-parameters, e.g., ARIMA(2, 1, 1). */ override def modelName: String = s"ARIMA($p, $d, $q)" //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Train/fit an `ARIMA` model to the times series data. Must call 'setPQ' first. * It uses BFGS, a Quasi-Newton optimizer, to minimize the negative log-likelihood. */ override def train (): ARIMA = { val solver = new QuasiNewton (nll) // nonlinear optimizer val b = new VectorD (params + 1) // parameter values if (! differenced) b(b.size-2) = mu // sample mean, initial est. for μ parameter b(b.size-1) = sqrt (sig2) // sample standard deviation, initial est. for σ parameter solver.solve (b) // find b that maximizes likelihood δ = μ * (1 - φ.sum) // update drift value // δ = stats.mu * (1 - φ.sum) if (DEBUG) showParameterEstimates () this } // train //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return the vector of predicted/fitted values on the training/full dataset. * Based on 'zp' calculated in the 'updateFittedValues' method. */ override def predictAll (): VectoD = { if (differenced) transformBack (zp, y, d) else zp + μ } // predictAll //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Produce h-steps-ahead forecast for ARIMA models. * @see ams.sunysb.edu/~zhu/ams586/Forecasting.pdf * @param t the time point from which to make forecasts (in the original scale) * @param h the number of steps to forecast, must be at least one */ override def forecast (t: Int = y.dim, h: Int = 1): VectoD = { if (t > y.dim) flaw ("forecast", s"t ($t) cannot be greater than y.dim (${y.dim})") val tz = t - d // scale t to match vector z and e if (tz < cap) flaw ("forecast", s"tz ($tz) must be at least cap ($cap)") val zf = new VectorD (cap + h) // forecasted centered values val e_ = new VectorD (cap + h) // available observed errors for (i <- 0 until cap if tz-cap+i >= 0) { // seed with first cap = max(p, q) values zf(i) = z(tz-cap+i) // copy first cap values e_(i) = e(tz-cap+i) // unveil first cap errors (observed in training) } // for for (i <- cap until zf.dim) { // start at t = cap (enough for first value to forecast) var sum = 0.0 for (j <- φ.range) sum += φ(j) * zf(i-1-j) for (j <- θ.range) sum += θ(j) * e_(i-1-j) zf(i) = sum } // for val f = zf.slice (cap) // dump first cap values if (differenced) transformBackF (f, y, d, t) else f + μ // return the vector of forecasts } // forecast //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Forecast values for all time points using 1 through 'h'-steps ahead forecasts. * The 'h'-th row of matrix is the horizon 'h' forecast (where 'h = 0' is actual data). * @param h the forecasting horizon, number of steps ahead to produce forecasts */ override def forecastAll (h: Int = 1): MatriD = { val yf = new MatrixD (y.dim, h+1) // forecasts for all horizons h & time points t yf.setCol(0, y) // first row is actual values val cut = cap + d // cut over from actual to forecasted values for (t <- y.range) { if (t < cut) { for (k <- 1 to h) yf(t, k) = y(t) // copy first cut observed values from y } else { val ft = forecast (t, h) // forecasts at time point t, horizons 1 to h for (k <- 1 to h if t+k-1 < y.dim) { yf(t+k-1, k) = ft(k-1) // place forecasts diagonally } // for } // if } // for // fill in blank values in first few rows where no forecasts can be produced by copying values from previous columns for (k <- 2 to h; t <- cut until cut+k-1) yf(t, k) = yf(t, k-1) // copy forecasted values yf // return matrix of forecasted values } // forecastAll //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Forecast values for all 'm' time points and all horizons (1 through 'h'-steps ahead). * Record these in the 'yf' matrix, where *

* yf(t, k) = k-steps ahead forecast for y_t *

* Note, 'yf.col(0)' is set to 'y' (the actual time-series values). * Do not forecast errors, rather use observed errors from training and make sure not * to use errors that would correspond to knowing future errors (all future errors should * be assumed to be 0). * @see https://otexts.com/fpp3/arima-forecasting.html, section 9.8 * @param h the maximum forecasting horizon, number of steps ahead to produce forecasts */ override def forecastAll2 (h: Int): MatriD = forecastAll (h) /*** FIX - values must be computed diagonially - does not work for d = 1, etc. (missing value at 'cut') override def forecastAll2 (h: Int): MatriD = { val zf = new MatrixD (y.dim, h+1) // forecast matrix: rows - time, cols - horizon for (t <- z.range) zf(t, 0) = z(t) // first column is actual values, horizon 0 val cut = cap + d // cut over from actual to forecasted values for (k <- 1 to h) { // loop through k-steps ahead forecasts val e_ = new VectorD (z.dim) // redetermine errors from a clean slate for (t <- 0 until cut) { // seed the first cap = max(p, q) values zf(t, k) = z(t) // copy first cap actual values e_(t) = e(t) // copy first cap errors (observed in training) } // for for (t <- cut until y.dim) { // forecast from cap to the end of time-series if (t-k >= 0) e_(t-k) = e(t-k) // unveil previous error at time t-k var sum = 0.0 for (j <- φ.range if t-j > 0) sum += φ(j) * zf(t-1-j, max (0, k-1-j)) for (j <- θ.range if t-j > 0) sum += θ(j) * e_(t-1-j) zf(t, k) = sum // centered forecast for time t } // for } // for println (s"forecastAll2: zf (${zf.dim1}) = $zf") if (differenced) transformBack_allH (zf, y, d) else zf + μ // return uncentered forecasts } // forecastAll2 ***/ //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Obtain residuals/errors in the original scale. */ override def residuals: VectoD = if (differenced) y - predictAll () else e } // ARIMA class //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ARIMATest` object is used to test the `ARIMA` class. * > runMain scalation.analytics.forcaster.ARIMATest */ object ARIMATest extends App { println ("ARIMA!") val m = 100 val noise = Uniform (-5, 5) val y = VectorD (for (i <- 0 until m) yield i + noise.gen) val mod = new ARIMA (y, 1) // time series data: y vs. t, apply 1st order differencing // Build ARIMA(p, d, q) models mod.setPQ (VectorI (1, 0)).train ().eval_ () println (mod.report) new Plot (null, y, mod.predictAll (), "Plot of y, ar(1) vs. t", true) mod.setPQ (VectorI (2, 0)).train ().eval_ () println (mod.report) new Plot (null, y, mod.predictAll (), "Plot of y, ar(2) vs. t", true) mod.setPQ (VectorI (0, 3)).train ().eval_ () println (mod.report) new Plot (null, y, mod.predictAll () , "Plot of y, ma(3) vs. t", true) println (mod.forecastAll(3)) // mod.plotFunc2 (mod.acF, "ACF") // must call super.train, so that ACF is actually computed // mod.plotFunc2 (mod.pacF, "PACF") // must call super.train, so that PACF is actually computed } // ARIMATest object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ARIMATest2` object is used to test the `ARIMA` class. * > runMain scalation.analytics.forecaster.ARIMATest2 */ object ARIMATest2 extends App { val m = 20 val noise = Normal (0, 2) val y = VectorD (for (i <- 0 until m) yield i + noise.gen) println (s"y = $y") val p = 1 val d = 1 // apply 1st order differencing val q = 1 val steps = 2 // number of steps for the forecasts val mod = new ARIMA (y, d) // time series data: y vs. t // Build ARIMA(p, d, q) models mod.setPQ (VectorI (p, 0)).train ().eval_ () println (mod.report) new Plot (null, y, mod.predictAll (), s"Plot of y, ARIMA($p, 0) vs. t", true) val ar_f = mod.forecast (h = steps) println (s"$steps-step ahead forecasts using ARIMA($p, 0) model = $ar_f") mod.setPQ (VectorI (0, q)).train ().eval_ () println (mod.report) new Plot (null, y, mod.predictAll (), s"Plot of y, ARIMA(0, $q) vs. t", true) val ma_f = mod.forecast (h = steps) println (s"$steps-step ahead forecasts using ARIMA(0, $q) model = $ma_f") mod.setPQ (VectorI (p, q)).train ().eval_ () println (mod.report) new Plot (null, y, mod.predictAll (), s"Plot of y, ARIMA($p, $q) vs. t", true) val arma_f = mod.forecast (h = steps) println (s"$steps-step ahead forecasts using ARIMA($p, $q) model = $arma_f") } // ARIMATest2 object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ARIMATest3` object is used to test the `ARIMA` class on real data: * Forecasting lake levels. * @see cran.r-project.org/web/packages/fpp/fpp.pdf * > runMain scalation.analytics.forecaster.ARIMATest3 */ object ARIMATest3 extends App { import ForecasterVec.y // val d = 0 // apply d-th order differencing - no differencing val d = 1 // apply d-th order differencing - first differences for (h <- 1 to 2) { // forecasting horizon for (p <- 0 to 4) { // auto-regressive hyper-parameter settings for (q <- 0 to 4 if p+q > 0) { // moving-average hyper-parameter settings banner (s"Test3: ARIMA ($p, $d, $q) with h = $h") val mod = new ARIMA (y, d) // create an ARIMA model mod.setPQ (VectorI (p, q)).train ().eval_ () // set p and q, train and evaluate the ARIMA model println (mod.report) val yp = mod.predictAll () // predicted/fitted values val yfa = mod.forecastAll (h) val yfb = mod.forecastAll2 (h) assert (yfa == yfb) val yf = yfa.col(h) // forecasted values - h steps ahead val yf2 = yfb.col(h) // forecasted 2 values - h steps ahead new Plot (null, y, yp, s"Plot of y & yp, predicted ARIMA ($p, $d, $q) vs. t", true) new Plot (null, y, yf, s"Plot of y & yf, forecasted (h = $h) ARIMA ($p, $d, $q) vs. t", true) new Plot (null, y, yf2, s"Plot of y & yf2, forecasted2 (h = $h) ARIMA ($p, $d, $q) vs. t", true) if (h == 1) ForecasterVec.differ (yp, yf, allow = true) ForecasterVec.differ (yf, yf2, allow = true) val skip = max (p, q) // skip the cap start-up banner (s"ARIMATest3: QoF (@h = $h) for yf = mod.forecastAll") println (s"rSq (yf) for h = $h is ${rSqF (y, yf)}") println (s"rSq (yf) for h = $h is ${rSqF (y, yf, max (p, q))}, skip = $skip") println (s"sMAPE (yf) for h = $h is ${smapeF (y, yf)}") println (s"sMAPE (yf) for h = $h is ${smapeF (y, yf, max (p, q))}, skip = $skip") banner (s"ARIMATest3: QoF (@h = $h) for yf2 = mod.forecastAll2") println (s"rSq (yf2) for h = $h is ${rSqF (y, yf2)}") println (s"rSq (yf2) for h = $h is ${rSqF (y, yf2, max (p, q))}, skip = $skip") println (s"sMAPE (yf2) for h = $h is ${smapeF (y, yf2)}") println (s"sMAPE (yf2) for h = $h is ${smapeF (y, yf2, max (p, q))}, skip = $skip") } // for } // for } // for } // ARIMATest3 object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ARIMATest4` object is used to test the `ARIMA` class. * > runMain scalation.analytics.forecaster.ARIMATest4 */ object ARIMATest4 extends App { val path = BASE_DIR + "travelTime.csv" val data = MatrixD (path) val t = data.col(0) val y = data.col(1) val p = 1 val q = 1 val d = 1 // apply 1st order differencing val steps = 1 // number of steps for the forecasts val mod = new ARIMA (y, d) // time series data: y vs. t println (s"y = $y") // Build ARIMA(p, d, 1) models mod.setPQ (VectorI (p, 0)).train ().eval_ () println (mod.report) new Plot (null, y, mod.predictAll (), s"Plot of y, ARIMA($p, $d, 0) vs. t", true) val ar_f = mod.forecast (h = steps) println (s"$steps-step ahead forecasts using ARIMA($p, $d, 0) model = $ar_f") mod.setPQ (VectorI (0, q)).train ().eval_ () println (mod.report) new Plot (null, y, mod.predictAll (), s"Plot of y, ARIMA(0, $d, $q) vs. t", true) val ma_f = mod.forecast (h = steps) println (s"$steps-step ahead forecasts using ARIMA(0, $d, $q) model = $ma_f") mod.setPQ (VectorI (p, q)).train ().eval_ () println (mod.report) new Plot (null, y, mod.predictAll (), s"Plot of y, ARIMA($p, $d, $q) vs. t", true) val arma_f = mod.forecast (h = steps) println (s"$steps-step ahead forecasts using ARIMA($p, $d, $q) model = $arma_f") } // ARIMATest4 object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ARIMATest5` object is used to test the `ARIMA` class. * > runMain scalation.analytics.forecaster.ARIMATest5 */ object ARIMATest5 extends App { val m = 50 val d = 0 // levels of differencing val steps = 2 // number of steps for the forecasts val sig2 = 10000.0 val noise = Normal (0.0, sig2) val y = VectorD (for (i <- 0 until m) yield 40 * (i-1) - (i-2) * (i-2) + noise.gen) val mod = new ARIMA (y, d) // time series model ARIMA banner (s"Build ARIMA(1, $d, 0) model") mod.setPQ (VectorI (1, 0)).train ().eval_ () // train for ARIMA(1, d, 0) model testInSamp (y, 2, mod, 1) for (p <- 1 to 3) { banner (s"Build ARIMA($p, $d, $p) model") mod.setPQ (VectorI (p, p)).train ().eval_ () // retrain for ARIMA(p, d, p) model testInSamp (y, p+p, mod, 1) banner ("Make Forecasts") val yf = mod.forecast (steps) println (s"$steps-step ahead forecasts using ARIMA($p, $d, $p) model = $yf") } // for } // ARIMATest5 object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ARIMATest6` object is used to test the `ARIMA` class on real data: * Forecasting lake levels. Select the best number of lags. * @see cran.r-project.org/web/packages/fpp/fpp.pdf * > runMain scalation.analytics.forecaster.ARIMATest6 */ object ARIMATest6 extends App { import ForecasterVec.y val d = 0 // level of differencing val mod = new ARIMA (y, d) // create model for time series data mod.setPQ (VectorI (1, 1)).train ().eval_ () // train the model and evaluate val res = mod.forwardSel () println (s"forwardSel: $res") for ((sp, sq) <- Array ((1, 0), (2, 1), (1, 1), (1, 2), (0, 1))) { val res = mod.forwardSel2 (VectorI (sp, sq)) println (s"forwardSel2: $res") } // for } // ARIMATest6 object