//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** @author Hao Peng, John Miller * @version 1.6 * @date Sat Jun 13 01:27:00 EST 2017 * @see LICENSE (MIT style license file). * * @see http://en.wikipedia.org/wiki/Autoregressive%E2%80%93moving-average_model * @see http://www.emu.edu.tr/mbalcilar/teaching2007/econ604/lecture_notes.htm * @see http://www.stat.berkeley.edu/~bartlett/courses/153-fall2010 * @see http://www.princeton.edu/~apapanic/ORFE_405,_Financial_Time_Series_%28Fall_2011%29_files/slides12-13.pdf * * @title Auto-Regressive, Integrated, Moving Average (ARIMA) Model */ package scalation.analytics package forecaster import scala.collection.mutable.Set import scala.math.{log, max, min, sqrt} import scalation.linalgebra._ import scalation.math.double_exp import scalation.minima.QuasiNewton import scalation.plot.Plot import scalation.random.{Normal, Uniform} import scalation.util.banner import ForecasterVec.testInSamp //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Companion object for class `ARIMA`. Includes features related to differencing * and automated order selection. * @see www.jstatsoft.org/article/view/v027i03/v27i03.pdf */ object ARIMA { def formula (z: VectoD, e: VectoD, φ: VectoD, θ: VectoD, t: Int): Double = { var sum = 0.0 for (j <- φ.range if t-j > 0) sum += φ(j) * z(t-1-j) for (j <- θ.range if t-j > 0) sum += θ(j) * e(t-1-j) sum } // formula //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return the 'd'th difference of the time-series. * @param y the original time-series to be differenced * @param d the order of simple differencing */ def difference (y: VectoD, d: Int): VectoD = { d match { case 0 => y case 1 => VectorD (for (i <- 0 until y.dim-1) yield y(i+1) - y(i)) case 2 => VectorD (for (i <- 0 until y.dim-2) yield y(i+2) - 2*y(i+1) + y(i)) case 3 => VectorD (for (i <- 0 until y.dim-3) yield y(i+3) - 3*y(i+2) + 3*y(i+1) - y(i)) case _ => flaw ("difference", "ARIMA does not support differencing higher than order 3"); null } // match } // difference //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Transform the fitted values on the training data of a differenced time series back * to the original scale. Undo trend differencing only. * @see stats.stackexchange.com/questions/32634/difference-time-series-before-arima-or-within-arima * @param yp the vector of predicted/fitted values * @param y the original time-series vector * @param d the order of simple differencing */ def transformBack (yp: VectoD, y: VectoD, d: Int): VectoD = { d match { case 0 => yp case 1 => val tb = new VectorD (y.dim) tb(0) = y(0) for (i <- 0 until y.dim-1) tb(i+1) = yp(i) + y(i) tb case 2 => val tb = new VectorD (y.dim) tb(0) = y(0); tb(1) = y(1) for (i <- 0 until y.dim-2) tb(i+2) = yp(i) + 2*y(i+1) - y(i) tb case 3 => val tb = new VectorD (y.dim) tb(0) = y(0); tb(1) = y(1); tb(2) = y(2) for (i <- 0 until y.dim-3) tb(i+3) = yp(i) + 3*y(i+2) - 3*y(i+1) + y(i) tb case _ => flaw ("transformBack", "ARIMA does not support differencing higher than order 3"); null } // match } // transformBack def transformBack_allH (ypa: MatriD, y: VectoD, d: Int): MatriD = { val tb = new MatrixD (ypa.dim1, ypa.dim2) tb.setCol (0, y) for (k <- 1 until ypa.dim2) tb.setCol (k, transformBack (ypa(k), y, d)) tb } // transformBack_allH //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Transform the forecast values of a differenced time series back to the * original scale. Undo trend differencing only. * @see stats.stackexchange.com/questions/32634/difference-time-series-before-arima-or-within-arima * @param xf_ the vector of forecasted values * @param xx the original zero-mean time series * @param d the order of simple differencing * @param t the time FIX - explain better */ def transformBackF (xf_ : VectoD, xx: VectoD, d: Int, t: Int): VectoD = { d match { case 0 => xf_ case 1 => val tbx = xx.slice(t - 1, t) ++ xf_ for (i <- 1 until tbx.dim) tbx(i) += tbx(i-1) tbx.slice(1) case 2 => val tbx = xx.slice(t - 2, t) ++ xf_ for (i <- 2 until tbx.dim) tbx(i) += (2*tbx(i-1) - tbx(i-2)) tbx.slice(2) case 3 => val tbx = xx.slice(t - 3, t) ++ xf_ for (i <- 3 until tbx.dim) tbx(i) += (3*tbx(i-1) - 3*tbx(i-2) + tbx(i-3)) tbx.slice(3) case _ => println ("ARIMA does not support differencing higher than order 3"); null } // match } // transformBackF } // ARIMA object import ARIMA._ //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ARIMA` class provides basic time series analysis capabilities for Auto- * Regressive 'AR' Integrated 'I' Moving-Average 'MA' models. In an * 'ARIMA(p, d, q)' model, 'p' and 'q' refer to the order of the Auto-Regressive * and Moving-Average components of the model; 'd' refers to the order of * differencing. `ARIMA` models are often used for forecasting. * Given time series data stored in vector 'y', its next value 'y_t = y(t)' * may be predicted based on prior values of 'y' and its noise: *

* y_t = c + Σ(φ_i y_t-i) + Σ(θ_i e_t-i) + e_t *

* where 'c' is a constant, 'φ' is the autoregressive coefficient vector, * 'θ' is the moving-average coefficient vector, and 'e' is the noise vector. * If 'd' > 0, then the time series must be differenced first before applying * the above model. *------------------------------------------------------------------------------ * @param y the original input vector (time series data) * @param d the order of Integration/simple differencing */ class ARIMA (y: VectoD, d: Int) extends ARMA (y) { private val DEBUG = true // debug flag private var m_ = 0 // size of the time series (after differencing) differenced = d > 0 // whether differencing will be applied modelName = s"ARIMA ($p, $d, $q)" // name of the model init (y) //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Initialize based on working time-series. Intended to work for this and derived * classes. * @param v the working vector/time-series */ override def init (v: VectoD) { mu = v.mean // the sample mean val z0 = difference (v, d) // difference the time series z = if (differenced) z0 else z0.copy // time series after differencing // deep copy to avoid modifying original y m_ = z.dim // size of the time series (after differencing) e = new VectorD (m_) // vector of residuals zp = new VectorD (m_) // predicted values prior to undifferencing/uncentering sig2 = z.variance // the sample variance } // setTS //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Set values for 'p', 'q'. * Note, intercept/mean counts as a parameter for undifferenced time series. * @param p_ the order of the AR part of the model * @param q_ the order of the MA part of the model */ override def setPQ (p_ : Int = 0, q_ : Int = 0): ARIMA = { p = p_ ; φ = new VectorD (p) // p and AR coefficients q = q_ ; θ = new VectorD (q) // q and MA coefficients cap = max (p, q) // greatest lag params = p + q + (if (differenced) 0 else 1) // number of parameters modelName = s"ARIMA ($p, $d, $q)" // updated model name this } // setPQ //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Update the vector of fitted values 'zp', the vector of errors 'e', and * return the negative log-likelihood '-ll'. * @see `Fit` for definition of 'll'. */ override def updateFittedValues (): Double = { if ( ! differenced) for (i <- z.range) z(i) = y(i) - μ // for undifferenced time series, center using est. μ zp(0) = z(0) // no past values or errors => copy actual for (t <- 1 until zp.dim) { e(t-1) = z(t-1) - zp(t-1) // error in previous forecast zp(t) = formula (z, e, φ, θ, t) } // for -ll (e.normSq / m, σ2, m) // return negative log likelihood } // updateFittedValues //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return the vector of predicted/fitted values on the training/full dataset. * Based on 'zp' calculated in the 'updateFittedValues' method. */ override def predictAll (): VectoD = if (differenced) transformBack (zp, y, d) else zp + μ //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Produce h-steps-ahead forecast for ARIMA models. * @see ams.sunysb.edu/~zhu/ams586/Forecasting.pdf * @param t the time point from which to make forecasts (in the original scale) * @param h the number of steps to forecast, must be at least one. */ override def forecast (t: Int = y.dim, h: Int = 1): VectoD = { if (t > y.dim) flaw ("forecast", s"t ($t) cannot be greater than y.dim (${y.dim})") val tz = t - d // scale t to match vector z and e // if (tz < cap) flaw ("forecast", s"tz ($tz) must be at least cap ($cap)") val zf = new VectorD (cap + h) val ef = new VectorD (cap + h) for (i <- 0 until cap if tz-cap+i >= 0) { zf(i) = z(tz-cap+i) ef(i) = e(tz-cap+i) } // for for (i <- cap until zf.dim) { // start at t = cap (enough for first value to forecast) var sum = 0.0 for (j <- 0 until p) sum += φ(j) * zf(i-1-j) for (j <- 0 until q) sum += θ(j) * ef(i-1-j) zf(i) = sum } // for val f = zf.slice (cap) if (differenced) transformBackF (f, y, d, t) else f + μ // return the vector of forecasts } // forecast //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Forecast values for all time points using 1 through 'h'-steps ahead forecasts. * The 'h'-th row of matrix is the horizon 'h' forecast (where 'h = 0' is actual data). * @param h the forecasting horizon, number of steps ahead to produce forecasts */ override def forecastAll (h: Int = 1): MatriD = { /*------------------------------------------------------------------------------------------------------- val yf = new MatrixD (h+1, y.dim) // forecasts for all horizons h & time points t yf(0) = y // first row is actual values val cut = d + 2 // cut over from actual to forecasted values for (t <- y.range) { if (t < cut) { for (k <- 1 to h) yf(k, t+k-1) = y(k-1) // place actual values diagonally } else { val ft = forecast (t, h) // forecasts at time point t, horizons 1 to h for (k <- 1 to h if t+k-1 < y.dim) yf(k, t+k-1) = ft(k-1) // place forecasts diagonally } // if } // for yf // return matrix of forecasted values } // forecastAll ---------------------------------------------------------------------------------------------------------*/ val yf = new MatrixD (h+1, y.dim) // forecasts for all horizons h & time points t yf(0) = y // first row is actual values val st = cap + d // define the starting index for (t <- st until y.dim) { val ft = forecast (t, h) // forecasts at time point t, horizons 1 to h for (k <- 1 to h if t+k-1 < y.dim) { yf(k, t+k-1) = ft(k-1) // place the forecasted values diagonally down into yf // (to align with the appropriate forecasting horizon/step) } // for } // for // fill in the blank values in the first few columns where no forecasts can be produced by copying values from previous rows for (k <- 1 to h; t <- 0 until st) yf(k, t) = y(t) // copy observed values from y for (k <- 2 to h; t <- st until st+k-1) yf(k, t) = yf(k-1, t) // copy forecasted values // for (k <- 1 to h; t <- 0 until max(1, st)) yf(k, t) = y(t) // copy observed values from y // yf(1, 1) = zp(1) + μ if (DEBUG) for (k <- 1 to h) println (s"for k = $k, sse = ${(y - yf(k)).normSq}") yf // return matrix of forecasted values } // forecastAll def forecast2 (h: Int): MatrixD = { val zf = new MatrixD (z.dim, h+1) // forecast matrix: rows - time, cols - horizon zf.setCol (0, z) // first column is actual values, horizon 0 for (k <- 1 to h) { val e_ = new VectorD (z.dim) // recalculated errors from clean slate for (t <- 0 until cap) { zf(t, k) = z(t) // copy first cap actual values e_(t) = e(t) // copy first cap errors (observed in training) } // for for (t <- cap until y.dim) { if (t-k >= 0) e_(t-k) = e(t-k) // calculate previous error at time t-k var sum = 0.0 for (j <- φ.range if t-j > 0) sum += φ(j) * zf(t-1-j, max (0, k-1-j)) for (j <- θ.range if t-j > 0) sum += θ(j) * e_(t-1-j) zf(t, k) = sum // centered forecast for time t } // for } // for zf + μ // return uncentered forecasts } // forecast2 //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Forecast values for all 'm' time points and all horizons (1 through 'h'-steps ahead). * Record these in the 'yf' matrix, where *

* yf(t, k) = k-steps ahead forecast for y_t *

* Note, 'yf.col(0)' is set to 'y' (the actual time-series values). * @param h the maximum forecasting horizon, number of steps ahead to produce forecasts */ def forecastAll2 (h: Int): MatriD = { val zf = new MatrixD (m_, h+1) // forecasts for all time points t & horizons to h val ef = new MatrixD (m_, h+1) // error vector based on forecasts zf.setCol (0, z) // first column is actual values, horizon 0 for (k <- 1 to h) { //val c = min (k, cap) // cut point from actual to forecasted values val c = 0 for (t <- 0 until c) { zf(t, k) = z(t) // copy first c actual values ef(t, k) = 0.0 // copy => no error at first } // for for (t <- c until m) { // forecast the rest var sum = 0.0 for (j <- 0 until p) sum += φ(j) * zf(max (0, t-1-j), max (0, k-1-j)) for (j <- 0 until q) sum += θ(j) * ef(max (0, t-1-j), max (0, k-1-j)) zf(t, k) = sum ef(t, k) = z(t) - zf(t, k) } // for if (DEBUG) println (s"forecastAll: zf.col ($k) = ${zf.col (k)}") } // for yf = if (differenced) transformBack_allH (zf, y, d) else zf + μ // return matrix of forecasted values yf } // forecastAll2 //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Obtain residuals in the original scale. */ override def residuals: VectoD = if (differenced) y - predictAll () else e } // ARIMA class //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ARIMATest` object is used to test the `ARIMA` class. * > runMain scalation.analytics.forcaster.ARIMATest */ object ARIMATest extends App { println ("ARIMA!") val m = 100 val noise = Uniform (-5, 5) val y = VectorD (for (i <- 0 until m) yield i + noise.gen) val mod = new ARIMA (y, 1) // time series data: y vs. t, apply 1st order differencing // Build ARIMA(p, d, q) models mod.setPQ (1).train ().eval_ () println (mod.report) new Plot (null, y, mod.predictAll (), "Plot of y, ar(1) vs. t", true) mod.setPQ (2).train ().eval_ () println (mod.report) new Plot (null, y, mod.predictAll (), "Plot of y, ar(2) vs. t", true) mod.setPQ (0, 3).train ().eval_ () println (mod.report) new Plot (null, y, mod.predictAll () , "Plot of y, ma(3) vs. t", true) println (mod.forecastAll(3)) // mod.plotFunc2 (mod.acF, "ACF") // must call super.train, so that ACF is actually computed // mod.plotFunc2 (mod.pacF, "PACF") // must call super.train, so that PACF is actually computed } // ARIMATest object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ARIMATest2` object is used to test the `ARIMA` class. * > runMain scalation.analytics.forecaster.ARIMATest2 */ object ARIMATest2 extends App { val m = 20 val noise = Normal (0, 2) val y = VectorD (for (i <- 0 until m) yield i + noise.gen) println (s"y = $y") val p = 1 val d = 1 // apply 1st order differencing val q = 1 val steps = 2 // number of steps for the forecasts val mod = new ARIMA (y, d) // time series data: y vs. t // Build ARIMA(p, d, q) models mod.setPQ (p).train ().eval_ () println (mod.report) new Plot (null, y, mod.predictAll (), s"Plot of y, ARIMA($p, 0) vs. t", true) val ar_f = mod.forecast (h = steps) println (s"$steps-step ahead forecasts using ARIMA($p, 0) model = $ar_f") mod.setPQ (0, q).train ().eval_ () println (mod.report) new Plot (null, y, mod.predictAll (), s"Plot of y, ARIMA(0, $q) vs. t", true) val ma_f = mod.forecast (h = steps) println (s"$steps-step ahead forecasts using ARIMA(0, $q) model = $ma_f") mod.setPQ (p, q).train ().eval_ () println (mod.report) new Plot (null, y, mod.predictAll (), s"Plot of y, ARIMA($p, $q) vs. t", true) val arma_f = mod.forecast (h = steps) println (s"$steps-step ahead forecasts using ARIMA($p, $q) model = $arma_f") } // ARIMATest2 object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ARIMATest3` object is used to test the `ARIMA` class on real data: * Forecasting lake levels. * @see cran.r-project.org/web/packages/fpp/fpp.pdf * > runMain scalation.analytics.forecaster.ARIMATest3 */ object ARIMATest3 extends App { import ForecasterVec.y val d = 0 // apply d-th order differencing - no differencing // val d = 1 // apply d-th order differencing - first differences for (h <- 1 to 2) { // forecasting horizon for (p <- 0 to 3) { // auto-regressive hyper-parameter settings for (q <- 0 to 3 if p+q > 0) { // moving-average hyper-parameter settings banner (s"Test3: ARIMA ($p, $d, $q) with h = $h") val mod = new ARIMA (y, d) // create an ARIMA model mod.setPQ (p, q).train ().eval_ () // set p and q, train and evaluate the ARIMA model println (mod.report) val yp = mod.predictAll () // predicted values val yf = mod.forecastAll (h)(h) // forecasted values - h steps ahead val yf2 = mod.forecast2 (h).col(h) // forecasted values - h steps ahead // val yf2 = mod.forecastAll2 (h).col(h) // forecasted values - h steps ahead new Plot (null, y, yp, s"Plot of y & yp, predicted ARIMA ($p, $d, $q) vs. t", true) new Plot (null, y, yf, s"Plot of y & yf, forecasted (h = $h) ARIMA ($p, $d, $q) vs. t", true) new Plot (null, y, yf2, s"Plot of y & yf2, forecasted2 (h = $h) ARIMA ($p, $d, $q) vs. t", true) if (h == 1) { ForecasterVec.differ (yp, yf, allow = true) ForecasterVec.differ (yp, yf2, allow = true) } // if // ForecasterVec.differ (yf, yf2, allow = true) } // for } // for } // for } // ARIMATest3 object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ARIMATest4` object is used to test the `ARIMA` class. * > runMain scalation.analytics.forecaster.ARIMATest4 */ object ARIMATest4 extends App { val path = BASE_DIR + "travelTime.csv" val data = MatrixD (path) val t = data.col(0) val y = data.col(1) val p = 1 val q = 1 val d = 1 // apply 1st order differencing val steps = 1 // number of steps for the forecasts val mod = new ARIMA (y, d) // time series data: y vs. t println (s"y = $y") // Build ARIMA(p, d, 1) models mod.setPQ (p).train ().eval_ () println (mod.report) new Plot (null, y, mod.predictAll (), s"Plot of y, ARIMA($p, $d, 0) vs. t", true) val ar_f = mod.forecast (h = steps) println (s"$steps-step ahead forecasts using ARIMA($p, $d, 0) model = $ar_f") mod.setPQ (0, q).train ().eval_ () println (mod.report) new Plot (null, y, mod.predictAll (), s"Plot of y, ARIMA(0, $d, $q) vs. t", true) val ma_f = mod.forecast (h = steps) println (s"$steps-step ahead forecasts using ARIMA(0, $d, $q) model = $ma_f") mod.setPQ (p, q).train ().eval_ () println (mod.report) new Plot (null, y, mod.predictAll (), s"Plot of y, ARIMA($p, $d, $q) vs. t", true) val arma_f = mod.forecast (h = steps) println (s"$steps-step ahead forecasts using ARIMA($p, $d, $q) model = $arma_f") } // ARIMATest4 object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ARIMATest5` object is used to test the `ARIMA` class. * > runMain scalation.analytics.forecaster.ARIMATest5 */ object ARIMATest5 extends App { val m = 50 val d = 0 // levels of differencing val steps = 2 // number of steps for the forecasts val sig2 = 10000.0 val noise = Normal (0.0, sig2) val y = VectorD (for (i <- 0 until m) yield 40 * (i-1) - (i-2) * (i-2) + noise.gen) val mod = new ARIMA (y, d) // time series model ARIMA banner ("Build ARIMA(1, 0) model") mod.setPQ (1, 0).train ().eval_ () // train for ARIMA(1, 0) model testInSamp (y, 2, mod, 1) for (p <- 1 to 3) { banner (s"Build ARIMA($p, $p) model") mod.setPQ (p, p).train ().eval_ () // retrain for ARIMA(p, p) model testInSamp (y, p+p, mod, 1) banner ("Make Forecasts") val yf = mod.forecast (steps) println (s"$steps-step ahead forecasts using ARIMA($p, $p) model = $yf") } // for } // ARIMATest5 object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ARIMATest6` object is used to test the `ARIMA` class on real data: * Forecasting lake levels. Select the best number of lags. * @see cran.r-project.org/web/packages/fpp/fpp.pdf * > runMain scalation.analytics.forecaster.ARIMATest6 */ object ARIMATest6 extends App { import ForecasterVec.y val d = 0 // level of differencing val mod = new ARIMA (y, d) // create model for time series data mod.setPQ (1, 1).train ().eval_ () // train the model and evaluate val res = mod.forwardSel () println (s"forwardSel: $res") for ((sp, sq) <- Array ((1, 0), (2, 1), (1, 1), (1, 2), (0, 1))) { val res = mod.forwardSel2 (VectorI (sp, sq)) println (s"forwardSel2: $res") } // for } // ARIMATest6 object