//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** @author Hao Peng, John Miller * @version 1.6 * @date Sat Jun 13 01:27:00 EST 2017 * @see LICENSE (MIT style license file). * * @see http://en.wikipedia.org/wiki/Autoregressive%E2%80%93moving-average_model * @see http://www.emu.edu.tr/mbalcilar/teaching2007/econ604/lecture_notes.htm * @see http://www.stat.berkeley.edu/~bartlett/courses/153-fall2010 * @see http://www.princeton.edu/~apapanic/ORFE_405,_Financial_Time_Series_%28Fall_2011%29_files/slides12-13.pdf * @see www.jstatsoft.org/article/view/v027i03/v27i03.pdf * * @title Auto-Regressive, Moving Average (ARMA) Model */ package scalation.analytics package forecaster import scala.collection.mutable.Set import scala.math.{log, max, min, sqrt} import scalation.linalgebra._ import scalation.math.{double_exp, noDouble} import scalation.minima.QuasiNewton import scalation.plot.Plot import scalation.random.{Normal, Uniform} import scalation.util.banner import Fit._ import ForecasterVec.{MAX_LAGS, testInSamp} //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ARMA` companion object maintains values for hyper-parameters. */ object ARMA { /** Base hyper-parameter specification for `ARMA` as well as `AR` and `MA` */ val hp = new HyperParameter hp += ("p", 1, 1) // for the AR part hp += ("q", 1, 1) // for the MA part hp += ("mle", 3, 3) // optimizer to use } // ARMA object import ARMA._ //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ARMA` class provides basic time series analysis capabilities for Auto- * Regressive 'AR' and Moving-Average 'MA' models. In an 'ARMA(p, q)' model, * 'p' and 'q' refer to the order of the Auto-Regressive and Moving-Average * components of the model. `ARMA` models are often used for forecasting. * Given time series data stored in vector 'y', its next value 'y_t = y(t)' * may be predicted based on prior values of 'y' and its noise: *

* y_t = δ + Σ(φ_i y_t-i) + Σ(θ_i e_t-i) + e_t *

* where 'δ' is a constant, 'φ' is the autoregressive coefficient vector, * 'θ' is the moving-average coefficient vector, and 'e' is the noise vector. *------------------------------------------------------------------------------ * @param y the original input vector (time series data) */ class ARMA (y: VectoD) extends ForecasterVec (y, -1, null) // call resetDF in eval_ { private val DEBUG = true // debug flag protected var differenced = false // flag indicating whether differencing will be applied protected var mu = 0.0 // the sample mean protected var μ = 0.0 // the true population mean (estimated using MLE) protected var sig2 = y.variance // the sample variance protected var σ2 = 0.0 // the true population variance (estimated using MLE) protected var p = 0 // AR order protected var q = 0 // MA order protected var cap = 0 // max among p, q protected var params = 0 // max among p, q protected var zp: VectoD = null // vector of predicted/fitted values prior to un-zero mean protected var φ: VectoD = null // AR(p) coefficients protected var θ: VectoD = null // MA(q) coefficients protected var δ: Double = 0.0 // drift/constant term init (y) //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Initialize based on working time-series. Intended to work for this and derived * classes. * @param v the working vector/time-series */ def init (v: VectoD) { e = new VectorD (v.dim) // vector of residuals/errors zp = new VectorD (v.dim) // vector of predicted/fitted values prior to un-zero mean mu = v.mean // the sample mean sig2 = v.variance // the sample variance } // init //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return the model name including its current hyper-paramaeters, e.g., ARMA(2, 1). */ override def modelName: String = s"ARMA($p, $q)" //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Show estimates for parameters. */ def showParameterEstimates () { println (s"φ = $φ") // AR parameters println (s"θ = $θ") // MA parameters println (s"δ = $δ") // drift println (s"mu = $mu") // sample mean println (s"μ = $μ") // MLE mean println (s"sig2 = $sig2") // sample variance println (s"σ2 = $σ2") // MLE variance } // showParameterEstimates //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Set values for 'p', 'q'. * Note, intercept/mean counts as a parameter for the time series. * @param p_ the order of the AR part of the model * @param q_ the order of the MA part of the model */ def setPQ (p_ : Int = 0, q_ : Int = 0): ARMA = { p = p_ ; φ = new VectorD (p) // p and AR coefficients q = q_ ; θ = new VectorD (q) // q and MA coefficients cap = max (p, q) // greatest lag params = p + q + (if (differenced) 0 else 1) // number of parameters this } // setPQ //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Train/fit an `ARMA` model to the times series data. Must call 'SetPQ' first. * It uses BFGS, a Quasi-Newton optimizer, to minimize the negative log-likelihood. */ def train (): ARMA = { super.train (null, y) val solver = new QuasiNewton (nll) // nonlinear optimizer val b = new VectorD (params + 1) // parameter values incl. σ2 if (! differenced) b(b.size-2) = mu b(b.size-1) = sqrt (sig2) // sample standard deviation, initial est. for σ parameter solver.solve (b) // find b that minimizes negative log-likelihood δ = μ * (1 - φ.sum) // update drift value // δ = stats.mu * (1 - φ.sum) if (DEBUG) showParameterEstimates () this } // train //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The negative log-likelihood function to be minimized. * @see math.unice.fr/~frapetti/CorsoP/Chapitre_4_IMEA_1.pdf, page 36 * @see spia.uga.edu/faculty_pages/monogan/teaching/ts/Barima.pdf * @see stats.stackexchange.com/questions/77663/arima-estimation-by-hand * @param b the input parameter vector */ def nll (b: VectoD): Double = { if (b.size != params + 1) flaw ("nll", "input parameter vector size incorrect") for (i <- 0 until p) φ(i) = b(i) for (i <- p until p+q) θ(i-p) = b(i) if (! differenced) μ = b(b.size-2) σ2 = b.last~^2 updateFittedValues () } // nll //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Update the vector of fitted values 'zp', the vector of errors 'e', and * return the negative log-likelihood '-ll'. * @see `Fit` for definition of 'll'. */ def updateFittedValues (): Double = { if (! differenced) for (i <- z.range) z(i) = y(i) - μ // for undifferenced time series, center using est. μ zp(0) = z(0) // no past values or errors => copy actual for (t <- 1 until zp.dim) { e(t-1) = z(t-1) - zp(t-1) // error in previous forecast var sum = 0.0 for (j <- 0 until p if t-j > 0) sum += φ(j) * z(t-1-j) for (j <- 0 until q if t-j > 0) sum += θ(j) * e(t-1-j) zp(t) = sum } // for -ll (e.normSq / m, σ2, m) // return negative log likelihood } // updateFittedValues //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return the vector of predicted/fitted values on the training/full dataset. * Based on 'zp' calculated in the 'updateFittedValues' method. */ override def predictAll (): VectoD = zp + μ //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return the vector of zero-centered predicted/fitted values on the training/full * dataset. Based on 'zp' calculated in the 'updateFittedValues' method. */ override def predictAllz (): VectoD = predictAll () - μ //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Forecast values for all 'm' time points and all horizons (1 through 'h'-steps ahead). * Record these in the 'yf' matrix, where *

* yf(t, k) = k-steps ahead forecast for y_t *

* Note, 'yf.col(0)' is set to 'y' (the actual time-series values). * @param h the maximum forecasting horizon, number of steps ahead to produce forecasts */ def forecastAll (h: Int): MatriD = { yf = new MatrixD (m, h+1) // forecasts for all time points t & horizons to h val ef = new VectorD (m) // error vector based on forecasts yf.setCol (0, y) // first column is actual values, horizon 0 for (k <- 1 to h) { val c = min (k, cap) // cut point from actual to forecasted values for (t <- 0 until c) { yf(t, k) = y(t) // copy first c actual values ef(t) = 0.0 // copy => no error at first } // for for (t <- c until m) { // forecast the rest var sum = δ for (j <- 0 until p) sum += φ(j) * yf(max (0, t-1-j), max (0, k-1-j)) for (j <- 0 until q if t-j > 0) sum += θ(j) * ef(t-1-j) yf(t, k) = sum ef(t+1-k) = y(t+1-k) - yf(t+1-k, k) // ef(t) = y(t) - sum } // for if (DEBUG) println (s"forecastAll: yf.col ($k) = ${yf.col (k)}") } // for yf // return matrix of forecasted values } // forecastAll //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return the parameter vector (φ concatented with θ). */ override def parameter: VectoD = if (q == 0) φ else if (p == 0) θ else φ ++ θ //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Obtain residuals/errors in the original scale. */ def residuals: VectoD = e //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Compute the error (difference between actual and predicted) and useful * diagnostics for the dataset. * @param e vector of residuals * @param y vector of observed values * @param yp vector of predicted values */ def eval_ (e: VectoD = residuals, y: VectoD = y, yp: VectoD = predictAll ()): ARMA = { resetDF (params, y.dim - params) diagnose (e, y, yp) this } // eval_ //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Perform forward selection to find the most predictive variables to add the * existing model, returning the variables to add and the new model. * Note, all lags up and including 'p' define the model. * FIX - select subsets of the lags, e.g., Set (1, 2, 5) * @see `Fit` for index of QoF measures. * @param cols the lags/columns currently included in the existing model (ignored) * @param index_q index of Quality of Fit (QoF) to use for comparing quality */ def forwardSel (cols: Set [Int] = null, index_q: Int = index_rSq): (Int, ARMA) = { val rSq = new MatrixD (MAX_LAGS, 3) // R^2, R^2 Bar, R^2 cv var p_mx = -1 // best column, so far var mod_mx = null.asInstanceOf [ARMA] // best model, so far var fit_mx = noDouble // best fit, so far for (p <- 1 to MAX_LAGS) { setPQ (p, p).train ().eval_ () // set p and q, retrain model, evaluate QoF val fit_p = fit(index_q) // new fit if (fit_p > fit_mx) { p_mx = p; mod_mx = this; fit_mx = fit_p } rSq(p-1) = VectorD (fit_p, -1.0, -1.0) // use new model, mod_p, no cross } // for if (p_mx == -1) { flaw ("forwardSel", "could not add additional lags") } // if println (s"forwardSel: add $p_mx giving qof = $fit_mx") println (s"rSq = $rSq") new Plot (null, rSq.col(0), null, "forwardSel ARMA(p, p)", lines = true) (p_mx, mod_mx) } // forwardSel //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Perform forward selection to find the most predictive variables to add the * existing model, returning the variables to add and the new model. * Note, all lags up and including 'p' define the model. * FIX - select subsets of the lags, e.g., Set (1, 2, 5) * @see `Fit` for index of QoF measures. * @param step the amount to increment p and q for each iteration * @param cols the lags/columns currently included in the existing model (ignored) * @param index_q index of Quality of Fit (QoF) to use for comparing quality */ def forwardSel2 (step: VectoI = VectorI (1, 1), cols: Set [Int] = null, index_q: Int = index_rSq): (Int, ARMA) = { val rSq = new MatrixD (MAX_LAGS, 3) // R^2, R^2 Bar, R^2 cv var p_mx = -1 // best column, so far var mod_mx = null.asInstanceOf [ARMA] // best model, so far var fit_mx = noDouble // best fit, so far for (p <- 1 to MAX_LAGS) { val pq = step * p setPQ (pq(0), pq(1)).train ().eval_ () // set p and q, retrain model, evaluate QoF val fit_p = fit(index_q) // new fit if (fit_p > fit_mx) { p_mx = p; mod_mx = this; fit_mx = fit_p } rSq(p-1) = VectorD (fit_p, -1.0, -1.0) // use new model, mod_p, no cross } // for if (p_mx == -1) { flaw ("forwardSel2", "could not add additional lags") } // if println (s"forwardSel2: add $p_mx giving qof = $fit_mx") println (s"for step = $step, rSq = $rSq") new Plot (null, rSq.col(0), null, s"forwardSel2 ARMA(p, q), step = $step", lines = true) (p_mx, mod_mx) } // forwardSel2 } // ARMA class //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ARMATest` object is used to test the `ARMA` class. * > runMain scalation.analytics.forecaster.ARMATest */ object ARMATest extends App { println ("ARMA!") val m = 100 val noise = Uniform (-5, 5) val y = VectorD (for (i <- 0 until m) yield i + noise.gen) val arma = new ARMA (y) // time series data: y vs. t // Build ARMA(p, q) models arma.setPQ (1).train ().eval_ () testInSamp (y, 3, arma, 1) arma.setPQ (2).train ().eval_ () testInSamp (y, 3, arma, 1) arma.setPQ (0, 3).train ().eval_ () testInSamp (y, 3, arma, 1) println (arma.forecastAll (3)) arma.plotFunc2 (arma.acF, "ACF") // must call super.train, so that ACF is actually computed - FIX arma.plotFunc2 (arma.pacF, "PACF") // must call super.train, so that PACF is actually computed } // ARMATest object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ARMATest2` object is used to test the `ARMA` class. * > runMain scalation.analytics.forecaster.ARMATest2 */ object ARMATest2 extends App { val m = 20 val noise = Normal (0, 2) val y = VectorD (for (i <- 0 until m) yield i + noise.gen) println (s"y = $y") val p = 1 val q = 1 val steps = 2 // number of steps for the forecasts val arma = new ARMA (y) // time series data: y vs. t // Build ARMA(p, q) models arma.setPQ (p).train ().eval_ () testInSamp (y, p, arma, 1) var yf = arma.forecast (t = m, h = steps) println (s"$steps-step ahead forecasts using ARMA($p, 0) model = $yf") arma.setPQ (0, q).train ().eval_ () testInSamp (y, q, arma, 1) yf = arma.forecast (t = m, h = steps) println (s"$steps-step ahead forecasts using ARMA(0, $q) model = $yf") arma.setPQ (p, q).train ().eval_ () testInSamp (y, p+q, arma, 1) yf = arma.forecast (t = m, h = steps) println (s"$steps-step ahead forecasts using ARMA($p, $q) model = $yf") } // ARMATest2 object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ARMATest3` object is used to test the `ARMA` class on real data: * Forecasting lake levels. * @see cran.r-project.org/web/packages/fpp/fpp.pdf * > runMain scalation.analytics.forecaster.ARMATest3 */ object ARMATest3 extends App { import ForecasterVec.y for (h <- 1 to 1) { // forecasting horizon for (p <- 0 to 6) { // auto-regressive hyper-parameter settings for (q <- 0 to 6 if p+q > 0) { // moving-average hyper-parameter settings banner (s"Test3: ARMA ($p, $q) with h = $h") val arma = new ARMA (y) // create an ARMA model arma.setPQ (p, q).train ().eval_ () // set p and q, train and evaluate the ARMA model testInSamp (y, p+q, arma, h) } // for } // for } // for } // ARMATest3 object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ARMATest4` object is used to test the `ARMA` class. * > runMain scalation.analytics.forecaster.ARMATest4 */ object ARMATest4 extends App { val path = BASE_DIR + "travelTime.csv" val data = MatrixD (path) val y = data.col(1) val p = 1 val q = 1 val steps = 1 // number of steps for the forecasts val arma = new ARMA (y) // time series data: y vs. t println (s"y = $y") // Build ARMA(p, q) models arma.setPQ (p).train ().eval_ () testInSamp (y, p, arma, 1) var yf = arma.forecast (t = y.dim, h = steps) println (s"$steps-step ahead forecasts using ARMA($p, 0) model = $yf") arma.setPQ (0, q).train ().eval_ () testInSamp (y, q, arma, 1) yf = arma.forecast (t = y.dim, h = steps) println (s"$steps-step ahead forecasts using ARMA(0, $q) model = $yf") arma.setPQ (p, q).train ().eval_ () testInSamp (y, p+q, arma, 1) yf = arma.forecast (t = y.dim, h = steps) println (s"$steps-step ahead forecasts using ARMA($p, $q) model = $yf") } // ARMATest4 object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ARMATest5` object is used to test the `ARMA` class. * > runMain scalation.analytics.forecaster.ARMATest5 */ object ARMATest5 extends App { val m = 50 val steps = 2 // number of steps for the forecasts val sig2 = 10000.0 val noise = Normal (0.0, sig2) val y = VectorD (for (i <- 0 until m) yield 40 * (i-1) - (i-2) * (i-2) + noise.gen) val arma = new ARMA (y) // time series model ARMA banner ("Build ARMA(1, 0) model") arma.setPQ (1, 0).train ().eval_ () // train for ARMA(1, 0) model testInSamp (y, 2, arma, 1) for (p <- 1 to 3) { banner (s"Build ARMA($p, $p) model") arma.setPQ (p, p).train ().eval_ () // retrain for ARMA(p, p) model testInSamp (y, p+p, arma, 1) banner ("Make Forecasts") val yf = arma.forecast (steps) println (s"$steps-step ahead forecasts using ARMA($p, $p) model = $yf") } // for } // ARMATest5 object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ARMATest6` object is used to test the `ARMA` class on real data: * Forecasting lake levels. Select the best number of lags. * @see cran.r-project.org/web/packages/fpp/fpp.pdf * > runMain scalation.analytics.forecaster.ARMATest6 */ object ARMATest6 extends App { import ForecasterVec.y val arma = new ARMA (y) // create model for time series data arma.setPQ (1, 1).train ().eval_ () // train the model and evaluate val res = arma.forwardSel () println (s"forwardSel: $res") for ((sp, sq) <- Array ((1, 0), (2, 1), (1, 1), (1, 2), (0, 1))) { val res = arma.forwardSel2 (VectorI (sp, sq)) println (s"forwardSel2: $res") } // for } // ARMATest6 object