//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** @author Hao Peng, John Miller * @version 1.6 * @date Sat Jun 13 01:27:00 EST 2017 * @see LICENSE (MIT style license file). * * @title Model: Moving-Average * * @see http://en.wikipedia.org/wiki/Autoregressive%E2%80%93moving-average_model * @see http://www.emu.edu.tr/mbalcilar/teaching2007/econ604/lecture_notes.htm * @see http://www.stat.berkeley.edu/~bartlett/courses/153-fall2010 * @see http://www.princeton.edu/~apapanic/ORFE_405,_Financial_Time_Series_%28Fall_2011%29_files/slides12-13.pdf * @see https://maryclare.github.io/atsa/content/notes/notes_3.pdf * @see https://math.unice.fr/~frapetti/CorsoP/Chapitre_4_IMEA_1.pdf * * Algorithms: Innovations Algorithm (IA), * Conditional Sum of Squares (CCS), and * Maximum Likelihood Estimator (MLE). */ package scalation.analytics package forecaster import scala.collection.mutable.Set import scala.math.{log, max, min, Pi, sqrt} import scalation.linalgebra.{MatriD, MatrixD, VectoD, VectorD} import scalation.math.{double_exp, noDouble} import scalation.minima._ import scalation.plot.Plot import scalation.random.{Normal, Uniform} import scalation.stat.Statistic import scalation.util.banner import Fit._ import ForecasterVec.{MAX_LAGS, testInSamp} import RollingValidation.trSize //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `MA` class provides basic time series analysis capabilities for 'MA' * models. In an 'MA(q)' model, 'q' refers to the order of the Moving-Average * component of the model. `MA` models are often used for forecasting. * Given time series data stored in vector 'y', its next value 'y_t = y(t)' * may be predicted based on the noise/residuals/shocks: *

* y_t = c + Σ(θ_i e_t-i) + e_t *

* where 'c' is a constant, 'θ' is the moving-average parameters vector, * and 'e' is the noise vector. *------------------------------------------------------------------------------ * @param y the response vector (time series data) * @param hparam the hyper-parameters */ class MA (y: VectoD, hparam: HyperParameter = ARMA.hp) extends ForecasterVec (y, hparam("q").toInt, hparam) { private val DEBUG = true // debug flag private val mle = hparam("mle").toInt // use IA (0), CCS (1) or MLE (2) private var q = hparam("q").toInt // q-th order Auto-Regressive model (p from parent) private var trained = false // has trained been called? private var θ: VectoD = null // MA(q) parameters // e = new VectorD (y.dim) // residual/error vector // private val z = y - mu // work with mean zero time series private var zp: VectoD = null // centered predicted values private var σ2 = 0.0 // estimated variance of the residual (updated using MLE) // private val (sig2, acv, acr) = acf (z, ml) // variance, auto-covariance, auto-correlation //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return the model name including its current hyper-parameter, e.g., MA(1). */ override def modelName: String = s"MA($q)" //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Train an `MA` model on the time-series data in vector 'y_'. * Estimate the parameters vector 'θ' for a 'q'th order a Moving-Average 'MA(q)' * model. *

* z_t = θ_0 * e_t-1 + ... + θ_q-1 * e_t-q + e_t *

* @param x_null the data/input vector (ignored) * @param y_ the response/output vector (currently only works for y) */ override def train (x_null: MatriD, y_ : VectoD): MA = { super.train (null, y_) trained = true retrain (q) } // train //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Retrain an `MA` model on the time-series data for new values for 'q'. * Estimate the parameters vector 'θ' for a 'q'th order a Moving-Average 'MA(q)' * model. * @param q_ the order of the MA model */ def retrain (q_ : Int = 1): MA = { if (! trained) flaw ("retrain", "train must be called before retrain") q = q_ hparam("q") = q resetDF (q, m - q) mle match { // which estimation algorithm case 0 => θ = innovationsAlg (stats.acr) // IA innovationsAlg2 () // sig2e = σ2 // FIX - calculate case 1 => θ = conditionalSumSq () // CSS // sig2e = σ2 // FIX - calculate case 2 => θ = maxLikelihood () // MLE sig2e = σ2 case 3 => θ = maxLikelihood (conditionalSumSq ().asInstanceOf [VectorD]) // CSS-MLE sig2e = σ2 } // match this } // retrain //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The negative log-likelihood function to be minimized. * @see spia.uga.edu/faculty_pages/monogan/teaching/ts/Barima.pdf * @see stats.stackexchange.com/questions/77663/arima-estimation-by-hand * @param input the input parameter vector */ private def nll (input: VectoD): Double = { if (input.size != q+1) flaw ("nll", "Input param vector size incorrect") for (i <- 0 until q) θ(i) = input(i) σ2 = input.last~^2 predictAllz () -ll (e.normSq / m, σ2, m) } // nll //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Apply the Innovations Algorithm (IA) to estimate parameters for MA(q) model. * @see www.stat.berkeley.edu/~bartlett/courses/153-fall2010/lectures/10.pdf * @see www.math.kth.se/matstat/gru/sf2943/tsform.pdf */ private def innovationsAlg2 (): VectoD = { val sz = 5 * (q + 1) val v = new VectorD (sz+1) v(0) = stats.acv(0) // auto-covariance (gamma) for lag 0 val θ = new MatrixD (sz, sz) for (l <- 0 until sz) { for (k <- 0 to l) { var sum1 = 0.0 for (j <- 0 until k) sum1 += θ(l, l-j) * θ(k-1, k-j-1) * v(j) θ(l, l-k) = 1.0 / v(k) * (stats.acv (l-k+1) - sum1) } // for var sum2 = 0.0 for (j <- 0 to l) sum2 += θ(l, l-j) * θ(l, l-j) * v(j) v(l+1) = v(0) - sum2 } // for val tht = θ(sz-1).slice (q) if (DEBUG) println (s"innovationsAlg2: θ = $θ \n tht = $tht") tht } // innovationsAlg2 //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Apply the Innovations Algorithm (IA) to estimate parameters for MA(q) model. * @see www.stat.berkeley.edu/~bartlett/courses/153-fall2010/lectures/10.pdf * @see www.math.kth.se/matstat/gru/sf2943/tsform.pdf * @see www.math.chalmers.se/Stat/Grundutb/CTH/tms086/1213/fintid4.pdf */ private def innovationsAlg (g: VectoD): VectoD = { val sz = 5 * (q + 1) val v = new VectorD (sz+1) v(0) = g(0) // auto-covariance (gamma) for lag 0 val θ = new MatrixD (sz, sz) def sum1 (m: Int, k: Int) : Double = { var s = 0.0; for (j <- 0 until k) s += θ(m, m-j) * θ(k, k-j) * v(j); s } def sum2 (m: Int) : Double = { var s = 0.0; for (j <- 0 until m) s += θ(m, m-j)~^2 * v(j); s } for (m <- 1 until sz; k <- 0 until m) { θ(m, m-k) = (g(m-k) - sum1 (m, k)) / v(k) v(m) = g(0) - sum2 (m) } // for val tht = θ(sz-1).slice (q) if (DEBUG) println (s"innovationsAlg: θ = $θ \n tht = $tht") tht } // innovationsAlg //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Given a value for parameter 'th', compute the Conditional Sum of Squared Errors. * @param th the given theta (θ) parameters */ def csse (th: VectoD): Double = { var sumSq = 0.0 // for (t <- q until y.dim) { for (t <- q until e.dim) { var sum = 0.0 for (j <- 0 until q) sum += th(j) * e(t-1-j) e(t) = z(t) - sum sumSq += e(t) ~^ 2 } // for sumSq } // csse def d_csse (th: Double): Double = { var sum = 0.0 for (t <- 1 until e.dim) { e(t) = z(t) - th * e(t-1) sum += -2 * e(t-1) * (z(t) - th * e(t-1)) } // for sum } // d_csse def dd_csse (th: Double): Double = { var sum = 0.0 for (t <- 1 until e.dim) { e(t) = z(t) - th * e(t-1) sum += 2 * e(t-1) ~^ 2 } // for sum } // dd_csse def f_th (th: Double): Double = { val eta = 0.01 var sum1, sum2 = 0.0 for (t <- 1 until e.dim) { sum1 += e(t-1) * (z(t) - th * e(t-1)) sum2 += e(t-1) ~^2 } // for th + eta * sum1 / sum2 } // f_th //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Apply the Conditional Sum of Squares (CCS) algorithm to estimate parameters * for MA(q) model. * @see math.unice.fr/~frapetti/CorsoP/Chapitre_4_IMEA_1.pdf * @see www.nuffield.ox.ac.uk/economics/Papers/1997/w6/ma.pdf */ private def conditionalSumSq (): VectoD = { val init = new VectorD (q) val solver = new QuasiNewton (csse) solver.solve (init) } // conditionalSumSq /* private def conditionalSumSq (): VectoD = { var th = 0.9556 // near optimal // Grid Search val grids = new GridLS (csse) th = grids.lsearch (0.99, 0.9) // customized range println (s"grids: θ = $th, csse (θ) = ${csse (th)}") // Golden Section Search val golds = new GoldenSectionLS (csse) th = golds.lsearch (1.0, -1.0) // complete range println (s"golds: θ = $th, csse (θ) = ${csse (th)}") // Newton Method for Optimization val newts = new NewtonRaphson (d_csse, eta = 0.1) th = newts.solve (0.9, dd_csse) // custom Newton Method for Optimization th = 0.9 println (s"th = $th") for (i <- 1 to 10) { th = f_th (th); println (s"th = $th") } println (s"newt: θ = $th") VectorD (0.9556) } // conditionalSumSq */ //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Apply the Maximum Likelihood Estimation (MLE) algorithm to estimate parameters * for MA(q) model. * @see math.unice.fr/~frapetti/CorsoP/Chapitre_4_IMEA_1.pdf */ private def maxLikelihood (init_ : VectorD = null): VectoD = { if (θ == null || θ.size != q) θ = new VectorD (q) val init = if (init_ == null) VectorD.fill (q+1)(0.1) else init_ ++ VectorD (0.0) init(init.size-1) = sqrt (stats.sig2) // also optimize standard deviation val solver = new QuasiNewton (nll) // BFGS val tht = solver.solve (init).slice (0, q) // NOTE: σ2 already updated in def nll if (DEBUG) println (s"maxLikelihood: θ = $tht") tht } // maxLikelihood //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return the parameter vector (θ). */ def parameter: VectoD = θ //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return a detailed summary of the trained model. */ def summary: String = { super.summary ("θ", s"${stats.mu} + $θ dot [e_(t-1) ... e_{t-q}]") } // summary private val e_ = new VectorD (m) // errors for all time points t //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return a vector of centered predictions of an MA model and update the residuals. */ def predictAllz (): VectoD = { val zp = new VectorD (m) zp(0) = z(0) for (t <- 1 until m) { var sum = 0.0 for (j <- 0 until q if t-j > 0) sum += θ(j) * e_(t-1-j) zp(t) = sum e_(t) = y(t) - stats.mu - sum // e(t) = z(t) - sum } // for zp } // predictAllz //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Forecast values for all 'm' time points and all horizons (1 through 'h'-steps ahead). * Record these in the 'yf' matrix, where *

* yf(t, k) = k-steps ahead forecast for y_t *

* Note, 'yf.col(0)' is set to 'y' (the actual time-series values). * @param h the maximum forecasting horizon, number of steps ahead to produce forecasts */ def forecastAll (h: Int): MatriD = { yf = new MatrixD (m, h+1) // forecasts for all time points t & horizons to h val ef = new VectorD (m) // error vector based on forecasts yf.setCol (0, y) // first column is actual values, horizon 0 for (k <- 1 to h) { val c = min (k, q) // cut point from actual to forecasted values for (t <- 0 until c) { yf(t, k) = y(t) // copy first c actual values ef(t) = 0.0 // copy => no error at first } // for for (t <- c until m) { // forecast the rest var sum = stats.mu for (j <- 0 until q if t-j > 0) sum += θ(j) * ef(t-1-j) yf(t, k) = sum ef(t) = y(t) - sum } // for if (DEBUG) println (s"forecastAll: yf.col ($k) = ${yf.col (k)}") } // for yf // return matrix of forecasted values } // forecastAll //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Produce h-steps ahead forecast on the testing data during cross validation. * @param y the current response vector * @param t the time point/index to be forecast * @param h the forecasting horizon, number of steps ahead to produce forecast */ override def forecastX (y: VectoD, t: Int, h: Int = 1): Double = { if (t > m) flaw ("forecast", "no forecasts with starting t > m are provided") if (h > q) flaw ("forecast", s"MA($q) is not able to forecast for more than $q step(s) ahead. The mean will be produced.") // if (e.countZero == e.dim) predictAllz () // update the residuals NOTE: should have already been updated during training val zf = new VectorD (h) val ef = e.slice (e.dim - q) ++ new VectorD (h) // this loop is a two-step process involving: // 1) computing the residuals leading up to the time point of making forecasts // 2) making forecasts. Note that the residuals should NO LONGER be updated beyond this point because // the model is supposed to be blind to the "future" y values. for (i <- q until ef.dim) { // start at t = q (enough data and first value to forecast) var sum = 0.0 for (j <- 0 until q) sum += θ(j) * ef(i-1-j) zf(i-q) = sum if (i < q) ef(i) = y(t+i-q) - stats.mu - sum // residuals should NOT be updated once forecasting starts } // for zf.last + stats.mu // return the vector of forecasts } // forecastX //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Perform forward selection to find the most predictive variables to add the * existing model, returning the variables to add and the new model. * Note, all lags up and including 'p' define the model. * FIX - select subsets of the lags, e.g., Set (1, 2, 5) * @see `Fit` for index of QoF measures. * @param cols the lags/columns currently included in the existing model (ignored) * @param index_q index of Quality of Fit (QoF) to use for comparing quality */ def forwardSel (cols: Set [Int] = null, index_q: Int = index_rSq): (Int, MA) = { val rSq = new MatrixD (MAX_LAGS, 3) // R^2, R^2 Bar, R^2 cv var q_mx = -1 // best column, so far var mod_mx = null.asInstanceOf [MA] // best model, so far var fit_mx = noDouble // best fit, so far for (q <- 1 to MAX_LAGS) { retrain (q).eval () // retrain model, evaluate QoF val fit_q = fit(index_q) // new fit if (fit_q > fit_mx) { q_mx = q; mod_mx = this; fit_mx = fit_q } rSq(q-1) = VectorD (fit_q, -1.0, -1.0) // use new model, mod_q, no cross } // for if (q_mx == -1) { flaw ("forwardSel", "could not add additional lags") } // if println (s"forwardSel: add $q_mx giving qof = $fit_mx") println (s"rSq = $rSq") new Plot (null, rSq.col(0), lines = true) (q_mx, mod_mx) } // forwardSel } // MA class //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `MA` companion object provides factory methods for the `MA` class. */ object MA { //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Create an `MA` object. * @param y the response vector (time series data) * @param hparam the hyper-parameters */ def apply (y: VectoD, hparam: HyperParameter = ARMA.hp): MA = { new MA (y, hparam) } // apply } // MA object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `MATest` object is used to test the `MA` class. * > runMain scalation.analytics.forecaster.MATest */ object MATest extends App { val m = 100 val noise = Uniform (-5, 5) val y = VectorD (for (i <- 0 until m) yield i + noise.gen) val ma = new MA (y) // time series model MA val ar = new AR (y) // time series model AR banner ("Build AR(1) model") ar.train (null, y).eval () val yp = ar.predictAll () println (ar.report) new Plot (null, y, yp, "Plot of y, AR(1) vs. t", true) banner ("Build AR(2) model") ar.retrain (2).eval () val yp2 = ar.predictAll () println (ar.report) new Plot (null, y, yp2, "Plot of y, AR(2) vs. t", true) banner ("Build MA(1) model") ma.retrain (1).eval () val yp3 = ma.predictAll () println (ma.report) new Plot (null, y, yp3, "Plot of y, MA(1) vs. t", true) ar.plotFunc2 (ar.acF, "ACF") ar.plotFunc2 (ar.pacF, "PACF") } // MATest object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `MATest2` object is used to test the `MA` class. * > runMain scalation.analytics.forecaster.MATest2 */ object MATest2 extends App { val m = 30 val noise = Normal (0, 2) val y = VectorD (for (i <- 0 until m) yield i + noise.gen) println (s"y = $y") val steps = 2 // number of steps for the forecasts val ma = new MA (y) // time series model MA val ar = new AR (y) // time series model AR banner ("Build AR(1) model") ar.train (null, y).eval () val yp = ar.predictAll () println (ar.report) new Plot (null, y, yp, s"Plot of y, AR(1) vs. t", true) val ar_f = ar.forecast (steps) println (s"$steps-step ahead forecasts using AR(1) model = $ar_f") banner ("Build MA(1) model") ma.retrain (1).eval () val yp2 = ma.predictAll () println (ma.report) new Plot (null, y, yp2, s"Plot of y, MA(1) vs. t", true) val ma_f = ma.forecast (y.dim, h = steps) println (s"$steps-step ahead forecasts using MA(1) model = $ma_f") } // MATest2 object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `MATest3` object is used to test the `MA` class. * Forecasting lake levels. * @see cran.r-project.org/web/packages/fpp/fpp.pdf * > runMain scalation.analytics.forecaster.MATest3 */ object MATest3 extends App { import ForecasterVec.y ARMA.hp("q") = 1 // set the order val steps = 2 // number of steps for the forecasts val ma = new MA (y) // time series model MA var ar: AR = null // time series model AR var first = true // Build AR(p=q) and MA(q) models for the time series data for (q <- 1 to 4) { // order of the model banner (s"Build AR($q) model") ARMA.hp("p") = q ar = new AR (y) // time series model AR ar.train (null, y).eval () testInSamp (y, q, ar, 1) banner (s"Build MA($q)) model") if (first) ma.train (null, y) else ma.retrain (q) ma.eval () testInSamp (y, q, ma, 1) println (ma.summary) first = false for (h <- 1 to 0) { // FIX - h-steps ahead forecast banner (s"Rolling Validation h = $h") val stats = SimpleRollingValidation.crossValidate2 (ma, kt_ = 1, h = h) // val stats = RollingValidation.crossValidate2 (ma, kt_ = 5, h = h) Fit.showQofStatTable (stats) } // for } // for } // MATest3 object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `MATest4` object is used to test the `MA` class. * Forecasting traffic flow. * > runMain scalation.analytics.forecaster.MATest4 */ object MATest4 extends App { val path = BASE_DIR + "travelTime.csv" val data = MatrixD (path) val (t, y) = (data.col(0), data.col(1)) val steps = 1 // number of steps for the forecasts println (s"y = $y") // Build AR(1) and MA(1) models for the time series data banner ("Build AR(1) model") val ar = new AR (y) // time series model AR ar.train (null, y).eval () val yp = ar.predictAll () println (ar.report) new Plot (t, y, yp, s"Plot of y, AR(1) vs. t", true) val ar_f = ar.forecast (steps) println (s"$steps-step ahead forecasts using AR(1) model = $ar_f") banner ("Build MA(1) model") ARMA.hp ("mle") = 0 // 0 => IA, 1 => CSS, 2 => MLE, 3 => MLE (CSS) val ma = new MA (y) // time series model MA ma.retrain (1).eval () val yp2 = ma.predictAll () println (ma.report) new Plot (t, y, yp2, s"Plot of y, MA(1) vs. t", true) val ma_f = ma.forecast (y.dim, h = steps) println (s"$steps-step ahead forecasts using MA(1) model = $ma_f") } // MATest4 object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `MATest5` object is used to test the `MA` class. * > runMain scalation.analytics.forecaster.MATest5 */ object MATest5 extends App { val m = 50 val steps = 2 // number of steps for the forecasts val sig2 = 10000.0 val noise = Normal (0.0, sig2) val y = VectorD (for (i <- 0 until m) yield 40 * (i-1) - (i-2) * (i-2) + noise.gen) val ma = new MA (y) // time series model MA banner ("Build MA(1) model") ma.train (null, y).eval () // train for MA(1) model println (ma.report) new Plot (null, y, ma.predictAll (), s"Plot of y, MA(1) vs. t", true) for (q <- 1 to 3) { banner (s"Build MA($q) model") ma.retrain (q).eval () // retrain for MA(q) model println (ma.report) new Plot (null, y, ma.predictAll (), s"Plot of y, MA($q) vs. t", true) banner ("Make Forecasts") val ts_f = ma.forecast (steps) println (s"$steps-step ahead forecasts using MA($q) model = $ts_f") } // for } // MATest5 object //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `MATest6` object is used to test the `MA` class on real data: * Forecasting lake levels. Select the best number of lags. * @see cran.r-project.org/web/packages/fpp/fpp.pdf * > runMain scalation.analytics.forecaster.MATest6 */ object MATest6 extends App { import ForecasterVec.y ARMA.hp("q") = 1 // set q hyper-parameter val ma = new MA (y) // create model for time series data ma.train (null, y).eval () // train the model and evaluate val res = ma.forwardSel () println (s"forwardSel: $res") } // MATest6 object