//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 1.6
 *  @date    Sat Jun 13 01:27:00 EST 2017
 *  @see     LICENSE (MIT style license file).
 *
 *  @title   Test: Economics Domain using Lagged Regression Family
 */

package scalation.analytics
package forecaster

import scalation.linalgebra.{MatrixD, VectoD, VectorD}
import scalation.math.noDouble
import scalation.plot.Plot
import scalation.stat.StatVector.corr
import scalation.util.banner

import ActivationFun._
import ImputeMean.{impute, setMissVal}                  // may change to another `Imputation` object

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ExampleEcon` object tests various prediction and forecasting techniques
 *  on daily economic/finance time-series data.  It loads a CSV file into a matrix,
 *  prints per-column diagnostics (mean, min, max, rows equal to `MISSING` and the
 *  last few rows), splits the columns into a response vector `y` and a feature
 *  matrix `x`, and then runs the currently enabled models through `test`.
 *  Alternative data sets and models are kept as commented-out code so they can
 *  be switched on by hand.
 *  > runMain scalation.analytics.forecaster.ExampleEcon
 */
object ExampleEcon extends App
{
    val MISSING = -999999                                // missing value

    // SEQNUM,YRMODA,NATGAS,OILBRENT,OILWTI,GASOLNYH,GASOLGULF,HEATOIL,JETFUEL,PROPANE

    val xyr = MatrixD (BASE_DIR + "ENERGY20200603.csv", 1)
//  val xyr = MatrixD (BASE_DIR + "ENERGY_DATA.csv", 1)
//  val xyr = MatrixD (BASE_DIR + "MERGED_DATA_7_25.csv", 1)
//  val xyr = MatrixD (BASE_DIR + "RESEARCH_DATA.csv", 1)

    println (s"xyr.dim1 = ${xyr.dim1}, xyr.dim2 = ${xyr.dim2}")

//  setMissVal (MISSING)
//  var xy = impute (xyr)                                // * 100.0
//  var xy = xyr.slice (0, 10)
    var xy = xyr.sliceCol (2, xyr.dim2)                  // drop the first two columns of the raw matrix
    println (s"xy = $xy")

    // per-column diagnostics: summary statistics, rows holding the MISSING marker, last rows
    for (j <- xy.range2) {
        banner (s"column $j")
        val y = xy.col (j)
        println (s"ymean = ${y.mean}, ymin = ${y.min()}, ymax = ${y.max()}")
        for (i <- y.range if y(i) == MISSING) println (s"i = $i, y(i) = ${y(i)}")
        // show at most the last 5 rows; clamp the start so a short column cannot yield a negative index
        for (i <- math.max (0, y.dim - 5) until y.dim) println (s"i = $i, y(i) = ${y(i)}")
    } // for

//  xy = xy.slice (0, 200)                               // comment out for all rows

    val _1 = VectorD.one (xy.dim1)                       // column of all ones

    println (s"xy.dim1 = ${xy.dim1}, xy.dim2 = ${xy.dim2}")

    // forecast column 6 using columns 2-5 and 7-15 (first column is column 0)
    val RESPONSE = 6                                     // for ENERGY_DATA.csv
    val FEATURE1 = 2                                     // for ENERGY_DATA.csv
    val FEATURE2 = 15                                    // for ENERGY_DATA.csv

    // forecast column 2 using columns 3 to 15 (first column is column 0)
//  val RESPONSE = 2                                     // for ENERGY_DATA.csv
//  val FEATURE1 = 3                                     // for ENERGY_DATA.csv

    // forecast column 8 using columns 19 to 33 (first column is column 0)
//  val RESPONSE = 8                                     // for MERGED_DATA_7_25.csv
//  val FEATURE1 = 19                                    // for MERGED_DATA_7_25.csv

    val y = xy.col (RESPONSE)                            // response column
    // features: the column slices on either side of the response column, joined by `++^`
    val x = xy.sliceCol (FEATURE1, RESPONSE) ++^ xy.sliceCol (RESPONSE + 1, FEATURE2)
//  val x = xy.sliceCol (FEATURE1, FEATURE2)
    val t = VectorD.range (0, y.dim)                     // time index used as the plot x-axis

//  println (s"x = $x")
//  println (s"y = $y")
    println (s"x.dim1 = ${x.dim1}, x.dim2 = ${x.dim2}")

    diagnoseMat (x)
    setMissVal (noDouble)

    val rescaled = true                                  // selects factory `apply` (true) vs. `new` (false) below
    var rg: PredictorMat with ForecasterMat = null       // model currently under test
    var yf = null.asInstanceOf [VectoD]                  // predictions from the last detailed `test` run

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Test the model: call the model's own `test` method, then `crossVal2`
     *  with `k = 5`, `kt = 10` and horizon `h`.  When `detail` is set, also
     *  store the model's predictions for `x` in `yf`, plot the residuals
     *  `y - yf` against `t` and print the model summary.
     *  @param modName  the name of the model to test
     *  @param mod      the model to test
     *  @param h        the forecasting horizon
     *  @param detail   whether to produce a detailed summary
     */
    def test (modName: String, mod: PredictorMat with ForecasterMat, h: Int = 1,
              detail: Boolean = false): Unit =
    {
        mod.test (modName)
        mod.crossVal2 (k = 5, kt = 10, h = h)
        if (detail) {
            yf = mod.predict (x)
            new Plot (t, y - yf, null, s"e: $modName", true)
            println (mod.summary)
        } // if
    } // test

    new Plot (t, y, null, "y vs. t", true)

    // Baseline - Multiple Regression with no lags

/***
    rg = if (rescaled) Regression (x, y, null, null, RegTechnique.QR)
         else new Regression (x, y)
    test ("Regression - no intercept", rg)

    rg = if (rescaled) Regression (_1 +^: x, y, null, null, RegTechnique.QR)
         else new Regression (_1 +^: x, y)
    test ("Regression", rg)

    // Time Series Regression Models with lags

    for (l <- 1 to 2) {
        val hp2 = Regression4TS.hp.updateReturn (("lag1", 1.0), ("lag2", l.toDouble))
        rg = if (rescaled) Regression4TS (x, y, null, hp2, RegTechnique.QR, addOne = false)
             else new Regression4TS (x, y, addOne = false)
        test (s"Regression4TS - no intercept - to lag $l", rg)
    } // for

    for (l <- 1 to 2) {
        val hp2 = Regression4TS.hp.updateReturn (("lag1", 1.0), ("lag2", l.toDouble))
        rg = if (rescaled) Regression4TS (x, y, null, hp2, RegTechnique.QR, addOne = true)
             else new Regression4TS (x, y)
        test (s"Regression4TS - to lag $l", rg)
    } // for

    for (l <- 1 to 2) {
        val hp2 = Regression4TS.hp.updateReturn (("lag1", 1.0), ("lag2", l.toDouble))
        rg = if (rescaled) QuadRegression4TS (x, y, null, hp2, RegTechnique.QR)
             else new QuadRegression4TS (x, y)
        test (s"QuadRegression4TS - to lag $l", rg)
    } // for
***/

    // currently enabled model: QuadXRegression4TS with "lag1" = 1 and "lag2" = 1, 2
    for (l <- 1 to 2) {
        val hp2 = Regression4TS.hp.updateReturn (("lag1", 1.0), ("lag2", l.toDouble))
        rg = if (rescaled) QuadXRegression4TS (x, y, null, hp2, RegTechnique.QR)
             else new QuadXRegression4TS (x, y)
        test (s"QuadXRegression4TS - to lag $l", rg)
    } // for

/***
    // FIX - produces NaN
    for (l <- 1 to 2) {
        val mul = 1
        val hp2 = Regression4TS.hp.updateReturn (("lag1", 1.0), ("lag2", l.toDouble))
        val rg = if (rescaled) ELM_3L1_4TS (x, y, mul * x.dim2 + 1, null, hp2, f0 = f_tanh, null)
                 else new ELM_3L1_4TS (x, y, mul * x.dim2 + 1)
        test (s"ELM_3L1_4TS: to lag $l", rg)
    } // for

    // Neural Network Models

    banner ("NeuralNet_3L")
    val nz = x.dim2 + 1
//  val nn = NeuralNet_3L (x, y, nz, null, null, f_sigmoid, f_id)
    val nn = NeuralNet_3L (x, y, nz, null, null, f_tanh, f_id)
    NeuralNet_3L.rescaleOff ()
//  val hp2 = Optimizer.hp.updateReturn (("eta", 0.02))
//  val nn = NeuralNet_3L (x, y, nz, null, hp2, f_lreLU, f_id)
//  val nn = NeuralNet_3L (x, y, nz, null, null, f_eLU, f_id)
    nn.train2 ().eval ()
    println (nn.report)
//  println (nn.summary)
***/

/***
    banner ("model prices instead of rates")
    val yp = rg.predict ()
    val pr = new VectorD (y.dim); pr(0) = 1.0
    for (i <- 1 until y.dim) pr(i) = pr(i-1) * (1 + 0.01 * y(i-1))
    val pr2 = new VectorD (y.dim); pr2(0) = 1.0
    for (i <- 1 until y.dim) pr2(i) = pr2(i-1) * (1 + 0.01 * yp(i-1))

    val rg33 = SimpleRegression (pr, pr2)
    rg33.analyze ()
    println (rg33.report)
***/

} // ExampleEcon object