//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 1.6
 *  @date    Sat Jun 13 01:27:00 EST 2017
 *  @see     LICENSE (MIT style license file).
 *
 *  @title   Test: Economics Domain using Clustering
 */

package scalation.analytics
package forecaster

import scalation.linalgebra.{MatrixD, VectoD, VectorD}
import scalation.math.noDouble
import scalation.plot.Plot
import scalation.stat.StatVector.corr
import scalation.util.banner

import clusterer.{KMeansClusterer, KMeansClustererHW}
import clusterer.Clusterer.{test => ctest}
import ActivationFun._
import ImputeMean.{impute, setMissVal}                // may change to another `Imputation` object

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ExampleEcon3` object tests various prediction and forecasting techniques
 *  on daily economic/finance time-series data.  It loads the energy-price data,
 *  prints per-column diagnostics, fits `Regression4TS` models for lags 1 to
 *  `maxLags` (without and with an intercept) and finally clusters the columns
 *  with `KMeansClustererHW`.
 *  > runMain scalation.analytics.forecaster.ExampleEcon3
 */
object ExampleEcon3 extends App
{
    val MISSING = -999999                                  // missing value

    // Column index in `xy`, i.e., after the first two columns of the file are sliced off ('.')
    //  .       .       0       1         2       3         4          5        6        7
    //  SEQNUM, YRMODA, NATGAS, OILBRENT, OILWTI, GASOLNYH, GASOLGULF, HEATOIL, JETFUEL, PROPANE
    val fname = Seq ("NATGAS", "OILBRENT", "OILWTI", "GASOLNYH", "GASOLGULF", "HEATOIL", "JETFUEL", "PROPANE")

    // NOTE(review): the second argument (1) presumably skips one header line - confirm against `MatrixD.apply`
    val xyr = MatrixD (BASE_DIR + "ENERGY20200603.csv", 1)
//  val xyr = MatrixD (BASE_DIR + "ENERGY_DATA.csv", 1)
//  val xyr = MatrixD (BASE_DIR + "MERGED_DATA_7_25.csv", 1)
//  val xyr = MatrixD (BASE_DIR + "RESEARCH_DATA.csv", 1)
    println (s"xyr.dim1 = ${xyr.dim1}, xyr.dim2 = ${xyr.dim2}")

//  setMissVal (MISSING)
//  var xy = impute (xyr) // * 100.0
//  var xy = xyr.slice (0, 10)
    var xy = xyr.sliceCol (2, xyr.dim2)                    // keep columns from index 2 onward
//  println (s"xy = $xy")

    // Per-column diagnostics: basic statistics, rows holding the MISSING marker and the last few rows
    for (j <- xy.range2) {
        banner (s"column $j")
        val yj = xy.col (j)                                // named `yj` to avoid shadowing the response `y` below
        println (s"ymean = ${yj.mean}, ymin = ${yj.min()}, ymax = ${yj.max()}")
        for (i <- yj.range if yj(i) == MISSING) println (s"i = $i, y(i) = ${yj(i)}")
        // clamp the start at 0 so columns with fewer than 5 rows do not index out of bounds
        for (i <- math.max (0, yj.dim - 5) until yj.dim) println (s"i = $i, y(i) = ${yj(i)}")
    } // for

//  xy = xy.slice (0, 200)                                 // comment out for all rows

    val _1 = VectorD.one (xy.dim1)                         // column of all ones
    println (s"xy.dim1 = ${xy.dim1}, xy.dim2 = ${xy.dim2}")

    // forecast column 0 using columns 1-7

    val RESPONSE = 0                                       // for ENERGY_DATA.csv
    val FEATURE1 = 1                                       // for ENERGY_DATA.csv
    val FEATURE2 = 8                                       // for ENERGY_DATA.csv

    val y = xy.col (RESPONSE)                              // response vector
    val x = xy.sliceCol (FEATURE1, FEATURE2)               // feature/predictor matrix
    val t = VectorD.range (0, y.dim)                       // time index used as the plots' horizontal axis
//  println (s"x = $x")
//  println (s"y = $y")

    println (fname)
    banner ("diagnose Mat (xy)")
    diagnoseMat (xy)
    banner ("diagnose Mat (x)")
    diagnoseMat (x)

    setMissVal (noDouble)

    val rescaled = true                                    // selects the `Regression4TS (...)` factory over the constructor
    var rg: PredictorMat with ForecasterMat = null         // the current model, reassigned for each lag
    var yf: VectoD = null                                  // latest predictions, set by `test` when `detail` is true

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Test the model: run the model's own `test` method and, when `detail` is
     *  requested, predict on `x`, plot the residuals `y - yf` and print the
     *  model summary.
     *  @param modName  the name of the model to test
     *  @param mod      the model to test
     *  @param h        the forecasting horizon (currently unused, as the
     *                  `crossVal2` call is commented out)
     *  @param detail   whether to produce a detailed summary
     */
    def test (modName: String, mod: PredictorMat with ForecasterMat, h: Int = 1, detail: Boolean = false): Unit =
    {
        mod.test (modName)
//      mod.crossVal2 (k = 5, kt = 10, h = h)
        if (detail) {
            yf = mod.predict (x)
            new Plot (t, y - yf, null, s"e: $modName", true)
            println (mod.summary)
        } // if
    } // test

    new Plot (t, y, null, "y vs. t", true)

    // Time Series Regression Models with lags

    val lag1    = 0.0
    val maxLags = 2

    // NOTE(review): in both loops the non-rescaled branch does not pass `hp2`, so the
    // lag settings may be ignored there - confirm against the `Regression4TS` constructor

    for (l <- 1 to maxLags) {                              // models without an intercept
        val hp2 = Regression4TS.hp.updateReturn (("lag1", lag1), ("lag2", l.toDouble))
        rg = if (rescaled) Regression4TS (x, y, null, hp2, RegTechnique.QR, addOne = false)
             else new Regression4TS (x, y, addOne = false)
        test (s"Regression4TS - no intercept - to lag $l", rg)
    } // for

    for (l <- 1 to maxLags) {                              // models with an intercept
        val hp2 = Regression4TS.hp.updateReturn (("lag1", lag1), ("lag2", l.toDouble))
        rg = if (rescaled) Regression4TS (x, y, null, hp2, RegTechnique.QR, addOne = true)
             else new Regression4TS (x, y)
        test (s"Regression4TS - to lag $l", rg)
    } // for

    // Clustering Analysis

    val xyt = xy.t                                         // cluster on columns by taking the transpose

    banner ("Clustering Analysis")
    val opt = -1.0
    val k   = 4                                            // number of clusters
    val tf  = Array (true, false)
    for (fl0 <- tf; fl1 <- tf) {                           // try all four combinations of the two flags
        val fls = Array (fl0, fl1)
//      val cl = new KMeansClusterer (xyt, k, fls)
        val cl = new KMeansClustererHW (xyt, k, fls)       // better, but slower
        ctest (xyt, fls, cl, opt)                          // validation
    } // for

} // ExampleEcon3 object