//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 1.6
 *  @date    Sat Jun 13 01:27:00 EST 2017
 *  @see     LICENSE (MIT style license file).
 *
 *  @title   Test: Economics Domain using Clustering
 */

package scalation.analytics
package forecaster

import scalation.linalgebra.{MatrixD, VectoD, VectorD}
import scalation.math.noDouble
import scalation.plot.Plot
import scalation.stat.StatVector.corr
import scalation.util.banner

import clusterer.{KMeansClusterer, KMeansClustererHW}
import clusterer.Clusterer.{test => ctest}

import ActivationFun._
import ImputeMean.{impute, setMissVal}                     // may change to another `Imputation` object

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ExampleEcon3` object tests various prediction and forecasting techniques
 *  on daily economic/finance time-series data.  It (1) loads the energy-price
 *  matrix from a CSV file, (2) prints per-column diagnostics, (3) fits
 *  `Regression4TS` models (without and with intercept) for lags 1 to `maxLags`
 *  and (4) clusters the columns of the data using `KMeansClustererHW`.
 *  NOTE(review): as an `App`, all members are initialized in program order, so
 *  the order of the statements below matters.
 *  > runMain scalation.analytics.forecaster.ExampleEcon3
 */
object ExampleEcon3 extends App
{
    val MISSING = -999999                                   // missing value

//  .       .       0       1         2       3         4          5        6        7
//  SEQNUM, YRMODA, NATGAS, OILBRENT, OILWTI, GASOLNYH, GASOLGULF, HEATOIL, JETFUEL, PROPANE
    val fname = Seq ("NATGAS",    "OILBRENT", "OILWTI",  "GASOLNYH",
                     "GASOLGULF", "HEATOIL",  "JETFUEL", "PROPANE")

    // second argument is presumably the number of header lines to skip -- TODO confirm
    val xyr = MatrixD (BASE_DIR + "ENERGY20200603.csv", 1)
//  val xyr = MatrixD (BASE_DIR + "ENERGY_DATA.csv", 1)
//  val xyr = MatrixD (BASE_DIR + "MERGED_DATA_7_25.csv", 1)
//  val xyr = MatrixD (BASE_DIR + "RESEARCH_DATA.csv", 1)
    println (s"xyr.dim1 = ${xyr.dim1}, xyr.dim2 = ${xyr.dim2}")

//    setMissVal (MISSING)
//    var xy = impute (xyr)  // * 100.0
//    var xy = xyr.slice (0, 10)
    var xy = xyr.sliceCol (2, xyr.dim2)                     // drop the two leading columns (SEQNUM, YRMODA per layout above)

//  println (s"xy = $xy")

    // per-column diagnostics: summary statistics, rows holding the MISSING marker and the last (up to) 5 values
    for (j <- xy.range2) {
        banner (s"column $j")
        val yj = xy.col (j)                                 // named `yj` to avoid shadowing the response `y` below
        println (s"ymean = ${yj.mean}, ymin = ${yj.min()}, ymax = ${yj.max()}")

        for (i <- yj.range if yj(i) == MISSING) println (s"i = $i, y(i) = ${yj(i)}")
        // filter on the index range so that columns with fewer than 5 rows do not produce negative indices
        for (i <- yj.range if i >= yj.dim - 5) println (s"i = $i, y(i) = ${yj(i)}")
    } // for

//  xy = xy.slice (0, 200)                                  // comment out for all rows
    val _1 = VectorD.one (xy.dim1)                          // column of all ones
    println (s"xy.dim1 = ${xy.dim1}, xy.dim2 = ${xy.dim2}")

// forecast column 0 using columns 1-7
    val RESPONSE = 0                                        // for ENERGY_DATA.csv
    val FEATURE1 = 1                                        // for ENERGY_DATA.csv
    val FEATURE2 = 8                                        // for ENERGY_DATA.csv
    val y = xy.col (RESPONSE)                               // response vector
    val x = xy.sliceCol (FEATURE1, FEATURE2)                // feature matrix (end column presumably exclusive -- TODO confirm)

    val t = VectorD.range (0, y.dim)                        // time index used as the horizontal axis in plots
//  println (s"x = $x")
//  println (s"y = $y")
    println (fname)
    banner ("diagnose Mat (xy)")
    diagnoseMat (xy)
    banner ("diagnose Mat (x)")
    diagnoseMat (x)
    setMissVal (noDouble)                                   // presumably tells the imputation object what marks a missing value -- TODO confirm

    val rescaled = true                                     // selects the factory (true) vs. constructor (false) when building models

    var rg: PredictorMat with ForecasterMat = null          // most recently built model
    var yf = null.asInstanceOf [VectoD]                     // most recent predictions (set only when `detail` is true)

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Test the model by calling its `test` method and, when `detail` is set,
     *  also predict on `x`, plot the residuals `y - yf` and print the summary.
     *  @param modName  the name of the model to test
     *  @param mod      the model to test
     *  @param h        the forecasting horizon (only used by the commented out cross-validation)
     *  @param detail   whether to produce a detailed summary
     */
    def test (modName: String, mod: PredictorMat with ForecasterMat, h: Int = 1, detail: Boolean = false): Unit =
    {
        mod.test (s"$modName")
//      mod.crossVal2 (k = 5, kt = 10, h = h)
        if (detail) {
            yf = mod.predict (x)
            new Plot (t, y - yf, null, s"e: $modName", true)
            println (mod.summary)
        } // if
    } // test
 
    new Plot (t, y, null, "y vs. t", true)

    // Time Series Regression Models with lags

    val lag1    = 0.0  
    val maxLags = 2

    // models without an intercept, "lag2" hyper-parameter set to 1, ..., maxLags
    for (l <- 1 to maxLags) {
        val hp2 = Regression4TS.hp.updateReturn (("lag1", lag1), ("lag2", l.toDouble))
        rg = if (rescaled) Regression4TS (x, y, null, hp2, RegTechnique.QR, addOne = false)
             else      new Regression4TS (x, y, addOne = false)
        test (s"Regression4TS - no intercept - to lag $l", rg)
    } // for

    // models with an intercept, "lag2" hyper-parameter set to 1, ..., maxLags
    for (l <- 1 to maxLags) {
        val hp2 = Regression4TS.hp.updateReturn (("lag1", lag1), ("lag2", l.toDouble))
        rg = if (rescaled) Regression4TS (x, y, null, hp2, RegTechnique.QR, addOne = true)
             else      new Regression4TS (x, y)
        test (s"Regression4TS - to lag $l", rg)
    } // for

    // Clustering Analysis

    val xyt = xy.t   // cluster on columns by taking the transpose

    banner ("Clustering Analysis")
    val opt = -1.0                                          // passed through to the clusterer test (meaning defined by `Clusterer.test`)
    val k = 4                                               // number of clusters
    val tf = Array (true, false)
    // try all four combinations of the two clusterer flags
    for (fl0 <- tf; fl1 <- tf) {
        val fls = Array (fl0, fl1)
//      val cl  = new KMeansClusterer (xyt, k, fls)
        val cl  = new KMeansClustererHW (xyt, k, fls)     // better, but slower
        ctest (xyt, fls, cl, opt)                         // validation
    } // for

} // ExampleEcon3 object