//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 2.0
 *  @date    Sun Feb 13 16:22:21 EST 2022
 *  @see     LICENSE (MIT style license file).
 *
 *  @note    Model Framework: Utilities for Time Series Forecasting
 */

package scalation
package modeling
package forecasting

import scala.math.max

import scalation.mathstat._

// FIX - ForecastUtil make uniform  across DIRECT vs. RECURSIVE

//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** Given a response vector y, build and return
 *  (1) an input/predictor MATRIX xx and
 *  (2) an output/multi-horizon output/response MATRIX yy.
 *  Used by Multi-Variate (MV) forecast models such as `ARX_MV`.
 *  that use DIRECT multi-horizon forecasting.
 *  The first lag responses can't be predicted due to missing past values.
 *  The last h-1 responses can't be predicted due to missing future values.
 *  Therefore the number of rows in xx and yy is reduced to y.dim + 1 - lags - h.
 *
 *  FIX - try to extend to "val xx = new MatrixD (y.dim - h, lags)"
 *
 *  @param y     the given output/response vector
 *  @param lags  the maximum lag included (inclusive)
 *  @param h     the forecasting horizon (1, 2, ... h)
 */
def buildMatrix4TS (y: VectorD, lags: Int, h: Int): (MatrixD, MatrixD) =
    val xx = new MatrixD (y.dim + 1 - lags - h, lags)
    val yy = new MatrixD (y.dim + 1 - lags - h, h)
    for i <- lags to y.dim - h do
        for j <- xx.indices2 do xx(i-lags, lags - 1 - j) = y(i - 1 - j)
        for j <- yy.indices2 do yy(i-lags, j) = if i + j >= y.dim then -0.0 else y(i + j)
//      for j <- yy.indices2 do yy(i-lags, j) = y(i + j)
    end for
//  println (s"buildMatrix4TS: xx = $xx \n yy = $yy")
    (xx, yy)
end buildMatrix4TS


//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** Given a response vector y, build and return
 *  (1) an input/predictor MATRIX xx and 
 *  (2) an output/single-horizon output/response VECTOR yy. 
 *  Used by Single-Variate forecast models such as `ARX`.
 *  that use RECURSIVE multi-horizon forecasting.
 *  The first response can't be predicted due to missing past values.
 *  Therefore the number of rows in xx and yy is reduced to y.dim - 1 (day 0 cut out).
 *  @param y     the given output/response vector
 *  @param lags  the maximum lag included (inclusive)
 */
def buildMatrix4TS (y: VectorD, lags: Int): (MatrixD, VectorD) =
    val xx = new MatrixD (y.dim - 1, lags)
    val yy = new VectorD (y.dim - 1)                                      // day 0 cut out
    for i <- 1 until y.dim do
        for j <- xx.indices2 do xx(i-1, lags - 1 - j) = y(max(i - 1 - j, 0))
        yy(i-1) = y(i)
    end for
//  println (s"buildMatrix4TS: xx = $xx \n yy = $yy")
    (xx, yy)
end buildMatrix4TS


//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** Given an exogenous variable vector ex corresponding to an endogenous response
 *  vector y, build and return an input/predictor MATRIX xx.
 *  The first lag responses can't be predicted due to missing past values.
 *  Therefore the number of rows in xx is reduced to ex.dim - elag1.
 *  @param ex     the exogenous variable vector
 *  @param lags   the maximum lag included (inclusive) for the endogenous variable
 *  @param elag1  the minimum lag included (inclusive) for the exogenous variable
 *  @param elag2  the maximum lag included (inclusive) for the exogenous variable
 */
def buildMatrix4TS_exo (ex: VectorD, lags: Int, elag1: Int, elag2: Int): MatrixD =
    val flaw = flawf ("top")
    val n = elag2 - elag1
    if n < 1 then flaw ("buildMatrix4TS_exo", "min exo lag must be smaller than max exo lag")
//  if elag2 > lags then flaw ("buildMatrix4TS_exo", "exo lag cannot exceed endogenous lag")

    val xx = new MatrixD (ex.dim - elag1, n)
    for i <- elag1 until ex.dim do
        for j <- xx.indices2 do xx(i-elag1, n - 1 - j) = ex(max(i - elag1 - j, 0))
    end for
//  println (s"buildMatrix4TS_exo: xx = $xx")
    xx
end buildMatrix4TS_exo

/* commented out
 *  Therefore the number of rows in xx is reduced to ex.dim - lags.
def buildMatrix4TS_exo (ex: VectorD, lags: Int, elag1: Int, elag2: Int): MatrixD =
    val flaw = flawf ("top")
    val n = elag2 - elag1
    if n < 1 then flaw ("buildMatrix4TS_exo", "min exo lag must be smaller than max exo lag")
//  if elag2 > lags then flaw ("buildMatrix4TS_exo", "exo lag cannot exceed endogenous lag")

    val xx = new MatrixD (ex.dim - lags, n)
    for i <- lags until ex.dim do
        for j <- xx.indices2 do xx(i-lags, n - 1 - j) = ex(i - elag1 - j)
    end for
//  println (s"buildMatrix4TS_exo: xx = $xx")
    xx
end buildMatrix4TS_exo
*/


//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** Build a 3D tensor that collects and (lag) expands the endogenous variable and all
 *  exogenous variables into the form 'time x lags x variables' --> INPUT TENSOR.
 *  Lags for endogenous variable:  1, 2, ... lags
 *  Lags for exogenous variables:  el, el+1, ... el-1+lags 
 *  For models like SARIMAX with weekly data, exo variables are not forecasted, so it
 *  is not possible to use exogenous lag 1 to make week-two forecasts.
 *  Also build a matrix of target values for each forecasting horizon --> OUTPUT MATRIX.
 *  The number of rows m = y.dim - el, as forecasts cannot be made unless there is at
 *  least one endogenous and one exogenous lag (past value) available to the model.
 *  Model: yy_hat = f(xx) with loss function, e.g., || yy - yy_hat ||_F
 *  NOTE: for models not taking tensor input, flatten into a matrix.
 *  @param y     the endogenous variable vector over time (e.g., new_deaths)
 *  @param ex    the exogenous variable matrix over time x exo_vars (e.g., icu_patients, hosp_patient)
 *  @param lags  the maximum lag included (inclusive)
 *  @param h     the forecasting horizon (1, 2, ... h)
 *  @param el    the first exogenous lag (may be larger than 1)
 */
def buildTensor4TS (y: VectorD, ex: MatrixD, lags: Int, h: Int = 1)(el: Int = h): (TensorD, MatrixD) =
    val flaw = flawf ("top")
    if y.dim != ex.dim then
        flaw ("buildTensor4TS", s"endo and exo variable sizes do not match: y.dim = ${y.dim} != ex.dim = ${ex.dim}")
    val m = y.dim - el                                                     // number of rows

    val xx = new TensorD (m, lags, 1 + ex.dim2)                            // input tensor from endo and exo vars
    for i <- xx.indices; j <- xx.indices2; k <- xx.indices3 do             // time x lags x variables
        xx(i, j, k) = if k == 0 then y(max(i - el - j, 0))
                      else ex(max(i - el - j, 0), k-1)

    val yy = new MatrixD (m, h)                                            // output matrix from endo vars
    for i <- yy.indices; j <- yy.indices2 do                               // time x horizons
        yy(i, j) = y(max(i - el - j, 0))

    println (s"buildTensor4TS: xx.dims = ${xx.dims}, yy.dims = ${yy.dims}")
    (xx, yy)                                                               // tuple of (input tensor, output matrix)
end buildTensor4TS


//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** Test the actual response vector vs. forecasted matrix, returning the QoF
 *  for all forecasting horizons 1 to h.
 *  FIX - not agreeing with `ForecasterUtil.testHorizons`
 *  @param mod  the fittable model (one that extends `Fit`)
 *  @param y    the original actual response vector
 *  @param yf   the forecasted response matrix
 *  @param p    the number of variables/lags used in the model
 */
def testForecast (mod: Fit, y: VectorD, yf: MatrixD, p: Int): MatrixD =
    MatrixD (for k <- 1 until yf.dim2 - 1 yield
        val y_  = y(p + k until y.dim)
        val yf_ = yf(?, k)(0 until y.dim - p - k)
        println (s"y_.dim = ${y_.dim}, yf_.dim = ${yf_.dim}")
        mod.resetDF (p, y.dim - p - (k+1))                               // reset the degrees of freedom
        mod.diagnose (y_, yf_))                                          // return the QoF of the forecasts
end testForecast