//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 1.6
 *  @date    Wed Oct 28 20:43:47 EDT 2020
 *  @see     LICENSE (MIT style license file).
 *
 *  @title   Model Part: Convolutional Network
 */

package scalation.analytics

import scala.math.{min, max}

import scalation.linalgebra.{FunctionV_2V, MatriD, MatrixD, VectoD, VectorD}
import scalation.random.RandomVecD
import scalation.util.banner

import ActivationFun._
import Fit._
import Initializer._
import MatrixTransform._
import Optimizer._                                   // Optimizer - configuration
import StoppingRule._
import CoFilter_1D._

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ConvNet_1D` class implements a Convolutional Network model.
 *  The model is trained using a data matrix 'x' and response vector 'y'.
 *  @param x       the input/data matrix with instances stored in rows
 *  @param y       the output/response matrix, where y_i = response for row i of matrix x
 *  @param nf      the number of filters for this convolutional layer
 *  @param nc      the width of the filters (size of cofilters)
 *  @param fname_  the feature/variable names (if null, use x_j's)
 *  @param hparam  the hyper-parameters for the model/network
 *  @param f0      the activation function family for layers 1->2 (input to hidden)
 *  @param f1      the activation function family for layers 2->3 (hidden to output)
 *  @param itran   the inverse transformation function returns responses to original scale
 */
class ConvNet_1D (x: MatriD, y: MatriD,
                  nf: Int = 1, nc: Int = 3,
                  fname_ : Strings = null, hparam: HyperParameter = hp,
                  f0: AFF = f_reLU, f1: AFF = f_reLU,
                  val itran: FunctionV_2V = null)
      extends PredictorMat2 (x, y, fname_, hparam)
{
    private val DEBUG = true                                             // debug flag

    // NOTE(review): reads the global 'hp', not the 'hparam' passed in — confirm intended
    private val maxEpochs = hp ("maxEpochs").toInt                       // maximum number of training epochs/iterations

    private val nz = nx - nc + 1                                         // size of feature map produced by a width-nc filter
    private var c = weightVec (nc)                                       // parameters (weights & biases) in to hid
    private var b = new NetParam (weightMat (nz, ny), new VectorD (ny))  // parameters (weights & biases) hid to out

    println (s"Create a ConvNet_1D with $nx input, $nf filters and $ny output nodes")

    private val filt = Array.fill (nf)(new CoFilter_1D (nc))             // array of filters

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Filter the 'i'-th input vector with the 'f'-th filter.
     *  NOTE(review): the dot-product loop below is commented out, so this method
     *  currently returns an all-zero vector — confirm whether this is a stub (FIX).
     *  @param i  the index of the 'i'th row of the matrix
     *  @param f  the index of the 'f'th filter
     */
    def filter (i: Int, f: Int): VectoD =
    {
        val xi = x(i)                                                    // i-th input/data row
        val ft = filt(f)                                                 // f-th cofilter
        val xf = new VectorD (xi.dim - nc + 1)                           // feature map (valid convolution size)
        // for (j <- xf.range) xf(j) = ft.dot (xi, j)
        xf
    } // filter

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Update filter 'f's parameters.
     *  @param f     the index for the filter
     *  @param vec2  the new parameters for the filter's vector
     */
    def updateFilterParams (f: Int, vec2: VectoD)
    {
        filt(f).update (vec2)
    } // updateFilterParams

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the parameters 'c' (convolution filter weights) and 'b'
     *  (fully-connected layer weights & biases).
     */
    def parameters: NetParams = Array (NetParam (MatrixD (Seq (c))), b)

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given training data 'x_' and 'y_', fit the parameters 'c' and 'b'.
     *  This is a simple algorithm that iterates over several epochs using gradient descent.
     *  It does not use batching nor a sufficient stopping rule.
     *  In practice, use the 'train' or 'train2' methods that use better optimizers.
     *  @param x_  the training/full data/input matrix
     *  @param y_  the training/full response/output matrix
     */
    def train0 (x_ : MatriD = x, y_ : MatriD = y): ConvNet_1D =
    {
        println (s"train0: eta = $eta")
        var sse0 = Double.MaxValue                                       // hold prior value of sse

        for (epoch <- 1 to maxEpochs) {                                  // iterate over each epoch
            var z  = f0.fM (conv (x_, c))                                // Z  = f(conv (X, c))
            var yp = f1.fM (z *: b)                                      // Yp = f(ZB)
            ee     = yp - y_                                             // negative of the error matrix
            val d1 = ee ** f1.dM (yp)                                    // delta matrix for y
            val d0 = (d1 * b.w.t) ** f0.dM (z)                           // delta matrix for z
            ConvNet_1D.updateParam (x_, z, d0, d1, eta, c, b)            // gradient-descent step on c and b
            val yp_ = f1.fM (f0.fM (conv (x, c)) *: b)                   // updated predictions
            val sse = sseF (y_, yp_)                                     // sum of squared errors
            if (DEBUG) println (s"train0: sse for $epoch th epoch: sse = $sse")
            if (sse > sse0) return this                                  // return early if moving up
            sse0 = sse                                                   // save prior sse
            this                                                         // NOTE(review): dead expression — value discarded by the for loop
        } // for
        this
    } // train0

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given training data 'x_' and 'y_', fit the parameters 'c' and 'b'.
     *  Iterate over several epochs, where each epoch divides the training set into
     *  batches.  Each batch is used to update the weights.
     *  FIX - to be implemented (currently performs NO optimization; epochs is fixed at 0)
     *  @param x_  the training/full data/input matrix
     *  @param y_  the training/full response/output matrix
     */
    def train (x_ : MatriD = x, y_ : MatriD = y): ConvNet_1D =
    {
        val epochs = 0 // optimize3 (x_, y_, c, b, eta, bSize, maxEpochs, f0, f1)    // optimize parameters c, b
        println (s"ending epoch = $epochs")
        // estat.tally (epochs._2)
        this
    } // train

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a new input vector 'v', predict the output/response vector 'f(v)'.
     *  @param v  the new input vector
     */
    def predictV (v: VectoD): VectoD = f1.fV (b dot f0.fV (conv (v, c)))

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given an input matrix 'v', predict the output/response matrix 'f(v)'.
     *  @param v  the input matrix
     */
    def predictV (v: MatriD = x): MatriD = f1.fM (f0.fM (conv (v, c)) *: b)

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Build a sub-model that is restricted to the given columns of the data matrix.
     *  @param x_cols  the columns that the new model is restricted to
     */
    def buildModel (x_cols: MatriD): ConvNet_1D =
    {
        new ConvNet_1D (x_cols, y, nf, nc, null, hparam, f0, f1, itran)
    } // buildModel

} // ConvNet_1D class

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ConvNet_1D` companion object provides factory functions for the
 *  `ConvNet_1D` class.
 */
object ConvNet_1D
{
    // FIX - factory method not yet implemented (throws NotImplementedError)
    def apply (xy: MatrixD): ConvNet_1D = ???

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Update the parameters: the weights in the convolutional filter 'c' and
     *  the weights/biases in the fully-connected layer 'b'.
     *  @param x_   the training/full data/input matrix
     *  @param z    the training/full response/output matrix
     *  @param d0   the convolutional layer delta
     *  @param d1   the fully-connected layer delta
     *  @param eta  the learning rate (step size for gradient descent)
     *  @param c    the convolution filter vector
     *  @param b    the fully-connected layer parameters
     */
    def updateParam (x_ : MatriD, z: MatriD, d0: MatriD, d1: MatriD, eta: Double, c: VectoD, b: NetParam) =
    {
        for (j <- c.range) {
            var sum = 0.0
            for (i <- x_.range1; h <- z.range2) sum += x_(i, h+j) * d0(i, h)
            c(j) -= (sum / x_.dim1) * eta                                // update 'c' weights in conv filter
        } // for
        b -= (z.t * d1 * eta, d1.mean * eta)                             // update 'b' weights & biases
    } // updateParam

} // ConvNet_1D object

import CoFilter_1D._                                                     // NOTE(review): redundant — already imported at the top of the file

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ConvNet_1DTest` object is used to test the `ConvNet_1D` class.
 *  Test using the simple example from section 11.10 of ScalaTion textbook.
 *  Perform four training steps.
* > runMain scalation.analytics.ConvNet_1DTest */ object ConvNet_1DTest extends App { val x = new MatrixD ((2, 5), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10) val y = new MatrixD ((2, 2), 6, 9, 16, 24) val c = VectorD (0.5, 1, 0.5) val b = NetParam (new MatrixD ((3, 2), 0.1, 0.2, 0.3, 0.4, 0.5, 0.6)) val f0 = f_reLU // first activation function val f1 = f_reLU // second activation function hp("eta") = 0.001 println (s"input x = $x") // input/data matrix println (s"input y = $y") // output/response matrix println (s"eta = ${hp("eta")}") for (epoch <- 1 to 4) { banner (s"Start of epoch $epoch") println (s"filter c = $c") // values for cofilter println (s"weights b = $b") // values for fully-connected layer val z = f0.fM (conv (x, c)) // Z = f(conv (X, c)) val yp = f1.fM (z *: b) // Yp = f(ZB) val ee = yp - y // negative error E = Yp - Y val d1 = ee ** f1.dM (yp) // delta matrix for y val d0 = (d1 * b.w.t) ** f0.dM (z) // delta matrix for z println (s"feature map z = $z") println (s"response yp = $yp") println (s"- error ee = $ee") println (s"delta 1 d1 = $d1") println (s"delta 0 d0 = $d0") ConvNet_1D.updateParam (x, z, d0, d1, hp ("eta"), c, b) val yp_ = f1.fM (f0.fM (conv (x, c)) *: b) // updated predictions val sse = sseF (y, yp_) println (s"sse for $epoch th epoch: sse = $sse") } // for } // ConvNet_1DTest //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ConvNet_1DTest2` object is used to test the `ConvNet_1D` class * using the AutoMPG dataset. * > runMain scalation.analytics.ConvNet_1DTest2 */ object ConvNet_1DTest2 extends App { import ExampleAutoMPG._ banner ("ConvNet_1D vs. 
Regession - ExampleAutoMPG") banner ("Regression") val rg = Regression (oxy) println (rg.analyze ().report) banner ("ConvNet_1D") hp("eta") = 0.01 val cn = new ConvNet_1D (x, MatrixD (Seq (y))) cn.train0 ().eval () println (cn.report) } // ConvNet_1DTest2 //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ConvNet_1DTest3` object is used to test the `ConvNet_1D` class * for the convolutional operator. * > runMain scalation.analytics.ConvNet_1DTest3 */ object ConvNet_1DTest3 extends App { val c = VectorD (1, 2) val x = VectorD (1, 2, 3, 4) val y = new VectorD (c.dim + x.dim - 1) for (k <- y.range) { var sum = 0.0 for (j <- c.range) { val i = k - j if (0 <= i && i < x.dim) sum += c(j) * x(k - j) } // for y(k) = sum } // for println (s"y = $y") } // ConvNet_1DTest3