//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  Dong Yu Yu, John Miller
 *  @version 1.6
 *  @date    Sun Dec 16 16:09:16 EST 2018
 *  @see     LICENSE (MIT style license file).
 *
 *  @title   Model: Regression Tree with Gradient Boosting
 */

package scalation.analytics

import scala.collection.mutable.{ArrayBuffer, Set}

import scalation.linalgebra.{MatriD, MatrixD, VectoD, VectorD, VectorI}
import scalation.random.PermutedVecI
import scalation.stat.Statistic

//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `RegressionTree_GB` class uses Gradient Boosting on `RegressionTree`.
 *  One tree is added to the model per iteration, each one fit to the current
 *  residuals in order to reduce the gradient of the loss.
 *  @param x       the data vectors stored as rows of a matrix
 *  @param y       the response vector
 *  @param fname_  the feature/variable names
 *  @param hparam  the hyper-parameters for the model
 */
class RegressionTree_GB (x: MatriD, y: VectoD, fname_ : Strings = null,
                         hparam: HyperParameter = RegressionTree_GB.hp)
      extends PredictorMat (x, y, fname_, hparam)
{
    private val DEBUG   = false                                        // debug flag
    private val depth   = hparam ("maxDepth").toInt                    // the maximum depth of the base regression trees
    private val iter    = hparam ("iterations").toInt                  // the number of iterations for training
    private val stream  = 0                                            // the random number stream
    private val forest  = new ArrayBuffer [RegressionTree] ()          // forest is the ensemble of regression trees
    private val permGen = PermutedVecI (VectorI.range (0, m), stream)  // permutation generator

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Train using Gradient Boosting: for each iteration, compute the residual and
     *  fit a regression tree where the residual is the dependent value (equal to the
     *  negative gradient when using SSE as the loss function).
     *  @param x_  the training/full data/input matrix
     *  @param y_  the training/full response/output vector
     */
    override def train (x_ : MatriD, y_ : VectoD): RegressionTree_GB =
    {
        val yp = VectorD.fill (y_.dim)(y_.mean)                        // initial value for y-predicted
        for (i <- 0 until iter) {
            val yres = y_ - yp                                         // y-residual
            val tree = new RegressionTree (x_, yres, fname, hparam)    // i-th tree in forest
            forest  += tree                                            // add to forest
            tree.train (x_, yres)                                      // train the i-th tree
            yp += tree.predict (x_)                                    // add to cumulative prediction
            if (DEBUG) {
                println (s"train: i = $i - ensemble of ${forest.size} trees")
                eval ()
                println ("fitMap = " + fitMap)
            } // if
        } // for
        this
    } // train
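    // Why fit trees to residuals?  For the SSE loss L = 1/2 sum_i (y_i - yp_i)^2,
    // the negative gradient with respect to the current prediction yp_i is
    //     -dL/dyp_i = y_i - yp_i
    // which is exactly the residual 'yres' computed in 'train' above.  Fitting each
    // new tree to 'yres' therefore takes one gradient-descent step in function space.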
    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a data vector 'z', predict the value by summing the predictions of
     *  the trees.
     *  @param z  the data vector to predict
     */
    override def predict (z: VectoD): Double =
    {
        var yp = y.mean
        for (i <- forest.indices) yp += forest(i).predict (z)
        yp
    } // predict

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a data matrix 'z', predict the value by summing the predictions of
     *  the trees, for each row of the matrix.
     *  @param z  the data matrix to predict
     */
    override def predict (z: MatriD = x): VectoD =
    {
        val yp = new VectorD (z.dim1)
        for (i <- z.range1) yp(i) = predict (z(i))
        yp
    } // predict

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Build a sub-model that is restricted to the given columns of the data matrix.
     *  @param x_cols  the columns that the new model is restricted to
     */
    def buildModel (x_cols: MatriD): RegressionTree_GB =
    {
        new RegressionTree_GB (x_cols, y, null, hparam)
    } // buildModel

} // RegressionTree_GB class

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `RegressionTree_GB` companion object defines hyper-parameters and provides
 *  a factory function.
 */
object RegressionTree_GB extends ModelFactory
{
    val hp = new HyperParameter                                        // default values for hyper-parameters
    hp += ("maxDepth", 5, 5)
    hp += ("threshold", 0.1, 0.1)
    hp += ("iterations", 50, 50)

    val drp = (null, hp)                                               // default remaining parameters

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Create a `RegressionTree_GB` object that uses Gradient Boosting on
     *  `RegressionTree`.  One tree is added to the model per iteration, each one
     *  fit to the current residuals.
     *  @param xy      the combined data-response matrix
     *  @param fname   the feature/variable names
     *  @param hparam  the hyper-parameters for the model
     */
    def apply (xy: MatriD, fname: Strings = null,
               hparam: HyperParameter = hp): RegressionTree_GB =
    {
        val n = xy.dim2
        if (n < 2) {
            flaw ("apply", s"dim2 = $n of the 'xy' matrix must be at least 2")
            null
        } else {
            val (x, y) = pullResponse (xy)
            new RegressionTree_GB (x, y, fname, hparam)
        } // if
    } // apply

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Create a `RegressionTree_GB` object that uses Gradient Boosting on
     *  `RegressionTree`.  One tree is added to the model per iteration, each one
     *  fit to the current residuals.
     *  @param x       the data matrix
     *  @param y       the response vector
     *  @param fname   the feature/variable names
     *  @param hparam  the hyper-parameters for the model
     */
    def apply (x: MatriD, y: VectoD, fname: Strings,
               hparam: HyperParameter): RegressionTree_GB =
    {
        val n = x.dim2
        if (n < 1) {
            flaw ("apply", s"dim2 = $n of the 'x' matrix must be at least 1")
            null
        } else {
            // FIX - add rescale
            new RegressionTree_GB (x, y, fname, hparam)
        } // if
    } // apply

} // RegressionTree_GB object

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `RegressionTree_GBTest` object is used to test the `RegressionTree_GB` class.
 *  It tests a simple case that does not require a file to be read.
 *  > runMain scalation.analytics.RegressionTree_GBTest
 */
object RegressionTree_GBTest extends App
{
    val x = new MatrixD ((5, 1), 750, 800, 850, 900, 950)
    val y = VectorD (1160, 1200, 1280, 1450, 2000)

    val rgb = new RegressionTree_GB (x, y)
    rgb.analyze ()
    println (rgb.report)

} // RegressionTree_GBTest object
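//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `RegressionTree_GBTest2` object is a minimal, illustrative sketch showing
 *  out-of-sample prediction with `RegressionTree_GB`: train on a small synthetic
 *  dataset (the values below are made up for illustration only), then predict the
 *  response for a new data vector via the 'predict' method.
 *  > runMain scalation.analytics.RegressionTree_GBTest2
 */
object RegressionTree_GBTest2 extends App
{
    val x = new MatrixD ((6, 2), 1, 10,                                // two-feature synthetic data
                                 2, 12,
                                 3, 15,
                                 4, 11,
                                 5, 18,
                                 6, 20)
    val y = VectorD (9.5, 11.0, 13.0, 12.5, 16.0, 18.5)                // synthetic response

    val rgb = new RegressionTree_GB (x, y)                             // model with default hyper-parameters
    rgb.analyze ()                                                     // train and evaluate the model
    println (rgb.report)

    val z = VectorD (3.5, 14)                                          // a new data vector
    println (s"predict ($z) = ${rgb.predict (z)}")                     // mean + sum of tree predictions

} // RegressionTree_GBTest2 object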