//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  Dong Yu Yu, John Miller
 *  @version 1.6
 *  @date    Sun Dec 16 16:09:16 EST 2018
 *  @see     LICENSE (MIT style license file).
 *
 *  @title   Model: Regression Tree with Gradient Boosting
 */

package scalation.analytics

import scala.collection.mutable.{ArrayBuffer, Set}

import scalation.linalgebra.{MatriD, MatrixD, VectoD, VectorD, VectorI}
import scalation.random.PermutedVecI
import scalation.stat.Statistic

//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `RegressionTree_GB` class uses Gradient Boosting on `RegressionTree`.
 *  One tree is added to the model per iteration, each one fit to the current
 *  residuals in order to reduce the gradient of the loss.
 *  @param x       the data vectors stored as rows of a matrix
 *  @param y       the response vector
 *  @param fname_  the feature/variable names
 *  @param hparam  the hyper-parameters for the model
 */
class RegressionTree_GB (x: MatriD, y: VectoD, fname_ : Strings = null,
                         hparam: HyperParameter = RegressionTree_GB.hp)
      extends PredictorMat (x, y, fname_, hparam)
{
    private val DEBUG   = false                                        // debug flag
    private val depth   = hparam ("maxDepth").toInt                    // the maximum depth of the base regression trees
    private val iter    = hparam ("iterations").toInt                  // the number of iterations for training
    private val stream  = 0                                            // the random number stream
    private val forest  = new ArrayBuffer [RegressionTree] ()          // forest is the ensemble of regression trees
    private val permGen = PermutedVecI (VectorI.range (0, m), stream)  // permutation generator

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Train using Gradient Boosting: for each iteration, compute the residual and
     *  fit a regression tree where the residual is the dependent value (equal to the
     *  negative gradient when using SSE as the loss function).
     *  @param x_  the training/full data/input matrix
     *  @param y_  the training/full response/output vector
     */
    override def train (x_ : MatriD, y_ : VectoD): RegressionTree_GB =
    {
        val yp = VectorD.fill (y_.dim)(y_.mean)                        // initial value for y-predicted
        for (i <- 0 until iter) {
            val yres = y_ - yp                                         // y-residual
            val tree = new RegressionTree (x_, yres, fname, hparam)    // i-th tree in forest
            forest  += tree                                            // add to forest
            tree.train (x_, yres)                                      // train the i-th tree
            yp += tree.predict (x_)                                    // add to cumulative prediction
            if (DEBUG) {
                println (s"train: i = $i - ensemble of ${forest.size} trees")
                eval ()
                println ("fitMap = " + fitMap)
            } // if
        } // for
        this
    } // train
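    // Why fit trees to residuals?  For the SSE loss L = 1/2 sum_i (y_i - yp_i)^2,
    // the negative gradient with respect to the current prediction yp_i is
    //     -dL/dyp_i = y_i - yp_i
    // which is exactly the residual 'yres' computed in 'train' above.  Fitting each
    // new tree to 'yres' therefore takes one gradient-descent step in function space.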
    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a data vector 'z', predict the value by summing the predictions of
     *  the trees.
     *  @param z  the data vector to predict
     */
    override def predict (z: VectoD): Double =
    {
        var yp = y.mean
        for (i <- forest.indices) yp += forest(i).predict (z)
        yp
    } // predict

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a data matrix 'z', predict the value by summing the predictions of
     *  the trees, for each row of the matrix.
     *  @param z  the data matrix to predict
     */
    override def predict (z: MatriD = x): VectoD =
    {
        val yp = new VectorD (z.dim1)
        for (i <- z.range1) yp(i) = predict (z(i))
        yp
    } // predict

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Build a sub-model that is restricted to the given columns of the data matrix.
     *  @param x_cols  the columns that the new model is restricted to
     */
    def buildModel (x_cols: MatriD): RegressionTree_GB =
    {
        new RegressionTree_GB (x_cols, y, null, hparam)
    } // buildModel

} // RegressionTree_GB class

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `RegressionTree_GB` companion object defines hyper-parameters and provides
 *  a factory function.
 */
object RegressionTree_GB extends ModelFactory
{
    val hp = new HyperParameter                                        // default values for hyper-parameters
    hp += ("maxDepth", 5, 5)
    hp += ("threshold", 0.1, 0.1)
    hp += ("iterations", 50, 50)

    val drp = (null, hp)                                               // default remaining parameters

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Create a `RegressionTree_GB` object that uses Gradient Boosting on
     *  `RegressionTree`.  One tree is added to the model per iteration, each one
     *  fit to the current residuals.
     *  @param xy      the combined data-response matrix
     *  @param fname   the feature/variable names
     *  @param hparam  the hyper-parameters for the model
     */
    def apply (xy: MatriD, fname: Strings = null,
               hparam: HyperParameter = hp): RegressionTree_GB =
    {
        val n = xy.dim2
        if (n < 2) {
            flaw ("apply", s"dim2 = $n of the 'xy' matrix must be at least 2")
            null
        } else {
            val (x, y) = pullResponse (xy)
            new RegressionTree_GB (x, y, fname, hparam)
        } // if
    } // apply

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Create a `RegressionTree_GB` object that uses Gradient Boosting on
     *  `RegressionTree`.  One tree is added to the model per iteration, each one
     *  fit to the current residuals.
     *  @param x       the data matrix
     *  @param y       the response vector
     *  @param fname   the feature/variable names
     *  @param hparam  the hyper-parameters for the model
     */
    def apply (x: MatriD, y: VectoD, fname: Strings,
               hparam: HyperParameter): RegressionTree_GB =
    {
        val n = x.dim2
        if (n < 1) {
            flaw ("apply", s"dim2 = $n of the 'x' matrix must be at least 1")
            null
        } else {
            // FIX - add rescale
            new RegressionTree_GB (x, y, fname, hparam)
        } // if
    } // apply

} // RegressionTree_GB object

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `RegressionTree_GBTest` object is used to test the `RegressionTree_GB` class.
 *  It tests a simple case that does not require a file to be read.
 *  > runMain scalation.analytics.RegressionTree_GBTest
 */
object RegressionTree_GBTest extends App
{
    val x = new MatrixD ((5, 1), 750, 800, 850, 900, 950)
    val y = VectorD (1160, 1200, 1280, 1450, 2000)

    val rgb = new RegressionTree_GB (x, y)
    rgb.analyze ()
    println (rgb.report)

} // RegressionTree_GBTest object
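//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `RegressionTree_GBTest2` object is a minimal, illustrative sketch showing
 *  out-of-sample prediction with `RegressionTree_GB`: train on a small synthetic
 *  dataset (the values below are made up for illustration only), then predict the
 *  response for a new data vector via the 'predict' method.
 *  > runMain scalation.analytics.RegressionTree_GBTest2
 */
object RegressionTree_GBTest2 extends App
{
    val x = new MatrixD ((6, 2), 1, 10,                                // two-feature synthetic data
                                 2, 12,
                                 3, 15,
                                 4, 11,
                                 5, 18,
                                 6, 20)
    val y = VectorD (9.5, 11.0, 13.0, 12.5, 16.0, 18.5)                // synthetic response

    val rgb = new RegressionTree_GB (x, y)                             // model with default hyper-parameters
    rgb.analyze ()                                                     // train and evaluate the model
    println (rgb.report)

    val z = VectorD (3.5, 14)                                          // a new data vector
    println (s"predict ($z) = ${rgb.predict (z)}")                     // mean + sum of tree predictions

} // RegressionTree_GBTest2 object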