//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 1.6
 *  @date    Fri Mar 16 15:13:38 EDT 2018
 *  @see     LICENSE (MIT style license file).
 *
 *  @title   Model: Neural Network with 3 Layers (input, hidden and output layers)
 *  @see     hebb.mit.edu/courses/9.641/2002/lectures/lecture03.pdf
 */

package scalation.analytics

import scala.collection.mutable.Set

import scalation.linalgebra.{FunctionV_2V, MatriD, MatrixD, VectoD, VectorD}
import scalation.math.noDouble
import scalation.plot.PlotM
import scalation.stat.Statistic
import scalation.util.banner

import ActivationFun._
import Fit._
import Initializer._
import MatrixTransform._
import Optimizer._                                    // Optimizer - configuration
//import Optimizer_SGD._                              // Stochastic Gradient Descent
//import Optimizer_SGDM._                             // Stochastic Gradient Descent with Momentum
import Optimizer_ADAM._                               // ADAM
import PredictorMat2._
import StoppingRule._

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `NeuralNet_3L` class supports multi-output, 3-layer (input, hidden and output)
 *  Neural-Networks.  It can be used for both classification and prediction,
 *  depending on the activation functions used.  Given several input vectors and output
 *  vectors (training data), fit the parameters 'a' and 'b' connecting the layers,
 *  so that for a new input vector 'v', the net can predict the output value, i.e.,
 *
 *      yp = f1 (b * f0 (a * v))
 *
 *  where 'f0' and 'f1' are the activation functions and the parameter 'a' and 'b'
 *  are the parameters between input-hidden and hidden-output layers.
 *  Unlike `NeuralNet_2L` which adds input 'x0 = 1' to account for the intercept/bias,
 *  `NeuralNet_3L` explicitly adds bias.
 *  The mini-batch size and maximum number of epochs are taken from 'hparam'
 *  (keys "bSize" and "maxEpochs").
 *  @param x       the m-by-nx input matrix (training data consisting of m input vectors)
 *  @param y       the m-by-ny output matrix (training data consisting of m output vectors)
 *  @param nz      the number of nodes in hidden layer (-1 => use default formula)
 *  @param fname_  the feature/variable names (if null, use x_j's)
 *  @param hparam  the hyper-parameters for the model/network
 *  @param f0      the activation function family for layers 1->2 (input to hidden)
 *  @param f1      the activation function family for layers 2->3 (hidden to output)
 *  @param itran   the inverse transformation function returns responses to original scale
 */
class NeuralNet_3L (x: MatriD, y: MatriD,
                    protected var nz: Int = -1,
                    fname_ : Strings = null, hparam: HyperParameter = hp,
                    f0: AFF = f_tanh, f1: AFF = f_id,
                    val itran: FunctionV_2V = null)
      extends PredictorMat2 (x, y, fname_, hparam)    // sets eta in parent class
{
    private val DEBUG     = false                     // debug flag
    private val bSize     = hparam ("bSize").toInt    // mini-batch size (from this model's hyper-parameters)
    private val maxEpochs = hparam ("maxEpochs").toInt   // maximum number of training epochs/iterations

    // Guidelines for setting the number of nodes in hidden layer, e.g.,
    // [1] nx + 1  or  [2] nx + ny
//  if (nz < 1) nz = nx + 1                           // [1] default number of nodes for hidden layer
    if (nz < 1) nz = nx + ny                          // [2] default number of nodes for hidden layer

    val df_m = compute_df_m (nz)                      // degrees of freedom for model (first output only)
    resetDF (df_m, x.dim1 - df_m)                     // degrees of freedom for (model, error)

    private var a = new NetParam (weightMat (nx, nz), new VectorD (nz))   // parameters (weights & biases) in to hid
    private var b = new NetParam (weightMat (nz, ny), new VectorD (ny))   // parameters (weights & biases) hid to out

    println (s"Create a NeuralNet_3L with $nx input, $nz hidden and $ny output nodes: df_m = $df_m")

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Construct a three-layer Neural Network that has a single output variable.
     *  The response vector is wrapped into a matrix via `MatrixD (Seq (y))`.
     *  @param x       the m-by-nx input matrix (training data consisting of m input vectors)
     *  @param y       the m-by-ny output vector (training data consisting of m output values)
     *  @param nz      the number of nodes in hidden layer (-1 => use default formula)
     *  @param fname   the feature/variable names (if null, use x_j's)
     *  @param hparam  the hyper-parameters for the model/network
     *  @param f0      the activation function family for layers 1->2 (input to hidden)
     *  @param f1      the activation function family for layers 2->3 (hidden to output)
     *  @param itran   the inverse transformation function returns responses to original scale
     */
    def this (x: MatriD, y: VectoD, nz: Int, fname: Strings, hparam: HyperParameter,
              f0: AFF, f1: AFF, itran: FunctionV_2V) =
        this (x, MatrixD (Seq (y)), nz, fname, hparam, f0, f1, itran)      // aux. constructor

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the degrees of freedom for the model (based on nx, n, ny = 1).
     *  Rough estimate based on total number of parameters - 1.
     *  FIX: use better estimate
     *  @param n  the number of nodes in the hidden layer
     */
    def compute_df_m (n: Int): Int = (nx + 2) * n

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the parameters 'a' and 'b'.
     */
    def parameters: NetParams = Array (a, b)

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given training data 'x_r' and 'y_r', fit the parameters 'a' and 'b'.
     *  This is a simple algorithm that iterates over several epochs using gradient descent.
     *  It does not use batching nor a sufficient stopping rule: it stops after 'maxEpochs'
     *  epochs or as soon as the sum of squared errors increases from one epoch to the next.
     *  In practice, use the 'train' or 'train2' methods that use better optimizers.
     *  @param x_r  the training/full data/input matrix
     *  @param y_r  the training/full response/output matrix
     */
    def train0 (x_r: MatriD = x, y_r: MatriD = y): NeuralNet_3L =
    {
        println (s"train0: eta = $eta")
        var sse0   = Double.MaxValue                              // hold prior value of sse
        var epoch  = 1                                            // current epoch number
        var rising = false                                        // whether sse moved up in the last epoch

        while (! rising && epoch <= maxEpochs) {                  // iterate over each epoch
            val z  = f0.fM (a * x_r)                              // Z  = f0 (XA)
            val yp = f1.fM (b * z)                                // Yp = f1 (ZB)
            ee     = yp - y_r                                     // negative of the error matrix
            val d1 = f1.dM (yp) ** ee                             // delta matrix for yp
            val d0 = f0.dM (z) ** (d1 * b.w.t)                    // delta matrix for z

            a -= (x_r.t * d0 * eta, d0.mean * eta)                // update 'a' weights & biases
            b -= (z.t * d1 * eta, d1.mean * eta)                  // update 'b' weights & biases

            // evaluate sse with the same forward pass as 'predictV': f1 is applied to the output layer
            val sse = sseF (y_r, f1.fM (b * f0.fM (a * x_r)))
            if (DEBUG) println (s"train0: parameters for $epoch th epoch: b = $b, sse = $sse")
            if (sse > sse0) rising = true                         // stop early if moving up
            else { sse0 = sse; epoch += 1 }                       // save prior sse, go to next epoch
        } // while
        this
    } // train0

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given training data 'x_r' and 'y_r', fit the parameters 'a' and 'b'.
     *  Iterate over several epochs, where each epoch divides the training set into
     *  'nbat' batches.  Each batch is used to update the weights.
     *  Delegates to 'optimize3' and tallies the second component of its result in 'estat'.
     *  @param x_r  the training/full data/input matrix
     *  @param y_r  the training/full response/output matrix
     */
    def train (x_r: MatriD = x, y_r: MatriD = y): NeuralNet_3L =
    {
        val epochs = optimize3 (x_r, y_r, a, b, eta, bSize, maxEpochs, f0, f1)    // optimize parameters a, b
        println (s"ending epoch = $epochs")
        estat.tally (epochs._2)
        this
    } // train

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given training data 'x_r' and 'y_r', fit the parameters 'a' and 'b'.
     *  Iterate over several epochs, where each epoch divides the training set into
     *  'nbat' batches.  Each batch is used to update the weights.
     *  This version performs an interval search for the best 'eta' value.
     *  @param x_r  the training/full data/input matrix
     *  @param y_r  the training/full response/output matrix
     */
    override def train2 (x_r: MatriD = x, y_r: MatriD = y): NeuralNet_3L =
    {
        val etaI   = (0.25 * eta, 4.0 * eta)                                      // quarter to four times eta
        val epochs = optimize3I (x_r, y_r, a, b, etaI, bSize, maxEpochs, f0, f1)  // optimize parameters a, b
        println (s"ending epoch = $epochs")
        estat.tally (epochs._2)
        this
    } // train2

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Build a sub-model that is restricted to the given columns of the data matrix.
     *  The sub-model uses the default hidden layer size and default feature names, and
     *  shares this model's response matrix, hyper-parameters, activation functions and 'itran'.
     *  @param x_cols  the columns that the new model is restricted to
     */
    def buildModel (x_cols: MatriD): NeuralNet_3L =
    {
        new NeuralNet_3L (x_cols, y, -1, null, hparam, f0, f1, itran)
    } // buildModel

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a new input vector 'v', predict the output/response vector 'f(v)'.
     *  @param v  the new input vector
     */
    def predictV (v: VectoD): VectoD = f1.fV (b dot f0.fV (a dot v))

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given an input matrix 'v', predict the output/response matrix 'f(v)'.
     *  @param v  the input matrix
     */
    def predictV (v: MatriD = x): MatriD = f1.fM (b * f0.fM (a * v))

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Show statistics: print the `Statistic` labels followed by 'estat'.
     */
    def showEstat (): Unit =
    {
        println (Statistic.labels)
        println (estat)
    } // showEstat

} // NeuralNet_3L class


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `NeuralNet_3L` companion object provides factory functions for building three-layer
 *  (one hidden layer) neural nets.  Note, 'rescale' is defined in `ModelFactory` in Model.scala.
 */
object NeuralNet_3L extends ModelFactory
{
    private val DEBUG = false                         // debug flag

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Create a `NeuralNet_3L` for a combined data matrix.
     *  The response is split off with 'pullResponse' and the remaining work (rescaling,
     *  null-guarding 'hparam', construction) is shared with the (x, y) factory function.
     *  @param xy      the combined input/data and output/response matrix
     *  @param nz      the number of nodes in hidden layer
     *  @param fname   the feature/variable names
     *  @param hparam  the hyper-parameters (if null, `Optimizer.hp` is used)
     *  @param f0      the activation function family for layers 1->2 (input to hidden)
     *  @param f1      the activation function family for layers 2->3 (hidden to output)
     */
    def apply (xy: MatriD, nz: Int = -1, fname: Strings = null, hparam: HyperParameter = hp,
               f0: AFF = f_tanh, f1: AFF = f_id): NeuralNet_3L =
    {
        val (x, y) = pullResponse (xy)                // assumes the last column is the response
        apply (x, y, nz, fname, hparam, f0, f1)       // resolves to the 7-argument overload below
    } // apply

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Create a `NeuralNet_3L` for a data matrix and response vector (single output variable).
     *  The input is rescaled when 'rescale' is set and the response is rescaled when
     *  'f1.bounds' is not null, in which case the inverse transform is passed as 'itran'.
     *  @param x       the m-by-nx input/data matrix
     *  @param y       the output/response m-vector
     *  @param nz      the number of nodes in hidden layer
     *  @param fname   the feature/variable names
     *  @param hparam  the hyper-parameters (if null, `Optimizer.hp` is used)
     *  @param f0      the activation function family for layers 1->2 (input to hidden)
     *  @param f1      the activation function family for layers 2->3 (hidden to output)
     */
    def apply (x: MatriD, y: VectoD, nz: Int, fname: Strings, hparam: HyperParameter,
               f0: AFF, f1: AFF): NeuralNet_3L =
    {
        val hp2 = if (hparam == null) Optimizer.hp else hparam
        var itran: FunctionV_2V = null                // inverse transform -> original scale

        val x_s = if (rescale) rescaleX (x, f0)
                  else x
        val y_s = if (f1.bounds != null) { val y_i = rescaleY (y, f1); itran = y_i._2; y_i._1 }
                  else y

        if (DEBUG) println (s" scaled: x = $x_s \n scaled y = $y_s")
        new NeuralNet_3L (x_s, MatrixD (Seq (y_s)), nz, fname, hp2, f0, f1, itran)
    } // apply

} // NeuralNet_3L object


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `NeuralNet_3LTest` object is used to test the `NeuralNet_3L` class.
 *  @see www4.rgu.ac.uk/files/chapter3%20-%20bp.pdf
 *  > runMain scalation.analytics.NeuralNet_3LTest
 */
object NeuralNet_3LTest extends App
{
    val s = 0                                         // random number stream to use
    val x = new MatrixD ((3, 3), 1.0, 0.35, 0.9,      // training data - input matrix (m vectors)
                                 1.0, 0.20, 0.7,
                                 1.0, 0.40, 0.95)
    val y = new MatrixD ((3, 2), 0.5, 0.4,            // training data - output matrix (m vectors)
                                 0.3, 0.3,
                                 0.6, 0.5)

    println ("input matrix x = " + x)
    println ("output matrix y = " + y)

    hp("bSize") = 1

    val nn = new NeuralNet_3L (x.sliceCol (1, 3), y, 3)          // create a NeuralNet_3L

    for (i <- 1 to 20) {
        val eta = i * 0.5
        banner (s"NeuralNet_3LTest: Fit the parameters a & b using optimization with learning rate $eta")

        nn.reset (eta_ = eta)
        nn.train ().eval ()                           // fit the weights using training data
        println (nn.report)

//      yp = nn.predictV (x)                          // predicted output values
//      println ("target output: y = " + y)
//      println ("predicted output: yp = " + yp)
        println ("yp0 = " + nn.predict (x(0)))        // predicted output values for row 0
    } // for

    banner ("NeuralNet_3LTest: Compare with Linear Regression - first column of y")

    val y0  = y.col(0)                                // use first column of matrix y
    val rg0 = new Regression (x, y0)                  // create a Regression model
    println (rg0.analyze ().report)

    val y0p = rg0.predict (x)                         // predicted output value
    println ("target output: y0 = " + y0)
    println ("predicted output: y0p = " + y0p)

    banner ("NeuralNet_3LTest: Compare with Linear Regression - second column of y")

    val y1  = y.col(1)                                // use second column of matrix y
    val rg1 = new Regression (x, y1)                  // create a Regression model
    println (rg1.analyze ().report)

    val y1p = rg1.predict (x)                         // predicted output value
    println ("target output: y1 = " + y1)
    println ("predicted output: y1p = " + y1p)

} // NeuralNet_3LTest object


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `NeuralNet_3LTest2` object trains a neural network on the `ExampleBasketBall` dataset.
 *  > runMain scalation.analytics.NeuralNet_3LTest2
 */
object NeuralNet_3LTest2 extends App
{
    import ExampleBasketBall._

    banner ("NeuralNet_3L vs. Regession - ExampleBasketBall")

    println ("x = " + x)
    println ("y = " + y)

    banner ("Regression")
    val rg = Regression (oxy)
    println (rg.analyze ().report)

    banner ("prediction")                             // not currently rescaling
    val yq = rg.predict ()                            // scaled predicted output values for all x
    println ("target output: y = " + y)
    println ("predicted output: yq = " + yq)
    println ("error: e = " + (y - yq))

    banner ("NeuralNet_3L with scaled y values")
//  hp("eta") = 0.016                                 // try several values - train0
    hp("eta") = 0.1                                   // try several values - train

    val nn = NeuralNet_3L (xy)                        // factory function automatically rescales
//  val nn = new NeuralNet_3L (x, MatrixD (Seq (y)))  // constructor does not automatically rescale

    nn.trainSwitch (2).eval ()                        // fit the weights using training data
    println (nn.report)

    banner ("scaled prediction")
    val yp = nn.predictV ().col (0)                   // scaled predicted output values for all x
    println ("target output: y = " + y)
    println ("predicted output: yp = " + yp)
    println ("error: e = " + (y - yp))

/*
    banner ("unscaled prediction")
//  val (ymu, ysig) = (y.mean, sqrt (y.variance))     // should obtain from apply - see below
//  val ypu = denormalizeV ((ymu, ysig))(yp)          // denormalize predicted output values for all x
    val ypu = nn.itran (yp)                           // denormalize predicted output values for all x
    println ("target output: y = " + y)
    println ("unscaled output: ypu = " + ypu)
*/

} // NeuralNet_3LTest2 object


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `NeuralNet_3LTest3` object trains a neural network on the `ExampleAutoMPG` dataset.
 *  > runMain scalation.analytics.NeuralNet_3LTest3
 */
object NeuralNet_3LTest3 extends App
{
    import ExampleAutoMPG._

    banner ("NeuralNet_3L vs. Regession - ExampleAutoMPG")

    banner ("Regression")
    val rg = Regression (oxy)
    println (rg.analyze ().report)

/*
    banner ("prediction")                             // not currently rescaling
    val yq = rg.predict ()                            // scaled predicted output values for all x
    println ("target output: y = " + y)
    println ("predicted output: yq = " + yq)
    println ("error: e = " + (y - yq))
*/

    banner ("NeuralNet_3L with scaled y values")
//  hp("eta") = 0.0014                                // try several values - train0
    hp("eta") = 0.1                                   // try several values - train

    val nn = NeuralNet_3L (xy)                        // factory function automatically rescales
//  val nn = new NeuralNet_3L (x, MatrixD (Seq (y)))  // constructor does not automatically rescale

    nn.trainSwitch (2).eval ()                        // fit the weights using training data (0, 1, 2)
    println (nn.report)

/*
    banner ("scaled prediction")
    val yp = nn.predict ().col (0)                    // scaled predicted output values for all x
    println ("target output: y = " + y)
    println ("predicted output: yp = " + yp)
    println ("error: e = " + (y - yp))

    banner ("unscaled prediction")
//  val (ymu, ysig) = (y.mean, sqrt (y.variance))     // should obtain from apply - see below
//  val ypu = denormalizeV ((ymu, ysig))(yp)          // denormalize predicted output values for all x
    val ypu = nn.itran (yp)                           // denormalize predicted output values for all x
    println ("target output: y = " + y)
    println ("unscaled output: ypu = " + ypu)
*/

} // NeuralNet_3LTest3 object


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `NeuralNet_3LTest4` object trains a neural network on the `ExampleAutoMPG` dataset.
 *  It tests cross-validation.
 *  > runMain scalation.analytics.NeuralNet_3LTest4
 */
object NeuralNet_3LTest4 extends App
{
    import ExampleAutoMPG._

    banner ("NeuralNet_3L cross-validation - ExampleAutoMPG")

    banner ("NeuralNet_3L with scaled y values")
//  hp("eta") = 0.0014                                // try several values - train0
    hp("eta") = 0.025                                 // try several values - train - set from best of `NeuralNet_3LTest3`

    val nn = NeuralNet_3L (xy)                        // factory function automatically rescales
//  val nn = new NeuralNet_3L (x, MatrixD (Seq (y)))  // constructor does not automatically rescale

    nn.trainSwitch (1).eval ()                        // fit the weights using training data (0, 1, 2)
    println (nn.report)

    banner ("cross-validation")
    val stats = nn.crossValidate ()
    showQofStatTable (stats)

} // NeuralNet_3LTest4 object


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `NeuralNet_3LTest5` object trains a neural network on the `ExampleAutoMPG` dataset.
 *  This tests forward feature/variable selection.
 *  > runMain scalation.analytics.NeuralNet_3LTest5
 */
object NeuralNet_3LTest5 extends App
{
    import ExampleAutoMPG._

    val n   = x.dim2                                  // number of parameters/variables
    val rSq = new MatrixD (n - 1, 3)                  // hold: R^2, R^2 Bar, R^2 cv

    banner ("NeuralNet_3L feature selection - ExampleAutoMPG")

    banner ("NeuralNet_3L with scaled y values")
    hp("eta") = 0.3

    val nn = NeuralNet_3L (xy)                        // factory function automatically rescales
//  val nn = new NeuralNet_3L (x, y)                  // constructor does not automatically rescale

    nn.train ().eval ()                               // fit the weights using training data
    println (nn.report)                               // parameters and quality of fit
    val ft = nn.fitA(0)                               // quality of fit for first output

    banner ("Forward Selection Test")
    nn.forwardSelAll ()

} // NeuralNet_3LTest5 object


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `NeuralNet_3LTest6` object trains a neural network on the `ExampleAutoMPG` dataset.
 *  This tests forward feature/variable selection with plotting of R^2.
 *  > runMain scalation.analytics.NeuralNet_3LTest6
 */
object NeuralNet_3LTest6 extends App
{
    import ExampleAutoMPG._

    val n = ox.dim2                                   // number of parameters/variables

    banner ("NeuralNet_3L feature selection - ExampleAutoMPG")

    val f_ = (f_sigmoid, f_id)                        // try different activation functions
//  val f_ = (f_tanh, f_id)                           // try different activation functions
//  val f_ = (f_lreLU, f_id)                          // try different activation functions

    banner ("NeuralNet_3L with scaled y values")
    hp("eta") = 0.02                                  // learning rate hyper-parameter (see Optimizer)

    val nn = NeuralNet_3L (oxy, f0 = f_._1, f1 = f_._2)          // factory function automatically rescales
//  val nn = new NeuralNet_3L (ox, y, f0 = f_._1, f1= f_._2)     // constructor does not automatically rescale

    nn.train ().eval ()                               // fit the weights using training data
    println (nn.report)                               // report parameters and fit
    val ft = nn.fitA(0)                               // fit for first output variable

    banner ("Forward Selection Test")
    val (cols, rSq) = nn.forwardSelAll ()             // R^2, R^2 bar, R^2 cv
    println (s"rSq = $rSq")
    val k = cols.size
    println (s"k = $k, n = $n")
    val t = VectorD.range (1, k)                      // instance index
    new PlotM (t, rSq.t, Array ("R^2", "R^2 bar", "R^2 cv"),
               "R^2 vs n for NeuralNet_3L", lines = true)

} // NeuralNet_3LTest6 object