//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller, Hao Peng
 *  @version 1.6
 *  @date    Fri Mar 16 15:13:38 EDT 2018
 *  @see     LICENSE (MIT style license file).
 *
 *  @title   Model: Neural Network with 4+ Layers (input, multiple hidden, output layers)
 *
 *  @see     hebb.mit.edu/courses/9.641/2002/lectures/lecture03.pdf
 *  @see     http://neuralnetworksanddeeplearning.com/
 */

package scalation.analytics

import scala.collection.mutable.Set
import scala.math.{max => MAX}
import scala.runtime.ScalaRunTime.stringOf

import scalation.linalgebra.{FunctionV_2V, MatriD, MatrixD, VectoD, VectorD, VectorI}
import scalation.math.noDouble
import scalation.plot.PlotM
import scalation.stat.Statistic
import scalation.util.banner

import ActivationFun._
import Fit._
import Initializer._
import MatrixTransform._
import Optimizer._                                                        // Optimizer - configuration
//import Optimizer_SGD._                                                  // Stochastic Gradient Descent
import Optimizer_SGDM._                                                   // Stochastic Gradient Descent with Momentum
import PredictorMat2._

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `NeuralNet_XL` class supports multi-output, multi-layer (input, multiple hidden
 *  and output) Neural-Networks.  It can be used for both classification and prediction,
 *  depending on the activation functions used.  Given several input vectors and output
 *  vectors (training data), fit the weight and bias parameters connecting the layers,
 *  so that for a new input vector 'v', the net can predict the output value.
 *  Defaults to two hidden layers.
 *  This implementation is partially adapted from Michael Nielsen's Python implementation found in
 *  @see github.com/mnielsen/neural-networks-and-deep-learning/blob/master/src/network2.py
 *  @see github.com/MichalDanielDobrzanski/DeepLearningPython35/blob/master/network2.py
 *------------------------------------------------------------------------------
 *  @param x       the m-by-nx data/input matrix (training data having m input vectors)
 *  @param y       the m-by-ny response/output matrix (training data having m output vectors)
 *  @param nz      the number of nodes in each hidden layer, e.g., Array (9, 8) => 2 hidden layers of sizes 9 and 8
 *  @param fname_  the feature/variable names (if null, use x_j's)
 *  @param hparam  the hyper-parameters for the model/network
 *  @param f       the array of activation function families between every pair of layers
 *  @param itran   the inverse transformation function to return responses to the original scale
 */
class NeuralNet_XL (x: MatriD, y: MatriD,
                    private var nz: Array [Int] = null,
                    fname_ : Strings = null, hparam: HyperParameter = Optimizer.hp,
                    f: Array [AFF] = Array (f_tanh, f_tanh, f_id),
                    val itran: FunctionV_2V = null)
      extends PredictorMat2 (x, y, fname_, hparam)                        // sets eta in parent class
{
    private val DEBUG     = false                                         // debug flag
    private val bSize     = hp ("bSize").toInt                            // mini-batch size
    private val maxEpochs = hp ("maxEpochs").toInt                        // maximum number of training epochs/iterations
    private val lambda    = hp ("lambda")                                 // regularization hyper-parameter

    if (nz == null) nz = compute_nz (nx)                                  // [1] default number of nodes for hidden layers
//  if (nz == null) nz = compute_nz (nx, ny)                              // [2] default number of nodes for hidden layers
    val df_m = compute_df_m (nz)                                          // degrees of freedom for model (first output only)
    resetDF (df_m, x.dim1 - df_m)                                         // degrees of freedom for (model, error)

    if (f.length != nz.length + 1) {
        flaw ("NeuralNet_XL Constructor", "dimension mismatch among number of layers or activation functions")
    } // if

    protected val sizes  = nx +: nz :+ ny                                 // sizes of all layers
    protected val nl     = sizes.length - 1                               // number of active layers
    protected val layers = 0 until nl
    protected var b = (for (l <- layers) yield
                      new NetParam (weightMat (sizes(l), sizes(l+1)),     // parameters (weights &
                                    weightVec (sizes(l+1)))).toArray      // biases) per active layer

    println (s"Create a NeuralNet_XL with $nx input, ${stringOf (nz)} hidden and $ny output nodes: df_m = $df_m")
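
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    //  Worked example of the layer bookkeeping above:  for nx = 4, nz = Array (9, 8)
    //  and ny = 2, sizes = Array (4, 9, 8, 2) and nl = 3 active layers, so b holds
    //  b(0): 4-by-9 weights + 9 biases, b(1): 9-by-8 weights + 8 biases and
    //  b(2): 8-by-2 weights + 2 biases.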

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute default values for the number of nodes in each hidden layer, based on
     *  the number of nodes in the input layer using the drop one/two rule.
     *  Rule [1] nx, nx - 2, ...
     *  @param nx  the number of nodes in the input layer
     */
    def compute_nz (nx: Int): Array [Int] =
    {
        (for (l <- 1 until f.length) yield MAX (1, nx + 2 - 2*l)).toArray
    } // compute_nz

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute default values for the number of nodes in each hidden layer, based on
     *  the number of nodes in the input and output layers using the average of the
     *  prior layer and output layer rule.
     *  Rule [2] (nx + ny) / 2, (nx + 3ny) / 4, ...
     *  @param nx  the number of nodes in the input layer
     *  @param ny  the number of nodes in the output layer
     */
    def compute_nz (nx: Int, ny: Int): Array [Int] =
    {
        val n = Array.ofDim [Int] (f.length - 1)
        for (l <- 0 until f.length - 1) n(l) = if (l == 0) (nx + ny) / 2 else (n(l-1) + ny) / 2
        n
    } // compute_nz

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the degrees of freedom for the model (based on nx, n's, ny = 1).
     *  Rough estimate based on total number of parameters - 1.
     *  FIX: use better estimate
     *  @param n  the number of nodes in each hidden layer
     */
    def compute_df_m (n: Array [Int]): Int =
    {
        var sum = n.last
        for (l <- n.indices) {
            if (l == 0) sum += nx * n(0) + n(0)
            else        sum += n(l-1) * n(l) + n(l)
        } // for
        sum
    } // compute_df_m

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the parameters (weight matrices and bias vectors).
     */
    def parameters: NetParams = b
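
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    //  Worked examples for the rules above (with f.length = 3, i.e., 2 hidden layers):
    //  Rule [1] with nx = 10:          nz = Array (10, 8)   since 10+2-2 = 10, 10+2-4 = 8
    //  Rule [2] with nx = 10, ny = 2:  nz = Array (6, 4)    since (10+2)/2 = 6, (6+2)/2 = 4
    //  compute_df_m with nx = 10, n = Array (6, 4):  10*6+6 + 6*4+4 + 4 = 98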

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given training data 'x_' and 'y_', fit the parameters 'b' (weight matrices and
     *  bias vectors).  Iterate over several epochs (no batching).
     *  Note on regularization (weight decay):  b.w(l) *= 1.0 - eta * (lambda / m)
     *  @param x_  the training/full data/input matrix
     *  @param y_  the training/full response/output matrix
     */
    def train0 (x_ : MatriD = x, y_ : MatriD = y): NeuralNet_XL =
    {
        println (s"train0: eta = $eta")
        var sse0 = Double.MaxValue                                        // hold prior value of sse
        val z = Array.ofDim [MatriD] (nl+1); z(0) = x_                    // storage: activations f(b(l), z(l))
        val d = Array.ofDim [MatriD] (nl)                                 // storage: deltas

        for (epoch <- 1 to maxEpochs) {                                   // iterate over each epoch
            for (l <- layers) z(l+1) = f(l).fM (b(l) * z(l))              // feedforward and store activations
            ee      = z.last - y_                                         // negative of error matrix
            d(nl-1) = f.last.dM (z.last) ** ee                            // delta for last layer before output
            for (l <- nl-2 to 0 by -1)
                d(l) = f(l).dM (z(l+1)) ** (d(l+1) * b(l+1).w.t)          // deltas for previous hidden layers
            for (l <- layers) b(l) -= (z(l).t * d(l) * eta,               // update parameters (weights,
                                       d(l).mean * eta)                   //                    biases)

            val sse = ee.normFSq
            if (DEBUG) println (s"train0: parameters for $epoch th epoch: b = $b, sse = $sse")
            if (sse > sse0) return this                                   // return early if moving up
            sse0 = sse                                                    // save prior sse
        } // for
        this
    } // train0
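
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    //  The updates implemented by train0, written out (z(l) holds the activations
    //  of layer l, z.last the output activations):
    //      ee      = z.last - y                              negative of error matrix
    //      d(nl-1) = f.last' (z.last) ** ee                  output-layer delta
    //      d(l)    = f(l)' (z(l+1)) ** (d(l+1) * w(l+1).t)   hidden-layer deltas, backpropagated
    //      b(l).w -= z(l).t * d(l) * eta                     weight update (gradient descent)
    //      b(l).b -= d(l).mean * eta                         bias update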

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given training data 'x_' and 'y_', fit the parameters 'b' (weight matrices and
     *  bias vectors).  Iterate over several epochs, where each epoch divides the
     *  training set into 'nbat' batches.  Each batch is used to update the weights.
     *  @param x_  the training/full data/input matrix
     *  @param y_  the training/full response/output matrix
     */
    def train (x_ : MatriD = x, y_ : MatriD = y): NeuralNet_XL =
    {
        val epochs = optimizeX (x_, y_, b, eta, bSize, maxEpochs, lambda, f)    // optimize parameters (weights & biases)
        println (s"ending epoch = $epochs")
        estat.tally (epochs._2)
        this
    } // train

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given training data 'x_' and 'y_', fit the parameters 'b' (weight matrices and
     *  bias vectors).  Iterate over several epochs, where each epoch divides the
     *  training set into 'nbat' batches.  Each batch is used to update the weights.
     *  This version performs an interval search for the best 'eta' value.
     *  @param x_  the training/full data/input matrix
     *  @param y_  the training/full response/output matrix
     */
    override def train2 (x_ : MatriD = x, y_ : MatriD = y): NeuralNet_XL =
    {
        val etaI   = (0.25 * eta, 4.0 * eta)                                    // quarter to four times eta
        val epochs = optimizeXI (x_, y_, b, etaI, bSize, maxEpochs, lambda, f)  // optimize parameters (weights & biases)
        println (s"ending epoch = $epochs")
        estat.tally (epochs._2)
        this
    } // train2

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Build a sub-model that is restricted to the given columns of the data matrix.
     *  @param x_cols  the columns that the new model is restricted to
     */
    def buildModel (x_cols: MatriD): NeuralNet_XL =
    {
        new NeuralNet_XL (x_cols, y, null, null, hparam, f, itran)
    } // buildModel

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the network parameters (weights and biases) for the given 'layer'.
     *  @param layer  the layer to get the parameters from
     */
    def getNetParam (layer: Int = 1) = b(layer)

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a new input vector 'v', predict the output/response vector 'f(v)'.
     *  @param v  the new input vector
     */
    def predictV (v: VectoD): VectoD =
    {
        var u = v
        for (l <- layers) u = f(l).fV (b(l) dot u)
        u
    } // predictV

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given an input matrix 'v', predict the output/response matrix 'f(v)'.
     *  @param v  the input matrix
     */
    def predictV (v: MatriD = x): MatriD =
    {
        var u = v
        for (l <- layers) u = f(l).fM (b(l) * u)
        u
    } // predictV

} // NeuralNet_XL class

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `NeuralNet_XL` companion object provides factory functions for building multi-layer
 *  neural nets (defaults to two hidden layers).
 *  Note, 'rescale' is defined in `ModelFactory` in Model.scala.
 */
object NeuralNet_XL extends ModelFactory
{
    private val DEBUG = false                                             // debug flag

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Create a `NeuralNet_XL` for a combined data matrix.
     *  @param xy      the combined input and output matrix
     *  @param nz      the number of nodes in each hidden layer, e.g., Array (5, 10) means 2 hidden layers with sizes 5 and 10
     *  @param fname   the feature/variable names
     *  @param hparam  the hyper-parameters
     *  @param af      the array of activation function families over all layers
     */
    def apply (xy: MatriD, nz: Array [Int] = null,
               fname: Strings = null, hparam: HyperParameter = Optimizer.hp,
               af: Array [AFF] = Array (f_tanh, f_tanh, f_id)): NeuralNet_XL =
    {
        var itran: FunctionV_2V = null                                    // inverse transform -> original scale
        val (x, y) = pullResponse (xy)                                    // assumes the last column is the response

        val x_s = if (rescale) rescaleX (x, af(0))
                  else x
        val y_s = if (af.last.bounds != null) { val y_i = rescaleY (y, af.last); itran = y_i._2; y_i._1 }
                  else y

        if (DEBUG) println (s" scaled: x = $x_s \n scaled y = $y_s")
        new NeuralNet_XL (x_s, MatrixD (Seq (y_s)), nz, fname, hparam, af, itran)
    } // apply

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Create a `NeuralNet_XL` for a data matrix and response vector.
     *  @param x       the input/data matrix
     *  @param y       the output/response vector
     *  @param nz      the number of nodes in each hidden layer, e.g., Array (5, 10) means 2 hidden layers with sizes 5 and 10
     *  @param fname   the feature/variable names
     *  @param hparam  the hyper-parameters
     *  @param af      the array of activation function families over all layers
     */
    def apply (x: MatriD, y: VectoD, nz: Array [Int],
               fname: Strings, hparam: HyperParameter,
               af: Array [AFF]): NeuralNet_XL =
    {
        val hp2 = if (hparam == null) Optimizer.hp else hparam
        var itran: FunctionV_2V = null                                    // inverse transform -> original scale

        val x_s = if (rescale) rescaleX (x, af(0))
                  else x
        val y_s = if (af.last.bounds != null) { val y_i = rescaleY (y, af.last); itran = y_i._2; y_i._1 }
                  else y

        if (DEBUG) println (s" scaled: x = $x_s \n scaled y = $y_s")
        new NeuralNet_XL (x_s, MatrixD (Seq (y_s)), nz, fname, hp2, af, itran)
    } // apply

} // NeuralNet_XL object
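
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
//  Illustrative factory-usage sketch (not from the original file): build a network
//  with custom hidden layer sizes and activation functions from a combined matrix
//  'xy' whose last column is the response; the factory rescales x and y as needed.
//
//      val nn = NeuralNet_XL (xy, nz = Array (5, 3),
//                             af = Array (f_sigmoid, f_tanh, f_id))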

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `NeuralNet_XLTest` object is used to test the `NeuralNet_XL` class.
 *  @see www4.rgu.ac.uk/files/chapter3%20-%20bp.pdf
 *  > runMain scalation.analytics.NeuralNet_XLTest
 */
object NeuralNet_XLTest extends App
{
    val s = 0                                                             // random number stream to use
    val x = new MatrixD ((3, 3), 1.0, 0.35, 0.9,                          // training data - input matrix (m vectors)
                                 1.0, 0.20, 0.7,
                                 1.0, 0.40, 0.95)
    val y = new MatrixD ((3, 2), 0.5, 0.4,                                // training data - output matrix (m vectors)
                                 0.3, 0.3,
                                 0.6, 0.5)

    println ("input  matrix x = " + x)
    println ("output matrix y = " + y)

    val hp2 = hp.updateReturn ("bSize", 1)
    val nn  = new NeuralNet_XL (x, y, Array (3, 2), hparam = hp2)         // create a NeuralNet_XL

    for (i <- 1 to 20) {
        val eta = i * 0.5
        banner (s"NeuralNet_XLTest: Fit the parameter b using optimization with learning rate $eta")

        nn.reset (eta_ = eta)
        nn.train ().eval ()                                               // fit the weights using training data
        println (nn.report)

//      yp = nn.predict (x)                                               // predicted output values
//      println ("target output:    y  = " + y)
//      println ("predicted output: yp = " + yp)
        println ("yp0 = " + nn.predict (x(0)))                            // predicted output values for row 0
    } // for

    banner ("NeuralNet_XLTest: Compare with Linear Regression - first column of y")
    val y0  = y.col(0)                                                    // use first column of matrix y
    val rg0 = new Regression (x, y0)                                      // create a Regression model
    println (rg0.analyze ().report)

    val y0p = rg0.predict (x)                                             // predicted output value
    println ("target output:    y0  = " + y0)
    println ("predicted output: y0p = " + y0p)

    banner ("NeuralNet_XLTest: Compare with Linear Regression - second column of y")
    val y1  = y.col(1)                                                    // use second column of matrix y
    val rg1 = new Regression (x, y1)                                      // create a Regression model
    println (rg1.analyze ().report)

    val y1p = rg1.predict (x)                                             // predicted output value
    println ("target output:    y1  = " + y1)
    println ("predicted output: y1p = " + y1p)

} // NeuralNet_XLTest object

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `NeuralNet_XLTest2` object trains a neural network on the `ExampleBasketBall` dataset.
 *  > runMain scalation.analytics.NeuralNet_XLTest2
 */
object NeuralNet_XLTest2 extends App
{
    import ExampleBasketBall._
    banner ("NeuralNet_XL vs. Regression - ExampleBasketBall")

    println ("ox = " + ox)
    println ("y  = " + y)

    banner ("Regression")
    val rg = Regression (oxy)
    println (rg.analyze ().report)

    banner ("prediction")                                                 // not currently rescaling
    val yq = rg.predict ()                                                // scaled predicted output values for all x
    println ("target output:    y  = " + y)
    println ("predicted output: yq = " + yq)
    println ("error:            e  = " + (y - yq))

    banner ("NeuralNet_XL with scaled y values")
//  hp("eta") = 0.016                                                     // try several values - train0
    hp("eta") = 0.1                                                       // try several values - train

    val nn = NeuralNet_XL (xy)                                            // factory function automatically rescales
//  val nn = new NeuralNet_XL (x, MatrixD (Seq (y)))                      // constructor does not automatically rescale

    nn.trainSwitch (2).eval ()                                            // fit the weights using training data
    println (nn.report)

    banner ("scaled prediction")
    val yp = nn.predictV ().col (0)                                       // scaled predicted output values for all x
    println ("target output:    y  = " + y)
    println ("predicted output: yp = " + yp)
    println ("error:            e  = " + (y - yp))

/*
    banner ("unscaled prediction")
//  val (ymu, ysig) = (y.mean, sqrt (y.variance))                         // should obtain from apply - see below
//  val ypu = denormalizeV ((ymu, ysig))(yp)                              // denormalize predicted output values for all x
    val ypu = nn.itran (yp)                                               // denormalize predicted output values for all x
    println ("target output:   y   = " + y)
    println ("unscaled output: ypu = " + ypu)
*/

} // NeuralNet_XLTest2 object

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `NeuralNet_XLTest3` object trains a neural network on the `ExampleAutoMPG` dataset.
 *  > runMain scalation.analytics.NeuralNet_XLTest3
 */
object NeuralNet_XLTest3 extends App
{
    import ExampleAutoMPG._
    banner ("NeuralNet_XL vs. Regression - ExampleAutoMPG")

    banner ("Regression")
    val rg = Regression (oxy)
    println (rg.analyze ().report)

    banner ("prediction")                                                 // not currently rescaling
    val yq = rg.predict ()                                                // scaled predicted output values for all x
    println ("target output:    y  = " + y)
    println ("predicted output: yq = " + yq)
    println ("error:            e  = " + (y - yq))

    banner ("NeuralNet_XL with scaled y values")
//  hp("eta") = 0.0014                                                    // try several values - train0
    hp("eta") = 0.01                                                      // try several values - train

    val nn = NeuralNet_XL (xy)                                            // factory function automatically rescales
//  val nn = new NeuralNet_XL (x, MatrixD (Seq (y)))                      // constructor does not automatically rescale

    nn.trainSwitch (2).eval ()                                            // fit the weights using training data (0, 1, 2)
    println (nn.report)

/*
    banner ("scaled prediction")
    val yp = nn.predict ().col (0)                                        // scaled predicted output values for all x
    println ("target output:    y  = " + y)
    println ("predicted output: yp = " + yp)
    println ("error:            e  = " + (y - yp))

    banner ("unscaled prediction")
//  val (ymu, ysig) = (y.mean, sqrt (y.variance))                         // should obtain from apply - see below
//  val ypu = denormalizeV ((ymu, ysig))(yp)                              // denormalize predicted output values for all x
    val ypu = nn.itran (yp)                                               // denormalize predicted output values for all x
    println ("target output:   y   = " + y)
    println ("unscaled output: ypu = " + ypu)
*/

} // NeuralNet_XLTest3 object

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `NeuralNet_XLTest4` object trains a neural network on the `ExampleAutoMPG` dataset.
 *  It tests cross-validation.
 *  > runMain scalation.analytics.NeuralNet_XLTest4
 */
object NeuralNet_XLTest4 extends App
{
    import ExampleAutoMPG._
    banner ("NeuralNet_XL cross-validation - ExampleAutoMPG")

    banner ("NeuralNet_XL with scaled y values")
//  hp("eta") = 0.0014                                                    // try several values - train0
    hp("eta") = 0.02                                                      // try several values - train

    val nn = NeuralNet_XL (xy)                                            // factory function automatically rescales
//  val nn = new NeuralNet_XL (x, MatrixD (Seq (y)))                      // constructor does not automatically rescale

    nn.trainSwitch (1).eval ()                                            // fit the weights using training data (0, 1, 2)
    println (nn.report)

    banner ("cross-validation")
    nn.crossValidate ()

} // NeuralNet_XLTest4 object

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `NeuralNet_XLTest5` object trains a neural network on the `ExampleAutoMPG` dataset.
 *  This tests forward feature/variable selection.
 *  > runMain scalation.analytics.NeuralNet_XLTest5
 */
object NeuralNet_XLTest5 extends App
{
    import ExampleAutoMPG._
    banner ("NeuralNet_XL feature selection - ExampleAutoMPG")

    banner ("NeuralNet_XL with scaled y values")
    hp("eta") = 0.02
    val nn = NeuralNet_XL (xy)                                            // factory function automatically rescales
//  val nn = new NeuralNet_XL (x, y)                                      // constructor does not automatically rescale
    val ft = nn.fitA(0)

    nn.train ().eval ()                                                   // fit the weights using training data
    val n = x.dim2                                                        // number of parameters/variables
    println (nn.report)

    banner ("Forward Selection Test")
    nn.forwardSelAll ()

} // NeuralNet_XLTest5 object

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `NeuralNet_XLTest6` object trains a neural network on the `ExampleAutoMPG` dataset.
 *  This tests forward feature/variable selection with plotting of R^2.
 *  > runMain scalation.analytics.NeuralNet_XLTest6
 */
object NeuralNet_XLTest6 extends App
{
    import ExampleAutoMPG._
    val n = ox.dim2                                                       // number of parameters/variables
    banner ("NeuralNet_XL feature selection - ExampleAutoMPG")

    val af_ = Array (f_sigmoid, f_sigmoid, f_id)                          // try different activation functions
//  val af_ = Array (f_tanh, f_tanh, f_id)                                // try different activation functions

    banner ("NeuralNet_XL with scaled y values")
    hp("eta") = 0.02                                                      // learning rate hyper-parameter (see Optimizer)
    val nn = NeuralNet_XL (oxy, af = af_)                                 // factory function automatically rescales
//  val nn = new NeuralNet_XL (ox, y, af = af_)                           // constructor does not automatically rescale

    nn.train ().eval ()                                                   // fit the weights using training data
    println (nn.report)                                                   // report parameters and fit
    val ft = nn.fitA(0)                                                   // fit for first output variable

    banner ("Forward Selection Test")
    val (cols, rSq) = nn.forwardSelAll ()                                 // R^2, R^2 bar, R^2 cv
    println (s"rSq = $rSq")
    val k = cols.size
    println (s"k = $k, n = $n")
    val t = VectorD.range (1, k)                                          // instance index
    new PlotM (t, rSq.t, Array ("R^2", "R^2 bar", "R^2 cv"),
               "R^2 vs n for NeuralNet_XL", lines = true)

} // NeuralNet_XLTest6 object
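
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `NeuralNet_XLExample` object is a minimal usage sketch added for illustration
 *  (it is not part of the original test suite).  It builds a small network directly
 *  via the constructor (no rescaling), trains it, and inspects parameters and
 *  predictions, reusing the small dataset from `NeuralNet_XLTest`.
 *  > runMain scalation.analytics.NeuralNet_XLExample
 */
object NeuralNet_XLExample extends App
{
    val x = new MatrixD ((3, 3), 1.0, 0.35, 0.9,                          // input matrix (3 training vectors)
                                 1.0, 0.20, 0.7,
                                 1.0, 0.40, 0.95)
    val y = new MatrixD ((3, 2), 0.5, 0.4,                                // output matrix (3 training vectors)
                                 0.3, 0.3,
                                 0.6, 0.5)

    val nn = new NeuralNet_XL (x, y, Array (3, 2))                        // two hidden layers of sizes 3 and 2
    nn.train ().eval ()                                                   // fit the weights using training data
    println (nn.report)                                                   // report parameters and fit
    println ("layer 1 parameters  = " + nn.getNetParam (1))               // weights & biases out of hidden layer 1
    println ("prediction for x(0) = " + nn.predictV (x(0)))               // predict output for the first input vector

} // NeuralNet_XLExample object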