//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller, Hao Peng
 *  @version 1.5
 *  @date    Fri Mar 16 15:13:38 EDT 2018
 *  @see     LICENSE (MIT style license file).
 *
 *  @see     hebb.mit.edu/courses/9.641/2002/lectures/lecture03.pdf
 *  @see     http://neuralnetworksanddeeplearning.com/
 */

package scalation.analytics

import scala.math.sqrt

import scalation.linalgebra.{FunctionM_2M, FunctionV_2V, matrixize, vectorize}
import scalation.linalgebra.{MatriD, MatrixD, VectoD, VectorD, VectorI}
import scalation.math.double_exp
import scalation.random.{Normal, PermutedVecI, RandomMatD}
import scalation.random.RNGStream.ranStream
import scalation.util.{Error, banner}

import ActivationFun._
import Optimizer._

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `NeuralNet_XL` class supports multi-output, multi-layer (input, multiple
 *  hidden and output) Neural-Networks.  It can be used for both classification
 *  and prediction, depending on the activation functions used.  Given several
 *  input vectors and output vectors (training data), fit the weight and bias
 *  parameters connecting the layers, so that for a new input vector 'v', the
 *  net can predict the output vector 'f(v)'.
 *  This implementation is partially adapted from Michael Nielsen's Python
 *  implementation found in
 *  @see  github.com/mnielsen/neural-networks-and-deep-learning/blob/master/src/network2.py
 *  @see  github.com/MichalDanielDobrzanski/DeepLearningPython35/blob/master/network2.py
 *------------------------------------------------------------------------------
 *  @param x          the m-by-nx input matrix (training data consisting of m input vectors)
 *  @param y          the m-by-ny output matrix (training data consisting of m output vectors)
 *  @param nh         the number of nodes in each hidden layer, e.g., Array (5, 10) means 2 hidden layers with sizes 5 and 10
 *  @param eta_       the learning/convergence rate (typically less than 1.0)
 *  @param bSize      the mini-batch size
 *  @param maxEpochs  the maximum number of training epochs/iterations
 *  @param lambda     the regularization parameter
 *  @param actfV      the array of activation functions (mapping vector => vector) between every pair of layers
 *  @param actfDM     the array of derivatives of the matrix activation functions
 */
class NeuralNet_XL (x: MatriD, y: MatriD,
                    private var nh: Array [Int] = null,
                    eta_ : Double = hp ("eta"),
                    bSize: Int = hp ("bSize").toInt,
                    maxEpochs: Int = hp ("maxEpochs").toInt,
                    private var lambda: Double = 0.0,
                    actfV: Array [FunctionV_2V] = Array (sigmoidV, sigmoidV),
                    actfDM: Array [FunctionM_2M] = Array (sigmoidDM, sigmoidDM))
      extends NeuralNet (x, y, eta_)                                     // sets eta in parent class
{
    private val DEBUG   = false                                          // debug flag
    private val permGen = PermutedVecI (VectorI.range (0, m), ranStream) // permutation generator

    // Guidelines for setting the number of nodes in each hidden layer, e.g.,
    // 2 nx + 1, nx + 1, (nx + ny) / 2, sqrt (nx ny)

    if (nh == null) nh = Array (nx + 1)

    if (actfV.length != actfDM.length || actfV.length != nh.length + 1) {
        flaw ("NeuralNet_XL Constructor", "Dimension mismatch among number of layers or activation functions")
    } // if

    private val sizes = nx +: nh :+ ny                                   // sizes of all layers
    private val nl    = sizes.length                                     // number of layers
    private val nl1   = nl - 1                                           // number of layers - 1
    private var ww: IndexedSeq [MatriD] = null                           // weight matrices for all layers
    private var bi: IndexedSeq [VectoD] = null                           // bias vectors for all layers
    private val actfM = for (f <- actfV) yield matrixize (f)             // matrixized activation functions

    println (s"Create a NeuralNet_XL with $nx input, ${nh.deep} hidden and $ny output nodes")
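
    // Shape example (illustrative, derived from the definitions above): with
    // nx = 3 inputs, nh = Array (5, 10) and ny = 2 outputs, sizes = Array (3, 5, 10, 2),
    // giving nl = 4 layers, nl1 = 3 weight matrices (3-by-5, 5-by-10, 10-by-2)
    // and 3 bias vectors (of lengths 5, 10 and 2).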

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the weight matrices.
     */
    def weights: Array [MatriD] = ww.toArray

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the bias vectors.
     */
    def biases: Array [VectoD] = bi.toArray

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Set the initial weight matrices 'ww' and bias vectors 'bi' before training,
     *  drawing values from a normal distribution scaled by the square root of the
     *  fan-in of each layer.
     *  @param stream  the random number stream to use
     *  @param limit   the maximum value for any weight (currently unused)
     */
    def setWeights (stream: Int = 0, limit: Double = 1.0 / sqrt (nx))
    {
        val normal = Normal (0.0, 1.0, stream)
        bi = for (i <- 1 until nl) yield
            VectorD (for (j <- 0 until sizes(i)) yield normal.gen)
        ww = for (i <- 0 until nl1) yield
            MatrixD (for (j <- 0 until sizes(i)) yield
                VectorD (for (k <- 0 until sizes(i+1)) yield normal.gen / sqrt (sizes(i))), false)
        if (DEBUG) println (s"setWeights: weights = $ww \n biases = $bi")
    } // setWeights

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Reset the learning rate 'eta', batch size 'bSize', and the regularization
     *  parameter 'lambda'.
     *  @param eta_     the learning rate
     *  @param bSize_   the batch size
     *  @param lambda_  the regularization parameter
     *
    def reset (eta_ : Double = 0.0, bSize_ : Int = 0, lambda_ : Double = -1.0)
    {
        if (eta_ > 0.0)      eta    = eta_
        if (bSize_ > 0)      bSize  = bSize_
        if (lambda_ >= 0.0)  lambda = lambda_
        ww = null; bi = null
    } // reset
     */

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given training data 'x' and 'y', fit the parameter/weight matrices 'ww'
     *  and bias vectors 'bi'.  Iterate over several epochs, where each epoch
     *  divides the training set into 'nBat' batches.  Each batch is used to
     *  update the weights.
     */
    def train (): NeuralNet_XL =
    {
        if (ww == null || bi == null) setWeights ()                      // initialize parameters/weights
        val nBat = m / bSize                                             // the number of batches
        if (DEBUG) println (s"train: bSize = $bSize, nBat = $nBat")
        var sse0 = Double.MaxValue                                       // hold prior value of sse
        var up   = 0                                                     // counter for number of times moving up

        for (epoch <- 1 to maxEpochs) {                                  // iterate over each epoch
            var sse     = 0.0                                            // hold sum of squared errors
            val batches = permGen.igen.split (nBat)                      // permute indices and split into nBat batches

            for (ib <- batches) {                                        // iterate over each batch
                sse += updateWeights (x(ib), y(ib))                      // update weight matrices ww and bias vectors bi
            } // for

            if (DEBUG) println (s"weights for $epoch th epoch: sse = $sse")
            if (sse > sse0) up += 1 else up = 0
            if (up > 4) { println (s"ending epoch = $epoch"); return this }   // return early if moving up for too long
            sse0 = sse                                                   // save prior sse
        } // for

        if (DEBUG) println (s"maxEpochs = $maxEpochs")
        if (DEBUG) println (s"train: weights = $ww \n biases = $bi")
        this
    } // train
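
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    // A sketch of the math implemented by 'updateWeights' below (matching the
    // code, with f_i the i-th activation function and ** the element-wise product):
    //     feedforward:   a_0 = x,   a_{i+1} = f_i (a_i W_i + b_i)
    //     output delta:  δ_{nl1-1} = f'_{last} (yp) ** (yp - y)
    //     hidden deltas: δ_i       = f'_i (a_{i+1}) ** (δ_{i+1} W_{i+1}^t)
    //     updates:       W_i *= 1 - η λ / m                              (weight decay)
    //                    W_i -= (η / batch-size) a_i^t δ_i               (gradient step)
    //                    b_i -= η mean (δ_i)                             (bias step)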

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Update the parameter/weight matrices 'ww' and bias vectors 'bi' based on
     *  the current batch.  Take a step in the direction opposite to the gradient.
     *  @param x  the input matrix for the current batch
     *  @param y  the output matrix for the current batch
     */
    private def updateWeights (x: MatriD, y: MatriD): Double =
    {
        val as = Array.ofDim [MatriD] (nl)                               // array to store all the activations, layer by layer
        as(0)  = x                                                       // initial activation, which is the input matrix
        for (i <- 0 until nl1) as(i+1) = actfM(i)(as(i) * ww(i) + bi(i)) // feedforward and store all activations

        val yp = as.last                                                 // predicted value of y
        val ee = yp - y                                                  // -E where E is the error matrix
        val δs = Array.ofDim [MatriD] (nl1)                              // array to store all δ's
        δs(nl1-1) = actfDM.last (yp) ** ee                               // δ for the last layer
        for (l <- 2 until nl)
            δs(nl1-l) = actfDM(nl1-l)(as(nl-l)) ** (δs(nl-l) * ww(nl-l).t)   // δ's for all previous hidden layers

        val eta_o_sz = eta / x.dim1                                      // learning rate divided by the size of this mini-batch
        for (i <- 0 until nl1) {
            ww(i) *= 1.0 - eta * (lambda / m)                            // regularization factor, weight decay
            ww(i) -= as(i).t * δs(i) * eta_o_sz                          // update weights
            bi(i) -= δs(i).mean * eta                                    // update biases
        } // for

        ee.normF ~^ 2                                                    // return sse for this batch
    } // updateWeights

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a new input vector 'v', predict the output/response vector 'f(v)'.
     *  @param v  the new input vector
     */
    def predictV (v: VectoD): VectoD =
    {
        var a = v
        for (i <- 0 until nl1) a = actfV(i)((ww(i) dot a) + bi(i))
        a
    } // predictV

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given an input matrix 'x', predict the output/response matrix 'f(x)'.
     *  @param x  the input matrix
     */
    def predict (x: MatriD): MatriD =
    {
        var a = x
        for (i <- 0 until nl1) a = actfM(i)(a * ww(i) + bi(i))
        a
    } // predict

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Perform 'k'-fold cross-validation.
     *  @param k      the number of folds
     *  @param rando  whether to use randomized cross-validation
     */
    def crossVal (k: Int = 10, rando: Boolean = true)
    {
        crossValidate ((x: MatriD, y: MatriD) => new NeuralNet_XL (x, y), k, rando)
    } // crossVal

} // NeuralNet_XL class
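
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
// Usage note (illustrative): the constructor requires nh.length + 1 activation
// functions and as many derivatives, one per pair of adjacent layers.  For two
// hidden layers, e.g.,
//     new NeuralNet_XL (x, y, Array (5, 10),
//                       actfV  = Array (sigmoidV, sigmoidV, sigmoidV),
//                       actfDM = Array (sigmoidDM, sigmoidDM, sigmoidDM))
// See `NeuralNet_XLTest2` below for a runnable sketch.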

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `NeuralNet_XLTest` object is used to test the `NeuralNet_XL` class.
 *  @see  www4.rgu.ac.uk/files/chapter3%20-%20bp.pdf
 *  > runMain scalation.analytics.NeuralNet_XLTest
 */
object NeuralNet_XLTest extends App
{
    val s = 0                                                            // random number stream to use
    val x = new MatrixD ((3, 3), 1.0, 0.35, 0.9,                         // training data - input matrix (m vectors)
                                 1.0, 0.20, 0.7,
                                 1.0, 0.40, 0.95)
    val y = new MatrixD ((3, 2), 0.5, 0.4,                               // training data - output matrix (m vectors)
                                 0.3, 0.3,
                                 0.6, 0.5)

    println ("input  matrix x = " + x)
    println ("output matrix y = " + y)

    val nn = new NeuralNet_XL (x, y, Array (3), bSize = 1)               // create a NeuralNet_XL

    banner ("NeuralNet_XLTest: Set the weight matrices randomly")

    nn.setWeights (s)                                                    // set weights randomly
    println ("weights = " + nn.weights)
    nn.eval ()
    nn.fitMap ()

    var yp = nn.predict (x)                                              // predicted output values
    println ("target    output: y  = " + y)
    println ("predicted output: yp = " + yp)

//  for (eta <- 0.5 to 10.0 by 0.5) {
    for (i <- 1 to 20) {
        val eta = i * 0.5
        banner (s"NeuralNet_XLTest: Fit the parameters ww and bi using optimization with learning rate $eta")

        nn.reset (eta)
        nn.train ().eval ()                                              // fit the weights using training data
        println ("ww = " + nn.weights.deep)
        nn.fitMap ()

//      yp = nn.predict (x)                                              // predicted output values
        println ("target    output: y  = " + y)
//      println ("predicted output: yp = " + yp)
        println ("yp = " + nn.predict (x(0)))                            // predicted output values for row 0
    } // for

    banner ("NeuralNet_XLTest: Compare with Linear Regression - first column of y")

    val y0  = y.col(0)                                                   // use first column of matrix y
    val rg0 = new Regression (x, y0)                                     // create a Regression model
    rg0.train ().eval ()
    println ("b      = " + rg0.parameter)
    println ("fitMap = " + rg0.fitMap)

    val y0p = rg0.predict (x)                                            // predicted output values
    println ("target    output: y0  = " + y0)
    println ("predicted output: y0p = " + y0p)

    banner ("NeuralNet_XLTest: Compare with Linear Regression - second column of y")

    val y1  = y.col(1)                                                   // use second column of matrix y
    val rg1 = new Regression (x, y1)                                     // create a Regression model
    rg1.train ().eval ()
    println ("b      = " + rg1.parameter)
    println ("fitMap = " + rg1.fitMap)

    val y1p = rg1.predict (x)                                            // predicted output values
    println ("target    output: y1  = " + y1)
    println ("predicted output: y1p = " + y1p)

} // NeuralNet_XLTest object
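
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `NeuralNet_XLTest2` object sketches how to build a deeper network with
 *  two hidden layers and explicitly supplied activation functions.  This object
 *  is an illustrative addition (not part of the original test suite): its name
 *  is hypothetical and it reuses only the API exercised in `NeuralNet_XLTest`
 *  ('sigmoidV'/'sigmoidDM' from `ActivationFun`, train, eval, fitMap, predict).
 *  > runMain scalation.analytics.NeuralNet_XLTest2
 */
object NeuralNet_XLTest2 extends App
{
    val x = new MatrixD ((3, 3), 1.0, 0.35, 0.9,                         // training data - input matrix
                                 1.0, 0.20, 0.7,
                                 1.0, 0.40, 0.95)
    val y = new MatrixD ((3, 2), 0.5, 0.4,                               // training data - output matrix
                                 0.3, 0.3,
                                 0.6, 0.5)

    // two hidden layers (sizes 4 and 3) => nh.length + 1 = 3 activation functions
    val nn = new NeuralNet_XL (x, y, Array (4, 3), bSize = 1,
                               actfV  = Array (sigmoidV, sigmoidV, sigmoidV),
                               actfDM = Array (sigmoidDM, sigmoidDM, sigmoidDM))

    nn.train ().eval ()                                                  // fit the weights using training data
    nn.fitMap ()
    println ("predicted output: yp = " + nn.predict (x))

} // NeuralNet_XLTest2 object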