//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 1.5
 *  @date    Sun Dec 23 15:53:01 EST 2018
 *  @see     LICENSE (MIT style license file).
 */

package scalation.analytics

import scala.math.sqrt

import scalation.linalgebra.{MatriD, VectorI, VectoD, VectorD}
import scalation.math.double_exp
import scalation.random.PermutedVecI
import scalation.random.{RandomMatD, RandomVecD}
import scalation.random.RNGStream.ranStream
import scalation.util.CircularQueue

import ActivationFun._

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `Optimizer` object provides functions to optimize the parameters/weights
 *  of Neural Networks with various numbers of layers.
 */
object Optimizer
{
    val hp = new HyperParameter
    hp += ("eta", 0.1, 0.1)                                  // learning/convergence rate
    hp += ("bSize", 10, 10)                                  // batch size
    hp += ("maxEpochs", 10000, 10000)                        // maximum number of training epochs

    private val DEBUG         = false                        // debug flag
    private val ADJUST_PERIOD = 100                          // number of epochs before adjusting learning rate
    private val ADJUST_FACTOR = 1.1                          // learning rate adjustment factor (1+)
    private val EPSILON       = 1E-7                         // number close to zero
    private val UP_LIMIT      = 4                            // stopping rule
    private val NSTEPS        = 16                           // steps for eta

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Deep assign matrix 'bb' to matrix 'aa' '(aa = bb)'.
     *  @param aa  the matrix to be assigned to
     *  @param bb  the matrix whose values are copied
     */
    def assign (aa: MatriD, bb: MatriD)
    {
        for (i <- aa.range1) aa(i).set (bb(i)())
    } // assign

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the sum of squared errors (sse).
     *  @param y   the actual response/output vector
     *  @param yp  the predicted response/output vector
     */
    def sseF (y: VectoD, yp: VectoD): Double =
    {
        val e = y - yp                                       // error vector
        e.normSq
    } // sseF

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the sum of squared errors (sse).
     *  @param y   the actual response/output matrix
     *  @param yp  the predicted response/output matrix
     */
    def sseF (y: MatriD, yp: MatriD): Double =
    {
        val ee = y - yp                                      // error matrix
        ee.normF ~^ 2
    } // sseF

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the default upper limit on initial random weights, 1 / sqrt (rows).
     *  @param rows  the number of rows
     */
    def limitF (rows: Int): Double = 1.0 / sqrt (rows)

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Generate a random weight/parameter vector with element values in (0, limit).
     *  @param rows    the number of rows
     *  @param stream  the random number stream to use
     *  @param limit   the maximum value for any weight
     */
    def weightVec (rows: Int, stream: Int = 0, limit: Double = -1.0): VectoD =
    {
        val lim = if (limit <= 0.0) limitF (rows) else limit
        val rvg = new RandomVecD (rows, lim, 0.0, stream = stream)    // change stream to get different random numbers
        rvg.gen
    } // weightVec
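    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Worked example (added sketch, not part of the original API; the method name
     *  'sseFSketch' is hypothetical): for y = (1, 2, 3) and yp = (1.5, 1.5, 3),
     *  the error vector is e = (-0.5, 0.5, 0.0), so 'sseF' should return 0.5.
     */
    private def sseFSketch (): Double =
    {
        val y  = VectorD (1.0, 2.0, 3.0)                     // actual response values
        val yp = VectorD (1.5, 1.5, 3.0)                     // predicted response values
        sseF (y, yp)                                         // (-0.5)^2 + 0.5^2 + 0.0^2 = 0.5
    } // sseFSketch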
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Generate a random weight/parameter matrix with element values in (0, limit).
     *  @param rows    the number of rows
     *  @param cols    the number of columns
     *  @param stream  the random number stream to use
     *  @param limit   the maximum value for any weight
     */
    def weightMat (rows: Int, cols: Int, stream: Int = 0, limit: Double = -1.0): MatriD =
    {
        val lim = if (limit <= 0.0) limitF (rows) else limit
        val rmg = new RandomMatD (rows, cols, lim, stream = stream)   // change stream to get different random numbers
        rmg.gen
    } // weightMat

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given training data 'x' and 'y' for a 2-layer, single output neural network, fit
     *  the parameter/weight vector 'b'.  Try NSTEPS+1 learning rates evenly spaced over
     *  the interval 'etaI' and retain the weights giving the lowest sse.
     *  @param x          the m-by-nx input matrix (training data consisting of m input vectors)
     *  @param y          the m output vector (training data consisting of m output values)
     *  @param b          the nx parameter/weight vector for layer 1->2 (input to output)
     *  @param etaI       the lower and upper bounds on the learning/convergence rate
     *  @param bSize      the batch size
     *  @param maxEpochs  the maximum number of training epochs/iterations
     *  @param f1         the activation function family for layers 1->2 (input to output)
     */
    def optimizeI (x: MatriD, y: VectoD, b: VectoD, etaI: (Double, Double),
                   bSize: Int = hp.default ("bSize").toInt,
                   maxEpochs: Int = hp.default ("maxEpochs").toInt,
                   f1: AFF = f_sigmoid): (Double, Int) =
    {
        println (s"optimizeI: etaI = $etaI")
        var best = (Double.MaxValue, -1)
        var b_best: VectoD = null

        for (i <- 0 to NSTEPS) {
            val step = (etaI._2 - etaI._1) / NSTEPS          // compute step size
            val eta  = etaI._1 + i * step                    // current learning rate
            b.set (weightVec (b.dim)())                      // randomly assign weights

            val result = optimize (x, y, b, eta, bSize, maxEpochs, f1)
            println (s"optimizeI: eta = $eta, result = $result")
            if (result._1 < best._1) {
                best   = result
                b_best = b.copy                              // save best weights
            } // if
        } // for

        b.set (b_best ())                                    // use best weights
        best
    } // optimizeI
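    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Illustrative sketch (added; the method name 'etaGridSketch' is hypothetical):
     *  the grid of learning rates that 'optimizeI', 'optimize2I' and 'optimize3I'
     *  sweep over for a given interval 'etaI'.  For example, etaI = (0.01, 0.17)
     *  gives the NSTEPS+1 = 17 values 0.01, 0.02, ..., 0.17.
     *  @param etaI  the lower and upper bounds on the learning rate
     */
    private def etaGridSketch (etaI: (Double, Double)): IndexedSeq [Double] =
    {
        val step = (etaI._2 - etaI._1) / NSTEPS              // step size between successive rates
        for (i <- 0 to NSTEPS) yield etaI._1 + i * step      // the same grid 'optimizeI' iterates over
    } // etaGridSketch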
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given training data 'x' and 'y' for a 2-layer, single output neural network, fit
     *  the parameter/weight vector 'b'.  Iterate over several epochs, where each epoch divides
     *  the training set into 'nBat' batches.  Each batch is used to update the weights.
     *  @param x          the m-by-nx input matrix (training data consisting of m input vectors)
     *  @param y          the m output vector (training data consisting of m output values)
     *  @param b          the nx parameter/weight vector for layer 1->2 (input to output)
     *  @param eta_       the initial learning/convergence rate
     *  @param bSize      the batch size
     *  @param maxEpochs  the maximum number of training epochs/iterations
     *  @param f1         the activation function family for layers 1->2 (input to output)
     */
    def optimize (x: MatriD, y: VectoD, b: VectoD,
                  eta_ : Double = hp.default ("eta"),
                  bSize: Int = hp.default ("bSize").toInt,
                  maxEpochs: Int = hp.default ("maxEpochs").toInt,
                  f1: AFF = f_sigmoid): (Double, Int) =
    {
        val queue   = new CircularQueue [VectoD] (UP_LIMIT)  // circular queue of recent weights
        val idx     = VectorI.range (0, x.dim1)              // instance index range
        val permGen = PermutedVecI (idx, ranStream)          // permutation vector generator
        val nBat    = x.dim1 / bSize                         // the number of batches
        var eta     = eta_                                   // set initial learning rate
        var up      = 0                                      // counter for number of times moving up
        var sse0    = Double.MaxValue                        // hold prior value of sse
        println (s"optimize: bSize = $bSize, nBat = $nBat, eta = $eta")

        for (epoch <- 1 to maxEpochs) {                      // iterate over each epoch
            val batches = permGen.igen.split (nBat)          // permute indices and split into nBat batches

            for (ib <- batches) b -= updateWeight (x(ib), y(ib))    // iteratively update weight vector b
            queue += b.copy                                  // save most recent weights

            val sse = sseF (y, f1.fV (x * b))                // recompute sum of squared errors
            if (DEBUG) println (s"optimize: weights for $epoch th epoch: sse = $sse")
            if (sse > sse0 + EPSILON) up += 1 else up = 0
//          if (up > UP_LIMIT) return (sse, epoch)           // return early if moving up for too long
            if (up > UP_LIMIT) return jumpBack (epoch)       // return early if moving up for too long
            sse0 = sse                                       // save prior sse
            if (epoch % ADJUST_PERIOD == 0) eta *= ADJUST_FACTOR    // adjust the learning rate
        } // for

        //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
        /* Compute the parameter/weight vector 'b' update based on the current batch.
         * Take a step in the direction opposite to the gradient.
         * @param x  the input matrix for the current batch
         * @param y  the output vector for the current batch
         */
        def updateWeight (x: MatriD, y: VectoD): VectoD =
        {
            val yp = f1.fV (x * b)                           // Yp = f (X b)
            val e  = yp - y                                  // -e where e is the error vector
            val dy = f1.dV (yp) * e                          // delta y
            val eta_o_sz = eta / x.dim1                      // eta over the current batch size
            x.t * dy * eta_o_sz                              // return change in input-output weights
        } // updateWeight

        //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
        /* Jump back to the weights before the objective function started going up.
         * @param ep  the number of epochs
         */
        def jumpBack (ep: Int): (Double, Int) =
        {
            val qlength = queue.size
            val old_b   = queue.dequeue ()
            b.set (old_b())                                  // restore previous weights
            (sseF (y, f1.fV (x * b)), ep - qlength)
        } // jumpBack

        if (DEBUG) println (s"optimize: weights b = $b")
        (sseF (y, f1.fV (x * b)), maxEpochs)                 // return sse and number of epochs
    } // optimize
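    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Usage sketch (added; the method name 'optimizeSketch' and the hyper-parameter
     *  values are illustrative assumptions): train a 2-layer, single output network
     *  on data 'x' and 'y' starting from random weights.
     *  @param x  the m-by-nx input matrix
     *  @param y  the m output vector
     */
    private def optimizeSketch (x: MatriD, y: VectoD): (Double, Int) =
    {
        val b = weightVec (x.dim2)                           // random initial weight vector (one weight per column of x)
        optimize (x, y, b, 0.1, 10)                          // train with eta = 0.1 and batch size 10
    } // optimizeSketch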
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given training data 'x' and 'y' for a 2-layer neural network, fit the
     *  parameter/weight matrix 'bb'.  Try NSTEPS+1 learning rates evenly spaced over
     *  the interval 'etaI' and retain the weights giving the lowest sse.
     *  @param x          the m-by-nx input matrix (training data consisting of m input vectors)
     *  @param y          the m-by-ny output matrix (training data consisting of m output vectors)
     *  @param bb         the nx-by-ny parameter/weight matrix for layer 1->2 (input to output)
     *  @param etaI       the lower and upper bounds on the learning/convergence rate
     *  @param bSize      the batch size
     *  @param maxEpochs  the maximum number of training epochs/iterations
     *  @param f1         the activation function family for layers 1->2 (input to output)
     */
    def optimize2I (x: MatriD, y: MatriD, bb: MatriD, etaI: (Double, Double),
                    bSize: Int = hp.default ("bSize").toInt,
                    maxEpochs: Int = hp.default ("maxEpochs").toInt,
                    f1: AFF = f_sigmoid): (Double, Int) =
    {
        println (s"optimize2I: etaI = $etaI")
        var best = (Double.MaxValue, -1)
        var bb_best: MatriD = null

        for (i <- 0 to NSTEPS) {
            val step = (etaI._2 - etaI._1) / NSTEPS          // compute step size
            val eta  = etaI._1 + i * step                    // current learning rate
            assign (bb, weightMat (bb.dim1, bb.dim2))        // randomly assign weights to bb

            val result = optimize2 (x, y, bb, eta, bSize, maxEpochs, f1)
            println (s"optimize2I: eta = $eta, result = $result")
            if (result._1 < best._1) {
                best    = result
                bb_best = bb.copy                            // save best weights
            } // if
        } // for

        assign (bb, bb_best)                                 // use best weights
        best
    } // optimize2I

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given training data 'x' and 'y' for a 2-layer neural network, fit the parameter/weight
     *  matrix 'bb'.  Iterate over several epochs, where each epoch divides the training set
     *  into 'nBat' batches.  Each batch is used to update the weights.
     *  @param x          the m-by-nx input matrix (training data consisting of m input vectors)
     *  @param y          the m-by-ny output matrix (training data consisting of m output vectors)
     *  @param bb         the nx-by-ny parameter/weight matrix for layer 1->2 (input to output)
     *  @param eta_       the initial learning/convergence rate
     *  @param bSize      the batch size
     *  @param maxEpochs  the maximum number of training epochs/iterations
     *  @param f1         the activation function family for layers 1->2 (input to output)
     */
    def optimize2 (x: MatriD, y: MatriD, bb: MatriD,
                   eta_ : Double = hp.default ("eta"),
                   bSize: Int = hp.default ("bSize").toInt,
                   maxEpochs: Int = hp.default ("maxEpochs").toInt,
                   f1: AFF = f_sigmoid): (Double, Int) =
    {
        val queue   = new CircularQueue [MatriD] (UP_LIMIT)  // circular queue of recent weights
        val idx     = VectorI.range (0, x.dim1)              // instance index range
        val permGen = PermutedVecI (idx, ranStream)          // permutation vector generator
        val nBat    = x.dim1 / bSize                         // the number of batches
        var eta     = eta_                                   // set initial learning rate
        var up      = 0                                      // counter for number of times moving up
        var sse0    = Double.MaxValue                        // hold prior value of sse
        println (s"optimize2: bSize = $bSize, nBat = $nBat")

        for (epoch <- 1 to maxEpochs) {                      // iterate over each epoch
            val batches = permGen.igen.split (nBat)          // permute indices and split into nBat batches

            for (ib <- batches) bb -= updateWeight (x(ib), y(ib))   // iteratively update weight matrix bb
            queue += bb.copy                                 // save most recent weights

            val sse = sseF (y, f1.fM (x * bb))               // recompute sum of squared errors
            if (DEBUG) println (s"optimize2: weights for $epoch th epoch: sse = $sse")
            if (sse > sse0 + EPSILON) up += 1 else up = 0
//          if (up > UP_LIMIT) return (sse, epoch)           // return early if moving up for too long
            if (up > UP_LIMIT) return jumpBack (epoch)       // return early if moving up for too long
            sse0 = sse                                       // save prior sse
            if (epoch % ADJUST_PERIOD == 0) eta *= ADJUST_FACTOR    // adjust the learning rate
        } // for

        //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
        /* Update the parameter/weight matrix 'bb' based on the current batch.
         * Take a step in the direction opposite to the gradient.
         * @param x  the input matrix for the current batch
         * @param y  the output matrix for the current batch
         */
        def updateWeight (x: MatriD, y: MatriD): MatriD =
        {
            val yp = f1.fM (x * bb)                          // Yp = f (X B)
            val ee = yp - y                                  // -E where E is the error matrix
            val dy = f1.dM (yp) ** ee                        // delta y
            val eta_o_sz = eta / x.dim1                      // eta over the current batch size
            x.t * dy * eta_o_sz                              // return change in input-output weights
        } // updateWeight

        //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
        /* Jump back to the weights before the objective function started going up.
         * @param ep  the number of epochs
         */
        def jumpBack (ep: Int): (Double, Int) =
        {
            val qlength = queue.size
            val old_bb  = queue.dequeue ()
            for (i <- bb.range1) bb(i) = old_bb(i)           // restore previous weights
            (sseF (y, f1.fM (x * bb)), ep - qlength)
        } // jumpBack

        if (DEBUG) println (s"optimize2: weights bb = $bb")
        (sseF (y, f1.fM (x * bb)), maxEpochs)                // return sse and number of epochs
    } // optimize2
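    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Usage sketch (added; the method name 'optimize2Sketch' and the interval are
     *  illustrative assumptions): train a 2-layer network with multiple outputs,
     *  letting 'optimize2I' pick the best learning rate in the given interval.
     *  @param x  the m-by-nx input matrix
     *  @param y  the m-by-ny output matrix
     */
    private def optimize2Sketch (x: MatriD, y: MatriD): (Double, Int) =
    {
        val bb = weightMat (x.dim2, y.dim2)                  // random initial nx-by-ny weight matrix
        optimize2I (x, y, bb, (0.01, 0.5))                   // search learning rates over (0.01, 0.5)
    } // optimize2Sketch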
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given training data 'x' and 'y' for a 3-layer neural network, fit the
     *  parameter/weight matrices 'aa' and 'bb'.  Try NSTEPS+1 learning rates evenly
     *  spaced over the interval 'etaI' and retain the weights giving the lowest sse.
     *  @param x          the m-by-nx input matrix (training data consisting of m input vectors)
     *  @param y          the m-by-ny output matrix (training data consisting of m output vectors)
     *  @param aa         the nx-by-nz parameter/weight matrix for layer 1->2 (input to hidden)
     *  @param bb         the nz-by-ny parameter/weight matrix for layer 2->3 (hidden to output)
     *  @param etaI       the lower and upper bounds on the learning/convergence rate
     *  @param bSize      the batch size
     *  @param maxEpochs  the maximum number of training epochs/iterations
     *  @param f1         the activation function family for layers 1->2 (input to hidden)
     *  @param f2         the activation function family for layers 2->3 (hidden to output)
     */
    def optimize3I (x: MatriD, y: MatriD, aa: MatriD, bb: MatriD, etaI: (Double, Double),
                    bSize: Int = hp.default ("bSize").toInt,
                    maxEpochs: Int = hp.default ("maxEpochs").toInt,
                    f1: AFF = f_sigmoid, f2: AFF = f_sigmoid): (Double, Int) =
    {
        println (s"optimize3I: etaI = $etaI")
        var best = (Double.MaxValue, -1)
        var aa_best, bb_best: MatriD = null

        for (i <- 0 to NSTEPS) {
            val step = (etaI._2 - etaI._1) / NSTEPS          // compute step size
            val eta  = etaI._1 + i * step                    // current learning rate
            assign (aa, weightMat (aa.dim1, aa.dim2))        // randomly assign weights to aa
            assign (bb, weightMat (bb.dim1, bb.dim2))        // randomly assign weights to bb

            val result = optimize3 (x, y, aa, bb, eta, bSize, maxEpochs, f1, f2)
            println (s"optimize3I: eta = $eta, result = $result")
            if (result._1 < best._1) {
                best = result
                aa_best = aa.copy; bb_best = bb.copy         // save best weights
            } // if
        } // for

        assign (aa, aa_best); assign (bb, bb_best)           // use best weights
        best
    } // optimize3I
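    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Usage sketch (added; the method name 'optimize3Sketch', the default hidden
     *  size and the learning rate are illustrative assumptions): train a 3-layer
     *  network with 'nz' hidden units.
     *  @param x   the m-by-nx input matrix
     *  @param y   the m-by-ny output matrix
     *  @param nz  the number of hidden units
     */
    private def optimize3Sketch (x: MatriD, y: MatriD, nz: Int = 4): (Double, Int) =
    {
        val aa = weightMat (x.dim2, nz)                      // input-to-hidden weights (nx-by-nz)
        val bb = weightMat (nz, y.dim2)                      // hidden-to-output weights (nz-by-ny)
        optimize3 (x, y, aa, bb, 0.1)                        // train with eta = 0.1
    } // optimize3Sketch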
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given training data 'x' and 'y' for a 3-layer neural network, fit the parameter/weight
     *  matrices 'aa' and 'bb'.  Iterate over several epochs, where each epoch divides the
     *  training set into 'nBat' batches.  Each batch is used to update the weights.
     *  @param x          the m-by-nx input matrix (training data consisting of m input vectors)
     *  @param y          the m-by-ny output matrix (training data consisting of m output vectors)
     *  @param aa         the nx-by-nz parameter/weight matrix for layer 1->2 (input to hidden)
     *  @param bb         the nz-by-ny parameter/weight matrix for layer 2->3 (hidden to output)
     *  @param eta_       the initial learning/convergence rate
     *  @param bSize      the batch size
     *  @param maxEpochs  the maximum number of training epochs/iterations
     *  @param f1         the activation function family for layers 1->2 (input to hidden)
     *  @param f2         the activation function family for layers 2->3 (hidden to output)
     */
    def optimize3 (x: MatriD, y: MatriD, aa: MatriD, bb: MatriD,
                   eta_ : Double = hp.default ("eta"),
                   bSize: Int = hp.default ("bSize").toInt,
                   maxEpochs: Int = hp.default ("maxEpochs").toInt,
                   f1: AFF = f_sigmoid, f2: AFF = f_sigmoid): (Double, Int) =
    {
        val queue   = new CircularQueue [(MatriD, MatriD)] (UP_LIMIT)   // circular queue of recent weights
        val idx     = VectorI.range (0, x.dim1)              // instance index range
        val permGen = PermutedVecI (idx, ranStream)          // permutation vector generator
        val nBat    = x.dim1 / bSize                         // the number of batches
        var eta     = eta_                                   // set initial learning rate
        var up      = 0                                      // counter for number of times moving up
        var sse0    = Double.MaxValue                        // hold prior value of sse
        println (s"optimize3: bSize = $bSize, nBat = $nBat")

        for (epoch <- 1 to maxEpochs) {                      // iterate over each epoch
            val batches = permGen.igen.split (nBat)          // permute indices and split into nBat batches

            for (ib <- batches) {
                val ab = updateWeight (x(ib), y(ib))         // iteratively update weight matrices aa and bb
                aa -= ab._1; bb -= ab._2
            } // for
            queue += ((aa.copy, bb.copy))                    // save most recent weights

            val sse = sseF (y, f2.fM (f1.fM (x * aa) * bb))  // recompute sum of squared errors
            if (DEBUG) println (s"optimize3: weights for $epoch th epoch: sse = $sse")
            if (sse > sse0 + EPSILON) up += 1 else up = 0
//          if (up > UP_LIMIT) return (sse, epoch)           // return early if moving up for too long
            if (up > UP_LIMIT) return jumpBack (epoch)       // return early if moving up for too long
            sse0 = sse                                       // save prior sse
            if (epoch % ADJUST_PERIOD == 0) eta *= ADJUST_FACTOR    // adjust the learning rate
        } // for

        //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
        /* Compute the parameter/weight matrices 'aa' and 'bb' updates based on the current batch.
         * Take a step in the direction opposite to the gradient.
         * @param x  the input matrix for the current batch
         * @param y  the output matrix for the current batch
         */
        def updateWeight (x: MatriD, y: MatriD): (MatriD, MatriD) =
        {
            var z  = f1.fM (x * aa); /* z.setCol (0, _1) */  // Z = f (X A) (and opt. set hidden bias)
            var yp = f2.fM (z * bb)                          // Yp = f (Z B)
            var ee = yp - y                                  // -E where E is the error matrix
            val dy = f2.dM (yp) ** ee                        // delta y
            val dz = f1.dM (z) ** (dy * bb.t)                // delta z
            val eta_o_sz = eta / x.dim1                      // eta over the current batch size
            (x.t * dz * eta_o_sz, z.t * dy * eta_o_sz)       // return change in (input-hidden, hidden-output) weights
        } // updateWeight
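        //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
        /* Gradient recap (added explanatory note for the code above): with Z = f1 (X A)
         * and Yp = f2 (Z B), the (negated) error is E = Yp - Y, the output delta is
         * DY = f2.dM (Yp) ** E and the hidden delta is DZ = f1.dM (Z) ** (DY * B.t).
         * Scaling by eta over the batch size gives the returned weight changes
         * X.t * DZ * eta/sz for 'aa' and Z.t * DY * eta/sz for 'bb'.
         */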
        //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
        /* Jump back to the weights before the objective function started going up.
         * @param ep  the number of epochs
         */
        def jumpBack (ep: Int): (Double, Int) =
        {
            val qlength = queue.size
            val (old_aa, old_bb) = queue.dequeue ()
            for (i <- aa.range1) aa(i) = old_aa(i)           // restore previous weights for aa
            for (i <- bb.range1) bb(i) = old_bb(i)           // restore previous weights for bb
            (sseF (y, f2.fM (f1.fM (x * aa) * bb)), ep - qlength)
        } // jumpBack

        if (DEBUG) println (s"optimize3: weights aa = $aa \n bb = $bb")
        (sseF (y, f2.fM (f1.fM (x * aa) * bb)), maxEpochs)   // return sse and number of epochs
    } // optimize3

} // Optimizer object
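//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `OptimizerSketch` object is a minimal, illustrative way to call the optimizers
 *  on a tiny, made-up dataset.  It is an added example, not part of the original file:
 *  the object name, the data values and the use of the `MatrixD` tuple-dimension
 *  constructor are assumptions for illustration only.
 *  > runMain scalation.analytics.OptimizerSketch
 */
object OptimizerSketch extends App
{
    import scalation.linalgebra.MatrixD
    import Optimizer._

    val x = new MatrixD ((4, 2), 0.0, 0.0,                   // 4 sample inputs with 2 features each
                                 0.0, 1.0,
                                 1.0, 0.0,
                                 1.0, 1.0)
    val y = VectorD (0.1, 0.6, 0.6, 0.9)                     // 4 target outputs

    val b = weightVec (x.dim2)                               // random initial weight vector
    val (sse, epochs) = optimize (x, y, b, 0.5, 2)           // train: eta = 0.5, batch size 2
    println (s"b = $b, sse = $sse, epochs = $epochs")
} // OptimizerSketch object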