//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 2.0
 *  @date    Mon Sep 9 13:30:41 EDT 2013
 *  @see     LICENSE (MIT style license file).
 *
 *  @note    Model: Perceptron (single output 2-layer Neural-Network)
 *
 *  @see     hebb.mit.edu/courses/9.641/2002/lectures/lecture03.pdf
 */

package scalation
package modeling

import scala.runtime.ScalaRunTime.stringOf

import scalation.mathstat._

import ActivationFun._
import Initializer._

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `Perceptron` class supports single-output, 2-layer (input and output)
 *  Neural-Networks.  Although perceptrons are typically used for classification,
 *  this class is used for prediction.  Given several input vectors and output
 *  values (training data), fit the weights/parameters b connecting the layers,
 *  so that for a new input vector z, the net can predict the output value, i.e.,
 *      yp = f (b dot z)
 *  The parameter vector b (w) gives the weights between the input and output layers.
 *  Note, b0 is treated as the bias, so x0 must be 1.0.
 *  @param x       the data/input m-by-n matrix (data consisting of m input vectors)
 *  @param y       the response/output m-vector (data consisting of m output values)
 *  @param fname_  the feature/variable names (defaults to null)
 *  @param hparam  the hyper-parameters for the model/network (defaults to Perceptron.hp)
 *  @param f       the activation function family for layers 1->2 (input to output)
 *  @param itran   the inverse transformation function to return responses to the original scale
 */
class Perceptron (x: MatrixD, y: VectorD, fname_ : Array [String] = null,
                  hparam: HyperParameter = Perceptron.hp,
                  f: AFF = f_sigmoid, val itran: FunctionV2V = null)
      extends Predictor (x, y, fname_, hparam)
         with Fit (dfm = x.dim2 - 1, df = x.dim - x.dim2)
         with MonitorLoss:

    private val debug     = debugf ("Perceptron", false)               // debug function
    private val flaw      = flawf ("Perceptron")                       // flaw function
    private val (m, n)    = x.dims                                     // input data matrix dimensions
    private val η         = hparam ("eta").toDouble                    // the learning/convergence rate (requires adjustment)
    private val maxEpochs = hparam ("maxEpochs").toInt                 // the maximum number of training epochs/iterations

    modelName = "Perceptron_" + f.name

    if y.dim != m then flaw ("init", "dimensions of x and y are incompatible")

    println (s"Create a Perceptron with $n input nodes and 1 output node")

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Set the initial parameter/weight vector b manually before training.
     *  This is mainly for testing purposes.
     *  @param w0  the initial weights for parameter b
     */
    def setWeights (w0: VectorD): Unit = b = w0
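
    /*  For reference, a sketch of the gradient-descent update implemented by the
     *  train method below (this derivation is illustrative commentary, not part
     *  of the API).  With pre-activation u = X b, prediction yp = f(u) and error
     *  e = y - yp, the loss is sse = e dot e, whose gradient with respect to b is
     *      d(sse)/db = -2 X.Ƭ (f'(u) * e)
     *  Dropping the constant 2 (absorbed into the learning rate η) and writing
     *  the derivative in terms of the output, δ = -f'(yp) * e, gives the update
     *      b := b - η X.Ƭ δ
     *  which moves b in the direction opposite the gradient.
     */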
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given training data x_ and y_, fit the parameter/weight vector b.
     *  Minimize the error in the prediction by adjusting the weight vector b.
     *  The error e is simply the difference between the target value y_ and the
     *  predicted value yp.  Minimize the dot product of error with itself using
     *  gradient-descent (move in the opposite direction of the gradient).
     *  Iterate over several epochs (no batching).
     *  @param x_  the training/full data/input matrix
     *  @param y_  the training/full response/output vector
     */
    def train (x_ : MatrixD = x, y_ : VectorD = y): Unit =
        println (s"train: eta η = $η")
        if b == null then b = weightVec (n)                            // initialize parameters/weights
        var sse0 = Double.MaxValue
        var (go, epoch) = (true, 1)
        cfor (go && epoch <= maxEpochs, epoch += 1) {                  // epoch learning phase
            val yp = f.f_ (x_ * b)                                     // predicted output vector yp = f(Xb)
            e      = y_ - yp                                           // error vector for y (protected var from `Predictor`)
            val δ  = -f.d (yp) * e                                     // delta vector for y
            b     -= x_.Ƭ * δ * η                                      // update the parameters/weights (Ƭ for transpose)
            val sse = (y_ - f.f_ (x_ * b)).normSq                      // recompute sum of squared errors
            collectLoss (sse)                                          // collect loss per epoch
            debug ("train", s"parameters for $epoch th epoch: b = $b, sse = $sse")
            if sse >= sse0 then go = false                             // stop when sse increases
            else sse0 = sse                                            // save prior sse
        } // cfor
    end train

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Test a predictive model y_ = f(x_) + e and return its QoF vector.
     *  Testing may be in-sample (on the training set) or out-of-sample
     *  (on the testing set) as determined by the parameters passed in.
     *  Note: must call train before test.
     *  @param x_  the testing/full data/input matrix (defaults to full x)
     *  @param y_  the testing/full response/output vector (defaults to full y)
     */
    def test (x_ : MatrixD = x, y_ : VectorD = y): (VectorD, VectorD) =
        val yp = predict (x_)                                          // make predictions
        val yy = if itran == null then y_ else itran (y_)              // undo scaling, if used
        e = yy - yp                                                    // RECORD the residuals/errors (@see `Predictor`)
        (yp, diagnose (yy, yp))                                        // return predictions and QoF vector
    end test

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Train and test the predictive model y_ = f(x_) + e and report its QoF
     *  and plot its predictions.
     *  FIX - currently must override if y is transformed, @see `Predictor`
     *  @param x_  the training/full data/input matrix (defaults to full x)
     *  @param y_  the training/full response/output vector (defaults to full y)
     *  @param xx  the testing/full data/input matrix (defaults to full x)
     *  @param yy  the testing/full response/output vector (defaults to full y)
     */
    override def trainNtest (x_ : MatrixD = x, y_ : VectorD = getY)
                            (xx: MatrixD = x, yy: VectorD = y): (VectorD, VectorD) =
        train (x_, y_)
        debug ("trainNtest", s"b = $b")
        val (yp, qof) = test (xx, yy)
        println (report (qof))
        if DO_PLOT then
            val yy_ = if itran == null then yy else itran (yy)         // undo scaling, if used
            val (ryy, ryp) = orderByY (yy_, yp)                        // order by yy
            new Plot (null, ryy, ryp, s"$modelName: y actual, predicted")
        end if
        (yp, qof)
    end trainNtest

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a new input vector z, predict the output/response value f(z).
     *  @param z  the new input vector
     */
    override def predict (z: VectorD): Double =
        val yp = f.f (b dot z)                                         // scaled prediction
        if itran == null then yp else itran (VectorD (yp))(0)          // back to original scale
    end predict
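
    //  A worked example (illustrative only): with parameters b = (0.1, 0.2, 0.1)
    //  and input z = (1.0, 0.5, 0.5), b dot z = 0.1 + 0.2*0.5 + 0.1*0.5 = 0.25,
    //  so for sigmoid activation the scaled prediction is
    //      yp = sigmoid (0.25) = 1 / (1 + e^(-0.25)) ≈ 0.562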
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a new input matrix z, predict the output/response values f(z).
     *  @param z  the new input matrix
     */
    override def predict (z: MatrixD = x): VectorD =
        val yp = f.f_ (z * b)                                          // scaled predictions
        if itran == null then yp else itran (yp)                       // back to original scale
    end predict

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Build a sub-model that is restricted to the given columns of the data matrix.
     *  @param x_cols  the columns that the new model is restricted to
     */
    override def buildModel (x_cols: MatrixD): Perceptron =
        new Perceptron (x_cols, y, null, hparam, f, itran)
    end buildModel

end Perceptron


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `Perceptron` companion object provides factory methods for creating perceptrons.
 */
object Perceptron extends Scaling:

    /** hyper-parameters for tuning the optimization algorithms - user tuning
     */
    val hp = new HyperParameter
    hp += ("eta", 0.1, 0.1)                                            // learning/convergence rate
    hp += ("maxEpochs", 400, 400)                                      // maximum number of epochs/iterations

    private val debug = debugf ("Perceptron", false)                   // debug function

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Create a `Perceptron` with automatic rescaling from a combined data matrix.
     *  @param xy      the combined data/input and response/output matrix
     *  @param fname   the feature/variable names (defaults to null)
     *  @param hparam  the hyper-parameters (defaults to hp)
     *  @param f       the activation function family for layers 1->2 (input to output)
     *  @param col     the designated response column (defaults to the last column)
     */
    def apply (xy: MatrixD, fname: Array [String] = null,
               hparam: HyperParameter = hp, f: AFF = f_sigmoid)
              (col: Int = xy.dim2 - 1): Perceptron =
        var itran: FunctionV2V = null                                  // inverse transform -> original scale
        val (x, y) = (xy.not(?, col), xy(?, col))                      // column col is the response

        val x_s = if scale then rescaleX (x, f)
                  else x
        val y_s = if f.bounds != null then { val y_i = rescaleY (y, f); itran = y_i._2; y_i._1 }
                  else y

        debug ("apply", s"scaled: x = $x_s \n scaled y = $y_s")
        new Perceptron (x_s, y_s, fname, hparam, f, itran)
    end apply

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Create a `Perceptron` with automatic rescaling from a data matrix and response vector.
     *  @param x       the data/input matrix
     *  @param y       the response/output vector
     *  @param fname   the feature/variable names (defaults to null)
     *  @param hparam  the hyper-parameters (defaults to hp)
     *  @param f       the activation function family for layers 1->2 (input to output)
     */
    def rescale (x: MatrixD, y: VectorD, fname: Array [String] = null,
                 hparam: HyperParameter = hp, f: AFF = f_sigmoid): Perceptron =
        var itran: FunctionV2V = null                                  // inverse transform -> original scale

        val x_s = if scale then rescaleX (x, f)
                  else x
        val y_s = if f.bounds != null then { val y_i = rescaleY (y, f); itran = y_i._2; y_i._1 }
                  else y

        debug ("rescale", s"scaled: x = $x_s \n scaled y = $y_s")
        new Perceptron (x_s, y_s, fname, hparam, f, itran)
    end rescale

end Perceptron

import Perceptron.hp
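
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** A minimal usage sketch for the factory methods above (not one of the original
 *  test mains; the function name is made up for illustration).  It exercises the
 *  apply factory, which takes a combined matrix whose last column (by default)
 *  is the response, rescales as needed, then trains and tests in one call.
 *  > runMain scalation.modeling.perceptronFactorySketch
 */
@main def perceptronFactorySketch (): Unit =

    // 4 data points:        Constant x1    y
    val xy = MatrixD ((4, 3), 1.0, 1.0, 0.2,
                              1.0, 2.0, 0.4,
                              1.0, 3.0, 0.6,
                              1.0, 4.0, 0.8)

    val mod = Perceptron (xy)()                                        // response column defaults to the last
    mod.trainNtest ()()                                                // train and test the model

end perceptronFactorySketch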
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `perceptronTest` object trains a perceptron on a small dataset with variables
 *  x1 and x2.  The model equation is the following:
 *      y = sigmoid (b dot x) = sigmoid (b0 + b1*x1 + b2*x2)
 *  Does not call the train method; the improvement steps (for sigmoid or reLU)
 *  are coded explicitly below.
 *  > runMain scalation.modeling.perceptronTest
 */
@main def perceptronTest (): Unit =

/*
    // 9 data points:        Constant  x1   x2    y
    val xy = MatrixD ((9, 4), 1.0, 1.0, 1.0, 0.04,                     // dataset 1
                              1.0, 2.0, 1.0, 0.05,
                              1.0, 3.0, 1.0, 0.06,

                              1.0, 1.0, 2.0, 0.10,
                              1.0, 2.0, 2.0, 0.11,
                              1.0, 3.0, 2.0, 0.12,

                              1.0, 1.0, 3.0, 0.20,
                              1.0, 2.0, 3.0, 0.21,
                              1.0, 3.0, 3.0, 0.22)

    val b = VectorD ( 4.0, 0.58, 4.0)                                  // initial weights/parameters
//  val b = VectorD (-5.0, -0.5, 1.5)                                  // initial weights/parameters, better
*/

    // 9 data points:        Constant  x1   x2    y
    val xy = MatrixD ((9, 4), 1.0, 0.0, 0.0, 0.5,                      // dataset 2
                              1.0, 0.0, 0.5, 0.3,
                              1.0, 0.0, 1.0, 0.2,

                              1.0, 0.5, 0.0, 0.8,
                              1.0, 0.5, 0.5, 0.5,
                              1.0, 0.5, 1.0, 0.3,

                              1.0, 1.0, 0.0, 1.0,
                              1.0, 1.0, 0.5, 0.8,
                              1.0, 1.0, 1.0, 0.5)

    val b  = VectorD (0.1, 0.2, 0.1)                                   // initial weights/parameters
//  val b  = VectorD (0.1, 0.1, 0.1)                                   // initial weights/parameters
    val _1 = VectorD.one (xy.dim)                                      // vector of all ones

    println (s"xy = $xy")
    val (x, y) = (xy.not(?, 3), xy(?, 3))                              // input matrix, output/response vector
    val sst = (y - y.mean).normSq                                      // sum of squares total
    println (s"sst = $sst")

    val eta = 0.5
    hp("eta") = eta                                                    // try several values for eta
    val nn = new Perceptron (x, y, null, hp, f_reLU)                   // create a perceptron, user control
//  val nn = new Perceptron (x, y, null, hp)                           // create a perceptron, user control
//  val nn = Perceptron (xy, null, hp)                                 // create a perceptron, automatic scaling

    banner ("initialize")
    nn.setWeights (b)                                                  // set the parameters/weights

    for epoch <- 1 to 2 do
        banner (s"improvement step $epoch")
        val u   = x * b                                                // pre-activation value
        val yp  = nn.predict ()                                        // predicted response from nn
//      val yp2 = sigmoid_ (u)                                         // predicted response from calculation for sigmoid
        val yp2 = reLU_ (u)                                            // predicted response from calculation for reLU
        assert (yp == yp2)
        val e   = y - yp                                               // error
//      val fp  = yp * (_1 - yp)                                       // derivative (f') for sigmoid
        val fp  = u.map (z => is_ (z >= 0.0))                          // derivative (f') for reLU (matches f_reLU above)
        val d   = - e * fp                                             // delta
        val g   = x.transpose * d                                      // gradient
        val bup = g * eta                                              // parameter update
        b      -= bup                                                  // new parameter vector
        val sse = e dot e                                              // sum of squared errors

        println (s"u   = $u")
        println (s"y   = $y")
        println (s"yp  = $yp")
        println (s"yp2 = $yp2")
        println (s"e   = $e")
        println (s"fp  = $fp")
        println (s"d   = $d")
        println (s"g   = $g")
        println (s"bup = $bup")
        println (s"b   = $b")
        println (s"sse = $sse")
        println (s"R^2 = ${1 - sse/sst}")
        nn.setWeights (b)                                              // update the network's weights
    end for

end perceptronTest
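
//  For reference, the improvement steps in perceptronTest above compute, per epoch
//  (this summary just restates the code in equation form):
//      u  = X b                  pre-activation
//      yp = f(u)                 prediction
//      e  = y - yp               error
//      d  = -e * f'(u)           delta
//      g  = X.transpose * d      gradient of sse (up to a constant factor)
//      b := b - eta * g          parameter update
//  so sse = e dot e decreases when eta is small enough.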
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `perceptronTest2` object trains a perceptron on a small dataset of
 *  temperatures from counties in Texas where the variables/factors to consider
 *  are Latitude (x1), Elevation (x2) and Longitude (x3).  The model equation
 *  is the following:
 *      y = sigmoid (w dot x) = sigmoid (w0 + w1*x1 + w2*x2 + w3*x3)
 *  This test case illustrates how to transform the columns of the matrix
 *  so that the sigmoid activation function can work effectively.
 *  > runMain scalation.modeling.perceptronTest2
 */
@main def perceptronTest2 (): Unit =

    val fname = Array ("one", "Lat", "Elev", "Long")

    // 16 data points:        one      x1      x2       x3     y
    //                                Lat    Elev     Long  Temp      County
    val xy = MatrixD ((16, 5), 1.0, 29.767,   41.0,  95.367, 56.0,    // Harris
                               1.0, 32.850,  440.0,  96.850, 48.0,    // Dallas
                               1.0, 26.933,   25.0,  97.800, 60.0,    // Kennedy
                               1.0, 31.950, 2851.0, 102.183, 46.0,    // Midland
                               1.0, 34.800, 3840.0, 102.467, 38.0,    // Deaf Smith
                               1.0, 33.450, 1461.0,  99.633, 46.0,    // Knox
                               1.0, 28.700,  815.0, 100.483, 53.0,    // Maverick
                               1.0, 32.450, 2380.0, 100.533, 46.0,    // Nolan
                               1.0, 31.800, 3918.0, 106.400, 44.0,    // El Paso
                               1.0, 34.850, 2040.0, 100.217, 41.0,    // Collington
                               1.0, 30.867, 3000.0, 102.900, 47.0,    // Pecos
                               1.0, 36.350, 3693.0, 102.083, 36.0,    // Sherman
                               1.0, 30.300,  597.0,  97.700, 52.0,    // Travis
                               1.0, 26.900,  315.0,  99.283, 60.0,    // Zapata
                               1.0, 28.450,  459.0,  99.217, 56.0,    // Lasalle
                               1.0, 25.900,   19.0,  97.433, 62.0)    // Cameron

    val (x, y) = (xy.not (?, 4), xy(?, 4))
    println (s"x = $x")

    banner ("Perceptron with scaled y values")
    hp("eta") = 0.5                                                    // try several values for the learning rate
    val mod = Perceptron.rescale (x, y, fname)                         // factory method automatically rescales
//  val mod = new Perceptron (x, y, fname)                             // constructor does not automatically rescale

    mod.trainNtest ()()                                                // train and test the model
//  println (mod.summary ())                                           // parameter/coefficient statistics - FIX implement?

    banner ("scaled prediction")
    val yp = mod.predict ()                                            // scaled predicted output values for all x
    println ("target output:    y  = " + y)
    println ("predicted output: yp = " + yp)

    banner ("unscaled prediction")
    val (ymin, ymax) = (y.min, y.max)                                  // FIX - obtain from apply
    val ypu = unscaleV ((ymin, ymax), (0, 1)) (yp)                     // unscaled predicted output values for all x
    println ("target output:   y   = " + y)
    println ("unscaled output: ypu = " + ypu)

end perceptronTest2

import Example_AutoMPG._

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `perceptronTest3` main function tests the `Perceptron` class using the AutoMPG
 *  dataset.  It tests cross-validation.
 *  > runMain scalation.modeling.perceptronTest3
 */
@main def perceptronTest3 (): Unit =

//  println (s"ox = $ox")
//  println (s"y  = $y")
    println (s"ox_fname = ${stringOf (ox_fname)}")

    banner ("AutoMPG Perceptron")
    hp("eta") = 0.015                                                  // try several values for the learning rate
    val mod = Perceptron.rescale (ox, y, ox_fname)                     // create model with intercept (else pass x)
    mod.trainNtest ()()                                                // train and test the model
    mod.plotLoss ("Perceptron")                                        // loss function vs epochs
//  println (mod.summary ())                                           // parameter/coefficient statistics

    banner ("AutoMPG Validation Test")
    mod.validate ()()

    banner ("AutoMPG Cross-Validation Test")
    val stats = mod.crossValidate ()
    FitM.showQofStatTable (stats)

end perceptronTest3
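
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** A minimal tuning sketch (not one of the original test mains; the name is made
 *  up for illustration).  Since the learning rate requires adjustment, it sweeps
 *  several values of eta on the AutoMPG data, bracketing the 0.015 used above.
 *  > runMain scalation.modeling.perceptronEtaSweep
 */
@main def perceptronEtaSweep (): Unit =

    for eta <- List (0.005, 0.01, 0.015, 0.02, 0.03) do
        banner (s"AutoMPG Perceptron with eta = $eta")
        hp("eta") = eta                                                // set the learning rate
        val mod = Perceptron.rescale (ox, y, ox_fname)                 // create model with intercept
        mod.trainNtest ()()                                            // train and test the model
    end for

end perceptronEtaSweep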
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `perceptronTest4` main function tests the `Perceptron` class using the AutoMPG
 *  dataset.  Assumes no missing values.  It tests forward selection.
 *  > runMain scalation.modeling.perceptronTest4
 */
@main def perceptronTest4 (): Unit =

//  println (s"ox = $ox")
//  println (s"y  = $y")
    println (s"ox_fname = ${stringOf (ox_fname)}")

    banner ("AutoMPG Perceptron")
    hp("eta") = 0.01                                                   // try several values for the learning rate
    val mod = Perceptron.rescale (ox, y, ox_fname)                     // create model with intercept (else pass x)
    mod.trainNtest ()()                                                // train and test the model
//  println (mod.summary ())                                           // parameter/coefficient statistics

    banner ("Feature Selection Technique: Forward")
    val (cols, rSq) = mod.forwardSelAll ()                             // R^2, R^2 bar, R^2 cv
//  val (cols, rSq) = mod.backwardElimAll ()                           // R^2, R^2 bar, R^2 cv
    val k = cols.size
    println (s"k = $k, n = ${x.dim2}")
    new PlotM (null, rSq.transpose, Array ("R^2", "R^2 bar", "R^2 cv"),
               "R^2 vs n for Perceptron", lines = true)
    println (s"rSq = $rSq")

end perceptronTest4

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `perceptronTest5` main function tests the `Perceptron` class using the AutoMPG
 *  dataset.  Assumes no missing values.  It tests forward, backward and stepwise selection.
 *  > runMain scalation.modeling.perceptronTest5
 */
@main def perceptronTest5 (): Unit =

//  println (s"ox = $ox")
//  println (s"y  = $y")

    banner ("AutoMPG Perceptron")
    hp("eta") = 0.01                                                   // try several values for the learning rate
    val mod = Perceptron.rescale (ox, y, ox_fname)                     // create model with intercept (else pass x)
    mod.trainNtest ()()                                                // train and test the model
//  println (mod.summary ())                                           // parameter/coefficient statistics

    banner ("Cross-Validation")
    FitM.showQofStatTable (mod.crossValidate ())

    println (s"ox_fname = ${stringOf (ox_fname)}")

    for tech <- SelectionTech.values do
        banner (s"Feature Selection Technique: $tech")
        val (cols, rSq) = mod.selectFeatures (tech)                    // R^2, R^2 bar, R^2 cv
        val k = cols.size
        println (s"k = $k, n = ${x.dim2}")
        new PlotM (null, rSq.transpose, Array ("R^2", "R^2 bar", "R^2 cv"),
                   s"R^2 vs n for Perceptron with $tech", lines = true)
        println (s"$tech: rSq = $rSq")
    end for

end perceptronTest5
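
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** A minimal sketch (not one of the original test mains; the name is made up):
 *  checks the hand-computed prediction from the worked example in the class above,
 *  setting the weights directly via setWeights and predicting one input vector.
 *  > runMain scalation.modeling.perceptronPredictSketch
 */
@main def perceptronPredictSketch (): Unit =

    val x = MatrixD ((4, 3), 1.0, 0.0, 0.0,                            // tiny input matrix (from dataset 2)
                             1.0, 0.0, 0.5,
                             1.0, 0.5, 0.0,
                             1.0, 0.5, 0.5)
    val y = VectorD (0.5, 0.3, 0.8, 0.5)                               // matching responses

    val nn = new Perceptron (x, y)                                     // default sigmoid activation
    nn.setWeights (VectorD (0.1, 0.2, 0.1))                            // set parameters b manually
    val yp = nn.predict (VectorD (1.0, 0.5, 0.5))                      // f(b dot z) = sigmoid (0.25)
    println (s"yp = $yp")                                              // expect ≈ 0.562

end perceptronPredictSketch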