//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 1.4
 *  @date    Fri Mar 16 15:13:38 EDT 2018
 *  @see     LICENSE (MIT style license file).
 *
 *  @see     hebb.mit.edu/courses/9.641/2002/lectures/lecture03.pdf
 */

package scalation.analytics

import scala.math.sqrt

import scalation.linalgebra.{MatriD, VectoD, VectorD}
import scalation.stat.Statistic
import scalation.util.Error

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `NeuralNet` abstract class provides the basic structure and API for
 *  a variety of Neural Networks.
 *  @param x    the m-by-nx input matrix (training data consisting of m input vectors)
 *  @param y    the m-by-ny output matrix (training data consisting of m output vectors)
 *  @param eta  the learning/convergence rate (typically less than 1.0)
 */
abstract class NeuralNet (x: MatriD, y: MatriD,
                          private var eta: Double = 0.1)
         extends Predictor with Error
{
    private val DEBUG = true                                      // debug flag

    protected val m  = x.dim1                                     // number of data points (input vectors)
    protected val nx = x.dim2                                     // dimensionality of the input
    protected val ny = y.dim2                                     // dimensionality of the output
    protected val _1 = VectorD.one (m)                            // vector of all ones

    if (y.dim1 != m) flaw ("constructor", "row dimensions of x and y are incompatible")

    protected val fitA = Array.ofDim [Fit] (ny)
    for (k <- fitA.indices) fitA(k) = new Fit (y.col(k), nx, (nx-1, m - nx))

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the weight matrices.
     */
    def weights: Array [MatriD]

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Set the initial weight matrices with values in (0, limit) before training.
     *  @param stream  the random number stream to use
     *  @param limit   the maximum value for any weight
     */
    def setWeights (stream: Int = 0, limit: Double = 1.0 / sqrt (nx))

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Reset the learning rate 'eta'.
     *  @param eta_  the learning rate
     */
    def reset (eta_ : Double) { eta = eta_ }

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given training data 'x' and 'y', fit the parameter/weight matrices.
     */
    def train (): NeuralNet

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given training data 'x' and 'yy', fit the parameter/weight matrices.
     *  @param yy  the vector of outputs for the first variable (currently ignored)
     */
    def train (yy: VectoD): NeuralNet = train ()

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Evaluate the quality of the fit for the parameter/weight matrices on the
     *  entire (training) dataset.
     */
    def eval ()
    {
        val yp = predict (x)                                      // predict output/responses
        val e  = y - yp                                           // error matrix
        for (k <- e.range2) fitA(k).diagnose (e.col(k))           // compute diagnostics, per column
    } // eval

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Evaluate the quality of the fit for the parameter/weight matrices on the
     *  test dataset.
     *  @param xx  the test input data matrix
     *  @param yy  the test output response matrix
     */
    def eval (xx: MatriD, yy: MatriD)
    {
        val yp = predict (xx)                                     // predict output/responses
        val e  = yy - yp                                          // error matrix
        for (k <- e.range2) fitA(k).diagnose (e.col(k))           // compute diagnostics, per column
    } // eval
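    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Typical calling sequence (a minimal sketch, assuming a concrete subclass
     *  such as `NeuralNet_3L` is available): train the model, then call one of
     *  the 'eval' methods, e.g.,
     *  <p>
     *      model.train ().eval ()                  // in-sample diagnostics
     *      model.eval (x_test, y_test)             // held-out test diagnostics
     *  <p>
     *  This is the same sequence used internally by 'crossValidate' below.
     */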
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the labels for the quality of fit measures.
     */
    def fitLabel: Seq [String] = fitA(0).fitLabel

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Show 'fitMap' for each y-column.
     */
    def fitMap ()
    {
        var sst, sse = 0.0
        for (k <- fitA.indices) {
            val fit_k = fitA(k).fitMap
            println (s"For column $k: fitMap = \n $fit_k")
            sst += fit_k ("sst").toDouble
            sse += fit_k ("sse").toDouble
        } // for
        val rSq = (sst - sse) / sst                               // overall coefficient of determination
        println (s"overall: rSq = $rSq, sst = $sst, sse = $sse")
    } // fitMap

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a new input vector 'z', predict the output/response value 'f(z)'.
     *  Return only the first output variable's value.
     *  @param z  the new input vector
     */
    def predict (z: VectoD): Double = predictV (z)(0)

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a new input vector 'z', predict the output/response vector 'f(z)'.
     *  @param z  the new input vector
     */
    def predictV (z: VectoD): VectoD

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a new input matrix 'z', predict the output/response matrix 'f(z)'.
     *  @param z  the new input matrix
     */
    def predict (z: MatriD): MatriD

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Use 'k'-fold cross-validation to compute test quality of fit measures by
     *  dividing the dataset into a test dataset and a training dataset.
     *  The test dataset is defined by 'tRange' and the rest of the data forms
     *  the training dataset.
     *  @param algor  the prediction algorithm being applied (e.g., `NeuralNet_3L`)
     *  @param k      the number of crosses and cross-validations (defaults to 10x)
     */
    def crossValidate (algor: (MatriD, MatriD) => NeuralNet, k: Int = 10): Array [Statistic] =
    {
        val stats = Array.fill (ny * fitLabel.length) (new Statistic ())
        val tSize = x.dim1 / k                                    // size of each test dataset

        for (i <- 0 until k) {
            val tRange = Range (i * tSize, (i+1) * tSize)         // row range for test dataset
            val x_te = x.slice (tRange)                           // test data matrix
            val y_te = y.slice (tRange)                           // test response matrix
            val x_tr = x.sliceEx (tRange)                         // training data matrix
            val y_tr = y.sliceEx (tRange)                         // training response matrix

            if (DEBUG) {
                println ("x_te = " + x_te)
                println ("y_te = " + y_te)
                println ("x_tr = " + x_tr)
                println ("y_tr = " + y_tr)
            } // if

            val model = algor (x_tr, y_tr)                        // construct next model using training dataset
            model.train ()                                        // train the model
            model.eval (x_te, y_te)                               // evaluate model on test dataset

            for (j <- 0 until ny) {
                val qm = model.fitA(j).fit                        // get quality of fit measures for column j
                for (q <- qm.indices) stats(j * fitLabel.length + q).tally (qm(q))   // tally these measures
            } // for
        } // for

        println ("------------------------------------------------------------")
        println ("stats = " + stats.deep)
        println ("------------------------------------------------------------")
        stats
    } // crossValidate

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** The 'crossVal' abstract method must be coded in implementing classes to
     *  call the above 'crossValidate' method.  The 'algor' parameter may be
     *  specified as a lambda function to create the prediction algorithm.
     *  @param k  the number of crosses and cross-validations (defaults to 10x)
     */
    def crossVal (k: Int = 10)

} // NeuralNet abstract class
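
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `NeuralNetSketch` object gives a minimal usage sketch of the `NeuralNet`
 *  API: build a model, train it, evaluate the fit and run cross-validation.
 *  It assumes a concrete subclass `NeuralNet_3L` constructible from the two data
 *  matrices alone; substitute whichever concrete subclass is actually available,
 *  and adjust its constructor arguments as needed.
 *  > runMain scalation.analytics.NeuralNetSketch
 */
object NeuralNetSketch extends App
{
    import scalation.linalgebra.MatrixD

    // small synthetic dataset: 8 input vectors (nx = 3) and 8 output vectors (ny = 2)
    val x = new MatrixD ((8, 3), 1.0, 0.1, 0.2,
                                 1.0, 0.2, 0.3,
                                 1.0, 0.3, 0.4,
                                 1.0, 0.4, 0.5,
                                 1.0, 0.5, 0.6,
                                 1.0, 0.6, 0.7,
                                 1.0, 0.7, 0.8,
                                 1.0, 0.8, 0.9)
    val y = new MatrixD ((8, 2), 0.1, 0.9,
                                 0.2, 0.8,
                                 0.3, 0.7,
                                 0.4, 0.6,
                                 0.5, 0.5,
                                 0.6, 0.4,
                                 0.7, 0.3,
                                 0.8, 0.2)

    val nn = new NeuralNet_3L (x, y)                              // assumed concrete subclass
    nn.setWeights ()                                              // initialize the weight matrices
    nn.train ().eval ()                                           // fit the weights, then in-sample diagnostics
    nn.fitMap ()                                                  // show quality of fit per output column

    // 2-fold cross-validation: the lambda rebuilds the model on each training fold
    nn.crossValidate ((x_tr, y_tr) => new NeuralNet_3L (x_tr, y_tr), k = 2)

} // NeuralNetSketch object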