//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** @author John Miller, Michael E. Cotterell * @version 1.6 * @date Sun Jan 11 19:05:20 EST 2015 * @see LICENSE (MIT style license file). * * @title Model: Exponential Regression */ // FIX: needs improved optimization: try IRWLS package scalation.analytics import scala.collection.mutable.Set import scala.math.{exp, log, sqrt} import scalation.linalgebra.{MatriD, MatrixD, VectoD, VectorD} import scalation.math.noDouble import scalation.minima.QuasiNewton import scalation.stat.Statistic import scalation.plot.Plot import Fit._ import MatrixTransform._ //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ExpRegression` class supports exponential regression. In this case, * 'x' is multi-dimensional [1, x_1, ... x_k]. Fit the parameter vector 'b' in the * exponential regression equation *

* log (mu (x)) = b dot x = b_0 + b_1 * x_1 + ... b_k * x_k *

* @see www.stat.uni-muenchen.de/~leiten/Lehre/Material/GLM_0708/chapterGLM.pdf * @param x the data/input matrix * @param y the response/output vector * @param fname_ the feature/variable names * @param hparam the hyper-parameters (currently none) * @param nonneg whether to check that responses are nonnegative */ class ExpRegression (x: MatriD, y: VectoD, fname_ : Strings = null, hparam: HyperParameter = null, nonneg: Boolean = true) extends PredictorMat (x, y, fname_, hparam) { if (nonneg && ! y.isNonnegative) flaw ("constructor", "response vector y must be nonnegative") private val DEBUG = false // debug flag private var n_dev = -1.0 // null dev: -LL, for null model (intercept only) private var r_dev = -1.0 // residual dev: -LL, for full model private var pseudo_rSq = -1.0 // McFaffen's pseudo R-squared //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** For a given parameter vector b, compute '-2 * Log-Likelihood' (-2LL). * '-2LL' is the standard measure that follows a Chi-Square distribution. * @see www.stat.cmu.edu/~cshalizi/350/lectures/26/lecture-26.pdf * @see www.statisticalhorizons.com/wp-content/uploads/Allison.StatComp.pdf * @param b the parameters to fit */ def ll (b: VectoD): Double = { var sum = 0.0 for (i <- y.range) { val bx = b dot x(i) sum += -bx - y(i) / exp (bx) } // for -2.0 * sum } // ll //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** For a given parameter vector b, compute '-2 * Log-Likelihood' (-2LL) for * the null model (the one that does not consider the effects of x(i)). * @param b the parameters to fit */ def ll_null (b: VectoD): Double = { var sum = 0.0 for (i <- y.range) { val bx = b(0) // only use intercept sum += -bx - y(i) / exp (bx) } // for -2.0 * sum } // ll_null //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Train the predictor by fitting the parameter vector (b-vector) in the * exponential regression equation. * @param x_ the training/full data/input matrix * @param y_ the training/full response/output vector */ def train (x_ : MatriD = x, y_ : VectoD = y): ExpRegression = { // FIX - currently only works for x_ = x and y_ = y train_null () val b0 = new VectorD (x_.dim2) // use b_0 = 0 for starting guess for parameters val bfgs = new QuasiNewton (ll) // minimizer for -2LL b = bfgs.solve (b0) // find optimal solution for parameters // e = y_ / (x_ * b) // residual/error vector e e = y_ - (x_ * b).map (exp _) // residual/error vector e this } // train //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** For the null model, train the classifier by fitting the parameter vector * (b-vector) in the logistic regression equation using maximum likelihood. * Do this by minimizing -2l. */ def train_null (): Unit = { val b0 = new VectorD (x.dim2) // use b0 = 0 for starting guess for parameters val bfgs = new QuasiNewton (ll_null) // minimizer for -2l val b_n = bfgs.solve (b0) // find optimal solution for parameters n_dev = ll_null (b_n) // measure of fitness for null model } // train_null //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Predict the value of 'y = f(z)' by evaluating the formula 'y = exp (b dot z)', * e.g., 'exp (b_0, b_1, b_2) dot (1, z_1, z_2)'. * @param z the new vector to predict */ override def predict (z: VectoD): Double = exp (b dot z) //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Build a sub-model that is restricted to the given columns of the data matrix. * @param x_cols the columns that the new model is restricted to */ def buildModel (x_cols: MatriD): ExpRegression = { new ExpRegression (x_cols, y, null, hparam, nonneg) } // buildModel } // ExpRegression class //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ExpRegression` companion object provides factory apply functions and a testing method. */ object ExpRegression extends ModelFactory { val drp = (null, null, true) // default remaining parameters //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Create an `ExpRegression` object from a combined data-response matrix. * The last column is assumed to be the response column. * @param xy the combined data-response matrix (predictors and response) * @param fname the feature/variable names * @param hparam the hyper-parameters * @param nonneg whether to check that responses are nonnegative */ def apply (xy: MatriD, fname: Strings = null, hparam: HyperParameter = null, nonneg: Boolean = true): ExpRegression = { val n = xy.dim2 if (n < 2) { flaw ("apply", s"dim2 = $n of the 'xy' matrix must be at least 2") null } else { val (x, y) = pullResponse (xy) new ExpRegression (x, y, fname, hparam, nonneg) } // if } // apply //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Create an `ExpRegression` object from a data matrix and a response vector. * This factory function provides data rescaling. * @see `ModelFactory` * @param x the data/input m-by-n matrix * (augment with a first column of ones to include intercept in model) * @param y the response/output m-vector * @param fname the feature/variable names (use null for default) * @param hparam the hyper-parameters (use null for default) * @param nonneg whether to check that responses are nonnegative */ def apply (x: MatriD, y: VectoD, fname: Strings, hparam: HyperParameter, nonneg: Boolean): ExpRegression = { val n = x.dim2 if (n < 1) { flaw ("apply", s"dim2 = $n of the 'x' matrix must be at least 1") null } else if (rescale) { // normalize the x matrix val (mu_x, sig_x) = (x.mean, stddev (x)) val xn = normalize (x, (mu_x, sig_x)) new ExpRegression (xn, y, fname, hparam, nonneg) } else { new ExpRegression (x, y, fname, hparam, nonneg) } // if } // apply } // ExpRegression object //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ExpRegressionTest` object tests `ExpRegression` class using the following * exponential regression problem. * > runMain scalation.analytics.ExpRegressionTest */ object ExpRegressionTest extends App { val x = new MatrixD ((5, 3), 1.0, 36.0, 66.0, // 5-by-3 matrix 1.0, 37.0, 68.0, 1.0, 47.0, 64.0, 1.0, 32.0, 53.0, 1.0, 1.0, 101.0) val y = VectorD (745.0, 895.0, 442.0, 440.0, 1598.0) val z = VectorD (1.0, 20.0, 80.0) println ("x = " + x) val erg = new ExpRegression (x, y) erg.train ().eval () println (erg.report) val yp = erg.predict (x) println ("y = " + y) println ("yp = " + yp) val t = VectorD.range (0, y.dim) new Plot (t, y, yp, "ExpRegressionTest") val yp2 = erg.predict (z) println (s"predict ($z) = $yp2") } // ExpRegressionTest object //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ExpRegressionTest2` object has a basic test for the `ExpRegression` class. * > runMain scalation.analytics.ExpRegressionTest */ object ExpRegressionTest2 extends App { import scalation.random.{Uniform, Exponential, Random} //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Test `ExpRegression` by simulating 'n'-many observations. * @param n number of observations * @param k number of variables * @return (actual, estimated, r^2) */ def test (n: Int = 10000, k: Int = 5): (Int, Int, VectorD, VectoD, Double) = { val u = new Uniform (0, 1) // uniform random val e = new Exponential (1) // exponential error val r = new Random () val x = new MatrixD (n, k) // data matrix val y = new VectorD (x.dim1) // response vector val b = new VectorD (k) // known coefficients for (i <- b.range) b(i) = 1 + r.gen * 6 for (i <- x.range1; j <- x.range2) x(i, j) = u.gen for (i <- y.range) y(i) = exp (x(i) dot b) * e.gen val erg = new ExpRegression (x, y) erg.train ().eval () (n, k, b, erg.parameter, erg.fit(0)) } // test val tests = Array.ofDim [(Int, Int, VectorD, VectoD, Double)] (10) for (k <- tests.indices) tests(k) = test (1000, k + 1) tests.foreach { case (n, k, actual, fit, rSquared) => { actual.setFormat ("%.4f, ") fit.setFormat ("%.4f, ") println ("nobs = %d, regressors = %d, R^2 = %f\nactual = %s\nfir = %s\n".format(n, k, rSquared, actual, fit)) } // case } // foreach } // ExpRegressionTest2