//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 1.6
 *  @date    Sun Jan  4 23:09:27 EST 2015
 *  @see     LICENSE (MIT style license file).
 *
 *  @title   Model: ANalysis of COVAriance (ANCOVA) with 1 categorical variable
 */

package scalation.analytics

import scala.collection.mutable.{Map, Set}

import scalation.linalgebra.{MatriD, MatrixD, VectoD, VectorD, VectoI, VectorI}
import scalation.util.{banner, Error}

import RegTechnique._
import Variable.dummyVar

//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ANCOVA1` class supports ANalysis of COVAriance 'ANCOVA1'.  It allows
 *  the addition of a categorical treatment variable 't' into a multiple linear
 *  regression.  This is done by introducing dummy variables 'dj' to distinguish
 *  the treatment level.  The problem is again to fit the parameter vector 'b'
 *  in the augmented regression equation
 *
 *      y  =  b dot x + e  =  b0  +  b_1 * x_1  +  b_2 * x_2  +  ... b_k * x_k
 *                                +  b_k+1 * d_1  +  b_k+2 * d_2  +  ... b_k+l * d_l + e
 *
 *  where 'e' represents the residuals (the part not explained by the model).
 *  Use Least-Squares (minimizing the residuals) to solve for the parameter vector 'b'
 *  using the Normal Equations:
 *
 *      x.t * x * b  =  x.t * y
 *      b  =  fac.solve (.)
 *
 *  't' has categorical values/levels, e.g., treatment levels (0, ... 't.max ()')
 *  @see see.stanford.edu/materials/lsoeldsee263/05-ls.pdf
 *  @param x_         the data/input matrix of continuous variables
 *  @param t          the treatment/categorical variable vector
 *  @param y          the response/output vector
 *  @param fname_     the feature/variable names
 *  @param technique  the technique used to solve for b in x.t*x*b = x.t*y
 */
class ANCOVA1 (x_ : MatriD, t: VectoI, y: VectoD, fname_ : Strings = null,
               technique: RegTechnique = QR)
      extends Regression (x_ ++^ Variable.dummyVars (t), y, fname_, null, technique)
      with ExpandableVariable
{
    if (t.dim != y.dim) flaw ("constructor", "dimensions of t and y are incompatible")

    // NOTE(review): read after the superclass argument 'Variable.dummyVars (t)' has been
    // evaluated; presumably that call is what sets these values - confirm in `Variable`.
    private val (shift, tmax) = Variable.get_shift_tmax                  // save shift and tmax

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Expand the vector 'zt' into a vector of terms/columns including dummy variables.
     *  All elements but the last are kept as they are; the last element is taken as
     *  the categorical value, truncated to an `Int` and replaced by its dummy variables.
     *  @param zt    the vector with categorical value (at the end) to expand
     *  @param nCat  the number of categorical variable (currently locked at one,
     *               i.e., this argument is not used)
     *  @return      the continuous part of 'zt' followed by the dummy variables
     */
    def expand (zt: VectoD, nCat: Int = 1): VectoD =
    {
        val last = zt.dim - 1                                            // use 1, not nCat
        val z    = zt.slice (0, last)                                    // continuous part
        val tval = zt(last)                                              // named 'tval' to avoid shadowing the constructor's 't'
        z ++ Variable.dummyVar (tval.toInt, shift, tmax)
    } // expand

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given the vector 'zt', expand it and predict the response value.
     *  @param zt  the vector with categorical values (at the end) to expand
     *  @return    the result of 'predict' applied to the expanded vector
     */
    def predict_ex (zt: VectoD): Double = predict (expand (zt))

} // ANCOVA1 class


//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ANCOVA1` companion object provides factory functions.
 */
object ANCOVA1 extends Error
{
    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Create an `ANCOVA1` model from a combined data matrix 'xt'.
     *  The result type is declared explicitly: with procedure syntax (no '=')
     *  the method would return `Unit` and the newly built model would be discarded.
     *  @param xt         the data/input matrix of continuous variables and a
     *                    treatment/categorical variable in the last column
     *  @param y          the response/output vector
     *  @param fname      the feature/variable names
     *  @param technique  the technique used to solve for b in x.t*x*b = x.t*y
     *  @return           the new `ANCOVA1` model
     */
    def apply (xt: MatriD, y: VectoD, fname: Strings = null,
               technique: RegTechnique = QR): ANCOVA1 =
    {
        val (x, t) = pullResponse (xt: MatriD)                           // split off the treatment column
        new ANCOVA1 (x, t.toInt, y, fname, technique)
    } // apply

} // ANCOVA1 object


//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ANCOVA1Test` object tests the `ANCOVA1` class using the following
 *  regression equation.
 *

* y = b dot x = b_0 + b_1*x_1 + b_2*x_2 + b_3*d_1 + b_4*d_2 *

*  > runMain scalation.analytics.ANCOVA1Test
*/
object ANCOVA1Test extends App
{
    // 6 data points: constant term, x_1 coordinate, x_2 coordinate
    val x = new MatrixD ((6, 3), 1.0, 36.0,  66.0,                      // 6-by-3 matrix
                                 1.0, 37.0,  68.0,
                                 1.0, 47.0,  64.0,
                                 1.0, 32.0,  53.0,
                                 1.0, 42.0,  83.0,
                                 1.0,  1.0, 101.0)

    // one treatment level per data point
    val t = VectorI (0, 0, 1, 1, 2, 2)

    // one response value per data point
    val y = VectorD (745.0, 895.0, 442.0, 440.0, 643.0, 1598.0)

    println (s"x = $x")
    println (s"t = $t")
    println (s"y = $y")

    banner ("ANCOVA1 Model")
    val anc = new ANCOVA1 (x, t, y)
    println (s"xe = ${anc.getX}")                                       // x expanded with dummy variables

    anc.train ().eval ()
    println (anc.report)

    banner ("Make Predictions")
    val z  = VectorD (1.0, 20.0, 80.0, 1.0)                             // new instance, categorical value last
    val ze = VectorD (1.0, 20.0, 80.0, 2.0, 1.0)                        // the same instance in expanded form

    assert (ze == anc.expand (z))                                       // expansion must give the hand-built vector

    println (s"predict ($ze) = ${anc.predict (ze)}")
    println (s"predict_ex ($z) = ${anc.predict_ex (z)}")

} // ANCOVA1Test object