//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** @author John Miller * @version 1.6 * @date Sun Jan 4 23:09:27 EST 2015 * @see LICENSE (MIT style license file). * * @title Model: ANalysis Of VAriance (ANOVA) with one categorial variable */ package scalation.analytics import scala.collection.mutable.Set import scalation.linalgebra.{VectoD, VectorD, VectoI, VectorI} import scalation.plot.Plot import scalation.stat.Statistic import scalation.util.{banner, Error} import RegTechnique._ //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ANOVA1` class supports one-way ANalysis Of VAriance (ANOVA), i.e, * it allows only one binary/categorial treatment variable. It is framed using * General Linear Model 'GLM' notation and supports the use of one * binary/categorical treatment variable 't'. This is done by introducing * dummy variables 'd_j' to distinguish the treatment level. The problem is * again to fit the parameter vector 'b' in the following equation *

* y = b dot x + e = b_0 + b_1 * d_1 + b_2 * d_2 + ... + b_k * d_k + e *

* where 'e' represents the residuals (the part not explained by the model). * Use Least-Squares (minimizing the residuals) to solve for the parameter vector 'b' * using the Normal Equations: *

* x.t * x * b = x.t * y
* b = fac.solve (.)
*

* @see `ANCOVA` for models with multiple variables
* @see psych.colorado.edu/~carey/Courses/PSYC5741/handouts/GLM%20Theory.pdf
* @param t          the treatment/categorical variable vector
* @param y          the response/output vector
* @param fname_     the feature/variable names
* @param technique  the technique used to solve for b in x.t*x*b = x.t*y
*/
class ANOVA1 (t: VectoI, y: VectoD, fname_ : Strings = null, technique: RegTechnique = QR)
      extends Regression (VectorD.one (t.dim) +^: Variable.dummyVars (t), y, fname_, null, technique)
         with ExpandableVariable
{
    // Note: this check runs after the superclass constructor has already been given the data.
    if (t.dim != y.dim) {
        flaw ("constructor", "dimensions of t and y are incompatible")
    } // if

    // Capture the values reported by `Variable.get_shift_tmax` at construction time;
    // they are reused by `expand` when building dummy variables for new instances.
    private val (shift, tmax) = Variable.get_shift_tmax

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Expand a treatment level into the vector of model terms: a leading 1.0
     *  followed by the dummy variables produced by `Variable.dummyVar`.
     *  Only the first element of 't' is read (converted to an `Int`).
     *  @param t     the vector whose first element holds the treatment level
     *  @param nCat  the number of categorical variables (not used by this implementation)
     */
    def expand (t: VectoD, nCat: Int = 1): VectoD =
    {
        val level   = t(0).toInt
        val dummies = Variable.dummyVar (level, shift, tmax)
        VectorD (1.0) ++ dummies
    } // expand

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Expand the vector 'zt' via `expand` and return the predicted response
     *  for the expanded vector.
     *  @param zt  the unexpanded vector holding the categorical value
     */
    def predict_ex (zt: VectoD): Double =
    {
        val expanded = expand (zt)
        predict (expanded)
    } // predict_ex

} // ANOVA1 class


//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ANOVA1Test` object tests the `ANOVA1` class using the following
 *  regression equation.
 *

* y = b dot x = b_0 + b_1*d_1 + b_2*d_2 *

* > runMain scalation.analytics.ANOVA1Test
*/
object ANOVA1Test extends App
{
    // Alternative zero-based coding of the treatment levels, kept for reference:
    // val t = VectorI (0, 0, 0, 1, 1, 1, 2, 2, 2)

    // treatment level data: three levels with three observations each
    val t = VectorI (1, 1, 1, 2, 2, 2, 3, 3, 3)

    // response vector, one response per entry of t
    val y = VectorD (755.0, 865.0, 815.0, 442.0, 420.0, 401.0, 282.0, 250.0, 227.0)

    println (s"t = $t")
    println (s"y = $y")

    // build the model, show its design matrix, then train, evaluate and report
    banner ("ANOVA1 Model")
    val ano = new ANOVA1 (t, y)
    println (s"x = ${ano.getX}")
    ano.train ().eval ()
    println (ano.report)

    // predict for a new instance, both from the expanded and the unexpanded form
    banner ("Make Predictions")
    val z  = VectorD (2)                                             // new instance
    val ze = VectorD (1.0, 2.0, 1)                                   // expected expansion of z
    assert (ze == ano.expand (z))
    println (s"predict ($ze) = ${ano.predict (ze)}")
    println (s"predict_ex ($z) = ${ano.predict_ex (z)}")

    // recompute the fitted value of every observation from its treatment level
    val yp = new VectorD (y.dim)
    yp.range.foreach { i =>
        yp(i) = ano.predict (ano.expand (VectorD (t(i))))
    } // foreach
    println (s" y = $y \n yp = $yp")

    // plot actual and predicted responses against the treatment level
    new Plot (t.toDouble, y, yp, "ANOVA1", true)

} // ANOVA1Test object